#! /bin/sh

#================================================================
# estwolels
# List the path and the URL of cache files of wwwoffle
#================================================================


# set variables
# every tool started by this script runs in the C locale
LANG=C
LC_ALL=C
export LANG LC_ALL
progname="estwolels"
spooldir="/var/spool/wwwoffle"
conffile="/etc/wwwoffle/wwwoffle.conf"
# URL suffixes to leave out, joined with "|" so the list can be dropped
# into a regular expression; a backslash at the end of a line inside the
# double quotes continues the string without adding any character
denysufs="css|js|dtd|rdf|rss|swf|class|md5\
|png|gif|jpg|jpeg|jpe|bmp|tif|tiff\
|pnm|pbm|pgm|ppm|xbm|xpm|ps|eps|au|svg|dvi|ico\
|mid|midi|kar|mp3|mp2|au|snd|wav|aif|aiff\
|mpg|mpeg|mpe|qt|mov|avi\
|pdf|rtf|rtx|doc|xls|ppt|xdw|csv|tsv\
|gz|zip|bz2|lzh|lha|tar|bin|cpio|shar|jar|war"
# upper bound that listed entries are compared against
sizemax=1048576

# show help message
# when the first argument is exactly --help, print the usage text to
# standard output and leave with status 0
case "$1" in
  --help)
    printf 'List the path and the URL of cache files of wwwoffle.\n\nUsage:\n  %s [spooldir]\n\n' "$progname"
    exit 0
    ;;
esac


# check the spool directory
# a non-empty first argument replaces the default spool directory
[ -z "$1" ] || spooldir="$1"
# the http subdirectory has to be a directory that can be read and
# searched; otherwise report the problem on standard error and give up
[ -d "$spooldir/http" ] && [ -r "$spooldir/http" ] && [ -x "$spooldir/http" ] || {
  printf '%s: cannot scan %s\n' "$progname" "$spooldir/http" >&2
  exit 1
}


# check the configuration file
# confopt becomes "-c FILE" when the configuration file exists as a
# regular file, and the empty string otherwise
if [ -f "$conffile" ]
then
  confopt="-c $conffile"
else
  confopt=""
fi


# function to output URL list
# Runs wwwoffle-ls once for every entry found in the http and ftp
# subdirectories of the spool directory, http entries first, and lets
# the combined listing go to standard output.
# Globals read: spooldir (spool directory), confopt (empty or "-c FILE").
# Entries are taken from a glob instead of parsing the output of ls, so
# names reach wwwoffle-ls unchanged, and a missing ftp subdirectory is
# skipped quietly instead of producing an error message from ls.
outputlist(){
  for ol_proto in http ftp
  do
    # only the http directory is validated by the caller
    [ -d "$spooldir/$ol_proto" ] || continue
    for ol_entry in "$spooldir/$ol_proto"/*
    do
      # an unmatched glob is left as the literal pattern; skip it
      [ -e "$ol_entry" ] || [ -L "$ol_entry" ] || continue
      # $confopt is deliberately unquoted so that "-c FILE" splits into
      # two arguments and an empty value vanishes
      wwwoffle-ls $confopt "$ol_proto://${ol_entry##*/}"
    done
  done
}


# list the path and the URL of cache files
# The listing from outputlist is reduced to lines holding an http or ftp
# URL, lines ending in one of the denied suffixes are dropped (case is
# ignored), and awk turns each remaining line into three tab separated
# columns: the cache file path under the spool directory, the URL, and
# the last path component of the URL with any query string removed.
# grep -E is used instead of egrep: egrep is obsolescent and recent GNU
# grep prints a warning on standard error every time it is invoked.
outputlist |
grep -E " (http|ftp)://" |
grep -E -i -v "\.($denysufs)$" |
awk -F ' ' -v sdir="$spooldir" -v sizemax="$sizemax" '
{
  # skip entries whose second field is below 1 or above sizemax
  # (presumably the size reported by wwwoffle-ls; confirm against its output)
  if($2 < 1 || $2 > sizemax) next
  # the cache file name is everything before the first space
  path = $0
  sub(/ .*/, "", path)
  # the protocol is http unless the line holds an ftp URL
  prot = "http"
  if($0 ~ / ftp:\/\//) prot = "ftp"
  # the host part is the text between the scheme and the first slash
  domain = $0
  sub(/.* [A-Za-z]+:\/\//, "", domain)
  sub(/\/.*/, "", domain)
  # the URL with its scheme removed
  url = $0
  sub(/.* [A-Za-z]+:\/\//, "", url)
  # last path component of the URL, with any query string removed
  lfile = url
  sub(/\?.*/, "", lfile)
  sub(/.*\//, "", lfile)
  printf("%s/%s/%s/%s\t%s://%s\t%s\n", sdir, prot, domain, path, prot, url, lfile)
}
'


# exit normally
# the status is 0 regardless of how the pipeline above ended
exit 0



# END OF FILE
