#!/bin/bash
# wwwoffle-chunks -- chop a wwwoffle index into smaller chunks that stick around
# Copyright       : http://www.fsf.org/copyleft/gpl.html
# Author          : Dan Jacobson -- http://jidanni.org/comp/wwwoffle/
# Created On      : Aug 2001
# Last Modified On: Sat Apr 12 10:29:27 2008
# Update Count    : 468
#
# Try wwwoffle-chunks --help
#
# Overview
#   Splits the output of a WWWOFFLE index page (default "lasttime") into
#   fixed-size HTML files under $HOME/wwwoffle-chunks and maintains an
#   index.html listing them.  With a single option it instead edits,
#   reports on, exports or removes the most recently accessed chunk.
#
# Interpreter
#   Bash is required: the chunking code below uses arrays and $((i++)).
#
# Environment read by this script
#   HOME             required; chunks live in $HOME/wwwoffle-chunks
#   wwwoffle_cache   WWWOFFLE spool directory (default /var/spool/wwwoffle)
#   EDITOR           required by -e
#   BROWSER          optional; used by -1 when a process of that name runs
#   WWWOFFLE_CHUNKS  shell assignments eval'ed before chunking, e.g. size=44
#   size, sort       chunk length (default 32), index sort key (default alpha)
#
# Exit codes used below: 22 (cannot cd), 24 (bad cache dir), 34 (nothing
# fetched), 44 (-1 refused, chunk not recently read), 56 (-r is obsolete),
# 87 (-1 found nothing to remove).
#
# NOTE(review): the copy of this file that was reviewed had lost every
# "<...>" span, so the HTML emitted by the here-documents below and a few
# here-document operators were reconstructed.  Each reconstructed spot
# carries its own NOTE(review); confirm them against a pristine copy.

this_prog=$(basename "$0")

# More than one argument, or --help: print the manual and stop.
if [ $# -gt 1 ] || [ "x${1-}" = x--help ]; then
    exec cat <<\EOF
Concept: There's no way we are going to obediently read all the stuff
in the "lasttime" index before the "thirst for info" drives us back
online. Prevtime indexes are a help, but better yet, each time when we
go offline, let's chop up lasttime into oh, screen sized separate
parts that stick around until you are finished with them."

You end up with (default=)32 entry chunks of "lasttime" that pile up
until you are confident you have finished savoring each entry, at
which time you leave your browser to do "wwwoffle-chunks -1" which
zaps the last chunk you read, and (if $BROWSER is set) hopefully then
sends you back to read some other chunk.

Usage: You are a wwwoffle user who is now offline.
$ mv wwwoffle-chunks ~/bin/ #put this program in your bin dir. chmod +x ...
$ wwwoffle-chunks #run this program
$ $BROWSER ~/wwwoffle-chunks/index.html #use your favorite browser
When you are finshed with (all the links in) a chunk, you will
probably want to delete it:
$ wwwoffle-chunks -u #interactivly delete chunks in order of last
access (ls -tu) (^C to quit), or
$ wwwoffle-chunks -t #interactivly delete chunks in order of content
time (ls -t) or
$ wwwoffle-chunks -1 #delete last chunk accessed, no questions asked.
Both of which call wwwoffle-chunks -i to remake the index automatically.
Note, we said last chunks accessed, not last chunks created.

wwwoffle-chunks -1 would be nice to be able to do from isnide the
browser, but we don't know how.
Wait: one can copy "wwwoffle-chunks -1 " (newline included) once with
the mouse and then each time: select the shell window, paste, and
return back to the browser, all with the mouse! However, be careful
that after a while you don't acidentally cut and send other text to
the shell!

Then you will probably want to automate this after every call. The
script I hang up with is:
poff -a;(su -c 'm=~/bin/wwwoffle-chunks;$m&&$m -i' jidanni)&
Also make a bookmark in your browser to ~/wwwoffle-chunks/index.html

-c gives date comparison of chunks vs. last/prevtime indexes
--help prints this help. Do wade thru the source code to see any
further customizations possible. E.g. to cut the prevtime3 into chunks
44 lines long:
$ size=44 wwwoffle-chunks prevtime3
or:
$ export WWWOFFLE_CHUNKS='size=44' #best done in .bash_profile
$ wwwoffle-chunks prevtime3
-e edits the last accessed chunk.
Addtional option flags can be found in by digging in the source.
EOF
fi

export LC_ALL=C LANG=C
DIR=${HOME?}/wwwoffle-chunks
: ${wwwoffle_cache=/var/spool/wwwoffle} #if not otherwise set by the user

# Single-option modes.  Every arm except -[tu1] ends in an explicit exit;
# anything that matches no arm falls through to the -i test and then to
# the chunking code further down.  "Last accessed chunk" is always the
# first name from `ls -tu` that is not the index.
case "$*" in
    -e) #edit last accessed chunk
        : ${EDITOR?}; set -e; cd "$DIR"; set -- $(ls -tu|sed '/^index\.html$/d;q')
        # (An older ed-based filter was commented out here; only
        # fragments of it survived in the reviewed copy.)
        cp $1 /tmp # keep a backup before filtering in place
        # Drop entries whose link ends in a script/image/stylesheet suffix.
        perl -nwi -e 'next if /\.(js|ico|jpe?g|bmp|swf|png|gif|css)">/i; print' $1
        case $EDITOR in
            emacs*) # what a hack
                # Append an Emacs local-variables trailer once; grep
                # status 1 means "not present yet".
                grep Local\ Variables >/dev/null $1 ||
                    case $? in
                        1) cat >> $1 <<\FFF
<!--
Local Variables:
make-backup-files: nil
End:
-->
FFF
                            # NOTE(review): the trailer text above was
                            # missing from the reviewed copy; it is
                            # reconstructed from the grep pattern and
                            # the remark below -- confirm.
                            #does make-backup-files off work?
                    esac
        esac
        $EDITOR $1; exit;;
    -c) #compare dates of chunks and last/prevtime indexes
        ls -dtog "$DIR"/* $wwwoffle_cache/*time*/.timestamp|
        perl -anlwe 'print "@F[3..6]"'; exit;;
    -r) echo $this_prog: try -u nowadays 1>&2; exit 56;;
    -p) #secret plucker mess option
        set -eu; cd "$DIR";
        O=-M\ 2 pluckme $(ls -tu|sed '/index\.html/d;q'); exit;;
    -x) #secret exporter option, uses my wwwoffle-cpio, includes the chunk index
        set -eu; cd "$DIR"; set -- $(ls -tu|sed '/index\.html/d;q')
        o=/tmp/$(basename $1 .html).cpio
        echo $PWD/$1|cpio -ovF $o
        lynx -dump -listonly $1|perl -nwe 's/^\s+\d+\.\s//&&print'|
        wwwoffle-cpio -ovAF $o
        bzip2 $o; ls -og $o*; exit;;
    -s) #secret spider option, but doesn't touch the U* files!
        set -eu; cd "$DIR"
        wget --spider --force-html -i $(ls -tu|sed '/index\.html/d;q'); exit;;
    -l) #secret link report option
        set -eu; cd "$DIR"
        lynx -listonly -dump $(ls -tu|sed '/index\.html/d;q')|
        perl -nwe 's/^\s+\d+\.\s+//&&print'; exit;;
    -[tu1]) #simple chunk removal
        #no simple way to put a button at the bottom of a chunk
        # to remove it and show another chunk, so:
        set -e
        cd "$DIR"
        if test $1 = -1
        then
            candidate=$(ls -tu|sed '/index\.html/d;q')
            atime='3 minutes ago'
            # Diagnostic goes to stderr like every other one in this file
            # (it used to be redirected with 2>&1, i.e. to stdout).
            test "$candidate" || { echo $this_prog: nothing to remove 1>&2; exit 87;}
            # Refuse when the chunk's access time is older than $atime.
            if test $(stat -c %X $candidate) -lt $(date +%s -d "$atime")
            then
                echo Sir, $candidate was last accessed more than $atime.
                echo Therefore, I assume you made a mistake.; exit 44;
            fi
            mv -v $candidate /tmp
            cd "$(pwd)" && sleep 1 && $this_prog -i
            test "$BROWSER" && pidof $BROWSER > /dev/null && $BROWSER index.html
            exit
        fi
        # -t / -u: interactive removal.  The exit trap remakes the index
        # however the session ends, including ^C during find -ok.
        trap "echo && echo ... && cd $(pwd) && sleep 1 && $this_prog -i" 0
        echo Times read of first 10:
        ls $1tog|head
        find $(ls $1tu) -maxdepth 0 -type f ! -name index.html -ok rm -f {} \;
        # Under set -e this ends the script, which fires the trap above.
        false
esac

# NOTE(review): both values are empty in the reviewed copy, presumably
# because their markup was stripped; a later remark ("I also want the
# strict DTD") suggests doctype held a strict DOCTYPE -- confirm.
doctype=''
charset=''

case "$*" in
    -i) #refresh index
        set -e
        cd "$DIR"
        i=index.html
        # Remake only when the index is missing or older than the directory.
        if test -f $i -a ! $i -ot .
        then echo index need not be remade
        else
            echo making index
            # NOTE(review): tags in this here-document are reconstructed;
            # only the title and heading text survived in the reviewed copy.
            cat <<EOF > $i
$doctype<html><head>$charset<title>WCI-WWWOFFLE Chunks Index</title></head><body>
<h1>WWWOFFLE Chunks Index</h1><pre>
EOF
#not all ls' have ls -l --time-style=iso, nor all find have -printf, so:
#        ls -lr $DIR|awk '$NF=="index.html"{next}
#        NF>7{if($6$7$8!=n){printf"--"}else{printf"  "};n=$6$7$8;
#        printf "%s %2s %s %s\n", $6,$7,$8,$9,$9}' >> $i

#4/2004 above becomes unordered if we ever edit a chunk, so
            # One line per chunk: the leading 8 hex digits of the file
            # name are decoded as an epoch time and printed before the
            # link.
            # NOTE(review): the opening <A HREF=...> is reconstructed;
            # only the closing tag survived in the reviewed copy.
            ls -r|perl -wne 'use POSIX qw(strftime);
                next if /^index\.html$/; chomp; $f=$_;
                s/(^[[:xdigit:]]{8})-.*/strftime "%a %b %e %H:%M %Y ",
                  localtime(hex $1)/e && print;
                print "<A HREF=\"$f\">$f<\/A>\n";'>> $i
            # NOTE(review): closing tags reconstructed.
            echo '</pre>
</body></html>' >> $i
        fi
        exit
esac

# ---- Default mode: cut one WWWOFFLE index into chunks -------------------
test -d $wwwoffle_cache||{
    echo $this_prog: $wwwoffle_cache: invalid wwwoffle_cache directory 1>&2
    exit 24;} #actually we don't really need it below, but it's nice
index=${1-lasttime}
test -d "$DIR"||mkdir "$DIR"
cd "$DIR"||exit 22
set -e #over paranoid
indexdir=$wwwoffle_cache/$index
ts=$indexdir/.timestamp
if test -r $ts
then
    # $1 = mtime in epoch seconds; the remaining words are the readable date.
    set -- $(stat -c '%Y %y' $ts)
    datestr=$(printf %x $1)
    #trying hard to order the directory no matter prevtime, etc.
    #for lynx, netscape, now ls, ls -t will give same order... that of $ts date
    # Re-split on "-" and "." so year/month/day/time become $2..$5.
    oIFS=$IFS; IFS=-.$oIFS; set -- $@; IFS=$oIFS
    nice_date=$4.$3.$2\ $5 #want guts first for skinny icons
else #too bad
    nice_date="[$(date) saved]"
    datestr=$(printf %x $(date +%s))
fi
prefix=$datestr-$index- #don't mention $index else confusing...
# User-supplied assignments such as size=44.  This is eval of the caller's
# own environment; never export WWWOFFLE_CHUNKS from untrusted input.
eval $WWWOFFLE_CHUNKS
: ${size:=32}
#now back to wwwoffle-ls, as wwwoffle -o won't print user:passwd@site 2003.11
#but then Chinese just becomes %..%..
: ${sort:=alpha};
# NOTE(review): the sed pattern is reconstructed; the reviewed copy showed
# a rendered list bullet where "<li>" presumably stood.
wwwoffle -o "http://localhost:8080/index/$index?sort=$sort"|
sed -n 's/^<li>//p'|
split -$size - $prefix
set -- $prefix??
if test -f $1
then tmp=($prefix??)
else
    echo $this_prog: Seems there were no files fetched for $index! 1>&2; exit 34
fi
shortrefs=(${tmp[@]##*-}) #??: better than *
for i in ${tmp[*]}; do mv -vf $i $i.html; done #-f: handy when testing etc.
refs=($prefix??.html)
i=0; while test ${refs[$i]}
do
    # NOTE(review): probably an anchor to ${refs[$i]} originally; kept as
    # found.  It only feeds the disabled navigation line further down.
    links[$i]="${shortrefs[$i]}"
    : $((i++))
done
echo making ${refs[*]}|xargs -n 1 #neater
m=0; for i in ${refs[*]}
do
    : $((m++))
    oldlink=${links[$m-1]}; links[$m-1]=[${shortrefs[$m-1]}]" "
    test "${refs[$m]}"||sorry="(current=last page)"
    #well, maybe they read from last to first...
    frag=${i##*-}; frag=${frag%.*}
    # Wrap the chunk in a header (inserted before line 1) and a footer
    # (appended after the last line) with ed.
    # NOTE(review): the insert command and every tag in this here-document
    # are reconstructed; only the title text, the heading text and a
    # trailing "--" survived in the reviewed copy.
    ed -s $i <<!
1i
$doctype<html><head>$charset<title>$nice_date $frag WWWOFFLE chunk</title></head><body>
<h1>WWWOFFLE chunk $i $nice_date${sort:+ sort=$sort}</h1><pre>
.
\$a
</pre><hr> --
</body></html>
.
w
q
!
    # Disabled navigation footer, as found (its markup is lost too):
    #[
    #(up) ${links[@]}
    #${refs[$m]+ . . . . . . Next:} ${shortrefs[$m]}$sorry
    #]
    links[$m-1]=$oldlink
    #I wanted to use align=center, but I also want the strict DTD...
    #with a mouse it feels better when "Next" is near the middle of the
    #page. With lynx, it being at the beginning of line is better, or hit ^E
    #Dan Mercer, comp.unix.shell: You can also use: : "${a[@]}"; echo $_
    #I should add the standard footer WWWOFFLE links, but wait, I'm trying
    #to correct a size & slowness problem, so i should make this as clean
    #as possible.
    #because WWWOFFLE 2.6a at least produces illegal html with raw & in URLs
    #tidy -q $i | tr -d \\240 > /tmp/WWW$$; mv /tmp/WWW$$ $i
    #also tidy causes \240 where nbsp was, this eats the next char, at
    #least with lynx...
    # Give the chunk the index's own timestamp so ls -t follows it.
    test -r $ts && touch -r $ts $i
done
#OK, now perhaps browse the index
#and after a while clean up the old *-chunk-* files by hand
#wish I could put a button at the bottom of each chunk to remove
#that chunk with just a click...
#I have put
#m=wwwoffle-chunks; $m && $m -i
#in a script that is run when my modem is hung up.

# Parked notes: ":" succeeds, so the here-document on the right of || is
# read by nothing.  Its terminator is not visible in the reviewed portion
# of the file, so none is added here.
:||<<\EOF #more usage stuff, parked here
After going offline, run this program to break a too-big WWWOFFLE
index into chunks to save wear and tear and waiting each time one hits
the BACK key in their favorite browser, or if they don't read their
Last Time indexes before they drop off the back of the [1] [2] [3]
... list. Then one points their browser to $DIR/... optional argument
is what index to get, default is "$defaultindex". One needn't be root
to run this program.
#Thanks to Felix Karpfen, only know other user, for tips.