#!/bin/sh
# wwwoffle-duplicate-headers -- Investigate headers of articles repeatedly fetched
# Copyright       : http://www.fsf.org/copyleft/gpl.html
# Author          : Dan Jacobson -- http://jidanni.org/comp/wwwoffle/
# Created On      : Fri Jul  4 15:58:16 2003
# Last Modified On: Fri Nov 21 09:24:40 2003
# Update Count    : 9
# Discussion: http://article.gmane.org/gmane.network.wwwoffle.user/297
# Usage: just run this script any time when offline
# Output: repeat count, URL, and HTTP headers by repeat count
#
# Environment:
#   wwwoffle_cache  directory to scan; defaults to /var/spool/wwwoffle when unset.
# Exit status:
#   33 when any command-line argument is given.

# Kept in the script body (not on the #! line) so it also applies when the
# script is started as "sh wwwoffle-duplicate-headers".
set -e

if [ "$#" -ne 0 ]; then
  printf '%s: no args allowed\n' "$0" >&2
  exit 33
fi

cd "${wwwoffle_cache-/var/spool/wwwoffle}" || exit

# Stage 1: list every U* file below the *time* directories and sort on the
#          part after the first "/", so equal names from different
#          directories become adjacent (ties fall back to whole-line order).
# Stage 2: count how often each name occurs regardless of its directory and
#          keep only names seen more than once, printing "count first-path".
#          (awk is used here because uniq has no portable way to split
#          fields on "/".)
# Stage 3: most frequently repeated entries first.
# The *time* glob is deliberately unquoted: the shell must expand it, and the
# resulting paths must stay of the form "dir/Uname" for the steps below.
find *time* -name 'U*' -print |
  sort -t/ -k2 |
  awk '
    {
      name = substr($0, index($0, "/") + 1)   # text after the first "/"
      if (!(name in hits))
        sample[name] = $0                     # first path seen for this name
      hits[name]++
    }
    END {
      for (name in hits)
        if (hits[name] > 1)
          print hits[name], sample[name]
    }
  ' |
  sort -nr |
  while read -r n u; do
    # Blank line, then "count: " with no newline so the contents of the
    # U file continue on the same line.
    printf '\n%s: ' "$n"
    sed : "$u"   # a lone label command: sed just copies the file to stdout
    echo
    # For every D file sharing this entry's name suffix: print lines up to
    # the first line that begins with CR (the empty CRLF line closing the
    # header section), with the CR removed, then tally identical lines.
    awk '/^\r/{nextfile};{sub("\r","")};1' *time*/D"${u#*/U}" |
      sort |
      uniq -c
  done