# makefile -- what I use to manage my website
# Author: Dan Jacobson https://www.jidanni.org/
# Copyright: https://www.gnu.org/licenses/gpl.html
# Created: Jan 2001
# Last-Updated: 2024-03-18T04:35:01+0000
# Update #: 1707

# NOTE(review): this part of the file had been collapsed onto a few very
# long lines and text between angle brackets had been dropped.  The line
# structure below is reconstructed; places where text is evidently
# missing are marked NOTE(review) and left as found -- confirm against a
# good copy.

# Default goal: fail on purpose.
boom:; false #avoid accidental "make" accidents

# Targets below that are commands, not files.
.PHONY: boom getp disallow_images_check robots_all_valid_check hidden_trees \
	https name_check access access2 zf8 Noenus tggz show_filetypes \
	lastmod1 no_charset no_strict html5 no_html5 \
	viewport_disater_victims viewport_not_set wide_whitespace \
	any_leftover_backups find_big5 valid overlyenglish modem hardlinks \
	z-variants iconvdiff verify_but_ignore_permissions verify

# U: umask used when the /tmp listings are produced.
U=0077
# J: site name; used as ssh host, as remote directory, and as the URL
# prefix stripped in name_check.
J=jidanni.org
S=dreamhost
#radioscanningtw
#taizhongbus

# Rebuild the sitemap: keep the previous copy in /tmp, generate into a
# temporary file and move it into place, run tidy -qe on the result and
# show the differences.  diff status 1 (files differ) is accepted; only
# status 2 fails the rule.
sitemap.html:/proc/self#$W#Remake each time as titles might have changed
	cp -av $@ /tmp/$@.old
	comp/sitemap-jidanni-ch > /tmp/TT$@ #Else its own entry has no title.
	mv /tmp/TT$@ $@
	tidy -qe $@
	diff /tmp/$@.old $@; test $$? != 2
#$(shell) wasteful
#depth checks, equivalent:
Nonascii=[^[:ascii:]]#one day strip less, e.g., symbols, accents... IPA?!
# tidy0: common tidy options; tidy: quiet in-place run on the current target.
tidy0=tidy --tidy-mark no -utf8
tidy=$(tidy0) -qm $@

# Run the helper script ~/bin/update-website-programs.
getp:
	sh ~/bin/update-website-programs

# Print every line that occurs exactly once in robots.txt plus the
# generated "Disallow: /DIR/" lines for all directories named images.
disallow_images_check:
	find * -name images -type d -printf "Disallow: /%p/\n"|\
	sort robots.txt -|uniq -u

# Report Disallow paths in robots.txt that no longer exist on disk.
robots_all_valid_check:robots.txt
	perl -nwe 'next unless s@^Disallow: /@@;chomp;$(\
	)if(!-e$$_){print"gone: $$_\n"}' $?

# Recursively list the Disallow'ed paths (except those matching images or
# sitemap) into /tmp/hidden_trees, then pass that file to pluckme.
hidden_trees:robots.txt
	@LC_ALL=C date
	perl -nwe 's@^Disallow: */@@||next;/images|sitemap/&&next;print' $?|\
	xargs ls -RA1F>/tmp/$@
	N=$@ pluckme /tmp/$@

#"When defining fragment identifiers
#to be backward-compatible, only strings matching the pattern
#[A-Za-z][A-Za-z0-9:_.-]* should be used. See Section 6.2 of [HTML4]
#for more information.
#http://www.w3.org/TR/html/#ref-html4"

# Show lines in *.html files that match the target name (https).
https:
	find * -name \*.html|xargs grep $@

# Print links (as listed by lynx) whose fragment identifier does not
# match the pattern quoted above; the part up to "$J/" is removed.
name_check:
	find * -name \*.html|\
	xargs lynx -listonly -dump|perl -nwe \
	'if(/#/&&!/#[A-Za-z][A-Za-z0-9:_.-]*$$/){s@.*$J/@@;print}'

# Run tidy's level 3 accessibility checks over all *.html files and tally
# identical messages (the text before the first "-" of "line..." messages
# is removed first).
access:
	find * -name \*.html|\
	xargs $(tidy0) -e -access 3 2>&1|\
	perl -pwe 'if(/^line/){s/[^-]+//}'|sort|uniq -c

# Same checks, reported per file with empty lines removed.
access2:
	set -e -- $$(find * -name \*.html);\
	for i do echo $$i:; $(tidy0) -e -access 3 $$i 2>&1|sed '/^$$/d'; done

#One day clean up comments in HTML
#GREAT analysis of long lines
# For each *.html file under $(uhrange), print the lines that are longer
# than 80 characters once every non-ASCII character has been replaced by
# the two characters "12".  uhrange is not defined in this part of the file.
zf8:#/tmp/zhlist
	set -e -- $$(find $(uhrange) -name \*.html);\
	for i do echo $$i:;perl -C -nlwe \
	'$$l=$$_;s/$(Nonascii)/12/g;print $$l if length>80' $$i;done

# List files ending in html that have no content="xx-xx" attribute; the
# list is also saved as /tmp/Noenus.
Noenus:
	find * -name \*html|\
	xargs grep --files-without-match content=\"..-..\"|tee /tmp/$@

# List *.zip and *.gz files.
tggz:
	find * -name \*.zip -o -name \*.gz

# Tally files by what remains of their path from the first "." onward.
show_filetypes:
	find * -type f|perl -pwe's/[^.]*//'|sort|uniq -c|sort -rn

# List *.html files lacking the text "Last modified:".
lastmod1:
	find * -name \*.html|\
	xargs grep --files-without-match Last\ modified:

# List *.html files lacking "charset=".
no_charset:
	find * -name \*.html|\
	xargs grep --files-without-match charset=

# List *.html files lacking the HTML 4 strict DTD URL.
no_strict:
	find * -name \*.html|\
	xargs grep -F --files-without-match \
	'"http://www.w3.org/TR/html4/strict.dtd">'

# NOTE(review): the search string of html5 and no_html5 is empty as found,
# so html5 lists every file and no_html5 none.  Presumably an
# angle-bracketed string was lost here -- restore it from a good copy.
html5:
	find * -name \*.html|\
	xargs grep -F --files-with-matches ''
no_html5:
	find * -name \*.html|\
	xargs grep -F --files-without-match \
	''

#Viewport bad advice and recovery:
# NOTE(review): VP is empty as found; presumably its angle-bracketed value
# was lost.  Every use of $(VP) below is affected.
VP=
# noviewportCand:
# 	find * -name \*.html|\
# 	xargs fgrep --files-without-match '$(VP)'|\
# 	sed /^google/d > $@
# noviewport:noviewportCand
# 	mkdir -p /tmp/r
# 	set -xue -- $$(cat $?); while expr $$# > /dev/null; do mv $$1 /tmp/r/$$#;\
# 	perl -pwle 's!!$$&\n $(VP)!' /tmp/r/$$# > $$1; shift; done

# List *.html files containing the $(VP) string.
viewport_disater_victims:
	find * -name \*.html|\
	xargs grep -F --files-with-match '$(VP)'

# List *.html files that never mention "viewport".
viewport_not_set:
	find * -name \*.html|xargs grep -F --files-without-match viewport
#End: viewport bad advice and recovery

# Show, with line numbers, lines containing the quoted string.
# NOTE(review): the string is a single space as found; the original
# whitespace may have been altered -- confirm.
wide_whitespace:
	find * -type f|\
	xargs grep -F -n ' '

### before connecting modem
# Fail with status 99 when any path below . contains a "#".
any_leftover_backups:
	if find .|grep \#;\
	then echo BACKUP FILES LEFT OVER, TURKEY; exit 99; fi

#b5char=[\x80-\xFE][\x40-\x7E\xA1-\xFE]#but not below
# Print the name of each file (common binary formats excluded) that
# contains a printable character followed by a byte in 0x80-0xFE and a
# byte in 0x40-0x7E; reading of a file stops at its first hit.
find_big5:
	set -e -- $$(find . -type f ! -name '*'.png ! \
	-name '*'.jpg ! -name '*'.zip ! -name '*'.gz ! -name '*'.tgz \
	! -name '*'.pdf ! -name '*'.ico ! -name '*'.kmz)&&\
	for i do perl -nlwe \
	'if(/[[:print:]][\x80-\xFE][\x40-\x7E]/){print$$ARGV;exit}' $$i;done

# Run tidy in errors-only mode over all *.html files except those whose
# path starts with googleefff.
valid:#wow, fast
	find * -name '*.html'|grep -v ^googleefff|xargs tidy -q -e --gnu-emacs yes

# Among *.html files that have no FILE_en.html sibling, take those
# containing content="en-us and show their lines mentioning _en.html.
# The sibling name is built with the POSIX suffix-removal expansion so
# the recipe also works when /bin/sh is not bash.
overlyenglish:
	set -- $$(find -name \*.html ! -name \*_en.html);\
	for i do test -f $${i%.html}_en.html||echo $$i; done|\
	xargs grep -F -l content=\"en-us|xargs grep -F _en.html

# Pre-upload checks.  $(MAKE) rather than a literal "make" so that
# command-line flags and variables reach the sub-makes.
modem:
	$(MAKE) any_leftover_backups
	$(MAKE) sitemap.html
#	make latest.txt
	$(MAKE) valid

#http://wiki.dreamhost.com/index.php/KB_/_Web_Programming_/_Error_messages
# List regular files whose link count is not 1.
hardlinks:; find . -type f ! -links 1 -ls

# NOTE(review): the text from here up to the next rule is truncated as
# found (everything between a "<" and a later ">" is missing, including
# the end of this recipe and possibly whole rules).  It is kept verbatim
# as comments because it cannot run in this state -- restore from a good copy.
# images_with_no_width_and_height:
# 	find -name \*.html \( -exec perl -nwle \
# 	'BEGIN{$$i=$$w=$$h=0;}for(//tmp/`hostname`.org.`date +%s`.cpio.gz

# Print each *.html and *.txt file name and run decompozdiff on it.
z-variants: #see also comp/index.html
	find \( -name \*.html -o -name \*.txt \) \
	-print -exec decompozdiff {} \;
# Same, running the program named like the target.
iconvdiff:; find \( -name \*.html -o -name \*.txt \) -print -exec $@ {} \;

#comp/configuration/index.html: Can't set viewport. So do by hand.
#	cd $(dir $@) && tree -a -H . -T "Dan Jacobson's configuration files" \
#	-I $(notdir $@) | tee $(notdir $@) | w3m -dump -T text/html

#Bug#565214: --verify doesn't proceed deeper than the top directory, so need:
# Compare the local and remote listings with their first column removed.
verify_but_ignore_permissions:/tmp/local_list.np /tmp/remote_list.np
	ls -l /tmp/*_list*
	sum /tmp/*_list*
	diff -U 0 $?
# Compare the full local and remote listings.
verify:/tmp/local_list /tmp/remote_list; diff -U 0 $?
# NOTE(review): this part of the file had been collapsed onto long lines
# and text between angle brackets had been dropped; the line structure
# below is reconstructed.

# Targets below that are commands, not files.
.PHONY: verify_clear_cache verify_permissions1 odd_permiss stat_list \
	stat_get stat_compare verify_sums local_list local_sums \
	external_robots scp kml_no_name staticmap panoramio transparent \
	wide_images

# Remove the cached listings and checksums; never fails.
verify_clear_cache:; rm -v /tmp/local_list* /tmp/remote_list* /tmp/*_sums||:

# Tally the first column of each listing.
verify_permissions1:/tmp/local_list /tmp/remote_list
	for i in $?; do echo $$i:; cut -f 1 $$i|sort|uniq -c; done

# Show find -ls entries whose mode is none of the three expected ones.
odd_permiss:; find * -ls|perl -nwle 'next if /-rw-r--r--|drwxr-xr-x|-rwxr-xr-x/; print;'

# Stat: print "octal-mode name" for everything below the current directory.
Stat=find|sort|xargs stat --format="%a %n"
stat_list:; $(Stat)
# Save the remote listing as /tmp/StatRem and the local one as
# /tmp/StatLocal.  The "make" inside the ssh command runs on the remote
# host, so it is deliberately not $(MAKE).
stat_get:; set -eu; ssh $J cd $J '&&' make stat_list > /tmp/StatRem; $(Stat) > /tmp/StatLocal
# Print a "chmod MODE NAME" line, using the local mode, for every local
# entry that is absent from the remote listing or has another mode there.
# The loops read the handles they open (FL, FR); as found they were a
# bare "while()", which reads nothing.
stat_compare:
	perl -we 'my %k; open FL, "/tmp/StatLocal" or die; while(<FL>){next unless @F=split; $$k{$$F[1]}=$$F[0]};$(\
	)close FL or die;$(\
	)open FR, "/tmp/StatRem" or die; while(<FR>){next unless @F=split; if(exists $$k{$$F[1]} && $$k{$$F[1]}==$$F[0]){delete $$k{$$F[1]}}};$(\
	)close FR or die; for(sort keys %k){print "chmod $$k{$$_} $$_\n"};'

# FILE.np is FILE without its first tab-separated column.
%.np:%; cut -f 2- $? > $@

# Cached listings and checksums.  Local ones are produced by a sub-make
# (invoked as $(MAKE) so flags propagate) and must be non-empty; remote
# ones run "make" on the remote host through ssh.
/tmp/remote_list:; ssh $J 'umask $U && cd $J && make -s local_list' > $@
/tmp/local_list:; umask $U && $(MAKE) -s local_list > $@ && test -s $@
/tmp/remote_sums:; ssh $J 'umask $U && cd $J && make -s local_sums' > $@
/tmp/local_sums:; umask $U && $(MAKE) -s local_sums > $@ && test -s $@
# Compare local and remote checksums.
verify_sums:/tmp/local_sums /tmp/remote_sums; diff -U 0 $?

# Print fields 3, 7 and the last field of each find -ls line, separated
# by tabs and sorted from the third output field on.  Field 7 is replaced
# by "----" for directories.  The run dies on the first entry whose mode
# string does not match the expected pattern.
local_list:
	find .??* * -ls|\
	perl -alnwe 'die $$_ unless $$F[2] =~ /^(-r..r-.r-.|dr.xr-xr-x)/; if($$F[2]=~/^d/){$$F[6]="----"};$(\
	)print join "\t",@F[2,6,-1]'|sort -k 3
# Checksum every regular file, in sorted order.
local_sums:; find -type f|sort|xargs sum

# For each http:// link found in index.html, pass SITE/robots.txt to wwwoffle.
external_robots:
	mech-dump --links index.html|perl -nwle 'next unless m!http://[^/]+/!; print $$& . "robots.txt"'|xargs -n 1 wwwoffle

# Copy files modified within the last $min minutes (default 111) to the
# site, then run xcpiome on each, pausing 3 seconds between files.
# Setting TEST1 or TEST2 in the environment makes the respective command
# be echoed instead of run.
scp: #mmin, not cmin!
	set -ex -- $$(find * -type f -mmin -$${min-111});\
	for i do $${TEST1+echo} scp $$i $J:$J/$$i; $${TEST2+echo} xcpiome $$i; sleep 3; done

# List *.kml and *.kmz files that have no /kml/Document/name element.
kml_no_name:
	set -- $$(find -name \*.km[lz]); for i do zcat -f $$i | xml2 | grep -q /kml/Document/name || echo $$i; done

# Show, with file name and line number, lines in *.html files that match
# the target name.
staticmap panoramio:; find * -name \*.html | xargs grep -nH -e $@

# Show the transparency-related lines of identify's report for every PNG.
transparent: # Fix with: https://stackoverflow.com/a/8437562 then maybe pngcrush
	set -eu -- $$(find * -name \*.png); identify -verbose $$@|grep -E 'png:tRNS|Alpha:|Image:'

# For each *.html file having a width="N" attribute with N above 320,
# print the largest such N and the file name, widest first.
wide_images: #but what about wide tables?
	set -- $$(find * -name \*.html); perl -nwle 'next unless /width="(\d+)"/ && $$1 > 320;$(\
	)for($$k{$$ARGV}){$$_=$$1 if $$1 > ($$_||0);}; END{for(sort keys %k){print "$$k{$$_}\t$$_";};}' $$@|sort -rn
# Count the lines matching the target name (http-equiv) in every *.html
# file below the current directory and print "COUNT FILE", smallest count
# first.  grep reports FILE:COUNT; the split is anchored on the trailing
# count so that a ":" inside a file name does not shift the fields.
.PHONY: http-equiv
http-equiv:
	grep --include=\*.html --recursive --count $@ *|\
	perl -nwle 'print "$$2 $$1" if /^(.*):(\d+)$$/'|sort -n
# http://validator.w3.org/i18n-checker/
# For find depths 0 through 5, print the lines of the *.html files found
# at exactly that depth that match "index.html.*積", with everything up to
# the last "href" removed; a row of "=" closes each depth.
# The pattern is quoted so the shell cannot treat its "*" as a glob.
.PHONY: index_back_to_home_page_depth_checker
index_back_to_home_page_depth_checker: #rough
	for i in 0 1 2 3 4 5; do find * -maxdepth $$i -mindepth $$i -name \*.html|xargs grep -h 'index.html.*積';\
	echo ==========================================; done|perl -pwle 's/.*href//;'
# Report the character encoding, as guessed by file(1), of every regular
# file whose name contains no ".".
# find runs file(1) itself: names containing whitespace stay intact and
# file(1) is not started at all when nothing matches.
.PHONY: files_without_extensions
files_without_extensions:
	find . -type f ! -name '*.*' -exec file --mime-encoding {} +
# Search regular files modified after 1/1/2023, other than *.html and
# *.jpg, for the literal text "Update Count".
old_header_format:
	find -type f -newermt 1/1/2023 ! -name '*.html' ! -name '*.jpg' |\
	xargs grep -F 'Update Count'