#!/usr/bin/perl
# Input:  the .txt file of some Taiwan law.
# Output: that law, with each line annotated beside it with its
#         [chapter,] article, paragraph, subparagraph and item number
#         ([章] 條 項 款 目) -- hence the nickname z,t,x,k,m.
# Annotate Taiwan law sections with their numbers.
# Of course I have only tested this on one particular law.
# Wow, works, at least for one law.
# Author: Dan Jacobson 積丹尼 https://www.jidanni.org/
# Copyright: https://www.gnu.org/licenses/gpl.html
# Created: 2023-03-28T22:45:26+0000
# Last-Updated: 2023-04-04T01:51:48+0000
#     Update #: 114
#
# Environment switches read by pp():
#   yesZhang  when true, the chapter (章) is included in every note.
#   noDi      when true, the components are not prefixed with 第.
#
use strict;
use warnings q(all);
use Text::CharWidth qw(mbswidth);
use open qw/:std :encoding(utf8)/;
use utf8;

# Hierarchy levels, outermost first:
# chapter, article, paragraph, subparagraph, item.
my @s    = qw/章 條 項 款 目/;
my @digs = qw/〇 一 二 三 四 五 六 七 八 九/;

# Characters a Chinese numeral may consist of; interpolated into [...] below.
my $nums = join "", @digs, "十百";

# Current position in the hierarchy; 0 means "nothing at this level".
my %c = map { $_ => 0 } @s;

my $period_ends = 0;                  # Did the previous line end in 。?
my $width_of_this_tiaos_indenting;    # Display width up to the article text.
my $maximum_note_width = 0;
my @lines;                            # One hash per kept input line.
my $last_note = "";

# Pass 1: read the law and record, for every line, where in the
# hierarchy it sits.
while (<>) {
    chomp;

    # NOTE(review): this substitution used to have a plain blank on both
    # sides, i.e. it was a no-op.  Presumably an ideographic space
    # (U+3000) was meant, so that leading whitespace occupies one column
    # per character and length() below agrees with mbswidth() -- confirm.
    s/\x{3000}/ /g;
    s/\s+$//;

    if (/\s第([$nums]+)(章)\s/) {
        $c{$2} = $1;
        $c{條} = $c{項} = $c{款} = $c{目} = 0;
    }
    next unless $c{章};    # Get rid of junk before the law.
    last if /:::/;         # Get rid of stuff after the law.

    if (/\s第([$nums]+)(條)(之[$nums]+)?\s/) {

        # A new article; its first line is also its first paragraph.
        $c{$2} = $1 . ( $3 // "" );
        $c{項} = 1;
        $c{款} = $c{目} = 0;

        # Remember how far the article's text is indented: later
        # paragraphs of this article start at the same column.
        /^(\s+\S+\s+)/
          or die "Line $.: cannot measure the indentation of: $_\n";
        $width_of_this_tiaos_indenting = mbswidth($1);
    }
    elsif (/([$nums]+)、/) {

        # Subparagraph marker: a numeral followed by 、.
        # NOTE(review): unanchored, so a numeral+、 in running text also
        # counts -- confirm whether it should be tied to the line start.
        $c{款} = $1;
        $c{目} = 0;
    }
    elsif (/[\x{FF08}(]([$nums]+)[\x{FF09})]/) {

        # Item marker: a numeral in (full-width or ASCII) parentheses.
        # The pattern previously contained no literal parentheses, only
        # nested capture groups, and so matched any numeral anywhere.
        $c{目} = $1;
    }
    elsif ($period_ends) {    # Previous line ended in a full stop.

        # A line indented exactly like the article's text, following a
        # finished sentence, starts a new paragraph.  Capture explicitly
        # rather than trusting $&, which keeps the value of an older
        # match when this one fails.
        my ($indent) = /^(\s+)/;
        if (    defined $indent
            and defined $width_of_this_tiaos_indenting
            and length($indent) == $width_of_this_tiaos_indenting )
        {
            $c{項}++;
            $c{款} = $c{目} = 0;
        }
    }
    $period_ends = /。$/;
    push @lines, { text => $_, %c };
}

{
    # If a 條 has only one 項, then in fact it has no 項s.
    # The paragraph counter only grows within an article, so the value
    # seen last for an article is its paragraph count.
    my %max_xiang;
    $max_xiang{ $_->{條} } = $_->{項} for @lines;
    for my $line (@lines) {
        $line->{項} = 0 if $max_xiang{ $line->{條} } == 1;
    }
}

# Pass 2: work out each line's note.  A note is shown only on the first
# line of a run of lines sharing it; blank lines never carry a note.
for my $line (@lines) {
    $line->{note} = "";
    next unless length $line->{text};
    my $pn = pp($line);
    next if $pn eq $last_note;
    $line->{note} = $last_note = $pn;
    $line->{note_width} = mbswidth($pn);
    if ( $line->{note_width} > $maximum_note_width ) {
        $maximum_note_width = $line->{note_width};
    }
}

# Pass 3: print every line with its note padded to a common width.
for my $line (@lines) {
    if ( length $line->{text} ) {
        print $line->{note};
        print " " x ( $maximum_note_width - ( $line->{note_width} // 0 ) );
        print $line->{text};
    }
    print "\n";
}

# pp(\%position): format a position in the hierarchy as one string.
# Takes a hash reference holding the keys listed in @s and returns the
# concatenation of "<number><level>" for every level whose value is
# true.  The chapter is skipped unless $ENV{yesZhang} is set, and each
# component is prefixed with 第 unless $ENV{noDi} is set.  The paragraph
# (項) counter is kept as an integer, so it is converted to Chinese
# numerals here.
sub pp {
    my ($cc) = @_;
    my @o;
    for my $level (@s) {
        next if $level eq "章" and !$ENV{yesZhang};    # Else too verbose.
        my $ss = $cc->{$level};
        next unless $ss;
        $ss = zhongwenize($ss) if $level eq "項";
        push @o, $ss . $level;
    }
    unless ( $ENV{noDi} ) { s/^/第/ for @o }
    return join "", @o;
}

# zhongwenize($n): turn a string of Arabic digits into Chinese numerals.
# Returns the converted string.
sub zhongwenize {
    my ($st) = @_;

    # No I don't want to import any more programs, so I'll do it
    # here, quick and dirty. Looks good for 1..99:
    $st =~ s/(.)(.)$/$1十$2/;    # Put 十 between tens and units.
    $st =~ s/0$//;               # 20 -> 2十, not 2十0.
    $st =~ s/^1\B//;             # 1十2 -> 十2, but a lone 1 is kept.
    for my $nn ( 0 .. $#digs ) {
        $st =~ s/$nn/$digs[$nn]/g;
    }
    return $st;
}