#!/usr/bin/perl # 輸入:某台灣法律之 .txt 檔。 # 輸出:該法,旁邊註逐行之阿拉伯數字幾條(T)幾項(X)幾款(K)幾目(M)。 # 故名 DiGital Z,T,X,K,M. # Annotate Taiwan law sections with their numbers. # 當然我僅測試過某一法。 # Wow, works, at least for one law. # # In this version we use Arabic numbers for a compact result: # 18-1T|1X|2K|1M (一)使用之單層樓地板... # We stuff them in the blank part of the left margin, # without increasing the width. So far we haven't ran out of space... # # Author: Dan Jacobson 積丹尼 https://www.jidanni.org/ # Copyright: https://www.gnu.org/licenses/gpl.html # Created: 2023-03-28T22:45:26+0000 # Last-Updated: 2023-04-04T01:28:25+0000 # Update #: 184 # use strict; use warnings q(all); use open qw/:std :encoding(utf8)/; use utf8; use Text::CharWidth qw(mbswidth); my $separator_char = $ENV{SEP} || ""; #q(_); my $initial_char = $ENV{INI} || " "; #q(.); my @s = qw/章 條 項 款 目/; my @digs = qw/〇 一 二 三 四 五 六 七 八 九/; my @letters = qw/t x k m/; #§ my $nums = join "", @digs, "十百"; my %c; $c{$_} = 0 for @s; my $period_ends = 0; my $width_of_this_tiaos_indenting; my @lines; while (<>) { chomp; s/ / /g; s/\s+$//; if (/\s第([$nums]+)(章)\s/) { $c{$2} = ar($1); $c{條} = $c{項} = $c{款} = $c{目} = 0; } next unless $c{章}; #get rid of junk before the law last if /:::/; #get rid of stuff after the law if (length) { s/^ // or die "I thought we could always shave one blank"; } if (/^第([$nums]+)(條)(?:之([$nums]+))?\s/) { $c{$2} = ar($1); $c{$2} .= "-" . ar($3) if $3; $c{項} = 1; $c{款} = $c{目} = 0; /^\S+\s+/ or die; $width_of_this_tiaos_indenting = mbswidth($&); } elsif (/([$nums]+)、/) { $c{款} = ar($1); $c{目} = 0; } elsif (/(([$nums]+))/) { $c{目} = ar($1); } elsif ($period_ends) { #previous line had period m/^\s+/; #m for emacs perl-mode bug if ( length $& == $width_of_this_tiaos_indenting ) { $c{項}++; $c{款} = $c{目} = 0; } } $period_ends = /。$/; push @lines, { text => $_, %c }; } { # If a 條 has only one 項, then in fact it has no 項s. my %max_xiang; for (@lines) { $max_xiang{ $_->{條} } = $_->{項} } for (@lines) { if ( $max_xiang{ $_->{條} } == 1 ) { $_->{項} = 0; } } } { my $last_info = ""; for my $l (@lines) { next unless $l->{text} =~ /^ /; next if $l->{text} =~ / 第[$nums]+章 /; my @en = @letters; my @zh = qw/條 項 款 目/; my @protoA; for ( 0 .. $#zh ) { next unless $l->{ $zh[$_] }; push @protoA, $l->{ $zh[$_] } . $en[$_]; } my $proto = join $separator_char, @protoA; next if $proto eq $last_info; $l->{info} = $last_info = $proto; } } for (@lines) { my $info_length = length $_->{info}; if ($info_length) { $_->{text} =~ s/^ *//; my $blank_length = length $&; die "Not enough blanks to eat on: $_->{text}.", " I need to make shorter {info} strings." if $info_length > $blank_length; ## Print as close to the law text as possible, to reduce ## horizontal scrolling need, but do pad by one blank: print $initial_char x ( $blank_length - $info_length - 1 ), $_->{info}, " "; } print $_->{text}, "\n"; } sub ar { #Make Arabic numerals, quick and dirty my $num = $_[0]; for ($num) { s/^十/1$&/; s/十$/0/; s/十//; for my $nn ( 0 .. $#digs ) { s/$digs[$nn]/$nn/g; } } return $num; }