#!/usr/bin/perl
# 輸入：某台灣法律之 .txt 檔。
# 輸出：該法，旁邊註逐行之[幾章]幾條幾項幾款幾目。
# 故名 z,t,x,k,m.
# Annotate Taiwan law sections with their numbers.
# 當然我僅測試過某一法。
# Wow, works, at least for one law.
# Author: Dan Jacobson 積丹尼 https://www.jidanni.org/
# Copyright: https://www.gnu.org/licenses/gpl.html
# Created: 2023-03-28T22:45:26+0000
# Last-Updated: 2023-04-04T01:51:48+0000
#     Update #: 114
#
use strict;
use warnings q(all);
use Text::CharWidth qw(mbswidth);
use open qw/:std :encoding(utf8)/;
use utf8;
# Hierarchy levels of a law, outermost first (commonly rendered as
# chapter, article, paragraph, subparagraph, item).
my @s = qw/章 條 項 款 目/;

# Chinese digits, indexed by their numeric value 0..9.
my @digs = qw/〇 一 二 三 四 五 六 七 八 九/;

# Every character allowed inside a Chinese numeral; interpolated into
# regex character classes by the parsing loop.
my $nums = join "", @digs, "十", "百";

# Current value of each level while reading; 0 means "not set".
my %c = map { $_ => 0 } @s;

# True when the previously read line ended with the full stop 。.
my $period_ends = 0;

# Width measured on the heading line of the current 條.
my $width_of_this_tiaos_indenting;

# Widest note seen so far; used to align the output columns.
my $maximum_note_width = 0;

# One hash reference per kept input line.
my @lines;

# The most recently assigned note, so repeats can be suppressed.
my $last_note = "";
# Parsing pass: read the input line by line, keep the counters in %c up
# to date, and push one record (the line text plus a snapshot of %c)
# onto @lines for every line that is kept.
while (<>) {
    chomp;
    s/　/  /g;    # each full-width space becomes two ASCII spaces
    s/\s+$//;     # drop trailing whitespace

    # A chapter heading sets 章 and resets every lower level.
    if (/\s第([$nums]+)(章)\s/) {
        $c{$2} = $1;
        $c{條} = $c{項} = $c{款} = $c{目} = 0;
    }
    next unless $c{章};    #get rid of junk before the law
    last if /:::/;        #get rid of stuff after the law

    if (/\s第([$nums]+)(條)(之[$nums]+)?\s/) {

        # Article heading, optionally with a 之N suffix.
        $c{$2} = $1 . ( $3 || "" );
        $c{項}  = 1;
        $c{款}  = $c{目} = 0;

        # Remember the display width of "indent + heading + gap", i.e.
        # the column at which this article's text starts.
        /^(\s+\S+\s+)/
          or die "Cannot measure the indentation of line $.: $_\n";
        $width_of_this_tiaos_indenting = mbswidth($1);
    }

    # NOTE(review): the next two patterns are not anchored to the start
    # of the line, so the same characters inside running text would
    # match as well -- confirm against the input format.
    elsif (/([$nums]+)、/)  { $c{款} = $1; $c{目} = 0; }
    elsif (/（([$nums]+)）/) { $c{目} = $1; }
    elsif ($period_ends) {    #previous line had period

        # A line indented exactly to the article's text column, right
        # after a line that ended with 。, starts a new 項.  The match
        # result is checked and captured explicitly: after a failed
        # match $& would still hold text from some earlier match.
        if (    defined $width_of_this_tiaos_indenting
            and m/^(\s+)/
            and length($1) == $width_of_this_tiaos_indenting )
        {
            $c{項}++;
            $c{款} = $c{目} = 0;
        }
    }
    $period_ends = /。$/;
    push @lines, { text => $_, %c };
}

{    # A 條 whose final 項 counter is still 1 has no separate 項s at
     # all, so the counter is cleared for every line of that 條.
    my %last_xiang_of;

    # Later lines overwrite earlier ones, leaving the 項 value of the
    # last line recorded for each 條 value.
    $last_xiang_of{ $_->{條} } = $_->{項} for @lines;

    for my $line (@lines) {
        next unless $last_xiang_of{ $line->{條} } == 1;
        $line->{項} = 0;
    }
}

# Labelling pass: give each non-empty line the note built by pp(), but
# only when it differs from the previously assigned note; also track the
# widest note for column alignment.
for my $line (@lines) {
    $line->{note} = "";
    next unless length $line->{text};

    my $label = pp($line);
    next if $label eq $last_note;    # same position as before: no note

    $last_note          = $label;
    $line->{note}       = $label;
    $line->{note_width} = mbswidth($label);
    $maximum_note_width = $line->{note_width}
      if $line->{note_width} > $maximum_note_width;
}

# Output pass: print every recorded line as "note, padding, text", with
# the padding sized so that all texts start in the same column.  Lines
# with empty text are printed as bare newlines.
for my $line (@lines) {

    # Test the length rather than the truth of the text, so that a line
    # consisting only of "0" is still printed.
    if ( length $line->{text} ) {
        my $note    = $line->{note} || "";
        my $padding = $maximum_note_width - ( $line->{note_width} || 0 );
        print $note, " " x $padding, $line->{text};
    }
    print "\n";
}

# pp($counters): build the printable position label for one line.
#
# $counters is a hash reference holding a value for each level named in
# @s.  Every level whose value is true contributes "<value><level>" to
# the result; the 項 value is first converted by zhongwenize().  The
# pieces are concatenated and returned as one string, e.g. 第三條第二項.
#
# Environment:
#   yesZhang - when true, the 章 level is included (omitted by default).
#   noDi     - when true, the 第 prefix is not added to each piece.
sub pp {
    my ($cc) = @_;
    my @o;
    for my $level (@s) {
        next if $level eq "章" and !$ENV{yesZhang};    # Else too verbose.

        # Plain element access; a key/value slice is not needed here.
        my $ss = $cc->{$level};
        next unless $ss;

        $ss = zhongwenize($ss) if $level eq "項";
        push @o, $ss . $level;
    }
    @o = map { "第" . $_ } @o unless $ENV{noDi};
    return join "", @o;
}

# zhongwenize($number): rewrite a small decimal integer with Chinese
# digits and 十.
#
# Deliberately quick and dirty, to avoid importing yet another module;
# the result looks good for 1..99.
sub zhongwenize {
    my ($number) = @_;
    my $text = $number;

    # Put 十 between the last two characters: 23 -> 2十3.
    $text =~ s/(.)(.)$/$1十$2/;

    # Drop a trailing zero: 2十0 -> 2十.
    $text =~ s/0$//;

    # Drop a leading 1 that is directly followed by another word
    # character: 1十3 -> 十3.
    $text =~ s/^1\B//;

    # Replace each remaining ASCII digit by its entry in @digs.
    $text =~ s/$_/$digs[$_]/g for 0 .. $#digs;

    return $text;
}