#!/usr/bin/perl
# 輸入:某台灣法律之 .txt 檔。
# 輸出:該法,旁邊註逐行之阿拉伯數字幾條(T)幾項(X)幾款(K)幾目(M)。
# 故名 DiGital Z,T,X,K,M.
# Annotate Taiwan law sections with their numbers.
# 當然我僅測試過某一法。
# Wow, works, at least for one law.
#
# In this version we use Arabic numbers for a compact result:
# 18-1T|1X|2K|1M  (一)使用之單層樓地板...
# We stuff them in the blank part of the left margin,
# without increasing the width. So far we haven't ran out of space...
#
# Author: Dan Jacobson 積丹尼 https://www.jidanni.org/
# Copyright: https://www.gnu.org/licenses/gpl.html
# Created: 2023-03-28T22:45:26+0000
# Last-Updated: 2023-04-04T01:28:25+0000
#     Update #: 184
#
use strict;
use warnings q(all);
use open qw/:std :encoding(utf8)/;
use utf8;
use Text::CharWidth qw(mbswidth);
my $separator_char = $ENV{SEP} || "";           #q(_);
my $initial_char   = $ENV{INI} || " ";          #q(.);
my @s              = qw/章 條 項 款 目/;
my @digs           = qw/〇 一 二 三 四 五 六 七 八 九/;
my @letters        = qw/t x k m/;               #§
my $nums           = join "", @digs, "十百";
my %c;
$c{$_} = 0 for @s;
my $period_ends = 0;
my $width_of_this_tiaos_indenting;
my @lines;
while (<>) {
    chomp;
    s/ /  /g;
    s/\s+$//;
    if (/\s第([$nums]+)(章)\s/) {
        $c{$2} = ar($1);
        $c{條} = $c{項} = $c{款} = $c{目} = 0;
    }
    next unless $c{章};    #get rid of junk before the law
    last if /:::/;        #get rid of stuff after the law
    if (length) { s/^ // or die "I thought we could always shave one blank"; }
    if (/^第([$nums]+)(條)(?:之([$nums]+))?\s/) {
        $c{$2} = ar($1);
        $c{$2} .= "-" . ar($3) if $3;
        $c{項} = 1;
        $c{款} = $c{目} = 0;
        /^\S+\s+/ or die;
        $width_of_this_tiaos_indenting = mbswidth($&);
    }
    elsif (/([$nums]+)、/)  { $c{款} = ar($1); $c{目} = 0; }
    elsif (/(([$nums]+))/) { $c{目} = ar($1); }
    elsif ($period_ends) {    #previous line had period
        m/^\s+/;              #m for emacs perl-mode bug
        if ( length $& == $width_of_this_tiaos_indenting ) {
            $c{項}++;
            $c{款} = $c{目} = 0;
        }
    }
    $period_ends = /。$/;
    push @lines, { text => $_, %c };
}
{ # If a 條 has only one 項, then in fact it has no 項s.
    my %max_xiang;
    for (@lines) { $max_xiang{ $_->{條} } = $_->{項} }
    for (@lines) {
        if ( $max_xiang{ $_->{條} } == 1 ) { $_->{項} = 0; }
    }
}
{
    my $last_info = "";
    for my $l (@lines) {
        next unless $l->{text} =~ /^ /;
        next if $l->{text}     =~ / 第[$nums]+章 /;
        my @en = @letters;
        my @zh = qw/條 項 款 目/;
        my @protoA;
        for ( 0 .. $#zh ) {
            next unless $l->{ $zh[$_] };
            push @protoA, $l->{ $zh[$_] } . $en[$_];
        }
        my $proto = join $separator_char, @protoA;
        next if $proto eq $last_info;
        $l->{info} = $last_info = $proto;
    }
}
for (@lines) {
    my $info_length = length $_->{info};
    if ($info_length) {
        $_->{text} =~ s/^ *//;
        my $blank_length = length $&;
        die "Not enough blanks to eat on: $_->{text}.",
          " I need to make shorter {info} strings."
          if $info_length > $blank_length;
        ## Print as close to the law text as possible, to reduce
        ## horizontal scrolling need, but do pad by one blank:
        print $initial_char x ( $blank_length - $info_length - 1 ),
          $_->{info}, " ";
    }
    print $_->{text}, "\n";
}

sub ar {    #Make Arabic numerals, quick and dirty
    my $num = $_[0];
    for ($num) {
        s/^十/1$&/;
        s/十$/0/;
        s/十//;
        for my $nn ( 0 .. $#digs ) {
            s/$digs[$nn]/$nn/g;
        }
    }
    return $num;
}