#! /usr/bin/perl
use warnings;
use strict;
use integer;
use FindBin;

# This helper script ensures that ram titles are consistent and correct
# throughout debram.txt, that the titles are indented and ordered
# correctly, and that the cross-references are sane.
#
#     usage: check-ram-titles [-ch] { debram.txt }
#
# If no debram.txt filename is supplied, the script examines the main
# debram.txt file in this script's parent directory.  The -c option
# yields a ram count.
#
#

our $debram = "${FindBin::RealBin}/../debram.txt";
our $ndig   =  4;
our $shwid  =  2;
our $maxlen = 52;
our $pos_arrowhead
            = 66;
our $mark1  = "RAMIFICATION MAP AND TABLE OF CONTENTS\n";
our $mark2  = "THE RAMIFICATION\n";
our $ndig_ramcount
            =  3;

our $usage = <<END;
usage: $0 [-ch] { debram.txt }
    -c count rams
    -h print this usage message
END

my @r1; # head titles
my @r2; # body titles
my @r3; # tail titles

# Read command-line arguments and options.
my @opt;
my @arg;
push @{ /^-\S/ ? \@opt : \@arg }, $_ for @ARGV;
my %opt = map {
    my $o = $_;
    map { substr( $o, $_, 1 ) => 1 } 1 .. length($_)-1
} @opt;
if ( $opt{'?'} || $opt{h} || @arg > 1 ) {
    print $usage;
    exit 0;
}
$debram = shift @arg if @arg;

# Parse a title line from head or tail.
sub parse ($@) {

    # Are these main-body title lines?
    my $isbody = shift;

    # Divide each line into fields.
    my @line;
    for ( @_ ) {
        my( $ram, $sp, $title, $xrefs, $ramlvl, $uctitle, @xref );
        if ( $isbody ) {
            ( $ram, $title ) =
              /^(\d{$ndig}) (\S(?:.*?\S)??) \(\d+\)\s*$/o
              or die "$0: parse: badly formed line\n$_\n";
        }
        else {
            my $arrow;
            ( $ram, $sp, $title, $arrow, $xrefs ) =
              /^(\d{$ndig}) ( *)(\S(?:.*?\S)??)(?: (-+>) (\S(?:.*?\S)??))?\s*$/o
              or die "$0: parse: badly formed line\n$_\n";
            if ( defined $arrow ) {
              length($ram) + length($sp) + length($title)
                + length($arrow) + 2 == $pos_arrowhead
                or die "$0: parse: arrow too short or too long\n$_\n";
            }
            defined( $xrefs ) or $xrefs = '';
            @xref = split ' ', $xrefs;
        }
        $ramlvl = $ndig - @{ [ $ram =~ /0/g ] };
        length( $title ) <= $maxlen
          or die "$0: parse: title too long\n$_\n";
        $uctitle = uc $title;
        if ( $isbody ) {
            $uctitle eq $title
              or die "$0: parse: lower-case char in title\n$_\n";
        }
        else {
            length( $sp ) == $shwid * ($ramlvl-1)
              or die "$0: parse: bad indent in\n$_\n";
        }
        push @line, {
            isbody  => $isbody,
            ram     => $ram,
            ramlvl  => $ramlvl,
            title   => $title,
            uctitle => $uctitle,
            xref    => \@xref,
        };
    }

    # Mark each line as endram or parent ram.  Check ram ordering
    for my $i ( 0 .. $#line-1 ) {
        $line[$i+1]{ram} gt $line[$i]{ram}
          or die "$0: parse: misordered rams near $line[$i]{ram}\n";
        $line[$i]{endram} = $isbody
          || $line[$i+1]{ramlvl} <= $line[$i]{ramlvl};
    }
    $line[-1]{endram} = 1 if @line;

    # Hash the rams.
    my %ram = map { $_->{ram} => $_ } @line;

    # Guard against stray or unsorted cross-references.
    for my $ram ( keys %ram ) {
        my $ram1 = $ram{$ram};
        my $parent = $ram;
        $parent =~ s/^(.*)[^0]/${1}0/;
        my $ram_prev;
        my $n_match_prev = $ndig;
        for my $xref ( @{ $ram1->{xref} } ) {
            $ram{$xref}
              or die "$0: parse: xref to unknown ram $xref\n";
            $xref eq $parent
              and die "$0: parse: xref to ${ram}'s immediate parent\n";
            my $errmsg
              = "$0: parse: misordered xrefs from $ram\n";
            my $j = 0; # count of matching digits.
            ++$j while $j < $ndig
              && substr( $ram, $j, 1 ) eq substr( $xref, $j, 1 );
            $j < $ndig && $j <= $n_match_prev or die $errmsg;
            if ( $j < $n_match_prev ) {
                $n_match_prev = $j;
                $ram_prev     = $xref;
                next;
            }
            defined( $ram_prev ) && $ram_prev ge $xref and die $errmsg;
            $ram_prev = $xref;
        }
    }

    return %ram;

}

# Read the title lines in from each of debram.txt's three sections:
# head; body; tail.
open F, '<', $debram;
1 while <F> ne $mark1; <F>; <F>;
{
    while ( 1 ) {
        local $_ = <F>;
        if ( /^\d{$ndig}/o ) { push @r1, $_ }
        else { last }
    }
}
1 while <F> ne $mark2;
{
    my $intitle = 0;
    while ( 1 ) {
        local $_ = <F>;
        $intitle = !$intitle, next if /^-----/;
        next unless $intitle;
        if ( /^\d{$ndig}/o ) { push @r2, $_ }
        else { last }
    }
}
1 while <F> =~ /\S/; 1 while <F> =~ /\S/;
{
    while ( 1 ) {
        local $_ = <F>;
        if ( /^\d{$ndig}/o ) { push @r3, $_ }
        else { last }
    }
}
close F;

# Parse the three sections:
my %r1 = parse 0, @r1;
my %r2 = parse 1, @r2;
my %r3 = parse 0, @r3;

# Ensure that the rams in each section are the same, with the same
# titles, and that no title is duplicated.
for my $r ( [ \%r1, \%r3 ], [ \%r3, \%r1 ] ) {
    my( $ra, $rb ) = @$r;
    for my $ram ( sort keys %$ra ) {
        my $ram1    = $ra->{$ram};
        my $title   = $ram1->{title};
        my $uctitle = $ram1->{uctitle};
        my $endram  = $ram1->{endram};
        $rb->{$ram} && $title eq $rb->{$ram}{title} && (
            !$endram || (
                $r2{$ram}
                && $uctitle eq $r2{$ram}{title}
            )
        ) or die "$0: title mismatch, ram $ram\n";
    }
}
for my $ram ( sort keys %r2 ) {
    my $ram1    = $r2{$ram};
    my $title   = $ram1->{title};
    $r1{$ram} && $r1{$ram}{endram} && $title eq $r1{$ram}{uctitle} &&
    $r3{$ram} && $r3{$ram}{endram} && $title eq $r3{$ram}{uctitle}
      or die "$0: title mismatch, ram $ram\n";
}
{
    my %uctitle = ();
    for my $ram ( keys %r1 ) {
        my $uctitle = $r1{$ram}{uctitle};
        exists $uctitle{$uctitle}
          and die "$0: duplicate title\n$uctitle\n";
        $uctitle{$uctitle} = $ram;
    }
}

# Print ram counts if -c.
if ( $opt{c} ) {
    my $n_end    = 0;
    my $n_parent = 0;
    ++( $_->{endram} ? $n_end : $n_parent ) for values %r1;
    printf
      "%${ndig_ramcount}d end rams\n"    .
      "%${ndig_ramcount}d parent rams\n" .
      "%${ndig_ramcount}d total rams\n",
      $n_end, $n_parent, $n_end + $n_parent;
}

