#!/usr/bin/perl
#
# Lintian reporting harness -- Create and maintain Lintian reports automatically
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
#
# This program is free software.  It is distributed under the terms of
# the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

use strict;
use warnings;

use Getopt::Long;

sub usage {
    print <<END;
Lintian reporting harness
Create and maintain Lintian reports automatically

Usage: harness [ -i | -f | -r | -c ]

Options:
  -c         clean mode, erase everything and start from scratch (implies -f)
  -f         full mode, blithely overwrite lintian.log
  -i         incremental mode, use old lintian.log data, process changes only
  -r         generate HTML reports only
  --dry-run  pretend to do the actions without actually doing them.  The
             "normal" harness output will go to stdout rather than the
             harness.log.
  --to-stdout
             [For debugging] Have output go to stdout as well as the usual
             log files.  Note, this option has no (extra) effect with --dry-run.
  --schedule-chuck-size N
             Schedule at most N groups in a given run of Lintian.  If more than N
             groups need to be processed, harness will invoke Lintian more than
             once.  If N is 0, schedule all groups in one go.  (Default: 512)

Incremental mode is the default if you have a lintian.log;
otherwise, it's full.

Report bugs to <lintian-maint\@debian.org>.
END
    #'# for cperl-mode
    exit;
}

my $LOG_FD;
my %opt = ('schedule-chuck-size' => 512,);

my %opthash = (
    'i' => \$opt{'incremental-mode'},
    'c' => \$opt{'clean-mode'},
    'f' => \$opt{'full-mode'},
    'r' => \$opt{'reports-only'},
    'dry-run' => \$opt{'dry-run'},
    'schedule-chuck-size=i' => \$opt{'schedule-chuck-size'},
    'to-stdout' => \$opt{'to-stdout'},
    'help|h' => \&usage,
);

# init commandline parser
Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev');

# process commandline options
GetOptions(%opthash)
  or die("error parsing options\n");

# clean implies full - do this as early as possible, so we can just
# check $opt{'full-mode'} rather than a full
#   ($opt{'clean-mode'} || $opt{'full-mode'})
$opt{'full-mode'} = 1 if $opt{'clean-mode'};

die "Cannot use both incremental and full/clean.\n"
  if $opt{'incremental-mode'} && $opt{'full-mode'};
die "Cannot use other modes with reports only.\n"
  if $opt{'reports-only'} && ($opt{'full-mode'} || $opt{'incremental-mode'});

# read configuration
require './config'; ## no critic (Modules::RequireBarewordIncludes)
use vars qw($LINTIAN_ROOT $LINTIAN_LAB $LINTIAN_ARCHIVEDIR $LINTIAN_DIST
  $LINTIAN_ARCH @EXTRA_LINTIAN_OPTIONS
  $LOG_DIR $statistics_file
  $HTML_DIR $HTML_TMP_DIR
  $LINTIAN_AREA);

# export Lintian's configuration
$ENV{'LINTIAN_LAB'} = $LINTIAN_LAB;
# delete LINTIAN_{CFG,ROOT} in case they are set.
delete($ENV{'LINTIAN_CFG'});
delete($ENV{'LINTIAN_ROOT'});

my $html_reports_cmd = "$LINTIAN_ROOT/reporting/html_reports";
my ($log_file, $list_file, $lintian_log, $html_reports_log)
  = map {"$LOG_DIR/$_" }
  qw(harness.log changed-packages.list lintian.log html_reports.log);
my $old_lintian_log = $lintian_log . '.old';
my @lintian_cmd = (
    "$LINTIAN_ROOT/frontend/lintian",
    @EXTRA_LINTIAN_OPTIONS,
    qw(--no-cfg -I -E --pedantic -v --show-overrides),
    qw(--exp-output=format=fullewi --packages-from-file -),
);

# import perl libraries
unshift @INC, "$LINTIAN_ROOT/lib";
require Lintian::Lab;
require Lintian::Lab::Manifest;
require Lintian::Processable;
require Lintian::Relation::Version;
import Lintian::Relation::Version qw(versions_lt);
require Lintian::Util;
import Lintian::Util qw(open_gz strip visit_dpkg_paragraph);

# turn file buffering off
STDOUT->autoflush;

unless ($opt{'dry-run'}) {
    # rotate log files
    system("savelog $log_file $html_reports_log >/dev/null") == 0
      or die "Cannot rotate log files.\n";

    # create new log file
    open($LOG_FD, '>', $log_file)
      or die "cannot open log file $log_file for writing: $!";
} else {
    $opt{'to-stdout'} = 0;
    open($LOG_FD, '>&', \*STDOUT)
      or die "Cannot open log file <stdout> for writing: $!";
    Log('Running in dry-run mode');
}
# From here on we can use Log() and Die().

my $LAB = Lintian::Lab->new($LINTIAN_LAB);

unless ($opt{'dry-run'}) {
    # purge the old packages
    $LAB->remove if $opt{'clean-mode'};

    $LAB->create({ 'mode' => 02775}) unless $LAB->exists;
} else {
    if (!$LAB->exists || $opt{'clean-mode'}) {
        # We either do not have a lab or we were asked to clean
        # the existing one.  We solve this by creating a temp
        # lab (which will be empty).  This means that A) the lab
        # will appear to be empty (as expected by clean-mode) and
        # B) that we do not have to do a dry-run check on every
        # "read-only" lab operation (we still have to guard write
        # operations).
        $LAB = Lintian::Lab->new;
        $LAB->create;
    }
}

if (!$opt{'reports-only'} && !$opt{'full-mode'} && !$opt{'incremental-mode'}) {
    # Nothing explicitly chosen, default to -i if the log is present,
    # otherwise -f.
    if (-f $lintian_log) {
        $opt{'incremental-mode'} = 1;
    } else {
        $opt{'full-mode'} = 1;
    }
}

unless ($opt{'reports-only'}) {
    $LAB->open;
    Log('Processing mirror index files');
    my @manifests = local_mirror_manifests(
        $LINTIAN_ARCHIVEDIR, [_trim_split($LINTIAN_DIST)],
        [_trim_split($LINTIAN_AREA)], [_trim_split($LINTIAN_ARCH)]);
    my @diffs = $LAB->generate_diffs(@manifests);
    my %skip = ();
    my @worklist;
    my %dirty_groups = ();
    # Remove old/stale packages from the lab
    foreach my $diff (@diffs) {
        my $type = $diff->type;
        my $old_manifest = $diff->olist;
        my $man = $diff->nlist;
        Log("Removing old or changed $type packages from the lab");
        foreach my $removed (@{ $diff->removed }, @{ $diff->changed }) {
            my ($pkg_name, $pkg_version, $pkg_arch) = @$removed;
            my $entry;
            my ($pkg_src, $pkg_src_version);
            my $sk = "$type:$pkg_name/$pkg_version";
            $sk .= "/$pkg_arch" if $pkg_arch;
            $skip{$sk} = 1; # For log-cleaning (incremental runs)
            if ($opt{'dry-run'}) {
                $entry = Lintian::Processable->new_from_metadata($type,
                    $old_manifest->get(@$removed));
            } else {
                $entry = $LAB->get_package($pkg_name, $type, $pkg_version,
                    $pkg_arch);
            }

            $pkg_src = $entry->pkg_src;
            $pkg_src_version = $entry->pkg_src_version;
            $dirty_groups{$pkg_src}->{$pkg_src_version} = 1;

            # Under dry-run, $entry is not a L::Lab::Entry, so kill it
            # so we don't try to use it as one later.
            $entry = undef if $opt{'dry-run'};
            if ($opt{'dry-run'} || $entry) {
                my $arch = '';
                $arch = " [$pkg_arch]" if $pkg_arch;
                if ($opt{'dry-run'} || $entry->remove) {
                    Log("Removed $type $pkg_name ($pkg_version)$arch");
                } else {
                    Log("Removing $type $pkg_name ($pkg_version)$arch failed."
                    );
                }
            }
        }
        Log("Adding new and changed $type packages to the lab");
        foreach my $added (@{ $diff->added }, @{ $diff->changed }) {
            my ($pkg_name, $pkg_version, $pkg_arch) = @$added;
            my $me = $man->get(@$added);
            my $entry;
            my $proc = Lintian::Processable->new_from_metadata($type, $me);
            my $pkg_src = $proc->pkg_src;
            my $pkg_src_version = $proc->pkg_src_version;

            $dirty_groups{$pkg_src}->{$pkg_src_version} = 1;

            unless ($opt{'dry-run'}) {
                $entry = $LAB->get_package($proc);
            }
            if ($opt{'dry-run'} || $entry) {
                my $ok = 0;
                my $arch = '';
                $arch = " [$pkg_arch]" if $pkg_arch;
                if ($opt{'dry-run'}) {
                    $ok = 1;
                } else {
                    eval {
                        $entry->create;
                        $entry->update_status_file
                          or die "creating status file: $!";
                        $ok = 1;
                    };
                }
                if ($ok) {
                    my $query = "$type:$pkg_name/$pkg_version";
                    $query .= "/$pkg_arch" if $pkg_arch;
                    Log("Added $type $pkg_name ($pkg_version)$arch");
                } else {
                    Log("Adding $type $pkg_name ($pkg_version)$arch failed: $@"
                    );
                }
            }
        }
    }

    # At this point %dirty_groups has queries for all dirty groups for
    # the incremental run.  Basically each group falls into 2 cases:
    #
    #  1. All entries in the group have been added/removed
    #     - Package added to or removed from suite
    #     - New version replacing an older version.
    #  2. Only part of the entries in a group has been added/removed:
    #     - binNMUs etc.
    #
    # If we just "blindly" run the packages from case 1, then binaries
    # from group 2 will not be procesed with their (full) group
    # (particularly without the source package).

    Log('Computing grouping...');

    if ($opt{'full-mode'}) {
        # for full run, just replace %dirty_groups with "<all groups in lab>".
        $LAB->visit_packages(
            sub {
                my ($entry) = @_;
                $dirty_groups{$entry->pkg_src}->{$entry->pkg_src_version} = 1;
            });
    }

    # Sort so that the worklist is also sorted (makes it easier to review).
    my $comparator = sub { versions_lt($a, $b) ? -1 : 1; };
    foreach my $gname (sort keys %dirty_groups) {
        my $dver = $dirty_groups{$gname};
        foreach my $gversion (sort $comparator keys %$dver) {
            my $query = "GROUP:${gname}/${gversion}";
            my @res = $LAB->lab_query($query);
            if (@res) {
                # Group is still in the Lab - reprocess it together
                push(@worklist, $query);
                foreach my $entry (@res) {
                    # Remove old log entry for all entries in this group
                    # NB: This part is unable to replace the update to %skip
                    # in the "foreach removed-entry in diff" above.  In case
                    # the entire group is removed, we will enter this loop,
                    # but we will see each of the entries removed in the
                    # loop above.
                    $skip{$entry->identifier} = 1;
                }
            }
        }
    }

    # Flushes the changed manifest to the file system - croaks on
    # error
    # - no need to check dry-run here as nothing changed and it frees
    #   memory to do this.
    # - in the (hopefully unlikely) case that dry-run is *buggy* and
    #   the lab actually was modified, then this will at least keep
    #   the lab metadata consistent with the actual contents.
    $LAB->close;
    # Throw away the lab to avoid it hogging memory.
    $LAB = undef;

    if ($opt{'incremental-mode'}) {
        # Extra work for the incremental run
        # - do this regardless of @worklist, since we could in theory
        #   have an "only removals"-run.  In that case, their entries
        #   should still be removed from the log.

        die "Old Lintian log file $lintian_log not found!\n"
          unless -f $lintian_log;

        # update lintian.log
        Log('Updating lintian.log...');
        my $nfd;
        if ($opt{'dry-run'}) {
            open $nfd, '>', '/dev/null'
              or Die("cannot open lintian.log /dev/null for writing: $!");
        } else {
            rename $lintian_log, $old_lintian_log
              or Die("cannot rename lintian.log to $old_lintian_log: $!");
            open $nfd, '>', $lintian_log
              or Die("cannot open lintian.log $lintian_log for writing: $!");
        }
        open my $ofd, '<', $old_lintian_log
          or
          Die("cannot open old lintian.log $old_lintian_log for reading: $!");
        my $copy_mode = 1;
        while (<$ofd>) {
            if (
                m/^N: [ ] Processing [ ] (binary|udeb|source) [ ]
                       package [ ] (\S+) [ ] \(version [ ] (\S+), [ ]
                       arch [ ] (\S+)\)[ ]\.\.\./oxsm
              ) {
                my ($type, $pkg, $ver, $arch) = ($1,$2, $3, $4);
                my $k = "$type:$pkg/$ver";
                $k .= "/$arch" if $type ne 'source';
                $copy_mode = 1;
                $copy_mode = 0 if exists $skip{$k};
            }
            if ($copy_mode) {
                print $nfd $_;
            }
        }
        print $nfd "N: ---end-of-old-lintian-log-file---\n";
        close $nfd or Die("Close $lintian_log: $!");
        close $ofd; # Ignore (read-only handle)
    }

    Log('');

    # Clear some variables we no longer need, so Perl can reclaim
    # the memory.
    %dirty_groups = %skip = @diffs = @manifests = ();

    if (@worklist) {
        my $scs = $opt{'schedule-chuck-size'};
        my $round = 0;
        my $rounds = 1;
        if ($opt{'full-mode'}) {
            # truncate in full-mode
            open(my $fd, '>', $lintian_log)
              or Die("open/truncate $lintian_log failed: $!");
            close($fd)
              or Die("close $lintian_log failed: $!");
        }

        if ($scs > 0) {
            # compute the number of rounds needed.
            $rounds = int((scalar @worklist + ($scs -1)) / $scs);
        }

        Log(
            sprintf(
                'Groups to process %d will take %d round(s) [round limit: %s]',
                scalar @worklist,
                $rounds, $scs > 0 ? $scs : 'none'
            ));

        Log('Command line used: ' . join(q{ }, @lintian_cmd));
        while (@worklist) {
            my $len = scalar @worklist;
            $round++;
            # correct bounds to fit chuck size
            $len = $scs if $scs > 0 and $len > $scs;

            Log("Running Lintian (round $round/$rounds) ...");
            Log(' - Range: ' . $worklist[0] . q{ ... } . $worklist[$len - 1]);

            if ($opt{'dry-run'}) {
                splice(@worklist, 0, $len);
                next;
            }

            my $pid = open(my $lintpipe, '|-') // Die("fork failed: $!");
            if (not $pid) {
                # child => reopen STD{OUT,ERR} and exec lintian
                open(STDOUT, '>>', $lintian_log)
                  or die("re-open STDOUT failed: $!");
                open(STDERR, '>&', *STDOUT)
                  or die("re-open STDERR failed: $!");
                exec(@lintian_cmd)
                  or die("exec @lintian_cmd failed: $!");
            }
            foreach my $query (splice(@worklist, 0, $len)) {
                # Technically, we emit these items in reverse order to Lintian,
                # but Lintian sorts it anyway
                print {$lintpipe} "!query: $query\n";
            }
            $! = 0;
            close $lintpipe;
            Die("Closing pipe failed: $!") if $!;
            if ($?) {
                # exit 1 (policy violations) happens all the time (sadly)
                # exit 2 (broken packages) also happens all the time...
                my $res = ($? >> 8) & 0xff;
                my $sig = $? & 0xff;
                if ($res != 1 and $res != 0) {
                    Log("warning: executing lintian returned $res");
                } elsif ($sig) {
                    Log("Lintian terminated by signal: $sig");
                    # If someone is sending us signals (e.g. SIGINT/Ctrl-C)
                    # don't start the next round.
                    #  - 35k / 512 => 700 rounds (or 700 SIGINTs to stop...)
                    Log(' - skipping the rest of the worklist');
                    @worklist = ();
                }
            }
        }
    } else {
        Log('Skipping Lintian run - nothing to do...');
    }
}

# create html reports
Log('Creating HTML reports...');
run("$html_reports_cmd $lintian_log >$html_reports_log 2>&1")
  or
  Log("warning: executing $html_reports_cmd returned " . (($? >> 8) & 0xff));
Log('');

# rotate the statistics file updated by $html_reports_cmd
if (!$opt{'dry-run'} && -f $statistics_file) {
    system("cp $statistics_file $LOG_DIR/stats/statistics-`date +%Y%m%d`") == 0
      or Log('warning: could not rotate the statistics file');
}

# install new html directory
Log('Installing HTML reports...');
unless ($opt{'dry-run'}) {
    system("rm -rf $HTML_DIR") == 0
      or Die("error removing $HTML_DIR");
    # a tiny bit of race right here
    rename($HTML_TMP_DIR,$HTML_DIR)
      or Die("error renaming $HTML_TMP_DIR into $HTML_DIR");
}
Log('');

# ready!!! :-)
Log('All done.');
exit 0;

# -------------------------------

sub Log {
    print {$LOG_FD} $_[0],"\n";
    print $_[0], "\n" if $opt{'to-stdout'};
    return;
}

sub run {
    Log("Executing $_[0]");
    return 1 if $opt{'dry-run'};
    return (system($_[0]) == 0);
}

sub Die {
    Log("fatal error: $_[0]");
    exit 1;
}

sub _trim_split {
    my ($val) = @_;
    return () unless $val;
    return split m/\s*+,\s*+/o, strip($val);
}

# local_mirror_manifests ($mirdir, $dists, $areas, $archs)
#
# Returns a list of manifests that represents what is on the local mirror
# at $mirdir.  3 manifests will be returned, one for "source", one for "binary"
# and one for "udeb" packages.  They are populated based on the "Sources" and
# "Packages" files.
#
# $mirdir - the path to the local mirror
# $dists  - listref of dists to consider (i.e. ['unstable'])
# $areas  - listref of areas to consider (i.e. ['main', 'contrib', 'non-free'])
# $archs  - listref of archs to consider (i.e. ['i386', 'amd64'])
#
sub local_mirror_manifests {
    my ($mirdir, $dists, $areas, $archs) = @_;
    my $active_srcs = {};
    my $srcman = Lintian::Lab::Manifest->new('source');
    my $binman = Lintian::Lab::Manifest->new('binary');
    my $udebman = Lintian::Lab::Manifest->new('udeb');
    foreach my $dist (@$dists) {
        foreach my $area (@$areas) {
            my $srcs = "$mirdir/dists/$dist/$area/source/Sources";
            my $srcfd;
            my $srcsub;
            # Binaries have a "per arch" file.
            # - we check those first and then include the source packages that
            #   are referred to by these binaries.
            foreach my $arch (@$archs) {
                my $pkgs = "$mirdir/dists/$dist/$area/binary-$arch/Packages";
                my $upkgs = "$mirdir/dists/$dist/$area/debian-installer/"
                  ."binary-$arch/Packages";
                my $pkgfd = _open_data_file($pkgs);
                my $binsub = sub {
                    _parse_pkgs_pg($active_srcs, $binman, $mirdir, $area, @_);
                };
                my $upkgfd;
                my $udebsub = sub {
                    _parse_pkgs_pg($active_srcs, $udebman, $mirdir, $area, @_);
                };
                visit_dpkg_paragraph($binsub, $pkgfd);
                close($pkgfd);
                $upkgfd = _open_data_file($upkgs);
                visit_dpkg_paragraph($udebsub, $upkgfd);
                close($upkgfd);
            }
            $srcfd = _open_data_file($srcs);
            $srcsub
              = sub { _parse_srcs_pg($active_srcs, $srcman, $mirdir, $area, @_) };
            visit_dpkg_paragraph($srcsub, $srcfd);
            close $srcfd;
        }
    }
    return ($srcman, $binman, $udebman);
}

# _open_data_file ($file)
#
# Opens $file if it exists, otherwise it tries common extensions (i.e. .gz) and opens
# that instead.  It may pipe the file through a external decompressor, so the returned
# file descriptor cannot be assumed to be a file.
#
# If $file does not exists and no common extensions are found, this dies.  It may also
# die if it finds a file, but is unable to open it.
sub _open_data_file {
    my ($file) = @_;
    if (-e $file) {
        open my $fd, '<', $file or Die "opening $file: $!";
        return $fd;
    }
    if (-e "$file.gz") {
        return open_gz("$file.gz");
    }
    Die "Cannot find $file";
}

# Helper for local_mirror_manifests - it parses a paragraph from Packages file
sub _parse_pkgs_pg {
    my ($active_srcs, $manifest, $mirdir, $area, $data) = @_;
    my $ts = 0;
    my $s;
    unless ($data->{'source'}) {
        $data->{'source'} = $data->{'package'};
    } elsif ($data->{'source'} =~ /^([-+\.\w]+)\s+\((.+)\)$/) {
        $data->{'source'} = $1;
        $data->{'source-version'} = $2;
    } else {
        $data->{'source-version'} = $data->{'version'};
    }
    unless (defined $data->{'source-version'}) {
        $data->{'source-version'} = $data->{'version'};
    }
    $s = $data->{'source'} . '/' . $data->{'source-version'};
    $active_srcs->{$s}++;
    $data->{'file'} = $mirdir . '/' . $data->{'filename'};
    $data->{'area'} = $area;
    # $manifest strips redundant fields for us.  But for clarity and to
    # avoid "hard to debug" cases $manifest renames the fields, we explicitly
    # remove the "filename" field.
    delete $data->{'filename'};

    if (my @stat = stat $data->{'file'}) {
        $ts = $stat[9];
    }
    $data->{'timestamp'} = $ts;

    $manifest->set($data);
    return;
}

# Helper for local_mirror_manifests - it parses a paragraph from Sources file
sub _parse_srcs_pg {
    my ($active_srcs, $manifest, $mirdir, $area, $data) = @_;
    my $ts = 0;
    my $dir = $data->{'directory'}//'';
    my $s = $data->{'package'} . '/' . $data->{'version'};
    # only include the source if it has any binaries to be checked.
    # - Otherwise we may end up checking a source with no binaries
    #   (happens if the architecture is "behind" in building)
    return unless $active_srcs->{$s};
    $dir .= '/' if $dir;
    foreach my $f (split m/\n/, $data->{'files'}) {
        strip($f);
        next unless $f && $f =~ m/\.dsc$/;
        my (undef, undef, $file) = split m/\s++/, $f;
        # $dir should end with a slash if it is non-empty.
        $data->{'file'} = $mirdir . "/$dir" . $file;
        last;
    }
    $data->{'area'} = $area;
    # Rename a field :)
    $data->{'source'} = $data->{'package'};

    if (my @stat = stat $data->{'file'}) {
        $ts = $stat[9];
    }
    $data->{'timestamp'} = $ts;

    # $manifest strips redundant fields for us.
    $manifest->set($data);
    return;
}

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
