# source-copyright-file -- lintian check script -*- perl -*-

# Copyright (C) 2011 Jakub Wilk
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

package Lintian::source_copyright;

use strict;
use warnings;

use Lintian::Relation::Version qw(versions_compare);
use Lintian::Tags qw(tag);
use Util;

my $dep5_last_normative_change = '0+svn~166';
my $dep5_last_overhaul = '0+svn~148';
my %dep5_renamed_fields = (
    'format-specification' => 'format',
    'maintainer' => 'upstream-contact',
    'upstream-maintainer' => 'upstream-contact',
    'contact' => 'upstream-contact',
    'name' => 'upstream-name',
);

sub run {
 
my ($pkg, $type, $info) = @_;

my $copyright_filename = $info->debfiles('copyright');

if (-l $copyright_filename) {
    tag 'debian-copyright-is-symlink';
    return;
}

if (not -f $copyright_filename) {
    my $pkgs = $info->binaries;
    tag 'no-debian-copyright';
    $copyright_filename = undef;
    if (keys(%$pkgs) == 1) {
	# If debian/copyright doesn't exist, and the only a single binary
	# package is built, there's a good chance that the copyright file is
	# available as debian/<pkgname>.copyright.
	$copyright_filename = $info->debfiles((keys(%$pkgs))[0] . '.copyright');
	if (not -f $copyright_filename or -l $copyright_filename) {
	    $copyright_filename = undef;
	}
    }
}

return unless defined $copyright_filename;

$_ = slurp_entire_file($copyright_filename);
study $_;

my @dep5;

if (m{
    (^ | \n)
    (?i: format(:|[-\s]spec) )
    (?: . | \n\s+ )*
    (?: /dep[5s]?\b | \bDEP-?5\b | [Mm]achine-readable\s(?:license|copyright) | /copyright-format/ | CopyrightFormat | VERSIONED_FORMAT_URL )
}x)
{
    # Before trying to parse the copyright as Debian control file, try to
    # determine the format URI.
    my $first_para = $_;
    $first_para =~ s,^#.*,,mg;
    $first_para =~ s,[ \t]+$,,mg;
    $first_para =~ s,^\n+,,g;
    $first_para =~ s,\n\n.*,\n,s; #;; hi emacs
    $first_para =~ s,\n?[ \t]+, ,g;
    $first_para =~ m,^Format(?:-Specification)?:\s*(.*),mi;
    my $uri = $1;
    $uri =~ s/^([^#\s]+)#/$1/ if defined $uri; # strip fragment identifier
    if (defined $uri) {
	my $original_uri = $uri;
	my $version;
	if ($uri =~ m,\b(?:rev=REVISION|VERSIONED_FORMAT_URL)\b,) {
	    tag 'boilerplate-copyright-format-uri', $uri;
	}
	elsif ($uri =~ s,http://wiki\.debian\.org/Proposals/CopyrightFormat\b,,) {
	    $version = '0~wiki';
	    $uri =~ m,^\?action=recall&rev=(\d+)$, and $version = "$version~$1";
	}
	elsif ($uri =~ m,^http://dep\.debian\.net/deps/dep5/?$,) {
	    $version = '0+svn';
	}
	elsif ($uri =~ s,^http://svn\.debian\.org/wsvn/dep/web/deps/dep5\.mdwn\b,,) {
	    $version = '0+svn';
	    $uri =~ m,^\?(?:\S+&)?rev=(\d+)(?:&\S+)?$, and $version = "$version~$1";
	}
	elsif ($uri =~ s,^http://anonscm\.debian\.org/viewvc/dep/web/deps/dep5\.mdwn\b,,) {
	    $version = '0+svn';
	    $uri =~ m,^\?(?:\S+&)?(?:pathrev|revision)=(\d+)(?:&\S+)?$, and $version = "$version~$1";
	}
	elsif ($uri =~ m,^http://www\.debian\.org/doc/(?:packaging-manuals/)?copyright-format/(\d+\.\d+)$,) {
	    $version = $1;
	}
	else {
	    tag 'unknown-copyright-format-uri', $original_uri;
	}
	if (defined $version) {
	    if ($version =~ m,wiki,) {	
		    tag 'wiki-copyright-format-uri', $original_uri;
	    }
	    elsif ($version =~ m,svn$,) {
		tag 'unversioned-copyright-format-uri', $original_uri;
	    }
	    elsif (versions_compare $version, '<<', $dep5_last_normative_change) {
		tag 'out-of-date-copyright-format-uri', $original_uri;
	    }
	    if (versions_compare $version, '>=', $dep5_last_overhaul) {
		# We are reasonably certain that we're dealing with an up-to-date
		# DEP-5 format. Let's try to do more strict checks.
		eval {
		    @dep5 = read_dpkg_control($copyright_filename);
		};
		if ($@) {
		    $@ =~ s/^internal error: //;
		    tag 'syntax-error-in-dep5-copyright', $@;
		}
	    }
	}
    }
    else {
	tag 'unknown-copyright-format-uri';
    }
}

if (@dep5) {
    my $first_para = shift @dep5;
    my %standalone_licenses;
    my %required_standalone_licenses;
    for my $field (keys %{$first_para}) {
	my $renamed_to = $dep5_renamed_fields{$field};
	if (defined $renamed_to) {
	    tag 'obsolete-field-in-dep5-copyright', $field, $renamed_to
	}
    }
    if (not defined $first_para->{'format'} and not defined $first_para->{'format-specification'}) {
	tag 'missing-field-in-dep5-copyright', 'paragraph', '1', 'format';
    }
    for my $license (split_licenses($first_para->{'license'})) {
	$required_standalone_licenses{$license} = 1;
    }
    my $commas_in_files = 0;
    my $i = 1;
    for my $para (@dep5) {
	$i++;
	my $license = $para->{'license'};
	my $files = $para->{'files'};
	if (defined $license and not defined $files) {
	    # Standalone license paragraph
	    if (not $license =~ m/\n/) {
		tag 'missing-license-text-in-dep5-copyright', 'paragraph', $i, "\L$license";
	    }
	    ($license, undef) = split /\n/, $license, 2;
	    for (split_licenses($license)) {
		$standalone_licenses{$_} = $i;
	    }
	}
	elsif (defined $files) {
	    # Files paragraph
	    $commas_in_files = 1 if $files =~ /,/;
	    if (defined $license) {
		for (split_licenses($license)) {
		    $required_standalone_licenses{$_} = $i;
		}
	    }
	    else {
		tag 'missing-field-in-dep5-copyright', 'paragraph', $i, 'license';
	    }
	    if (not defined $para->{'copyright'}) {
		tag 'missing-field-in-dep5-copyright', 'paragraph', $i, 'copyright';
	    }
	}
	else {
	    tag 'unknown-paragraph-in-dep5-copyright', 'paragraph', $i;
	}
    }
    if ($commas_in_files) {
	tag 'comma-separated-files-in-dep5-copyright'
	    unless grep(/,/, $info->sorted_index);
    }
    while ((my $license, $i) = each %required_standalone_licenses) {
	if (not defined $standalone_licenses{$license}) {
	    tag 'missing-license-paragraph-in-dep5-copyright', 'paragraph', $i, $license;
	}
    }
    while ((my $license, $i) = each %standalone_licenses) {
	if (not defined $required_standalone_licenses{$license}) {
	    tag 'unused-license-paragraph-in-dep5-copyright', 'paragraph', $i, $license;
	}
    }
}

}

sub split_licenses {
    my ($_) = @_;
    return () unless defined;
    return () if /\n/;
    s/[(),]//;
    return map "\L$_", (split /\s++(?:and|or)\s++/);
}

1;

# Local Variables:
# indent-tabs-mode: t
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sw=4 ts=8 noet shiftround
