#!/usr/bin/perl

=head1 NAME

 apt-cacher2 - WWW proxy optimized for use with APT

 Copyright (C) 2005 Eduard Bloch <blade@debian.org>
 Distributed under the terms of the GNU Public Licence (GPL).

=head1 SYNOPSIS

 ./setup.pl /home/me/cache
 edit /etc/apt/sources.list (use sources like deb http://proxy:3142/archiveserver/debian ...)
 apt-get update
 apt-get -u upgrade

=head1 DESCRIPTION

If you have two or more Debian GNU/Linux machines on a fast local
network and you wish to upgrade packages from the Internet, you
don't want to download every package several times.

apt-cacher2 is a tiny HTTP proxy that keeps a cache on disk of Debian
binary/source packages and meta files which have been received from Debian
distribution servers on the Internet. When an apt-get client issues
a request for a file to apt-cacher2, if the file is already on disk
it is served to the client immediately, otherwise it is fetched from the
Internet and served to the client while a copy is beeing stored on the disk.
This means that several Debian machines can be upgraded but each package needs
to be downloaded only once.

apt-cacher2 is a rewrite of the original apt-cacher.pl CGI script, keeping
compatibility in mind. The cached data can be shared by the both
implementations, while apt-cacher2 providers better performance and less server
load.

=head1 INSTALLATION

Assuming your cache server is called B<www.myserver.com>
and your cache directory is called B</home/me/cache>, then:

1. Edit apt-cacher.conf to customize your settings

2. Run apt-cacher2

=cut
# ----------------------------------------------------------------------------

use strict;
#use warnings;

use Fcntl ':flock';
use POSIX;

use LWP::UserAgent;
use IO::Socket::INET;
use HTTP::Response;

use Time::HiRes qw( sleep gettimeofday tv_interval );


my @index_files = (
	'Packages.gz',
	'Packages.bz2',
	'Release',
	'Release.gpg',
	'Sources.gz',
	'Sources.bz2',
	'Contents-.+\.gz',
);
my $index_files_regexp = '(' . join('|', @index_files) . ')$';


# Include the library for the config file parser
require '/usr/share/apt-cacher/apt-cacher-lib.pl';
require '/etc/apt-cacher/checksumming.conf';


# Set some defaults
my $version='0.1';
my $configfile_default = '/etc/apt-cacher/apt-cacher.conf';
my $daemon_port_default=3142;
my $client="local";

# Read in the config file and set the necessary variables
my $configfile = $configfile_default;

my $direct_mode; # defines using STDIN/STDOUT
my $inetd_mode; # no security checks
my $cgi_mode;
my $cgi_path;

my $cfg;

my $pidfile;
my @extraconfig;

my $chroot;
my $retnum;
my $do_fork_away;

# this script needs to be executed trough a CGI wrapper setting a flag variable
if($ENV{CGI_MODE})
{
    # yahoo, back to the roots, assume beeing in CGI mode
    $cgi_mode=1;
    $direct_mode=1;
    # pick up the URL
    $cgi_path=$ENV{PATH_INFO} if ! $cgi_path;
    $cgi_path=$ENV{QUERY_STRING} if ! $cgi_path;
    $cgi_path="/" if ! $cgi_path; # set an invalid path to display infos below
}
else {
    while(scalar @ARGV) {

        my $arg=shift(@ARGV);

        if($arg eq "-c") {
            $configfile=shift(@ARGV);
            die "$configfile unreadable" if ! -r $configfile;
        }
        elsif($arg eq "-r") {
            $chroot=shift(@ARGV);
            die "No such directory: $chroot\n" if ! -d $chroot;
        }
        elsif($arg eq "-R") {
            $retnum=shift(@ARGV);
        }
        elsif($arg eq "-i") {
            $inetd_mode=1;
            $direct_mode=1;
        }
        elsif($arg eq "-d") {
            $do_fork_away=1;
        }
        elsif($arg eq "-p") {
            $pidfile=shift(@ARGV);
        }
        elsif($arg=~/(\S+)=(\S+)/) {
            push(@extraconfig, $1, $2);
        }
        elsif($arg eq "-h" || $arg eq "--help") {
            print <<EOM;
USAGE: $0 <options> <override(s)>
Options:

-c configfile   Custom config file (default: $configfile_default)
-i              Inetd mode, STDIN and STDOUT are used for service
(default: standalone server mode)
-d              become a background daemon

Advanced options (root only):
-r directory    (experimental option) 
                path to chroot to after reading the config and opening the log
                files. cache directory setting must be relative to the new root.
                WARNING: log files should be created before and be owned by tne
                effective user/group if -g or -u are used
-p pidfile      write the server process ID into this file

Overrides:     override config variables (see config file), eg. daemon_port=9999

EOM
            exit(0);
        }
        else {
            die "Unknown parameter $arg\n";
        }
    }
}

eval {
        $cfg = read_config($configfile);
};

# not sure what to do if we can't read the config file...
die "Could not read config file: $@" if $@;

# Now set some things from the config file
# $logfile used to be set in the config file: now we derive it from $logdir
$$cfg{logfile} = "$$cfg{logdir}/access.log";

# $errorfile used to be set in the config file: now we derive it from $logdir
$$cfg{errorfile} = "$$cfg{logdir}/error.log";

$$cfg{fetch_timeout}=300; # five minutes from now

my $private_dir = "$$cfg{cache_dir}/private";
define_global_lockfile("$private_dir/exlock");

# override config values with the user-specified parameters
while(@extraconfig) { 
    my $k=shift(@extraconfig);
    my $v=shift(@extraconfig); 
    $$cfg{$k}=$v;
}


my ($aclog_fh, $erlog_fh);
#FIXME: genauer die Scopes betrachten
my ($path, $filename, $new_filename, $con, $source);

my %pathmap;

if($$cfg{path_map}) {
    for(split(/\s*;\s*/, $$cfg{path_map})) {
        my @tmp = split(/\s+/, $_);
        # must have at least one path and target
        next if ($#tmp < 1);
        my $key=shift(@tmp);
        $pathmap{$key}=[@tmp];
    }
}

#optional checksumming support
db_init("$$cfg{cache_dir}/md5sums.sl3");


# Output data as soon as we print it
$| = 1;

# Function prototypes
sub ipv4_addr_in_list ($$);
sub ipv6_addr_in_list ($$);
sub get_abort_time ();

# ----------------------------------------------------------------------------
# Die if we have not been configured correctly
die "$0: No cache_dir directory!\n" if (!-d $$cfg{cache_dir});
die "$0: No cache_dir/private directory!\n" if (!-d $private_dir);

# ----------------------------------------------------------------------------
# Data shared between functions

my $cached_file;
my $cached_head;
my $complete_file;
my $notify_file;

my $do_import=0;

my $ua;
my $daemon;
my $server_pid;
my $fetcher_pid;
my %childPids;
my $terminating;

sub term_handler {
    $terminating=1;

    # close all connections or shutdown the server if parent and kill 
    debug_message("received SIGTERM, terminating");
    $con->close if defined($con);

    # stop all children
    for(keys %childPids) { 
        &debug_message("killing subprocess: $_"); 
        kill 15, $_;
    };

    if($server_pid && $server_pid == $$) {
        $daemon->shutdown(2);
    }
    exit 0;
};

# broken, kills unrelated processes. Not using for now.
# perlipc(1)
# also remove them from the to-be-killed list
sub reap_children {
    my $child;
    while (($child = waitpid(-1,WNOHANG)) > 0) {
        delete $childPids{$child};
    }
    $SIG{CHLD} = \&reap_children;  # still loathe sysV

}
$SIG{CHLD} = \&reap_children;
#$SIG{CHLD} = 'IGNORE';
$SIG{'TERM'} = \&term_handler;

my $getBufLen=10000;
my $maxspeed;

my ($chfd, $pkfd);

# for rate limit support
if($$cfg{limit}>0) {
    $maxspeed = $$cfg{limit}*1024;
    $getBufLen = $maxspeed/20; # 20 portions per second should be enough
}

sub setup_agent {

   return if(defined($ua));

   $ua=LWP::UserAgent->new('keep_alive' => 1);

   # Check whether a proxy is to be used, and set the appropriate environment variable
   my $proxystring;
   if ( $$cfg{use_proxy} eq 1 && $$cfg{http_proxy}) {
       $proxystring="http://";
       if ( $$cfg{use_proxy_auth} eq 1) {
           $proxystring.=$$cfg{http_proxy_auth}.'@';
       }
       $proxystring.=$$cfg{http_proxy};
   }
   $ua->proxy("http", $proxystring) if $proxystring;
}





# BEGINN MAIN PART

if($cgi_mode && defined($$cfg{cgi_advise_to_use}) && $$cfg{cgi_advise_to_use}) {
    print "Status: 410 $$cfg{cgi_advise_to_use}\r\n\r\n";
    exit 0;
}

if($direct_mode) {
    &open_log_files;
    $client = "INETD" if $inetd_mode;

    # get the string if available even in inetd / direct mode so local calles can
    # identify themselves in the logs.
    $client=$ENV{REMOTE_ADDR} if exists $ENV{REMOTE_ADDR};

    &handle_connection;
    exit 0;
}

my %daemonopts = (LocalPort => $$cfg{daemon_port}, Proto => 'tcp', Listen => 1);
$daemonopts{LocalAddr}=$$cfg{daemon_addr} if(defined($$cfg{daemon_addr}));

while(1) {
    $daemon = IO::Socket::INET->new(%daemonopts);
    last if $daemon;
    $retnum--;
    last if($retnum<=0);
    print STDERR "Unable to bind socket (port $$cfg{daemon_port}), trying again in 5 seconds.\n";
    sleep 5;
}
die "Unable to bind socket (port $$cfg{daemon_port}), $0 not started.\n" if ! $daemon;

$server_pid=$$;

if($do_fork_away) {
    my $pid = fork();
    if ($pid < 0) {
        barf("fork() failed");
    }
    if ($pid > 0) {
        # parent
        exit 0;
    }
}

# STATE: Port open, still beeing root. Create pidfiles, logfiles, then su
# 
if($pidfile) {
    open(my $fh, ">$pidfile");
    print $fh $$;
    close($fh);
}

my $uid=$$cfg{user};
my $gid=$$cfg{group};

if($chroot) {
    if($uid || $gid) {
        # open them now, before it is too late
        &open_log_files;
    }
    chroot $chroot;
    chdir $chroot;
}

if($uid) {
    if($uid=~/^\d+$/) {
        my $name=getpwuid($uid);
        die "Unknown user ID: $uid (exiting)\n" if !$name;
    }
    else {
        $uid=getpwnam($uid);
        die "No such user (exiting)\n" if !$uid;
    }
    setuid($uid) || barf("Unable to change user id");
}

if($gid) {
    if($gid=~/^\d+$/) {
        my $name=getgrgid($gid);
        die "Unknown user ID: $gid (exiting)\n" if !$name;
    }
    else {
        $gid=getgrnam($gid);
        die "No such group (exiting)\n" if !$gid;
    }
    setuid($gid) || barf("Unable to change group id");
}

&open_log_files;

# State: READY
# That is the working condition (daemon mode)

while (1)
{
    my $newcon = $daemon->accept;
    # we don't stop, only by term_handler since the accept method is unreliable
    next if(!$newcon);
    last if $terminating;

    $client = $newcon->peerhost;
    debug_message("Connection from $client");

    my $pid = fork();
    if ($pid < 0) {
        barf("fork() failed");
    }

    if ($pid > 0) {
        # parent
        debug_message("registred child process: $pid");
        $childPids{$pid}=1;
        next;
    }
    # child
    undef %childPids;

    &handle_connection($newcon);
    exit (0);

}
exit 0;
# exit from the daemon loop



sub handle_connection {
    # now begin connection's personal stuff
    debug_message("New HTTP connection open");
    
    if($direct_mode) {
        # beeing in forced mode, ie. manual call
        $source=*STDIN;
        $con = *STDOUT;
    }
    else {

        # serving a network client
        
        $con = shift;
        $source = $con;
    }
    

    if(!$inetd_mode) {
        # ----------------------------------------------------------------------------
        # Let's do some security checking. We only want to respond to clients within an
        # authorised address range (127.0.0.1 and ::1 are always allowed).

        my $ip_pass = 1;
        my $ip_fail = 0;
        my $clientaddr;

        # allowed_hosts == '*' means allow all ('' means deny all)
        # denied_hosts == '' means don't explicitly deny any
        # localhost is always accepted
        # otherwise host must be in allowed list and not in denied list to be accepted

        if ($client =~ /:/) # IPv6?
        {
            defined ($clientaddr = ipv6_normalise ($client)) or goto badaddr;
            if (substr ($clientaddr, 0, 12) eq "\0\0\0\0\0\0\0\0\0\0\xFF\xFF")
            {
                $clientaddr = substr ($clientaddr, 12);
                goto is_ipv4;
            }
            elsif ($clientaddr eq "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1")
            {
                debug_message("client is localhost");
            }
            else
            {
                $ip_pass = ($$cfg{allowed_hosts_6} =~ /^\*?$/) ||
                ipv6_addr_in_list ($clientaddr, 'allowed_hosts_6');
                $ip_fail = ipv6_addr_in_list ($clientaddr, 'denied_hosts_6');
            }
        }
        elsif (defined ($clientaddr = ipv4_normalise ($client))) # IPv4?
        {
            is_ipv4:
            if ($clientaddr eq "\x7F\0\0\1")
            {
                debug_message("client is localhost");
            }
            else
            {
                $ip_pass = ($$cfg{allowed_hosts} =~ /^\*?$/) ||
                ipv4_addr_in_list ($clientaddr, 'allowed_hosts');
                $ip_fail = ipv4_addr_in_list ($clientaddr, 'denied_hosts');
            }
        }
        else
        {
            goto badaddr;
        }

        # Now check if the client address falls within this range
        if ($ip_pass && !$ip_fail)
        {
            # Everything's cool, client is in allowed range
            debug_message("Client $client passed access control rules");
        }
#        elsif($client eq "local")
#        {
#            # Everything's cool, client is in allowed range
#            debug_message("Client $client passed access control rules");
#        }
        else
        {
            # Bzzzt, client is outside allowed range. Send 'em a 403 and bail.
            badaddr:
            debug_message("Alert: client $client disallowed by access control");

            &sendrsp(403, "Access to cache prohibited");
            exit(4);
        }

    }

    my $testpath; # temporary, to be set by GET lines, undef on GO
    my $ifmosince;# to be undef by new GET lines
    my $send_head_only=0; # to be undef by new GET lines
    my $tolerated_empty_lines=20;
    my $concloseflag;

    REQUEST:
    while(!$concloseflag) {

        debug_message("Processing a new request line");

        $_=&getRequestLine;

        exit if !defined($_);
        
        if(/^$/) {
            if(defined($testpath)) {
                $path=$testpath;
                undef $testpath;
            }
            elsif(!$tolerated_empty_lines)   {
                &sendrsp(403, "Go away");
                exit(4);
            }
            else {
                $tolerated_empty_lines--;
                next REQUEST;
            }
        }
        else {
            if(/^(GET|HEAD)\s+(\S+)/) {

                if(defined($testpath)) {
                    &sendrsp(403, "Confusing request");
                    exit(4);
                }

                $testpath=$2;

                undef $ifmosince; # will come after GET
                undef $send_head_only;

                # also support pure HEAD calls
                if($1 eq 'HEAD') {
                    $send_head_only=1;
                }

            }
            elsif(/^Connection: close\s+(.*)/i) {
                $concloseflag=1;
            }
            elsif(/^If-Modified-Since:\s+(.*)/i) {
                $ifmosince=$1;
            }
            elsif(/^\S+: [^:]*/) {
                # whatever
            }
            else {
                &sendrsp(403, "Could not understand $_");
                exit(4);
            }

            next REQUEST;
        }

        # tolerate CGI specific junk and two slashes in the beginning
        $path =~ s!^/apt-cacher\??/!/!;
        $path =~ s!^//!/!;

        # Now parse the path
        if ($path =~ /^\/?report/) {
            usage_report();
            exit(0);
        }

        if ($path !~ m(^/?.+/.+)) {
            usage_error();
        }

        REPARSE:
        
        my($host,$uri) = ($path =~ m#^/?([^/]+)(/.+)#);
        
        if ( !$host || !$uri ) {
            usage_error();
        }

        ($filename) = ($uri =~ /\/?([^\/]+)$/);

        if($$cfg{allowed_locations}) {
            #         debug_message("Doing location check for ".$$cfg{allowed_locations} );
            my $mess;
            my $cleanuri=$uri;
            $cleanuri=~s!/[^/]+/[\.]{2}/!/!g;
            if ($host eq ".." ) {
                $mess = "'..' contained in the hostname";
            }
            elsif ($cleanuri =~/\/\.\./) {
                $mess = "File outside of the allowed path";
            }
            else {
                for( split(/\s*,\s*/,$$cfg{allowed_locations}) ) {
                    debug_message("Testing URI: $host$cleanuri on $_");
                    goto location_allowed if ("$host$cleanuri" =~ /^$_/);
                }
                $mess = "Host '$host' is not configured in the allowed_locations directive";
            }
            badguy:
            debug_message("$mess; access denied");
            &sendrsp(403, "Access to cache prohibited, $mess");
            exit(4);
        }
        location_allowed:

        $do_import=0;

        if ($filename =~ /(\.deb|\.rpm|\.dsc|\.tar\.gz|\.diff\.gz|\.udeb)$/) {
            # We must be fetching a .deb or a .rpm, so let's cache it.
            # Place the file in the cache with just its basename
            $new_filename = $filename;
            debug_message("new filename with just basename: $new_filename");
        } elsif ($filename =~ /$index_files_regexp/) {
            # It's a Packages.gz or related file: make a long filename so we can cache these files without
            # the names colliding
            $new_filename = "$host$uri";
            $new_filename =~ s/\//_/g;
            debug_message("new long filename: $new_filename");
            # optional checksumming support
            if ($filename =~ /(Packages|Sources)/) {
                # warning, an attacker could poison the checksum cache easily
                $do_import=1;
            }
        } else {
            # Maybe someone's trying to use us as a general purpose proxy / relay.
            # Let's stomp on that now.
            debug_message("Sorry, not allowed to fetch that type of file: $filename");
            &sendrsp(403, "Sorry, not allowed to fetch that type of file: $filename");
            exit(4);
        }

        $cached_file = "$$cfg{cache_dir}/packages/$new_filename";
        $cached_head = "$$cfg{cache_dir}/headers/$new_filename";
        $complete_file = "$private_dir/$new_filename.complete";
        $notify_file = "$private_dir/$new_filename.notify";

        my $force_download=0;

        my $cache_status;

        debug_message("looking for $cached_file");

        if ($filename =~ /$index_files_regexp/) {
            debug_message("known as index file: $filename");
            if (-f $cached_file && -f $cached_head) {
                if($$cfg{expire_hours} > 0) {
                    my $now = time();
                    my @stat = stat($cached_file);
                    if (@stat && int(($now - $stat[9])/3600) > $$cfg{expire_hours}) {
                        debug_message("unlinking $new_filename because it is too old");
                        # Set the status to EXPIRED so the log file can show it was downloaded again
                        $cache_status = "EXPIRED";
                        debug_message("$cache_status");
                        $force_download=1;
                    }
                }
                else {
                    # use HTTP timestamping
                    my ($oldhead, $testfile, $newhead);
                    my $response = &ua_act(1, $host, $uri);
                    #my $response = $ua->head("http://$host$uri");
                    $newhead = $response->header("Last-Modified");
                    if($newhead && open($testfile, $cached_head)) {
                        
                        $newhead =~ s/\n|\r//g;

                        for(<$testfile>){
                            if(/^.*Last-Modified:\s(.*)(\r|\n)/) {
                                $oldhead = $1;
                                last
                            }
                        }
                        close($testfile);
                    }
                    if($oldhead && ($oldhead eq $newhead) ) {
                        # that's ok
                        debug_message("remote file not changed, $oldhead vs. $newhead");
                    }
                    else {
                        debug_message("unlinking $new_filename because it differs from server's version");
                        $cache_status = "EXPIRED";
                        debug_message("$cache_status");
                        $force_download=1;
                    }
                }
            }
        }

        # handle if-modified-since in a better way (check the equality of
        # the time stamps). Do only if download not forced above.

        if($ifmosince && !$force_download) {
            $ifmosince=~s/\n|\r//g;

            my $oldhead;
            if(open(my $testfile, $cached_head)) {
                LINE: for(<$testfile>){
                    if(/^.*Last-Modified:\s(.*)(\r|\n)/) {
                        $oldhead = $1;
                        last LINE;
                    }
                }
                close($testfile);
            }

            if($oldhead && $ifmosince eq $oldhead) {
                &sendrsp(304, "Not Modified");
                debug_message("File not changed: $ifmosince");
                next REQUEST;
            }
        }

        &set_global_lock(": file download decission"); # file state decissions, lock that area

        my $fromfile; # handle for the reader

        # download or not decission. Also releases the global lock
        dl_check:
        if( !$force_download && -e $cached_head && -e $cached_file) {
            if (-f $complete_file) {
                # not much to do if complete
                $cache_status = "HIT";
                debug_message("$cache_status");
            }
            else {
                # a fetcher was either not successfull or is still running
                # look for activity...
                sysopen($fromfile, $cached_file, O_RDONLY) || undef $fromfile;
                if (flock($fromfile, LOCK_EX|LOCK_NB)) {
                    flock($fromfile, LOCK_UN);
                    # bad, no fetcher working on this package. Redownload it.
                    close($fromfile); undef $fromfile;
                    debug_message("no fetcher running, forcing download");
                    $force_download=1;
                    goto dl_check;
                }
            }

            &release_global_lock;
        }
        else {
            # (re) download them
            unlink($cached_file, $cached_head, $complete_file, $notify_file);
            debug_message("file does not exist or so, creating it");
            # Set the status to MISS so the log file can show it had to be downloaded
            if(!defined($cache_status)) { # except on special presets from index file checks above
                $cache_status = "MISS"; 
                debug_message("$cache_status");
            }

            # the writer releases the global lock after opening the target file
            my $pid = fork();
            if ($pid < 0) {
                barf("fork() failed");
            }
            if ($pid == 0) {
                # child, the fetcher thread
                undef %childPids;
                sysopen($pkfd, $cached_file, O_RDWR|O_CREAT|O_EXCL, 0644) || barf("Unable to store files");
                open ( $chfd, ">$cached_head");

                if (flock($pkfd, LOCK_EX)) {

                    # release the global lock within fetcher thread after the
                    # file has been created
                    &release_global_lock;

                    &fetch_store ($host, $uri); 

                    exit 0;
                }
                else {
                    barf("Problem locking the target file!");
                }
                # child exiting above, so or so
            }
            # parent continues
            $childPids{$pid}=1;
            debug_message("registred child process: $pid");
        }

        # At this point the file is open, and it's either complete or somebody
        # is fetching its contents

        my $header_printed=0;

        data_init();
        
        my $abort_time = get_abort_time();

        my $buf;

        my $geslen=0;
        my $curlen=0;

        my $complete_found;

        # needs to print the header first
        CHUNK: while (1) {

            #debug_message("Send loop iteration:");

            if (time() > $abort_time) {
                debug_message("abort (timeout)");
                exit(4);
            }

            if(! $header_printed) {
                my $headstring;
                if(-s $cached_head) {
                    &set_global_lock(": reading the header file");
                    if(! -f $cached_head) {
                        # file removed while waiting for lock - download failure?!
                        # start over, maybe spawning an own fetcher
                        &release_global_lock;
                        goto dl_check;
                    }

                    open(my $in, $cached_head);
                    my $code=200;
                    my $msg='';
                    my $headstring='';

                    $headstring=<$in>; # read exactly one status line

                    ($code, $msg) = ($headstring=~/^HTTP\S+\s+(\d+)\s(.*)/);
                    # alternative for critical errors
                    if(!defined($code)) {
                        ($code, $msg) = ($headstring=~/^(5\d\d)\s(.*)/);
                    }

                    if(!defined($code)) {
                        writeerrorlog("Faulty header file detected: $cached_head, first line was: $headstring");
                        unlink $cached_head;
                        exit 3;
                    }

                    # in CGI mode, use alternative status line. Don't print one
                    # for normal data output (apache does not like that) but on
                    # anormal codes, and then exit immediately
                    if($cgi_mode) {
                        # don't print the head line but a Status on errors instead
                        $headstring=~s/^HTTP\S+/Status:/;
                        if($code == 200) {
                            $headstring=''; # kick headline by default
                        }
                        else {
                            print $con $headstring."\n\n";
                            exit 1;
                        }
                    }

                    # keep only parts interesting for apt
                    $headstring.="Connection: Keep-Alive\r\n";
                    for(<$in>) {
                        if(/^Last-Modified|Content|Accept/) {
                            $headstring.=$_;
                        }
                        #if(/^Content-Length:/)  # FIXME: selber die endlaenge bestimmen, anderen code nur als fallback
                        #$geslen=
                    }

                    # add this reader to the notification list before printing anything useful to the client

                    debug_message("starting to return $cached_file");

                    if(! -f $complete_file) { # there is no point if the file is already here
                        open(my $nf, ">>$notify_file");
                        flock($nf, LOCK_EX);
                        print $nf "$$\n";
                        flock($nf, LOCK_UN);
                        close($nf);
                    }

                    print $con $headstring."\r\n";

                    $header_printed=1;
                    debug_message("Header sent: $headstring");
                    &release_global_lock;

                    # close connection on failures, APT apparently expects this :(
                    exit if ($code != 200);
                    
                }
                else {
                    sleep(0.5);
                    next CHUNK;
                }

                # pure HEAD request, we are done
                next REQUEST if $send_head_only;
            }

            if(! $fromfile) # is the data file open already? open in this iteration if needed
            {
                if( ! -f $cached_file) {
                    sleep(1);
                    next CHUNK;
                }

                sysopen($fromfile, $cached_file, O_RDONLY); #FIXME, checken
                next CHUNK;
            }
            else
            {
                my $n=0;
                $n = sysread($fromfile, $buf, 65536);
                debug_message("read $n bytes");

                if(!defined($n)) {
                    debug_message("Error detected, closing connection");
                    exit(4);
                }
                
                DIDREREAD:
                
                if($n==0) {
                    
                    if($complete_found) { # comlete file was found in the previous iteration
                        last CHUNK;
                    }

                    if (-f $complete_file) {
                        # do another iteration, may need to read remaining data
                        debug_message("complete file found");
                        $complete_found=1;
                        next CHUNK;
                    }

                    #debug_message("waiting for new data");
                    # wait for fresh data
                    sleep(0.5);
                    next CHUNK;

                }
                else {
                    $curlen+=$n;
                    #debug_message("write $n / $curlen bytes");
                    # send data and update watchdog
                    print $con $buf;
                    debug_message("wrote $n (sum: $curlen) bytes");
                    $abort_time = get_abort_time();
                    data_feed(\$buf);
                }
            }
        }

        debug_message("Package sent");

        # Write all the stuff to the log file
        writeaccesslog("$cache_status", "$new_filename");
        if(!check_sum($new_filename)) {
            writeerrorlog("ALARM! Faulty package in local cache detected! Replacing $new_filename in the next run");
            unlink $cached_file;
            exit(4);
        }
    }

}



















sub barf {
	my $errs = shift;

	die "--- $0: Fatal: $errs\n";
}

sub usage_error {
    &open_log_files;
	writeerrorlog("--- $0: Usage error");

    if(! defined($$cfg{example_sources_line})) {
        $$cfg{example_sources_line}="deb&nbsp;http://<b>yourcache.example.com:$$cfg{daemon_port}/</b>ftp.au.debian.org/debian&nbsp;unstable&nbsp;main&nbsp;contrib&nbsp;non-free";
    }

    &sendrsp(200, "OK", "Content-Type", "text/html", "Expires", 0);
	print $con <<EOF;

<html>
<title>Apt-cacher version $version
</title><style type="text/css"><!--
a { text-decoration: none; }
a:hover { text-decoration: underline; }
h1 { font-family: arial, helvetica, sans-serif; font-size: 18pt; font-weight: bold;}
h2 { font-family: arial, helvetica, sans-serif; font-size: 14pt; font-weight: bold;}
body, td { font-family: arial, helvetica, sans-serif; font-size: 10pt; }
th { font-family: arial, helvetica, sans-serif; font-size: 11pt; font-weight: bold; }
//--></style>
</head>
<body>
<p>
<table border=0 cellpadding=8 cellspacing=1 bgcolor="#000000" align="center" width="600">
<tr bgcolor="#9999cc"><td> <h1>Apt-cacher version $version</h1> </td></tr>
<tr bgcolor="#cccccc"><td>
Usage: edit your /etc/apt/sources.list so all your HTTP sources are prepended 
with the address of your apt-cacher machine and the port, like this:
<blockquote>deb&nbsp;http://ftp.au.debian.org/debian&nbsp;unstable&nbsp;main&nbsp;contrib&nbsp;non-free</blockquote>
becomes
<blockquote>$$cfg{example_sources_line}</blockquote>
</td></tr>
</table>

<h2 align="center">config values</h2>
<table border=0 cellpadding=3 cellspacing=1 bgcolor="#000000" align="center">
<tr bgcolor="#9999cc"><th> Directive </th><th> Value </th></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> configfile </td><td> $configfile </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> admin_email </td><td> <a href="mailto:$$cfg{admin_email}">$$cfg{admin_email}</a> </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> generate_reports </td><td> $$cfg{generate_reports} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> cache_dir </td><td> $$cfg{cache_dir} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> logfile </td><td> $$cfg{logfile} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> errorfile </td><td> $$cfg{errorfile} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> expire_hours </td><td> $$cfg{expire_hours} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> http_proxy </td><td> $$cfg{http_proxy} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> use_proxy </td><td> $$cfg{use_proxy} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> use_proxy_auth </td><td> $$cfg{use_proxy_auth} </td></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> debug </td><td> $$cfg{debug} </td></tr>
</table>

<p>
<h2 align="center">license</h2>
<table border=0 cellpadding=8 cellspacing=1 bgcolor="#000000" align="center" width="600">
<tr bgcolor="#cccccc"><td>
<p>Apt-cacher is free software; you can redistribute it and/or modify it under the terms of the GNU General 
Public License as published by the Free Software Foundation; either version 2 of the License, or (at your 
option) any later version.

<p>Apt-cacher is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the 
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 
License for more details.

<p>A copy of the GNU General Public License is available as /usr/share/common-licenses/GPL in the Debian 
GNU/Linux distribution or on the World Wide Web at http://www.gnu.org/copyleft/gpl.html. You can also 
obtain it by writing to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 
02111-1307, USA.
</td></tr>
</table>
</body>
</html>
EOF

    exit 1;

}


# Jon's extra stuff to write the event to a log file.
sub writeaccesslog {
    my $cache_status = shift;
    my $new_filename = shift;

    # The format is 'time|cache status (HIT, MISS or EXPIRED)|client IP address|file size|name of requested file'
    my $time = localtime;
    my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = stat($cached_file);
    my $file_length = 0;
    $file_length+=$size if defined($size);

    flock($aclog_fh, LOCK_EX);
    print $aclog_fh "$time|$client|$cache_status|$file_length|$new_filename\n";
    flock($aclog_fh, LOCK_UN);
}

# Jon's extra stuff to write errors to a log file.
sub writeerrorlog {
	my $message = shift;
	
	my $time = localtime;

    flock($erlog_fh, LOCK_EX);
	syswrite($erlog_fh,"$time|$client|$message\n");
    flock($erlog_fh, LOCK_UN);
}

# Stuff to append debug messages to the error log.
sub debug_message {
    if ($$cfg{debug} eq 1) {
        my $message = shift;
        &writeerrorlog("debug: $message");
    }
}

sub open_log_files {
	if(!$erlog_fh)
    {
        open($erlog_fh,">>$$cfg{errorfile}") or barf("Unable to open $$cfg{errorfile}");
    }
    if(!$aclog_fh) {
        open($aclog_fh,">>$$cfg{logfile}") or barf("Unable to open $$cfg{logfile}");
    }
}
 
sub get_abort_time () {
  return time () + $$cfg{fetch_timeout}; # five minutes from now
}

my $header_stored=0;

my $tstart;
my $geslen;

sub get_callback {
    my $errors=0;

    my ($data, $response, $proto) = @_;
#    debug_message("Callback got data\n");
    if(!$header_stored) {
        $header_stored=1;
        my $headstring = $response->as_string;

        # print $con $headstring;
        
        &set_global_lock(": Callback, storing the header"); # set the lock before writting the first byte to that file, and release it after the file is closed
        print $chfd $headstring || $errors++;
        close($chfd);
        &release_global_lock;

        if($maxspeed) {
            $geslen=-$getBufLen; # will be re-added below
            $tstart = [gettimeofday];
        }

    }
    print $pkfd $data || $errors++;
    #print $con $data;

    data_feed(\$data);

    # delay for rate limiting
    if($maxspeed) {
        $geslen+=$getBufLen;
        my $delta= $geslen/$maxspeed - ( scalar tv_interval ( $tstart ));
        sleep($delta) if ($delta > 0);
    }

    if($errors) {
        #if($header_stored) {
            #    &sendrsp(502, "Caching storage error");
            #}
        # a callback-die!
        # don't just exit here, fetcher needs to handle that
        die();
    }
}

sub fetch_store {

    my ($host, $uri) = @_;

    my $url = "http://$host$uri";
    debug_message("fetcher: try to fetch $url");

    # for checksumming
    data_init();

    my $response = &ua_act(0, $host, $uri);
    #my $response = $ua->get($url, ':content_cb' => \&get_callback, ':read_size_hint' => $getBufLen);
    #$geslen=0;

    debug_message("Get is back");

    if ($response->is_success && !defined($response->header("X-Died")) )
    {

        close($pkfd) if $pkfd;
        undef $pkfd;

        debug_message("stored $url as $cached_file");

        # check missmatch or fetcher failure, could not connect the server
        if(!check_sum($new_filename)) {
            &set_global_lock(": file corruption report");
            writeerrorlog("Do00h, checksum mismatch on $new_filename");
            unlink $cached_file, $cached_head;
            open(MF, ">$cached_head");
            print MF "HTTP/1.1 502 Data corruption";
            close(MF);
            &kill_readers;
            &release_global_lock;
        }

        # assuming here that the filesystem really closes the file and writes
        # it out to disk before creating the complete flag file
        
        # Now create the file to show the pickup is complete, also store the original URL there
        open(MF, ">$private_dir/$new_filename.complete");
        print MF $path;
        close(MF); 

        import_sums($cached_file) if $do_import;

    }
    else
    {
        if(defined($response->header("X-Died"))) {
            $response->code(502);
            $response->message("Apt-Cacher: Transfer terminated");
        }

        debug_message("Reporting error: ".$response->code);
        &set_global_lock(": HTTP error report");
        open(my $ch, $cached_head);
        my $headstring = $response->as_string;
        print $chfd $headstring;
        close($chfd);
        &kill_readers;
        &release_global_lock;
    }

    debug_message("fetcher exiting");
    unlink $notify_file;

    # reset the shared vars
    $header_stored=0;

}

sub kill_readers {
    my $nf;
    if(open($nf, $notify_file)) {
        while(<$nf>) {
            chomp;
        debug_message("Stopping reader: $_");
            kill $_;
        }
        close($nf);
    }
    # should be okay to unlink the file after all readers are "notified"
    unlink $cached_file;
}


# Check if there has been a usage report generated and display it
sub usage_report {
	my $usage_file = "$$cfg{logdir}/report.html";
    &sendrsp(200, "OK", "Content-Type", "text/html", "Expires", 0);
	if (!-f $usage_file) {
		print $con <<EOF;

<html>
<title>Apt-cacher traffic report</title><style type="text/css"><!--
a { text-decoration: none; }
a:hover { text-decoration: underline; }
h1 { font-family: arial, helvetica, sans-serif; font-size: 18pt; font-weight: bold;}
h2 { font-family: arial, helvetica, sans-serif; font-size: 14pt; font-weight: bold;}
body, td { font-family: arial, helvetica, sans-serif; font-size: 10pt; }
th { font-family: arial, helvetica, sans-serif; font-size: 11pt; font-weight: bold; }
//--></style>
</head>
<body>
<table border=0 cellpadding=8 cellspacing=1 bgcolor="#000000" align="center" width="600">
<tr bgcolor="#9999cc"><td> <h1>Apt-cacher traffic report</h1> </td></tr>
</td></tr>
</table>
		
<p><table border=0 cellpadding=3 cellspacing=1 bgcolor="#000000" align="center" width="600">
<tr bgcolor="#9999cc"><th bgcolor="#9999cc"> An Apt-cacher usage report has not yet been generated </th></tr>
<tr bgcolor="#cccccc"><td bgcolor="#ccccff"> Reports are generated every 24 hours. If you want reports to be generated, make sure you set '<b>generate_reports=1</b>' in <b>$configfile</b>.</td></tr>
</table>
		</body>
		</html>
EOF

	}
	else
	{
        open(my $usefile, $usage_file);
        my @usedata = <$usefile>;
        close($usefile);
        print $con @usedata;
	}
}

# IP address filtering.
sub ipv4_addr_in_list ($$)
{
	return 0 if $_[0] eq '';
	debug_message ("testing $_[1]");
	return 0 unless $$cfg{$_[1]};

	my ($client, $cfitem) = @_;
	my @allowed_hosts = split(/,\s*/, $$cfg{$cfitem});
	for my $ahp (@allowed_hosts)
	{
		goto unknown if $ahp !~ /^[-\/,.[:digit:]]+$/;

		# single host
		if ($ahp =~ /^([^-\/]*)$/)
		{
			my $ip = $1;
			debug_message("checking against $ip");
			defined ($ip = ipv4_normalise($ip)) or goto unknown;
			return 1 if $ip eq $client;
		}
		# range of hosts (netmask)
		elsif ($ahp =~ /^([^-\/]*)\/([^-\/]*)$/)
		{
			my ($base, $mask) = ($1, $2);
			debug_message("checking against $ahp");
			defined ($base = ipv4_normalise($base)) or goto unknown;
			$mask = ($mask =~ /^\d+$/) ? make_mask ($mask, 32)
																 : ipv4_normalise ($mask);
			goto unknown unless defined $mask;
			return 1 if ($client & $mask) eq ($base & $mask);
		}
		# range of hosts (start & end)
		elsif ($ahp =~ /^([^-\/]*)-([^-\/]*)$/)
		{
			my ($start, $end) = ($1, $2);
			debug_message("checking against $start to $end");
			defined ($start = ipv4_normalise($start)) or goto unknown;
			defined ($end = ipv4_normalise($end)) or goto unknown;
			return 1 if $client ge $start && $client le $end;
		}
		# unknown
		else
		{
			unknown:
			debug_message("Alert: $cfitem ($ahp) is bad");
			&sendrsp(500, "Configuration error");
			exit(4);
		}
	}
	return 0; # failed
}

sub ipv6_addr_in_list ($$)
{
	return 0 if $_[0] eq '';
	debug_message ("testing $_[1]");
	return 0 unless $$cfg{$_[1]};

	my ($client, $cfitem) = @_;
	my @allowed_hosts = split(/,\s*/, $$cfg{$cfitem});
	for my $ahp (@allowed_hosts)
	{
		goto unknown if $ahp !~ /^[-\/,:[:xdigit:]]+$/;

		# single host
		if ($ahp =~ /^([^-\/]*)$/)
		{
			my $ip = $1;
			debug_message("checking against $ip");
			$ip = ipv6_normalise($ip);
			goto unknown if $ip eq '';
			return 1 if $ip eq $client;
		}
		# range of hosts (netmask)
		elsif ($ahp =~ /^([^-\/]*)\/([^-\/]*)$/)
		{
			my ($base, $mask) = ($1, $2);
			debug_message("checking against $ahp");
			$base = ipv6_normalise($base);
			goto unknown if $base eq '';
			goto unknown if $mask !~ /^\d+$/ || $mask < 0 || $mask > 128;
			my $m = ("\xFF" x ($mask / 8));
			$m .= chr ((-1 << (8 - $mask % 8)) & 255) if $mask % 8;
			$mask = $m . ("\0" x (16 - length ($m)));
			return 1 if ($client & $mask) eq ($base & $mask);
		}
		# range of hosts (start & end)
		elsif ($ahp =~ /^([^-\/]*)-([^-\/]*)$/)
		{
			my ($start, $end) = ($1, $2);
			debug_message("checking against $start to $end");
			$start = ipv6_normalise($start);
			$end = ipv6_normalise($end);
			goto unknown if $start eq '' || $end eq '';
			return 1 if $client ge $start && $client le $end;
		}
		# unknown
		else
		{
			unknown:
			debug_message("Alert: $cfitem ($ahp) is bad");
            &sendrsp(500, "Configuration error");
			exit(4);
		}
	}
	return 0; # failed
}

sub sendrsp {
    my $code=shift;
    my $msg=shift;
    $msg="" if !defined($msg);
    
    my $initmsg=
    $cgi_mode ? 
    "Status: $code $msg\r\n" :
    "HTTP/1.1 $code $msg\r\n";
    
    $initmsg.="Connection: Keep-Alive\r\nAccept-Ranges: bytes\r\nKeep-Alive: timeout=15, max=100\r\n" if ($code ne 403);

    #debug_message("Sending Response: $initmsg");
    print $con $initmsg;

    my $altbit=0;
    for(@_) {
        $altbit=!$altbit;
        if($altbit) {
            #debug_message("$_: ");
            print $con $_.": ";
        }
        else {
            #debug_message($_."\r\n);
            print $con $_."\r\n";
        }
    }
    print $con "\r\n";

}

# DOS attack safe input reader
my @reqLineBuf;
my $reqTail;
sub getRequestLine {
    if($cgi_path) { 
        push(@reqLineBuf, "GET $cgi_path", "", undef); # undef stops operation
        undef $cgi_path; # don't re-add it
    }
    if(! @reqLineBuf) {
        my $buf="";

        # after every read at least one line MUST have been found. Read length
        # is large enough.

        my $n=sysread($source, $buf, 1024);
        $buf=$reqTail.$buf if(defined($reqTail));
        undef $reqTail;

        # pushes the lines found into the buffer. The last one may be incomplete,
        # extra handling below
        push(@reqLineBuf, split(/\r\n/, $buf, 1000) );

        # buf did not end in a line terminator so the last line is an incomplete
        # chunk. Does also work if \r and \n are separated
        if(substr($buf, -2) ne "\r\n") {
            $reqTail=pop(@reqLineBuf);
        }
    }
    return shift(@reqLineBuf);
}

# runs the get or head operations on the user agent
sub ua_act {
    my ($only_head, $vhost, $uri) = @_;

    my $url="http://$vhost$uri";
    
    &setup_agent;

    my $do_hopping = (defined(%pathmap) && exists $pathmap{$vhost});

    my $response;
    my $hostcand;

    RETRY_ACTION:

    # make the virtual hosts real. The list is reduced which is not so smart,
    # but since the fetcher process dies anyway it does not matter.
    if($do_hopping) {
        $hostcand = shift(@{$pathmap{$vhost}});
        $url="http://$hostcand$uri";
    }

    debug_message("download agent: getting $url");
    
    if($only_head) {
        $response = $ua->head($url);
    }
    else {
        $response = $ua->get($url, ':content_cb' => \&get_callback, ':read_size_hint' => $getBufLen);
    }

    if($do_hopping) {
        # if okay or the last candidate failes, put it back into the list
        if($response->is_success || ! @{$pathmap{$vhost}} ) { 
            unshift(@{$pathmap{$vhost}}, $hostcand);
        }
        else {
            goto RETRY_ACTION;
        }
    }

    return $response;
}
