package Lire::Time;

use strict;

use base qw/ Exporter /;
use Time::Local;
use Time::Timezone;

use Carp;

use vars qw/ @EXPORT /;

BEGIN {
    @EXPORT = qw/ date2cal syslog2cal clf2cal cal2rfc cal2ymdhms getMonthName /;
}

=pod

=head1 NAME

Lire::Time - parses and prints date in formats common to many log files.

=head1 SYNOPSIS

    use Lire::Time qw/ syslog2cal /;

    my @ltime = localtime;

    while ( <LOGFILE> ) {
	#...
	my $time = syslog2cal( $m, $d, $t, \@ltime );
    }

=head1 DESCRIPTION

This module supplies many functions to parse dates in formats that you
are likely to encounter in log files. It also offers many functions to format
epoch time in useful format.

=head2 NOTE ABOUT FUNCTION EXPORT

Although all documented functions are exported by default to the caller
namespace, you should explicitly import the functions you require
since exporting by default isn't recommended by the perl modules
guidelines.

=cut

# English month abbreviations, indexed by the localtime(3) tm_mon value (0-11).
my @monthnames = qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/;

# Reverse lookup: lower-cased month abbreviation ('jan', 'feb', ...) to its
# localtime(3) tm_mon number. Derived from @monthnames so that the two
# tables cannot drift apart.
my %monthnumbers =
  map { ( lc( $monthnames[$_] ), $_ ) } 0 .. $#monthnames;

# English weekday abbreviations, indexed by the localtime(3) tm_wday value
# (0 is Sunday).
my @daynames = qw/Sun Mon Tue Wed Thu Fri Sat/;

# Convert the pieces of a syslog-style timestamp such as
# ( 'Mar', '20', '09:13:32' ) into the first five fields of a
# localtime(3)-like list.
#
# Arguments:
#   $month - English month abbreviation in any case (e.g. 'May' or 'jan'),
#            or an all-digit string, which is used as the month number as is
#            (no 1-based to 0-based conversion is done here)
#   $day   - day of the month, possibly 0 padded
#   $time  - hh:mm:ss or hh:mm:ss.sss, optionally followed by a single ':'
#
# Returns a reference to [ $tm_sec, $tm_min, $tm_hour, $tm_day, $tm_month ],
# all converted to numbers.
#
# Croaks when $time does not have the expected layout or when $month is
# neither all digits nor a known month abbreviation.
sub syslog2tm($$$)
{
    my $sub = 'syslog2tm';

    my ( $month, $day, $time ) = @_;

    # We tolerate an extra : at the end of the time: HH:MM:SS[.MSEC]:
    # The fractional seconds are accepted but deliberately discarded.
    my ( $hour, $min, $sec ) = $time =~ /^(\d\d):(\d\d):(\d\d)(?:\.\d+)?:?$/
      or croak "$sub time '$time' should be hh:mm:ss[.msec]:?";

    my $tm_month;
    if ( $month =~ /^\d+$/ ) {
        # Anchored on purpose: a value that merely contains a digit (e.g.
        # '1x' or 'Mar1') must not be mistaken for a month number.
        $tm_month = $month + 0;
    } else {
        $tm_month = $monthnumbers{ lc $month };
        croak "$sub cannot get monthnumber from monthname '$month'"
          unless defined $tm_month;
    }

    # The "+ 0" gets rid of leading zeroes.
    return [ $sec + 0, $min + 0, $hour + 0, $day + 0, $tm_month ];
}

# Translate a time zone offset from UTC, written as in iso 8601:1988
# 5.3.3.1 (see
# http://doc.mdcc.cx/~joostvb/doc/iso-8601:1988-representation_of_dates_and_times.pdf),
# into a signed number of seconds.
#
# Argument: the offset as +hh:mm, +hhmm or +hh (e.g. +0100 or -0900). The
#           sign is optional and defaults to '+'.
#
# Returns the offset in seconds, negative when the sign is '-'.
# Croaks when the argument does not have one of the accepted layouts.
sub tzdiff2sec($)
{
    croak "invalid tzdiff format: $_[0]. It must looks like +0100 or -01:00\n"
      unless $_[0] =~ /^([+-])?(\d\d):?(\d\d)?$/;

    my ( $sign, $hours, $minutes ) = ( $1, $2, $3 );

    # The minutes are optional in the pattern and count as zero when absent.
    my $seconds = $hours * 60 * 60 + ( $minutes || 0 ) * 60;

    my $direction = ( defined $sign && $sign eq '-' ) ? -1 : 1;

    return $seconds * $direction;
}

=pod

=head1 DATE PARSING FUNCTIONS

This module includes several functions that convert between a more
human readable date format and UNIX epoch time. All parsing functions
will return the number of seconds since Jan 1 1970 00:00:00 UTC and
will die() when passed invalid arguments.

=head2 date2cal()

    my $time = date2cal( $year, $month, $day, $time, [$timezone] );
    my $time = date2cal( "2001 Mar 20 09:32:29 +0100" );

This function will convert a date in the date(1) default output format
to UNIX epoch time. The function accepts either the date in a string
or split on whitespace. If the timezone component is omitted, the
local timezone is assumed (usually based on the value of the TZ
environment variable).

=cut

# Tue, 20 Mar 2001 09:46:40 +0100   is  Tue Mar 20 08:46:40 UTC 2001
#
# http://www.cl.cam.ac.uk/~mgk25/iso-time.html says:
#
# 12:00 UTC (aka 12:00Z) = 13:00+01:00 = 0700-0500
#
# There exists no international standard that specifies abbreviations for
# civil time zones like CET, EST, etc.
#
# nice to handle apache stuff like [18/Mar/2001:16:00:26 +0100]
#
# Convert a date given as year, month, day, time and optional offset from
# UTC into UNIX epoch time.
#
# Arguments: either one string holding the whitespace separated fields
# ("2001 Mar 20 09:32:29 +0100"), or the fields as separate arguments:
#   $year   - four digit year
#   $month  - month, as accepted by syslog2tm()
#   $day    - day of the month
#   $time   - hh:mm:ss, as accepted by syslog2tm()
#   $tzdiff - optional offset from UTC, as accepted by tzdiff2sec()
#             (+hh:mm, +hhmm or +hh)
#
# Returns the number of seconds since the epoch.
# Croaks when fewer than 4 fields are available; errors from tzdiff2sec()
# and syslog2tm() propagate to the caller.
sub date2cal($;$$$$)
{
    my $sub = 'date2cal';

    # Single string form. split ' ' ignores leading whitespace, so a padded
    # string does not produce an empty first field.
    my @fields = @_ == 1 ? split( ' ', $_[0] ) : @_;

    croak "$sub give 1, 4 or 5 args" if @fields < 4;

    my ( $year, $month, $day, $time, $tzdiff ) = @fields;

    my $offset;
    $offset = tzdiff2sec( $tzdiff ) if defined $tzdiff;

    my $tm = syslog2tm( $month, $day, $time );

    # Time::Local takes years above 999 as the actual year, while values
    # in 0..99 fall under its rolling century rule. Handing over the four
    # digit year unchanged keeps dates before 2000 from being moved to
    # another century.
    my $tm_year = $year > 999 ? $year : $year - 1900;

    # Without an explicit offset the local timezone is assumed. timelocal()
    # applies the rules of that zone for the date being converted, so a
    # date on the other side of a daylight saving switch than today is
    # still converted correctly.
    return timelocal( @$tm, $tm_year ) unless defined $offset;

    # With an explicit offset: read the fields as UTC, then undo the offset.
    return timegm( @$tm, $tm_year ) - $offset;
}

=pod

=head2 syslog2cal()

    my $time = syslog2cal( $month, $day, $time, $local_tm_ref );
    my $time = syslog2cal( "Mar 11 13:21:00", $local_tm_ref );

This function will convert a date in the syslog default output format
to UNIX epoch time. The function accepts either the date in a string
or split on whitespace.

Since the syslog format doesn't contain timezone information, the
local timezone is assumed (usually determined by the TZ environment
variable).

The last argument is a reference to an array returned by localtime().

    my $local_tm_ref = [localtime()];

It is used to determine the year.

=cut

# Convert a syslog timestamp, which carries no year and no timezone, into
# UNIX epoch time in the local timezone.
#
# Arguments: either ( "Mar 11 13:21:00", $now_ltime ) or
#   $month     - month name (e.g. May), we tolerate e.g. jan
#   $day       - day of the month, possibly 0 padded
#   $time      - time as hh:mm:ss or hh:mm:ss.sss
#   $now_ltime - reference to the 9 element array returned by localtime();
#                it is the reference point of "now" used to guess the year
#
# Returns the number of seconds since the epoch.
# Croaks when $now_ltime is not a reference to a 9 element array; errors
# from syslog2tm() propagate to the caller.
sub syslog2cal($$;$$)
{
    my $sub = 'syslog2cal';

    # Two argument form: expand the date string. split ' ' ignores leading
    # whitespace, so a padded string does not shift the fields.
    my @args = @_ == 2 ? ( split( ' ', $_[0] ), $_[1] ) : @_;

    my ( $month, $day, $time, $now_ltime ) = @args;

    croak "$sub ltime arg should be a reference to array as returned by localtime()"
      unless ref $now_ltime eq 'ARRAY' && @$now_ltime == 9;

    my $tm = syslog2tm( $month, $day, $time );
    my ( $tm_day, $tm_month ) = @{$tm}[ 3, 4 ];

    my ( $now_day, $now_month, $now_year ) = @{$now_ltime}[ 3, 4, 5 ];

    # Is "now" the last day of its month? February accepts both the 28th
    # and the 29th, so no leap year computation is needed. The 28/29 test
    # is tied to February: on the 28th of any other month the month is not
    # over yet.
    my $month_ends_today =
         ( $now_month =~ /^(?:0|2|4|6|7|9|11)$/ && $now_day == 31 )
      || ( $now_month =~ /^(?:3|5|8|10)$/ && $now_day == 30 )
      || ( $now_month == 1 && ( $now_day == 28 || $now_day == 29 ) );

    # Guess the year based on the difference between now and tm.
    my $tm_year;
    if ( $tm_month < $now_month ) {
        # Current year, unless the month is january, the day the first
        # and now is the last day of december.
        my $is_next_new_year = $tm_month == 0
          && $tm_day == 1
          && $now_month == 11
          && $now_day == 31;
        $tm_year = $is_next_new_year ? $now_year + 1 : $now_year;
    } elsif ( $tm_month == $now_month ) {
        # Current year if the day is in the past, today, or one day ahead;
        # last year otherwise.
        $tm_year = $tm_day <= $now_day + 1 ? $now_year : $now_year - 1;
    } elsif ( $tm_month == $now_month + 1 && $tm_day == 1 && $month_ends_today ) {
        # First day of next month seen on the last day of this month:
        # treated as one day ahead, hence current year.
        $tm_year = $now_year;
    } else {
        # Anything else further ahead than that: last year.
        $tm_year = $now_year - 1;
    }

    return timelocal( @$tm, $tm_year );
}

=pod

=head2 clf2cal()

    my $time = clf2cal( "[18/Mar/2001:15:59:30 +0100]" );

This function will convert a date as found in Common Log Format to
UNIX epoch time.

=cut

# Convert a Common Log Format timestamp, brackets included, into UNIX
# epoch time.
#
# Argument: a string like "[18/Mar/2001:15:59:30 +0100]".
#
# Returns whatever date2cal() returns for the extracted fields.
# Croaks when the string does not have the expected layout.
sub clf2cal($)
{
    my $sub = 'clf2cal';

    my ($time) = @_;

    my ( $dom, $month, $year, $hms, $tzdiff ) =
      $time =~ /^\[(\d+)\/(\w+)\/(\d{4}):(\d{2}:\d{2}:\d{2})\s+([-+][:\d]+)\]$/;

    # Every group in the pattern is mandatory, so an undefined first
    # capture means the match failed.
    croak "$sub time '$time' should be something like [18/Mar/2001:15:59:30 +0100]\n"
      unless defined $dom;

    return date2cal( $year, $month, $dom, $hms, $tzdiff );
}

=pod

=head1 DATE FORMATTING FUNCTIONS

This module includes some functions to convert date in UNIX epoch time to
some more human readable output. All functions will die() when passed
invalid arguments.

=head2 cal2rfc()

    print cal2rfc( $time );

This function will convert a date in UNIX epoch time to the RFC822 format
(used in email, for example). A RFC822 date looks like

    Wed, 30 May 2001 12:45:13 +0000

The timezone offset specification will correspond to the local
timezone (usually determined by the TZ environment variable).

=cut

# Format a UNIX epoch time as an RFC822 date, e.g.
# "Wed, 30 May 2001 12:45:13 +0000".
#
# Argument: the time in seconds since the epoch.
#
# Returns the formatted string. The date and time fields come from
# localtime(); the trailing zone is built from tz_local_offset().
sub cal2rfc($)
{
    my $time = $_[0];

    my ( $tm_sec, $tm_min, $tm_hour, $tm_mday, $tm_mon, $tm_year, $tm_wday )
      = localtime $time;

    # NOTE(review): tz_local_offset() is called without the time being
    # formatted -- confirm that the offset it returns is the one wanted
    # for dates on the other side of a daylight saving switch.
    my $off = tz_local_offset();

    # The zone is written as sign, hours and minutes (+hhmm). Work on the
    # absolute value so that negative offsets with a minutes part, such as
    # -03:30, do not get their hours and minutes mixed up, and take the
    # minutes from what is left of the hour rather than from the seconds.
    my $abs_off = abs $off;
    my $tzoff   = sprintf( "%s%02d%02d",
        ( $off < 0 ? '-' : '+' ),
        int( $abs_off / 3600 ),
        int( ( $abs_off % 3600 ) / 60 ) );

    return sprintf( "%s, %d %s %d %02d:%02d:%02d %s",
        $daynames[$tm_wday],
        $tm_mday,
        $monthnames[$tm_mon],
        $tm_year + 1900,
        $tm_hour, $tm_min, $tm_sec,
        $tzoff );
}

=pod

=head2 cal2ymdhms()

    print cal2ymdhms( $time );

This function converts a date in UNIX epoch time to a string of the form:

    YYYYMMDDHHMMSS

This representation corresponds to the time in the local timezone (usually
determined by the TZ environment variable).

=cut

# Format a UNIX epoch time as YYYYMMDDHHMMSS, using the fields returned by
# localtime().
#
# Argument: the time in seconds since the epoch.
#
# Returns the formatted string; every field after the year is padded with
# a leading zero when it is below 10.
sub cal2ymdhms($)
{
    my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime $_[0];

    # localtime() counts months from 0 and years from 1900.
    my @padded =
      map { $_ < 10 ? "0$_" : $_ } ( $mon + 1, $mday, $hour, $min, $sec );

    return join( '', $year + 1900, @padded );
}


=pod

=head2 getMonthName()

    print getMonthName( 0 ); # Gives 'Jan'

This function takes as parameter a number (0-11) representing the
month (as returned by localtime() for example) and will return the
English abbreviated name of that month ( Jan, Feb, etc. ).

=cut

# Return the English abbreviated month name ( Jan, Feb, etc. ) for a month
# number as used by localtime().
#
# Argument: the month number, 0 to 11.
#
# Croaks when the number is outside that range.
sub getMonthName($) {
    my $sub = 'getMonthName';

    my $month = shift;

    if ( !( $month >= 0 && $month <= 11 ) ) {
        croak "$sub month should be between 0 and 11";
    }

    return $monthnames[$month];
}


# keep perl happy
1;

__END__

=pod

=head1 VERSION

$Id: Time.pm,v 1.8 2004/03/26 00:27:34 wsourdeau Exp $

=head1 COPYRIGHT

Copyright (C) 2000-2002 Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software 
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=head1 AUTHOR

Joost van Baal <joostvb@logreport.org>

=cut


