#!/usr/bin/perl -c

package Cyrus::IndexFile;

use strict;
use warnings;

use IO::File;
use IO::Handle;
use String::CRC32 qw(crc32);

=pod

=head1 NAME

Cyrus::IndexFile - A pure perl interface to the "cyrus.index" file
format as generated by Cyrus IMAPd.

=head1 EXAMPLES

 use Cyrus::IndexFile;

 # Note: requires IO::File::fcntl module installed for locking support
 my $index = Cyrus::IndexFile->new_file("$path/cyrus.index", ['lock_ex', 5]);

 print "EXISTS: " . $index->header('Exists') . "\n";
 while (my $record = $index->next_record_hash()) {
   print "$record->{Uid}: $record->{MessageGuid} $record->{Size}\n";
 }

=head1 SUPPORTED FORMAT VERSIONS

 Definitions:
 ============

 * int32 4 - 32 bit value taking 4 octets on disk.  Visible in perl as an integer
 * int64 8 - 64 bit value taking 8 octets on disk.  Visible in perl as an integer
 * time_t 4 - same as int32
 * bitmap N - a bitmap taking up N octets on disk.  Visible in perl as a string of 1s and 0s.
 * hex N - a big value taking up N octets on disk.  Visible in perl as a hexadecimal string (0-9a-f)

 These values can be referenced by name using the hash API, or by index using the array API.
 You can also use the 'raw' API to get the record in on-disk format.

 All numbers are in network byte order as per Cyrus standard encoding.  Bitmap and hex values are
 layed out as octets on disk and encoded directly in order.

 Version 9:
 ==========

 Header:
  0: Generation            int32  4
  1: Format                int32  4
  2: MinorVersion          int32  4
  3: StartOffset           int32  4
  4: RecordSize            int32  4
  5: Exists                int32  4
  6: LastAppenddate        time_t 4
  7: LastUid               int32  4
  8: QuotaUsed             int64  8
  9: Pop3LastLogin         time_t 4
 10: UidValidity           int32  4
 11: Deleted               int32  4
 12: Answered              int32  4
 13: Flagged               int32  4
 14: Options               bitmap 4
 15: LeakedCache           int32  4
 16: HighestModseq         int64  8
 17: Spare0                int32  4
 18: Spare1                int32  4
 19: Spare2                int32  4
 20: Spare3                int32  4
 21: Spare4                int32  4

 Record:
  0: Uid                   int32  4
  1: InternalDate          time_t 4
  2: SentDate              time_t 4
  3: Size                  int32  4
  4: HeaderSize            int32  4
  5: ContentOffset         int32  4
  6: CacheOffset           int32  4
  7: LastUpdated           time_t 4
  8: SystemFlags           bitmap 4
  9: UserFlags             bitmap 16
 10: ContentLines          int32  4
 11: CacheVersion          int32  4
 12: MessageUuid           hex    12
 13: Modseq                int64  8

 Version 10:
 ===========

 Header:
  0: Generation            int32  4
  1: Format                int32  4
  2: MinorVersion          int32  4
  3: StartOffset           int32  4
  4: RecordSize            int32  4
  5: Exists                int32  4
  6: LastAppenddate        time_t 4
  7: LastUid               int32  4
  8: QuotaUsed             int64  8
  9: Pop3LastLogin         time_t 4
 10: UidValidity           int32  4
 11: Deleted               int32  4
 12: Answered              int32  4
 13: Flagged               int32  4
 14: Options               bitmap 4
 15: LeakedCache           int32  4
 16: HighestModseq         int64  8
 17: Spare0                int32  4
 18: Spare1                int32  4
 19: Spare2                int32  4
 20: Spare3                int32  4
 21: Spare4                int32  4

 Record:
  0: Uid                   int32  4
  1: InternalDate          time_t 4
  2: SentDate              time_t 4
  3: Size                  int32  4
  4: HeaderSize            int32  4
  5: ContentOffset         int32  4
  6: CacheOffset           int32  4
  7: LastUpdated           time_t 4
  8: SystemFlags           bitmap 4
  9: UserFlags             bitmap 16
 10: ContentLines          int32  4
 11: CacheVersion          int32  4
 12: MessageGuid           hex    20
 13: Modseq                int64  8

SKIPPED VERSION 11 - Fastmail internal only

 Version 12:
 ===========

 Header:
  0: Generation            int32  4
  1: Format                int32  4
  2: MinorVersion          int32  4
  3: StartOffset           int32  4
  4: RecordSize            int32  4
  5: Exists                int32  4
  6: LastAppenddate        time_t 4
  7: LastUid               int32  4
  8: QuotaUsed             int64  8
  9: Pop3LastLogin         time_t 4
 10: UidValidity           int32  4
 11: Deleted               int32  4
 12: Answered              int32  4
 13: Flagged               int32  4
 14: Options               bitmap 4
 15: LeakedCache           int32  4
 16: HighestModseq         int64  8
 17: DeletedModseq         int64  8
 18: Exists                int32  4
 19: FirstExpunged         time_t 4
 20: LastCleanup           time_t 4
 21: HeaderFileCRC         int32  4
 22: SyncCRC               int32  4
 23: RecentUid             int32  4
 24: RecentTime            time_t 4
 25: Spare0                int32  4
 26: Spare1                int32  4
 27: Spare2                int32  4
 28: HeaderCRC             int32  4

 Record:
  0: Uid                   int32  4
  1: InternalDate          time_t 4
  2: SentDate              time_t 4
  3: Size                  int32  4
  4: HeaderSize            int32  4
  5: GmTime                time_t  4
  6: CacheOffset           int32  4
  7: LastUpdated           time_t 4
  8: SystemFlags           bitmap 4
  9: UserFlags             bitmap 16
 10: ContentLines          int32  4
 11: CacheVersion          int32  4
 12: MessageGuid           hex    20
 13: Modseq                int64  8
 14: CacheCRC              int32  4
 15: RecordCRC             int32  4

 Version 13:
 ===========

 Header:
  0: Generation            int32  4
  1: Format                int32  4
  2: MinorVersion          int32  4
  3: StartOffset           int32  4
  4: RecordSize            int32  4
  5: Exists                int32  4
  6: LastAppenddate        time_t 4
  7: LastUid               int32  4
  8: QuotaUsed             int64  8
  9: Pop3LastLogin         time_t 4
 10: UidValidity           int32  4
 11: Deleted               int32  4
 12: Answered              int32  4
 13: Flagged               int32  4
 14: Options               bitmap 4
 15: LeakedCache           int32  4
 16: HighestModseq         int64  8
 17: DeletedModseq         int64  8
 18: Exists                int32  4
 19: FirstExpunged         time_t 4
 20: LastCleanup           time_t 4
 21: HeaderFileCRC         int32  4
 22: SyncCRC               int32  4
 23: RecentUid             int32  4
 24: RecentTime            time_t 4
 25: Spare0                int32  4
 26: Spare1                int32  4
 27: Spare2                int32  4
 28: HeaderCRC             int32  4

 Record:
  0: Uid                   int32  4
  1: InternalDate          time_t 4
  2: SentDate              time_t 4
  3: Size                  int32  4
  4: HeaderSize            int32  4
  5: GmTime                time_t  4
  6: CacheOffset           int32  4
  7: LastUpdated           time_t 4
  8: SystemFlags           bitmap 4
  9: UserFlags             bitmap 16
 10: ContentLines          int32  4
 11: CacheVersion          int32  4
 12: MessageGuid           hex    20
 13: Modseq                int64  8
 14: CID                   hex    8
 15: CacheCRC              int32  4
 16: RecordCRC             int32  4

=cut

# Set up header and record formatting information {{{

my $VersionFormats = {
  9 => {
    HeaderSize => 96,
    _make_fields('Header',<<EOF),
Generation            int32  4
Format                int32  4
MinorVersion          int32  4
StartOffset           int32  4
RecordSize            int32  4
Exists                int32  4
LastAppenddate        time_t 4
LastUid               int32  4
QuotaUsed             int64  8
Pop3LastLogin         time_t 4
UidValidity           int32  4
Deleted               int32  4
Answered              int32  4
Flagged               int32  4
Options               bitmap 4
LeakedCache           int32  4
HighestModseq         int64  8
HighestModseq         int64  8
Spare0                int32  4
Spare1                int32  4
Spare2                int32  4
Spare3                int32  4
Spare4                int32  4
EOF
    RecordSize => 80, # defined in file too, check it!
    _make_fields('Record', <<EOF),
Uid                   int32  4
InternalDate          time_t 4
SentDate              time_t 4
Size                  int32  4
HeaderSize            int32  4
ContentOffset         int32  4
CacheOffset           int32  4
LastUpdated           time_t 4
SystemFlags           bitmap 4
UserFlags             bitmap 16
ContentLines          int32  4
CacheVersion          int32  4
MessageUuid           hex    12
Modseq                int64  8
EOF
  },
  10 => {
    HeaderSize => 96,
    _make_fields('Header',<<EOF),
Generation            int32  4
Format                int32  4
MinorVersion          int32  4
StartOffset           int32  4
RecordSize            int32  4
Exists                int32  4
LastAppenddate        time_t 4
LastUid               int32  4
QuotaUsed             int64  8
Pop3LastLogin         time_t 4
UidValidity           int32  4
Deleted               int32  4
Answered              int32  4
Flagged               int32  4
Options               bitmap 4
LeakedCache           int32  4
HighestModseq         int64  8
Spare0                int32  4
Spare1                int32  4
Spare2                int32  4
Spare3                int32  4
Spare4                int32  4
EOF
    RecordSize => 88, # defined in file too, check it!
    _make_fields('Record', <<EOF),
Uid                   int32  4
InternalDate          time_t 4
SentDate              time_t 4
Size                  int32  4
HeaderSize            int32  4
ContentOffset         int32  4
CacheOffset           int32  4
LastUpdated           time_t 4
SystemFlags           bitmap 4
UserFlags             bitmap 16
ContentLines          int32  4
CacheVersion          int32  4
MessageGuid           hex    20
Modseq                int64  8
EOF
  },
  11 => {
    HeaderSize => 96,
    _make_fields('Header',<<EOF),
Generation            int32  4
Format                int32  4
MinorVersion          int32  4
StartOffset           int32  4
RecordSize            int32  4
Exists                int32  4
LastAppenddate        time_t 4
LastUid               int32  4
QuotaUsed             int64  8
Pop3LastLogin         time_t 4
UidValidity           int32  4
Deleted               int32  4
Answered              int32  4
Flagged               int32  4
Options               bitmap 4
LeakedCache           int32  4
HighestModseq         int64  8
Spare0                int32  4
Spare1                int32  4
Spare2                int32  4
Spare3                int32  4
HeaderCrc             int32  4
EOF
    RecordSize => 96, # defined in file too, check it!
    _make_fields('Record', <<EOF),
Uid                   int32  4
InternalDate          time_t 4
SentDate              time_t 4
Size                  int32  4
HeaderSize            int32  4
ContentOffset         int32  4
CacheOffset           int32  4
LastUpdated           time_t 4
SystemFlags           bitmap 4
UserFlags             bitmap 16
ContentLines          int32  4
CacheVersion          int32  4
MessageGuid           hex    20
Modseq                int64  8
CacheCrc              int32  4
RecordCrc             int32  4
EOF
  },
  12 => {
    HeaderSize => 128,
    _make_fields('Header',<<EOF),
Generation            int32  4
Format                int32  4
MinorVersion          int32  4
StartOffset           int32  4
RecordSize            int32  4
NumRecords            int32  4
LastAppenddate        time_t 4
LastUid               int32  4
QuotaUsed             int64  8
Pop3LastLogin         time_t 4
UidValidity           int32  4
Deleted               int32  4
Answered              int32  4
Flagged               int32  4
Options               bitmap 4
LeakedCache           int32  4
HighestModseq         int64  8
DeletedModseq         int64  8
Exists                int32  4
FirstExpunged         time_t 4
LastCleanup           time_t 4
HeaderFileCRC         int32  4
SyncCRC               int32  4
RecentUid             int32  4
RecentTime            time_t 4
Spare0                int32  4
Spare1                int32  4
Spare2                int32  4
HeaderCrc             int32  4
EOF
    RecordSize => 96, # defined in file too, check it!
    _make_fields('Record', <<EOF),
Uid                   int32  4
InternalDate          time_t 4
SentDate              time_t 4
Size                  int32  4
HeaderSize            int32  4
GmTime                time_t 4
CacheOffset           int32  4
LastUpdated           time_t 4
SystemFlags           bitmap 4
UserFlags             bitmap 16
ContentLines          int32  4
CacheVersion          int32  4
MessageGuid           hex    20
Modseq                int64  8
CacheCrc              int32  4
RecordCrc             int32  4
EOF
  },
  13 => {
    HeaderSize => 128,
    _make_fields('Header',<<EOF),
Generation            int32  4
Format                int32  4
MinorVersion          int32  4
StartOffset           int32  4
RecordSize            int32  4
NumRecords            int32  4
LastAppenddate        time_t 4
LastUid               int32  4
QuotaUsed             int64  8
Pop3LastLogin         time_t 4
UidValidity           int32  4
Deleted               int32  4
Answered              int32  4
Flagged               int32  4
Options               bitmap 4
LeakedCache           int32  4
HighestModseq         int64  8
DeletedModseq         int64  8
Exists                int32  4
FirstExpunged         time_t 4
LastCleanup           time_t 4
HeaderFileCRC         int32  4
SyncCRC               int32  4
RecentUid             int32  4
RecentTime            time_t 4
Pop3ShowAfter         int32  4
QuotaAnnotUsed        int32  4
SyncCRCVersion        int32  4
HeaderCrc             int32  4
EOF
    RecordSize => 104, # defined in file too, check it!
    _make_fields('Record', <<EOF),
Uid                   int32  4
InternalDate          time_t 4
SentDate              time_t 4
Size                  int32  4
HeaderSize            int32  4
GmTime                time_t 4
CacheOffset           int32  4
LastUpdated           time_t 4
SystemFlags           bitmap 4
UserFlags             bitmap 16
ContentLines          int32  4
CacheVersion          int32  4
MessageGuid           hex    20
Modseq                int64  8
CID                   hex    8
CacheCrc              int32  4
RecordCrc             int32  4
EOF
  },
};

my %SystemFlagMap = (
   0 => "\\Answered",
   1 => "\\Flagged",
   2 => "\\Deleted",
   3 => "\\Draft",
   4 => "\\Seen",
  29 => "[ARCHIVED]",
  30 => "[UNLINKED]",
  31 => "[EXPUNGED]",
);

# parse our the plaintext field definitions into a useful datastructure
sub _make_fields {
  my $prefix = shift;
  my $string = shift;

  my @lines = grep { m/\S/ } split /\n/, $string;

  my @names;
  my @items;
  my @packitems;
  my $Pos = 0;
  my $Num = 0;
  foreach my $line (@lines) {
    my ($Name, $Type, $Size) = split /\s+/, $line;

    push @names, $Name;
    push @items, [$Name, $Type, $Size, $Num, $Pos];
    push @packitems, _make_pack($Type, $Size);

    $Pos += $Size;
    $Num++;
  }

  return (
    $prefix . 'Names' => { map { $names[$_] => $_ } 0..$#names },
    $prefix . 'Fields' => \@items,
    $prefix . 'Pack' => join("", @packitems),
  );
}

# build the pack/unpack expression for a single field
sub _make_pack {
  my $format = shift;
  my $size = shift;
  if ($format eq 'int32' or $format eq 'time_t') {
    return 'N';
  }
  elsif ($format eq 'int64') { # ignore start..
    return 'x[N]N';
  }
  elsif ($format eq 'bitmap') {
    return 'B' . (8 * $size);
  }
  elsif ($format eq 'hex') {
    return 'H' . (2 * $size);
  }
}

# end format definitions
# }}}

=head1 PUBLIC API

=over

=item Cyrus::IndexFile->new($fh)

Build a new Cyrus::IndexFile object from a filehandle.  The handle is not 
required to be seekable, so make sure you have rewound it before use.

 seek($fh, 0, 0);
 my $index = Cyrus::IndexFile->new($fh);

This function reads the header from the file and returns a Cyrus::IndexFile
object.  The filehandle will be pointing at the start of the first record.

If there is a problem, then the position of the filehandle is undefined 
(though probably either at 12 bytes or the end of the header) and the
function will "die".

Causes of death:

 * unable to read a full header's length of data from the file
 * version of the file is not one of the supported versions

=cut

sub new {
  my $class = shift;
  my $handle = shift;

  my $buf;

  # read initial header information to determine version
  my $read = sysread($handle, $buf, 12);
  die "Unable to read header information\n" 
    unless $read == 12;

  # version is always at this offset!
  my $version = unpack('N', substr($buf, 8));

  # check that it's a supported version
  my $frm = $VersionFormats->{$version}
    || die "Unknown header format $version\n";

  # read the rest of the header (length depends on version)
  sysread($handle, $buf, $frm->{HeaderSize} - 12, 12);
  my $Self = bless { 
    @_,
    version => $version, 
    handle => $handle,
    format => $frm,
    rawheader => $buf,
    recno => 0,
  }, ref($class) || $class;

  $Self->{header} = $Self->_header_b2h($buf);
  die "Unable to parse header" unless $Self->{header};

  return $Self;
}

=item Cyrus::IndexFile->new_file($filename, $lockopts)

Open the file to read, optionally locking it with IO::File::fcntl.  If you
pass a scalar for lockopts then it will be locked with ['lock_ex'], otherwise
you can pass a tuple, e.g. ['lock_ex', 5] for a 5 second timeout.

This function will die if it can't open or lock the file.  On success, it
calls $class->new() with the filehandle.

=cut

sub new_file {
  my $class = shift;
  my $filename = shift;
  my $lockopts = shift;

  my $fh;
  if ($lockopts) {
    require 'IO/File/fcntl.pm' || die "can't lock without IO::File::fcntl module";
    $lockopts = ['lock_ex'] unless ref($lockopts) eq 'ARRAY';
    $fh = IO::File::fcntl->new($filename, '+<', @$lockopts)
          || die "Can't open $filename for locked read: $!";
  } else {
    $fh = IO::File->new("< $filename") 
          || die "Can't open $filename for read: $!";
  }

  return $class->new($fh, @_);
}

=item Cyrus::IndexFile->new_empty($version)

Create a new empty index file with the specified version.  This is
useful when you want to generate a new index file, as you can use
the write_record function and set header fields on the new object.

=cut

sub new_empty {
  my $class = shift;
  my $version = shift;

  # check that the version is supported
  my $frm = $VersionFormats->{$version}
    || die "unknown version $version";

  my $Self = bless {
    @_,
    version => $version,
    format => $frm,
  }, ref($class) || $class;

  return $Self;
}

=item $index->stream_copy($outfh, $decider, %Opts)

Currently broken!  Supposed to copy this file into the output filehandle.

NOTE: outfh must be seekable, as we write an initial header record with
Exists == 0, then update the header at the end with a new Exists and a
new LastUpdated.

=cut

sub stream_copy {
  my $Self = shift;
  my $outfh = shift;
  my $decide = shift;
  my %Opts = @_;

  my $out = $Self->new_empty($Opts{version} || $Self->{version});

  my $newheader = $Self->header_copy();
  if ($Opts{headerfields}) {
    foreach my $field (keys %{$Opts{headerfields}}) {
      $newheader->{$field} = $Opts{headerfields}{$field};
    }
  }

  # initially empty
  $newheader->{NumRecords} = 0;
  # Important!  Otherwise you get versions out of skew!
  $newheader->{MinorVersion} = $out->{version};
  $newheader->{RecordSize} = $out->{format}{RecordSize};
  $out->write_header($outfh, $newheader);

  $Self->reset();
  while (my $record = $Self->next_record()) {
    if ($decide->($newheader, $record)) {
      $newheader->{NumRecords}++;
      $out->write_record($outfh, $record);
    }
  }

  # update exists and last updated
  $newheader->{LastUpdated} = time();
  sysseek($outfh, 0, 0);
  $out->write_header($outfh, $newheader);
}

=item $index->header()

=item $index->header_hash()

Returns a hash reference of the entire header

=item $index->header($field)

Returns just the single named field from the header.  Dies if there is no
field with that name in the header.

=item $index->header_array($field)

Returns an array reference with the values in the order given in the version
information above.

=item $index->header_raw()

Returns the raw packed header as it is on disk.

=cut

sub header {
  my $Self = shift;
  my $Field = shift;

  if ($Field) {
    die "No such header field $Field\n" unless exists $Self->{header}{$Field};
    return $Self->{header}{$Field};
  }

  return $Self->{header};
}

sub header_array {
  my $Self = shift;
  return $Self->_header_h2a($Self->{header});
}

sub header_hash {
  my $Self = shift;
  return $Self->{header};
}

sub header_raw {
  my $Self = shift;
  return $Self->{rawheader};
}

=item $index->header_copy()

Returns a hashref the same as header_hash, but it's "non live", so you can
make destructive changes without affecting the original.

=cut

sub header_copy {
  my $Self = shift;
  my $orig = $Self->{header};
  return { %$orig };
}

=item $index->reset($num)

Deletes the cached 'current record' and seeks back to the given record 
number, or the end of the header (record 0) if no number given.

Requires the input filehandle to be seekable.

=cut

sub reset {
  my $Self = shift;
  my $num = shift || 0;

  my $NumRecords = $Self->{header}{MinorVersion} < 12 ?
		   $Self->{header}{Exists} : $Self->{header}{NumRecords};

  die "Invalid record $num (must be >= 0 and <= $NumRecords"
    unless ($num >= 0 and $num <= $NumRecords);

  my $HeaderSize = $Self->{format}{HeaderSize};
  my $RecordSize = $Self->{format}{RecordSize};

  sysseek($Self->{handle}, $HeaderSize + ($num * $RecordSize), 0)
    || die "unable to seek on this filehandle";

  $Self->{recno} = $num;

  delete $Self->{record};
  delete $Self->{rawrecord};
  delete $Self->{checksum_failure};
}

=item $index->next_record()
=item $index->next_record_hash()

Read the next record from the file and parse it in to a hash reference
per the format of the index file.

This works even on non-seekable files.

Returns undef when there are no more records (until you call reset)

=item $index->next_record_array()

As above, but returns the array in the format order.

More efficient, as the hash doesn't need to be created.

=item $index->next_record_raw()

Returns the raw bytes of the index file.  Most efficient, as no unpacking
is done, but then you have to deal with all the version checking and
offsets yourself.

=cut

sub next_record {
  my $Self = shift;
  $Self->next_record_raw();
  return $Self->record(@_);
}

sub next_record_hash {
  my $Self = shift;
  $Self->next_record_raw();
  return $Self->record_hash(@_);
}

sub next_record_array {
  my $Self = shift;
  $Self->next_record_raw();
  return $Self->record_array(@_);
}

sub next_record_raw {
  my $Self = shift;

  delete $Self->{record};
  delete $Self->{checksum_failure};

  # use direct access for speed
  my $NumRecords = $Self->{header}{MinorVersion} < 12 ?
		   $Self->{header}{Exists} : $Self->{header}{NumRecords};
  my $RecordSize = $Self->{header}{RecordSize};

  return undef unless $RecordSize;

  if ($Self->{recno} < $NumRecords) {
    my $res = sysread($Self->{handle}, $Self->{rawrecord}, $RecordSize);
    die "Failed to read entire record" unless $RecordSize == $res;
    # rewrite if passed so save the allocation cost
    $Self->{recno}++;
    return $Self->{rawrecord};
  }
  else {
    delete $Self->{rawrecord};
    return undef; # no more records!
  }
}

=item $index->record()
=item $index->record_hash()

Returns the "current" record, i.e. the last record returned by
next_record_*() as a hash reference.

Returns undef if there is no current record (either next_record has never
been called, reset has just been called, or the file is finished)

=item $index->record_array()

As above, but return the version-dependant arrayref or undef

=item $index->record_raw()

As above, but return just the raw record bytes as a string or undef

=item $index->record($field)

If a field name is given, return that field only from the record, or die if
it doesn't exist in this version.

Returns undef if there is no current record.  No legitimate field ever
returns undef, because there's no such concept in the datastructure.

=cut

sub record {
  my $Self = shift;
  my $Field = shift;

  my $record = $Self->record_hash();
  return undef unless $record;

  if ($Field) {
    die "No such record field $Field\n" unless exists $record->{$Field};
    return $record->{$Field};
  }

  return $record;
}

sub record_hash {
  my $Self = shift;
  unless (exists $Self->{record}{hash}) {
    $Self->{record}{hash} = $Self->_record_a2h($Self->record_array(@_));
  }
  return $Self->{record}{hash};
}

sub record_array {
  my $Self = shift;
  unless (exists $Self->{record}{array}) {
    $Self->{record}{array} = $Self->_record_b2a($Self->{rawrecord});
  }
  return $Self->{record}{array};
}

sub record_raw {
   my $Self = shift;
   return $Self->{rawrecord};
}

=item $index->system_flags([$Key])

Returns a hash of the system flags set on the current record, or just the
named flag if a Key is passed.

=cut

sub system_flags {
  my $Self = shift;
  my $Field = shift;

  my @sfdata = reverse split //, $Self->record('SystemFlags');
  my %hash;
  foreach my $key (0..$#sfdata) {
    next unless $sfdata[$key];
    $hash{$SystemFlagMap{$key} || $key} = $key;
  }

  if ($Field) {
    return $hash{$Field};
  }

  return wantarray ? %hash : \%hash;
}

=item $index->flagslist($Header)

Given a Cyrus::HeaderFile object to name the UserFlags, return an array of all
flags, both SystemFlags and UserFlags set on the record.

=cut
sub flagslist {
  my $Self = shift;
  my $Header = shift;
  my @flags;

  # 32 bit sets
  my @sfdata = reverse split //, $Self->record('SystemFlags');
  foreach my $i (0..$#sfdata) {
    next unless $sfdata[$i];
    push @flags, $SystemFlagMap{$i} || $i;
  }

  if ($Header) {
    my $userflags = $Header->header('Flags');
    my @ufdata = split //, $Self->record('UserFlags');
    foreach my $base (0, 32, 64, 96) {
      foreach my $i (0..31) {
        my $f = $userflags->[$base+31-$i];
        push @flags, $f if ($f and $ufdata[$base+$i]);
      }
    }
  }

  return wantarray ? @flags : \@flags;
}

=item $index->field_number($Field)

Return the field number in a record array for the named field, or die 
if there isn't one.

=cut

sub field_number {
  my $Self = shift;
  my $Field = shift;
  my $names = $Self->{format}{RecordNames};
  die "No such record field $Field\n" unless exists $names->{$Field};
  return $names->{$Field};
}

=item $index->write_header($fh, $header)

Writes a header to $fh - you need to make sure it's seeked to the start (can be used on a non-seekable filehandle)

$header can be in array, hash or buffer format

=cut

sub write_header {
  my $Self = shift;
  my $fh = shift;
  my $header = shift;

  my $buf = $Self->_make_header($header);

  syswrite($fh, $buf);
}

=item $index->append_record($record)

Appends the record (can be hash, array or buf) to the current file.  Needs the filehandle to be seekable.  Uses "Exists" from the header to find the position, so don't mess it up!

Also seeks back to the header and rewrites it with exists incremented by one.

=cut

sub append_record {
  my $Self = shift;
  my $record = shift;
  
  my $NumRecords = $Self->{header}{MinorVersion} < 12 ? 
		   $Self->{header}{Exists} : $Self->{header}{NumRecords};

  $Self->reset($NumRecords);
  $Self->write_record($Self->{handle}, $record);

  # extend the header:
  # XXX - sysflags
  my $header = $Self->header();
  $header->{NumRecords}++;
  $Self->rewrite_header($header);
}

sub rewrite_header {
  my $Self = shift;
  my $header = shift || $Self->header();

  sysseek($Self->{handle}, 0, 0);
  $Self->write_header($Self->{handle}, $header);

  $Self->reset(); # remove any cache and update the seek pointer
}

=item $index->rewrite_record($record, $num)

Rewrite the record at position given by $num with the record (hash, array or buf) passed.

=cut

sub rewrite_record {
  my $Self = shift;
  my $record = shift;
  my $num = @_ ? shift : ($Self->{recno} - 1);

  $Self->reset($num);

  $Self->write_record($Self->{handle}, $record);

  $Self->{recno}++;
}

=item $index->write_record($fh, $record, $num)

Write the record to the new filehandle $fh.  If $num is not given then it doesn't need to be seekable.

XXX - $num support not done yet
=cut

sub write_record {
  my $Self = shift;
  my $fh = shift;
  my $record = shift;
  my $num = shift; # XXX - seek?

  my $buf = $Self->_make_record($record);

  syswrite($fh, $buf);
}

=item $index->merge_indexes($target, @extras)

XXX - broken anyway.  The purpose of this function is to allow multiple index files to combined into one (say, an expunged file and an index file)

=cut

sub merge_indexes {
  my $Self = shift;
  my $target = shift;
  my @extras = shift;

  # copy the current header first
  my $targetpos = tell($target);
  my $header = $Self->header();
  # reset some stuff
  $header->{NumRecords} = 0;
  $header->{LastAppenddate} = 0;
  $header->{LastUid} = 0;
  $header->{QuotaUsed} = 0;
  $header->{Deleted} = 0;
  $header->{Answered} = 0;
  $header->{Flagged} = 0;
  $header->{HighestModseq} = 0;
  $Self->write_header($target, $header);

  my @all = ($Self, @extras);

  my @records = map { $_->next_record() } @all;

  my $nextuid = -1;

  while ($nextuid) {
    my $this;
    my $higheruid;

    # read the first record of all lists
    foreach my $n (0..$#all) {
      next unless $records[$n];
      if ($records[$n]{Uid} == $nextuid) {
        # algorithm: keep most recently modified
        if (not $this or $this->{LastModified} < $records[$n]{LastModified}) {
          $this = $records[$n]{LastModified};
        }
        # step forwards
        $records[$n] = $all[$n]->next_record();
      }
      # find the minimum now
      if (not $higheruid or $higheruid > $records[$n]{Uid}) {
        $higheruid = $records[$n]{Uid};
      }
    }

    # write out the best record found
    if ($this) {
      $Self->write_record($target, $this);
      $header->{NumRecords}++;
      # XXX - to make everything else work, we probably need to reconstruct or
      # put the entire logic here!
    }

    # move along
    $nextuid = $higheruid;
  }

  # move back to the start of this file and re-write the header
  seek($target, $targetpos, 0);
  $Self->write_header($target, $header);
}

=item $index->header_dump()

=item $index->record_dump()

=item $index->header_longdump()

=item $index->record_longdump()

=item $index->header_undump()

=item $index->record_undump()

Dump the headers and records in either space separated fields or named lines with a blank line between for long.  

The "undump" option is able to parse the space separated format, allowing pipe to a standard unix tool to 
process the records, and then re-parse them back into a binary index file.

=cut

sub header_dump {
  my $Self = shift;
  my $array = $Self->header_array();
  return join(' ', @$array);
}

sub header_longdump {
  my $Self = shift;
  my $array = $Self->header_array();
  my @data;
  my $frm = $Self->{format}{HeaderFields};
  foreach my $field (0..$#$frm) {
    my $name = $frm->[$field][0];
    my $val = $array->[$field];
    $val = sprintf("%08x", $val) if $name =~ m/Crc$/;
    push @data, "$name: $val";
  }
  return join("\n", @data, '');
}

sub header_undump {
  my $Self = shift;
  my $string = shift;
  my @items = split ' ', $string;
  return \@items;
}

sub record_dump {
  my $Self = shift;
  my $array = $Self->record_array();
  return join(' ', @$array);
}

sub record_longdump {
  my $Self = shift;
  my $array = $Self->record_array();
  my @data;
  my $frm = $Self->{format}{RecordFields};
  foreach my $field (0..$#$frm) {
    my $name = $frm->[$field][0];
    my $val = $array->[$field];
    $val = sprintf("%08x", $val) if $name =~ m/Crc$/;
    push @data, "$name: $val";
  }
  return join("\n", @data, '');
}

sub record_undump {
  my $Self = shift;
  my $string = shift;
  my @items = split ' ', $string;
  return \@items;
}

# INTERNAL METHODS

sub _make_header {
  my $Self = shift;
  my $ds = shift;

  my $ref = ref($ds);

  # check what sort of format it is:

  # scalar - already a buffer
  return $ds unless $ref;

  # array
  return $Self->_header_a2b($ds) if ref($ds) eq 'ARRAY';

  # must be hash
  return $Self->_header_h2b($ds);
}

sub _make_record {
  my $Self = shift;
  my $ds = shift;

  my $ref = ref($ds);

  # check what sort of format it is:

  # scalar - already a buffer
  return $ds unless $ref;

  # array
  return $Self->_record_a2b($ds) if ref($ds) eq 'ARRAY';

  # must be hash
  return $Self->_record_h2b($ds);
}

####################
# Header Conversions

sub _header_b2h {
  my $Self = shift;
  my $buf = shift;
  return undef unless $buf;

  my $array = $Self->_header_b2a($buf);
  my $hash = $Self->_header_a2h($array);

  return $hash;
}

sub _header_b2a {
  my $Self = shift;
  my $buf = shift;
  return undef unless $buf;

  my @array = unpack($Self->{format}{HeaderPack}, $buf);

  # check checksum match!
  if ($Self->{version} >= 11) {
    my $Header = $Self->{format}{HeaderFields}[$Self->{format}{HeaderNames}{HeaderCrc}];
    my $crc = crc32(substr($buf, 0, $Header->[4]));
    if ($array[$Header->[3]] != $crc) {
      $Self->{checksum_failure} = 1;
      warn "Header CRC Failure $array[$Header->[3]] != $crc";
      die "Header CRC Failure $array[$Header->[3]] != $crc"
        if $Self->{strict_crc};
    }
  }

  return \@array;
}

sub _header_h2b {
  my $Self = shift;
  my $hash = shift;
  return undef unless $hash;

  my $array = $Self->_header_h2a($hash);
  my $buf = $Self->_header_a2b($array);

  return $buf;
}

sub _header_a2b {
  my $Self = shift;
  my $array = shift;
  return undef unless $array;

  my $buf = pack($Self->{format}{HeaderPack}, @$array);

  if ($Self->{version} >= 11) {
    my $Header = $Self->{format}{HeaderFields}[$Self->{format}{HeaderNames}{HeaderCrc}];
    my $crc = crc32(substr($buf, 0, $Header->[4]));
    substr($buf, $Header->[4]) = pack('N', $crc);
  }

  return $buf;
}

sub _header_a2h {
  my $Self = shift;
  my $array = shift;
  return undef unless $array;

  my %res;
  my $frm = $Self->{format}{HeaderFields};
  for (0..$#$frm) {
    $res{$frm->[$_][0]} = $array->[$_];
  }

  return \%res;
}

sub _header_h2a {
  my $Self = shift;
  my $hash = shift;
  return undef unless $hash;

  my @array;
  my $frm = $Self->{format}{HeaderFields};
  for (0..$#$frm) {
    $array[$_] = $hash->{$frm->[$_][0]};
  }

  return \@array;
}

####################
# Record conversions

sub _record_h2b {
  my $Self = shift;
  my $hash = shift;
  return undef unless $hash;

  my $array = $Self->_record_h2a($hash);
  my $buf = $Self->_record_a2b($array);

  return $buf;
}

sub _record_a2b {
  my $Self = shift;
  my $array = shift;
  return undef unless $array;

  my $buf = pack($Self->{format}{RecordPack}, @$array);

  if ($Self->{version} >= 11) {
    my $Record = $Self->{format}{RecordFields}[$Self->{format}{RecordNames}{RecordCrc}];
    my $crc = crc32(substr($buf, 0, $Record->[4]));
    substr($buf, $Record->[4]) = pack('N', $crc);
  }

  return $buf;
}

sub _record_b2h {
  my $Self = shift;
  my $buf = shift;
  return undef unless $buf;

  my $array = $Self->_record_b2a($buf);
  my $hash = $Self->_record_a2h($array);

  return $hash;
}

sub _record_b2a {
  my $Self = shift;
  my $buf = shift;
  return undef unless $buf;

  my @array = unpack($Self->{format}{RecordPack}, $buf);

  # check checksum match!
  if ($Self->{version} >= 11) {
    my $Record = $Self->{format}{RecordFields}[$Self->{format}{RecordNames}{RecordCrc}];
    my $crc = crc32(substr($buf, 0, $Record->[4]));
    if ($array[$Record->[3]] != $crc) {
      $Self->{checksum_failure} = 1;
      warn "Record CRC Failure ($Self->{recno}) $array[$Record->[3]] != $crc";
      die "Record CRC Failure ($Self->{recno}) $array[$Record->[3]] != $crc"
        if $Self->{strict_crc};
    }
  }

  return \@array;
}

sub _record_a2h {
  my $Self = shift;
  my $array = shift;
  return undef unless $array;

  my %res;
  my $frm = $Self->{format}{RecordFields};
  for (0..$#$frm) {
    $res{$frm->[$_][0]} = $array->[$_];
  }

  return \%res;
}

sub _record_h2a {
  my $Self = shift;
  my $hash = shift;
  return undef unless $hash;

  my @array;
  my $frm = $Self->{format}{RecordFields};
  for (0..$#$frm) {
    $array[$_] = $hash->{$frm->[$_][0]};
  }

  return \@array;
}

=back

=head1 AUTHOR AND COPYRIGHT

Bron Gondwana <brong@fastmail.fm> - Copyright 2008 FastMail

Licenced under the same terms as Cyrus IMAPd.

=cut

1;
