#! /usr/bin/env perl
eval 'exec perl -S -w $0 ${1+"$@"}'
    if 0;

use strict;
# options-hash: use as $o{optionname} to check for commandline options.
# my %o=( files => 1, needed => 1, libpath => 1, detected => 1, symbols => 1 );
my %o = ( symbols => 1 );
my $help = "perl off_t_problem.pl [directory|files]...\n"
    ."      scans the given files (or all files in a directory) for its\n"
    ."      dynamic dependencies. The binary and all its dependencies\n"
    ."      are classified whether they have been compiled as largefile\n"
    ."      or not - depending on the existance of symbols like plain\n"
    ."      fopen()/lseek() or their 64bit cousins from the transitional\n"
    ."      largefile-API named fopen64()/lseek64() instead. When two\n"
    ."      executable objects have a mismatch then it gets reported!\n"
    ." debug options:\n"
    ."    --files        after parsing commandline, print the list of files\n"
    ."                   that will be checked for largefile mismatch\n"
    ."    --needed       after scanning dynamic imports of the given files\n"
    ."                   print the (long) list of dependencies recognized\n"
    ."                   which wil be scanned too for largefile mismatches\n"
    ."    --libpath      show the libpath that was used to resolve some of\n"
    ."                   of the dependencies if `ldd` was not available\n"
    ."    --symbols      print the number of dynamic symbols found in each\n"
    ."                   object while scanning them (default=ON).\n"
    ."    --detected     for each object that was scanned, print the\n"
    ."                   classification attribute -??- -32- -64- or 3264\n"
    ."                   (along with the dynamic symbols that made this\n"
    ."                   think it is of that largefile type)\n"
    ."    --quiet        suppress the list of classifications printed just\n"
    ."      or           usually before the list of largefile mismatches\n"
    ."    --silent    ...it does also silence some other hints usually\n"
    ."                   printed to the screen (--quit/--no-symbols/--smart)\n"
    ."    --smart        suppress largefile mismatch for a limited set of\n"
    ."                   known dependency libs from which only a known set\n"
    ."                   of algorithm functions is imported (i.e. 'zlib')\n"
    ."    --nonclean     for libraries that might be checked smart, show\n"
    ."                   the first symbol that was thought to be offending.\n"
    ."    --noncleanall  or actually print all the imported symbols from\n"
    ."                   mismatching libs that are not known to be good.\n";

# helper: move to column - the length of the input string is taken current
sub col36                # column and some spaces are printed to STDOUT
{
    my $column = length $_[0];
    return if 36 <= $column;
    return " " x (36 - $column);
}

# ----------------------------------------------------------------------
my %X; my $file; # use as $X{$file}

# this is the implicit libpath, as if used by ld.so to resolve imports..
my @L = ( "/lib", "/usr/lib", "/usr/local/lib");
{   # fill the library path
    my $F = "/etc/ld.so.conf";
    if (open F, "<$F")
    {
	while (<F>) { chomp; push @L; }
	close F;
    }else{
	print STDERR "could not open $F: $!\n";
    }
}

{  # scan the argument list, options and files and dirs, fill %X file-hash ...
    my $old = ""; # pushback of $arg
    my $arg;
    for $arg (@ARGV)
    {
	if ($old =~ /^-L/) { push @L, $arg; $old = ""; next; }
	if ($arg =~ /^--?help/) { print $help; exit 0; }
	if ($arg =~ /^--?(\w[\w-]*)=(.*)/) { $o{$1} = $2; next; }
	if ($arg =~ /^--?no-([a-z].*)/) { $o{$1} = ""; next; }
	if ($arg =~ /^--?([a-z].*)/) { $o{$1} = "*"; next; }
	if ($arg =~ /^-L(.+)/) { push @L, $1; next; }
	if ($arg =~ /^-L/) { $old = $arg; next; }
	if ($arg =~ /^-[A-Z]/) { die "illegal option $arg"; }

	$arg =~ s/\/$//; # chomp dirsep

	# register the file in th %X hash - .dir says where from (debugging)
	if (-f $arg)
	{
	    next if -d $arg or ! -r $arg;
	    $X{$arg}{dir} = $arg;
	    $X{$arg}{dir} =~ s:/[^/]+$::;
	    next;
	}
    
	# when a directory was given, we scan all executables in it
	if (not opendir (D, $arg)) # 
	{
	    print STDERR "could not open directory '$arg': $!\n";
	    next;
	}
	my $entry;
	foreach $entry (readdir (D))
	{
	    my $file = "$arg/$entry";
	    if (-l $file) { $file = readlink $file or next; # try to resolve..
			    $file = "$arg/$file" if $file !~ m:^/:; } 
	    next if -d $file or ! -r $file;
	    my $type = `file $file 2>/dev/null`;
	    next if $type =~ /script/ or $type =~ /text/;
	    # the following call will skip symlinks to real files..
	    # next unless $type =~ /ELF/; # well, we'll see warnings later on..
	    $X{$file}{dir} = $arg;
	}
	closedir (D);
    } # for @ARGV
}

if ($o{libpath}) { # debugging - print @L list if "--libpath" seen
    for $file (@L) {
	print STDERR "-L ",$file,"\n";
    }
}

if ($o{files}) { # debugging - print %X files if "--files" seen
    for $file (sort keys %X) {
	print STDERR $file, " <> ", $X{$file}{dir}, "\n";
    }
}

# some options imply other options...
$o{quiet} = 1 if $o{silent};
$o{smart} = 1 if $o{silent};
$o{symbols} = "" if $o{silent}; # yes, --symbols is ON by default
$o{nonclean} = "*" if $o{noncleanall};

# __________________ detect dynamic library imports _________________

# register library imports in $X{$file}{needed}{*}
for $file (sort keys %X)
{
    print "." if not $o{quiet};
    # `ldd` prints a nice list of import libs and how they resolve
    my $header = "";
    $header = `ldd $file 2>/dev/null` unless $o{noldd}; # "--noldd" option
    $header =~ s{ ^\s+(\S+)\s+[=][>]\s+(\S+) }
    { $X{$file}{needed}{$1} = $2; "" }gmex;

    next if exists $X{$file}{needed};

    # when there was nothing seen by `ldd` then try again with objdump.
    # however, "objdump -p" shows lib imports but not how they resolve...

    $header = `objdump -p $file 2>/dev/null`;
    $header =~ s{ ^\s+NEEDED\s+(\S+) }
    {
	$X{$file}{needed}{$1} = "" unless $1 eq "NEEDED"; ""
    }gmex;

    my $lib;
    for $lib (keys %{$X{$file}{needed}})
    {
	next if length $X{$file}{needed}{$lib};
	my $dir;
	for $dir (@L) # walk -L libpath
	{
	    if (-f "$dir/$lib") 
	    { $X{$file}{needed}{$lib} = "$dir/$lib"; last; }
	}
    }
}   print "\n" if not $o{quiet};

if ($o{needed}) { # debugging - print imports if "--needed" was seen
    for $file (sort keys %X) { my $lib;
	for $lib (sort keys %{$X{$file}{needed}}) {
	    print STDERR $file, " - "; 
	    print STDERR $lib, " => '",$X{$file}{needed}{$lib}, "'\n";
	}
    }
}

# _____________________ classify each object  ___________________________

my %R; my $lib; # use as $R{$lib} - it's a cache storing classifications.

# compare with largefile specs at http://ftp.sas.com/standards/large.file
# differences detected by 64on32bits hints, about section 4 of the
# http://ftp.sas.com/standards/large.file/specs/api+.006.ps

my @base64 = ( "creat64", "open64", "ftw64", "nftw64", "fgetpos64",
	       "fopen64", "freopen64", "fseeko64", "fsetpos64", 
	       "ftello64", "tmpfile64", "mmap64", "fstat64",
	       "lstat64", "stat64", "statvfs64", "fstatvfs64",
	       "lockf64", "lseek64", "ftruncate64", "truncate64",
	       "aio_read64", "aio_write64", "lio_listio64", "aio_erro64",
	       "aio_return64", "aio_cancel64", "aio_suspend64",
	       # these have been seen in the wild as well...
	       "mkstemp64", "tmpfile64", "readdir64", 
	       "pread64", "pwrite64", "sendfile64" );

sub imported
{
    return index ($_[0], "*UND*") >= 0
}

# this routine is run for all %X files and all their $X{$file}{needed}{*}
# dependencies - it stores the information into the %R cache for each one.
sub classifyRlib
{
    my $lib = $_[0];
    my $sym;
   
    # read the dynamic symbol table (slow!) and register in $R{$lib}{sym}{*}
    my $dynamicsymbols = `objdump -T $lib`; 
    $dynamicsymbols =~ s{ ^ (.*) \s+ ([\w_]\w+) \s*$ }
    { $R{$lib}{sym}{$2} = $1; "" }gmex;

    if ($o{symbols} and exists $R{$lib}{sym}) {
	print STDERR " .... ",$lib," ", col36($lib)," ";
	print STDERR scalar %{$R{$lib}{sym}}, " \t(symbols)\n";
    }

    $R{$lib}{_64} = "";
    $R{$lib}{_32} = ""; 
    for $sym (@base64) # foreach known ..64 symbol from the largefile-API
    {
	$sym =~ s/64$//;           next if exists $R{$lib}{sym}{$sym."32"};
	$R{$lib}{_64} .= " ".$sym."64"  if exists $R{$lib}{sym}{$sym."64"};
	$R{$lib}{_32} .= " ".$sym.".."  if exists $R{$lib}{sym}{$sym};
	if (exists $R{$lib}{sym}{$sym} and exists $R{$lib}{sym}{$sym."64"} 
	    and imported($R{$lib}{sym}{$sym})
	    and imported($R{$lib}{sym}{$sym."64"}))
	{ $R{$lib}{import3264} .= " ".$sym."../".$sym."64" }
    }

    return if length $R{$lib}{_32};
    # secondly - if the library/binary is itself _64 and does also export
    # functions in traditional dualmode-style (none/none64) then declare
    # them _32 as well - effectivly classifying it as a 3264 dualmode object
    for $sym (keys %{$R{$lib}{sym}})
    {
	next if $sym !~ /\w[\w_]+\w\w64$/;      # foreach symbol like "\w+64"
        next if $sym =~ /(_int|Int)64$/;        # (with one exception)
	$sym =~ s/64$//;                        # which has a cousin symbol
	next if not exists $R{$lib}{sym}{$sym}; # without the "64" suffix.
	next if imported($R{$lib}{sym}{$sym});

	my $number="";       # sanity check: there is no other symbol with a 
	my $num;             # number suffix, esp. no "${sym}32" or "${sym}65"
	for $num (0..1024)   # but we actually test every number up to 1024
	{
	    next if $num eq "64";
	    next if not exists $R{$lib}{sym}{$sym.$num};
	    $number=$num; last;
	}
	next if length $number and exists $R{$lib}{sym}{$sym.$number};

	# okay, this $lib looks like exporting 3264 dualmode symbols..
	$R{$lib}{_32} = " " x length($R{$lib}{_64}) if ! length $R{$lib}{_32};
	$R{$lib}{_64} .= " ".$sym."64" if exists $R{$lib}{sym}{$sym."64"};
	$R{$lib}{_32} .= " ".$sym.".." if exists $R{$lib}{sym}{$sym};
    }
} 

# the function above was defined as "sub", now let's walk all the binaries
# and imported libraries, and classify whether they are _32 or _64 (or both)
for $file (keys %X)
{
    classifyRlib ($file);
    my $importlib;
    foreach $importlib (keys %{$X{$file}{needed}})
    {
	$lib = $X{$file}{needed}{$importlib};
	next if exists $R{$lib}; # already classified
	classifyRlib ($lib);
    }
} print STDERR "\n" if $o{symbols}; # (done with scanning/reading object files)

# helper: print the classifyRlib result of a given Rlib to STDOUT
sub printRlib
{
    my $lib = $_[0];
    if (length $R{$lib}{_32})
    {
	if (length $R{$lib}{_64})
	{
	    print " 32++ ",$lib," ",col36($lib),$R{$lib}{_32},"\n";
	    print " ++64 ",$lib," ",col36($lib),$R{$lib}{_64},"\n";
	}else{
	    print " -32- ",$lib," ",col36($lib),$R{$lib}{_32},"\n";
	}
    }
    elsif (length $R{$lib}{_64})
    {
	{
	    print " -64- ",$lib," ",col36($lib),$R{$lib}{_64},"\n";
	}
    }else{
	{
	    print " -??-",$lib,"\n";
	}
    }
}

sub Rtyp # helper - subset of above, only 4char classfy-code is returned
{
    my $lib = $_[0];
    if (length $R{$lib}{_32})
    {
	return "3264" if length $R{$lib}{_64};
	return "-32-";
    }
    elsif (length $R{$lib}{_64})
    {
	return "-64-";
    }else{
	return "-??-";
    }
}
		
if ($o{detected}) {    # debugging - print classifyRlib results to
    for $lib (sort keys %R) { # STDOUT if "--detected" was seen
	next if $lib =~ m:.*/libc[.]so[.]\d+$:;
	printRlib ($lib);
    }
}

# _______________________ smart helper function _____________________
# some dependencies should not provoke a mismatch even that the
# libraries themselves do mismatch in their largefile mode - that is
# the case when only algorithm functions are imported that would not
# trigger access to any filedescriptor - `zlib` is a good example.
#
# implementation: for a known set of dependent libraries, we can check
# which symbols have been imported from it. We know about those imports 
# of algorithms that are acceptable. If only these were seen, then the 
# import dependency turns out to be notoffending, i.e. it is "(clean)".
my %goodimports = ( libz => [ "deflate\\w*", "inflate\\w*", 
			      "compress\\w*", "uncompress\\w*",
			      "\\w+32", "zError", "zlibVersion"],
		    # only file-reference: poptReadConfigFile(...,name)
		    libpopt => [ "popt[A-Z](?:\\w(?!File))*" ],
		    libutil => [ "(open|fork)pty", "log(in|out|wtmp|in_tty)" ],
		    ".." => [ "<>" ]);
sub notoffending 
{
    my $bin = $_[0];
    my $lib = $_[1];
    return 0 if not length $R{$bin}{_64};
    return 0 if not length $R{$lib}{_32};
   

    my $library = ""; my $known;
    foreach $known (keys %goodimports)
    {
	next if "/$lib" !~ m:/${known}[.]so\b[^/]*$:;
	$library = $known; last;
    }
    # return 0 if not length $library and not $o{nonclean};

    $library = ".." if not length $library;
    
    my $sym; my $offending = "";
    foreach $sym (keys %{$R{$lib}{sym}})
    {
	next if $R{$lib}{sym}{$sym} =~ /[*]UND[*]/; # $lib imports(!!) it.
	next if $sym =~ /^_\w+_*/;         # compiler symbols / hidden symbols
	next if $sym =~ /^\d/;             # hmmm, does exist sometimes
	next if $sym =~ /^[A-Z_]+[.]\w+/;  # a dot in the middle, "GLIBC_2.1"
	next if $sym =~ /^\s*$/;           # empty, some extra info line

	next if not exists $R{$bin}{sym}{$sym};
	# the symbol is exported(!!) by $lib and it exists in $bin....

	foreach $known (@{$goodimports{$library}})
	{
	    if ($sym =~ /^${known}$/) # it's a known symbol 
	    {	$sym = ""; last;   }  # clean it - it's not offending.
	}
	if (length $sym)
	{ # we have an offending symbol.
	    $offending .= '"'.$sym.'" ';
	    last unless $o{noncleanall};
	}
    }
    return 1 if not length $offending; # imports only known good symbols.
    
    print "$bin ",col36($bin),"(64->>-32).." if $o{nonclean};
    print  $library,".. "                    if $o{nonclean};
    print "(not clean?)\n"                   if $o{noncleanall};
    print  $offending, "\n"                  if $o{nonclean};
    return 0; # found symbols not in the goodlist, return FALSE.
}

# ___________________ show largefile-mode mismatches __________________
# we walk the %X{file}s twice - we check out all the largefile mismatches
# and register them in the %offending hash. When done, then we print the
# Rlib classification of these, so that the reader can have an eyeball
# check if that is actually done right. Finally, go over the list for
# real and print the largefile mismatches - as an extension some of the
# largefile-mismatches are marked "(clean)" when the `notoffending`-helper
# functions knows that the $bin file does not import any symbol from its
# dependency $lib that could trigger some file access. So, even that there
# is a mismatch, it does not matter for there will be no non-largefile-mode
# access to the filesystem effectivly. using "--smart" or "--silent" will
# suppress these lines completely from output to the user screen.
my %offending;
my $T = "";
for $file (keys %X)              # register the largefile mismatches
{    my $importlib;
    for $importlib (keys %{$X{$file}{needed}})
    {
	$lib = $X{$file}{needed}{$importlib};
	next if not length $R{$file}{_32} and not length $R{$file}{_64};
	next if not length $R{$lib}{_32}  and not length $R{$lib}{_32};
	next if length $R{$file}{_64} and length $R{$lib}{_64};
	next if length $R{$file}{_32} and length $R{$lib}{_32}
	and not length $R{$file}{_64};
	# okay: -64->>-64- 3264>>-64- 3264>>3264 and -32->>-32- -32->>3264
	# else: mismatch:  3264>>-32- -64->>-32- and -32->>-64-
	next if $o{smart} and notoffending ($file, $lib);
#	$importlib = ""; $importlib=" (clean)" if notoffending ($file,$lib);
#	print $file," ",col36($file),Rtyp($file),">>",Rtyp($lib)," ",$lib;
#	print $importlib,"\n";
	$offending{$lib} = "";    # register both, so that we'll see the
	$offending{$file} = "";   # Rlib classification of both of them.
    }
     $offending{$file} = "" if exists $R{$file}{import3264};
}

unless ($o{quiet} or $o{q})     # and here we print the Rlib classification
{                               # unless however "--quiet" or "--silent" seen.
    my $mismatch="";
    for $lib (sort keys %offending)
    {	$mismatch="1"; printRlib ($lib); }
    if (not length $mismatch)
    {	print "no largefile mismatch found :-)\n" unless $o{silent};
	exit 0; # note: the last line of this script reads "exit 1" :-)
    }
}

unless ($o{quiet} or $o{q})     # here we show all the miscompiled libraries
{
    my $shown = 0;
    for $lib (sort keys %offending)
    {
	next if not exists $R{$lib}{import3264};
	print $lib,col36($lib)," IMPORTS",$R{$lib}{import3264},"\n";
	$shown++;
    }
    print " WARNING: importing both 32bit and 64bit off_t symbols"
	, " is very very dangerous!" if $shown;
}

for $file (sort keys %X)        # now show the largefile mismatches
{    my $importlib;
    for $importlib (sort keys %{$X{$file}{needed}})
    {
	$lib = $X{$file}{needed}{$importlib};
	next if not length $R{$file}{_32} and not length $R{$file}{_64};
	next if not length $R{$lib}{_32}  and not length $R{$lib}{_32};
	next if length $R{$file}{_64} and length $R{$lib}{_64};
	next if length $R{$file}{_32} and length $R{$lib}{_32}
	and not length $R{$file}{_64};
	# okay: -64->>-64- 3264>>-64- 3264>>3264 and -32->>-32- -32->>3264
	# else: mismatch:  3264>>-32- -64->>-32- and -32->>-64-
	next if $o{smart} and notoffending ($file, $lib);
	$importlib = ""; $importlib=" (clean)" if notoffending ($file,$lib);
	print $file," ",col36($file),Rtyp($file),">>",Rtyp($lib)," ",$lib;
	print $importlib,"\n";
#	$offending{$lib} = "";
#	$offending{$file} = "";
    }
}

exit 1; # there were some offending imports, or so it seems....
