#!/usr/bin/perl
#
# This file causes a list of directories to be removed or moved off
# the users home directory into a given other directory. Usually this
# is used to relief NFS home directories of the burden of caches and
# other performance needing directories.
#
# Copyright (C) 2010-2012 by Axel Beckert <beckert@phys.ethz.ch>,
# Department of Physics, ETH Zurich.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see http://www.gnu.org/licenses/.
#

use Modern::Perl;

our $VERSION = '0.3.1.2';

# Configuration variables to be used in configuration files
my $CONFIG = {
    TARGETDIR  => '/tmp',
    FILELAYOUT => '.unburden-%u/%s',
};

# Just show what would be done
my $DRYRUN = undef;

# Undo feature
my $REVERT = 0;

# Defaul base name
my $BASENAME = 'unburden-home-dir';
my $LISTSUFFIX = '.list';

# Load Modules
use Config::File;
use Getopt::Std; $Getopt::Std::STANDARD_HELP_VERSION = 1;
use File::Path qw(mkpath rmtree);
use File::Basename;
use File::Touch;
use File::Rsync;
use File::Which;
use IO::Handle;
use Data::Dumper;

# Declare and initialise some variables
my %OPTIONS = ();
my $FILTER = undef;
my $UID = getpwuid($<);
my $USE_LSOF = 1;
my $LSOF_CMD = undef;

# Some messages for Getopt::Std
sub VERSION_MESSAGE {
    my ($fh, $getoptpkg, $getoptversion, $cmdlineargs) = @_;

    say $fh "Unburden Home Directory $VERSION\n";

    return;
}

sub HELP_MESSAGE {
    my ($fh, $getoptpkg, $getoptversion, $cmdlineargs) = @_;

    say $fh "Usage: $0 [ -F | -n | -u | -b basename | (-c|-C) conffile | -f filter | (-l|-L) listfile ]
       $0 ( -h | --help | --version )

Options with parameters:

  -b  use the given string as basename instead of $BASENAME.

  -c  read an additional configuration file

  -C  read only the given configuration file

  -f  just unburden those directory matched by the given filter (a perl
      regular expression) -- matches the already unburdened
      directories if used together with -u.

  -l  read an additional list file

  -L  read only the given list file

Options without parameters:

  -F  Do not check if to-be-(re)moved files and directories are still
      in use (aka *F*orce (re)moving).

  -n  dry run (show what would be done)

  -u  undo (reverse the functionality and put stuff back into the home
      directory)

  -h, --help show this help

  --version  show the program's version
";

    return;
}

# Parse command line options
getopts('hnufFb:c:C:l:L:', \%OPTIONS);

for (keys %OPTIONS) {
    when('h') {
	my $fh = IO::Handle->new_from_fd(fileno(STDERR),'w');
	VERSION_MESSAGE($fh);
	HELP_MESSAGE($fh);
	exit 0;
    }
    when('b') { $BASENAME = $OPTIONS{b}; }
}

# By default check for a system wide and a user configuration and list file
my @CONFFILES = ("/etc/$BASENAME", "$ENV{HOME}/.$BASENAME");
my @LISTFILES = ("/etc/$BASENAME$LISTSUFFIX", "$ENV{HOME}/.$BASENAME$LISTSUFFIX");

for (keys %OPTIONS) {
    when('C') {      @CONFFILES = ($OPTIONS{C}); }
    when('c') { push(@CONFFILES,   $OPTIONS{c}); }
    when('L') {      @LISTFILES = ($OPTIONS{L}); }
    when('l') { push(@LISTFILES,   $OPTIONS{l}); }
    when('n') { $DRYRUN   = 1; }
    when('u') { $REVERT   = 1; }
    when('F') { $USE_LSOF = 0; }
    when('f') {
	eval { $FILTER = qr/$OPTIONS{f}/; };
	if ($@) {
	    report_serious_problem("parameter to -f", "\n$@");
	    exit 2;
	}
    }
}

# Check for configuration files and read them
foreach my $configfile (@CONFFILES) {
    if ( -e $configfile ) {
	$CONFIG = { %$CONFIG,
		    %{Config::File::read_config_file($configfile)} };
    }
}

# Fix some values
$UID =~ s/\s+//gs;

# Remove quotes and line-feeds from values
foreach my $key (keys %$CONFIG) {
    chomp($CONFIG->{$key});
    $CONFIG->{$key} =~ s/^([\'\"])(.*)\1$/$2/;
}

# Set proper umask when creating files or directories. Save current
# umask before.
my $OLDUMASK = umask();
umask(077);

# Initialize rsync object
my $rsync = File::Rsync->new({
    archive => 1,
    verbose => 1,
    outfun => sub {
	my $_ = shift;
	chomp;
	say unless m(^sent |^total size|^\s*$);
    },
    errfun => sub { chomp; warn "$_[0]\n"; },
});

# Check for lsof in search path
my $which_lsof = which('lsof');
if (!$which_lsof) {
    warn "WARNING: lsof not found, not checking for files in use.\n";
    $USE_LSOF = 0;
}

sub report_problem {
    warn "WARNING: Can't handle $_[0]: $_[1]";
    return;
}

sub report_serious_problem {
    warn "ERROR: Can't handle $_[0]: $_[1]";
    return;
}

sub move ($$) {
    my ($from, $to) = @_;
    say "Moving $from -> $to";
    unless ($DRYRUN) {
	if (-d $from) {
	    $from .= '/' unless $from =~ m(/$);
	    $to .= '/' unless $to =~ m(/$);

	    my $rc = $rsync->exec({
		src => $from,
		dst => $to,
	    });
	    rmtree($from);
	} else {
	    my $rc = system(qw(mv -v), $from, $to);
	    return !($? >> 8);
	}
    }
    return 1;
}

sub create_symlink_and_parents {
    my ($old, $new) = @_;
    create_parent_directories($new);
    say "Symlinking $new -> $old";
    unless ($DRYRUN) {
	symlink($old, $new)
	    or die "Couldn't symlink $new -> $old: $!";
    }
    return;
}

sub create_parent_directories {
    my $file = shift;
    my $parent_dir = dirname($file);
    unless (-d $parent_dir) {
	say "Create parent directories for $file";
	mkpath($parent_dir, { verbose => 1 }) unless $DRYRUN;
    }
    return;
}

sub possibly_create_non_existing_stuff {
    my ($type, $item, $target) = @_;

    # Shall we create not yet existing directories or files as symlink?
    # Case 1: directory
    if ( $type eq 'D' ) {
	# TODO: Refactor create_symlink_and_parents so that its
	# create_parent_directories call isn't redundant in this case.
	say "Create directory $target and parents";
	mkpath($target, { verbose => 1 }) unless $DRYRUN;
	create_symlink_and_parents($target, $item);
    }

    # Case 2: file
    elsif ( $type eq 'F' ) {
	create_parent_directories($target);
	say "Touching $target";
	touch($target) unless $DRYRUN;
	create_symlink_and_parents($target, $item)
    }
    return 0;
}

sub fix_dangling_links {
    my ($type, $itemexpanded, $target) = @_;
    my $link = readlink($itemexpanded);
    my $is_dir  = type_is_directory($type);
    my $is_file = type_is_file($type);

    # Accept existing symlinks or unburden-home-dir.list entries for
    # directories with or without trailing slash
    if ($is_dir) {
	$link =~ s{/$}{};
	$itemexpanded =~ s{/$}{};
	$target =~ s{/$}{};
    }

    # Check if link target is wanted target
    if ( $link ne $target ) {
	report_problem($itemexpanded, "$link not equal $target");
	return 1;
    }

    # Check if target exists and is same type
    if ( -e $target ) {
	my $unexpected_type = check_for_unexpected_type($type, $target);
	return $unexpected_type if $unexpected_type;
    }
    # Symlink is there, but file or directory not
    else {
	create_object_of_type($type, $target);
    }
    return 0;
}

sub parse_lsof_output {
    my ($output) = @_;
    chomp($output);
    my @lines = split(/\n/, $output);

    my $result = '';
    my $pid;
    my $cmd;

    foreach my $line (@lines) {
	if ($line =~ /^p(.*)$/) {
	    $pid = $1;
	    $cmd = undef;
	} elsif ($line =~ /^c(.*)$/) {
	    $cmd = $1;
	    unless ($pid) {
		report_problem("lsof output", "No pid before command: $line");
		next;
	    }
	    $result .= sprintf("  %5i (%s)\n", $pid, $cmd);
	    $pid = undef;
	} else {
	    report_problem("line in lsof output", $line);
	}
    }

    return $result;

}

sub files_in_use {
    my ($item) = @_;
    my $lsof_output = undef;

    if (-d $item) {
	$lsof_output = `find '$item' -print0 | xargs -0 lsof -F c`;
    } elsif (-f _) {
	$lsof_output = `lsof -F c '$item'`;
    } else {
	report_problem("Not checking for open files in $item: neither file nor directory");
	return;
    }

    my $lsof_parsed = parse_lsof_output($lsof_output);

    if ($lsof_parsed) {
	report_problem($item, "in use, not (re)moving. Process list:\n$lsof_parsed");
	return 1;
    } else {
	return 0;
    }
}

sub action_move {
    my ($itemexpanded, $target) = @_;

    create_parent_directories($target);
    move($itemexpanded, $target)
	or die "Couldn't move $itemexpanded -> $target: $!";
    return;
}

sub action_delete_and_recreate {
    my ($type, $itemexpanded, $target) = @_;

    my $is_file = type_is_file($type);
    my $is_dir  = type_is_directory($type);

    say "Delete $itemexpanded";
    unless ($DRYRUN) {
	$is_dir  and rmtree($itemexpanded, { verbose => 1 }) ;
	$is_file and (unlink($itemexpanded)
		      || die "Couldn't delete $itemexpanded: $!");
    }
    create_object_of_type($type, $target);

    return;
}

sub create_object_of_type {
    my ($type, $target) = @_;

    say "Create $target";
    unless ($DRYRUN) {
	if (type_is_directory($type)) {
	    mkpath($target, { verbose => 1 });
	}
	elsif (type_is_file($type)) {
	    create_parent_directories($target);
	    say "Touching $target";
	    touch($target) || die "Couldn't touch $target: $!";
	}
    }

    return;
}

sub unknown_element {
    my ($what, $unknown) = @_;

    warn "Unknown $what '$unknown'. This should never happen.";
    return 255;
}

sub create_symlink {
    my ($itemexpanded, $target) = @_;

    say "Symlinking $target ->  $itemexpanded";
    unless ($DRYRUN) {
	symlink($target, $itemexpanded)
	    or die "Couldn't symlink $target ->  $itemexpanded: $!";
    }
    return;
}

sub type_is_directory {
    return (lc(shift) eq 'd');
}

sub type_is_file {
    return (lc(shift) eq 'f');
}

sub check_for_unexpected_type {
    my ($type, $itemexpanded) = @_;

    my $is_file = type_is_file($type);
    my $is_dir  = type_is_directory($type);

    unless ($is_file or $is_dir) {
	return unknown_element('type', $type);
    }

    if ($is_file and !-f $itemexpanded) {
	report_serious_problem($itemexpanded,
			       'Unexpected type (not a file)');
	return 1;
    }

    if ($is_dir and !-d $itemexpanded) {
	report_serious_problem($itemexpanded,
			       'Unexpected type (not a directory)');
	return 1;
    }

    return;
}

sub do_it {
    my ($type, $itemexpanded, $target, $action) = @_;

    if ( $USE_LSOF and files_in_use($itemexpanded) ) {
	return 0;
    }

    my $unexpected_type = check_for_unexpected_type($type, $itemexpanded);
    return $unexpected_type if $unexpected_type;

    if ( $action eq 'r' or $action eq 'd' ) {
	action_delete_and_recreate($type, $itemexpanded, $target);
    }
    elsif ( $action eq 'm' ) {
	action_move($itemexpanded, $target);
    }
    else {
	return unknown_element('action', $action);
    }

    create_symlink($itemexpanded, $target);

    return 0;
}

sub calculate_target {
    my $replacement = shift;
    my $target = $CONFIG->{FILELAYOUT};

    $target =~ s|%u|$UID|g;
    $target =~ s|%s|$replacement|g;

    return $CONFIG->{TARGETDIR}."/$target";
}

sub fill_in_wildcard_matches {
    my ($itemglob, $itemexpanded, $target) = @_;

    # Replace %<n> (e.g. %1) with the n-th wildcard match. Uses perl
    # here as it would be too complicated and way less readable if
    # written as (bourne) shell script.

    # Change from globbing to regexp
    $itemglob =~ s/\?/(.)/g;
    $itemglob =~ s/\*/(.*)/g;

    my @result = $itemexpanded =~ m($itemglob)g;

    $target =~ s/\%(\d+)/$result[$1-1]/eg;

    return $target;
}

# Check if the path to something to unburden already contains a symlink
sub symlink_in_path {
    my $path = shift;
    # Remove home directory, i.e. check just from below the home directory
    if ($path =~ s($ENV{HOME}/?)()) {
	# Split up into components, but remove the last one (which we
	# are requested to handle, so we shouldn't check that now)
	my @path_elements = split(m(/), $path);
	pop(@path_elements);

	foreach my $i (0..$#path_elements) {
	    my $path_to_check = $ENV{HOME}.'/'.join('/', @path_elements[0..$i]);
	    #say "Check if $path_to_check is a symlink";
	    return $path_to_check if -l $path_to_check;
	}
	return 0;
    } else {
	report_serious_problem("Can't find home directory ($ENV{HOME}) in $path!");
    }

    return;
}

sub replace {
    # replace $type $i $item $replacement
    my ($type, $itemexpanded, $itemglob, $replacement, $action) = @_;

    # Skip entries where wildcard where passed
    if ($itemexpanded =~ /[][*?]/) {
	warn "Skipping '$itemexpanded' due to unmatched wildcard.\n";
	return 0;
    }

    if (my $symlink = symlink_in_path($itemexpanded)) {
	warn "Skipping '$itemexpanded' due to symlink in path: $symlink\n";
	return 0;
    }

    my $target = fill_in_wildcard_matches($itemglob, $itemexpanded,
					  calculate_target($replacement));

    # Check if the source exists
    if ( ! -e $itemexpanded and ! -l $itemexpanded ) {
	possibly_create_non_existing_stuff($type, $itemexpanded, $target);
    }
    # Check if source is already a symlink
    elsif ( -l $itemexpanded ) {
	fix_dangling_links($type, $itemexpanded, $target);
    }

    # TODO: Check available disk space
    # Should use report_serious_problem

    # No symlink yet, then actually move or remove!
    else {
	do_it($type, $itemexpanded, $target, $action);
    }

    return;
}

sub revert {
    my ($itemexpanded, $item_in_home, $target_glob) = @_;

    # Skip entries where wildcard where passed
    if ($itemexpanded =~ /[][*?]/) {
	warn "Skipping '$target_glob' due to unmatched wildcard.\n";
	return 0;
    }

    $item_in_home = "$ENV{HOME}/" .
	fill_in_wildcard_matches($target_glob, $itemexpanded, $item_in_home);
    say "Trying to revert $itemexpanded to $item_in_home";

    if (-l $item_in_home) {
	my $link_target = readlink($item_in_home);
	if ($itemexpanded eq $link_target) {
	    say "Removing symlink $item_in_home";
	    unlink($item_in_home) unless $DRYRUN;
	    move($itemexpanded, $item_in_home);
	} else {
	    warn "Ignoring symlink $item_in_home as it points to $link_target ".
		 "and not to $itemexpanded as expected.\n";
	}
    }

    return;
}

sub exchange_wildcards_and_replacements {
    my ($wildcard, $replacement) = @_;
    my $i = 1;
    while ($replacement =~ /\%(\d+)/) {
	my $number = $1;
	my $prev = $number-1;
	$wildcard =~ s/^(([^*]*[*?]){$prev}[^*]*)([?*])/"$1\%".$i++/e;
	my $wildcardtype = $3;
	$replacement =~ s/\%(\d+)/$wildcardtype/;
    }
    return ($wildcard, $replacement);
}

for my $list (@LISTFILES) {
    next unless -r $list;

    # Clean up this and that
    open(LIST, '<', $list) or die "Can't open $list: $!";
    while (<LIST>) {
	next if /^#|^ *$/;

	chomp;
	my ($action, $type, $item, $replacement) = split;

	next unless defined $action;

	if (!defined($item) or !defined($replacement)) {
	    warn "Can't parse '$_', skipping...";
	    next;
	}
	unless ( type_is_directory($type) or type_is_file($type) ) {
	    warn "Can't parse type '$type', must be 'd', 'D', 'f' or 'F', skipping...";
	    next;
	}
	if ( $action ne 'd' and $action ne 'r' and $action ne 'm'  ) {
	    warn "Can't parse action '$action', must be 'd', 'r' or 'm', skipping...";
	    next;
	}

	if ( $item =~ m(^(\.\.)?/) ) {
	    warn "$item would be outside of the home directory, skipping...\n";
	    next;
	}

	if ($REVERT) {
	    ($item, $replacement) = exchange_wildcards_and_replacements($item, $replacement);

	    my $replacement_path = calculate_target($replacement);
	    for my $i (glob($replacement_path)) {
		if (defined($FILTER)) {
		    next unless ($i =~ $FILTER);
		}
		revert($i, $item, $replacement);
	    }
	} else {
	    for my $i (glob("$ENV{HOME}/$item")) {
		if (defined($FILTER)) {
		    next unless ($i =~ $FILTER);
		}
		replace($type, $i, $item, $replacement, $action);
	    }
	}
    }
    close(LIST);
}

# Restore original umask
umask($OLDUMASK);
