/*
 * Code to proxy syscalls for moo methods.
 *
 * Copyright 2001-2003 by Joey Hess <joey@mooix.net>
 * under the terms of the GNU GPL.
 * 
 * This is a multi-client (but all clients must be running as the same
 * user, as part of the same method call), stateless proxy.
 */

/*
 * The general code path for running a proxied command is this:
 *
 * - get command
 * - validate filename to act on
 * - do command-specific access checks on the file/whatever
 * - return result to client
 */

#include "mood.h"
#include "libmooproxy.h"
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include <syslog.h>
#include <assert.h>
#include <sys/wait.h>
#include <libgen.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <sys/socket.h>
#include <sys/un.h>

/* This is used as a semaphore by the SIGCHLD handler to indicate when the
 * child process has exited. childhandler() sets it to 1; proxy() and
 * execv_handler() read and reset it. */
int child_exit;

extern struct embedded_lang embedded[]; /* from mood.c */

/*
 * Allow any number of children to connect, and handle their requests.
 *
 * sock is the listening socket clients connect to. info is handed
 * unchanged to proxycommand() for every request read from a client.
 * myuid is part of the interface but is not used here.
 *
 * Returns when the last connected client has gone away, or when the child
 * process has exited (child_exit is set) and no clients are connected.
 */
void proxy (int sock, uid_t myuid, stackinfo *info) {
	fd_set fds, readfds;
	int conn;
	int highfd = sock;
	int num, i;
	int num_clients = 0;

	(void) myuid;

	/* fds is the master set; readfds is the scratch copy select()
	 * overwrites on every call. */
	FD_ZERO(&fds);
	FD_SET(sock, &fds);
	memcpy(&readfds, &fds, sizeof(fd_set));

	while (1) {
		/* If the sole child process has exited, don't select() as
		 * that would hang forever. */
		if (child_exit && num_clients < 1) {
			return;
		}
		else {
			child_exit = 0;
		}

		/* A -1 might be returned if the process receives a
		 * SIGCHLD, so ignore those. */
		while ((num = select(highfd + 1, &readfds, NULL, NULL, NULL)) == -1) {
			if (num_clients < 1)
				return;
		}

		/* Check for new connections. The peer address is of no
		 * interest, so none is requested; that also avoids handing
		 * accept() a stale address length on later calls. */
		if (FD_ISSET(sock, &readfds)) {
			num--;
			conn = accept(sock, NULL, NULL);
			if (conn != -1 && conn >= FD_SETSIZE) {
				/* Cannot be tracked in an fd_set; refuse
				 * the client rather than corrupt the set. */
				close(conn);
			}
			else if (conn != -1) {
				FD_SET(conn, &fds);
				if (highfd < conn)
					highfd = conn;
				num_clients++;
			}
		}

		/* Handle existing connections. num counts the ready fds
		 * still to be serviced, so the scan stops early. */
		for (i = 0; num > 0 && i <= highfd; i++) {
			if (i != sock && FD_ISSET(i, &readfds)) {
				num--;
				if (proxycommand(i, info, 0) == -1) {
					/* Close connection. */
					FD_CLR(i, &fds);
					close(i);
					if (highfd == i)
						highfd--;
					num_clients--;
					if (num_clients < 1)
						return;
				}
			}
		}

		memcpy(&readfds, &fds, sizeof(fd_set));
	}
}

/*
 * Signal handler for SIGCHLD; reaps the child pid once it has terminated
 * and returns its wait status to the client.
 *
 * This is also called for stopped children, and so it must not hang if the
 * child is still running. waitpid() is therefore called with WNOHANG, and
 * the handler is simply re-armed when the child has only stopped or has
 * nothing to report.
 *
 * errno is saved and restored so that the code this handler interrupts
 * does not see a value left behind by the calls made here.
 */
pid_t child_pid;
int client_handle;
void childhandler(int signum) {
	int saved_errno = errno;
	int status = 0;
	pid_t reaped;

	if (signum != SIGCHLD)
		return;

	reaped = waitpid(child_pid, &status, WUNTRACED | WNOHANG);
	if (reaped > 0 && ! WIFSTOPPED(status)) {
		/* The child is gone: report its status and flag the exit
		 * for proxy() and execv_handler(). */
		result(client_handle, status, 0);
		close(client_handle);
		child_pid = 0;
		child_exit = 1;
	}
	else if (reaped != -1) {
		/* Stopped, or no state change to collect: keep listening
		 * for the next SIGCHLD. */
		signal(SIGCHLD, childhandler);
	}

	errno = saved_errno;
}

/*
 * Given an entire file or at least its first line, parses out the hashbang
 * line, returning the executable to run.
 *
 * The buffer is modified in place: the character that ends the executable
 * name is overwritten with a NUL, and the returned pointer points into
 * line. Returns NULL if the text does not start with "#!", or if no space,
 * tab or newline follows the executable name (the buffer must have been
 * too small to hold it all).
 */
char *hashbang(char *line) {
	size_t len;

	/* Both characters must match. Testing line[0] first also keeps
	 * line[1] from being read when the string is empty. */
	if (line[0] != '#' || line[1] != '!')
		return NULL;
	line += 2;

	/* Skip optional whitespace between "#!" and the executable. */
	while (line[0] == ' ' || line[0] == '\t')
		line++;

	/* The executable name ends at the first whitespace or at the end
	 * of the line, whichever comes first, so that text on later lines
	 * can never become part of the name. */
	len = strcspn(line, " \t\n");
	if (line[len] == '\0')
		return NULL; /* buffer must be too small */

	/* Ignore remainder of line. */
	line[len] = '\0';

	return line;
}

/*
 * The exec call requires: newcallstack, mooix_debug, command, arg, ... ;
 * fds: cwd, stdin, stdout, stderr
 *
 * argv[0] names a file holding a callstack to use in place of the caller's
 * (empty for none; only honoured when the caller's stack is a subset of
 * mooadminstack). argv[1] is the debug setting (empty means off). argv[2]
 * onward is the method and its arguments. myuid is the uid the method is
 * run as.
 *
 * Three fds are read from the client and installed as stdin, stdout and
 * stderr. The function then forks: the child drops to myuid and runs the
 * method, in an embedded interpreter when its hashbang line names one and
 * through run_noembed() otherwise. The parent drops root and proxies the
 * method's requests until there is nothing left to serve, then waits for
 * the SIGCHLD handler to flag the child's exit.
 */
void execv_handler (stackinfo *callerinfo, int client,
		    int argc, char **argv, uid_t myuid) {
	int newfds[3];
	int childsocket;
	char sockfile[64];
	char *mooix_debug = NULL;
	stackinfo *info;
	char *method, *this, *tmp;
	enum method_type methtype;
	struct stat st_buf;
	struct callstack *tmpnext;
	FILE *f;

	/* Some more parameter validation that runcommand doesn't do. */
	rassert(argc > 2);
	rassert(argv[2] != NULL);

	/* It will never be null, but if it is empty, do not set debugging
	 * on. */
	if (argv[1][0] != '\0')
		mooix_debug = argv[1];

	/* We're already cd'd into the object, so this will be its directory
	 * name. */
	this = getcwd(NULL, 0); /* FIXME: NULL is a gnuism */

	/* Stat the object's directory for sanity checks, and to see who
	 * owns it (used below) */
	rassert(fstat(THISFD, &st_buf) != -1);
	/* It's probably not a good idea to let folks turn /tmp
	 * into a mooix object.  */
	if ((st_buf.st_mode & S_ISVTX) == S_ISVTX)
		die("refusing to run method in sticky object directory");

	/* Set up info structure for method. */
	info = stackinfo_create();
	info->uid = myuid;
	/* Get a stack of the object and all of its parents. */
	info->stack = object_parents_stack(THISFD, 1); /* cd's to somewhere.. */
	/* Check the stack before touching it, not after. */
	assert(info->stack != NULL);
	info->stack->method = argv[2];

	/* Get fd's. */
	newfds[0] = getfd(client);
	newfds[1] = getfd(client);
	newfds[2] = getfd(client);
	rassert(newfds[0] != -1 && newfds[1] != -1 && newfds[2] != -1);
	/* Connect to stdio fds. */
	rassert(dup2(newfds[0], 0) == 0);
	close(newfds[0]);
	rassert(dup2(newfds[1], 1) == 1);
	close(newfds[1]);
	rassert(dup2(newfds[2], 2) == 2);
	close(newfds[2]);

	/* Validate method (cd's to object that defines method). */
	if ((methtype = get_method_type(argv[2])) == invalid) {
		warn("invalid method %s called", argv[2]);
		result(client, -1, EINVAL);
		exit(0);
	}
	/* basename() may modify its argument, so work on a copy. */
	tmp = strdup(argv[2]);
	rassert(tmp != NULL);
	method = basename(tmp);

	/* Passing a file with a callstack in it can only be done by
	 * mooadmin. If this is done, the passed stack is used instead of
	 * the caller's stack. */
	if (argv[0][0] != '\0') {
		if (callstack_is_subset(callerinfo->stack, mooadminstack)) {
			callerinfo->stack = callstack_load(argv[0], 0);
		}
		else {
			warn("rejected attempt to set stack to %s", argv[0]);
			result(client, -1, EPERM);
			exit(0);
		}
	}

	/* If the method is stackless, then the caller's stack is added
	 * after a stack boundary. Otherwise, it is just added. */
	if (methtype == normal) {
		/* postpone expensive calculation */
		info->important_ok = unknown;
	}
	else if (methtype == stackless) {
		/* important_ok on by definition */
		info->important_ok = true;
		if (callerinfo->stack)
			callerinfo->stack->boundry = 1;
	}
	else {
		die("unknown method type");
	}
	info->callerstack = callerinfo->stack;
	/* Save a new stack that has this object on top (without all of its
	 * parents), and then the callerstack. The parent chain is unhooked
	 * only for the duration of the dump. */
	f = fopen(callstack_file(myuid), "w");
	rassert(f != NULL);
	tmpnext = info->stack->next;
	info->stack->next = NULL;
	callstack_dump(info->stack, f);
	info->stack->next = tmpnext;
	callstack_dump(callerinfo->stack, f);
	fclose(f);

	/* Set up communications socket. */
	snprintf(sockfile, sizeof(sockfile), "%s/%i.sock", RUNDIR, myuid);
	childsocket = bindsocket(sockfile, 0600);
	rassert(chown(sockfile, myuid, 0) != -1);

	/* Install handler before forking child to prevent races. */
	client_handle = client;
	child_exit = 0;
	signal(SIGCHLD, childhandler);

	profile("start", this, argv[2]);

	rassert((child_pid = fork()) != -1);
	if (child_pid == 0) {
		/* child */
		char firstline[32];
		char *interp = NULL;
		int i;
		FILE *script;

		/* Switch uid and group to myuid, and drop any other groups. */
		if (setgroups(0, NULL) == -1 ||
		    setgid(myuid) == -1 ||
		    setuid(myuid) == -1)
			die("error dropping permissions: %m");

		close(childsocket);
#ifdef PROFILE
		fclose(profilef);
#endif
		signal(SIGCHLD, SIG_DFL);

		/* Needed for regular methods, but also for embedded langs
		 * in case it execs something. */
		setenv("LD_PRELOAD", libmooproxy, 1);

		/* Load in first line of file, to check its interpreter. */
		script = fopen(method, "r");
		if (script != NULL) {
			if (fgets(firstline, sizeof(firstline), script))
				interp = hashbang(firstline);
			fclose(script);
		}

		if (interp) {
			for (i = 0; embedded[i].run_func != 0; i++) {
				if (strcmp(interp, embedded[i].interp) == 0) {
					debug("running %s with embedded %s", method, interp);
					mooix_proxy_enable(1); /* turn proxying on for method */
					rassert(fchdir(THISFD) == 0);
					(*embedded[i].run_func)(argc - 2, argv + 2, this, sockfile, method, mooix_debug);
					if (embedded[i].end_func != 0) {
						(*embedded[i].end_func)();
					}
					exit(0);
				}
			}
		}

		/* no embedded interpreter found */
		debug("running %s with execv", method);
		run_noembed(argc - 2, argv + 2, this, sockfile, method, mooix_debug);
		/* Only reached if run_noembed() came back; report why. */
		result(client, -1, errno);
		exit(-1);
	}
	else {
		/* parent */

		/* Drop root perms; it is sufficient to run with user and
		 * group set to whatever user owns the object the method
		 * will be accessing. But, if the object is owned by root,
		 * we refuse to run as root, instead dropping back to the
		 * mooadmin user. */
		rassert(setgroups(0, NULL) == 0);
		rassert(setgid(st_buf.st_gid) == 0);
		if (st_buf.st_uid == 0) {
			rassert(setuid(mooadminuid) == 0);
		}
		else {
			rassert(setuid(st_buf.st_uid) == 0);
		}

		/* Close stdin and out, because they are unneeded now, and
		 * the child needs to be able to close stdout to let its
		 * parent know it's done reporting results, which won't
		 * work if we still have it open. */
		close(0);
		close(1);

		/* Go to THISFD; every file we will act on from here out is
		 * in there. */
		rassert(fchdir(THISFD) == 0);

		/* Now just wait for a SIGCHLD while proxying children's
		 * requests. This returns when there are no more children. */
		proxy(childsocket, myuid, info);

		profile("stop", this, argv[2]);

		if (child_pid != 0) {
			/* If it gets here then the child process has
			 * exited, but the SIGCHLD has not quite yet been
			 * delivered. But it will be soon. So just sleep
			 * waiting for it. Note that it may be delivered
			 * in between the above if and entering the loop,
			 * which is why pause() cannot be used.
			 */
			while (! child_exit)
				sleep(1);
		}
	}
}

/*
 * Helper for kill_handler and destroy_handler: signals the method running
 * as one uid, if that method belongs to the object being killed. The
 * request itself is passed in through the three globals below.
 */
struct callstack *kill_obj; /* the object that is to be killed. */
int kill_sig; /* the signal to send. */
uid_t no_kill_uid; /* don't kill methods running in this uid */
void kill_helper (uid_t uid) {
	struct callstack *top;
	pid_t killer;

	/* Never signal uids that are not valid, nor the uid that was
	 * excluded from this request. */
	if (! validuid(uid) || no_kill_uid == uid)
		return;

	/* Only the top entry of the uid's callstack is needed. A null
	 * stack is possible, if the moo removed it because the process
	 * died, or if we loaded it when the moo was writing it. */
	top = callstack_load(callstack_file(uid), 1);
	if (top == NULL)
		return;

	/* Nothing to do unless the top of that stack is the target. */
	if (top->dev != kill_obj->dev || top->inode != kill_obj->inode)
		return;

	/* Fork off a child process that will assume the method's uid and
	 * send the signal; this process just waits for it. */
	killer = fork();
	assert(killer != -1);
	if (killer != 0) {
		waitpid(killer, NULL, 0);
		return;
	}

	if (setuid(uid) != -1)
		kill(-1, kill_sig);
	exit(0);
}

/*
 * The kill command requires: uid, signal
 *
 * If given 0, sends the signal to every running method of the object
 * except the caller's. If given a positive uid, signals the method running
 * as that uid. Negative numbers are not supported and are refused with
 * EINVAL, as is a number that does not fit in a uid_t.
 *
 * NOTE: No guarantees are made about racing; if an object's method is just
 * starting when this is called, it may avoid being signaled.
 */
void kill_handler (stackinfo *callerinfo, int client, int argc, char **argv) {
	/* Parse into a signed type first: uid_t may be unsigned, in which
	 * case a "uid < 0" test on it could never be true. */
	long requested = strtol(argv[0], NULL, 10);
	uid_t uid = (uid_t) requested;
	stackinfo *info;

	/* Load up the stack for the object we're run on. */
	info = stackinfo_create();
	info->stack = object_parents_stack(THISFD, 1);
	assert(info->stack != NULL);
	info->callerstack = callerinfo->stack;
	info->important_ok = unknown;

	if (requested < 0 || (long) uid != requested) {
		result(client, -1, EINVAL);
		return;
	}
	if (! can_signal(info)) {
		result(client, -1, errno);
		return;
	}

	/* Globals for kill_helper. */
	kill_sig = atoi(argv[1]);
	kill_obj = info->stack;

	if (requested == 0) {
		/* Kill every method of object except the caller. */
		no_kill_uid = callerinfo->uid;

		/*
		 * Now to send the signal. The question is, which methods
		 * to send it to? The only current way to find out is to
		 * iterate over all the callstack files, looking at the
		 * topmost entry to see if it matches the object, and if so
		 * sending the signal.
		 */
		callstack_walk(&kill_helper);
	}
	else {
		/* Kill a single method by given uid. */
		no_kill_uid = 0;
		kill_helper(uid);
	}

	/* Delivery is best effort, so the request itself succeeded. */
	result(client, 0, 0);
}

/*
 * Ok this is not a regular unix syscall, but the clients can use it to
 * request a full destruction of an object. This includes removing its
 * directory, and killing all running methods.
 *
 * Takes no arguments. Exactly one result is sent to the client: 0 when the
 * object was removed, or -1 with the errno of the permission check or of
 * the failed removal.
 */
void destroy_handler (stackinfo *callerinfo, int client, int argc, char **argv) {
	int ret = 0;
	int err = 0;
	stackinfo *info;
	char *buf, *path;

	/* Load up the stack for the object we're run on. */
	info = stackinfo_create();
	info->stack = object_parents_stack(THISFD, 1);
	assert(info->stack != NULL);
	info->callerstack = callerinfo->stack;
	info->important_ok = unknown;

	if (! (can_unlink(info, MOOFILE) && can_rmdir(info, "."))) {
		result(client, -1, errno);
		return;
	}

	/* rmdir() does not accept "." as a name, so resolve the
	 * directory's real path first. The result is kept in its own
	 * variable so the buffer is not lost if realpath() fails. */
	buf = malloc(PATH_MAX * sizeof(char));
	rassert(buf != NULL);
	path = realpath(".", buf);
	rassert(path != NULL);

	/* Remove parent link, if it exists. */
	unlink("parent");
	/* Remove .mooix file and object directory. Remember a failure
	 * rather than reporting it now, so that the client only ever
	 * receives a single result for this request. */
	if (unlink(MOOFILE) != 0 ||
	    rmdir(path) != 0) {
		ret = -1;
		err = errno;
	}
	free(buf);

	/* kill -9 all methods of the object, including the running
	 * method. The running method is killed too just to prevent
	 * evil objects from appearing to destroy themselves, but
	 * keeping a method running. */
	kill_sig = SIGKILL;
	kill_obj = info->stack;
	no_kill_uid = 0; /* kill em all */
	callstack_walk(&kill_helper);

	result(client, ret, err);
}

/*
 * The open call requires: filename, flags, mode
 *
 * Mode should already be modified by caller's umask. The outcome of the
 * open is reported to the client first; on success the new descriptor is
 * then passed across and the local copy closed.
 */
void open_handler (stackinfo *info, int client, int argc, char **argv) {
	char *path = argv[0];
	int oflags = atoi(argv[1]);
	mode_t perms = atoi(argv[2]);
	int newfd;

	/* Only proxy opening of files that are in this object. */
	rassert(is_object_file(path));

	if (! can_open(info, path, oflags, perms)) {
		result(client, -1, errno);
		return;
	}

	umask(0); /* mode already modified by caller's umask */
	errno = 0;
	/* O_NOFOLLOW closes many potential symlink attacks */
	newfd = open(path, oflags | O_NOFOLLOW, perms);
	result(client, newfd, errno);
	if (newfd == -1)
		return;

	sendfd(client, newfd);
	close(newfd);
}

/*
 * The unlink call requires: filename
 *
 * The file must be part of this object and the caller must pass the
 * can_unlink() check; the client is sent unlink()'s return value and
 * errno, or -1 if the check refused.
 */
void unlink_handler (stackinfo *info, int client, int argc, char **argv) {
	char *target = argv[0];
	int status;

	rassert(is_object_file(target));

	if (! can_unlink(info, target)) {
		result(client, -1, errno);
		return;
	}

	errno = 0;
	status = unlink(target);
	result(client, status, errno);
}

/*
 * The symlink command requires: oldpath, newpath
 *
 * Only newpath, the link being created, is checked for being part of this
 * object; oldpath is stored in the link as given.
 */
void symlink_handler (stackinfo *info, int client, int argc, char **argv) {
	char *target = argv[0];
	char *linkname = argv[1];
	int status;

	rassert(is_object_file(linkname));

	if (! can_create(info, linkname)) {
		result(client, -1, errno);
		return;
	}

	errno = 0;
	status = symlink(target, linkname);
	result(client, status, errno);
}

/*
 * The mkdir command requires: pathname, mode
 *
 * Mode should already be modified by the caller's umask. A mode that
 * valid_dir_mode() rejects is reported as EINVAL without consulting
 * can_create().
 */
void mkdir_handler (stackinfo *info, int client, int argc, char **argv) {
	char *dir = argv[0];
	mode_t perms = atoi(argv[1]);
	int status = -1;

	rassert(is_object_file(dir));

	if (! valid_dir_mode(info, perms)) {
		errno = EINVAL;
		result(client, status, errno);
		return;
	}

	if (can_create(info, dir)) {
		umask(0); /* mode already modified by caller's umask */
		errno = 0;
		status = mkdir(dir, perms);
	}
	result(client, status, errno);
}

/*
 * The rmdir command requires: dirname
 *
 * The directory must be part of this object and the caller must pass the
 * can_rmdir() check; the client is sent rmdir()'s return value and errno,
 * or -1 if the check refused.
 */
void rmdir_handler (stackinfo *info, int client, int argc, char **argv) {
	char *dir = argv[0];
	int status;

	rassert(is_object_file(dir));

	if (! can_rmdir(info, dir)) {
		result(client, -1, errno);
		return;
	}

	errno = 0;
	status = rmdir(dir);
	result(client, status, errno);
}

/*
 * The chmod command requires: path, mode
 *
 * The client is sent fchmod()'s return value and errno, or -1 if the file
 * could not be opened or the can_chmod() check refused.
 */
void chmod_handler (stackinfo *info, int client, int argc, char **argv) {
	int ret = -1;
	int saved_errno;
	char *path = argv[0];
	mode_t mode = atoi(argv[1]);
	int fd;

	rassert(is_object_file(path));

	/* To avoid attacks, this actually does a fchmod, after
	 * opening the file with O_NOFOLLOW. The only catch is that this
	 * requires that the file be readable in the first place. */
	fd = open(path, O_RDONLY | O_NOFOLLOW);
	if (fd != -1) {
		if (can_chmod(info, path, mode)) {
			/* Start from a clean errno, like the other
			 * handlers, so that a success carries 0. */
			errno = 0;
			ret = fchmod(fd, mode);
		}
		/* Don't let close() replace the errno being reported. */
		saved_errno = errno;
		close(fd);
		errno = saved_errno;
	}
	result(client, ret, errno);
}

/*
 * The rename command requires: oldpath, newpath
 *
 * Both names must be part of this object. Renaming is allowed when the
 * caller may create newpath and may unlink oldpath, checked in that order.
 */
void rename_handler (stackinfo *info, int client, int argc, char **argv) {
	char *from = argv[0];
	char *to = argv[1];
	int status;

	rassert(is_object_file(from));
	rassert(is_object_file(to));

	if (! (can_create(info, to) && can_unlink(info, from))) {
		result(client, -1, errno);
		return;
	}

	errno = 0;
	status = rename(from, to);
	result(client, status, errno);
}

/*
 * The connect command requires: socket (fd) and socket path
 *
 * The socket descriptor is read from the client, connected to the unix
 * socket at path on the client's behalf, and then closed here; the client
 * keeps its own descriptor for the socket. A path too long for a unix
 * socket address is refused with ENAMETOOLONG.
 */
void connect_handler (stackinfo *info, int client, int argc, char **argv) {
	int ret = -1;
	int sockfd = getfd(client);
	char *path = argv[0];

	rassert(sockfd != -1);
	rassert(is_object_file(path));

	if (can_open(info, path, O_RDWR, 0)) {
		struct sockaddr_un sunx;
		size_t pathlen = strlen(path);

		if (pathlen >= sizeof(sunx.sun_path)) {
			/* Refuse rather than connect to a truncated,
			 * possibly unterminated, name. */
			errno = ENAMETOOLONG;
		}
		else {
			memset(&sunx, 0, sizeof(sunx));
			sunx.sun_family = AF_UNIX;
			memcpy(sunx.sun_path, path, pathlen + 1);
			errno = 0;
			ret = connect(sockfd, (struct sockaddr *) &sunx,
			              sizeof(sunx.sun_family) + pathlen);
		}
	}
	result(client, ret, errno);

	/* The descriptor received from the client is our own copy; without
	 * this every connect request would leak one fd. */
	close(sockfd);
}

/* This is the dispatch table for commands. proxycommand() indexes it by
 * the type of the command read from the client. */
struct commands_t {
   	void (*handler)(); /* the handler function; the parameter list is
			      left empty because proxycommand() calls it
			      with an extra uid argument for privileged
			      commands */
	signed int argc; /* number of args; -1 means don't check */
	int privs; /* must the command only be run as root? */
} commands[] = {
	/* Note that order in the table must match the number
	 * defined for the command. So put them in the same order as the
	 * commands are listed in the enum in mooix.h. */
	{execv_handler,        -1, 1},  // command_execv
	{kill_handler,          2, 1},  // command_kill
	{destroy_handler,       0, 1},  // command_destroy
	{open_handler,          3, 0},  // command_open
	{unlink_handler,        1, 0},  // command_unlink
	{symlink_handler,       2, 0},  // command_symlink
	{mkdir_handler,         2, 0},  // command_mkdir
	{rmdir_handler,         1, 0},  // command_rmdir
	{chmod_handler,         2, 0},  // command_chmod
	{rename_handler,        2, 0},  // command_rename
	{connect_handler,	1, 0},  // command_connect
};

/*
 * Read a command from a client and handle it.
 *
 * This runs in one of two ways. If the uid is set, it is being run as
 * root, and it runs only those commands which need to be run in privileged
 * mode.  The uid is a preallocated uid that it may use if it needs to run
 * a method. It will be passed to the handler function.  If the uid is not
 * set, it is being run as a user, and it runs only the commands that can
 * be run as users.
 *
 * Returns -1 if no command could be read (the other end hung up), and 0
 * once a command has been dispatched.
 */
int proxycommand (int client, stackinfo *info, int uid) {
	struct command_t *cmd = getcommand(client);

	/* This means the other end hung up. */
	if (cmd == NULL)
		return -1;

	/* Validate and dispatch command. The type comes from the client
	 * and selects a function pointer, so besides the enum limit it is
	 * also checked against the real size of the table; the unsigned
	 * comparison rejects negative values as well. */
	rassert(cmd->type < command_last);
	rassert((size_t) cmd->type < sizeof(commands) / sizeof(commands[0]));
	rassert(commands[cmd->type].handler != NULL);
	if (commands[cmd->type].argc > -1) {
		rassert(commands[cmd->type].argc == cmd->argc);
	}
	if (uid > 0) {
		rassert(commands[cmd->type].privs);
		(commands[cmd->type].handler)(info, client, cmd->argc, cmd->argv, uid);
	}
	else {
		rassert(! commands[cmd->type].privs);
		(commands[cmd->type].handler)(info, client, cmd->argc, cmd->argv);
	}

	freecommand(cmd);
	return 0;
}
