/* **********************************************************
 * Copyright (C) 2004 VMware, Inc. All rights reserved. 
 * -- VMware Confidential
 * **********************************************************/

/*
 * os.c --
 *
 *      Wrappers for Linux system functions required by "vmdesched".
 *      This allows customers to build their own vmdesched driver for
 *      custom versioned kernels without the need for source code.
 *
 *      Linux 2.4.x and 2.6.x kernels are supported, with the exception
 *      that SMP kernels without set_cpus_allowed() are not supported.
 *      Normally this means kernels below 2.4.21 are not supported, but
 *      some distribution-specific kernels have this function backported.
 */

#include <linux/config.h>
#ifdef CONFIG_SMP
#define __SMP__
#endif

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/ioport.h>
#include <linux/errno.h>
#include <linux/proc_fs.h>
#include <linux/smp_lock.h>
#include <linux/version.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
#error vmdesched module does not support 2.2.x kernels
#endif

#include "compat_kernel.h"
#include "compat_sched.h"
#include "compat_completion.h"

#include "os.h"
#include "vmdesched_core.h"

/*
 * Types
 */

/*
 * Timer handle type. The member used to hold the callback depends on the
 * kernel series: a timer_list on 2.6.x and later, a task-queue entry
 * (tq_struct) on 2.4.x.
 *
 * Not referenced by the code visible in this file; the timer wrappers below
 * operate on the file-scope globalTimer instead.
 */
struct os_timer {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   struct timer_list callback;
#else
   struct tq_struct callback;
#endif
};

/*
 * Per-thread bookkeeping for a kernel thread started through
 * os_start_kernel_thread(). Instances live in the static threads[] array and
 * are handed out by os_init_thread_data_ptr().
 */
struct os_thread_data {
   wait_queue_head_t   waitQueue;      // slept on in os_thread_interruptible_sleep()
   compat_completion   exitCompletion; // completed by os_global_thread_starter() on exit
   struct task_struct *taskStruct;     // set to "current" by the thread itself at startup
   volatile int        wake;           // set by another thread (os_thread_wake_up()),
                                       // cleared by the sleeper after each wait
   os_thread_function  function;       // thread body, stored by os_start_kernel_thread()
   void               *function_param; // argument handed to function
};


/*
 * Globals
 */

/* Fixed pool of thread slots; indexed by the id given to os_init_thread_data_ptr(). */
static os_thread_data threads[OS_MAX_CPUS];
/* The one kernel timer driven by os_init_timer / os_add_timer_callback / os_del_timer_sync. */
static struct timer_list globalTimer;


/*
 *----------------------------------------------------------------------------
 *
 * os_global_thread_starter --
 *
 *      Entry point handed to kernel_thread() by os_start_kernel_thread().
 *      Records the new thread's task struct in its os_thread_data slot, runs
 *      the user-supplied thread function with its parameter, and finally
 *      signals exitCompletion with the function's return value as the exit
 *      code.
 *
 * Results:
 *      Never returns; the thread exits inside compat_complete_and_exit().
 *
 * Side effects:
 *      Writes threadData->taskStruct and completes
 *      threadData->exitCompletion.
 *
 *----------------------------------------------------------------------------
 */

static int
os_global_thread_starter(void *threadData_v)
{
   os_thread_data *self = threadData_v;
   int exitCode;

   /* Publish our task struct so os_thread_stop() can find our pid. */
   self->taskStruct = current;

   exitCode = self->function(self->function_param);

   compat_complete_and_exit(&self->exitCompletion, exitCode);

   /* Not reached; present only to satisfy the int return type. */
   return 0;
}


/*
 *----------------------------------------------------------------------------
 *
 * os_global_simple_proc_read_node_callback --
 *
 *      Adapter between the Linux proc read() callback signature and the much
 *      simpler os_simple_proc_read_callback interface. The simple callback
 *      is passed in as the entry's private data pointer and is invoked with
 *      only the output buffer; start, offset and count are ignored.
 *
 * Results:
 *      Whatever the simple callback returns.
 *
 * Side effects:
 *      Invokes the callback and sets *eof to 1.
 *
 *----------------------------------------------------------------------------
 */

static int
os_global_simple_proc_read_node_callback(char *buf,
                                         char **start,
                                         off_t offset,
                                         int count,
                                         int *eof,
                                         void *callback_v)
{
   os_simple_proc_read_callback readFn =
      (os_simple_proc_read_callback) callback_v;
   int length = readFn(buf);

   /* The whole node is produced in a single call. */
   *eof = 1;

   return length;
}


/*
 *----------------------------------------------------------------------------
 *
 * os_dequeue_signal_current --
 *
 *      Dequeues a signal for the current thread while holding the thread's
 *      sigmask lock with interrupts disabled.
 *
 * Results:
 *      None. The dequeued signal information is discarded.
 *
 * Side effects:
 *      Takes and releases current's sigmask lock; a pending signal, if
 *      any, is consumed.
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_dequeue_signal_current(void)
{
   siginfo_t discarded;   /* filled in by the dequeue, never inspected */

   spin_lock_irq(&current->compat_sigmask_lock);
   compat_dequeue_signal_current(&discarded);
   spin_unlock_irq(&current->compat_sigmask_lock);
}


/*
 *----------------------------------------------------------------------------
 *
 * os_add_timer_callback --
 *
 *      Arms the global timer so that the given callback fires as soon as
 *      possible (expiry is set to the current jiffies value). The callback
 *      receives param, carried through the timer's unsigned long data field.
 *
 *      NOTE(review): the callback is cast to the kernel's
 *      void (*)(unsigned long) timer signature; this presumes both types
 *      share a calling convention -- confirm against os.h.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Overwrites globalTimer's expiry, function and data, and (re)arms it
 *      through mod_timer().
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_add_timer_callback(os_timer_callback callback,
                      void *param)
{
   struct timer_list *timer = &globalTimer;

   timer->expires  = jiffies;
   timer->function = (void (*)(unsigned long)) callback;
   timer->data     = (unsigned long) param;

   mod_timer(timer, timer->expires);
}


/*
 *----------------------------------------------------------------------------
 *
 * os_init_timer --
 *
 *      Wrapper for init_timer(). Initializes the file-scope globalTimer;
 *      takes no arguments because this module only ever uses that one timer.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Initializes globalTimer.
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_init_timer(void)
{
   init_timer(&globalTimer);
}


/*
 *----------------------------------------------------------------------------
 *
 * os_del_timer_sync --
 *
 *      Wrapper for del_timer_sync(). Removes our global timer
 *      (synchronously). The return value of del_timer_sync() is discarded.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Deactivates globalTimer.
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_del_timer_sync(void)
{
   del_timer_sync(&globalTimer);
}


/*
 *----------------------------------------------------------------------------
 *
 * os_smp_num_cpus --
 *
 *      Reports how many CPUs are active. On 2.6.x and later this is
 *      num_online_cpus(); on older kernels it is the smp_num_cpus value.
 *
 * Results:
 *      Number of CPUs.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------------
 */

unsigned int CDECL
os_smp_num_cpus(void)
{
   unsigned int cpuCount;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   cpuCount = num_online_cpus();
#else
   cpuCount = smp_num_cpus;
#endif

   return cpuCount;
}


/*
 *----------------------------------------------------------------------------
 *
 * os_set_cpu_affinity_current --
 *
 *      Sets the current thread's CPU affinity from a single-word bitmask
 *      (bit N set == CPU N allowed).
 *
 *      On 2.6.x the kernel expects a cpumask_t, so the word is copied into
 *      the start of a fully zeroed cpumask_t. On 2.4.x the word is passed to
 *      set_cpus_allowed() directly.
 *
 *      XXX This doesn't work on 2.6.x kernels. See bug 60580.
 *
 * Results:
 *      None. The result of set_cpus_allowed() is discarded.
 *
 * Side effects:
 *      Changes thread affinity.
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_set_cpu_affinity_current(unsigned long cpuMask)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   /*
    * XXX This won't work if there's padding at the beginning of the
    *     cpumask_t structure, or if there are actually more than
    *     sizeof(unsigned long) << 3 CPUs on the host. We assume that neither
    *     of these cases matter...
    */
   cpumask_t kernelCpuMask;

   /*
    * Zero the ENTIRE cpumask_t. Clearing only sizeof(cpuMask) bytes would
    * leave every word past the first holding stack garbage whenever
    * cpumask_t is wider than one unsigned long, and that garbage would be
    * handed to the scheduler as allowed CPUs.
    */
   memset(&kernelCpuMask, 0, sizeof(kernelCpuMask));
   memcpy(&kernelCpuMask, &cpuMask, sizeof(cpuMask));
   set_cpus_allowed(current, kernelCpuMask);
#else
   set_cpus_allowed(current, cpuMask);
#endif
}


/*
 *----------------------------------------------------------------------------
 *
 * os_set_highest_priority_current --
 *
 *      Set the current thread's priority to the max: the scheduling policy
 *      becomes SCHED_FIFO and the real-time priority becomes 99.
 *
 *      NOTE(review): the task fields are written directly rather than
 *      through a scheduler call -- confirm this takes effect as intended on
 *      every supported kernel.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Modifies current->policy and current->rt_priority.
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_set_highest_priority_current(void)
{
   current->policy      = SCHED_FIFO;
   current->rt_priority = 99;
}


/*
 *----------------------------------------------------------------------------
 *
 * os_signal_pending_current --
 *
 *      See if there's a signal pending for the current thread. Thin wrapper
 *      around signal_pending(current); the signal itself is not consumed
 *      here (see os_dequeue_signal_current()).
 *
 * Results:
 *      Non-zero value if there is a signal pending to dequeue.
 *
 * Side effects:
 *      None visible in this function.
 *
 *----------------------------------------------------------------------------
 */

int CDECL
os_signal_pending_current(void)
{
   return signal_pending(current);
}


/*
 *----------------------------------------------------------------------------
 *
 * os_thread_interruptible_sleep --
 *
 *      Puts the CALLING thread to sleep on threadData->waitQueue until
 *      threadData->wake becomes non-zero (see os_thread_wake_up()), then
 *      clears the flag.
 *
 *      The wait is interruptible and its result is discarded, so presumably
 *      this can also return early when a signal arrives; callers can tell
 *      the cases apart with os_signal_pending_current(). The wake flag is
 *      cleared on either path.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      May block; resets threadData->wake to 0.
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_thread_interruptible_sleep(os_thread_data *threadData)
{
   wait_event_interruptible(threadData->waitQueue, threadData->wake);
   // Consume the wake-up so the next sleep blocks again.
   threadData->wake = 0;
}


/*
 *----------------------------------------------------------------------------
 *
 * os_thread_wake_up --
 *
 *      Wake up the specified thread: raise its wake flag, then wake anything
 *      sleeping on its wait queue (see os_thread_interruptible_sleep()).
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Sets threadData->wake to 1 and calls wake_up() on
 *      threadData->waitQueue.
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_thread_wake_up(os_thread_data *threadData)
{
   // Flag first, wake second: the sleeper re-checks the flag when woken.
   threadData->wake = 1;
   wake_up(&threadData->waitQueue);
}


/*
 *----------------------------------------------------------------------------
 *
 * os_start_kernel_thread --
 *
 *      Starts a new kernel thread. The thread begins in
 *      os_global_thread_starter(), which records its task struct in
 *      threadData and then runs func(param).
 *
 *      threadData should come from os_init_thread_data_ptr() so that its
 *      wait queue and exit completion are initialized.
 *
 * Results:
 *      None. Because the interface cannot report failure, a failed
 *      kernel_thread() call is logged to the kernel log instead of being
 *      silently dropped.
 *
 * Side effects:
 *      Stores func and param in threadData and fires up a new thread.
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_start_kernel_thread(os_thread_data *threadData,
                       os_thread_function func,
                       void *param)
{
   long pid;

   /* Must be in place before the thread starts: the starter reads both. */
   threadData->function       = func;
   threadData->function_param = param;

   pid = kernel_thread(os_global_thread_starter, threadData, 0);
   if (pid < 0) {
      /*
       * No thread exists: taskStruct stays unset and exitCompletion will
       * never be completed. Leave a trace so the failure is diagnosable.
       */
      printk(KERN_WARNING "vmdesched: kernel_thread failed (%ld)\n", pid);
   }
}


/*
 *----------------------------------------------------------------------------
 *
 * os_thread_stop --
 *
 *      Sends a kill signal to the specified thread, then waits for it to die.
 *
 *      The target is identified by the pid of threadData->taskStruct, which
 *      the thread records about itself in os_global_thread_starter().
 *
 *      NOTE(review): taskStruct is dereferenced unconditionally, so this
 *      assumes the thread was started successfully and has already begun
 *      running -- confirm that callers guarantee this.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Blocks on threadData->exitCompletion, which the thread completes
 *      (via compat_complete_and_exit()) once its thread function returns.
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_thread_stop(os_thread_data *threadData)
{
   // Third argument is kill_proc()'s "priv" flag -- presumably marks the
   // signal as kernel-originated; verify against the kernel in use.
   kill_proc(threadData->taskStruct->pid, SIGKILL, 1);
   compat_wait_for_completion(&threadData->exitCompletion);
}


/*
 *----------------------------------------------------------------------------
 *
 * os_init_thread_data_ptr --
 *
 *      The OS-dependent thread-data structs live in the static threads[]
 *      array, while OS-agnostic code only ever holds pointers to them. Given
 *      an array index, this prepares that slot for use and hands back a
 *      pointer to it.
 *
 *      An out-of-range index touches nothing.
 *
 * Results:
 *      Pointer to the initialized thread data structure, or NULL if id is
 *      negative or not below OS_MAX_CPUS.
 *
 * Side effects:
 *      Initializes the slot's wait queue and exit completion and clears its
 *      wake flag. The remaining fields are left as they were.
 *
 *----------------------------------------------------------------------------
 */

os_thread_data* CDECL
os_init_thread_data_ptr(int id)
{
   os_thread_data *slot = NULL;

   if (id >= 0 && id < OS_MAX_CPUS) {
      slot = &threads[id];

      init_waitqueue_head(&slot->waitQueue);
      compat_init_completion(&slot->exitCompletion);
      slot->wake = 0;
   }

   return slot;
}


/*
 *----------------------------------------------------------------------------
 *
 * os_sprintf --
 *
 *      Wrapper for kernel's sprintf function. Formats the variable
 *      arguments into str according to format.
 *
 *      No output bound is passed to vsprintf(), so the caller must supply a
 *      buffer large enough for the formatted result.
 *
 * Results:
 *      Return value of vsprintf with the specified arguments.
 *
 * Side effects:
 *      Writes the formatted string into str.
 *
 *----------------------------------------------------------------------------
 */

int CDECL
os_sprintf(char *str, const char *format, ...)
{
   va_list args;
   int written;

   va_start(args, format);
   written = vsprintf(str, format, args);
   va_end(args);   // every va_start must be paired with va_end before returning

   return written;
}


/*
 *----------------------------------------------------------------------------
 *
 * os_daemonize --
 *
 *      Wrapper for daemonize() kernel function, reached through the
 *      compat_daemonize() compatibility layer. The given name is passed
 *      straight through.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Whatever compat_daemonize() does to the calling thread.
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_daemonize(char *name)
{
   compat_daemonize(name);
}


/*
 *----------------------------------------------------------------------------
 *
 * os_create_simple_proc_read_node --
 *
 *      Registers a read-only proc node whose contents are produced by a
 *      very simple callback: it is given just the output buffer and returns
 *      a length. The adapter
 *      os_global_simple_proc_read_node_callback() bridges to the kernel's
 *      real read signature.
 *
 *      The big assumption is that the callback never generates more than
 *      one page of data. If it does, the side effects are undefined
 *      (possibly very bad).
 *
 * Results:
 *      None. The result of create_proc_read_entry() is discarded, so a
 *      failed registration goes unreported.
 *
 * Side effects:
 *      Creates the proc entry.
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_create_simple_proc_read_node(char *procNodeName,
                                os_simple_proc_read_callback func)
{
   /*
    * Mode 0 and a NULL parent are passed through unchanged; func rides along
    * as the entry's private data and is recovered by the adapter.
    */
   create_proc_read_entry(procNodeName, 0, NULL,
                          os_global_simple_proc_read_node_callback,
                          (void *) func);
}


/*
 *----------------------------------------------------------------------------
 *
 * os_remove_simple_proc_node --
 *
 *      Wrapper for remove_proc_entry. Remove the specified proc node entry.
 *      The parent argument is NULL, matching the NULL parent used by
 *      os_create_simple_proc_read_node().
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Removes the proc entry.
 *
 *----------------------------------------------------------------------------
 */

void CDECL
os_remove_simple_proc_node(char *procNodeName)
{
   remove_proc_entry(procNodeName, NULL);
}


/*
 *----------------------------------------------------------------------------
 *
 * init_module --
 * cleanup_module --
 *
 *      Our module init and cleanup routines. These need to be in the
 *      OS-dependent code since they're exports, and have to obey the kernel's
 *      calling convention (note: no CDECL here, unlike the os_* wrappers);
 *      they are wrappers for our real initialization and cleanup routines.
 *
 * Results:
 *      init_module returns whatever VmDesched_Init() returns;
 *      cleanup_module returns nothing.
 *
 * Side effects:
 *      See VmDesched_Init/Exit().
 *
 *----------------------------------------------------------------------------
 */

int
init_module(void)
{
   return VmDesched_Init();
}

/* Module unload entry point: delegates to VmDesched_Exit(). */
void
cleanup_module(void)
{
   VmDesched_Exit();
}


