/* **********************************************************
 * Copyright (C) 1998-2000 VMware, Inc.
 * All Rights Reserved
 * **********************************************************/
#ifdef VMX86_DEVEL
char rcsId_driver[] = "$Id: driver.c,v 1.4 2003/02/16 15:32:34 bad Exp $";
#else
#   define FILECODE "F(300)"
#endif

#include "driver-config.h"
/*
 * hostif.h must come before meaningful linux headers, meaningful is
 * defined as anything which includes asm/page.h or else this could happen
 *    someone includes <asm/page.h>
 *    someone now includes "hostif.h" ... the plot thickens
 *      vmmon/common/hostif.h
 *      #ifdef KERNEL_2_1
 *      # include "vm_types.h"
 *      # undef PAGE_SIZE
 *      # undef PAGE_MASK
 *      # undef PAGE_OFFSET
 *      # undef APIC_ID_MASK
 *      oops, we already included asm/page.h, our compile will fail :-(
 *      # include <asm/page.h>
 */
#include "hostif.h"

#ifdef KERNEL_2_1
#define EXPORT_SYMTAB
#endif

extern int errno;
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/file.h>
#ifdef KERNEL_2_2
#   include <linux/slab.h>
#else
#   include <linux/malloc.h>
#endif
#include <linux/interrupt.h>
#include <linux/wrapper.h>

#ifdef __SMP__
#include <linux/smp.h>
#include <linux/smp_lock.h>
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,5)
# define spin_lock_bh(lock) do { \
     start_bh_atomic(); \
     spin_lock(lock); \
  } while(0)
# define spin_unlock_bh(lock) do { \
     spin_unlock(lock); \
     end_bh_atomic(); \
  } while(0)
#endif


#if LINUX_VERSION_CODE > KERNEL_VERSION(2,1,0)
# include <linux/poll.h>
#endif

#ifdef DO_APM
#include <linux/apm_bios.h>
#endif

#define __KERNEL_SYSCALLS__
#include <asm/unistd.h>
#include <asm/io.h>

#include "driver.h"
#include "x86.h"
#include "vm_types.h"
#include "vm_assert.h"
#include "modulecall.h"
#include "vm_asm.h"
#include "vmx86.h"
#include "initblock.h"
#include "task.h"
#include "speaker_reg.h"
#include "vtrace.h"
#include "memtrack.h"
#include "task.h"
#include "cpuid.h"

#ifdef VMX86_DEVEL
#include "private.h"
#endif

#ifdef USE_PERFCOUNTERS
#include "perfctr.h"
#endif

#ifdef SUPPORT_PASSTHROUGH
#include "passthrough.h"	// _driver_ version of passthrough.h
#endif

#include "compat_file.h"
#include "compat_wait.h"
#include "compat_uaccess.h"
#include "compat_wrapper.h"
#include "compat_pgtable.h"
#include "vmmonInt.h"

int errno;       // required for _exit()
static void LinuxDriverQueue(VMLinux *vmLinux);
static void LinuxDriverDequeue(VMLinux *vmLinux);
static Bool LinuxDriverCheckPadding(void);

#ifdef DO_APM
static int LinuxDriverAPMCallback(apm_event_t event);
static int LinuxDriverAPMstate = APM_STATE_READY;
#endif

#ifndef SUPPORT_LINUXVMWARE
#define VMWare_SetVTracer(VTrace_Set)
#endif

#ifdef VMX86_DEVEL
#define TAGNAME "(development)"
#else
#define TAGNAME "$Name: vmware-netbsd-3-2-0 $"
#endif

struct VMXLinuxState linuxState;


/*
 *----------------------------------------------------------------------
 *
 * Device Driver Interface --
 *
 *      Runs the VM by implementing open/close/ioctl functions
 *
 *
 *----------------------------------------------------------------------
 */
static int LinuxDriver_Open(struct inode * inode, struct file * filp);

static int LinuxDriver_Ioctl( struct inode *inode, struct file *filp,
         u_int iocmd, unsigned long ioarg );


#ifdef KERNEL_2_1
static int LinuxDriver_Close( struct inode *inode, struct file *filp );
static unsigned int LinuxDriver_Poll(struct file *file, poll_table *wait);
#else
static void LinuxDriver_Close( struct inode *inode, struct file *filp );
static int LinuxDriver_Poll(struct inode *inode, struct file *filp,
                            int sel_type, select_table *wait);
#endif

static void LinuxDriverSelectTimeout(VMLinux *vmLinux);

static struct file_operations vmuser_fops;


/*
 *----------------------------------------------------------------------
 *
 * VMX86_RegisterMonitor --
 *
 *      (debugging support) Should be the first function of this file
 *
 * Results:
 *
 *      Registers the module.
 *      /sbin/ksyms -a | grep VMX86_RegisterMonitor will return the base
 *      address of that function as loaded in the kernel.
 *
 *      Since this is the first function of the kernel module,
 *      the address of every other symbol can be computed by adding
 *      this base to the offset reported by nm.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */
int VMX86_RegisterMonitor(int);

#ifdef KERNEL_2_1
EXPORT_SYMBOL(VMX86_RegisterMonitor);
#else
static struct symbol_table vmx86_syms = {
#include <linux/symtab_begin.h>
   X(VMX86_RegisterMonitor),
#include <linux/symtab_end.h>
};
#endif

/*
 * Debugging anchor: reports the value it was handed through printk() and
 * hands back a fixed magic number.
 */
int
VMX86_RegisterMonitor(int value)
{
   const int magic = 1291;

   printk("/dev/vmmon: RegisterMonitor(%d) \n", value);

   return magic;
}

/*
 *----------------------------------------------------------------------
 *
 * init_module --
 *
 *      linux module entry point. Called by /sbin/insmod command
 *
 * Results:
 *      registers a device driver for a major # that depends
 *      on the uid. Add yourself to that list.  List is now in
 *      private/driver-private.c.
 *
 *----------------------------------------------------------------------
 */
int
init_module(void)
{
   int retval;

   /*
    * Refuse to load when the padding self-check fails. Nothing has been
    * registered at this point, so there is nothing to undo.
    */
   if (!LinuxDriverCheckPadding()) {
      return -ENOEXEC;
   }

   /* One-time host-side initialization, done before the device is visible */
   HostIF_InitSpinLock();
   CPUID_Init();
   Task_ProbeSysenterMSR();

   /* Initialize the file_operations structure. Because this code is always
      compiled as a module, this is fine to do it here and not in a static
      initializer --hpreg */
   memset(&vmuser_fops, 0, sizeof(vmuser_fops));
   compat_fop_set_owner(&vmuser_fops);
   compat_fop_set_poll(&vmuser_fops, LinuxDriver_Poll);
   vmuser_fops.ioctl = LinuxDriver_Ioctl;
   vmuser_fops.open = LinuxDriver_Open;
   vmuser_fops.release = LinuxDriver_Close;

#ifdef VMX86_DEVEL
   /*
    * Development build: register a plain character device. The major number
    * and device name are expected to have been filled in by
    * devel_init_module() (defined elsewhere).
    */
   devel_init_module();
   linuxState.minor = 0;
   retval = register_chrdev(linuxState.major, linuxState.deviceName,
			    &vmuser_fops);
#else
   /*
    * Release build: register as misc device "vmmon" with minor 165. The
    * major value stored here (10) is only used in the log messages below.
    */
   sprintf(linuxState.deviceName, "vmmon");
   linuxState.major = 10;
   linuxState.minor = 165;
   linuxState.misc.minor = linuxState.minor;
   linuxState.misc.name = linuxState.deviceName;
   linuxState.misc.prev = 0;
   linuxState.misc.next = 0;
   linuxState.misc.fops = &vmuser_fops;

   retval = misc_register(&linuxState.misc);
#endif

   /*
    * Any registration failure is reported as -ENOENT; the actual error code
    * in 'retval' is not propagated.
    */
   if (retval) {
      Warning("Module %s: error registering with major=%d minor=%d tag=%s\n",
	      linuxState.deviceName, linuxState.major, linuxState.minor,
	      TAGNAME);
      return -ENOENT;
   }
   Log("Module %s: registered with major=%d minor=%d tag=%s\n",
       linuxState.deviceName, linuxState.major, linuxState.minor, TAGNAME);

#ifndef KERNEL_2_1
   /*
    * NOTE(review): the result of register_symtab() is stored but never
    * checked; a failure here is silently ignored.
    */
   retval = register_symtab(&vmx86_syms);
#endif

   /*
    * Snap shot the time stamp counter and the real time so we
    * can later compute an estimate of the cycle time.
    */

   Vmx86_SetStartTime(&linuxState.startTime);

   Log("Module %s: initialized\n", linuxState.deviceName);

   return 0;
}

/*
 *----------------------------------------------------------------------
 *
 * cleanup_module --
 *
 *      Module exit point, called by /sbin/rmmod. Unregisters the device
 *      that init_module() registered and logs the outcome.
 *
 * Results:
 *      Always 0; an unregistration failure is only reported through
 *      Warning().
 *
 *----------------------------------------------------------------------
 */

int cleanup_module(void)
{
   int status;

   /*
    * XXX smp race?
    */
#ifdef VMX86_DEVEL
   status = unregister_chrdev(linuxState.major, linuxState.deviceName);
#else
   status = misc_deregister(&linuxState.misc);
#endif

   if (status == 0) {
      Log("Module %s: unloaded\n", linuxState.deviceName);
   } else {
      Warning("Module %s: error unregistering\n", linuxState.deviceName);
   }

   return 0;
}



/*
 *----------------------------------------------------------------------
 *
 * LinuxDriver_Open  --
 *
 *      Called on open of /dev/vmmon or /dev/vmx86.$USER. Allocates and
 *      initializes the per-open VMLinux state, attaches it to the struct
 *      file and queues it.
 *
 * Results:
 *      0 on success
 *      -ENOMEM if the per-open state cannot be allocated
 *
 * Side effects:
 *      Increments the module use count (dropped again on allocation
 *      failure), which determines eventual deallocation of the module.
 *
 *----------------------------------------------------------------------
 */
static int
LinuxDriver_Open(struct inode *inode,
                 struct file *filp)
{
   VMLinux *client;

   compat_mod_inc_refcount;

   client = kmalloc(sizeof *client, GFP_KERNEL);
   if (client == NULL) {
      compat_mod_dec_refcount;

      return -ENOMEM;
   }
   memset(client, 0, sizeof *client);

   /* State used by poll()/select() and its wake-up timer */
   init_waitqueue_head(&client->selectQueue);
   client->selectTimer.function =
      (void (*)(unsigned long)) LinuxDriverSelectTimeout;
   client->selectTimer.data = (unsigned long) client;
   spin_lock_init(&client->selectLock);

   /*
    * Low-memory range list: an empty circular list is a head that points to
    * itself. The struct file is only being opened, so no other process can
    * be using it yet and the lock does not need to be held --hpreg
    */
   spin_lock_init(&client->lock4Gb);
   client->list4Gb.next = &client->list4Gb;

   filp->private_data = client;
   LinuxDriverQueue(client);

#ifdef SUPPORT_LINUXVMWARE
   VMWare_SDumpInit();
   VMWare_SDumpSend("Real men use serial ports! \n");
#endif

#ifdef DO_APM
   apm_register_callback(&LinuxDriverAPMCallback);
#endif

   return 0;
}


/*
 *-----------------------------------------------------------------------------
 *
 * LinuxDriverDestructor4Gb --
 *
 *    Tear down one low-memory range: unreserve its pages, give them back to
 *    the page allocator and free the descriptor itself --hpreg
 *
 * Results:
 *    None
 *
 * Side effects:
 *    'that' is freed and must not be used by the caller afterwards
 *
 *-----------------------------------------------------------------------------
 */

static void
LinuxDriverDestructor4Gb(MemRange *that) // IN
{
   struct page *cursor;
   struct page const *limit;
   int order;

   ASSERT(that);

   /*
    * The pages have to be unreserved before they are released, otherwise
    * free_pages() will refuse to do its job --hpreg
    */
   cursor = virt_to_page(that->kAddr);
   limit = cursor + that->numPages;
   while (cursor < limit) {
      compat_mem_map_unreserve(cursor);
      cursor++;
   }

   order = compat_get_order(that->numPages << PAGE_SHIFT);
   free_pages(that->kAddr, order);

   kfree(that);
}


/*
 *----------------------------------------------------------------------
 *
 * LinuxDriver_Close  --
 *
 *      called on close of /dev/vmmon or /dev/vmx86.$USER, most often when the
 *      process exits. Decrement use count, allowing for possible uninstalling
 *      of the module.
 *
 *----------------------------------------------------------------------
 */

/*
 * The release() file operation returns int on 2.1+ kernels and void on older
 * ones, hence the conditional return type.
 */
#ifdef KERNEL_2_1
static int
#else
static void
#endif
LinuxDriver_Close(struct inode * inode,
                  struct file * filp)
{
   VMLinux *vmLinux;
   MemRange *head;
   MemRange *first;

   vmLinux = (VMLinux *)filp->private_data;
   ASSERT(vmLinux);
   /*
    * Cancel an outstanding poll/select wake-up timer. When selectWaiting is
    * set but del_timer() reports that the timer was not pending, the lock is
    * dropped and the whole check is retried; presumably the timer handler
    * (LinuxDriverSelectTimeout, which clears selectWaiting under the same
    * lock) is running concurrently -- TODO confirm.
    */
retry:
   spin_lock_bh(&vmLinux->selectLock);
   if (vmLinux->selectWaiting) {
      if (!del_timer(&vmLinux->selectTimer)) {
         spin_unlock_bh(&vmLinux->selectLock);
         goto retry;
      }
      vmLinux->selectWaiting = FALSE;
   }
   spin_unlock_bh(&vmLinux->selectLock);
   VMWare_SetVTracer(0);
   LinuxDriverDequeue(vmLinux);
   /* Release the VM still attached to this file, if any */
   if (vmLinux->vm != NULL) {
      Vmx86_ReleaseVM(vmLinux->vm);
      vmLinux->vm = NULL;
   }

   /*
    * Destroy all remaining ranges. We are closing the struct file here, so
    * clearly no other process uses it anymore, and we do not need to hold the
    * lock --hpreg
    */
   for (head = &vmLinux->list4Gb; (first = head->next) != head; ) {
      /* Unlink the first item at the beginning of the list --hpreg */
      head->next = first->next;

      LinuxDriverDestructor4Gb(first);
   }

   /* Free the per-open state and drop the use count taken in open */
   kfree(vmLinux);
   filp->private_data = NULL;
   compat_mod_dec_refcount;
#ifdef KERNEL_2_1
   return 0;
#endif
}


/*
 *----------------------------------------------------------------------
 *
 * LinuxDriver_Poll  --
 *
 *      This is only used to wake up the idle select() at the next
 *	clock tick.
 *	We don't care what kind of select this is (read, write, or exception).
 *
 *----------------------------------------------------------------------
 */
#ifdef KERNEL_2_1

/*
 * poll() flavour for 2.1+ kernels.
 *
 * With a non-NULL 'wait' the caller is registered on selectQueue and a timer
 * expiring at jiffies + 1 is (re)armed. The return value is non-zero when
 * selectWaiting is FALSE.
 */
unsigned int
LinuxDriver_Poll(struct file *filp,
		 poll_table *wait)
{
   VMLinux *vmLinux = (VMLinux *) filp->private_data;
   int selectWaiting;
   ASSERT(vmLinux);
   if (wait != NULL) {
      /*
       * If a wake-up is already pending, its timer is removed so it can be
       * re-armed below. When del_timer() reports the timer was not pending,
       * the lock is dropped and the check is retried; presumably the timer
       * handler is running concurrently -- TODO confirm.
       */
retry:
      spin_lock_bh(&vmLinux->selectLock);
      if (vmLinux->selectWaiting) {
         if(!del_timer(&vmLinux->selectTimer)) {
            spin_unlock_bh(&vmLinux->selectLock);
            goto retry;
         }
      } else {
         vmLinux->selectWaiting = TRUE;
      }
      spin_unlock_bh(&vmLinux->selectLock);
      poll_wait(filp, &vmLinux->selectQueue, wait);
      init_timer(&vmLinux->selectTimer);
      vmLinux->selectTimer.expires = jiffies + 1;
      /*
       * NOTE(review): selectWaiting is sampled here without holding
       * selectLock, and before the timer is armed.
       */
      selectWaiting = vmLinux->selectWaiting;
      add_timer(&vmLinux->selectTimer);
   } else {
      selectWaiting = vmLinux->selectWaiting;
   }
   return !selectWaiting;
}

#else

/*
 * select() flavour for pre-2.1 kernels. Same idea as above, but the timer
 * manipulation is protected by disabling interrupts (save_flags/cli) instead
 * of a spinlock. 'sel_type' is ignored.
 */
int
LinuxDriver_Poll(struct inode *inode,
                 struct file *filp,
                 int sel_type,
                 select_table *wait)
{
#if 0
   if (current->timeout) {
      current->timeout = jiffies + 1;
   }
   return 0;
#else
   VMLinux *vmLinux = (VMLinux *) filp->private_data;
   unsigned long flags;

   save_flags(flags);
   cli();
   if (wait != NULL) {
      if (vmLinux->selectWaiting) {
	 del_timer(&vmLinux->selectTimer);
      } else {
	 vmLinux->selectWaiting = TRUE;
      }
      select_wait(&vmLinux->selectQueue, wait);
      /* Reset the list links by hand before re-adding the timer */
      vmLinux->selectTimer.next = vmLinux->selectTimer.prev = NULL;
      vmLinux->selectTimer.expires = jiffies + 1;
      add_timer(&vmLinux->selectTimer);
   }
   restore_flags(flags);

   return !vmLinux->selectWaiting;
#endif
}

#endif
/*
 *----------------------------------------------------------------------
 *
 * LinuxDriverSelectTimeout  --
 *
 *      Wake up a process waiting in poll/select.  This is called from
 *      the timer, and hence processed in the bottom half
 *
 *----------------------------------------------------------------------
 */

/*
 * Timer callback installed by LinuxDriver_Open (selectTimer.function); the
 * timer's data field carries the VMLinux pointer passed in here.
 */
void
LinuxDriverSelectTimeout(VMLinux *vmLinux)
{
   /*
    * Plain spin_lock() rather than spin_lock_bh(): per the banner above this
    * runs from the timer, i.e. already in bottom-half context.
    */
   spin_lock(&vmLinux->selectLock);
   /* Clear the flag before waking sleepers so that a re-poll sees it FALSE */
   vmLinux->selectWaiting = FALSE;
   wake_up(&vmLinux->selectQueue);
   spin_unlock(&vmLinux->selectLock);
}


/*
 *----------------------------------------------------------------------
 *
 * LinuxDriverIPIHandler  --
 *
 *      Null IPI handler - for monitor to notice AIO completion.
 *      Deliberately does nothing; 'info' is ignored.
 *
 *----------------------------------------------------------------------
 */
void
LinuxDriverIPIHandler(void *info)
{
   (void) info;
}


/*
 *-----------------------------------------------------------------------------
 *
 * LinuxDriverIoctlAlloc4Gb --
 *
 *    Allocate and lock 'numPages' pages of memory whose physical addresses
 *    are contiguous and < 4 GB, and record the range on the list anchored at
 *    'head' --hpreg
 *
 *    'head'     is the circular list of ranges owned by the open file.
 *    'lock'     protects that list; it is taken only around the insertion.
 *    'numPages' is the requested size, in pages. It originates from user
 *               space and is validated here.
 *    'addr'     receives the physical address of the range on success.
 *
 * Results:
 *    0 on success: '*addr' is the physical address of the beginning of the
 *                  range
 *    < 0 on failure: the actual value determines the type of failure
 *       -EINVAL: 'numPages' is 0, or so large that its size in bytes does
 *                not fit in an unsigned long
 *       -ENOMEM: the descriptor or the pages could not be allocated
 *
 * Side effects:
 *    The allocated pages are marked reserved until the range is destroyed
 *
 *-----------------------------------------------------------------------------
 */

static int
LinuxDriverIoctlAlloc4Gb(MemRange *head,         // IN
                         spinlock_t *lock,       // IN
                         unsigned long numPages, // IN
                         PA *addr)               // OUT
{
   MemRange *range;
   int log2NumPages;
   VA kAddr;
   struct page *page;
   struct page const *pageEnd;

   ASSERT(head);
   ASSERT(lock);
   ASSERT(addr);

   /*
    * 'numPages << PAGE_SHIFT' below must not wrap around. If it did, the
    * computed order would describe a much smaller allocation than
    * 'numPages', and the reserve loop (here) and unreserve loop (in the
    * destructor) would then walk page structures far beyond the pages that
    * were actually allocated.
    */
   if (numPages == 0 || numPages > (~0UL >> PAGE_SHIFT)) {
      return -EINVAL;
   }

   range = kmalloc(sizeof(*range), GFP_KERNEL);
   if (range == NULL) {
      return -ENOMEM;
   }

   log2NumPages = compat_get_order(numPages << PAGE_SHIFT);
   /*
    * We use the ISA DMA zone (i.e. pages whose physical addresses are < 16 MB)
    * by lack of better choice. We should really use GFP_32BIT or analog when
    * it becomes available, but do not do that in this branch, as there is code
    * to fix bug 16306 that relies on the ISA DMA zone --hpreg
    */
   kAddr = __get_dma_pages(GFP_KERNEL, log2NumPages);
   if (kAddr == 0) {
      kfree(range);

      return -ENOMEM;
   }

   /*
    * We must reserve the pages otherwise remap_page_range() will refuse to do
    * its job --hpreg
    */
   page = virt_to_page(kAddr);
   pageEnd = page + numPages;
   for (; page < pageEnd; page++) {
      compat_mem_map_reserve(page);
   }

   range->kAddr = kAddr;
   range->pAddr = virt_to_phys((void *)kAddr);
   *addr = range->pAddr;
   range->numPages = numPages;

   /* Insert the range at the beginning of the list --hpreg */
   spin_lock(lock);
   range->next = head->next;
   head->next = range;
   spin_unlock(lock);

   return 0;
}


/*
 *-----------------------------------------------------------------------------
 *
 * LinuxDriverRemove4Gb --
 *
 *    Unlink from the circular list anchored at 'head' the first range whose
 *    physical start address equals 'addr'. The range is only unlinked, not
 *    destroyed. Locking the list, if needed, is up to the caller --hpreg
 *
 * Results:
 *    The range if found
 *    NULL if not found
 *
 * Side effects:
 *    None
 *
 *-----------------------------------------------------------------------------
 */

static MemRange *
LinuxDriverRemove4Gb(MemRange *head, // IN
                     PA addr)        // IN
{
   MemRange *prev;
   MemRange *candidate;

   ASSERT(head);

   prev = head;
   while ((candidate = prev->next) != head) {
      if (candidate->pAddr != addr) {
         prev = candidate;
         continue;
      }

      /* Found it: bypass the matching item --hpreg */
      prev->next = candidate->next;

      return candidate;
   }

   /* Walked all the way back to the head without a match */
   return NULL;
}


/*
 *-----------------------------------------------------------------------------
 *
 * LinuxDriverIoctlFree4Gb --
 *
 *    Free the range that begins at physical address 'addr'. The list is
 *    locked only while the range is looked up and unlinked; the range is
 *    destroyed after the lock has been released --hpreg
 *
 * Results:
 *    0 on success
 *    < 0 on failure: the actual value determines the type of failure
 *       -EINVAL: no range on the list begins at 'addr'
 *
 * Side effects:
 *    None
 *
 *-----------------------------------------------------------------------------
 */

static int
LinuxDriverIoctlFree4Gb(MemRange *head,   // IN
                        spinlock_t *lock, // IN
                        PA addr)          // IN
{
   MemRange *victim;

   ASSERT(head);
   ASSERT(lock);

   spin_lock(lock);
   victim = LinuxDriverRemove4Gb(head, addr);
   spin_unlock(lock);

   if (victim != NULL) {
      LinuxDriverDestructor4Gb(victim);

      return 0;
   }

   return -EINVAL;
}


/*
 *----------------------------------------------------------------------
 *
 * LinuxDriver_Ioctl --
 *
 *      Main path for UserRPC
 *
 * Results:
 *
 *
 *
 *
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */
#include <linux/sched.h>
/*
 * ioctl() entry point: dispatches on 'iocmd' and returns the per-command
 * result in 'retval' (0 or a command-specific value on success, a negative
 * errno on the error paths visible here). 'ioarg' is, depending on the
 * command, a plain integer, a set of flags, or a user-space pointer.
 *
 * Most commands require that a VM has been attached to this open file
 * (vmLinux->vm != NULL) and fail with -EINVAL otherwise.
 */
static int
LinuxDriver_Ioctl(struct inode *inode,
                  struct file *filp,
                  u_int iocmd,
                  unsigned long ioarg)
{
   VMLinux *vmLinux = (VMLinux *) filp->private_data;
   int retval = 0;

   VMWare_SetVTracer(VTrace_Set);

   switch (iocmd) {
   case IOCTLCMD_VERSION:
      retval = VMMON_VERSION;
      break;

   case IOCTLCMD_CREATE_VM: {
      WslimitsInfo wslimitsInfo;
      /* Only one VM may be attached to an open file */
      if (vmLinux->vm != NULL) {
	 retval = -EINVAL;
	 break;
      }

      memset(&wslimitsInfo, 0, sizeof(wslimitsInfo));
      /*
       * NOTE(review): a failed copy is only logged; the VM is still created
       * with whatever wslimitsInfo contains at that point.
       */
      if ((char *) ioarg != NULL) {
          if (HostIF_CopyFromUser(&wslimitsInfo, (char *) ioarg,
			   sizeof(wslimitsInfo)) != 0) {
             Log("CopyFromUser failed for WslimitsInfo struct \n");
          }
      }
      vmLinux->vm = Vmx86_CreateVM((void *)filp, (void *)(current->pid), 
				   &wslimitsInfo);
      if (vmLinux->vm == NULL) {
	 retval = -ENOMEM;
	 break;
      }
      /* On success the ioctl returns the id of the new VM */
      retval = vmLinux->vm->id;
      break;
   }
   case IOCTLCMD_BIND_VM:
      if (vmLinux->vm != NULL) {
	 retval = -EINVAL;
	 break;
      }
      vmLinux->vm = Vmx86_BindVM((int) ioarg);
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      break;

   case IOCTLCMD_RELEASE_VM: {
      VMDriver *vm;
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      /* Detach the VM from the file before releasing it */
      vm = vmLinux->vm;
      vmLinux->vm = NULL;
      Vmx86_ReleaseVM(vm);
      break;
   }

   case IOCTLCMD_INIT_VM: {
      InitBlock initParams;
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = HostIF_CopyFromUser(&initParams, (char*)ioarg,
				   sizeof initParams);
      if (retval != 0) {
	 break;
      }
      if (Vmx86_InitVM(vmLinux->vm, &initParams)) {
	 retval = -EINVAL;
	 break;
      }
      /* The init block is in/out: copy it back to the caller */
      retval = HostIF_CopyToUser((char*)ioarg,&initParams,sizeof(InitBlock));
      break;
   }

   case IOCTLCMD_LATE_INIT_VM:
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      if (Vmx86_LateInitVM(vmLinux->vm)) {
	 retval = -EINVAL;
	 break;
      }
      break;

   case IOCTLCMD_RUN_VM:
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      /* On SMP builds the big kernel lock is dropped while the VM runs */
#ifdef __SMP__
      unlock_kernel();
#endif
      retval = Vmx86_RunVM(vmLinux->vm);
#ifdef __SMP__
      lock_kernel();
#endif
      break;

   case IOCTLCMD_SET_UID:
      /* Only honoured in development builds */
#ifdef VMX86_DEVEL
      devel_suid();
#else
      retval = -EPERM;
#endif
      break;

   case IOCTLCMD_LOOK_UP_MPN: {
      char *addr = (char *)ioarg;
      MPN mpn;
      /*
       * NOTE(review): unlike the neighbouring commands, vmLinux->vm is not
       * checked against NULL before being passed on -- confirm that
       * HostIF_LookupUserMPN() tolerates a NULL vm.
       */
      mpn = HostIF_LookupUserMPN(vmLinux->vm, addr);
      retval = mpn;
      break;
   }

   case IOCTLCMD_LOCK_PAGE: {
      char *addr = (char *)ioarg;
      MPN mpn;
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      mpn = Vmx86_LockPage(vmLinux->vm, addr, TRUE);
      retval = mpn;
      break;
   }

   case IOCTLCMD_UNLOCK_PAGE: {
      char *addr = (char *)ioarg;
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = Vmx86_UnlockPage(vmLinux->vm, addr, TRUE);
      break;
   }

   case IOCTLCMD_GET_NUM_VMS: {
      retval = Vmx86_GetNumVMs();
      break;
   }

   case IOCTLCMD_GET_TOTAL_MEM_USAGE: {
      retval = Vmx86_GetTotalMemUsage();
      break;
   }

   case IOCTLCMD_SET_HARD_LIMIT: {
      int32 limit;
      retval = HostIF_CopyFromUser(&limit, (void *)ioarg, sizeof(limit));
      if (retval != 0) {
	 break;
      }
      if (!Vmx86_SetLockedPagesLimit(limit)) {
         retval = -EINVAL;
      }
      break;
   }

   case IOCTLCMD_GET_HARD_LIMIT: {
      retval = Vmx86_GetLockedPagesLimit();
      break;
   }

   case IOCTLCMD_SET_MEM_INFO: {
      VMSetMemInfoArgs args;

      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = HostIF_CopyFromUser(&args, (void *)ioarg, sizeof args);
      if (retval != 0) {
	 break;
      }
      Vmx86_SetMemInfo(vmLinux->vm, &args);
      break;
   }

   case IOCTLCMD_GET_MEM_INFO: {
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      /* 'ioarg' is handed over as-is; the callee does the copy-out */
      if (!Vmx86_GetMemInfoCopy(vmLinux->vm, (VMGetMemInfoArgs *) ioarg)) {
         retval = -EINVAL;
      }
      break;
   }

   case IOCTLCMD_GET_STATS: {
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = HostIF_CopyToUser((void *)ioarg, &vmLinux->vm->stats,
				 sizeof vmLinux->vm->stats);
      /* NOTE(review): redundant check, both paths break with 'retval' as is */
      if (retval != 0) {
	 break;
      }
      break;
   }

   case IOCTLCMD_SET_STATS: {
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      /* The user buffer is copied directly over the VM's stats */
      retval = HostIF_CopyFromUser(&vmLinux->vm->stats, (void *)ioarg,
				   sizeof vmLinux->vm->stats);
      break;
   }

   case IOCTLCMD_PAE_ENABLED:
      retval = Vmx86_PAEEnabled();
      break;

   case IOCTLCMD_IS_MP_SAFE: {
      /* Compile-time answer: TRUE only for SMP builds of the module */
#ifdef __SMP__
      retval = TRUE;
#else
      retval = FALSE;
#endif
      break;
   }

   case IOCTLCMD_APIC_BASE: {
      MA ma;
      /* Here 'ioarg' is a set of APIC_FLAG_* bits, not a pointer */
      Bool setVMPtr = (ioarg & APIC_FLAG_DISABLE_NMI) != 0;
      Bool probe = (ioarg & APIC_FLAG_PROBE) != 0;
#ifdef USE_PERFCOUNTERS
      Bool forceEnable = (ioarg & APIC_FLAG_FORCE_ENABLE) != 0;
#endif

      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,20)
      // Kernel uses NMIs for deadlock detection -
      //  set APIC VMptr so that NMIs get disabled in the monitor
      setVMPtr = TRUE;
#endif
      ma = HostIF_APICBase(vmLinux->vm, setVMPtr, probe);

#ifdef USE_PERFCOUNTERS
      if ((ma == 0) && forceEnable) {
	 ma = HostIF_APICEnable(vmLinux->vm);
      }
#endif
      /* The machine address is returned through the int return value */
      retval = ma;
      break;
   }

   case IOCTLCMD_IOAPIC_BASE: {
      MA ma;
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      ma = HostIF_IOAPICBase(vmLinux->vm);
      retval = ma;
      break;
   }

   case IOCTLCMD_CHECK_MEMORY:
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = HostIF_CheckMemory(vmLinux->vm);
      break;

#ifdef SUPPORT_PASSTHROUGH
   case IOCTLCMD_REGISTER_PASSTHROUGH_IRQ: {
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = Passthrough_RegisterIRQ((unsigned char) ioarg, "VMware",
				       vmLinux->vm);
      break;
   }

   case IOCTLCMD_REGISTER_PASSTHROUGH_IO: {
      struct passthrough_iorange ior;

      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = HostIF_CopyFromUser(&ior, (char*)ioarg, sizeof(ior));
      if (retval != 0) {
	 break;
      }
      retval = Passthrough_RegisterIORegion(ior.ioBase, ior.numPorts, "VMware");
      break;
   }

   case IOCTLCMD_FREE_PASSTHROUGH_IRQ: {
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      Passthrough_FreeIRQ((unsigned char) ioarg, vmLinux->vm);
      retval = 0;
      break;
   }

   case IOCTLCMD_FREE_PASSTHROUGH_IO: {
      struct passthrough_iorange ior;

      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = HostIF_CopyFromUser(&ior, (char*)ioarg, sizeof(ior));
      if (retval != 0) {
	 break;
      }
      retval = Passthrough_ReleaseIORegion(ior.ioBase, ior.numPorts);
      break;
   }

   case IOCTLCMD_START_PASSTHROUGH: {
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = Passthrough_Init(vmLinux->vm);
      break;
   }

   case IOCTLCMD_STOP_PASSTHROUGH: {
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = Passthrough_Release(vmLinux->vm);
      break;
   }

   case IOCTLCMD_QUERY_PASSTHROUGH: {
      unsigned char irq;
      int err;

      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      /*
       * Pop the most recently queued pending IRQ, if any. The return value
       * is the number of IRQs still pending, or -EINVAL when none was
       * queued (in which case irq 0 is copied out).
       */
      if (vmLinux->vm->numPendingPassthroughIRQs > 0) {
        irq = vmLinux->vm->pendingPassthroughIRQs[--vmLinux->vm->numPendingPassthroughIRQs];
        retval = vmLinux->vm->numPendingPassthroughIRQs;
      } else {
        irq = 0x0;
        retval = -EINVAL;
      }
      err = HostIF_CopyToUser((unsigned char*)ioarg, &irq, sizeof(unsigned char));
      if (err != 0) {
	 retval = err;
	 break;
      }
      break;
   }
#endif // ifdef SUPPORT_PASSTHROUGH

   case IOCTLCMD_ALLOW_CORE_DUMP:
      /*
       * Core dumps are only re-enabled when the effective and filesystem
       * ids match the real ids.
       */
      if (current->euid == current->uid &&
	  current->fsuid == current->uid &&
          current->egid == current->gid &&
	  current->fsgid == current->gid) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 7)
	 current->mm->dumpable = 1;
#else
	 current->dumpable = 1;
#endif
         retval = 0;
      } else {
         retval = -EPERM;
      }
      break;

   case IOCTLCMD_BROADCAST_IPI:
      /* No-op (returns 0) on non-SMP builds and on kernels older than 2.2.8 */
#ifdef __SMP__
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 2, 8)
      retval = smp_call_function(LinuxDriverIPIHandler, NULL, FALSE, FALSE);
#endif
#endif
      break;

#ifdef USE_PERFCOUNTERS
   case IOCTLCMD_REGISTER_PERFCTR: {
      PerfCtrRegisterArgs args;

      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = HostIF_CopyFromUser(&args, (void *)ioarg, sizeof(args));
      if (retval != 0) {
	 break;
      }
      retval = PerfCtr_Register(vmLinux->vm, &args);
      break;
   }
   case IOCTLCMD_START_PERFCTR:
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = PerfCtr_StartCounters(vmLinux->vm);
      break;

   case IOCTLCMD_STOP_PERFCTR:
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = PerfCtr_StopCounters(vmLinux->vm);
      break;

   case IOCTLCMD_RELEASE_PERFCTR:
      if (vmLinux->vm == NULL) {
	 retval = -EINVAL;
	 break;
      }
      retval = PerfCtr_Release(vmLinux->vm, TRUE);
      break;
#endif // ifdef USE_PERFCOUNTERS

   case IOCTLCMD_ALLOC_LOW_PAGES:
      {
         unsigned long numPages;
         PA addr;

         /*
          * 'ioarg' points to an in/out slot: the number of pages is read
          * from it, and the physical address of the range is written back.
          * This command does not require an attached VM.
          */
         retval = compat_get_user(numPages, (unsigned long *)ioarg);
         if (retval < 0) {
            break;
         }

         retval = LinuxDriverIoctlAlloc4Gb(&vmLinux->list4Gb,
                                           &vmLinux->lock4Gb, numPages, &addr);
         if (retval < 0) {
            break;
         }

         /*
          * If this copy-out fails the range stays on the list; it is
          * destroyed when the file is closed.
          */
         retval = compat_put_user(addr, (PA *)ioarg);
      }
      break;

   case IOCTLCMD_FREE_LOW_PAGES:
      /* Here 'ioarg' is the physical address itself, not a pointer */
      retval = LinuxDriverIoctlFree4Gb(&vmLinux->list4Gb, &vmLinux->lock4Gb,
                                       ioarg);
      break;

   case IOCTLCMD_GET_MHZ_ESTIMATE:
      retval = Vmx86_GetMHzEstimate(&linuxState.startTime);
      break;

   default:
      /* NOTE(review): 'iocmd' is a u_int but is printed with %d */
      Warning("Unknown ioctl %d\n", iocmd);
      retval = -EINVAL;
   }

   VMWare_SetVTracer(0);
   return retval;
}


/*
 * The exit(2) path does, in this order:
 * . set current->files to NULL
 * . close all fds, which potentially calls LinuxDriver_Close()
 *
 * fget() requires current->files != NULL, so we must explicitly check --hpreg
 */
#define vmware_fget(_fd) (current->files ? fget(_fd) : NULL)


/*
 *----------------------------------------------------------------------
 *
 * vLog --
 *
 *     Write the message currently held in linuxState.buf to the file
 *     that descriptor 'fd' refers to in the current process. This is the
 *     back end of Log() and Warning() from the kernel module.
 *
 *     Nothing is written when 'fd' cannot be resolved to a file or when
 *     that file does not provide a write operation. The result of the
 *     write itself is not checked (logging is best effort).
 *
 * Results:
 *     None
 *
 *----------------------------------------------------------------------
 */

static INLINE void
vLog(int fd) // IN
{
   struct file *file;
   size_t len;

   len = strlen(linuxState.buf);
   ASSERT(len);
   ASSERT(len < LINUXLOG_BUFFER_SIZE);

   file = vmware_fget(fd);
   if (file == NULL) {
      return;
   }

   /*
    * Not every file has a write method (the file_operations this driver
    * registers for its own device has none, for instance), so calling
    * f_op->write blindly could jump through a NULL pointer.
    */
   if (file->f_op != NULL && file->f_op->write != NULL) {
      /*
       * The buffer lives in kernel space: widen the address limit for the
       * duration of the write and restore it afterwards.
       */
#ifdef KERNEL_2_1
      mm_segment_t old_fs = get_fs();

      set_fs(get_ds());
      file->f_op->write(file, (char*)linuxState.buf, len, &file->f_pos);
#else
      int32 old_fs = get_fs();

      set_fs(get_ds());
      file->f_op->write(file->f_inode, file, (char*)linuxState.buf, len);
#endif
      set_fs(old_fs);
   }

   /* Drop the reference taken by vmware_fget() */
   compat_fput(file);
}


/*
 *----------------------------------------------------------------------
 *
 * vWarning --
 *
 *      Warning() gets here. Emits the message currently held in
 *      linuxState.buf:
 *      . when 'vm' is non-NULL and descriptor 1 of the current process
 *        resolves to a file, the message is written to that file (if it
 *        provides a write operation) and also sent to the kernel log,
 *        tagged with the pid of the current process
 *      . otherwise the message goes to the kernel log only, at
 *        KERN_WARNING level
 *
 * Results:
 *      None
 *
 *----------------------------------------------------------------------
 */

static INLINE void
vWarning(VMDriver *vm) // IN
{
   struct file *file;
   size_t len;

   len = strlen(linuxState.buf);
   ASSERT(len);
   ASSERT(len < LINUXLOG_BUFFER_SIZE);

   if (vm && (file = vmware_fget(1))) {
      /*
       * Descriptor 1 may refer to a file without a write method (the
       * file_operations this driver registers for its own device has none,
       * for instance), so f_op->write must not be called blindly.
       */
      if (file->f_op != NULL && file->f_op->write != NULL) {
         /*
          * The buffer lives in kernel space: widen the address limit for
          * the duration of the write and restore it afterwards.
          */
#ifdef KERNEL_2_1
         mm_segment_t old_fs = get_fs();

         set_fs(get_ds());
         file->f_op->write(file, (char*)linuxState.buf, len, &file->f_pos);
#else
         int32 old_fs = get_fs();

         set_fs(get_ds());
         file->f_op->write(file->f_inode, file, (char*)linuxState.buf, len);
#endif
         set_fs(old_fs);
      }

      /* Drop the reference taken by vmware_fget() */
      compat_fput(file);

      printk("/dev/vmmon: (pid=%d) %s", current->pid, linuxState.buf);

      return;
   }

   /* Use the kernel log */
   printk(KERN_WARNING "/dev/vmmon: %s", linuxState.buf);
}


/*
 *----------------------------------------------------------------------
 *
 * Warning --
 *
 *      Format a warning message into linuxState.buf and emit it.
 *
 *      The VM is looked up from the current pid. When one is found the
 *      message is passed to vLog() with the VM's logFD; in every case
 *      it is then handed to vWarning().
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Overwrites linuxState.buf.
 *
 *----------------------------------------------------------------------
 */
void
Warning(char *fmt,...)
{
   va_list ap;
   VMDriver *vm = Vmx86_GetVMforProcess((void *)(current->pid));

   /*
    * NOTE(review): vsprintf() does not bound its output; the formatted
    * message has to fit in linuxState.buf -- confirm callers respect that.
    */
   va_start(ap, fmt);
   vsprintf(linuxState.buf, fmt, ap);
   va_end(ap);

   if (vm == NULL) {
      vWarning(NULL);
      return;
   }

   vLog(vm->logFD);
   vWarning(vm);
}

/*
 *----------------------------------------------------------------------
 *
 * Log --
 *
 *      Format a log message into linuxState.buf and emit it.
 *
 *      The VM is looked up from the current pid. When one is found the
 *      message is passed to vLog() with the VM's logFD; otherwise it is
 *      sent to the kernel log at KERN_DEBUG level.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Overwrites linuxState.buf.
 *
 *----------------------------------------------------------------------
 */
void
Log(char *fmt,...)
{
   va_list ap;
   VMDriver *vm = Vmx86_GetVMforProcess((void *)(current->pid));

   /*
    * NOTE(review): vsprintf() does not bound its output; the formatted
    * message has to fit in linuxState.buf -- confirm callers respect that.
    */
   va_start(ap, fmt);
   vsprintf(linuxState.buf, fmt, ap);
   va_end(ap);

   if (vm == NULL) {
      /* Use the kernel log with at least a KERN_DEBUG level so that it doesn't
         garbage the screen at (re)boot time on RedHat 6.0 --hpreg */
      printk(KERN_DEBUG "/dev/vmmon: %s", linuxState.buf);
      return;
   }

   vLog(vm->logFD);
}


/*
 *----------------------------------------------------------------------
 *
 * Panic --
 *
 *      ASSERTION failures and Panics from the kernel module get here.
 *
 *      The formatted message, followed by a "VMX86 driver panic" line
 *      carrying the current pid, is each passed to vLog() (when a VM is
 *      found for the current pid) and to vWarning(). When called from
 *      interrupt context the kernel's panic() is invoked instead.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Never returns. Overwrites linuxState.buf.
 *
 *----------------------------------------------------------------------
 */
void
Panic(char *fmt, ...)
{
   asmlinkage int sys_exit(int exit_code);
   VMDriver *vm;
   va_list ap;

   vm = Vmx86_GetVMforProcess((void *)(current->pid));

   va_start(ap, fmt);
   vsprintf(linuxState.buf, fmt, ap);
   va_end(ap);

   /*
    * XXX
    * XXX We cannot exit() the process since we are not running it
    * XXX
    */
#ifdef KERNEL_2_1
   if (in_interrupt()) {
      printk("/dev/vmmon: Panic in interrupt (no intrcount)\n");
      panic("Assertion failure in interrupt handling in VMX86\n");
   }
#else
   if (intr_count) {
      printk("/dev/vmmon: Panic intrcount=%ld\n",intr_count);
      panic("Assertion failure in interrupt handling in VMX86\n");
   }
#endif

   /* First pass: the caller's message. */
   if (vm != NULL) {
      vLog(vm->logFD);
   }
   vWarning(vm);

   /* Second pass: a trailer identifying the panicking process. */
   sprintf(linuxState.buf,"VMX86 driver panic. pid=%d\n\r",current->pid);
   if (vm != NULL) {
      vLog(vm->logFD);
   }
   vWarning(vm);

   _exit(1);
   for (;;) {
      /* To suppress warning. */
   }
}



/*
 *----------------------------------------------------------------------
 *
 * LinuxDriverQueue --
 *
 *      Insert vmLinux at the front of the global queue rooted at
 *      linuxState.head.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Updates linuxState.head and vmLinux->next between
 *      HostIF_GlobalVMLock(12) and HostIF_GlobalVMUnLock(12).
 *
 *----------------------------------------------------------------------
 */
static void
LinuxDriverQueue(VMLinux *vmLinux)
{
   VMLinux *oldHead;

   HostIF_GlobalVMLock(12);

   /* The new element becomes the head; the previous head follows it. */
   oldHead = linuxState.head;
   vmLinux->next = oldHead;
   linuxState.head = vmLinux;

   HostIF_GlobalVMUnLock(12);
}


/*
 *----------------------------------------------------------------------
 *
 * LinuxDriverDequeue --
 *
 *      Remove vmLinux from the global queue rooted at linuxState.head.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      On success the element is unlinked and its next pointer is
 *      cleared. If the element is not in the queue (error condition),
 *      a message is sent to printk() and the queue is left untouched.
 *      The queue is walked and modified between HostIF_GlobalVMLock(13)
 *      and HostIF_GlobalVMUnLock(13).
 *
 *----------------------------------------------------------------------
 */
static void
LinuxDriverDequeue(VMLinux *vmLinux)
{
   VMLinux **p;

   HostIF_GlobalVMLock(13);

   /*
    * Walk the links until one points at vmLinux. The walk also stops at
    * the end of the queue, so a missing element can never lead to a NULL
    * dereference, regardless of whether ASSERT() is compiled in.
    */
   for (p = &linuxState.head; *p != NULL && *p != vmLinux; p = &(*p)->next) {
   }

   if (*p == NULL) {
      HostIF_GlobalVMUnLock(13);
      printk(KERN_WARNING
             "/dev/vmmon: LinuxDriverDequeue: entry not found in queue\n");
      return;
   }

   *p = vmLinux->next;
   vmLinux->next = NULL;

   HostIF_GlobalVMUnLock(13);
}






/*
 *----------------------------------------------------------------------
 *
 * LinuxDriverCheckPadding --
 *
 *      Check that the compiler laid out DTRWords and VMCrossPage as
 *      expected. The original note says this check fails on the egcs
 *      compiler.
 *
 *      Two things are verified:
 *       . the 16-bit limit and the 32-bit offset of a DTR are adjacent,
 *         with no padding between them;
 *       . sizeof(VMCrossPage) and the offset of its crosspageLinearAddr
 *         field match MODULECALL_CROSSPAGE_SIZE and
 *         MODULECALL_CROSSPAGE_LAOFFSET.
 *
 * Results:
 *      TRUE if the check succeeds -- module will be loaded
 *      FALSE otherwise.
 *
 * Side effects:
 *      Output to the kernel log on error.
 *
 *----------------------------------------------------------------------
 */
static Bool
LinuxDriverCheckPadding(void)
{
   DTRWords dtr;
   uint16 *x;

   /* Clear the whole object that is inspected below, not only the part
      covered by its DTR member. */
   memset(&dtr, 0, sizeof dtr);
   dtr.dtr.limit = 0x1111;
   dtr.dtr.offset = 0x22223333;

   x = (uint16*)&dtr;

   /*
    * Without padding, the first three 16-bit words are the limit, then
    * the low and the high halves of the offset.
    */
   if (x[0] != 0x1111 || x[1] != 0x3333 || x[2] != 0x2222) {
      Warning("DTR padding\n");
      goto error;
   }

   if (sizeof(VMCrossPage) != MODULECALL_CROSSPAGE_SIZE ||
       offsetof(VMCrossPage,crosspageLinearAddr)!=MODULECALL_CROSSPAGE_LAOFFSET) {
      /* %x takes an unsigned int; sizeof and offsetof yield size_t. */
      Warning("cross page 0x%x expected 0x%x. la 0x%x expected 0x%x\n",
              (unsigned)sizeof(VMCrossPage),
              (unsigned)MODULECALL_CROSSPAGE_SIZE,
              (unsigned)offsetof(VMCrossPage,crosspageLinearAddr),
              (unsigned)MODULECALL_CROSSPAGE_LAOFFSET);
      goto error;
   }

   return TRUE;

 error:

   printk("/dev/vmmon: Cannot load module. Use standard gcc compiler\n");
   return FALSE;
}


#ifdef DO_APM
/*
 * LinuxDriverAPMCallback --
 *
 *      APM event callback. Recognizes suspend and resume events and
 *      compares LinuxDriverAPMstate against the matching state, but the
 *      actions themselves are still placeholders.
 *
 * Results:
 *      Always 0.
 */
static int LinuxDriverAPMCallback(apm_event_t event)
{
   if (event == APM_SYS_SUSPEND || event == APM_USER_SUSPEND) {
      if (LinuxDriverAPMstate == APM_STATE_READY) {
         // call up to user to suspend VMs
      }
   } else if (event == APM_NORMAL_RESUME || event == APM_CRITICAL_RESUME) {
      if (LinuxDriverAPMstate == APM_STATE_SUSPEND) {
         // call up to user to resume VMs
      }
   }

   return 0;
}
#endif
