VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c@ 4827

Last change on this file since 4827 was 4827, checked in by vboxsync, 18 years ago

Fixed high ksoftirqd load.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 57.9 KB
Line 
1/** @file
2 * The VirtualBox Support Driver - Linux hosts.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 * Some lines of code to disable the local APIC on x86_64 machines taken
16 * from a Mandriva patch by Gwenole Beauchesne <gbeauchesne@mandriva.com>.
17 */
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#include "SUPDRV.h"
23#include "version-generated.h"
24
25#include <iprt/assert.h>
26#include <iprt/spinlock.h>
27#include <iprt/semaphore.h>
28#include <iprt/initterm.h>
29#include <iprt/process.h>
30#include <iprt/err.h>
31#include <iprt/mem.h>
32
33#include <linux/module.h>
34#include <linux/kernel.h>
35#include <linux/init.h>
36#include <linux/fs.h>
37#include <linux/mm.h>
38#include <linux/pagemap.h>
39#include <linux/sched.h>
40#include <linux/slab.h>
41#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
42# include <linux/jiffies.h>
43#endif
44#include <asm/mman.h>
45#include <asm/io.h>
46#include <asm/uaccess.h>
47#ifdef CONFIG_DEVFS_FS
48# include <linux/devfs_fs_kernel.h>
49#endif
50#ifdef CONFIG_VBOXDRV_AS_MISC
51# include <linux/miscdevice.h>
52#endif
53#ifdef CONFIG_X86_LOCAL_APIC
54# include <asm/apic.h>
55# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
56# include <asm/nmi.h>
57# endif
58#endif
59
60#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
61# ifndef page_to_pfn
62# define page_to_pfn(page) ((page) - mem_map)
63# endif
64# include <asm/pgtable.h>
65# define global_flush_tlb __flush_tlb_global
66#endif
67
68#include <iprt/mem.h>
69
70
71/* devfs defines */
72#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
73# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
74
75# define VBOX_REGISTER_DEVFS() \
76({ \
77 void *rc = NULL; \
78 if (devfs_mk_cdev(MKDEV(DEVICE_MAJOR, 0), \
79 S_IFCHR | S_IRUGO | S_IWUGO, \
80 DEVICE_NAME) == 0) \
81 rc = (void *)' '; /* return not NULL */ \
82 rc; \
83 })
84
85# define VBOX_UNREGISTER_DEVFS(handle) \
86 devfs_remove(DEVICE_NAME);
87
88# else /* < 2.6.0 */
89
90# define VBOX_REGISTER_DEVFS() \
91 devfs_register(NULL, DEVICE_NAME, DEVFS_FL_DEFAULT, \
92 DEVICE_MAJOR, 0, \
93 S_IFCHR | S_IRUGO | S_IWUGO, \
94 &gFileOpsVBoxDrv, NULL)
95
96# define VBOX_UNREGISTER_DEVFS(handle) \
97 if (handle != NULL) \
98 devfs_unregister(handle)
99
100# endif /* < 2.6.0 */
101#endif /* CONFIG_DEV_FS && !CONFIG_VBOXDEV_AS_MISC */
102
103#ifndef CONFIG_VBOXDRV_AS_MISC
104# if defined(CONFIG_DEVFS_FS) && LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 0)
105# define VBOX_REGISTER_DEVICE(a,b,c) devfs_register_chrdev(a,b,c)
106# define VBOX_UNREGISTER_DEVICE(a,b) devfs_unregister_chrdev(a,b)
107# else
108# define VBOX_REGISTER_DEVICE(a,b,c) register_chrdev(a,b,c)
109# define VBOX_UNREGISTER_DEVICE(a,b) unregister_chrdev(a,b)
110# endif
111#endif /* !CONFIG_VBOXDRV_AS_MISC */
112
113
114#ifdef CONFIG_X86_HIGH_ENTRY
115# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
116#endif
117
118/*
119 * This sucks soooo badly on x86! Why don't they export __PAGE_KERNEL_EXEC so PAGE_KERNEL_EXEC would be usable?
120 */
121#if defined(RT_ARCH_AMD64)
122# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC
123#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
124# define MY_PAGE_KERNEL_EXEC __pgprot(cpu_has_pge ? _PAGE_KERNEL_EXEC | _PAGE_GLOBAL : _PAGE_KERNEL_EXEC)
125#else
126# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL
127#endif
128
129/*
130 * The redhat hack section.
131 * - The current hacks are for 2.4.21-15.EL only.
132 */
133#ifndef NO_REDHAT_HACKS
134/* accounting. */
135# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
136# ifdef VM_ACCOUNT
137# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c, 0) /* should it be 1 or 0? */
138# endif
139# endif
140
141/* backported remap_page_range. */
142# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
143# include <asm/tlb.h>
144# ifdef tlb_vma /* probably not good enough... */
145# define HAVE_26_STYLE_REMAP_PAGE_RANGE 1
146# endif
147# endif
148
149# ifndef RT_ARCH_AMD64
150/* In 2.6.9-22.ELsmp we have to call change_page_attr() twice when changing
151 * the page attributes from PAGE_KERNEL to something else, because there appears
152 * to be a bug in one of the many patches that redhat applied.
153 * It should be safe to do this on less buggy linux kernels too. ;-)
154 */
155# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
156 do { \
157 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) \
158 change_page_attr(pPages, cPages, prot); \
159 change_page_attr(pPages, cPages, prot); \
160 } while (0)
161# endif
162#endif /* !NO_REDHAT_HACKS */
163
164
165#ifndef MY_DO_MUNMAP
166# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c)
167#endif
168
169#ifndef MY_CHANGE_PAGE_ATTR
170# ifdef RT_ARCH_AMD64 /** @todo This is a cheap hack, but it'll get around that 'else BUG();' in __change_page_attr(). */
171# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
172 do { \
173 change_page_attr(pPages, cPages, PAGE_KERNEL_NOCACHE); \
174 change_page_attr(pPages, cPages, prot); \
175 } while (0)
176# else
177# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) change_page_attr(pPages, cPages, prot)
178# endif
179#endif
180
181
182/** @def ONE_MSEC_IN_JIFFIES
183 * The number of jiffies that make up 1 millisecond. This is only actually used
184 * when HZ is > 1000. */
185#if HZ <= 1000
186# define ONE_MSEC_IN_JIFFIES 0
187#elif !(HZ % 1000)
188# define ONE_MSEC_IN_JIFFIES (HZ / 1000)
189#else
190# define ONE_MSEC_IN_JIFFIES ((HZ + 999) / 1000)
191# error "HZ is not a multiple of 1000, the GIP stuff won't work right!"
192#endif
193
194#ifdef CONFIG_X86_LOCAL_APIC
195
196/* If an NMI occurs while we are inside the world switcher the machine will
197 * crash. The Linux NMI watchdog generates periodic NMIs increasing a counter
198 * which is compared with another counter increased in the timer interrupt
199 * handler. We disable the NMI watchdog.
200 *
201 * - Linux >= 2.6.21: The watchdog is disabled by default on i386 and x86_64.
202 * - Linux < 2.6.21: The watchdog is normally enabled by default on x86_64
203 * and disabled on i386.
204 */
205# if defined(RT_ARCH_AMD64)
206# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21)
207# define DO_DISABLE_NMI 1
208# endif
209# endif
210
211# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
212extern int nmi_active;
213# define nmi_atomic_read(P) *(P)
214# define nmi_atomic_set(P, V) *(P) = (V)
215# define nmi_atomic_dec(P) nmi_atomic_set(P, 0)
216# else
217# define nmi_atomic_read(P) atomic_read(P)
218# define nmi_atomic_set(P, V) atomic_set(P, V)
219# define nmi_atomic_dec(P) atomic_dec(P)
220# endif
221
222# ifndef X86_FEATURE_ARCH_PERFMON
223# define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */
224# endif
225# ifndef MSR_ARCH_PERFMON_EVENTSEL0
226# define MSR_ARCH_PERFMON_EVENTSEL0 0x186
227# endif
228# ifndef ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT
229# define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0)
230# endif
231
232#endif /* CONFIG_X86_LOCAL_APIC */
233
234
235/*******************************************************************************
236* Defined Constants And Macros *
237*******************************************************************************/
238/**
239 * Device extention & session data association structure.
240 */
241static SUPDRVDEVEXT g_DevExt;
242
243/** Timer structure for the GIP update. */
244static struct timer_list g_GipTimer;
245/** Pointer to the page structure for the GIP. */
246struct page *g_pGipPage;
247
248/** Registered devfs device handle. */
249#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
250# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
251static void *g_hDevFsVBoxDrv = NULL;
252# else
253static devfs_handle_t g_hDevFsVBoxDrv = NULL;
254# endif
255#endif
256
257#ifndef CONFIG_VBOXDRV_AS_MISC
258/** Module major number */
259#define DEVICE_MAJOR 234
260/** Saved major device number */
261static int g_iModuleMajor;
262#endif /* !CONFIG_VBOXDRV_AS_MISC */
263
264/** The module name. */
265#define DEVICE_NAME "vboxdrv"
266
267#ifdef RT_ARCH_AMD64
268/**
269 * Memory for the executable memory heap (in IPRT).
270 */
271extern uint8_t g_abExecMemory[1572864]; /* 1.5 MB */
272__asm__(".section execmemory, \"awx\", @progbits\n\t"
273 ".align 32\n\t"
274 ".globl g_abExecMemory\n"
275 "g_abExecMemory:\n\t"
276 ".zero 1572864\n\t"
277 ".type g_abExecMemory, @object\n\t"
278 ".size g_abExecMemory, 1572864\n\t"
279 ".text\n\t");
280#endif
281
282
283/*******************************************************************************
284* Internal Functions *
285*******************************************************************************/
286static int VBoxDrvLinuxInit(void);
287static void VBoxDrvLinuxUnload(void);
288static int VBoxDrvLinuxCreate(struct inode *pInode, struct file *pFilp);
289static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp);
290static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
291static int VBoxDrvLinuxIOCtlSlow(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
292#ifndef USE_NEW_OS_INTERFACE_FOR_MM
293static RTR3PTR VBoxDrvLinuxMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags);
294#endif /* !USE_NEW_OS_INTERFACE_FOR_MM */
295static int VBoxDrvLinuxInitGip(PSUPDRVDEVEXT pDevExt);
296static int VBoxDrvLinuxTermGip(PSUPDRVDEVEXT pDevExt);
297static void VBoxDrvLinuxGipTimer(unsigned long ulUser);
298#ifdef CONFIG_SMP
299static void VBoxDrvLinuxGipTimerPerCpu(unsigned long ulUser);
300static void VBoxDrvLinuxGipResumePerCpu(void *pvUser);
301#endif
302static int VBoxDrvLinuxErr2LinuxErr(int);
303
304
/** The file_operations structure.
 * Uses GNU old-style designated initializers (label: form). Only open,
 * release and ioctl are implemented; everything else uses kernel defaults. */
static struct file_operations gFileOpsVBoxDrv =
{
    owner:      THIS_MODULE,
    open:       VBoxDrvLinuxCreate,
    release:    VBoxDrvLinuxClose,
    ioctl:      VBoxDrvLinuxIOCtl,
};
313
#ifdef CONFIG_VBOXDRV_AS_MISC
/** The miscdevice structure.
 * MISC_DYNAMIC_MINOR makes the kernel assign the minor number at
 * misc_register() time. */
static struct miscdevice gMiscDevice =
{
    minor:      MISC_DYNAMIC_MINOR,
    name:       DEVICE_NAME,
    fops:       &gFileOpsVBoxDrv,
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && \
     LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 17)
    /* The devfs_name member only exists in this kernel version range. */
    devfs_name: DEVICE_NAME,
# endif
};
#endif
327
328#ifdef CONFIG_X86_LOCAL_APIC
329# ifdef DO_DISABLE_NMI
330
/** Stop AMD NMI watchdog (x86_64 only).
 * Clears the K7 performance event selector MSR so the counter no longer
 * fires NMIs.
 * @returns 1 (always reports the watchdog as stopped). */
static int stop_k7_watchdog(void)
{
    wrmsr(MSR_K7_EVNTSEL0, 0, 0);
    return 1;
}
337
/** Stop Intel P4 NMI watchdog (x86_64 only).
 * Clears the P4 IQ counter configuration and escape MSRs used by the
 * watchdog.
 * @returns 1 (always reports the watchdog as stopped). */
static int stop_p4_watchdog(void)
{
    wrmsr(MSR_P4_IQ_CCCR0,  0, 0);
    wrmsr(MSR_P4_IQ_CCCR1,  0, 0);
    wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
    return 1;
}
346
347/** The new method of detecting the event counter */
348static int stop_intel_arch_watchdog(void)
349{
350 unsigned ebx;
351
352 ebx = cpuid_ebx(10);
353 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
354 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
355 return 1;
356}
357
/** Stop NMI watchdog on the current CPU.
 *
 * Executed on every CPU via on_each_cpu() from disable_lapic_nmi_watchdog().
 * Dispatches on the CPU vendor to the matching MSR-clearing helper and, if
 * one of them reports success, decrements the global nmi_active counter.
 *
 * @param   unused  Required by the on_each_cpu() callback signature; ignored.
 */
static void vbox_stop_apic_nmi_watchdog(void *unused)
{
    int stopped = 0;

    /* only support LOCAL and IO APICs for now */
    if ((nmi_watchdog != NMI_LOCAL_APIC) &&
        (nmi_watchdog != NMI_IO_APIC))
        return;

    if (nmi_watchdog == NMI_LOCAL_APIC)
    {
        switch (boot_cpu_data.x86_vendor)
        {
            case X86_VENDOR_AMD:
                /* "Screwdriver" models are left untouched. */
                if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
                    return;
                stopped = stop_k7_watchdog();
                break;
            case X86_VENDOR_INTEL:
                /* Prefer the architectural perfmon method when available. */
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                {
                    stopped = stop_intel_arch_watchdog();
                    break;
                }
                stopped = stop_p4_watchdog();
                break;
            default:
                return; /* unknown vendor: don't touch any MSRs */
        }
    }

    if (stopped)
        nmi_atomic_dec(&nmi_active);
}
393
/** Disable LAPIC NMI watchdog.
 * Runs vbox_stop_apic_nmi_watchdog() on every CPU (waiting for completion)
 * and then flags the watchdog as NMI_NONE. Must only be called when the
 * watchdog is in NMI_LOCAL_APIC mode (BUG_ON otherwise). */
static void disable_lapic_nmi_watchdog(void)
{
    BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

    if (nmi_atomic_read(&nmi_active) <= 0)
        return; /* already inactive, nothing to do */

    /* on_each_cpu(func, info, retry, wait): run on all CPUs, wait for all. */
    on_each_cpu(vbox_stop_apic_nmi_watchdog, NULL, 1, 1);

    /* Every per-CPU stop must have decremented nmi_active down to zero. */
    BUG_ON(nmi_atomic_read(&nmi_active) != 0);

    /* tell do_nmi() and others that we're not active any more */
    nmi_watchdog = NMI_NONE;
}
409
/** Shutdown NMI delivery on the current CPU.
 *
 * If the local APIC performance-counter LVT entry is configured to deliver
 * NMIs and is not already masked, mask it. The error LVT entry is masked
 * around the LVTPC write and then restored to its previous value.
 *
 * @param   dummy   Required by the on_each_cpu() callback signature; ignored.
 */
static void nmi_cpu_shutdown(void * dummy)
{
    unsigned int vERR, vPC;

    vPC = apic_read(APIC_LVTPC);

    if ((GET_APIC_DELIVERY_MODE(vPC) == APIC_MODE_NMI) && !(vPC & APIC_LVT_MASKED))
    {
        vERR = apic_read(APIC_LVTERR);
        apic_write(APIC_LVTERR, vERR | APIC_LVT_MASKED); /* mask errors during the change */
        apic_write(APIC_LVTPC,  vPC  | APIC_LVT_MASKED); /* mask the perf counter NMI */
        apic_write(APIC_LVTERR, vERR);                   /* restore the error LVT */
    }
}
425
/** Shut down NMI delivery on all CPUs by running nmi_cpu_shutdown() on each
 * (retry=0, wait=1). */
static void nmi_shutdown(void)
{
    on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
}
430# endif /* DO_DISABLE_NMI */
431#endif /* CONFIG_X86_LOCAL_APIC */
432
/**
 * Initialize module.
 *
 * Refuses to load while an NMI watchdog is (or may become) active, then
 * registers the device node (misc device or character device depending on
 * CONFIG_VBOXDRV_AS_MISC), initializes IPRT, the device extension and the
 * GIP page. On any failure everything already set up is torn down again.
 *
 * @returns appropriate status code: 0 on success, negative errno on failure.
 */
static int __init VBoxDrvLinuxInit(void)
{
    int rc;

    dprintf(("VBoxDrv::ModuleInit\n"));

#ifdef CONFIG_X86_LOCAL_APIC
    /*
     * If an NMI occurs while we are inside the world switcher the machine will crash.
     * The Linux NMI watchdog generates periodic NMIs increasing a counter which is
     * compared with another counter increased in the timer interrupt handler. Therefore
     * we don't allow to setup an NMI watchdog.
     */
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
    /*
     * First test: NMI activated? Works only with Linux 2.6 -- 2.4 does not export
     * the nmi_watchdog variable.
     */
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) || \
      (defined CONFIG_X86_64 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0))
#   ifdef DO_DISABLE_NMI
    /* Try to switch the watchdog off ourselves before giving up. */
    if (nmi_atomic_read(&nmi_active) > 0)
    {
        printk(KERN_INFO DEVICE_NAME ": Trying to deactivate the NMI watchdog...\n");

        switch (nmi_watchdog)
        {
            case NMI_LOCAL_APIC:
                disable_lapic_nmi_watchdog();
                break;
            case NMI_NONE:
                nmi_atomic_dec(&nmi_active);
                break;
        }

        if (nmi_atomic_read(&nmi_active) == 0)
        {
            nmi_shutdown();
            printk(KERN_INFO DEVICE_NAME ": Successfully done.\n");
        }
        else
            printk(KERN_INFO DEVICE_NAME ": Failed!\n");
    }
#   endif /* DO_DISABLE_NMI */

    /*
     * Permanent IO_APIC mode active? No way to handle this!
     */
    if (nmi_watchdog == NMI_IO_APIC)
    {
        printk(KERN_ERR DEVICE_NAME
               ": NMI watchdog in IO_APIC mode active -- refused to load the kernel module!\n"
               DEVICE_NAME
               ": Please disable the NMI watchdog by specifying 'nmi_watchdog=0' at kernel\n"
               DEVICE_NAME
               ": command line.\n");
        return -EINVAL;
    }

    /*
     * See arch/i386/kernel/nmi.c on >= 2.6.19: -1 means it can never be enabled again
     */
    nmi_atomic_set(&nmi_active, -1);
    printk(KERN_INFO DEVICE_NAME ": Trying to deactivate the NMI watchdog permanently...\n");

    /*
     * Now fall through and see if it actually was enabled before. If so, fail
     * as we cannot deactivate it cleanly from here.
     */
#  else /* < 2.6.19 */
    /*
     * Older 2.6 kernels: nmi_watchdog is not initialized by default
     */
    if (nmi_watchdog != NMI_NONE)
        goto nmi_activated;
#  endif
# endif /* >= 2.6.0 */

    /*
     * Second test: Interrupt generated by performance counter not masked and can
     * generate an NMI. Works also with Linux 2.4.
     */
    {
        unsigned int v, ver, maxlvt;

        v   = apic_read(APIC_LVR);
        ver = GET_APIC_VERSION(v);
        /* 82489DXs do not report # of LVT entries. */
        maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
        if (maxlvt >= 4)
        {
            /* Read status of performance counter IRQ vector */
            v = apic_read(APIC_LVTPC);

            /* performance counter generates NMI and is not masked? */
            if ((GET_APIC_DELIVERY_MODE(v) == APIC_MODE_NMI) && !(v & APIC_LVT_MASKED))
            {
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) || \
     (defined CONFIG_X86_64 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0))
                printk(KERN_ERR DEVICE_NAME
                       ": NMI watchdog either active or at least initialized. Please disable the NMI\n"
                       DEVICE_NAME
                       ": watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
                return -EINVAL;
# else /* < 2.6.19 */
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
nmi_activated:
#  endif
                printk(KERN_ERR DEVICE_NAME
                       ": NMI watchdog active -- refused to load the kernel module! Please disable\n"
                       DEVICE_NAME
                       ": the NMI watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
                return -EINVAL;
# endif /* >= 2.6.19 */
            }
        }
    }
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
    printk(KERN_INFO DEVICE_NAME ": Successfully done.\n");
# endif /* >= 2.6.19 */
#endif /* CONFIG_X86_LOCAL_APIC */

#ifdef CONFIG_VBOXDRV_AS_MISC
    rc = misc_register(&gMiscDevice);
    if (rc)
    {
        printk(KERN_ERR DEVICE_NAME ": Can't register misc device! rc=%d\n", rc);
        return rc;
    }
#else  /* !CONFIG_VBOXDRV_AS_MISC */
    /*
     * Register character device.
     */
    g_iModuleMajor = DEVICE_MAJOR;
    rc = VBOX_REGISTER_DEVICE((dev_t)g_iModuleMajor, DEVICE_NAME, &gFileOpsVBoxDrv);
    if (rc < 0)
    {
        dprintf(("VBOX_REGISTER_DEVICE failed with rc=%#x!\n", rc));
        return rc;
    }

    /*
     * Save returned module major number.
     * When DEVICE_MAJOR is 0 the kernel picks one and returns it in rc.
     */
    if (DEVICE_MAJOR != 0)
        g_iModuleMajor = DEVICE_MAJOR;
    else
        g_iModuleMajor = rc;
    rc = 0;

#ifdef CONFIG_DEVFS_FS
    /*
     * Register a device entry
     */
    g_hDevFsVBoxDrv = VBOX_REGISTER_DEVFS();
    if (g_hDevFsVBoxDrv == NULL)
    {
        dprintf(("devfs_register failed!\n"));
        rc = -EINVAL;
    }
#endif
#endif /* !CONFIG_VBOXDRV_AS_MISC */
    if (!rc)
    {
        /*
         * Initialize the runtime.
         * On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
         */
        rc = RTR0Init(0);
        if (RT_SUCCESS(rc))
        {
#ifdef RT_ARCH_AMD64
            rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
#endif
            /*
             * Initialize the device extension.
             */
            if (RT_SUCCESS(rc))
                rc = supdrvInitDevExt(&g_DevExt);
            if (!rc)
            {
                /*
                 * Create the GIP page.
                 */
                rc = VBoxDrvLinuxInitGip(&g_DevExt);
                if (!rc)
                {
                    /* Fully initialized -- the only success exit. */
                    dprintf(("VBoxDrv::ModuleInit returning %#x\n", rc));
                    return rc;
                }

                supdrvDeleteDevExt(&g_DevExt);
            }
            else
                rc = -EINVAL;
            RTR0Term();
        }
        else
            rc = -EINVAL;

        /*
         * Failed, cleanup and return the error code.
         */
#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
        VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
#endif
    }
    /* Undo the device registration done above before reporting the failure. */
#ifdef CONFIG_VBOXDRV_AS_MISC
    misc_deregister(&gMiscDevice);
    dprintf(("VBoxDrv::ModuleInit returning %#x (minor:%d)\n", rc, gMiscDevice.minor));
#else
    VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
    dprintf(("VBoxDrv::ModuleInit returning %#x (major:%d)\n", rc, g_iModuleMajor));
#endif
    return rc;
}
654
655
/**
 * Unload the module.
 *
 * Reverses VBoxDrvLinuxInit(): unregisters the device node, destroys the
 * GIP, deletes the device extension and terminates IPRT.
 */
static void __exit VBoxDrvLinuxUnload(void)
{
    int rc;
    dprintf(("VBoxDrvLinuxUnload\n"));

    /*
     * I Don't think it's possible to unload a driver which processes have
     * opened, at least we'll blindly assume that here.
     */
#ifdef CONFIG_VBOXDRV_AS_MISC
    rc = misc_deregister(&gMiscDevice);
    if (rc < 0)
    {
        dprintf(("misc_deregister failed with rc=%#x\n", rc));
    }
#else  /* !CONFIG_VBOXDRV_AS_MISC */
#ifdef CONFIG_DEVFS_FS
    /*
     * Unregister a device entry
     */
    VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
#endif /* devfs */
    rc = VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
    if (rc < 0)
    {
        dprintf(("unregister_chrdev failed with rc=%#x (major:%d)\n", rc, g_iModuleMajor));
    }
#endif /* !CONFIG_VBOXDRV_AS_MISC */

    /*
     * Destroy GIP, delete the device extension and terminate IPRT.
     */
    VBoxDrvLinuxTermGip(&g_DevExt);
    supdrvDeleteDevExt(&g_DevExt);
    RTR0Term();
}
695
696
697/**
698 * Device open. Called on open /dev/vboxdrv
699 *
700 * @param pInode Pointer to inode info structure.
701 * @param pFilp Associated file pointer.
702 */
703static int VBoxDrvLinuxCreate(struct inode *pInode, struct file *pFilp)
704{
705 int rc;
706 PSUPDRVSESSION pSession;
707 dprintf(("VBoxDrvLinuxCreate: pFilp=%p\n", pFilp));
708
709 /*
710 * Call common code for the rest.
711 */
712 rc = supdrvCreateSession(&g_DevExt, (PSUPDRVSESSION *)&pSession);
713 if (!rc)
714 {
715 pSession->Uid = current->euid;
716 pSession->Gid = current->egid;
717 pSession->Process = RTProcSelf();
718 pSession->R0Process = RTR0ProcHandleSelf();
719 }
720
721 dprintf(("VBoxDrvLinuxCreate: g_DevExt=%p pSession=%p rc=%d\n", &g_DevExt, pSession, rc));
722 pFilp->private_data = pSession;
723
724 return VBoxDrvLinuxErr2LinuxErr(rc);
725}
726
727
/**
 * Close device. Called when the file reference is released.
 *
 * Tears down the session created by VBoxDrvLinuxCreate() and clears the
 * private_data back-pointer.
 *
 * @param   pInode  Pointer to inode info structure.
 * @param   pFilp   Associated file pointer.
 * @returns 0 always.
 */
static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp)
{
    dprintf(("VBoxDrvLinuxClose: pFilp=%p private_data=%p\n", pFilp, pFilp->private_data));
    supdrvCloseSession(&g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
    pFilp->private_data = NULL; /* make stale use detectable */
    return 0;
}
741
742
743/**
744 * Device I/O Control entry point.
745 *
746 * @param pInode Pointer to inode info structure.
747 * @param pFilp Associated file pointer.
748 * @param uCmd The function specified to ioctl().
749 * @param ulArg The argument specified to ioctl().
750 */
751static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
752{
753 /*
754 * Deal with the two high-speed IOCtl that takes it's arguments from
755 * the session and iCmd, and only returns a VBox status code.
756 */
757 if (RT_LIKELY( uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
758 || uCmd == SUP_IOCTL_FAST_DO_HWACC_RUN
759 || uCmd == SUP_IOCTL_FAST_DO_NOP))
760 return supdrvIOCtlFast(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
761 return VBoxDrvLinuxIOCtlSlow(pInode, pFilp, uCmd, ulArg);
762}
763
764
/**
 * Device I/O Control entry point, slow path.
 *
 * Reads and validates the request header from user space, buffers the whole
 * request in a kernel allocation, hands it to the common supdrvIOCtl() code
 * and copies the output back to the caller.
 *
 * @returns 0 on success, negative errno on failure.
 * @param   pInode  Pointer to inode info structure.
 * @param   pFilp   Associated file pointer.
 * @param   uCmd    The function specified to ioctl().
 * @param   ulArg   The argument specified to ioctl(); user address of a SUPREQHDR.
 */
static int VBoxDrvLinuxIOCtlSlow(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
{
    int rc;
    SUPREQHDR Hdr;
    PSUPREQHDR pHdr;
    uint32_t cbBuf;

    dprintf2(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p\n", pFilp, uCmd, (void *)ulArg));

    /*
     * Read the header.
     */
    if (RT_UNLIKELY(copy_from_user(&Hdr, (void *)ulArg, sizeof(Hdr))))
    {
        dprintf(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx,) failed; uCmd=%#x.\n", ulArg, uCmd));
        return -EFAULT;
    }
    if (RT_UNLIKELY((Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC))
    {
        dprintf(("VBoxDrvLinuxIOCtl: bad header magic %#x; uCmd=%#x\n", Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK, uCmd));
        return -EINVAL;
    }

    /*
     * Buffer the request.
     * The buffer must fit both input and output; cap it at 16MB to fend off
     * bogus or malicious sizes coming from user space.
     */
    cbBuf = RT_MAX(Hdr.cbIn, Hdr.cbOut);
    if (RT_UNLIKELY(cbBuf > _1M*16))
    {
        dprintf(("VBoxDrvLinuxIOCtl: too big cbBuf=%#x; uCmd=%#x\n", cbBuf, uCmd));
        return -E2BIG;
    }
    /* If the ioctl number encodes a size it must match the header sizes
       (a zero _IOC_SIZE means variable-sized request). */
    if (RT_UNLIKELY(cbBuf != _IOC_SIZE(uCmd) && _IOC_SIZE(uCmd)))
    {
        dprintf(("VBoxDrvLinuxIOCtl: bad ioctl cbBuf=%#x _IOC_SIZE=%#x; uCmd=%#x.\n", cbBuf, _IOC_SIZE(uCmd), uCmd));
        return -EINVAL;
    }
    pHdr = RTMemAlloc(cbBuf);
    if (RT_UNLIKELY(!pHdr))
    {
        OSDBGPRINT(("VBoxDrvLinuxIOCtl: failed to allocate buffer of %d bytes for uCmd=%#x.\n", cbBuf, uCmd));
        return -ENOMEM;
    }
    if (RT_UNLIKELY(copy_from_user(pHdr, (void *)ulArg, Hdr.cbIn)))
    {
        dprintf(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx, %#x) failed; uCmd=%#x.\n", ulArg, Hdr.cbIn, uCmd));
        RTMemFree(pHdr);
        return -EFAULT;
    }

    /*
     * Process the IOCtl.
     */
    rc = supdrvIOCtl(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data, pHdr);

    /*
     * Copy ioctl data and output buffer back to user space.
     */
    if (RT_LIKELY(!rc))
    {
        uint32_t cbOut = pHdr->cbOut;
        if (RT_UNLIKELY(cbOut > cbBuf))
        {
            /* Clamp: never copy more than we allocated. */
            OSDBGPRINT(("VBoxDrvLinuxIOCtl: too much output! %#x > %#x; uCmd=%#x!\n", cbOut, cbBuf, uCmd));
            cbOut = cbBuf;
        }
        if (RT_UNLIKELY(copy_to_user((void *)ulArg, pHdr, cbOut)))
        {
            /* this is really bad! */
            OSDBGPRINT(("VBoxDrvLinuxIOCtl: copy_to_user(%#lx,,%#x); uCmd=%#x!\n", ulArg, cbOut, uCmd));
            rc = -EFAULT;
        }
    }
    else
    {
        dprintf(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p failed, rc=%d\n", pFilp, uCmd, (void *)ulArg, rc));
        rc = -EINVAL;
    }
    RTMemFree(pHdr);

    dprintf2(("VBoxDrvLinuxIOCtl: returns %d\n", rc));
    return rc;
}
856
857
/**
 * Initializes any OS specific object creator fields.
 *
 * Nothing to do on Linux; both parameters are deliberately unused.
 */
void VBOXCALL supdrvOSObjInitCreator(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession)
{
    NOREF(pObj);
    NOREF(pSession);
}
866
867
/**
 * Checks if the session can access the object.
 *
 * On Linux this always defers to the default access policy (returns false
 * without writing *prc).
 *
 * @returns true if a decision has been made.
 * @returns false if the default access policy should be applied.
 *
 * @param   pObj        The object in question.
 * @param   pSession    The session wanting to access the object.
 * @param   pszObjName  The object name, can be NULL.
 * @param   prc         Where to store the result when returning true.
 */
bool VBOXCALL supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
{
    NOREF(pObj);
    NOREF(pSession);
    NOREF(pszObjName);
    NOREF(prc);
    return false;
}
887
888
889#ifndef USE_NEW_OS_INTERFACE_FOR_MM
890
/**
 * Compute allocation order. Some functions allocate 2^order pages.
 *
 * Returns the smallest iOrder such that (1UL << iOrder) >= cPages, i.e.
 * ceil(log2(cPages)); returns 0 for cPages <= 1.
 *
 * @returns order.
 * @param   cPages  Number of pages.
 */
static int VBoxDrvOrder(unsigned long cPages)
{
    int iOrder;
    unsigned long cTmp;

    /* iOrder = floor(log2(cPages)). */
    for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
        ;
    /* Round up when cPages is not an exact power of two. Use 1UL so the
       shift and mask are done in unsigned long arithmetic: the original
       plain int '1' made the shift undefined for iOrder >= 31 on LP64
       hosts (cPages >= 2^31). */
    if (cPages & ~(1UL << iOrder))
        ++iOrder;

    return iOrder;
}
909
910
911/**
912 * OS Specific code for locking down memory.
913 *
914 * @returns 0 on success.
915 * @returns SUPDRV_ERR_* on failure.
916 * @param pMem Pointer to memory.
917 * This is not linked in anywhere.
918 * @param paPages Array which should be filled with the address of the physical pages.
919 *
920 * @remark See sgl_map_user_pages() for an example of an similar function.
921 */
922int VBOXCALL supdrvOSLockMemOne(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
923{
924 int rc;
925 struct page **papPages;
926 unsigned iPage;
927 unsigned cPages = pMem->cb >> PAGE_SHIFT;
928 unsigned long pv = (unsigned long)pMem->pvR3;
929 struct vm_area_struct **papVMAs;
930
931 /*
932 * Allocate page pointer array.
933 */
934 papPages = vmalloc(cPages * sizeof(*papPages));
935 if (!papPages)
936 return SUPDRV_ERR_NO_MEMORY;
937
938 /*
939 * Allocate the VMA pointer array.
940 */
941 papVMAs = vmalloc(cPages * sizeof(*papVMAs));
942 if (!papVMAs)
943 return SUPDRV_ERR_NO_MEMORY;
944
945 /*
946 * Get user pages.
947 */
948 down_read(&current->mm->mmap_sem);
949 rc = get_user_pages(current, /* Task for fault acounting. */
950 current->mm, /* Whose pages. */
951 (unsigned long)pv, /* Where from. */
952 cPages, /* How many pages. */
953 1, /* Write to memory. */
954 0, /* force. */
955 papPages, /* Page array. */
956 papVMAs); /* vmas */
957 if (rc != cPages)
958 {
959 up_read(&current->mm->mmap_sem);
960 dprintf(("supdrvOSLockMemOne: get_user_pages failed. rc=%d\n", rc));
961 return SUPDRV_ERR_LOCK_FAILED;
962 }
963
964 for (iPage = 0; iPage < cPages; iPage++)
965 flush_dcache_page(papPages[iPage]);
966 up_read(&current->mm->mmap_sem);
967
968 pMem->u.locked.papPages = papPages;
969 pMem->u.locked.cPages = cPages;
970
971 /*
972 * Get addresses, protect against fork()
973 */
974 for (iPage = 0; iPage < cPages; iPage++)
975 {
976 paPages[iPage].Phys = page_to_phys(papPages[iPage]);
977 paPages[iPage].uReserved = 0;
978 papVMAs[iPage]->vm_flags |= VM_DONTCOPY;
979 }
980
981 vfree(papVMAs);
982
983 dprintf2(("supdrvOSLockMemOne: pvR3=%p cb=%d papPages=%p\n",
984 pMem->pvR3, pMem->cb, pMem->u.locked.papPages));
985 return 0;
986}
987
988
/**
 * Unlocks the memory pointed to by pv.
 *
 * Releases the pages pinned by supdrvOSLockMemOne() and frees the page
 * pointer array.
 *
 * @param   pMem    Pointer to memory to unlock.
 *
 * @remark  See sgl_unmap_user_pages() for an example of a similar function.
 */
void VBOXCALL supdrvOSUnlockMemOne(PSUPDRVMEMREF pMem)
{
    unsigned iPage;
    dprintf2(("supdrvOSUnlockMemOne: pvR3=%p cb=%d papPages=%p\n",
              pMem->pvR3, pMem->cb, pMem->u.locked.papPages));

    /*
     * Loop thru the pages and release them.
     * Non-reserved pages are marked dirty first because they were pinned
     * writable and may have been modified.
     */
    for (iPage = 0; iPage < pMem->u.locked.cPages; iPage++)
    {
        if (!PageReserved(pMem->u.locked.papPages[iPage]))
            SetPageDirty(pMem->u.locked.papPages[iPage]);
        page_cache_release(pMem->u.locked.papPages[iPage]);
    }

    /* free the page array */
    vfree(pMem->u.locked.papPages);
    pMem->u.locked.cPages = 0;
}
1016
1017
1018/**
1019 * OS Specific code for allocating page aligned memory with continuous fixed
1020 * physical paged backing.
1021 *
1022 * @returns 0 on success.
1023 * @returns SUPDRV_ERR_* on failure.
1024 * @param pMem Memory reference record of the memory to be allocated.
1025 * (This is not linked in anywhere.)
1026 * @param ppvR0 Where to store the virtual address of the ring-0 mapping. (optional)
1027 * @param ppvR3 Where to store the virtual address of the ring-3 mapping.
1028 * @param pHCPhys Where to store the physical address.
1029 */
1030int VBOXCALL supdrvOSContAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
1031{
1032 struct page *paPages;
1033 unsigned iPage;
1034 unsigned cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
1035 unsigned cPages = cbAligned >> PAGE_SHIFT;
1036 unsigned cOrder = VBoxDrvOrder(cPages);
1037 unsigned long ulAddr;
1038 dma_addr_t HCPhys;
1039 int rc = 0;
1040 pgprot_t pgFlags;
1041 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;
1042
1043 Assert(ppvR3);
1044 Assert(pHCPhys);
1045
1046 /*
1047 * Allocate page pointer array.
1048 */
1049#ifdef RT_ARCH_AMD64 /** @todo check out if there is a correct way of getting memory below 4GB (physically). */
1050 paPages = alloc_pages(GFP_DMA, cOrder);
1051#else
1052 paPages = alloc_pages(GFP_USER, cOrder);
1053#endif
1054 if (!paPages)
1055 return SUPDRV_ERR_NO_MEMORY;
1056
1057 /*
1058 * Lock the pages.
1059 */
1060 for (iPage = 0; iPage < cPages; iPage++)
1061 {
1062 SetPageReserved(&paPages[iPage]);
1063 if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1064 MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
1065#ifdef DEBUG
1066 if (iPage + 1 < cPages && (page_to_phys((&paPages[iPage])) + 0x1000) != page_to_phys((&paPages[iPage + 1])))
1067 {
1068 dprintf(("supdrvOSContAllocOne: Pages are not continuous!!!! iPage=%d phys=%llx physnext=%llx\n",
1069 iPage, (long long)page_to_phys((&paPages[iPage])), (long long)page_to_phys((&paPages[iPage + 1]))));
1070 BUG();
1071 }
1072#endif
1073 }
1074 HCPhys = page_to_phys(paPages);
1075
1076 /*
1077 * Allocate user space mapping and put the physical pages into it.
1078 */
1079 down_write(&current->mm->mmap_sem);
1080 ulAddr = do_mmap(NULL, 0, cbAligned, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0);
1081 if (!(ulAddr & ~PAGE_MASK))
1082 {
1083#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1084 int rc2 = remap_page_range(ulAddr, HCPhys, cbAligned, pgFlags);
1085#else
1086 int rc2 = 0;
1087 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1088 if (vma)
1089#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1090 rc2 = remap_page_range(vma, ulAddr, HCPhys, cbAligned, pgFlags);
1091#else
1092 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, cbAligned, pgFlags);
1093#endif
1094 else
1095 {
1096 rc = SUPDRV_ERR_NO_MEMORY;
1097 dprintf(("supdrvOSContAllocOne: no vma found for ulAddr=%#lx!\n", ulAddr));
1098 }
1099#endif
1100 if (rc2)
1101 {
1102 rc = SUPDRV_ERR_NO_MEMORY;
1103 dprintf(("supdrvOSContAllocOne: remap_page_range failed rc2=%d\n", rc2));
1104 }
1105 }
1106 else
1107 {
1108 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
1109 rc = SUPDRV_ERR_NO_MEMORY;
1110 }
1111 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1112
1113 /*
1114 * Success?
1115 */
1116 if (!rc)
1117 {
1118 *pHCPhys = HCPhys;
1119 *ppvR3 = ulAddr;
1120 if (ppvR0)
1121 *ppvR0 = (void *)ulAddr;
1122 pMem->pvR3 = ulAddr;
1123 pMem->pvR0 = NULL;
1124 pMem->u.cont.paPages = paPages;
1125 pMem->u.cont.cPages = cPages;
1126 pMem->cb = cbAligned;
1127
1128 dprintf2(("supdrvOSContAllocOne: pvR0=%p pvR3=%p cb=%d paPages=%p *pHCPhys=%lx *ppvR0=*ppvR3=%p\n",
1129 pMem->pvR0, pMem->pvR3, pMem->cb, paPages, (unsigned long)*pHCPhys, *ppvR3));
1130 global_flush_tlb();
1131 return 0;
1132 }
1133
1134 /*
1135 * Failure, cleanup and be gone.
1136 */
1137 down_write(&current->mm->mmap_sem);
1138 if (ulAddr & ~PAGE_MASK)
1139 MY_DO_MUNMAP(current->mm, ulAddr, pMem->cb);
1140 for (iPage = 0; iPage < cPages; iPage++)
1141 {
1142 ClearPageReserved(&paPages[iPage]);
1143 if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1144 MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, PAGE_KERNEL);
1145 }
1146 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1147 __free_pages(paPages, cOrder);
1148
1149 global_flush_tlb();
1150 return rc;
1151}
1152
1153
1154/**
1155 * Frees contiguous memory.
1156 *
1157 * @param pMem Memory reference record of the memory to be freed.
1158 */
1159void VBOXCALL supdrvOSContFreeOne(PSUPDRVMEMREF pMem)
1160{
1161 unsigned iPage;
1162
1163 dprintf2(("supdrvOSContFreeOne: pvR0=%p pvR3=%p cb=%d paPages=%p\n",
1164 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.cont.paPages));
1165
1166 /*
1167 * do_exit() destroys the mm before closing files.
1168 * I really hope it cleans up our stuff properly...
1169 */
1170 if (current->mm)
1171 {
1172 down_write(&current->mm->mmap_sem);
1173 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, pMem->cb);
1174 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1175 }
1176
1177 /*
1178 * Change page attributes freeing the pages.
1179 */
1180 for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
1181 {
1182 ClearPageReserved(&pMem->u.cont.paPages[iPage]);
1183 if (!PageHighMem(&pMem->u.cont.paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1184 MY_CHANGE_PAGE_ATTR(&pMem->u.cont.paPages[iPage], 1, PAGE_KERNEL);
1185 }
1186 __free_pages(pMem->u.cont.paPages, VBoxDrvOrder(pMem->u.cont.cPages));
1187
1188 pMem->u.cont.cPages = 0;
1189}
1190
1191
1192/**
1193 * Allocates memory which mapped into both kernel and user space.
1194 * The returned memory is page aligned and so is the allocation.
1195 *
1196 * @returns 0 on success.
1197 * @returns SUPDRV_ERR_* on failure.
1198 * @param pMem Memory reference record of the memory to be allocated.
1199 * (This is not linked in anywhere.)
1200 * @param ppvR0 Where to store the address of the Ring-0 mapping.
1201 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1202 */
1203int VBOXCALL supdrvOSMemAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
1204{
1205 const unsigned cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
1206 const unsigned cPages = cbAligned >> PAGE_SHIFT;
1207#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
1208 unsigned cOrder = VBoxDrvOrder(cPages);
1209 struct page *paPages;
1210#endif
1211 struct page **papPages;
1212 unsigned iPage;
1213 pgprot_t pgFlags;
1214 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;
1215
1216 /*
1217 * Allocate array with page pointers.
1218 */
1219 pMem->u.mem.cPages = 0;
1220 pMem->u.mem.papPages = papPages = kmalloc(sizeof(papPages[0]) * cPages, GFP_KERNEL);
1221 if (!papPages)
1222 return SUPDRV_ERR_NO_MEMORY;
1223
1224 /*
1225 * Allocate the pages.
1226 */
1227#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1228 for (iPage = 0; iPage < cPages; iPage++)
1229 {
1230 papPages[iPage] = alloc_page(GFP_HIGHUSER);
1231 if (!papPages[iPage])
1232 {
1233 pMem->u.mem.cPages = iPage;
1234 supdrvOSMemFreeOne(pMem);
1235 return SUPDRV_ERR_NO_MEMORY;
1236 }
1237 }
1238
1239#else /* < 2.4.22 */
1240 paPages = alloc_pages(GFP_USER, cOrder);
1241 if (!paPages)
1242 {
1243 supdrvOSMemFreeOne(pMem);
1244 return SUPDRV_ERR_NO_MEMORY;
1245 }
1246 for (iPage = 0; iPage < cPages; iPage++)
1247 {
1248 papPages[iPage] = &paPages[iPage];
1249 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1250 MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
1251 if (PageHighMem(papPages[iPage]))
1252 BUG();
1253 }
1254#endif
1255 pMem->u.mem.cPages = cPages;
1256
1257 /*
1258 * Reserve the pages.
1259 */
1260 for (iPage = 0; iPage < cPages; iPage++)
1261 SetPageReserved(papPages[iPage]);
1262
1263 /*
1264 * Create the Ring-0 mapping.
1265 */
1266 if (ppvR0)
1267 {
1268#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1269# ifdef VM_MAP
1270 *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_MAP, pgFlags);
1271# else
1272 *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_ALLOC, pgFlags);
1273# endif
1274#else
1275 *ppvR0 = pMem->pvR0 = phys_to_virt(page_to_phys(papPages[0]));
1276#endif
1277 }
1278 if (pMem->pvR0 || !ppvR0)
1279 {
1280 /*
1281 * Create the ring3 mapping.
1282 */
1283 if (ppvR3)
1284 *ppvR3 = pMem->pvR3 = VBoxDrvLinuxMapUser(papPages, cPages, PROT_READ | PROT_WRITE | PROT_EXEC, pgFlags);
1285 if (pMem->pvR3 || !ppvR3)
1286 return 0;
1287 dprintf(("supdrvOSMemAllocOne: failed to map into r3! cPages=%u\n", cPages));
1288 }
1289 else
1290 dprintf(("supdrvOSMemAllocOne: failed to map into r0! cPages=%u\n", cPages));
1291
1292 supdrvOSMemFreeOne(pMem);
1293 return SUPDRV_ERR_NO_MEMORY;
1294}
1295
1296
1297/**
1298 * Get the physical addresses of the pages in the allocation.
1299 * This is called while inside bundle the spinlock.
1300 *
1301 * @param pMem Memory reference record of the memory.
1302 * @param paPages Where to store the page addresses.
1303 */
1304void VBOXCALL supdrvOSMemGetPages(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
1305{
1306 unsigned iPage;
1307 for (iPage = 0; iPage < pMem->u.mem.cPages; iPage++)
1308 {
1309 paPages[iPage].Phys = page_to_phys(pMem->u.mem.papPages[iPage]);
1310 paPages[iPage].uReserved = 0;
1311 }
1312}
1313
1314
1315/**
1316 * Frees memory allocated by supdrvOSMemAllocOne().
1317 *
1318 * @param pMem Memory reference record of the memory to be free.
1319 */
1320void VBOXCALL supdrvOSMemFreeOne(PSUPDRVMEMREF pMem)
1321{
1322 dprintf2(("supdrvOSMemFreeOne: pvR0=%p pvR3=%p cb=%d cPages=%d papPages=%p\n",
1323 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.mem.cPages, pMem->u.mem.papPages));
1324
1325 /*
1326 * Unmap the user mapping (if any).
1327 * do_exit() destroys the mm before closing files.
1328 */
1329 if (pMem->pvR3 && current->mm)
1330 {
1331 down_write(&current->mm->mmap_sem);
1332 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, RT_ALIGN(pMem->cb, PAGE_SIZE));
1333 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1334 }
1335 pMem->pvR3 = NIL_RTR3PTR;
1336
1337 /*
1338 * Unmap the kernel mapping (if any).
1339 */
1340 if (pMem->pvR0)
1341 {
1342#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1343 vunmap(pMem->pvR0);
1344#endif
1345 pMem->pvR0 = NULL;
1346 }
1347
1348 /*
1349 * Free the physical pages.
1350 */
1351 if (pMem->u.mem.papPages)
1352 {
1353 struct page **papPages = pMem->u.mem.papPages;
1354 const unsigned cPages = pMem->u.mem.cPages;
1355 unsigned iPage;
1356
1357 /* Restore the page flags. */
1358 for (iPage = 0; iPage < cPages; iPage++)
1359 {
1360 ClearPageReserved(papPages[iPage]);
1361#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
1362 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1363 MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, PAGE_KERNEL);
1364#endif
1365 }
1366
1367 /* Free the pages. */
1368#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1369 for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
1370 __free_page(papPages[iPage]);
1371#else
1372 if (cPages > 0)
1373 __free_pages(papPages[0], VBoxDrvOrder(cPages));
1374#endif
1375 /* Free the page pointer array. */
1376 kfree(papPages);
1377 pMem->u.mem.papPages = NULL;
1378 }
1379 pMem->u.mem.cPages = 0;
1380}
1381
1382
1383/**
1384 * Maps a range of pages into user space.
1385 *
1386 * @returns Pointer to the user space mapping on success.
1387 * @returns NULL on failure.
1388 * @param papPages Array of the pages to map.
1389 * @param cPages Number of pages to map.
1390 * @param fProt The mapping protection.
1391 * @param pgFlags The page level protection.
1392 */
1393static RTR3PTR VBoxDrvLinuxMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags)
1394{
1395 int rc = SUPDRV_ERR_NO_MEMORY;
1396 unsigned long ulAddr;
1397
1398 /*
1399 * Allocate user space mapping.
1400 */
1401 down_write(&current->mm->mmap_sem);
1402 ulAddr = do_mmap(NULL, 0, cPages * PAGE_SIZE, fProt, MAP_SHARED | MAP_ANONYMOUS, 0);
1403 if (!(ulAddr & ~PAGE_MASK))
1404 {
1405 /*
1406 * Map page by page into the mmap area.
1407 * This is generic, paranoid and not very efficient.
1408 */
1409 int rc = 0;
1410 unsigned long ulAddrCur = ulAddr;
1411 unsigned iPage;
1412 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1413 {
1414#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1415 struct vm_area_struct *vma = find_vma(current->mm, ulAddrCur);
1416 if (!vma)
1417 break;
1418#endif
1419
1420#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1421 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(papPages[iPage]), PAGE_SIZE, pgFlags);
1422#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1423 rc = remap_page_range(vma, ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1424#else /* 2.4 */
1425 rc = remap_page_range(ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1426#endif
1427 if (rc)
1428 break;
1429 }
1430
1431 /*
1432 * Successful?
1433 */
1434 if (iPage >= cPages)
1435 {
1436 up_write(&current->mm->mmap_sem);
1437 return ulAddr;
1438 }
1439
1440 /* no, cleanup! */
1441 if (rc)
1442 dprintf(("VBoxDrvLinuxMapUser: remap_[page|pfn]_range failed! rc=%d\n", rc));
1443 else
1444 dprintf(("VBoxDrvLinuxMapUser: find_vma failed!\n"));
1445
1446 MY_DO_MUNMAP(current->mm, ulAddr, cPages << PAGE_SHIFT);
1447 }
1448 else
1449 {
1450 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
1451 rc = SUPDRV_ERR_NO_MEMORY;
1452 }
1453 up_write(&current->mm->mmap_sem);
1454
1455 return NIL_RTR3PTR;
1456}
1457
1458#endif /* !USE_NEW_OS_INTERFACE_FOR_MM */
1459
1460
1461/**
1462 * Initializes the GIP.
1463 *
1464 * @returns negative errno.
1465 * @param pDevExt Instance data. GIP stuff may be updated.
1466 */
1467static int VBoxDrvLinuxInitGip(PSUPDRVDEVEXT pDevExt)
1468{
1469 struct page *pPage;
1470 dma_addr_t HCPhys;
1471 PSUPGLOBALINFOPAGE pGip;
1472#ifdef CONFIG_SMP
1473 unsigned i;
1474#endif
1475 dprintf(("VBoxDrvLinuxInitGip:\n"));
1476
1477 /*
1478 * Allocate the page.
1479 */
1480 pPage = alloc_pages(GFP_USER, 0);
1481 if (!pPage)
1482 {
1483 dprintf(("VBoxDrvLinuxInitGip: failed to allocate the GIP page\n"));
1484 return -ENOMEM;
1485 }
1486
1487 /*
1488 * Lock the page.
1489 */
1490 SetPageReserved(pPage);
1491 g_pGipPage = pPage;
1492
1493 /*
1494 * Call common initialization routine.
1495 */
1496 HCPhys = page_to_phys(pPage);
1497 pGip = (PSUPGLOBALINFOPAGE)page_address(pPage);
1498 pDevExt->ulLastJiffies = jiffies;
1499#ifdef TICK_NSEC
1500 pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * TICK_NSEC;
1501 dprintf(("VBoxDrvInitGIP: TICK_NSEC=%ld HZ=%d jiffies=%ld now=%lld\n",
1502 TICK_NSEC, HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
1503#else
1504 pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * (1000000 / HZ);
1505 dprintf(("VBoxDrvInitGIP: TICK_NSEC=%d HZ=%d jiffies=%ld now=%lld\n",
1506 (int)(1000000 / HZ), HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
1507#endif
1508 supdrvGipInit(pDevExt, pGip, HCPhys, pDevExt->u64LastMonotime,
1509 HZ <= 1000 ? HZ : 1000);
1510
1511 /*
1512 * Initialize the timer.
1513 */
1514 init_timer(&g_GipTimer);
1515 g_GipTimer.data = (unsigned long)pDevExt;
1516 g_GipTimer.function = VBoxDrvLinuxGipTimer;
1517 g_GipTimer.expires = jiffies;
1518#ifdef CONFIG_SMP
1519 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1520 {
1521 pDevExt->aCPUs[i].u64LastMonotime = pDevExt->u64LastMonotime;
1522 pDevExt->aCPUs[i].ulLastJiffies = pDevExt->ulLastJiffies;
1523 pDevExt->aCPUs[i].iSmpProcessorId = -512;
1524 init_timer(&pDevExt->aCPUs[i].Timer);
1525 pDevExt->aCPUs[i].Timer.data = i;
1526 pDevExt->aCPUs[i].Timer.function = VBoxDrvLinuxGipTimerPerCpu;
1527 pDevExt->aCPUs[i].Timer.expires = jiffies;
1528 }
1529#endif
1530
1531 return 0;
1532}
1533
1534
1535/**
1536 * Terminates the GIP.
1537 *
1538 * @returns negative errno.
1539 * @param pDevExt Instance data. GIP stuff may be updated.
1540 */
1541static int VBoxDrvLinuxTermGip(PSUPDRVDEVEXT pDevExt)
1542{
1543 struct page *pPage;
1544 PSUPGLOBALINFOPAGE pGip;
1545#ifdef CONFIG_SMP
1546 unsigned i;
1547#endif
1548 dprintf(("VBoxDrvLinuxTermGip:\n"));
1549
1550 /*
1551 * Delete the timer if it's pending.
1552 */
1553 if (timer_pending(&g_GipTimer))
1554 del_timer_sync(&g_GipTimer);
1555#ifdef CONFIG_SMP
1556 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1557 if (timer_pending(&pDevExt->aCPUs[i].Timer))
1558 del_timer_sync(&pDevExt->aCPUs[i].Timer);
1559#endif
1560
1561 /*
1562 * Uninitialize the content.
1563 */
1564 pGip = pDevExt->pGip;
1565 pDevExt->pGip = NULL;
1566 if (pGip)
1567 supdrvGipTerm(pGip);
1568
1569 /*
1570 * Free the page.
1571 */
1572 pPage = g_pGipPage;
1573 g_pGipPage = NULL;
1574 if (pPage)
1575 {
1576 ClearPageReserved(pPage);
1577 __free_pages(pPage, 0);
1578 }
1579
1580 return 0;
1581}
1582
1583/**
1584 * Timer callback function.
1585 *
1586 * In ASYNC TSC mode this is called on the primary CPU, and we're
1587 * assuming that the CPU remains online.
1588 *
1589 * @param ulUser The device extension pointer.
1590 */
1591static void VBoxDrvLinuxGipTimer(unsigned long ulUser)
1592{
1593 PSUPDRVDEVEXT pDevExt;
1594 PSUPGLOBALINFOPAGE pGip;
1595 unsigned long ulNow;
1596 unsigned long ulDiff;
1597 uint64_t u64Monotime;
1598 unsigned long SavedFlags;
1599
1600 local_irq_save(SavedFlags);
1601
1602 ulNow = jiffies;
1603 pDevExt = (PSUPDRVDEVEXT)ulUser;
1604 pGip = pDevExt->pGip;
1605
1606#ifdef CONFIG_SMP
1607 if (pGip && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
1608 {
1609 uint8_t iCPU = ASMGetApicId();
1610 ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
1611 pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
1612#ifdef TICK_NSEC
1613 u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
1614#else
1615 u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
1616#endif
1617 pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
1618 }
1619 else
1620#endif /* CONFIG_SMP */
1621 {
1622 ulDiff = ulNow - pDevExt->ulLastJiffies;
1623 pDevExt->ulLastJiffies = ulNow;
1624#ifdef TICK_NSEC
1625 u64Monotime = pDevExt->u64LastMonotime + ulDiff * TICK_NSEC;
1626#else
1627 u64Monotime = pDevExt->u64LastMonotime + ulDiff * (1000000 / HZ);
1628#endif
1629 pDevExt->u64LastMonotime = u64Monotime;
1630 }
1631 if (RT_LIKELY(pGip))
1632 supdrvGipUpdate(pDevExt->pGip, u64Monotime);
1633 if (RT_LIKELY(!pDevExt->fGIPSuspended))
1634 mod_timer(&g_GipTimer, ulNow + (HZ <= 1000 ? 1 : ONE_MSEC_IN_JIFFIES));
1635
1636 local_irq_restore(SavedFlags);
1637}
1638
1639
#ifdef CONFIG_SMP
/**
 * Timer callback function for the other CPUs.
 *
 * Async TSC mode only: each CPU has its own timer so the clock is always
 * advanced on the CPU it belongs to.
 *
 * @param   iTimerCPU   The APIC ID of this timer.
 */
static void VBoxDrvLinuxGipTimerPerCpu(unsigned long iTimerCPU)
{
    PSUPDRVDEVEXT pDevExt;
    PSUPGLOBALINFOPAGE pGip;
    uint8_t iCPU;
    uint64_t u64Monotime;
    unsigned long SavedFlags;
    unsigned long ulNow;

    /* The time bookkeeping must not be interrupted on this CPU. */
    local_irq_save(SavedFlags);

    ulNow = jiffies;
    pDevExt = &g_DevExt;
    pGip = pDevExt->pGip;
    iCPU = ASMGetApicId();

    /* (RT_ELEMENTS is compile-time sizeof arithmetic, so pGip is not dereferenced here.) */
    if (RT_LIKELY(iCPU < RT_ELEMENTS(pGip->aCPUs)))
    {
        /* The timer must be executing on the CPU it was armed for. */
        if (RT_LIKELY(iTimerCPU == iCPU))
        {
            unsigned long ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
            pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
#ifdef TICK_NSEC
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
#else
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
            pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
            if (RT_LIKELY(pGip))
                supdrvGipUpdatePerCpu(pGip, u64Monotime, iCPU);
            /* Rearm for roughly 1 ms later, unless a suspend is in progress. */
            if (RT_LIKELY(!pDevExt->fGIPSuspended))
                mod_timer(&pDevExt->aCPUs[iCPU].Timer, ulNow + (HZ <= 1000 ? 1 : ONE_MSEC_IN_JIFFIES));
        }
        else
            printk("vboxdrv: error: GIP CPU update timer executing on the wrong CPU: apicid=%d != timer-apicid=%ld (cpuid=%d !=? timer-cpuid=%d)\n",
                   iCPU, iTimerCPU, smp_processor_id(), pDevExt->aCPUs[iTimerCPU].iSmpProcessorId);
    }
    else
        printk("vboxdrv: error: APIC ID is bogus (GIP CPU update): apicid=%d max=%lu cpuid=%d\n",
               iCPU, (unsigned long)RT_ELEMENTS(pGip->aCPUs), smp_processor_id());

    local_irq_restore(SavedFlags);
}
#endif /* CONFIG_SMP */
1690
1691
1692/**
1693 * Maps the GIP into user space.
1694 *
1695 * @returns negative errno.
1696 * @param pDevExt Instance data.
1697 */
1698int VBOXCALL supdrvOSGipMap(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE *ppGip)
1699{
1700 int rc = 0;
1701 unsigned long ulAddr;
1702 unsigned long HCPhys = pDevExt->HCPhysGip;
1703 pgprot_t pgFlags;
1704 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_USER;
1705 dprintf2(("supdrvOSGipMap: ppGip=%p\n", ppGip));
1706
1707 /*
1708 * Allocate user space mapping and put the physical pages into it.
1709 */
1710 down_write(&current->mm->mmap_sem);
1711 ulAddr = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, 0);
1712 if (!(ulAddr & ~PAGE_MASK))
1713 {
1714#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1715 int rc2 = remap_page_range(ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1716#else
1717 int rc2 = 0;
1718 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1719 if (vma)
1720#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1721 rc2 = remap_page_range(vma, ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1722#else
1723 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, PAGE_SIZE, pgFlags);
1724#endif
1725 else
1726 {
1727 rc = SUPDRV_ERR_NO_MEMORY;
1728 dprintf(("supdrvOSGipMap: no vma found for ulAddr=%#lx!\n", ulAddr));
1729 }
1730#endif
1731 if (rc2)
1732 {
1733 rc = SUPDRV_ERR_NO_MEMORY;
1734 dprintf(("supdrvOSGipMap: remap_page_range failed rc2=%d\n", rc2));
1735 }
1736 }
1737 else
1738 {
1739 dprintf(("supdrvOSGipMap: do_mmap failed ulAddr=%#lx\n", ulAddr));
1740 rc = SUPDRV_ERR_NO_MEMORY;
1741 }
1742 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1743
1744 /*
1745 * Success?
1746 */
1747 if (!rc)
1748 {
1749 *ppGip = (PSUPGLOBALINFOPAGE)ulAddr;
1750 dprintf2(("supdrvOSGipMap: ppGip=%p\n", *ppGip));
1751 return 0;
1752 }
1753
1754 /*
1755 * Failure, cleanup and be gone.
1756 */
1757 if (ulAddr & ~PAGE_MASK)
1758 {
1759 down_write(&current->mm->mmap_sem);
1760 MY_DO_MUNMAP(current->mm, ulAddr, PAGE_SIZE);
1761 up_write(&current->mm->mmap_sem);
1762 }
1763
1764 dprintf2(("supdrvOSGipMap: returns %d\n", rc));
1765 return rc;
1766}
1767
1768
1769/**
1770 * Maps the GIP into user space.
1771 *
1772 * @returns negative errno.
1773 * @param pDevExt Instance data.
1774 */
1775int VBOXCALL supdrvOSGipUnmap(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip)
1776{
1777 dprintf2(("supdrvOSGipUnmap: pGip=%p\n", pGip));
1778 if (current->mm)
1779 {
1780 down_write(&current->mm->mmap_sem);
1781 MY_DO_MUNMAP(current->mm, (unsigned long)pGip, PAGE_SIZE);
1782 up_write(&current->mm->mmap_sem);
1783 }
1784 dprintf2(("supdrvOSGipUnmap: returns 0\n"));
1785 return 0;
1786}
1787
1788
1789/**
1790 * Resumes the GIP updating.
1791 *
1792 * @param pDevExt Instance data.
1793 */
1794void VBOXCALL supdrvOSGipResume(PSUPDRVDEVEXT pDevExt)
1795{
1796 dprintf2(("supdrvOSGipResume:\n"));
1797 ASMAtomicXchgU8(&pDevExt->fGIPSuspended, false);
1798#ifdef CONFIG_SMP
1799 if (pDevExt->pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1800#endif
1801 mod_timer(&g_GipTimer, jiffies);
1802#ifdef CONFIG_SMP
1803 else
1804 {
1805 mod_timer(&g_GipTimer, jiffies);
1806 smp_call_function(VBoxDrvLinuxGipResumePerCpu, pDevExt, 0 /* retry */, 1 /* wait */);
1807 }
1808#endif
1809}
1810
1811
#ifdef CONFIG_SMP
/**
 * Callback for resuming GIP updating on the other CPUs.
 *
 * This is only used when the GIP is in async tsc mode.
 *
 * @param   pvUser  Pointer to the device instance.
 */
static void VBoxDrvLinuxGipResumePerCpu(void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    uint8_t iCPU = ASMGetApicId();

    /* Paranoia: the APIC id indexes aCPUs, so bail out if it's out of range. */
    if (RT_UNLIKELY(iCPU >= RT_ELEMENTS(pDevExt->pGip->aCPUs)))
    {
        printk("vboxdrv: error: apicid=%d max=%lu cpuid=%d\n",
               iCPU, (unsigned long)RT_ELEMENTS(pDevExt->pGip->aCPUs), smp_processor_id());
        return;
    }

    /* Record which linux cpu this apic id maps to and kick its timer. */
    pDevExt->aCPUs[iCPU].iSmpProcessorId = smp_processor_id();
    mod_timer(&pDevExt->aCPUs[iCPU].Timer, jiffies);
}
#endif /* CONFIG_SMP */
1836
1837
1838/**
1839 * Suspends the GIP updating.
1840 *
1841 * @param pDevExt Instance data.
1842 */
1843void VBOXCALL supdrvOSGipSuspend(PSUPDRVDEVEXT pDevExt)
1844{
1845#ifdef CONFIG_SMP
1846 unsigned i;
1847#endif
1848 dprintf2(("supdrvOSGipSuspend:\n"));
1849 ASMAtomicXchgU8(&pDevExt->fGIPSuspended, true);
1850
1851 if (timer_pending(&g_GipTimer))
1852 del_timer_sync(&g_GipTimer);
1853#ifdef CONFIG_SMP
1854 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1855 if (timer_pending(&pDevExt->aCPUs[i].Timer))
1856 del_timer_sync(&pDevExt->aCPUs[i].Timer);
1857#endif
1858}
1859
1860
1861/**
1862 * Get the current CPU count.
1863 * @returns Number of cpus.
1864 */
1865unsigned VBOXCALL supdrvOSGetCPUCount(void)
1866{
1867#ifdef CONFIG_SMP
1868# if defined(num_present_cpus)
1869 return num_present_cpus();
1870# elif defined(num_online_cpus)
1871 return num_online_cpus();
1872# else
1873 return smp_num_cpus;
1874# endif
1875#else
1876 return 1;
1877#endif
1878}
1879
1880/**
1881 * Force async tsc mode.
1882 * @todo add a module argument for this.
1883 */
1884bool VBOXCALL supdrvOSGetForcedAsyncTscMode(void)
1885{
1886 return false;
1887}
1888
1889
1890/**
1891 * Converts a supdrv error code to an linux error code.
1892 *
1893 * @returns corresponding linux error code.
1894 * @param rc supdrv error code (SUPDRV_ERR_* defines).
1895 */
1896static int VBoxDrvLinuxErr2LinuxErr(int rc)
1897{
1898 switch (rc)
1899 {
1900 case 0: return 0;
1901 case SUPDRV_ERR_GENERAL_FAILURE: return -EACCES;
1902 case SUPDRV_ERR_INVALID_PARAM: return -EINVAL;
1903 case SUPDRV_ERR_INVALID_MAGIC: return -EILSEQ;
1904 case SUPDRV_ERR_INVALID_HANDLE: return -ENXIO;
1905 case SUPDRV_ERR_INVALID_POINTER: return -EFAULT;
1906 case SUPDRV_ERR_LOCK_FAILED: return -ENOLCK;
1907 case SUPDRV_ERR_ALREADY_LOADED: return -EEXIST;
1908 case SUPDRV_ERR_PERMISSION_DENIED: return -EPERM;
1909 case SUPDRV_ERR_VERSION_MISMATCH: return -ENOSYS;
1910 case SUPDRV_ERR_IDT_FAILED: return -1000;
1911 }
1912
1913 return -EPERM;
1914}
1915
1916
1917RTDECL(int) SUPR0Printf(const char *pszFormat, ...)
1918{
1919#if 1
1920 va_list args;
1921 char szMsg[512];
1922
1923 va_start(args, pszFormat);
1924 vsnprintf(szMsg, sizeof(szMsg) - 1, pszFormat, args);
1925 szMsg[sizeof(szMsg) - 1] = '\0';
1926 printk("%s", szMsg);
1927 va_end(args);
1928#else
1929 /* forward to printf - needs some more GCC hacking to fix ebp... */
1930 __asm__ __volatile__ ("mov %0, %esp\n\t"
1931 "jmp %1\n\t",
1932 :: "r" ((uintptr_t)&pszFormat - 4),
1933 "m" (printk));
1934#endif
1935 return 0;
1936}
1937
1938
/** Runtime assert implementation for Linux Ring-0.
 * Logs the failed expression and its source location via printk; it does
 * not stop execution. */
RTDECL(void) AssertMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
{
    printk("!!Assertion Failed!!\n"
           "Expression: %s\n"
           "Location : %s(%d) %s\n",
           pszExpr, pszFile, uLine, pszFunction);
}
1947
1948
1949/** Runtime assert implementation for Linux Ring-0. */
1950RTDECL(void) AssertMsg2(const char *pszFormat, ...)
1951{ /* forwarder. */
1952 va_list ap;
1953 char msg[256];
1954
1955 va_start(ap, pszFormat);
1956 vsnprintf(msg, sizeof(msg) - 1, pszFormat, ap);
1957 msg[sizeof(msg) - 1] = '\0';
1958 printk("%s", msg);
1959 va_end(ap);
1960}
1961
1962
/* GCC C++ hack. */
/* Dummy definition so C++ translation units linked into the module can
   resolve the exception-handling personality symbol without pulling in the
   C++ runtime; the 0xcccccccc filler makes any accidental use easy to spot
   in a debugger. */
unsigned __gxx_personality_v0 = 0xcccccccc;
1965
1966
/* Module entry and exit points. */
module_init(VBoxDrvLinuxInit);
module_exit(VBoxDrvLinuxUnload);

/* Module metadata. */
MODULE_AUTHOR("innotek GmbH");
MODULE_DESCRIPTION("VirtualBox Support Driver");
MODULE_LICENSE("GPL");
#ifdef MODULE_VERSION
/* Two-level stringification so SUPDRVIOC_VERSION (a numeric define) is
   expanded before being turned into a string. */
#define xstr(s) str(s)
#define str(s) #s
MODULE_VERSION(VBOX_VERSION_STRING " (" xstr(SUPDRVIOC_VERSION) ")");
#endif
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette