VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c@ 1869

Last change on this file since 1869 was 1869, checked in by vboxsync, 18 years ago

Fixed wrong u64LastMonotime usage (the cause of all the trouble). Disable interrupts while updating GIP like on NT.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 51.2 KB
Line 
1/** @file
2 * The VirtualBox Support Driver - Linux hosts.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21/*******************************************************************************
22* Header Files *
23*******************************************************************************/
24#include "SUPDRV.h"
25#include "version-generated.h"
26
27#include <iprt/assert.h>
28#include <iprt/spinlock.h>
29#include <iprt/semaphore.h>
30#include <iprt/initterm.h>
31#include <iprt/process.h>
32#include <iprt/err.h>
33#include <iprt/mem.h>
34
35#include <linux/module.h>
36#include <linux/kernel.h>
37#include <linux/init.h>
38#include <linux/fs.h>
39#include <linux/mm.h>
40#include <linux/pagemap.h>
41#include <linux/slab.h>
42#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
43# include <linux/jiffies.h>
44#endif
45#include <asm/mman.h>
46#include <asm/io.h>
47#include <asm/uaccess.h>
48#ifdef CONFIG_DEVFS_FS
49# include <linux/devfs_fs_kernel.h>
50#endif
51#ifdef CONFIG_VBOXDRV_AS_MISC
52# include <linux/miscdevice.h>
53#endif
54#ifdef CONFIG_X86_LOCAL_APIC
55# include <asm/apic.h>
56# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
57# include <asm/nmi.h>
58# endif
59#endif
60
61#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
62# ifndef page_to_pfn
63# define page_to_pfn(page) ((page) - mem_map)
64# endif
65# include <asm/pgtable.h>
66# define global_flush_tlb __flush_tlb_global
67#endif
68
69/* devfs defines */
70#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
71# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
72
73# define VBOX_REGISTER_DEVFS() \
74({ \
75 void *rc = NULL; \
76 if (devfs_mk_cdev(MKDEV(DEVICE_MAJOR, 0), \
77 S_IFCHR | S_IRUGO | S_IWUGO, \
78 DEVICE_NAME) == 0) \
79 rc = (void *)' '; /* return not NULL */ \
80 rc; \
81 })
82
83# define VBOX_UNREGISTER_DEVFS(handle) \
84 devfs_remove(DEVICE_NAME);
85
86# else /* < 2.6.0 */
87
88# define VBOX_REGISTER_DEVFS() \
89 devfs_register(NULL, DEVICE_NAME, DEVFS_FL_DEFAULT, \
90 DEVICE_MAJOR, 0, \
91 S_IFCHR | S_IRUGO | S_IWUGO, \
92 &gFileOpsVBoxDrv, NULL)
93
94# define VBOX_UNREGISTER_DEVFS(handle) \
95 if (handle != NULL) \
96 devfs_unregister(handle)
97
98# endif /* < 2.6.0 */
99#endif /* CONFIG_DEV_FS && !CONFIG_VBOXDEV_AS_MISC */
100
101#ifndef CONFIG_VBOXDRV_AS_MISC
102# if defined(CONFIG_DEVFS_FS) && LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 0)
103# define VBOX_REGISTER_DEVICE(a,b,c) devfs_register_chrdev(a,b,c)
104# define VBOX_UNREGISTER_DEVICE(a,b) devfs_unregister_chrdev(a,b)
105# else
106# define VBOX_REGISTER_DEVICE(a,b,c) register_chrdev(a,b,c)
107# define VBOX_UNREGISTER_DEVICE(a,b) unregister_chrdev(a,b)
108# endif
109#endif /* !CONFIG_VBOXDRV_AS_MISC */
110
111
112#ifdef CONFIG_X86_HIGH_ENTRY
113# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
114#endif
115
116/*
117 * This sucks soooo badly on x86! Why don't they export __PAGE_KERNEL_EXEC so PAGE_KERNEL_EXEC would be usable?
118 */
119#if defined(__AMD64__)
120# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC
121#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
122# define MY_PAGE_KERNEL_EXEC __pgprot(cpu_has_pge ? _PAGE_KERNEL_EXEC | _PAGE_GLOBAL : _PAGE_KERNEL_EXEC)
123#else
124# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL
125#endif
126
127/*
128 * The redhat hack section.
129 * - The current hacks are for 2.4.21-15.EL only.
130 */
131#ifndef NO_REDHAT_HACKS
132/* accounting. */
133# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
134# ifdef VM_ACCOUNT
135# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c, 0) /* should it be 1 or 0? */
136# endif
137# endif
138
139/* backported remap_page_range. */
140# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
141# include <asm/tlb.h>
142# ifdef tlb_vma /* probably not good enough... */
143# define HAVE_26_STYLE_REMAP_PAGE_RANGE 1
144# endif
145# endif
146
147# ifndef __AMD64__
148/* In 2.6.9-22.ELsmp we have to call change_page_attr() twice when changing
149 * the page attributes from PAGE_KERNEL to something else, because there appears
150 * to be a bug in one of the many patches that redhat applied.
151 * It should be safe to do this on less buggy linux kernels too. ;-)
152 */
153# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
154 do { \
155 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) \
156 change_page_attr(pPages, cPages, prot); \
157 change_page_attr(pPages, cPages, prot); \
158 } while (0)
159# endif
160#endif /* !NO_REDHAT_HACKS */
161
162
163#ifndef MY_DO_MUNMAP
164# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c)
165#endif
166
167#ifndef MY_CHANGE_PAGE_ATTR
168# ifdef __AMD64__ /** @todo This is a cheap hack, but it'll get around that 'else BUG();' in __change_page_attr(). */
169# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
170 do { \
171 change_page_attr(pPages, cPages, PAGE_KERNEL_NOCACHE); \
172 change_page_attr(pPages, cPages, prot); \
173 } while (0)
174# else
175# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) change_page_attr(pPages, cPages, prot)
176# endif
177#endif
178
179
180/** @def ONE_MSEC_IN_JIFFIES
181 * The number of jiffies that make up 1 millisecond. This is only actually used
182 * when HZ is > 1000. */
183#if HZ <= 1000
184# define ONE_MSEC_IN_JIFFIES 0
185#elif !(HZ % 1000)
186# define ONE_MSEC_IN_JIFFIES (HZ / 1000)
187#else
188# define ONE_MSEC_IN_JIFFIES ((HZ + 999) / 1000)
189# error "HZ is not a multiple of 1000, the GIP stuff won't work right!"
190#endif
191
192
193/*******************************************************************************
194* Defined Constants And Macros *
195*******************************************************************************/
196/**
197 * Device extention & session data association structure.
198 */
199static SUPDRVDEVEXT g_DevExt;
200
201/** Timer structure for the GIP update. */
202static struct timer_list g_GipTimer;
203/** Pointer to the page structure for the GIP. */
204struct page *g_pGipPage;
205
206/** Registered devfs device handle. */
207#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
208# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
209static void *g_hDevFsVBoxDrv = NULL;
210# else
211static devfs_handle_t g_hDevFsVBoxDrv = NULL;
212# endif
213#endif
214
215#ifndef CONFIG_VBOXDRV_AS_MISC
216/** Module major number */
217#define DEVICE_MAJOR 234
218/** Saved major device number */
219static int g_iModuleMajor;
220#endif /* !CONFIG_VBOXDRV_AS_MISC */
221
222/** The module name. */
223#define DEVICE_NAME "vboxdrv"
224
225#ifdef __AMD64__
226/**
227 * Memory for the executable memory heap (in IPRT).
228 */
229extern uint8_t g_abExecMemory[1572864]; /* 1.5 MB */
230__asm__(".section execmemory, \"awx\", @progbits\n\t"
231 ".align 32\n\t"
232 ".globl g_abExecMemory\n"
233 "g_abExecMemory:\n\t"
234 ".zero 1572864\n\t"
235 ".type g_abExecMemory, @object\n\t"
236 ".size g_abExecMemory, 1572864\n\t"
237 ".text\n\t");
238#endif
239
240
241/*******************************************************************************
242* Internal Functions *
243*******************************************************************************/
244static int VBoxSupDrvInit(void);
245static void VBoxSupDrvUnload(void);
246static int VBoxSupDrvCreate(struct inode *pInode, struct file *pFilp);
247static int VBoxSupDrvClose(struct inode *pInode, struct file *pFilp);
248static int VBoxSupDrvDeviceControl(struct inode *pInode, struct file *pFilp,
249 unsigned int IOCmd, unsigned long IOArg);
250static RTR3PTR VBoxSupDrvMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags);
251static int VBoxSupDrvInitGip(PSUPDRVDEVEXT pDevExt);
252static int VBoxSupDrvTermGip(PSUPDRVDEVEXT pDevExt);
253static void VBoxSupGipTimer(unsigned long ulUser);
254#ifdef CONFIG_SMP
255static void VBoxSupGipTimerPerCpu(unsigned long ulUser);
256static void VBoxSupGipResumePerCpu(void *pvUser);
257#endif
258static int VBoxSupDrvOrder(unsigned long size);
259static int VBoxSupDrvErr2LinuxErr(int);
260
261
262/** The file_operations structure. */
263static struct file_operations gFileOpsVBoxDrv =
264{
265 owner: THIS_MODULE,
266 open: VBoxSupDrvCreate,
267 release: VBoxSupDrvClose,
268 ioctl: VBoxSupDrvDeviceControl,
269};
270
271#ifdef CONFIG_VBOXDRV_AS_MISC
272/** The miscdevice structure. */
273static struct miscdevice gMiscDevice =
274{
275 minor: MISC_DYNAMIC_MINOR,
276 name: DEVICE_NAME,
277 fops: &gFileOpsVBoxDrv,
278# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && \
279 LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 17)
280 devfs_name: DEVICE_NAME,
281# endif
282};
283#endif
284
285
286/**
287 * Initialize module.
288 *
289 * @returns appropriate status code.
290 */
291static int __init VBoxSupDrvInit(void)
292{
293 int rc;
294
295 dprintf(("VBoxDrv::ModuleInit\n"));
296
297#ifdef CONFIG_X86_LOCAL_APIC
298 /*
299 * If an NMI occurs while we are inside the world switcher the macine will crash.
300 * The Linux NMI watchdog generates periodic NMIs increasing a counter which is
301 * compared with another counter increased in the timer interrupt handler. Therefore
302 * we don't allow to setup an NMI watchdog.
303 */
304# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
305 /*
306 * First test: NMI actiated? Works only works with Linux 2.6 -- 2.4 does not export
307 * the nmi_watchdog variable.
308 */
309# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
310 /*
311 * Permanent IO_APIC mode active? No way to handle this!
312 */
313 if (nmi_watchdog == NMI_IO_APIC)
314 {
315 printk(KERN_ERR DEVICE_NAME
316 ": NMI watchdog in IO_APIC mode active -- refused to load the kernel module!\n"
317 DEVICE_NAME
318 ": Please disable the NMI watchdog by specifying 'nmi_watchdog=0' at kernel\n"
319 DEVICE_NAME
320 ": command line.\n");
321 return -EINVAL;
322 }
323
324 /*
325 * See arch/i386/kernel/nmi.c on >= 2.6.19: -1 means it can never enabled again
326 */
327 atomic_set(&nmi_active, -1);
328 printk(KERN_INFO DEVICE_NAME ": Trying to deactivate NMI watchdog permanently...\n");
329
330 /*
331 * Now fall through and see if it actually was enabled before. If so, fail
332 * as we cannot deactivate it cleanly from here.
333 */
334# else /* < 2.6.19 */
335 /*
336 * Older 2.6 kernels: nmi_watchdog is not initalized by default
337 */
338 if (nmi_watchdog != NMI_NONE)
339 goto nmi_activated;
340# endif
341# endif /* >= 2.6.0 */
342
343 /*
344 * Second test: Interrupt generated by performance counter not masked and can
345 * generate an NMI. Works also with Linux 2.4.
346 */
347 {
348 unsigned int v, ver, maxlvt;
349
350 v = apic_read(APIC_LVR);
351 ver = GET_APIC_VERSION(v);
352 /* 82489DXs do not report # of LVT entries. */
353 maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
354 if (maxlvt >= 4)
355 {
356 /* Read status of performance counter IRQ vector */
357 v = apic_read(APIC_LVTPC);
358
359 /* performance counter generates NMI and is not masked? */
360 if ((GET_APIC_DELIVERY_MODE(v) == APIC_MODE_NMI) && !(v & APIC_LVT_MASKED))
361 {
362# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
363 printk(KERN_ERR DEVICE_NAME
364 ": NMI watchdog either active or at least initialized. Please disable the NMI\n"
365 DEVICE_NAME
366 ": watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
367 return -EINVAL;
368# else /* < 2.6.19 */
369# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
370nmi_activated:
371# endif
372 printk(KERN_ERR DEVICE_NAME
373 ": NMI watchdog active -- refused to load the kernel module! Please disable\n"
374 DEVICE_NAME
375 ": the NMI watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
376 return -EINVAL;
377# endif /* >= 2.6.19 */
378 }
379 }
380 }
381# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
382 printk(KERN_INFO DEVICE_NAME ": Successfully done.\n");
383# endif /* >= 2.6.19 */
384#endif /* CONFIG_X86_LOCAL_APIC */
385
386#ifdef CONFIG_VBOXDRV_AS_MISC
387 rc = misc_register(&gMiscDevice);
388 if (rc)
389 {
390 printk(KERN_ERR DEVICE_NAME ": Can't register misc device! rc=%d\n", rc);
391 return rc;
392 }
393#else /* !CONFIG_VBOXDRV_AS_MISC */
394 /*
395 * Register character device.
396 */
397 g_iModuleMajor = DEVICE_MAJOR;
398 rc = VBOX_REGISTER_DEVICE((dev_t)g_iModuleMajor, DEVICE_NAME, &gFileOpsVBoxDrv);
399 if (rc < 0)
400 {
401 dprintf(("VBOX_REGISTER_DEVICE failed with rc=%#x!\n", rc));
402 return rc;
403 }
404
405 /*
406 * Save returned module major number
407 */
408 if (DEVICE_MAJOR != 0)
409 g_iModuleMajor = DEVICE_MAJOR;
410 else
411 g_iModuleMajor = rc;
412 rc = 0;
413
414#ifdef CONFIG_DEVFS_FS
415 /*
416 * Register a device entry
417 */
418 g_hDevFsVBoxDrv = VBOX_REGISTER_DEVFS();
419 if (g_hDevFsVBoxDrv == NULL)
420 {
421 dprintf(("devfs_register failed!\n"));
422 rc = -EINVAL;
423 }
424#endif
425#endif /* !CONFIG_VBOXDRV_AS_MISC */
426 if (!rc)
427 {
428 /*
429 * Initialize the runtime.
430 * On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
431 */
432 rc = RTR0Init(0);
433 if (RT_SUCCESS(rc))
434 {
435#ifdef __AMD64__
436 rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
437#endif
438 /*
439 * Initialize the device extension.
440 */
441 if (RT_SUCCESS(rc))
442 rc = supdrvInitDevExt(&g_DevExt);
443 if (!rc)
444 {
445 /*
446 * Create the GIP page.
447 */
448 rc = VBoxSupDrvInitGip(&g_DevExt);
449 if (!rc)
450 {
451 dprintf(("VBoxDrv::ModuleInit returning %#x\n", rc));
452 return rc;
453 }
454
455 supdrvDeleteDevExt(&g_DevExt);
456 }
457 else
458 rc = -EINVAL;
459 RTR0Term();
460 }
461 else
462 rc = -EINVAL;
463
464 /*
465 * Failed, cleanup and return the error code.
466 */
467#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
468 VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
469#endif
470 }
471#ifdef CONFIG_VBOXDRV_AS_MISC
472 misc_deregister(&gMiscDevice);
473 dprintf(("VBoxDrv::ModuleInit returning %#x (minor:%d)\n", rc, gMiscDevice.minor));
474#else
475 VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
476 dprintf(("VBoxDrv::ModuleInit returning %#x (major:%d)\n", rc, g_iModuleMajor));
477#endif
478 return rc;
479}
480
481
482/**
483 * Unload the module.
484 */
485static void __exit VBoxSupDrvUnload(void)
486{
487 int rc;
488 dprintf(("VBoxSupDrvUnload\n"));
489
490 /*
491 * I Don't think it's possible to unload a driver which processes have
492 * opened, at least we'll blindly assume that here.
493 */
494#ifdef CONFIG_VBOXDRV_AS_MISC
495 rc = misc_deregister(&gMiscDevice);
496 if (rc < 0)
497 {
498 dprintf(("misc_deregister failed with rc=%#x\n", rc));
499 }
500#else /* !CONFIG_VBOXDRV_AS_MISC */
501#ifdef CONFIG_DEVFS_FS
502 /*
503 * Unregister a device entry
504 */
505 VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
506#endif // devfs
507 rc = VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
508 if (rc < 0)
509 {
510 dprintf(("unregister_chrdev failed with rc=%#x (major:%d)\n", rc, g_iModuleMajor));
511 }
512#endif /* !CONFIG_VBOXDRV_AS_MISC */
513
514 /*
515 * Destroy GIP, delete the device extension and terminate IPRT.
516 */
517 VBoxSupDrvTermGip(&g_DevExt);
518 supdrvDeleteDevExt(&g_DevExt);
519 RTR0Term();
520}
521
522
523/**
524 * Device open. Called on open /dev/vboxdrv
525 *
526 * @param pInode Pointer to inode info structure.
527 * @param pFilp Associated file pointer.
528 */
529static int VBoxSupDrvCreate(struct inode *pInode, struct file *pFilp)
530{
531 int rc;
532 PSUPDRVSESSION pSession;
533 dprintf(("VBoxSupDrvCreate: pFilp=%p\n", pFilp));
534
535 /*
536 * Call common code for the rest.
537 */
538 rc = supdrvCreateSession(&g_DevExt, (PSUPDRVSESSION *)&pSession);
539 if (!rc)
540 {
541 pSession->Uid = current->euid;
542 pSession->Gid = current->egid;
543 pSession->Process = RTProcSelf();
544 pSession->R0Process = RTR0ProcHandleSelf();
545 }
546
547 dprintf(("VBoxSupDrvCreate: g_DevExt=%p pSession=%p rc=%d\n", &g_DevExt, pSession, rc));
548 pFilp->private_data = pSession;
549
550 return VBoxSupDrvErr2LinuxErr(rc);
551}
552
553
554/**
555 * Close device.
556 *
557 * @param pInode Pointer to inode info structure.
558 * @param pFilp Associated file pointer.
559 */
560static int VBoxSupDrvClose(struct inode *pInode, struct file *pFilp)
561{
562 dprintf(("VBoxSupDrvClose: pFilp=%p private_data=%p\n", pFilp, pFilp->private_data));
563 supdrvCloseSession(&g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
564 pFilp->private_data = NULL;
565 return 0;
566}
567
568
569/**
570 * Device I/O Control entry point.
571 *
572 * @param pInode Pointer to inode info structure.
573 * @param pFilp Associated file pointer.
574 * @param IOCmd The function specified to ioctl().
575 * @param IOArg The argument specified to ioctl().
576 */
577static int VBoxSupDrvDeviceControl(struct inode *pInode, struct file *pFilp,
578 unsigned int IOCmd, unsigned long IOArg)
579{
580 int rc;
581 SUPDRVIOCTLDATA Args;
582 void *pvBuf = NULL;
583 int cbBuf = 0;
584 unsigned cbOut = 0;
585
586 dprintf2(("VBoxSupDrvDeviceControl: pFilp=%p IOCmd=%x IOArg=%p\n", pFilp, IOCmd, (void *)IOArg));
587
588 /*
589 * Copy ioctl data structure from user space.
590 */
591 if (_IOC_SIZE(IOCmd) != sizeof(SUPDRVIOCTLDATA))
592 {
593 dprintf(("VBoxSupDrvDeviceControl: incorrect input length! cbArgs=%d\n", _IOC_SIZE(IOCmd)));
594 return -EINVAL;
595 }
596 if (copy_from_user(&Args, (void *)IOArg, _IOC_SIZE(IOCmd)))
597 {
598 dprintf(("VBoxSupDrvDeviceControl: copy_from_user(&Args) failed.\n"));
599 return -EFAULT;
600 }
601
602 /*
603 * Allocate and copy user space input data buffer to kernel space.
604 */
605 if (Args.cbIn > 0 || Args.cbOut > 0)
606 {
607 cbBuf = max(Args.cbIn, Args.cbOut);
608 pvBuf = vmalloc(cbBuf);
609 if (pvBuf == NULL)
610 {
611 dprintf(("VBoxSupDrvDeviceControl: failed to allocate buffer of %d bytes.\n", cbBuf));
612 return -ENOMEM;
613 }
614
615 if (copy_from_user(pvBuf, (void *)Args.pvIn, Args.cbIn))
616 {
617 dprintf(("VBoxSupDrvDeviceControl: copy_from_user(pvBuf) failed.\n"));
618 vfree(pvBuf);
619 return -EFAULT;
620 }
621 }
622
623 /*
624 * Process the IOCtl.
625 */
626 rc = supdrvIOCtl(IOCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data,
627 pvBuf, Args.cbIn, pvBuf, Args.cbOut, &cbOut);
628
629 /*
630 * Copy ioctl data and output buffer back to user space.
631 */
632 if (rc)
633 {
634 dprintf(("VBoxSupDrvDeviceControl: pFilp=%p IOCmd=%x IOArg=%p failed, rc=%d (linux rc=%d)\n",
635 pFilp, IOCmd, (void *)IOArg, rc, VBoxSupDrvErr2LinuxErr(rc)));
636 rc = VBoxSupDrvErr2LinuxErr(rc);
637 }
638 else if (cbOut > 0)
639 {
640 if (pvBuf != NULL && cbOut <= cbBuf)
641 {
642 if (copy_to_user((void *)Args.pvOut, pvBuf, cbOut))
643 {
644 dprintf(("copy_to_user failed.\n"));
645 rc = -EFAULT;
646 }
647 }
648 else
649 {
650 dprintf(("WHAT!?! supdrvIOCtl messed up! cbOut=%d cbBuf=%d pvBuf=%p\n", cbOut, cbBuf, pvBuf));
651 rc = -EPERM;
652 }
653 }
654
655 if (pvBuf)
656 vfree(pvBuf);
657
658 dprintf2(("VBoxSupDrvDeviceControl: returns %d\n", rc));
659 return rc;
660}
661
662
663/**
664 * Initializes any OS specific object creator fields.
665 */
666void VBOXCALL supdrvOSObjInitCreator(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession)
667{
668 NOREF(pObj);
669 NOREF(pSession);
670}
671
672
673/**
674 * Checks if the session can access the object.
675 *
676 * @returns true if a decision has been made.
677 * @returns false if the default access policy should be applied.
678 *
679 * @param pObj The object in question.
680 * @param pSession The session wanting to access the object.
681 * @param pszObjName The object name, can be NULL.
682 * @param prc Where to store the result when returning true.
683 */
684bool VBOXCALL supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
685{
686 NOREF(pObj);
687 NOREF(pSession);
688 NOREF(pszObjName);
689 NOREF(prc);
690 return false;
691}
692
693
/**
 * Compute order. Some functions allocate 2^order pages.
 *
 * I.e. this returns the smallest iOrder such that 2^iOrder >= cPages
 * (0 for cPages == 0).
 *
 * @returns order.
 * @param   cPages      Number of pages.
 */
static int VBoxSupDrvOrder(unsigned long cPages)
{
    unsigned long   cRounded = 1;
    int             iOrder = 0;

    /* Double until we cover the request, counting the doublings. */
    while (cRounded < cPages)
    {
        cRounded <<= 1;
        iOrder++;
    }
    return iOrder;
}
712
713
714/**
715 * OS Specific code for locking down memory.
716 *
717 * @returns 0 on success.
718 * @returns SUPDRV_ERR_* on failure.
719 * @param pMem Pointer to memory.
720 * This is not linked in anywhere.
721 * @param paPages Array which should be filled with the address of the physical pages.
722 *
723 * @remark See sgl_map_user_pages() for an example of an similar function.
724 */
725int VBOXCALL supdrvOSLockMemOne(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
726{
727 int rc;
728 struct page **papPages;
729 unsigned iPage;
730 unsigned cPages = pMem->cb >> PAGE_SHIFT;
731 unsigned long pv = (unsigned long)pMem->pvR3;
732 struct vm_area_struct **papVMAs;
733
734 /*
735 * Allocate page pointer array.
736 */
737 papPages = vmalloc(cPages * sizeof(*papPages));
738 if (!papPages)
739 return SUPDRV_ERR_NO_MEMORY;
740
741 /*
742 * Allocate the VMA pointer array.
743 */
744 papVMAs = vmalloc(cPages * sizeof(*papVMAs));
745 if (!papVMAs)
746 return SUPDRV_ERR_NO_MEMORY;
747
748 /*
749 * Get user pages.
750 */
751 down_read(&current->mm->mmap_sem);
752 rc = get_user_pages(current, /* Task for fault acounting. */
753 current->mm, /* Whose pages. */
754 (unsigned long)pv, /* Where from. */
755 cPages, /* How many pages. */
756 1, /* Write to memory. */
757 0, /* force. */
758 papPages, /* Page array. */
759 papVMAs); /* vmas */
760 if (rc != cPages)
761 {
762 up_read(&current->mm->mmap_sem);
763 dprintf(("supdrvOSLockMemOne: get_user_pages failed. rc=%d\n", rc));
764 return SUPDRV_ERR_LOCK_FAILED;
765 }
766
767 for (iPage = 0; iPage < cPages; iPage++)
768 flush_dcache_page(papPages[iPage]);
769 up_read(&current->mm->mmap_sem);
770
771 pMem->u.locked.papPages = papPages;
772 pMem->u.locked.cPages = cPages;
773
774 /*
775 * Get addresses, protect against fork()
776 */
777 for (iPage = 0; iPage < cPages; iPage++)
778 {
779 paPages[iPage].Phys = page_to_phys(papPages[iPage]);
780 paPages[iPage].uReserved = 0;
781 papVMAs[iPage]->vm_flags |= VM_DONTCOPY;
782 }
783
784 vfree(papVMAs);
785
786 dprintf2(("supdrvOSLockMemOne: pvR3=%p cb=%d papPages=%p\n",
787 pMem->pvR3, pMem->cb, pMem->u.locked.papPages));
788 return 0;
789}
790
791
792/**
793 * Unlocks the memory pointed to by pv.
794 *
795 * @param pv Memory to unlock.
796 * @param cb Size of the memory (debug).
797 *
798 * @remark See sgl_unmap_user_pages() for an example of an similar function.
799 */
800void VBOXCALL supdrvOSUnlockMemOne(PSUPDRVMEMREF pMem)
801{
802 unsigned iPage;
803 dprintf2(("supdrvOSUnlockMemOne: pvR3=%p cb=%d papPages=%p\n",
804 pMem->pvR3, pMem->cb, pMem->u.locked.papPages));
805
806 /*
807 * Loop thru the pages and release them.
808 */
809 for (iPage = 0; iPage < pMem->u.locked.cPages; iPage++)
810 {
811 if (!PageReserved(pMem->u.locked.papPages[iPage]))
812 SetPageDirty(pMem->u.locked.papPages[iPage]);
813 page_cache_release(pMem->u.locked.papPages[iPage]);
814 }
815
816 /* free the page array */
817 vfree(pMem->u.locked.papPages);
818 pMem->u.locked.cPages = 0;
819}
820
821
822/**
823 * OS Specific code for allocating page aligned memory with continuous fixed
824 * physical paged backing.
825 *
826 * @returns 0 on success.
827 * @returns SUPDRV_ERR_* on failure.
828 * @param pMem Memory reference record of the memory to be allocated.
829 * (This is not linked in anywhere.)
830 * @param ppvR0 Where to store the virtual address of the ring-0 mapping. (optional)
831 * @param ppvR3 Where to store the virtual address of the ring-3 mapping.
832 * @param pHCPhys Where to store the physical address.
833 */
834int VBOXCALL supdrvOSContAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
835{
836 struct page *paPages;
837 unsigned iPage;
838 unsigned cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
839 unsigned cPages = cbAligned >> PAGE_SHIFT;
840 unsigned cOrder = VBoxSupDrvOrder(cPages);
841 unsigned long ulAddr;
842 dma_addr_t HCPhys;
843 int rc = 0;
844 pgprot_t pgFlags;
845 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;
846
847 Assert(ppvR3);
848 Assert(pHCPhys);
849
850 /*
851 * Allocate page pointer array.
852 */
853#ifdef __AMD64__ /** @todo check out if there is a correct way of getting memory below 4GB (physically). */
854 paPages = alloc_pages(GFP_DMA, cOrder);
855#else
856 paPages = alloc_pages(GFP_USER, cOrder);
857#endif
858 if (!paPages)
859 return SUPDRV_ERR_NO_MEMORY;
860
861 /*
862 * Lock the pages.
863 */
864 for (iPage = 0; iPage < cPages; iPage++)
865 {
866 SetPageReserved(&paPages[iPage]);
867 if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
868 MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
869#ifdef DEBUG
870 if (iPage + 1 < cPages && (page_to_phys((&paPages[iPage])) + 0x1000) != page_to_phys((&paPages[iPage + 1])))
871 {
872 dprintf(("supdrvOSContAllocOne: Pages are not continuous!!!! iPage=%d phys=%llx physnext=%llx\n",
873 iPage, (long long)page_to_phys((&paPages[iPage])), (long long)page_to_phys((&paPages[iPage + 1]))));
874 BUG();
875 }
876#endif
877 }
878 HCPhys = page_to_phys(paPages);
879
880 /*
881 * Allocate user space mapping and put the physical pages into it.
882 */
883 down_write(&current->mm->mmap_sem);
884 ulAddr = do_mmap(NULL, 0, cbAligned, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0);
885 if (!(ulAddr & ~PAGE_MASK))
886 {
887#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
888 int rc2 = remap_page_range(ulAddr, HCPhys, cbAligned, pgFlags);
889#else
890 int rc2 = 0;
891 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
892 if (vma)
893#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
894 rc2 = remap_page_range(vma, ulAddr, HCPhys, cbAligned, pgFlags);
895#else
896 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, cbAligned, pgFlags);
897#endif
898 else
899 {
900 rc = SUPDRV_ERR_NO_MEMORY;
901 dprintf(("supdrvOSContAllocOne: no vma found for ulAddr=%#lx!\n", ulAddr));
902 }
903#endif
904 if (rc2)
905 {
906 rc = SUPDRV_ERR_NO_MEMORY;
907 dprintf(("supdrvOSContAllocOne: remap_page_range failed rc2=%d\n", rc2));
908 }
909 }
910 else
911 {
912 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
913 rc = SUPDRV_ERR_NO_MEMORY;
914 }
915 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
916
917 /*
918 * Success?
919 */
920 if (!rc)
921 {
922 *pHCPhys = HCPhys;
923 *ppvR3 = ulAddr;
924 if (ppvR0)
925 *ppvR0 = (void *)ulAddr;
926 pMem->pvR3 = ulAddr;
927 pMem->pvR0 = NULL;
928 pMem->u.cont.paPages = paPages;
929 pMem->u.cont.cPages = cPages;
930 pMem->cb = cbAligned;
931
932 dprintf2(("supdrvOSContAllocOne: pvR0=%p pvR3=%p cb=%d paPages=%p *pHCPhys=%lx *ppvR0=*ppvR3=%p\n",
933 pMem->pvR0, pMem->pvR3, pMem->cb, paPages, (unsigned long)*pHCPhys, *ppvR3));
934 global_flush_tlb();
935 return 0;
936 }
937
938 /*
939 * Failure, cleanup and be gone.
940 */
941 down_write(&current->mm->mmap_sem);
942 if (ulAddr & ~PAGE_MASK)
943 MY_DO_MUNMAP(current->mm, ulAddr, pMem->cb);
944 for (iPage = 0; iPage < cPages; iPage++)
945 {
946 ClearPageReserved(&paPages[iPage]);
947 if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
948 MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, PAGE_KERNEL);
949 }
950 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
951 __free_pages(paPages, cOrder);
952
953 global_flush_tlb();
954 return rc;
955}
956
957
958/**
959 * Frees contiguous memory.
960 *
961 * @param pMem Memory reference record of the memory to be freed.
962 */
963void VBOXCALL supdrvOSContFreeOne(PSUPDRVMEMREF pMem)
964{
965 unsigned iPage;
966
967 dprintf2(("supdrvOSContFreeOne: pvR0=%p pvR3=%p cb=%d paPages=%p\n",
968 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.cont.paPages));
969
970 /*
971 * do_exit() destroys the mm before closing files.
972 * I really hope it cleans up our stuff properly...
973 */
974 if (current->mm)
975 {
976 down_write(&current->mm->mmap_sem);
977 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, pMem->cb);
978 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
979 }
980
981 /*
982 * Change page attributes freeing the pages.
983 */
984 for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
985 {
986 ClearPageReserved(&pMem->u.cont.paPages[iPage]);
987 if (!PageHighMem(&pMem->u.cont.paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
988 MY_CHANGE_PAGE_ATTR(&pMem->u.cont.paPages[iPage], 1, PAGE_KERNEL);
989 }
990 __free_pages(pMem->u.cont.paPages, VBoxSupDrvOrder(pMem->u.cont.cPages));
991
992 pMem->u.cont.cPages = 0;
993}
994
995
996/**
997 * Allocates memory which mapped into both kernel and user space.
998 * The returned memory is page aligned and so is the allocation.
999 *
1000 * @returns 0 on success.
1001 * @returns SUPDRV_ERR_* on failure.
1002 * @param pMem Memory reference record of the memory to be allocated.
1003 * (This is not linked in anywhere.)
1004 * @param ppvR0 Where to store the address of the Ring-0 mapping.
1005 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1006 */
int VBOXCALL supdrvOSMemAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
{
    /* Round the request up to whole pages; everything below is page granular. */
    const unsigned cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
    const unsigned cPages = cbAligned >> PAGE_SHIFT;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
    /* Pre-2.4.22 kernels have no vmap(), so a single physically contiguous
       order allocation is used instead of individual pages. */
    unsigned cOrder = VBoxSupDrvOrder(cPages);
    struct page *paPages;
#endif
    struct page **papPages;
    unsigned iPage;
    pgprot_t pgFlags;
    pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;

    /*
     * Allocate array with page pointers.
     */
    pMem->u.mem.cPages = 0;
    pMem->u.mem.papPages = papPages = kmalloc(sizeof(papPages[0]) * cPages, GFP_KERNEL);
    if (!papPages)
        return SUPDRV_ERR_NO_MEMORY;

    /*
     * Allocate the pages.
     */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = alloc_page(GFP_HIGHUSER);
        if (!papPages[iPage])
        {
            /* Record how many pages were actually obtained so the
               cleanup helper frees only those. */
            pMem->u.mem.cPages = iPage;
            supdrvOSMemFreeOne(pMem);
            return SUPDRV_ERR_NO_MEMORY;
        }
    }

#else /* < 2.4.22 */
    paPages = alloc_pages(GFP_USER, cOrder);
    if (!paPages)
    {
        supdrvOSMemFreeOne(pMem);
        return SUPDRV_ERR_NO_MEMORY;
    }
    /* Expose the contiguous block through the same page-pointer-array
       layout the >= 2.4.22 path produces. */
    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = &paPages[iPage];
        if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
            MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
        if (PageHighMem(papPages[iPage]))
            BUG();
    }
#endif
    pMem->u.mem.cPages = cPages;

    /*
     * Reserve the pages (keeps the mm subsystem's hands off them while
     * they are mapped into ring-3).
     */
    for (iPage = 0; iPage < cPages; iPage++)
        SetPageReserved(papPages[iPage]);

    /*
     * Create the Ring-0 mapping.
     */
    if (ppvR0)
    {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
# ifdef VM_MAP
        *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_MAP, pgFlags);
# else
        *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_ALLOC, pgFlags);
# endif
#else
        /* Contiguous low-memory allocation: the kernel linear mapping suffices. */
        *ppvR0 = pMem->pvR0 = phys_to_virt(page_to_phys(papPages[0]));
#endif
    }
    /* NOTE(review): when ppvR0 is NULL this reads pMem->pvR0 without having
       written it here - assumes the caller pre-zeroed *pMem; verify. */
    if (pMem->pvR0 || !ppvR0)
    {
        /*
         * Create the ring3 mapping.
         */
        if (ppvR3)
            *ppvR3 = pMem->pvR3 = VBoxSupDrvMapUser(papPages, cPages, PROT_READ | PROT_WRITE | PROT_EXEC, pgFlags);
        if (pMem->pvR3 || !ppvR3)
            return 0;
        dprintf(("supdrvOSMemAllocOne: failed to map into r3! cPages=%u\n", cPages));
    }
    else
        dprintf(("supdrvOSMemAllocOne: failed to map into r0! cPages=%u\n", cPages));

    /* Failure: release everything acquired so far. */
    supdrvOSMemFreeOne(pMem);
    return SUPDRV_ERR_NO_MEMORY;
}
1099
1100
1101/**
1102 * Get the physical addresses of the pages in the allocation.
1103 * This is called while inside bundle the spinlock.
1104 *
1105 * @param pMem Memory reference record of the memory.
1106 * @param paPages Where to store the page addresses.
1107 */
1108void VBOXCALL supdrvOSMemGetPages(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
1109{
1110 unsigned iPage;
1111 for (iPage = 0; iPage < pMem->u.mem.cPages; iPage++)
1112 {
1113 paPages[iPage].Phys = page_to_phys(pMem->u.mem.papPages[iPage]);
1114 paPages[iPage].uReserved = 0;
1115 }
1116}
1117
1118
1119/**
1120 * Frees memory allocated by supdrvOSMemAllocOne().
1121 *
1122 * @param pMem Memory reference record of the memory to be free.
1123 */
1124void VBOXCALL supdrvOSMemFreeOne(PSUPDRVMEMREF pMem)
1125{
1126 dprintf2(("supdrvOSMemFreeOne: pvR0=%p pvR3=%p cb=%d cPages=%d papPages=%p\n",
1127 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.mem.cPages, pMem->u.mem.papPages));
1128
1129 /*
1130 * Unmap the user mapping (if any).
1131 * do_exit() destroys the mm before closing files.
1132 */
1133 if (pMem->pvR3 && current->mm)
1134 {
1135 down_write(&current->mm->mmap_sem);
1136 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, RT_ALIGN(pMem->cb, PAGE_SIZE));
1137 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1138 }
1139 pMem->pvR3 = NIL_RTR3PTR;
1140
1141 /*
1142 * Unmap the kernel mapping (if any).
1143 */
1144 if (pMem->pvR0)
1145 {
1146#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1147 vunmap(pMem->pvR0);
1148#endif
1149 pMem->pvR0 = NULL;
1150 }
1151
1152 /*
1153 * Free the physical pages.
1154 */
1155 if (pMem->u.mem.papPages)
1156 {
1157 struct page **papPages = pMem->u.mem.papPages;
1158 const unsigned cPages = pMem->u.mem.cPages;
1159 unsigned iPage;
1160
1161 /* Restore the page flags. */
1162 for (iPage = 0; iPage < cPages; iPage++)
1163 {
1164 ClearPageReserved(papPages[iPage]);
1165#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
1166 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1167 MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, PAGE_KERNEL);
1168#endif
1169 }
1170
1171 /* Free the pages. */
1172#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1173 for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
1174 __free_page(papPages[iPage]);
1175#else
1176 if (cPages > 0)
1177 __free_pages(papPages[0], VBoxSupDrvOrder(cPages));
1178#endif
1179 /* Free the page pointer array. */
1180 kfree(papPages);
1181 pMem->u.mem.papPages = NULL;
1182 }
1183 pMem->u.mem.cPages = 0;
1184}
1185
1186
1187/**
1188 * Maps a range of pages into user space.
1189 *
1190 * @returns Pointer to the user space mapping on success.
1191 * @returns NULL on failure.
1192 * @param papPages Array of the pages to map.
1193 * @param cPages Number of pages to map.
1194 * @param fProt The mapping protection.
1195 * @param pgFlags The page level protection.
1196 */
1197static RTR3PTR VBoxSupDrvMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags)
1198{
1199 int rc = SUPDRV_ERR_NO_MEMORY;
1200 unsigned long ulAddr;
1201
1202 /*
1203 * Allocate user space mapping.
1204 */
1205 down_write(&current->mm->mmap_sem);
1206 ulAddr = do_mmap(NULL, 0, cPages * PAGE_SIZE, fProt, MAP_SHARED | MAP_ANONYMOUS, 0);
1207 if (!(ulAddr & ~PAGE_MASK))
1208 {
1209 /*
1210 * Map page by page into the mmap area.
1211 * This is generic, paranoid and not very efficient.
1212 */
1213 int rc = 0;
1214 unsigned long ulAddrCur = ulAddr;
1215 unsigned iPage;
1216 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1217 {
1218#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1219 struct vm_area_struct *vma = find_vma(current->mm, ulAddrCur);
1220 if (!vma)
1221 break;
1222#endif
1223
1224#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1225 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(papPages[iPage]), PAGE_SIZE, pgFlags);
1226#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1227 rc = remap_page_range(vma, ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1228#else /* 2.4 */
1229 rc = remap_page_range(ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1230#endif
1231 if (rc)
1232 break;
1233 }
1234
1235 /*
1236 * Successful?
1237 */
1238 if (iPage >= cPages)
1239 {
1240 up_write(&current->mm->mmap_sem);
1241 return ulAddr;
1242 }
1243
1244 /* no, cleanup! */
1245 if (rc)
1246 dprintf(("VBoxSupDrvMapUser: remap_[page|pfn]_range failed! rc=%d\n", rc));
1247 else
1248 dprintf(("VBoxSupDrvMapUser: find_vma failed!\n"));
1249
1250 MY_DO_MUNMAP(current->mm, ulAddr, cPages * PAGE_SIZE);
1251 }
1252 else
1253 {
1254 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
1255 rc = SUPDRV_ERR_NO_MEMORY;
1256 }
1257 up_write(&current->mm->mmap_sem);
1258
1259 return NIL_RTR3PTR;
1260}
1261
1262
1263/**
1264 * Initializes the GIP.
1265 *
1266 * @returns negative errno.
1267 * @param pDevExt Instance data. GIP stuff may be updated.
1268 */
static int VBoxSupDrvInitGip(PSUPDRVDEVEXT pDevExt)
{
    struct page *pPage;
    dma_addr_t HCPhys;
    PSUPGLOBALINFOPAGE pGip;
#ifdef CONFIG_SMP
    unsigned i;
#endif
    dprintf(("VBoxSupDrvInitGip:\n"));

    /*
     * Allocate the page.
     */
    pPage = alloc_pages(GFP_USER, 0);
    if (!pPage)
    {
        dprintf(("VBoxSupDrvInitGip: failed to allocate the GIP page\n"));
        return -ENOMEM;
    }

    /*
     * Lock the page. Reserving keeps the mm subsystem away from it;
     * the global keeps it reachable for VBoxSupDrvTermGip().
     */
    SetPageReserved(pPage);
    g_pGipPage = pPage;

    /*
     * Call common initialization routine.
     * First seed the jiffies -> monotonic-time tracking used by the timers.
     */
    HCPhys = page_to_phys(pPage);
    pGip = (PSUPGLOBALINFOPAGE)page_address(pPage);
    pDevExt->ulLastJiffies = jiffies;
#ifdef TICK_NSEC
    /* TICK_NSEC is the exact tick period in nanoseconds. */
    pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * TICK_NSEC;
    dprintf(("VBoxSupDrvInitGIP: TICK_NSEC=%ld HZ=%d jiffies=%ld now=%lld\n",
             TICK_NSEC, HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
#else
    /* NOTE(review): 1000000 / HZ is microseconds per tick while the TICK_NSEC
       branch multiplies by nanoseconds - looks like a factor-1000 discrepancy;
       verify against what supdrvGipInit expects. */
    pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * (1000000 / HZ);
    dprintf(("VBoxSupDrvInitGIP: TICK_NSEC=%d HZ=%d jiffies=%ld now=%lld\n",
             (int)(1000000 / HZ), HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
#endif
    /* Report at most a 1000Hz update frequency to the common code. */
    supdrvGipInit(pDevExt, pGip, HCPhys, pDevExt->u64LastMonotime,
                  HZ <= 1000 ? HZ : 1000);

    /*
     * Initialize the timer. It is armed via mod_timer() in
     * supdrvOSGipResume(), not here.
     */
    init_timer(&g_GipTimer);
    g_GipTimer.data = (unsigned long)pDevExt;
    g_GipTimer.function = VBoxSupGipTimer;
    g_GipTimer.expires = jiffies;
#ifdef CONFIG_SMP
    /* One timer per GIP CPU slot (used in async TSC mode); 'data' carries
       the slot index so the callback knows which slot it serves. */
    for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
    {
        pDevExt->aCPUs[i].u64LastMonotime = pDevExt->u64LastMonotime;
        pDevExt->aCPUs[i].ulLastJiffies = pDevExt->ulLastJiffies;
        pDevExt->aCPUs[i].iSmpProcessorId = -512;   /* no owning CPU yet; set in VBoxSupGipResumePerCpu(). */
        init_timer(&pDevExt->aCPUs[i].Timer);
        pDevExt->aCPUs[i].Timer.data = i;
        pDevExt->aCPUs[i].Timer.function = VBoxSupGipTimerPerCpu;
        pDevExt->aCPUs[i].Timer.expires = jiffies;
    }
#endif

    return 0;
}
1335
1336
1337/**
1338 * Terminates the GIP.
1339 *
1340 * @returns negative errno.
1341 * @param pDevExt Instance data. GIP stuff may be updated.
1342 */
1343static int VBoxSupDrvTermGip(PSUPDRVDEVEXT pDevExt)
1344{
1345 struct page *pPage;
1346 PSUPGLOBALINFOPAGE pGip;
1347#ifdef CONFIG_SMP
1348 unsigned i;
1349#endif
1350 dprintf(("VBoxSupDrvTermGip:\n"));
1351
1352 /*
1353 * Delete the timer if it's pending.
1354 */
1355 if (timer_pending(&g_GipTimer))
1356 del_timer_sync(&g_GipTimer);
1357#ifdef CONFIG_SMP
1358 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1359 if (timer_pending(&pDevExt->aCPUs[i].Timer))
1360 del_timer_sync(&pDevExt->aCPUs[i].Timer);
1361#endif
1362
1363 /*
1364 * Uninitialize the content.
1365 */
1366 pGip = pDevExt->pGip;
1367 pDevExt->pGip = NULL;
1368 if (pGip)
1369 supdrvGipTerm(pGip);
1370
1371 /*
1372 * Free the page.
1373 */
1374 pPage = g_pGipPage;
1375 g_pGipPage = NULL;
1376 if (pPage)
1377 {
1378 ClearPageReserved(pPage);
1379 __free_pages(pPage, 0);
1380 }
1381
1382 return 0;
1383}
1384
1385/**
1386 * Timer callback function.
1387 *
1388 * In ASYNC TSC mode this is called on the primary CPU, and we're
1389 * assuming that the CPU remains online.
1390 *
1391 * @param ulUser The device extension pointer.
1392 */
static void VBoxSupGipTimer(unsigned long ulUser)
{
    PSUPDRVDEVEXT pDevExt;
    PSUPGLOBALINFOPAGE pGip;
    unsigned long ulNow;
    unsigned long ulDiff;
    uint64_t u64Monotime;
    unsigned long SavedFlags;

    /* Interrupts stay disabled for the whole jiffies -> monotonic-time
       update sequence so the GIP update is not interrupted. */
    local_irq_save(SavedFlags);

    pDevExt = (PSUPDRVDEVEXT)ulUser;
    pGip = pDevExt->pGip;
    ulNow = jiffies;
    /* Elapsed ticks since the last update (unsigned arithmetic handles wrap). */
    ulDiff = ulNow - pDevExt->ulLastJiffies;

    pDevExt->ulLastJiffies = ulNow;
#ifdef TICK_NSEC
    /* TICK_NSEC is the exact tick period in nanoseconds. */
    u64Monotime = pDevExt->u64LastMonotime + ulDiff * TICK_NSEC;
#else
    /* NOTE(review): 1000000 / HZ is microseconds per tick while the TICK_NSEC
       branch adds nanoseconds - looks like a factor-1000 discrepancy; verify. */
    u64Monotime = pDevExt->u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
    ASMAtomicXchgU64(&pDevExt->u64LastMonotime, u64Monotime);
    if (RT_LIKELY(pGip))
        supdrvGipUpdate(pDevExt->pGip, u64Monotime);
    /* Rearm: aim for ~1ms updates; with HZ <= 1000 the very next tick is used. */
    mod_timer(&g_GipTimer, jiffies + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));

    local_irq_restore(SavedFlags);
}
1422
1423
1424#ifdef CONFIG_SMP
1425/**
1426 * Timer callback function for the other CPUs.
1427 *
1428 * @param iLnxCPU The APIC ID of this timer.
1429 */
static void VBoxSupGipTimerPerCpu(unsigned long iLnxCPU)
{
    PSUPDRVDEVEXT pDevExt;
    PSUPGLOBALINFOPAGE pGip;
    uint8_t iCPU;
    uint64_t u64Monotime;
    unsigned long SavedFlags;

    /* Keep interrupts off for the whole GIP update (see VBoxSupGipTimer). */
    local_irq_save(SavedFlags);

    pDevExt = &g_DevExt;
    pGip = pDevExt->pGip;
    /* Per-CPU GIP slots are indexed by APIC id. */
    iCPU = ASMGetApicId();

    if (RT_LIKELY(iCPU < RT_ELEMENTS(pGip->aCPUs)))
    {
        /* The timer was armed with data == the APIC id it serves (see
           VBoxSupDrvInitGip), so it must fire on the CPU with that id.
           Inside this branch iCPU == iLnxCPU, so the mixed indexing below
           addresses the same slot. */
        if (RT_LIKELY(iCPU == iLnxCPU))
        {
            unsigned long ulNow = jiffies;
            unsigned long ulDiff = ulNow - pDevExt->aCPUs[iLnxCPU].ulLastJiffies;

            pDevExt->aCPUs[iLnxCPU].ulLastJiffies = ulNow;
#ifdef TICK_NSEC
            /* Exact nanoseconds per tick. */
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
#else
            /* NOTE(review): 1000000 / HZ is microseconds per tick while the
               TICK_NSEC branch adds nanoseconds - looks like a factor-1000
               discrepancy; verify. */
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
            ASMAtomicXchgU64(&pDevExt->aCPUs[iCPU].u64LastMonotime, u64Monotime);
            if (RT_LIKELY(pGip))
                supdrvGipUpdatePerCpu(pGip, u64Monotime, iCPU);
            /* Rearm for ~1ms; with HZ <= 1000 the next tick is used. */
            mod_timer(&pDevExt->aCPUs[iCPU].Timer, jiffies + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));
        }
        else
            printk("vboxdrv: error: GIP CPU update timer executing on the wrong CPU: apicid=%d != timer-apicid=%ld (cpuid=%d != timer-cpuid=%d)\n",
                   iCPU, iLnxCPU, smp_processor_id(), pDevExt->aCPUs[iLnxCPU].iSmpProcessorId);
    }
    else
        printk("vboxdrv: error: APIC ID is bogus (GIP CPU update): apicid=%d max=%d cpuid=%d\n",
               iCPU, RT_ELEMENTS(pGip->aCPUs), smp_processor_id());

    local_irq_restore(SavedFlags);
}
1472#endif /* CONFIG_SMP */
1473
1474
1475/**
1476 * Maps the GIP into user space.
1477 *
1478 * @returns negative errno.
1479 * @param pDevExt Instance data.
1480 */
1481int VBOXCALL supdrvOSGipMap(PSUPDRVDEVEXT pDevExt, PCSUPGLOBALINFOPAGE *ppGip)
1482{
1483 int rc = 0;
1484 unsigned long ulAddr;
1485 unsigned long HCPhys = pDevExt->HCPhysGip;
1486 pgprot_t pgFlags;
1487 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_USER;
1488 dprintf2(("supdrvOSGipMap: ppGip=%p\n", ppGip));
1489
1490 /*
1491 * Allocate user space mapping and put the physical pages into it.
1492 */
1493 down_write(&current->mm->mmap_sem);
1494 ulAddr = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, 0);
1495 if (!(ulAddr & ~PAGE_MASK))
1496 {
1497#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1498 int rc2 = remap_page_range(ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1499#else
1500 int rc2 = 0;
1501 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1502 if (vma)
1503#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1504 rc2 = remap_page_range(vma, ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1505#else
1506 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, PAGE_SIZE, pgFlags);
1507#endif
1508 else
1509 {
1510 rc = SUPDRV_ERR_NO_MEMORY;
1511 dprintf(("supdrvOSGipMap: no vma found for ulAddr=%#lx!\n", ulAddr));
1512 }
1513#endif
1514 if (rc2)
1515 {
1516 rc = SUPDRV_ERR_NO_MEMORY;
1517 dprintf(("supdrvOSGipMap: remap_page_range failed rc2=%d\n", rc2));
1518 }
1519 }
1520 else
1521 {
1522 dprintf(("supdrvOSGipMap: do_mmap failed ulAddr=%#lx\n", ulAddr));
1523 rc = SUPDRV_ERR_NO_MEMORY;
1524 }
1525 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1526
1527 /*
1528 * Success?
1529 */
1530 if (!rc)
1531 {
1532 *ppGip = (PCSUPGLOBALINFOPAGE)ulAddr;
1533 dprintf2(("supdrvOSGipMap: ppGip=%p\n", *ppGip));
1534 return 0;
1535 }
1536
1537 /*
1538 * Failure, cleanup and be gone.
1539 */
1540 if (ulAddr & ~PAGE_MASK)
1541 {
1542 down_write(&current->mm->mmap_sem);
1543 MY_DO_MUNMAP(current->mm, ulAddr, PAGE_SIZE);
1544 up_write(&current->mm->mmap_sem);
1545 }
1546
1547 dprintf2(("supdrvOSGipMap: returns %d\n", rc));
1548 return rc;
1549}
1550
1551
1552/**
1553 * Maps the GIP into user space.
1554 *
1555 * @returns negative errno.
1556 * @param pDevExt Instance data.
1557 */
1558int VBOXCALL supdrvOSGipUnmap(PSUPDRVDEVEXT pDevExt, PCSUPGLOBALINFOPAGE pGip)
1559{
1560 dprintf2(("supdrvOSGipUnmap: pGip=%p\n", pGip));
1561 if (current->mm)
1562 {
1563 down_write(&current->mm->mmap_sem);
1564 MY_DO_MUNMAP(current->mm, (unsigned long)pGip, PAGE_SIZE);
1565 up_write(&current->mm->mmap_sem);
1566 }
1567 dprintf2(("supdrvOSGipUnmap: returns 0\n"));
1568 return 0;
1569}
1570
1571
1572/**
1573 * Resumes the GIP updating.
1574 *
1575 * @param pDevExt Instance data.
1576 */
void VBOXCALL supdrvOSGipResume(PSUPDRVDEVEXT pDevExt)
{
    dprintf2(("supdrvOSGipResume:\n"));
    /* NOTE: the if/else is spliced together across the two CONFIG_SMP
       sections below - read them as one statement. In async TSC mode the
       global timer is restarted AND every CPU restarts its own timer via
       the cross-call; otherwise only the global timer is restarted. */
#ifdef CONFIG_SMP
    if (pDevExt->pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
#endif
        mod_timer(&g_GipTimer, jiffies);
#ifdef CONFIG_SMP
    else
    {
        mod_timer(&g_GipTimer, jiffies);
        smp_call_function(VBoxSupGipResumePerCpu, pDevExt, 0 /* retry */, 1 /* wait */);
    }
#endif
}
1592
1593
1594#ifdef CONFIG_SMP
1595/**
1596 * Callback for resuming GIP updating on the other CPUs.
1597 *
1598 * This is only used when the GIP is in async tsc mode.
1599 *
1600 * @param pvUser Pointer to the device instance.
1601 */
static void VBoxSupGipResumePerCpu(void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    /* Per-CPU GIP slots are indexed by APIC id. */
    uint8_t iCPU = ASMGetApicId();

    /* Guard against an APIC id beyond the slot array. */
    if (RT_UNLIKELY(iCPU >= RT_ELEMENTS(pDevExt->pGip->aCPUs)))
    {
        printk("vboxdrv: error: apicid=%d max=%d cpuid=%d\n",
               iCPU, RT_ELEMENTS(pDevExt->pGip->aCPUs), smp_processor_id());
        return;
    }

    /* Remember which linux CPU serves this slot and restart its timer. */
    pDevExt->aCPUs[iCPU].iSmpProcessorId = smp_processor_id();
    mod_timer(&pDevExt->aCPUs[iCPU].Timer, jiffies);
}
1617#endif /* CONFIG_SMP */
1618
1619
1620/**
1621 * Suspends the GIP updating.
1622 *
1623 * @param pDevExt Instance data.
1624 */
void VBOXCALL supdrvOSGipSuspend(PSUPDRVDEVEXT pDevExt)
{
#ifdef CONFIG_SMP
    unsigned i;
#endif
    dprintf2(("supdrvOSGipSuspend:\n"));

    /* Stop the global GIP update timer. */
    if (timer_pending(&g_GipTimer))
        del_timer_sync(&g_GipTimer);
#ifdef CONFIG_SMP
    /* ... and all the per-CPU timers used in async TSC mode. */
    for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
        if (timer_pending(&pDevExt->aCPUs[i].Timer))
            del_timer_sync(&pDevExt->aCPUs[i].Timer);
#endif
}
1640
1641
1642/**
1643 * Get the current CPU count.
1644 * @returns Number of cpus.
1645 */
unsigned VBOXCALL supdrvOSGetCPUCount(void)
{
#ifdef CONFIG_SMP
# ifdef num_present_cpus
    /* 2.6-style API: number of CPUs present in the system. */
    return num_present_cpus();
# else
    /* Older kernels expose the count as a global variable. */
    return smp_num_cpus;
# endif
#else
    /* Uniprocessor kernel. */
    return 1;
#endif
}
1658
1659/**
1660 * Force async tsc mode.
1661 * @todo add a module argument for this.
1662 */
bool VBOXCALL supdrvOSGetForcedAsyncTscMode(void)
{
    /* Never forced on Linux for now; a module parameter is planned (see @todo). */
    return false;
}
1667
1668
1669/**
1670 * Converts a supdrv error code to an linux error code.
1671 *
1672 * @returns corresponding linux error code.
1673 * @param rc supdrv error code (SUPDRV_ERR_* defines).
1674 */
1675static int VBoxSupDrvErr2LinuxErr(int rc)
1676{
1677 switch (rc)
1678 {
1679 case 0: return 0;
1680 case SUPDRV_ERR_GENERAL_FAILURE: return -EACCES;
1681 case SUPDRV_ERR_INVALID_PARAM: return -EINVAL;
1682 case SUPDRV_ERR_INVALID_MAGIC: return -EILSEQ;
1683 case SUPDRV_ERR_INVALID_HANDLE: return -ENXIO;
1684 case SUPDRV_ERR_INVALID_POINTER: return -EFAULT;
1685 case SUPDRV_ERR_LOCK_FAILED: return -ENOLCK;
1686 case SUPDRV_ERR_ALREADY_LOADED: return -EEXIST;
1687 case SUPDRV_ERR_PERMISSION_DENIED: return -EPERM;
1688 case SUPDRV_ERR_VERSION_MISMATCH: return -ENOSYS;
1689 }
1690
1691 return -EPERM;
1692}
1693
1694
1695RTDECL(int) SUPR0Printf(const char *pszFormat, ...)
1696{
1697#if 1
1698 va_list args;
1699 char szMsg[512];
1700
1701 va_start(args, pszFormat);
1702 vsnprintf(szMsg, sizeof(szMsg) - 1, pszFormat, args);
1703 szMsg[sizeof(szMsg) - 1] = '\0';
1704 printk("%s", szMsg);
1705 va_end(args);
1706#else
1707 /* forward to printf - needs some more GCC hacking to fix ebp... */
1708 __asm__ __volatile__ ("mov %0, %esp\n\t"
1709 "jmp %1\n\t",
1710 :: "r" ((uintptr_t)&pszFormat - 4),
1711 "m" (printk));
1712#endif
1713 return 0;
1714}
1715
1716
1717/** Runtime assert implementation for Linux Ring-0. */
RTDECL(void) AssertMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
{
    /* Just log the failed expression and its location; execution continues. */
    printk("!!Assertion Failed!!\n"
           "Expression: %s\n"
           "Location : %s(%d) %s\n",
           pszExpr, pszFile, uLine, pszFunction);
}
1725
1726
1727/** Runtime assert implementation for Linux Ring-0. */
1728RTDECL(void) AssertMsg2(const char *pszFormat, ...)
1729{ /* forwarder. */
1730 va_list ap;
1731 char msg[256];
1732
1733 va_start(ap, pszFormat);
1734 vsnprintf(msg, sizeof(msg) - 1, pszFormat, ap);
1735 msg[sizeof(msg) - 1] = '\0';
1736 printk("%s", msg);
1737 va_end(ap);
1738}
1739
1740
/* GCC C++ hack: dummy definition of the C++ exception-handling personality
   symbol - presumably to satisfy references from C++ objects without
   pulling in libstdc++; verify it is never actually invoked. */
unsigned __gxx_personality_v0 = 0xcccccccc;
1743
1744
/* Module entry and exit points. */
module_init(VBoxSupDrvInit);
module_exit(VBoxSupDrvUnload);

MODULE_AUTHOR("InnoTek Systemberatung GmbH");
MODULE_DESCRIPTION("VirtualBox Support Driver");
MODULE_LICENSE("GPL");
#ifdef MODULE_VERSION
/* Two-level stringification so the numeric SUPDRVIOC_VERSION macro is
   expanded before being turned into a string. */
#define xstr(s) str(s)
#define str(s) #s
MODULE_VERSION(VBOX_VERSION_STRING " (" xstr(SUPDRVIOC_VERSION) ")");
#endif
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette