VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDRVShared.c@8789

Last change on this file since 8789 was 8789, checked in by vboxsync, 17 years ago

Windows host: also check for TSC consistency across cores/CPUs. The TSC-invariant CPUID bit doesn't always imply consistent behaviour on the new Barcelona CPUs.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 146.8 KB
/* $Revision: 8789 $ */
/** @file
 * VirtualBox Support Driver - Shared code.
 */

/*
 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * The contents of this file may alternatively be used under the terms
 * of the Common Development and Distribution License Version 1.0
 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
 * VirtualBox OSE distribution, in which case the provisions of the
 * CDDL are applicable instead of those of the GPL.
 *
 * You may elect to license modified versions of this file under the
 * terms and conditions of either the GPL or the CDDL or both.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 USA or visit http://www.sun.com if you need
 * additional information or have any questions.
 */


/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#include "SUPDRV.h"
#ifndef PAGE_SHIFT
# include <iprt/param.h>
#endif
#include <iprt/alloc.h>
#include <iprt/semaphore.h>
#include <iprt/spinlock.h>
#include <iprt/thread.h>
#include <iprt/process.h>
#include <iprt/mp.h>
#include <iprt/cpuset.h>
#include <iprt/log.h>
#include <VBox/x86.h>

/*
 * Logging assignments:
 *      Log     - useful stuff, like failures.
 *      LogFlow - program flow, except the really noisy bits.
 *      Log2    - Cleanup and IDTE.
 *      Log3    - Loader flow noise.
 *      Log4    - Call VMMR0 flow noise.
 *      Log5    - Native yet-to-be-defined noise.
 *      Log6    - Native ioctl flow noise.
 *
 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
 * instantiation in log-vbox.c(pp).
 */


/*******************************************************************************
*   Defined Constants And Macros                                               *
*******************************************************************************/
/* from x86.h - clashes with linux thus this duplication */
#undef X86_CR0_PG
#define X86_CR0_PG                          RT_BIT(31)
#undef X86_CR0_PE
#define X86_CR0_PE                          RT_BIT(0)
#undef X86_CPUID_AMD_FEATURE_EDX_NX
#define X86_CPUID_AMD_FEATURE_EDX_NX        RT_BIT(20)
#undef MSR_K6_EFER
#define MSR_K6_EFER                         0xc0000080
#undef MSR_K6_EFER_NXE
#define MSR_K6_EFER_NXE                     RT_BIT(11)
#undef MSR_K6_EFER_LMA
#define MSR_K6_EFER_LMA                     RT_BIT(10)
#undef X86_CR4_PGE
#define X86_CR4_PGE                         RT_BIT(7)
#undef X86_CR4_PAE
#define X86_CR4_PAE                         RT_BIT(5)
#undef X86_CPUID_AMD_FEATURE_EDX_LONG_MODE
#define X86_CPUID_AMD_FEATURE_EDX_LONG_MODE RT_BIT(29)


/** The frequency by which we recalculate the u32UpdateHz and
 * u32UpdateIntervalNS GIP members. The value must be a power of 2. */
#define GIP_UPDATEHZ_RECALC_FREQ            0x800

/**
 * Validates a session pointer.
 *
 * @returns true/false accordingly.
 * @param   pSession    The session.
 */
#define SUP_IS_SESSION_VALID(pSession) \
    (   VALID_PTR(pSession) \
     && pSession->u32Cookie == BIRD_INV)

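#if 0 /* Example (not compiled): how SUP_IS_SESSION_VALID() is typically used.
       * A minimal sketch only; supdrvExampleIoCtlEntry is a hypothetical
       * OS-specific entry point and not part of this file. */
static int supdrvExampleIoCtlEntry(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    /* Reject calls on sessions that are unset or whose cookie was never
       initialized to BIRD_INV by supdrvCreateSession(). */
    if (!SUP_IS_SESSION_VALID(pSession))
        return VERR_INVALID_PARAMETER;
    /* ... dispatch to supdrvIOCtl() / supdrvIOCtlFast() ... */
    return VINF_SUCCESS;
}
#endif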

/*******************************************************************************
*   Global Variables                                                           *
*******************************************************************************/
/**
 * Array of the R0 SUP API.
 */
static SUPFUNC g_aFunctions[] =
{
    /* name                                     function */
    { "SUPR0ObjRegister",                       (void *)SUPR0ObjRegister },
    { "SUPR0ObjAddRef",                         (void *)SUPR0ObjAddRef },
    { "SUPR0ObjRelease",                        (void *)SUPR0ObjRelease },
    { "SUPR0ObjVerifyAccess",                   (void *)SUPR0ObjVerifyAccess },
    { "SUPR0LockMem",                           (void *)SUPR0LockMem },
    { "SUPR0UnlockMem",                         (void *)SUPR0UnlockMem },
    { "SUPR0ContAlloc",                         (void *)SUPR0ContAlloc },
    { "SUPR0ContFree",                          (void *)SUPR0ContFree },
    { "SUPR0LowAlloc",                          (void *)SUPR0LowAlloc },
    { "SUPR0LowFree",                           (void *)SUPR0LowFree },
    { "SUPR0MemAlloc",                          (void *)SUPR0MemAlloc },
    { "SUPR0MemGetPhys",                        (void *)SUPR0MemGetPhys },
    { "SUPR0MemFree",                           (void *)SUPR0MemFree },
    { "SUPR0PageAlloc",                         (void *)SUPR0PageAlloc },
    { "SUPR0PageFree",                          (void *)SUPR0PageFree },
    { "SUPR0Printf",                            (void *)SUPR0Printf },
    { "RTMemAlloc",                             (void *)RTMemAlloc },
    { "RTMemAllocZ",                            (void *)RTMemAllocZ },
    { "RTMemFree",                              (void *)RTMemFree },
    /*{ "RTMemDup",                             (void *)RTMemDup },*/
    { "RTMemRealloc",                           (void *)RTMemRealloc },
    { "RTR0MemObjAllocLow",                     (void *)RTR0MemObjAllocLow },
    { "RTR0MemObjAllocPage",                    (void *)RTR0MemObjAllocPage },
    { "RTR0MemObjAllocPhys",                    (void *)RTR0MemObjAllocPhys },
    { "RTR0MemObjAllocPhysNC",                  (void *)RTR0MemObjAllocPhysNC },
    { "RTR0MemObjAllocCont",                    (void *)RTR0MemObjAllocCont },
    { "RTR0MemObjLockUser",                     (void *)RTR0MemObjLockUser },
    { "RTR0MemObjMapKernel",                    (void *)RTR0MemObjMapKernel },
    { "RTR0MemObjMapUser",                      (void *)RTR0MemObjMapUser },
    { "RTR0MemObjAddress",                      (void *)RTR0MemObjAddress },
    { "RTR0MemObjAddressR3",                    (void *)RTR0MemObjAddressR3 },
    { "RTR0MemObjSize",                         (void *)RTR0MemObjSize },
    { "RTR0MemObjIsMapping",                    (void *)RTR0MemObjIsMapping },
    { "RTR0MemObjGetPagePhysAddr",              (void *)RTR0MemObjGetPagePhysAddr },
    { "RTR0MemObjFree",                         (void *)RTR0MemObjFree },
/* These don't work yet on linux - use fast mutexes!
    { "RTSemMutexCreate",                       (void *)RTSemMutexCreate },
    { "RTSemMutexRequest",                      (void *)RTSemMutexRequest },
    { "RTSemMutexRelease",                      (void *)RTSemMutexRelease },
    { "RTSemMutexDestroy",                      (void *)RTSemMutexDestroy },
*/
    { "RTProcSelf",                             (void *)RTProcSelf },
    { "RTR0ProcHandleSelf",                     (void *)RTR0ProcHandleSelf },
    { "RTSemFastMutexCreate",                   (void *)RTSemFastMutexCreate },
    { "RTSemFastMutexDestroy",                  (void *)RTSemFastMutexDestroy },
    { "RTSemFastMutexRequest",                  (void *)RTSemFastMutexRequest },
    { "RTSemFastMutexRelease",                  (void *)RTSemFastMutexRelease },
    { "RTSemEventCreate",                       (void *)RTSemEventCreate },
    { "RTSemEventSignal",                       (void *)RTSemEventSignal },
    { "RTSemEventWait",                         (void *)RTSemEventWait },
    { "RTSemEventWaitNoResume",                 (void *)RTSemEventWaitNoResume },
    { "RTSemEventDestroy",                      (void *)RTSemEventDestroy },
    { "RTSemEventMultiCreate",                  (void *)RTSemEventMultiCreate },
    { "RTSemEventMultiSignal",                  (void *)RTSemEventMultiSignal },
    { "RTSemEventMultiReset",                   (void *)RTSemEventMultiReset },
    { "RTSemEventMultiWait",                    (void *)RTSemEventMultiWait },
    { "RTSemEventMultiWaitNoResume",            (void *)RTSemEventMultiWaitNoResume },
    { "RTSemEventMultiDestroy",                 (void *)RTSemEventMultiDestroy },
    { "RTSpinlockCreate",                       (void *)RTSpinlockCreate },
    { "RTSpinlockDestroy",                      (void *)RTSpinlockDestroy },
    { "RTSpinlockAcquire",                      (void *)RTSpinlockAcquire },
    { "RTSpinlockRelease",                      (void *)RTSpinlockRelease },
    { "RTSpinlockAcquireNoInts",                (void *)RTSpinlockAcquireNoInts },
    { "RTSpinlockReleaseNoInts",                (void *)RTSpinlockReleaseNoInts },
    { "RTThreadNativeSelf",                     (void *)RTThreadNativeSelf },
    { "RTThreadSleep",                          (void *)RTThreadSleep },
    { "RTThreadYield",                          (void *)RTThreadYield },
#if 0 /* Thread APIs, Part 2. */
    { "RTThreadSelf",                           (void *)RTThreadSelf },
    { "RTThreadCreate",                         (void *)RTThreadCreate },
    { "RTThreadGetNative",                      (void *)RTThreadGetNative },
    { "RTThreadWait",                           (void *)RTThreadWait },
    { "RTThreadWaitNoResume",                   (void *)RTThreadWaitNoResume },
    { "RTThreadGetName",                        (void *)RTThreadGetName },
    { "RTThreadSelfName",                       (void *)RTThreadSelfName },
    { "RTThreadGetType",                        (void *)RTThreadGetType },
    { "RTThreadUserSignal",                     (void *)RTThreadUserSignal },
    { "RTThreadUserReset",                      (void *)RTThreadUserReset },
    { "RTThreadUserWait",                       (void *)RTThreadUserWait },
    { "RTThreadUserWaitNoResume",               (void *)RTThreadUserWaitNoResume },
#endif
    { "RTLogDefaultInstance",                   (void *)RTLogDefaultInstance },
    { "RTMpCpuIdFromSetIndex",                  (void *)RTMpCpuIdFromSetIndex },
    { "RTMpCpuIdToSetIndex",                    (void *)RTMpCpuIdToSetIndex },
    { "RTMpDoesCpuExist",                       (void *)RTMpDoesCpuExist },
    { "RTMpGetCount",                           (void *)RTMpGetCount },
    { "RTMpGetMaxCpuId",                        (void *)RTMpGetMaxCpuId },
    { "RTMpGetOnlineCount",                     (void *)RTMpGetOnlineCount },
    { "RTMpGetOnlineSet",                       (void *)RTMpGetOnlineSet },
    { "RTMpGetSet",                             (void *)RTMpGetSet },
    { "RTMpIsCpuOnline",                        (void *)RTMpIsCpuOnline },
    { "RTMpOnAll",                              (void *)RTMpOnAll },
    { "RTMpOnOthers",                           (void *)RTMpOnOthers },
    { "RTMpOnSpecific",                         (void *)RTMpOnSpecific },
    { "RTLogRelDefaultInstance",                (void *)RTLogRelDefaultInstance },
    { "RTLogSetDefaultInstanceThread",          (void *)RTLogSetDefaultInstanceThread },
    { "RTLogLogger",                            (void *)RTLogLogger },
    { "RTLogLoggerEx",                          (void *)RTLogLoggerEx },
    { "RTLogLoggerExV",                         (void *)RTLogLoggerExV },
    { "RTLogPrintf",                            (void *)RTLogPrintf },
    { "RTLogPrintfV",                           (void *)RTLogPrintfV },
    { "AssertMsg1",                             (void *)AssertMsg1 },
    { "AssertMsg2",                             (void *)AssertMsg2 },
};

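#if 0 /* Example (not compiled): how a lookup over g_aFunctions might work.
       * A minimal sketch; supdrvExampleResolveSymbol is hypothetical, the
       * member names szName/pfn are assumed for SUPFUNC, and the real
       * resolution happens in supdrvIOCtl_LdrLoad()/LdrGetSymbol(). */
static void *supdrvExampleResolveSymbol(const char *pszSymbol)
{
    unsigned i;
    for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
        if (!strcmp(g_aFunctions[i].szName, pszSymbol))
            return g_aFunctions[i].pfn;
    return NULL; /* not exported to ring-0 images */
}
#endif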

/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
static int      supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
static int      supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
#ifdef VBOX_WITH_IDT_PATCHING
static int      supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq);
static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
static int      supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession);
static void     supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
static void     supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry);
#endif /* VBOX_WITH_IDT_PATCHING */
static int      supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
static int      supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
static int      supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
static int      supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
static int      supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
static void     supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt);
static void     supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
static void     supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void);
static SUPGIPMODE supdrvGipDeterminTscMode(PSUPDRVDEVEXT pDevExt);
#ifdef RT_OS_WINDOWS
static int      supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages);
static bool     supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3);
#endif
#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
static int      supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
static void     supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
static DECLCALLBACK(void) supdrvGipTimer(PRTTIMER pTimer, void *pvUser);
#endif


/**
 * Initializes the device extension structure.
 *
 * @returns IPRT status code.
 * @param   pDevExt     The device extension to initialize.
 */
int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt)
{
    /*
     * Initialize it.
     */
    int rc;
    memset(pDevExt, 0, sizeof(*pDevExt));
    rc = RTSpinlockCreate(&pDevExt->Spinlock);
    if (!rc)
    {
        rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
        if (!rc)
        {
            rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
            if (!rc)
            {
#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
                rc = supdrvGipCreate(pDevExt);
                if (RT_SUCCESS(rc))
                {
                    pDevExt->u32Cookie = BIRD;  /** @todo make this random? */
                    return VINF_SUCCESS;
                }
#else
                pDevExt->u32Cookie = BIRD;
                return VINF_SUCCESS;
#endif
            }
            RTSemFastMutexDestroy(pDevExt->mtxLdr);
            pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
        }
        RTSpinlockDestroy(pDevExt->Spinlock);
        pDevExt->Spinlock = NIL_RTSPINLOCK;
    }
    return rc;
}

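#if 0 /* Example (not compiled): the create/destroy pairing as a hypothetical
       * OS-specific attach routine might use it. VBoxDrvExampleAttach is made
       * up for illustration; each platform has its own attach/detach glue.
       * Note the unwind idiom above: each successfully created member is torn
       * down in reverse order on a later failure. */
static int VBoxDrvExampleAttach(PSUPDRVDEVEXT pDevExt)
{
    int rc = supdrvInitDevExt(pDevExt);
    if (RT_SUCCESS(rc))
    {
        /* ... register the device node, etc. If anything after this point
           fails: supdrvDeleteDevExt(pDevExt); ... */
    }
    return rc;
}
#endif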

/**
 * Delete the device extension (e.g. cleanup members).
 *
 * @param   pDevExt     The device extension to delete.
 */
void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
{
#ifdef VBOX_WITH_IDT_PATCHING
    PSUPDRVPATCH pPatch;
#endif
    PSUPDRVOBJ pObj;
    PSUPDRVUSAGE pUsage;

    /*
     * Kill mutexes and spinlocks.
     */
    RTSemFastMutexDestroy(pDevExt->mtxGip);
    pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
    RTSemFastMutexDestroy(pDevExt->mtxLdr);
    pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
    RTSpinlockDestroy(pDevExt->Spinlock);
    pDevExt->Spinlock = NIL_RTSPINLOCK;

    /*
     * Free lists.
     */
#ifdef VBOX_WITH_IDT_PATCHING
    /* patches */
    /** @todo make sure we don't uninstall patches which have been patched by someone else. */
    pPatch = pDevExt->pIdtPatchesFree;
    pDevExt->pIdtPatchesFree = NULL;
    while (pPatch)
    {
        void *pvFree = pPatch;
        pPatch = pPatch->pNext;
        RTMemExecFree(pvFree);
    }
#endif /* VBOX_WITH_IDT_PATCHING */

    /* objects. */
    pObj = pDevExt->pObjs;
#if !defined(DEBUG_bird) || !defined(RT_OS_LINUX) /* breaks unloading, temporary, remove me! */
    Assert(!pObj);                      /* (can trigger on forced unloads) */
#endif
    pDevExt->pObjs = NULL;
    while (pObj)
    {
        void *pvFree = pObj;
        pObj = pObj->pNext;
        RTMemFree(pvFree);
    }

    /* usage records. */
    pUsage = pDevExt->pUsageFree;
    pDevExt->pUsageFree = NULL;
    while (pUsage)
    {
        void *pvFree = pUsage;
        pUsage = pUsage->pNext;
        RTMemFree(pvFree);
    }

#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
    /* kill the GIP */
    supdrvGipDestroy(pDevExt);
#endif
}


/**
 * Create session.
 *
 * @returns IPRT status code.
 * @param   pDevExt     Device extension.
 * @param   ppSession   Where to store the pointer to the session data.
 */
int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION *ppSession)
{
    /*
     * Allocate memory for the session data.
     */
    int rc = VERR_NO_MEMORY;
    PSUPDRVSESSION pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(sizeof(*pSession));
    if (pSession)
    {
        /* Initialize session data. */
        rc = RTSpinlockCreate(&pSession->Spinlock);
        if (!rc)
        {
            Assert(pSession->Spinlock != NIL_RTSPINLOCK);
            pSession->pDevExt = pDevExt;
            pSession->u32Cookie = BIRD_INV;
            /*pSession->pLdrUsage = NULL;
            pSession->pPatchUsage = NULL;
            pSession->pUsage = NULL;
            pSession->pGip = NULL;
            pSession->fGipReferenced = false;
            pSession->Bundle.cUsed = 0 */

            LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
            return VINF_SUCCESS;
        }

        RTMemFree(pSession);
        *ppSession = NULL;
        Log(("Failed to create spinlock, rc=%d!\n", rc));
    }

    return rc;
}


/**
 * Shared code for cleaning up a session.
 *
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 *                      This data will be freed by this routine.
 */
void VBOXCALL supdrvCloseSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    /*
     * Cleanup the session first.
     */
    supdrvCleanupSession(pDevExt, pSession);

    /*
     * Free the rest of the session stuff.
     */
    RTSpinlockDestroy(pSession->Spinlock);
    pSession->Spinlock = NIL_RTSPINLOCK;
    pSession->pDevExt = NULL;
    RTMemFree(pSession);
    LogFlow(("supdrvCloseSession: returns\n"));
}

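#if 0 /* Example (not compiled): the typical session lifecycle as hypothetical
       * OS glue would drive it - create on open, close on the final handle
       * release. VBoxDrvExampleOpen/Close are illustrative names only. */
static int VBoxDrvExampleOpen(PSUPDRVDEVEXT pDevExt)
{
    PSUPDRVSESSION pSession;
    int rc = supdrvCreateSession(pDevExt, &pSession);
    if (RT_SUCCESS(rc))
    {
        /* ... stash pSession in the file handle's private data ... */
    }
    return rc;
}

static void VBoxDrvExampleClose(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    supdrvCloseSession(pDevExt, pSession); /* cleans up and frees pSession */
}
#endif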

/**
 * Shared code for cleaning up a session (but not quite freeing it).
 *
 * This is primarily intended for Mac OS X, where we have to clean up the
 * memory stuff before the file handle is closed.
 *
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 *                      This data will be freed by this routine.
 */
void VBOXCALL supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    PSUPDRVBUNDLE pBundle;
    LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));

    /*
     * Remove logger instances related to this session.
     */
    RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);

#ifdef VBOX_WITH_IDT_PATCHING
    /*
     * Uninstall any IDT patches installed for this session.
     */
    supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
#endif

    /*
     * Release object references made in this session.
     * In theory there should be no one racing us in this session.
     */
    Log2(("release objects - start\n"));
    if (pSession->pUsage)
    {
        RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
        PSUPDRVUSAGE pUsage;
        RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);

        while ((pUsage = pSession->pUsage) != NULL)
        {
            PSUPDRVOBJ pObj = pUsage->pObj;
            pSession->pUsage = pUsage->pNext;

            AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
            if (pUsage->cUsage < pObj->cUsage)
            {
                pObj->cUsage -= pUsage->cUsage;
                RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
            }
            else
            {
                /* Destroy the object and free the record. */
                if (pDevExt->pObjs == pObj)
                    pDevExt->pObjs = pObj->pNext;
                else
                {
                    PSUPDRVOBJ pObjPrev;
                    for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
                        if (pObjPrev->pNext == pObj)
                        {
                            pObjPrev->pNext = pObj->pNext;
                            break;
                        }
                    Assert(pObjPrev);
                }
                RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);

                Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
                     pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
                if (pObj->pfnDestructor)
                    pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
                RTMemFree(pObj);
            }

            /* free it and continue. */
            RTMemFree(pUsage);

            RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
        }

        RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
        AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during destruction!\n"));
    }
    Log2(("release objects - done\n"));

    /*
     * Release memory allocated in the session.
     *
     * We do not serialize this as we assume that the application will
     * not allocate memory while closing the file handle object.
     */
    Log2(("freeing memory:\n"));
    pBundle = &pSession->Bundle;
    while (pBundle)
    {
        PSUPDRVBUNDLE pToFree;
        unsigned i;

        /*
         * Check and unlock all entries in the bundle.
         */
        for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
        {
            if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
            {
                int rc;
                Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
                      (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
                if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
                {
                    rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
                    AssertRC(rc); /** @todo figure out how to handle this. */
                    pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
                }
                rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, false);
                AssertRC(rc); /** @todo figure out how to handle this. */
                pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
                pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
            }
        }

        /*
         * Advance and free previous bundle.
         */
        pToFree = pBundle;
        pBundle = pBundle->pNext;

        pToFree->pNext = NULL;
        pToFree->cUsed = 0;
        if (pToFree != &pSession->Bundle)
            RTMemFree(pToFree);
    }
    Log2(("freeing memory - done\n"));

    /*
     * Loaded images need to be dereferenced and possibly freed up.
     */
    RTSemFastMutexRequest(pDevExt->mtxLdr);
    Log2(("freeing images:\n"));
    if (pSession->pLdrUsage)
    {
        PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
        pSession->pLdrUsage = NULL;
        while (pUsage)
        {
            void *pvFree = pUsage;
            PSUPDRVLDRIMAGE pImage = pUsage->pImage;
            if (pImage->cUsage > pUsage->cUsage)
                pImage->cUsage -= pUsage->cUsage;
            else
                supdrvLdrFree(pDevExt, pImage);
            pUsage->pImage = NULL;
            pUsage = pUsage->pNext;
            RTMemFree(pvFree);
        }
    }
    RTSemFastMutexRelease(pDevExt->mtxLdr);
    Log2(("freeing images - done\n"));

    /*
     * Unmap the GIP.
     */
    Log2(("unmapping GIP:\n"));
#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
#else
    if (pSession->pGip)
#endif
    {
        SUPR0GipUnmap(pSession);
#ifndef USE_NEW_OS_INTERFACE_FOR_GIP
        pSession->pGip = NULL;
#endif
        pSession->fGipReferenced = 0;
    }
    Log2(("unmapping GIP - done\n"));
}


/**
 * Fast path I/O control worker.
 *
 * @returns VBox status code that should be passed down to ring-3 unchanged.
 * @param   uIOCtl      Function number.
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 */
int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    int rc;

    /*
     * The two prerequisites are checked together like this only to allow the
     * compiler to optimize things better.
     */
    if (RT_LIKELY(pSession->pVM && pDevExt->pfnVMMR0EntryFast))
    {
        switch (uIOCtl)
        {
            case SUP_IOCTL_FAST_DO_RAW_RUN:
                rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_RAW_RUN);
                break;
            case SUP_IOCTL_FAST_DO_HWACC_RUN:
                rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_HWACC_RUN);
                break;
            case SUP_IOCTL_FAST_DO_NOP:
                rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_NOP);
                break;
            default:
                rc = VERR_INTERNAL_ERROR;
                break;
        }
    }
    else
        rc = VERR_INTERNAL_ERROR;

    return rc;
}

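#if 0 /* Example (not compiled): the fast path only handles the three
       * SUP_IOCTL_FAST_DO_* codes and carries no request buffer. A
       * hypothetical OS-specific ioctl handler would route like this. */
static int VBoxDrvExampleRoute(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
{
    if (   uIOCtl == SUP_IOCTL_FAST_DO_RAW_RUN
        || uIOCtl == SUP_IOCTL_FAST_DO_HWACC_RUN
        || uIOCtl == SUP_IOCTL_FAST_DO_NOP)
        return supdrvIOCtlFast(uIOCtl, pDevExt, pSession);  /* no header validation */
    return supdrvIOCtl(uIOCtl, pDevExt, pSession, pReqHdr); /* full validation below */
}
#endif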

/**
 * Helper for supdrvIOCtl(): checks whether pszStr contains any character from
 * pszChars. We would use strpbrk here if it were on the RedHat kABI whitelist,
 * see http://www.kerneldrivers.org/RHEL5.
 *
 * @return  1 if pszStr contains any character from pszChars, 0 otherwise.
 * @param   pszStr      String to check.
 * @param   pszChars    Character set.
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    int chCur;
    while ((chCur = *pszStr++) != '\0')
    {
        int ch;
        const char *psz = pszChars;
        while ((ch = *psz++) != '\0')
            if (ch == chCur)
                return 1;
    }
    return 0;
}

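#if 0 /* Example (not compiled): supdrvCheckInvalidChar() returns non-zero when
       * any character from the set occurs in the string; this mirrors the
       * SUP_IOCTL_LDR_OPEN name check further down. The wrapper is
       * illustrative only. */
static int supdrvExampleNameIsClean(const char *pszName)
{
    return !supdrvCheckInvalidChar(pszName, ";:()[]{}/\\|&*%#@!~`\"'");
}
#endif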

/**
 * I/O control worker.
 *
 * @returns 0 on success.
 * @returns VERR_INVALID_PARAMETER if the request is invalid.
 *
 * @param   uIOCtl      Function number.
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 * @param   pReqHdr     The request header.
 */
int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
{
    /*
     * Validate the request.
     */
    /* this first check could probably be omitted as it's also done by the OS specific code... */
    if (RT_UNLIKELY(    (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
                    ||  pReqHdr->cbIn < sizeof(*pReqHdr)
                    ||  pReqHdr->cbOut < sizeof(*pReqHdr)))
    {
        OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
                    (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
        return VERR_INVALID_PARAMETER;
    }
    if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
    {
        if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
        {
            OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
            return VERR_INVALID_PARAMETER;
        }
    }
    else if (RT_UNLIKELY(    pReqHdr->u32Cookie != pDevExt->u32Cookie
                         ||  pReqHdr->u32SessionCookie != pSession->u32Cookie))
    {
        OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
        return VERR_INVALID_PARAMETER;
    }

/*
 * Validation macros
 */
#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
    do { \
        if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
        { \
            OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
                        (long)pReq->Hdr.cbIn, (long)(cbInExpect), (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
            return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
        } \
    } while (0)

#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)

#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
    do { \
        if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
        { \
            OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
                        (long)pReq->Hdr.cbIn, (long)(cbInExpect))); \
            return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
        } \
    } while (0)

#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
    do { \
        if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
        { \
            OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
                        (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
            return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
        } \
    } while (0)

#define REQ_CHECK_EXPR(Name, expr) \
    do { \
        if (RT_UNLIKELY(!(expr))) \
        { \
            OSDBGPRINT(( #Name ": %s\n", #expr)); \
            return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
        } \
    } while (0)

#define REQ_CHECK_EXPR_FMT(expr, fmt) \
    do { \
        if (RT_UNLIKELY(!(expr))) \
        { \
            OSDBGPRINT( fmt ); \
            return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
        } \
    } while (0)

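#if 0 /* Example (not compiled): roughly what REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE)
       * expands to - the token-pasted _SIZE_IN/_SIZE_OUT constants of the named
       * request are compared against the header. A sketch for orientation only;
       * the real message text comes from the macro above. */
    if (RT_UNLIKELY(   pReqHdr->cbIn  != SUP_IOCTL_LDR_FREE_SIZE_IN
                    || pReqHdr->cbOut != SUP_IOCTL_LDR_FREE_SIZE_OUT))
    {
        OSDBGPRINT(("SUP_IOCTL_LDR_FREE: Invalid input/output sizes. ...\n"));
        return pReq->Hdr.rc = VERR_INVALID_PARAMETER;
    }
#endif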

    /*
     * The switch.
     */
    switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
    {
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
        {
            PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
            if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
            {
                OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
                pReq->Hdr.rc = VERR_INVALID_MAGIC;
                return 0;
            }

#if 0
            /*
             * Call out to the OS specific code and let it do permission checks on the
             * client process.
             */
            if (!supdrvOSValidateClientProcess(pDevExt, pSession))
            {
                pReq->u.Out.u32Cookie         = 0xffffffff;
                pReq->u.Out.u32SessionCookie  = 0xffffffff;
                pReq->u.Out.u32SessionVersion = 0xffffffff;
                pReq->u.Out.u32DriverVersion  = SUPDRVIOC_VERSION;
                pReq->u.Out.pSession          = NULL;
                pReq->u.Out.cFunctions        = 0;
                pReq->Hdr.rc = VERR_PERMISSION_DENIED;
                return 0;
            }
#endif

            /*
             * Match the version.
             * The current logic is very simple: match the major interface version.
             */
            if (    pReq->u.In.u32MinVersion > SUPDRVIOC_VERSION
                ||  (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRVIOC_VERSION & 0xffff0000))
            {
                OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x  Min: %#x  Current: %#x\n",
                            pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRVIOC_VERSION));
                pReq->u.Out.u32Cookie         = 0xffffffff;
                pReq->u.Out.u32SessionCookie  = 0xffffffff;
                pReq->u.Out.u32SessionVersion = 0xffffffff;
                pReq->u.Out.u32DriverVersion  = SUPDRVIOC_VERSION;
                pReq->u.Out.pSession          = NULL;
                pReq->u.Out.cFunctions        = 0;
                pReq->Hdr.rc = VERR_VERSION_MISMATCH;
                return 0;
            }

            /*
             * Fill in return data and be gone.
             * N.B. The first one to change SUPDRVIOC_VERSION shall make sure that
             *      u32SessionVersion <= u32ReqVersion!
             */
            /** @todo Somehow validate the client and negotiate a secure cookie... */
            pReq->u.Out.u32Cookie         = pDevExt->u32Cookie;
            pReq->u.Out.u32SessionCookie  = pSession->u32Cookie;
            pReq->u.Out.u32SessionVersion = SUPDRVIOC_VERSION;
            pReq->u.Out.u32DriverVersion  = SUPDRVIOC_VERSION;
            pReq->u.Out.pSession          = pSession;
            pReq->u.Out.cFunctions        = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
            pReq->Hdr.rc = VINF_SUCCESS;
            return 0;
        }

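#if 0 /* Example (not compiled): the ring-3 side of the cookie handshake as a
       * rough sketch. Field and constant names are the ones used above; the
       * remaining header fields (fFlags etc.) and the ioctl call itself are
       * omitted, and the real client code lives in SUPLib, not in this file. */
        SUPCOOKIE Req;
        memset(&Req, 0, sizeof(Req));
        Req.Hdr.u32Cookie      = SUPCOOKIE_INITIAL_COOKIE;
        strcpy(Req.u.In.szMagic, SUPCOOKIE_MAGIC);
        Req.u.In.u32ReqVersion = SUPDRVIOC_VERSION;
        Req.u.In.u32MinVersion = SUPDRVIOC_VERSION & 0xffff0000;
        /* ...issue SUP_IOCTL_COOKIE, then pass Req.u.Out.u32Cookie and
           Req.u.Out.u32SessionCookie in every subsequent request header... */
#endif
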
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
        {
            /* validate */
            PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
            REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));

            /* execute */
            pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
            memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
            pReq->Hdr.rc = VINF_SUCCESS;
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_INSTALL):
        {
            /* validate */
            PSUPIDTINSTALL pReq = (PSUPIDTINSTALL)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_IDT_INSTALL);

            /* execute */
#ifdef VBOX_WITH_IDT_PATCHING
            pReq->Hdr.rc = supdrvIOCtl_IdtInstall(pDevExt, pSession, pReq);
#else
            pReq->u.Out.u8Idt = 3;
            pReq->Hdr.rc = VERR_NOT_SUPPORTED;
#endif
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_REMOVE):
        {
            /* validate */
            PSUPIDTREMOVE pReq = (PSUPIDTREMOVE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_IDT_REMOVE);

            /* execute */
#ifdef VBOX_WITH_IDT_PATCHING
            pReq->Hdr.rc = supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
#else
            pReq->Hdr.rc = VERR_NOT_SUPPORTED;
#endif
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
        {
            /* validate */
            PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
            REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
            REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
            REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
            REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);

            /* execute */
            pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
        {
            /* validate */
            PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);

            /* execute */
            pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
        {
            /* validate */
            PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);

            /* execute */
            pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
        {
            /* validate */
            PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);

            /* execute */
            pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
        {
            /* validate */
            PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage > 0);
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage < _1M*16);
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, memchr(pReq->u.In.szName, '\0', sizeof(pReq->u.In.szName)));
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
        {
            /* validate */
            PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->Hdr.cbIn >= sizeof(*pReq));
            REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImage), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
            REQ_CHECK_EXPR_FMT(     !pReq->u.In.cSymbols
                               ||   (   pReq->u.In.offSymbols < pReq->u.In.cbImage
                                     && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImage),
                               ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImage=%#lx\n", (long)pReq->u.In.offSymbols,
                                (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImage));
            REQ_CHECK_EXPR_FMT(     !pReq->u.In.cbStrTab
                               ||   (   pReq->u.In.offStrTab < pReq->u.In.cbImage
                                     && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImage
                                     && pReq->u.In.cbStrTab <= pReq->u.In.cbImage),
                               ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImage=%#lx\n", (long)pReq->u.In.offStrTab,
                                (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImage));

            if (pReq->u.In.cSymbols)
            {
                uint32_t i;
                PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.achImage[pReq->u.In.offSymbols];
                for (i = 0; i < pReq->u.In.cSymbols; i++)
                {
                    REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImage,
                                       ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImage));
                    REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
                                       ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbStrTab));
                    REQ_CHECK_EXPR_FMT(memchr(&pReq->u.In.achImage[pReq->u.In.offStrTab + paSyms[i].offName], '\0', pReq->u.In.cbStrTab - paSyms[i].offName),
                                       ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbStrTab));
                }
            }

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
        {
            /* validate */
            PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
        {
            /* validate */
            PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, memchr(pReq->u.In.szSymbol, '\0', sizeof(pReq->u.In.szSymbol)));

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
        {
            /* validate */
            PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
            Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                  pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));

            if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
            {
                REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));

                /* execute */
                if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
                    pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg);
                else
                    pReq->Hdr.rc = VERR_WRONG_ORDER;
            }
            else
            {
                PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
                REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
                                   ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#x\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
                REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
                REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));

                /* execute */
                if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
                    pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg);
                else
                    pReq->Hdr.rc = VERR_WRONG_ORDER;
            }

            if (    RT_FAILURE(pReq->Hdr.rc)
                &&  pReq->Hdr.rc != VERR_INTERRUPTED
                &&  pReq->Hdr.rc != VERR_TIMEOUT)
                Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                     pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
            else
                Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                      pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
            return 0;
        }

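#if 0 /* Example (not compiled): shape of a SUP_IOCTL_CALL_VMMR0 request that
       * carries a parameter packet. Per the checks above, the packet must
       * start with a SUPVMMR0REQHDR whose u32Magic/cbReq match the ioctl
       * sizes. MYREQ is a hypothetical request structure. */
        typedef struct MYREQ { SUPVMMR0REQHDR Hdr; uint64_t u64Payload; } MYREQ;
        MYREQ Req;
        Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
        Req.Hdr.cbReq    = sizeof(Req);
        Req.u64Payload   = 42;
        /* Wrap this in a SUPCALLVMMR0 with cbIn/cbOut set to
           SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(Req)); the driver then hands &Req
           to pfnVMMR0EntryEx as the third argument. */
#endif
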
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
        {
            /* validate */
            PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);

            /* execute */
            pReq->Hdr.rc = VINF_SUCCESS;
            pReq->u.Out.enmMode = supdrvIOCtl_GetPagingMode();
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
        {
            /* validate */
            PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
            REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
            REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));

            /* execute */
            pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
        {
            /* validate */
            PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);

            /* execute */
            pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
        {
            /* validate */
            PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);

            /* execute */
            pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
            if (RT_SUCCESS(pReq->Hdr.rc))
                pReq->u.Out.pGipR0 = pDevExt->pGip;
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
        {
            /* validate */
            PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);

            /* execute */
            pReq->Hdr.rc = SUPR0GipUnmap(pSession);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
        {
            /* validate */
            PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
            REQ_CHECK_EXPR_FMT(     !pReq->u.In.pVMR0
                               ||   (   VALID_PTR(pReq->u.In.pVMR0)
                                     && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
                               ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
            /* execute */
            pSession->pVM = pReq->u.In.pVMR0;
            pReq->Hdr.rc = VINF_SUCCESS;
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC):
        {
            /* validate */
            PSUPPAGEALLOC pReq = (PSUPPAGEALLOC)pReqHdr;
            REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_SIZE_IN);
            REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC, SUP_IOCTL_PAGE_ALLOC_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_SIZE_OUT(pReq->u.In.cPages));

            /* execute */
            pReq->Hdr.rc = SUPR0PageAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
        {
            /* validate */
            PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);

            /* execute */
            pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
            return 0;
        }

        default:
            Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
            break;
    }
    return SUPDRV_ERR_GENERAL_FAILURE;
}


/**
 * Registers an object for reference counting.
 * The object is registered with one reference in the specified session.
 *
 * @returns Unique identifier on success (pointer).
 *          All future references must use this identifier.
 * @returns NULL on failure.
 * @param   pSession        The session registering the object.
 * @param   enmType         The object type.
 * @param   pfnDestructor   The destructor function which will be called when the reference count reaches 0.
 * @param   pvUser1         The first user argument.
 * @param   pvUser2         The second user argument.
 */
SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
{
    RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
    PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
    PSUPDRVOBJ pObj;
    PSUPDRVUSAGE pUsage;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
    AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
    AssertPtrReturn(pfnDestructor, NULL);

    /*
     * Allocate and initialize the object.
     */
    pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
    if (!pObj)
        return NULL;
    pObj->u32Magic      = SUPDRVOBJ_MAGIC;
    pObj->enmType       = enmType;
    pObj->pNext         = NULL;
    pObj->cUsage        = 1;
    pObj->pfnDestructor = pfnDestructor;
    pObj->pvUser1       = pvUser1;
    pObj->pvUser2       = pvUser2;
    pObj->CreatorUid    = pSession->Uid;
    pObj->CreatorGid    = pSession->Gid;
    pObj->CreatorProcess= pSession->Process;
    supdrvOSObjInitCreator(pObj, pSession);

    /*
     * Allocate the usage record.
     * (We keep freed usage records around to simplify SUPR0ObjAddRef().)
     */
    RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);

    pUsage = pDevExt->pUsageFree;
    if (pUsage)
        pDevExt->pUsageFree = pUsage->pNext;
    else
    {
        RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
        pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
        if (!pUsage)
        {
            RTMemFree(pObj);
            return NULL;
        }
        RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
    }

    /*
     * Insert the object and create the session usage record.
     */
    /* The object. */
    pObj->pNext         = pDevExt->pObjs;
    pDevExt->pObjs      = pObj;

    /* The session record. */
    pUsage->cUsage      = 1;
    pUsage->pObj        = pObj;
    pUsage->pNext       = pSession->pUsage;
    Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
    pSession->pUsage    = pUsage;

    RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);

    Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser2=%p)\n", pObj, pvUser1, pvUser2));
    return pObj;
}


/**
 * Increment the reference counter for the object associating the reference
 * with the specified session.
 *
 * @returns IPRT status code.
 * @param   pvObj       The identifier returned by SUPR0ObjRegister().
 * @param   pSession    The session which is referencing the object.
 */
SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
{
    RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
    PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
    PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
    PSUPDRVUSAGE pUsagePre;
    PSUPDRVUSAGE pUsage;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
                    ("Invalid pvObj=%p magic=%#x (expected %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
                    VERR_INVALID_PARAMETER);

    /*
     * Preallocate the usage record.
     */
    RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);

    pUsagePre = pDevExt->pUsageFree;
    if (pUsagePre)
        pDevExt->pUsageFree = pUsagePre->pNext;
    else
    {
        RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
        pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
        if (!pUsagePre)
            return VERR_NO_MEMORY;
        RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
    }

    /*
     * Reference the object.
     */
    pObj->cUsage++;

    /*
     * Look for the session record.
     */
    for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
    {
        Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
        if (pUsage->pObj == pObj)
            break;
    }
    if (pUsage)
        pUsage->cUsage++;
    else
    {
        /* create a new session record. */
        pUsagePre->cUsage   = 1;
        pUsagePre->pObj     = pObj;
        pUsagePre->pNext    = pSession->pUsage;
        pSession->pUsage    = pUsagePre;
        Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));

        pUsagePre = NULL;
    }

    /*
     * Put any unused usage record into the free list.
     */
    if (pUsagePre)
    {
        pUsagePre->pNext = pDevExt->pUsageFree;
        pDevExt->pUsageFree = pUsagePre;
    }

    RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);

    return VINF_SUCCESS;
}


/**
 * Decrement / destroy a reference counter record for an object.
 *
 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
 *
 * @returns IPRT status code.
 * @param   pvObj       The identifier returned by SUPR0ObjRegister().
 * @param   pSession    The session which is referencing the object.
 */
SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
{
    RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
    PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
    PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
    bool fDestroy = false;
    PSUPDRVUSAGE pUsage;
    PSUPDRVUSAGE pUsagePrev;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
                    ("Invalid pvObj=%p magic=%#x (expected %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
                    VERR_INVALID_PARAMETER);

    /*
     * Acquire the spinlock and look for the usage record.
     */
    RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);

    for (pUsagePrev = NULL, pUsage = pSession->pUsage;
         pUsage;
         pUsagePrev = pUsage, pUsage = pUsage->pNext)
    {
        Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
        if (pUsage->pObj == pObj)
        {
            AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
            if (pUsage->cUsage > 1)
            {
                pObj->cUsage--;
                pUsage->cUsage--;
            }
            else
            {
                /*
                 * Free the session record.
                 */
                if (pUsagePrev)
                    pUsagePrev->pNext = pUsage->pNext;
                else
                    pSession->pUsage = pUsage->pNext;
                pUsage->pNext = pDevExt->pUsageFree;
                pDevExt->pUsageFree = pUsage;

                /* What about the object? */
                if (pObj->cUsage > 1)
                    pObj->cUsage--;
                else
                {
                    /*
                     * Object is to be destroyed, unlink it.
                     */
                    pObj->u32Magic = SUPDRVOBJ_MAGIC + 1;
                    fDestroy = true;
                    if (pDevExt->pObjs == pObj)
                        pDevExt->pObjs = pObj->pNext;
                    else
                    {
                        PSUPDRVOBJ pObjPrev;
                        for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
                            if (pObjPrev->pNext == pObj)
                            {
                                pObjPrev->pNext = pObj->pNext;
                                break;
                            }
                        Assert(pObjPrev);
                    }
                }
            }
            break;
        }
    }

    RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);

    /*
     * Call the destructor and free the object if required.
     */
    if (fDestroy)
    {
        Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
             pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
        if (pObj->pfnDestructor)
            pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
        RTMemFree(pObj);
    }

    AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
    return pUsage ? VINF_SUCCESS : VERR_INVALID_PARAMETER;
}

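#if 0 /* Example (not compiled): the reference counting trio in use. The
       * destructor runs exactly once, when the last reference from any
       * session is released. myObjDtor/myObjDemo are hypothetical, and
       * SUPDRVOBJTYPE_VM stands in for whatever object type applies. */
static DECLCALLBACK(void) myObjDtor(void *pvObj, void *pvUser1, void *pvUser2)
{
    RTMemFree(pvUser1); /* pvUser2 unused in this sketch */
}

static int myObjDemo(PSUPDRVSESSION pSession1, PSUPDRVSESSION pSession2, void *pvMyData)
{
    void *pvObj = SUPR0ObjRegister(pSession1, SUPDRVOBJTYPE_VM, myObjDtor, pvMyData, NULL);
    if (!pvObj)
        return VERR_NO_MEMORY;
    SUPR0ObjAddRef(pvObj, pSession2);         /* a second session references it */
    SUPR0ObjRelease(pvObj, pSession1);        /* still alive: session 2 holds it */
    return SUPR0ObjRelease(pvObj, pSession2); /* last reference - myObjDtor fires */
}
#endif
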
/**
 * Verifies that the current process can access the specified object.
 *
 * @returns The following IPRT status code:
 * @retval  VINF_SUCCESS if access was granted.
 * @retval  VERR_PERMISSION_DENIED if access was denied.
 * @retval  VERR_INVALID_PARAMETER if invalid parameter.
 *
 * @param   pvObj       The identifier returned by SUPR0ObjRegister().
 * @param   pSession    The session which wishes to access the object.
 * @param   pszObjName  Object string name. This is optional and depends on the object type.
 *
 * @remark  The caller is responsible for making sure the object isn't removed while
 *          we're inside this function. If uncertain about this, just call AddRef before calling us.
 */
SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
{
    PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
    int rc;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
                    ("Invalid pvObj=%p magic=%#x (expected %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
                    VERR_INVALID_PARAMETER);

    /*
     * Check access. (returns true if a decision has been made.)
     */
    rc = VERR_INTERNAL_ERROR;
    if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
        return rc;

    /*
     * Default policy is to allow the user to access his own
     * stuff but nothing else.
     */
    if (pObj->CreatorUid == pSession->Uid)
        return VINF_SUCCESS;
    return VERR_PERMISSION_DENIED;
}


/**
 * Lock pages.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the locked memory should be associated.
 * @param   pvR3        Start of the memory range to lock.
 *                      This must be page aligned.
 * @param   cPages      Number of pages in the range to lock.
 * @param   paPages     Where to store the physical addresses of the locked pages.
 */
SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
{
    int rc;
    SUPDRVMEMREF Mem = {0};
    const size_t cb = (size_t)cPages << PAGE_SHIFT;
    LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));

    /*
     * Verify input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
    if (    RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
        ||  !pvR3)
    {
        Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
        return VERR_INVALID_PARAMETER;
    }

#ifdef RT_OS_WINDOWS /* A temporary hack for windows, will be removed once all ring-3 code has been cleaned up. */
    /* First check if we allocated it using SUPPageAlloc; if so then we don't need to lock it again */
    rc = supdrvPageGetPhys(pSession, pvR3, cPages, paPages);
    if (RT_SUCCESS(rc))
        return rc;
#endif

    /*
     * Let IPRT do the job.
     */
    Mem.eType = MEMREF_TYPE_LOCKED;
    rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTR0ProcHandleSelf());
    if (RT_SUCCESS(rc))
    {
        uint32_t iPage = cPages;
        AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
        AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));

        while (iPage-- > 0)
        {
            paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
            if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
            {
                AssertMsgFailed(("iPage=%d\n", iPage));
                rc = VERR_INTERNAL_ERROR;
                break;
            }
        }
        if (RT_SUCCESS(rc))
            rc = supdrvMemAdd(&Mem, pSession);
        if (RT_FAILURE(rc))
        {
            int rc2 = RTR0MemObjFree(Mem.MemObj, false);
            AssertRC(rc2);
        }
    }

    return rc;
}

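#if 0 /* Example (not compiled): locking a page-aligned ring-3 buffer and
       * collecting its physical page addresses, then unlocking via
       * SUPR0UnlockMem() below. pvUserBuf is hypothetical and error handling
       * is trimmed. */
    RTHCPHYS aPhys[4];
    int rc = SUPR0LockMem(pSession, pvUserBuf /* page aligned */, RT_ELEMENTS(aPhys), &aPhys[0]);
    if (RT_SUCCESS(rc))
    {
        /* ... hand aPhys[] to the device / VMM ... */
        rc = SUPR0UnlockMem(pSession, pvUserBuf);
    }
#endif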

/**
 * Unlocks the memory pointed to by pv.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the memory was locked.
 * @param   pvR3        Memory to unlock.
 */
SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
{
    LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
#ifdef RT_OS_WINDOWS
    /*
     * Temporary hack for windows - SUPR0PageFree will unlock SUPR0PageAlloc
     * allocations; ignore this call.
     */
    if (supdrvPageWasLockedByPageAlloc(pSession, pvR3))
    {
        LogFlow(("Page will be unlocked in SUPR0PageFree -> ignore\n"));
        return VINF_SUCCESS;
    }
#endif
    return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
}


/**
 * Allocates a chunk of page aligned memory with contiguous and fixed physical
 * backing.
 *
 * @returns IPRT status code.
 * @param   pSession    Session data.
 * @param   cPages      Number of pages to allocate.
 * @param   ppvR0       Where to put the address of the Ring-0 mapping of the allocated memory.
 * @param   ppvR3       Where to put the address of the Ring-3 mapping of the allocated memory.
 * @param   pHCPhys     Where to put the physical address of allocated memory.
 */
SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
{
    int rc;
    SUPDRVMEMREF Mem = {0};
    LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));

    /*
     * Validate input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    if (!ppvR3 || !ppvR0 || !pHCPhys)
    {
        Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
             pSession, ppvR0, ppvR3, pHCPhys));
        return VERR_INVALID_PARAMETER;
    }
    if (cPages < 1 || cPages >= 256)
    {
        Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256\n", cPages));
        return VERR_INVALID_PARAMETER;
    }

    /*
     * Let IPRT do the job.
     */
    rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
    if (RT_SUCCESS(rc))
    {
        int rc2;
        rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
                               RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
        if (RT_SUCCESS(rc))
        {
            Mem.eType = MEMREF_TYPE_CONT;
            rc = supdrvMemAdd(&Mem, pSession);
            if (!rc)
            {
                *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
                *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
                *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
                return 0;
            }

            rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
            AssertRC(rc2);
        }
        rc2 = RTR0MemObjFree(Mem.MemObj, false);
        AssertRC(rc2);
    }

    return rc;
}


/**
 * Frees memory allocated using SUPR0ContAlloc().
 *
 * @returns IPRT status code.
 * @param   pSession    The session to which the memory was allocated.
 * @param   uPtr        Pointer to the memory (ring-3 or ring-0).
 */
SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
{
    LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
}

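#if 0 /* Example (not compiled): a contiguous allocation gives one block with
       * ring-0 and ring-3 mappings plus its fixed physical address - handy
       * for device-visible structures. Variable names are hypothetical. */
    RTR0PTR  pvR0;
    RTR3PTR  pvR3;
    RTHCPHYS HCPhys;
    int rc = SUPR0ContAlloc(pSession, 4 /* pages */, &pvR0, &pvR3, &HCPhys);
    if (RT_SUCCESS(rc))
    {
        /* ... use pvR0 in the driver, pvR3 in the process, HCPhys for DMA ... */
        SUPR0ContFree(pSession, (RTHCUINTPTR)pvR3);
    }
#endif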
1657
1658/**
1659 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
1660 *
1661 * The memory isn't zeroed.
1662 *
1663 * @returns IPRT status code.
1664 * @param pSession Session data.
1665 * @param cPages Number of pages to allocate.
1666 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
1667 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
1668 * @param paPages Where to put the physical addresses of allocated memory.
1669 */
1670SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1671{
1672 unsigned iPage;
1673 int rc;
1674 SUPDRVMEMREF Mem = {0};
1675 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
1676
1677 /*
1678 * Validate input.
1679 */
1680 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1681 if (!ppvR3 || !ppvR0 || !paPages)
1682 {
1683 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
1684 pSession, ppvR3, ppvR0, paPages));
1685 return VERR_INVALID_PARAMETER;
1686
1687 }
1688 if (cPages < 1 || cPages > 256)
1689 {
1690 Log(("Illegal request cPages=%d, must be greater than 0 and not exceed 256.\n", cPages));
1691 return VERR_INVALID_PARAMETER;
1692 }
1693
1694 /*
1695 * Let IPRT do the work.
1696 */
1697 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
1698 if (RT_SUCCESS(rc))
1699 {
1700 int rc2;
1701 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1702 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1703 if (RT_SUCCESS(rc))
1704 {
1705 Mem.eType = MEMREF_TYPE_LOW;
1706 rc = supdrvMemAdd(&Mem, pSession);
1707 if (!rc)
1708 {
1709 for (iPage = 0; iPage < cPages; iPage++)
1710 {
1711 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
1712 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%VHp\n", iPage, paPages[iPage]));
1713 }
1714 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1715 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1716 return VINF_SUCCESS;
1717 }
1718
1719 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1720 AssertRC(rc2);
1721 }
1722
1723 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1724 AssertRC(rc2);
1725 }
1726
1727 return rc;
1728}
1729
1730
1731/**
1732 * Frees memory allocated using SUPR0LowAlloc().
1733 *
1734 * @returns IPRT status code.
1735 * @param pSession The session to which the memory was allocated.
1736 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1737 */
1738SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1739{
1740 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1741 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1742 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
1743}
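
/*
 * Usage sketch for the low-memory allocator; not compiled in, names are
 * illustrative. paPages must have room for one RTHCPHYS per requested page.
 */
#if 0
static int exampleLowAllocUsage(PSUPDRVSESSION pSession)
{
    RTR0PTR  pvR0;
    RTR3PTR  pvR3;
    RTHCPHYS aPhys[2];
    int rc = SUPR0LowAlloc(pSession, 2 /* cPages */, &pvR0, &pvR3, &aPhys[0]);
    if (RT_SUCCESS(rc))
    {
        /* aPhys[0] and aPhys[1] hold page-aligned physical addresses below 4GB. */
        rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pvR0);
    }
    return rc;
}
#endif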
1744
1745
1746
1747/**
1748 * Allocates a chunk of memory with both R0 and R3 mappings.
1749 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1750 *
1751 * @returns IPRT status code.
1752 * @param pSession The session to associate the allocation with.
1753 * @param cb Number of bytes to allocate.
1754 * @param ppvR0 Where to store the address of the Ring-0 mapping.
1755 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1756 */
1757SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
1758{
1759 int rc;
1760 SUPDRVMEMREF Mem = {0};
1761 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
1762
1763 /*
1764 * Validate input.
1765 */
1766 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1767 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
1768 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1769 if (cb < 1 || cb >= _4M)
1770 {
1771 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
1772 return VERR_INVALID_PARAMETER;
1773 }
1774
1775 /*
1776 * Let IPRT do the work.
1777 */
1778 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
1779 if (RT_SUCCESS(rc))
1780 {
1781 int rc2;
1782 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1783 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1784 if (RT_SUCCESS(rc))
1785 {
1786 Mem.eType = MEMREF_TYPE_MEM;
1787 rc = supdrvMemAdd(&Mem, pSession);
1788 if (!rc)
1789 {
1790 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1791 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1792 return VINF_SUCCESS;
1793 }
1794 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1795 AssertRC(rc2);
1796 }
1797
1798 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1799 AssertRC(rc2);
1800 }
1801
1802 return rc;
1803}
1804
1805
1806/**
1807 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
1808 *
1809 * @returns IPRT status code.
1810 * @param pSession The session to which the memory was allocated.
1811 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1812 * @param paPages Where to store the physical addresses.
1813 */
1814SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
1815{
1816 PSUPDRVBUNDLE pBundle;
1817 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1818 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
1819
1820 /*
1821 * Validate input.
1822 */
1823 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1824 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
1825 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
1826
1827 /*
1828 * Search for the address.
1829 */
1830 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1831 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1832 {
1833 if (pBundle->cUsed > 0)
1834 {
1835 unsigned i;
1836 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1837 {
1838 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
1839 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1840 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
1841 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1842 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
1843 )
1844 )
1845 {
1846 const unsigned cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
1847 unsigned iPage;
1848 for (iPage = 0; iPage < cPages; iPage++)
1849 {
1850 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
1851 paPages[iPage].uReserved = 0;
1852 }
1853 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1854 return VINF_SUCCESS;
1855 }
1856 }
1857 }
1858 }
1859 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1860 Log(("Failed to find %p!!!\n", (void *)uPtr));
1861 return VERR_INVALID_PARAMETER;
1862}
1863
1864
1865/**
1866 * Free memory allocated by SUPR0MemAlloc().
1867 *
1868 * @returns IPRT status code.
1869 * @param pSession The session owning the allocation.
1870 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1871 */
1872SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1873{
1874 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1875 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1876 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
1877}
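
/*
 * Sketch of the SUPR0MemAlloc / SUPR0MemGetPhys / SUPR0MemFree round trip;
 * not compiled in, sizes illustrative. The SUPPAGE array must cover
 * cb >> PAGE_SHIFT entries.
 */
#if 0
static int exampleMemAllocUsage(PSUPDRVSESSION pSession)
{
    RTR0PTR pvR0;
    RTR3PTR pvR3;
    SUPPAGE aPages[2];
    int rc = SUPR0MemAlloc(pSession, 2 * PAGE_SIZE, &pvR0, &pvR3);
    if (RT_SUCCESS(rc))
    {
        rc = SUPR0MemGetPhys(pSession, (RTHCUINTPTR)pvR0, &aPages[0]);
        /* On success aPages[i].Phys holds the physical address of page i. */
        SUPR0MemFree(pSession, (RTHCUINTPTR)pvR0);
    }
    return rc;
}
#endif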
1878
1879
1880/**
1881 * Allocates a chunk of memory with only a ring-3 mapping.
1882 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1883 *
1884 * @returns IPRT status code.
1885 * @param pSession The session to associate the allocation with.
1886 * @param cPages The number of pages to allocate.
1887 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1888 * @param paPages Where to store the physical addresses of the pages. Optional.
1889 */
1890SUPR0DECL(int) SUPR0PageAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1891{
1892 int rc;
1893 SUPDRVMEMREF Mem = {0};
1894 LogFlow(("SUPR0PageAlloc: pSession=%p cPages=%d ppvR3=%p\n", pSession, cPages, ppvR3));
1895
1896 /*
1897 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
1898 */
1899 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1900 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1901 if (cPages < 1 || cPages > (128 * _1M)/PAGE_SIZE)
1902 {
1903 Log(("SUPR0PageAlloc: Illegal request cPages=%u; must be greater than 0 and no more than 128MB worth of pages.\n", cPages));
1904 return VERR_INVALID_PARAMETER;
1905 }
1906
1907 /*
1908 * Let IPRT do the work.
1909 */
1910 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
1911 if (RT_SUCCESS(rc))
1912 {
1913 int rc2;
1914 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1915 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1916 if (RT_SUCCESS(rc))
1917 {
1918 Mem.eType = MEMREF_TYPE_LOCKED_SUP;
1919 rc = supdrvMemAdd(&Mem, pSession);
1920 if (!rc)
1921 {
1922 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1923 if (paPages)
1924 {
1925 uint32_t iPage = cPages;
1926 while (iPage-- > 0)
1927 {
1928 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
1929 Assert(paPages[iPage] != NIL_RTHCPHYS);
1930 }
1931 }
1932 return VINF_SUCCESS;
1933 }
1934 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1935 AssertRC(rc2);
1936 }
1937
1938 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1939 AssertRC(rc2);
1940 }
1941 return rc;
1942}
1943
1944
1945#ifdef RT_OS_WINDOWS
1946/**
1947 * Check if the pages were locked by SUPR0PageAlloc
1948 *
1949 * This function will be removed along with the lock/unlock hacks when
1950 * we've cleaned up the ring-3 code properly.
1951 *
1952 * @returns boolean
1953 * @param pSession The session to which the memory was allocated.
1954 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1955 */
1956static bool supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1957{
1958 PSUPDRVBUNDLE pBundle;
1959 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1960 LogFlow(("supdrvPageWasLockedByPageAlloc: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1961
1962 /*
1963 * Search for the address.
1964 */
1965 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1966 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1967 {
1968 if (pBundle->cUsed > 0)
1969 {
1970 unsigned i;
1971 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1972 {
1973 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
1974 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1975 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1976 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
1977 {
1978 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1979 return true;
1980 }
1981 }
1982 }
1983 }
1984 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1985 return false;
1986}
1987
1988
1989/**
1990 * Get the physical addresses of memory allocated using SUPR0PageAlloc().
1991 *
1992 * This function will be removed along with the lock/unlock hacks when
1993 * we've cleaned up the ring-3 code properly.
1994 *
1995 * @returns IPRT status code.
1996 * @param pSession The session to which the memory was allocated.
1997 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1998 * @param cPages Number of pages in paPages.
1999 * @param paPages Where to store the physical addresses.
2000 */
2001static int supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2002{
2003 PSUPDRVBUNDLE pBundle;
2004 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2005 LogFlow(("supdrvPageGetPhys: pSession=%p pvR3=%p cPages=%#lx paPages=%p\n", pSession, (void *)pvR3, (long)cPages, paPages));
2006
2007 /*
2008 * Search for the address.
2009 */
2010 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2011 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2012 {
2013 if (pBundle->cUsed > 0)
2014 {
2015 unsigned i;
2016 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2017 {
2018 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
2019 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2020 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2021 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
2022 {
2023 uint32_t iPage = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
2024 cPages = RT_MIN(iPage, cPages);
2025 for (iPage = 0; iPage < cPages; iPage++)
2026 paPages[iPage] = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
2027 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2028 return VINF_SUCCESS;
2029 }
2030 }
2031 }
2032 }
2033 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2034 return VERR_INVALID_PARAMETER;
2035}
2036#endif /* RT_OS_WINDOWS */
2037
2038
2039/**
2040 * Free memory allocated by SUPR0PageAlloc().
2041 *
2042 * @returns IPRT status code.
2043 * @param pSession The session owning the allocation.
2044 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
2045 */
2046SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
2047{
2048 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
2049 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2050 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED_SUP);
2051}
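
/*
 * Sketch for the ring-3 page allocator; not compiled in, count illustrative.
 * paPages is optional - pass NULL when the physical addresses aren't needed.
 */
#if 0
static int examplePageAllocUsage(PSUPDRVSESSION pSession)
{
    RTR3PTR  pvR3;
    RTHCPHYS aPhys[8];
    int rc = SUPR0PageAlloc(pSession, 8 /* cPages */, &pvR3, &aPhys[0]);
    if (RT_SUCCESS(rc))
        rc = SUPR0PageFree(pSession, pvR3);
    return rc;
}
#endif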
2052
2053
2054/**
2055 * Maps the GIP into userspace and/or gets the physical address of the GIP.
2056 *
2057 * @returns IPRT status code.
2058 * @param pSession Session to which the GIP mapping should belong.
2059 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
2060 * @param pHCPhysGip Where to store the physical address. (optional)
2061 *
2062 * @remark There is no reference counting on the mapping; one call to this function
2063 * counts globally as one reference. One call to SUPR0GipUnmap() will unmap the GIP
2064 * and remove the session as a GIP user.
2065 */
2066SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
2067{
2068 int rc = VINF_SUCCESS;
2069 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2070 RTR3PTR pGip = NIL_RTR3PTR;
2071 RTHCPHYS HCPhys = NIL_RTHCPHYS;
2072 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
2073
2074 /*
2075 * Validate
2076 */
2077 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2078 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
2079 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
2080
2081 RTSemFastMutexRequest(pDevExt->mtxGip);
2082 if (pDevExt->pGip)
2083 {
2084 /*
2085 * Map it?
2086 */
2087 if (ppGipR3)
2088 {
2089#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2090 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
2091 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
2092 RTMEM_PROT_READ, RTR0ProcHandleSelf());
2093 if (RT_SUCCESS(rc))
2094 {
2095 pGip = RTR0MemObjAddressR3(pSession->GipMapObjR3);
2096 rc = VINF_SUCCESS; /** @todo remove this and replace the !rc below with RT_SUCCESS(rc). */
2097 }
2098#else /* !USE_NEW_OS_INTERFACE_FOR_GIP */
2099 if (!pSession->pGip)
2100 rc = supdrvOSGipMap(pSession->pDevExt, &pSession->pGip);
2101 if (!rc)
2102 pGip = (RTR3PTR)pSession->pGip;
2103#endif /* !USE_NEW_OS_INTERFACE_FOR_GIP */
2104 }
2105
2106 /*
2107 * Get physical address.
2108 */
2109 if (pHCPhysGip && !rc)
2110 HCPhys = pDevExt->HCPhysGip;
2111
2112 /*
2113 * Reference globally.
2114 */
2115 if (!pSession->fGipReferenced && !rc)
2116 {
2117 pSession->fGipReferenced = 1;
2118 pDevExt->cGipUsers++;
2119 if (pDevExt->cGipUsers == 1)
2120 {
2121 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
2122 unsigned i;
2123
2124 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
2125
2126 for (i = 0; i < RT_ELEMENTS(pGipR0->aCPUs); i++)
2127 ASMAtomicXchgU32(&pGipR0->aCPUs[i].u32TransactionId, pGipR0->aCPUs[i].u32TransactionId & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
2128 ASMAtomicXchgU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
2129
2130#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2131 rc = RTTimerStart(pDevExt->pGipTimer, 0);
2132 AssertRC(rc); rc = VINF_SUCCESS;
2133#else
2134 supdrvOSGipResume(pDevExt);
2135#endif
2136 }
2137 }
2138 }
2139 else
2140 {
2141 rc = SUPDRV_ERR_GENERAL_FAILURE;
2142 Log(("SUPR0GipMap: GIP is not available!\n"));
2143 }
2144 RTSemFastMutexRelease(pDevExt->mtxGip);
2145
2146 /*
2147 * Write returns.
2148 */
2149 if (pHCPhysGip)
2150 *pHCPhysGip = HCPhys;
2151 if (ppGipR3)
2152 *ppGipR3 = pGip;
2153
2154#ifdef DEBUG_DARWIN_GIP
2155 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGip=%p GipMapObjR3=%p\n", rc, (unsigned long)HCPhys, pGip, pSession->GipMapObjR3));
2156#else
2157 LogFlow(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)(uintptr_t)pGip));
2158#endif
2159 return rc;
2160}
2161
2162
2163/**
2164 * Unmaps any user mapping of the GIP and terminates all GIP access
2165 * from this session.
2166 *
2167 * @returns IPRT status code.
2168 * @param pSession Session to which the GIP mapping should belong.
2169 */
2170SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
2171{
2172 int rc = VINF_SUCCESS;
2173 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2174#ifdef DEBUG_DARWIN_GIP
2175 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
2176 pSession,
2177 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
2178 pSession->GipMapObjR3));
2179#else
2180 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
2181#endif
2182 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2183
2184 RTSemFastMutexRequest(pDevExt->mtxGip);
2185
2186 /*
2187 * Unmap anything?
2188 */
2189#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2190 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
2191 {
2192 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
2193 AssertRC(rc);
2194 if (RT_SUCCESS(rc))
2195 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
2196 }
2197#else
2198 if (pSession->pGip)
2199 {
2200 rc = supdrvOSGipUnmap(pDevExt, pSession->pGip);
2201 if (!rc)
2202 pSession->pGip = NULL;
2203 }
2204#endif
2205
2206 /*
2207 * Dereference global GIP.
2208 */
2209 if (pSession->fGipReferenced && !rc)
2210 {
2211 pSession->fGipReferenced = 0;
2212 if ( pDevExt->cGipUsers > 0
2213 && !--pDevExt->cGipUsers)
2214 {
2215 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
2216#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2217 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
2218#else
2219 supdrvOSGipSuspend(pDevExt);
2220#endif
2221 }
2222 }
2223
2224 RTSemFastMutexRelease(pDevExt->mtxGip);
2225
2226 return rc;
2227}
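
/*
 * Sketch pairing SUPR0GipMap with SUPR0GipUnmap; not compiled in. Remember
 * the mapping isn't reference counted per call: a single SUPR0GipUnmap()
 * removes the session as a GIP user entirely.
 */
#if 0
static int exampleGipUsage(PSUPDRVSESSION pSession)
{
    RTR3PTR  pGipR3;
    RTHCPHYS HCPhysGip;
    int rc = SUPR0GipMap(pSession, &pGipR3, &HCPhysGip);
    if (RT_SUCCESS(rc))
    {
        /* Ring-3 can now read timing data from the GIP at pGipR3. */
        rc = SUPR0GipUnmap(pSession);
    }
    return rc;
}
#endif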
2228
2229
2230/**
2231 * Adds a memory object to the session.
2232 *
2233 * @returns IPRT status code.
2234 * @param pMem Memory tracking structure containing the
2235 * information to track.
2236 * @param pSession The session.
2237 */
2238static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
2239{
2240 PSUPDRVBUNDLE pBundle;
2241 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2242
2243 /*
2244 * Find free entry and record the allocation.
2245 */
2246 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2247 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2248 {
2249 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
2250 {
2251 unsigned i;
2252 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2253 {
2254 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
2255 {
2256 pBundle->cUsed++;
2257 pBundle->aMem[i] = *pMem;
2258 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2259 return VINF_SUCCESS;
2260 }
2261 }
2262 AssertFailed(); /* This cannot happen: cUsed promised a free entry. */
2263 }
2264 }
2265 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2266
2267 /*
2268 * Need to allocate a new bundle.
2269 * Insert into the last entry in the bundle.
2270 */
2271 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
2272 if (!pBundle)
2273 return VERR_NO_MEMORY;
2274
2275 /* take last entry. */
2276 pBundle->cUsed++;
2277 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
2278
2279 /* insert into list. */
2280 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2281 pBundle->pNext = pSession->Bundle.pNext;
2282 pSession->Bundle.pNext = pBundle;
2283 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2284
2285 return VINF_SUCCESS;
2286}
2287
2288
2289/**
2290 * Releases a memory object referenced by pointer and type.
2291 *
2292 * @returns IPRT status code.
2293 * @param pSession Session data.
2294 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
2295 * @param eType Memory type.
2296 */
2297static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
2298{
2299 PSUPDRVBUNDLE pBundle;
2300 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2301
2302 /*
2303 * Validate input.
2304 */
2305 if (!uPtr)
2306 {
2307 Log(("Illegal address %p\n", (void *)uPtr));
2308 return VERR_INVALID_PARAMETER;
2309 }
2310
2311 /*
2312 * Search for the address.
2313 */
2314 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2315 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2316 {
2317 if (pBundle->cUsed > 0)
2318 {
2319 unsigned i;
2320 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2321 {
2322 if ( pBundle->aMem[i].eType == eType
2323 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2324 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
2325 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2326 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
2327 )
2328 {
2329 /* Make a copy of it and release it outside the spinlock. */
2330 SUPDRVMEMREF Mem = pBundle->aMem[i];
2331 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
2332 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
2333 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
2334 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2335
2336 if (Mem.MapObjR3)
2337 {
2338 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
2339 AssertRC(rc); /** @todo figure out how to handle this. */
2340 }
2341 if (Mem.MemObj)
2342 {
2343 int rc = RTR0MemObjFree(Mem.MemObj, false);
2344 AssertRC(rc); /** @todo figure out how to handle this. */
2345 }
2346 return VINF_SUCCESS;
2347 }
2348 }
2349 }
2350 }
2351 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2352 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
2353 return VERR_INVALID_PARAMETER;
2354}
2355
2356
2357#ifdef VBOX_WITH_IDT_PATCHING
2358/**
2359 * Install IDT for the current CPU.
2360 *
2361 * @returns One of the following IPRT status codes:
2362 * @retval VINF_SUCCESS on success.
2363 * @retval VERR_IDT_FAILED.
2364 * @retval VERR_NO_MEMORY.
2365 * @param pDevExt The device extension.
2366 * @param pSession The session data.
2367 * @param pReq The request.
2368 */
2369static int supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq)
2370{
2371 PSUPDRVPATCHUSAGE pUsagePre;
2372 PSUPDRVPATCH pPatchPre;
2373 RTIDTR Idtr;
2374 PSUPDRVPATCH pPatch;
2375 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2376 LogFlow(("supdrvIOCtl_IdtInstall\n"));
2377
2378 /*
2379 * Preallocate an entry for this CPU because we don't want to do
2380 * that inside the spinlock!
2381 */
2382 pUsagePre = (PSUPDRVPATCHUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2383 if (!pUsagePre)
2384 return VERR_NO_MEMORY;
2385
2386 /*
2387 * Take the spinlock and see what we need to do.
2388 */
2389 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2390
2391 /* check if we already got a free patch. */
2392 if (!pDevExt->pIdtPatchesFree)
2393 {
2394 /*
2395 * Allocate a patch - outside the spinlock of course.
2396 */
2397 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2398
2399 pPatchPre = (PSUPDRVPATCH)RTMemExecAlloc(sizeof(*pPatchPre));
2400 if (!pPatchPre)
2401 return VERR_NO_MEMORY;
2402
2403 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2404 }
2405 else
2406 {
2407 pPatchPre = pDevExt->pIdtPatchesFree;
2408 pDevExt->pIdtPatchesFree = pPatchPre->pNext;
2409 }
2410
2411 /* look for matching patch entry */
2412 ASMGetIDTR(&Idtr);
2413 pPatch = pDevExt->pIdtPatches;
2414 while (pPatch && pPatch->pvIdt != (void *)Idtr.pIdt)
2415 pPatch = pPatch->pNext;
2416
2417 if (!pPatch)
2418 {
2419 /*
2420 * Create patch.
2421 */
2422 pPatch = supdrvIdtPatchOne(pDevExt, pPatchPre);
2423 if (pPatch)
2424 pPatchPre = NULL; /* mark as used. */
2425 }
2426 else
2427 {
2428 /*
2429 * Simply increment patch usage.
2430 */
2431 pPatch->cUsage++;
2432 }
2433
2434 if (pPatch)
2435 {
2436 /*
2437 * Increment and add if need be the session usage record for this patch.
2438 */
2439 PSUPDRVPATCHUSAGE pUsage = pSession->pPatchUsage;
2440 while (pUsage && pUsage->pPatch != pPatch)
2441 pUsage = pUsage->pNext;
2442
2443 if (!pUsage)
2444 {
2445 /*
2446 * Add usage record.
2447 */
2448 pUsagePre->cUsage = 1;
2449 pUsagePre->pPatch = pPatch;
2450 pUsagePre->pNext = pSession->pPatchUsage;
2451 pSession->pPatchUsage = pUsagePre;
2452 pUsagePre = NULL; /* mark as used. */
2453 }
2454 else
2455 {
2456 /*
2457 * Increment usage count.
2458 */
2459 pUsage->cUsage++;
2460 }
2461 }
2462
2463 /* free patch - we accumulate them for paranoid safety reasons. */
2464 if (pPatchPre)
2465 {
2466 pPatchPre->pNext = pDevExt->pIdtPatchesFree;
2467 pDevExt->pIdtPatchesFree = pPatchPre;
2468 }
2469
2470 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2471
2472 /*
2473 * Free unused preallocated buffers.
2474 */
2475 if (pUsagePre)
2476 RTMemFree(pUsagePre);
2477
2478 pReq->u.Out.u8Idt = pDevExt->u8Idt;
2479
2480 return pPatch ? VINF_SUCCESS : VERR_IDT_FAILED;
2481}
2482
2483
2484/**
2485 * This creates an IDT patch entry.
2486 * If it's the first patch being installed, it'll also determine the IDT
2487 * entry to use.
2488 *
2489 * @returns pPatch on success.
2490 * @returns NULL on failure.
2491 * @param pDevExt Pointer to globals.
2492 * @param pPatch Patch entry to use.
2493 * This will be linked into SUPDRVDEVEXT::pIdtPatches on
2494 * successful return.
2495 * @remark Caller must own the SUPDRVDEVEXT::Spinlock!
2496 */
2497static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2498{
2499 RTIDTR Idtr;
2500 PSUPDRVIDTE paIdt;
2501 LogFlow(("supdrvIOCtl_IdtPatchOne: pPatch=%p\n", pPatch));
2502
2503 /*
2504 * Get IDT.
2505 */
2506 ASMGetIDTR(&Idtr);
2507 paIdt = (PSUPDRVIDTE)Idtr.pIdt;
2508 /*
2509 * Recent Linux kernels can be configured for a 1G user / 3G kernel split.
2510 */
2511 if ((uintptr_t)paIdt < 0x40000000)
2512 {
2513 AssertMsgFailed(("bad paIdt=%p\n", paIdt));
2514 return NULL;
2515 }
2516
2517 if (!pDevExt->u8Idt)
2518 {
2519 /*
2520 * Test out the alternatives.
2521 *
2522 * At the moment we do not support chaining, thus we ASSUME that one of
2523 * the entries listed below is unused (which is not a problem on Win32 and
2524 * Linux to my knowledge).
2525 */
2526 /** @todo we MUST change this detection to try grab an entry which is NOT in use. This can be
2527 * combined with gathering info about which guest system call gates we can hook up directly. */
2528 unsigned i;
2529 uint8_t u8Idt = 0;
2530 static uint8_t au8Ints[] =
2531 {
2532#ifdef RT_OS_WINDOWS /* We don't use 0xef and above because they are system stuff on Linux (0xef is IPI,
2533 * local APIC timer, or some other frequently firing thing). */
2534 0xef, 0xee, 0xed, 0xec,
2535#endif
2536 0xeb, 0xea, 0xe9, 0xe8,
2537 0xdf, 0xde, 0xdd, 0xdc,
2538 0x7b, 0x7a, 0x79, 0x78,
2539 0xbf, 0xbe, 0xbd, 0xbc,
2540 };
2541#if defined(RT_ARCH_AMD64) && defined(DEBUG)
2542 static int s_iWobble = 0;
2543 unsigned iMax = !(s_iWobble++ % 2) ? 0x80 : 0x100;
2544 Log2(("IDT: Idtr=%p:%#x\n", (void *)Idtr.pIdt, (unsigned)Idtr.cbIdt));
2545 for (i = iMax - 0x80; i*16+15 < Idtr.cbIdt && i < iMax; i++)
2546 {
2547 Log2(("%#x: %04x:%08x%04x%04x P=%d DPL=%d IST=%d Type1=%#x u32Reserved=%#x u5Reserved=%#x\n",
2548 i, paIdt[i].u16SegSel, paIdt[i].u32OffsetTop, paIdt[i].u16OffsetHigh, paIdt[i].u16OffsetLow,
2549 paIdt[i].u1Present, paIdt[i].u2DPL, paIdt[i].u3IST, paIdt[i].u5Type2,
2550 paIdt[i].u32Reserved, paIdt[i].u5Reserved));
2551 }
2552#endif
2553 /* look for entries which are not present or otherwise unused. */
2554 for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
2555 {
2556 u8Idt = au8Ints[i];
2557 if ( u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
2558 && ( !paIdt[u8Idt].u1Present
2559 || paIdt[u8Idt].u5Type2 == 0))
2560 break;
2561 u8Idt = 0;
2562 }
2563 if (!u8Idt)
2564 {
2565 /* try again, looking for a compatible entry. */
2566 for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
2567 {
2568 u8Idt = au8Ints[i];
2569 if ( u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
2570 && paIdt[u8Idt].u1Present
2571 && paIdt[u8Idt].u5Type2 == SUPDRV_IDTE_TYPE2_INTERRUPT_GATE
2572 && !(paIdt[u8Idt].u16SegSel & 3))
2573 break;
2574 u8Idt = 0;
2575 }
2576 if (!u8Idt)
2577 {
2578 Log(("Failed to find an appropriate IDT entry!!\n"));
2579 return NULL;
2580 }
2581 }
2582 pDevExt->u8Idt = u8Idt;
2583 LogFlow(("supdrvIOCtl_IdtPatchOne: u8Idt=%x\n", u8Idt));
2584 }
2585
2586 /*
2587 * Prepare the patch
2588 */
2589 memset(pPatch, 0, sizeof(*pPatch));
2590 pPatch->pvIdt = paIdt;
2591 pPatch->cUsage = 1;
2592 pPatch->pIdtEntry = &paIdt[pDevExt->u8Idt];
2593 pPatch->SavedIdt = paIdt[pDevExt->u8Idt];
2594 pPatch->ChangedIdt.u16OffsetLow = (uint32_t)((uintptr_t)&pPatch->auCode[0] & 0xffff);
2595 pPatch->ChangedIdt.u16OffsetHigh = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 16);
2596#ifdef RT_ARCH_AMD64
2597 pPatch->ChangedIdt.u32OffsetTop = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 32);
2598#endif
2599 pPatch->ChangedIdt.u16SegSel = ASMGetCS();
2600#ifdef RT_ARCH_AMD64
2601 pPatch->ChangedIdt.u3IST = 0;
2602 pPatch->ChangedIdt.u5Reserved = 0;
2603#else /* x86 */
2604 pPatch->ChangedIdt.u5Reserved = 0;
2605 pPatch->ChangedIdt.u3Type1 = 0;
2606#endif /* x86 */
2607 pPatch->ChangedIdt.u5Type2 = SUPDRV_IDTE_TYPE2_INTERRUPT_GATE;
2608 pPatch->ChangedIdt.u2DPL = 3;
2609 pPatch->ChangedIdt.u1Present = 1;
2610
2611 /*
2612 * Generate the patch code.
2613 */
2614 {
2615#ifdef RT_ARCH_AMD64
2616 union
2617 {
2618 uint8_t *pb;
2619 uint32_t *pu32;
2620 uint64_t *pu64;
2621 } u, uFixJmp, uFixCall, uNotNested;
2622 u.pb = &pPatch->auCode[0];
2623
2624 /* check the cookie */
2625 *u.pb++ = 0x3d; // cmp eax, GLOBALCOOKIE
2626 *u.pu32++ = pDevExt->u32Cookie;
2627
2628 *u.pb++ = 0x74; // jz @VBoxCall
2629 *u.pb++ = 2;
2630
2631 /* jump to forwarder code. */
2632 *u.pb++ = 0xeb;
2633 uFixJmp = u;
2634 *u.pb++ = 0xfe;
2635
2636 // @VBoxCall:
2637 *u.pb++ = 0x0f; // swapgs
2638 *u.pb++ = 0x01;
2639 *u.pb++ = 0xf8;
2640
2641 /*
2642 * Call VMMR0Entry
2643 * We don't have to push the arguments here, but we have to
2644 * reserve some stack space for the interrupt forwarding.
2645 */
2646# ifdef RT_OS_WINDOWS
2647 *u.pb++ = 0x50; // push rax ; alignment filler.
2648 *u.pb++ = 0x41; // push r8 ; uArg
2649 *u.pb++ = 0x50;
2650 *u.pb++ = 0x52; // push rdx ; uOperation
2651 *u.pb++ = 0x51; // push rcx ; pVM
2652# else
2653 *u.pb++ = 0x51; // push rcx ; alignment filler.
2654 *u.pb++ = 0x52; // push rdx ; uArg
2655 *u.pb++ = 0x56; // push rsi ; uOperation
2656 *u.pb++ = 0x57; // push rdi ; pVM
2657# endif
2658
2659 *u.pb++ = 0xff; // call qword [pfnVMMR0EntryInt wrt rip]
2660 *u.pb++ = 0x15;
2661 uFixCall = u;
2662 *u.pu32++ = 0;
2663
2664 *u.pb++ = 0x48; // add rsp, 20h ; remove call frame.
2665 *u.pb++ = 0x81;
2666 *u.pb++ = 0xc4;
2667 *u.pu32++ = 0x20;
2668
2669 *u.pb++ = 0x0f; // swapgs
2670 *u.pb++ = 0x01;
2671 *u.pb++ = 0xf8;
2672
2673 /* Return to R3. */
2674 uNotNested = u;
2675 *u.pb++ = 0x48; // iretq
2676 *u.pb++ = 0xcf;
2677
2678 while ((uintptr_t)u.pb & 0x7) // align 8
2679 *u.pb++ = 0xcc;
2680
2681 /* Pointer to the VMMR0Entry. */ // pfnVMMR0EntryInt dq StubVMMR0Entry
2682 *uFixCall.pu32 = (uint32_t)(u.pb - uFixCall.pb - 4); uFixCall.pb = NULL;
2683 pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
2684 *u.pu64++ = pDevExt->pvVMMR0 ? (uint64_t)pDevExt->pfnVMMR0EntryInt : (uint64_t)u.pb + 8;
2685
2686 /* stub entry. */ // StubVMMR0Entry:
2687 pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
2688 *u.pb++ = 0x33; // xor eax, eax
2689 *u.pb++ = 0xc0;
2690
2691 *u.pb++ = 0x48; // dec rax
2692 *u.pb++ = 0xff;
2693 *u.pb++ = 0xc8;
2694
2695 *u.pb++ = 0xc3; // ret
2696
2697 /* forward to the original handler using a retf. */
2698 *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1); uFixJmp.pb = NULL;
2699
2700 *u.pb++ = 0x68; // push <target cs>
2701 *u.pu32++ = !pPatch->SavedIdt.u5Type2 ? ASMGetCS() : pPatch->SavedIdt.u16SegSel;
2702
2703 *u.pb++ = 0x68; // push <low target rip>
2704 *u.pu32++ = !pPatch->SavedIdt.u5Type2
2705 ? (uint32_t)(uintptr_t)uNotNested.pb
2706 : (uint32_t)pPatch->SavedIdt.u16OffsetLow
2707 | (uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16;
2708
2709 *u.pb++ = 0xc7; // mov dword [rsp + 4], <high target rip>
2710 *u.pb++ = 0x44;
2711 *u.pb++ = 0x24;
2712 *u.pb++ = 0x04;
2713 *u.pu32++ = !pPatch->SavedIdt.u5Type2
2714 ? (uint32_t)((uint64_t)uNotNested.pb >> 32)
2715 : pPatch->SavedIdt.u32OffsetTop;
2716
2717 *u.pb++ = 0x48; // retf ; does this require prefix?
2718 *u.pb++ = 0xcb;
2719
2720#else /* RT_ARCH_X86 */
2721
2722 union
2723 {
2724 uint8_t *pb;
2725 uint16_t *pu16;
2726 uint32_t *pu32;
2727 } u, uFixJmpNotNested, uFixJmp, uFixCall, uNotNested;
2728 u.pb = &pPatch->auCode[0];
2729
2730 /* check the cookie */
2731 *u.pb++ = 0x81; // cmp esi, GLOBALCOOKIE
2732 *u.pb++ = 0xfe;
2733 *u.pu32++ = pDevExt->u32Cookie;
2734
2735 *u.pb++ = 0x74; // jz VBoxCall
2736 uFixJmp = u;
2737 *u.pb++ = 0;
2738
2739 /* jump (far) to the original handler / not-nested-stub. */
2740 *u.pb++ = 0xea; // jmp far NotNested
2741 uFixJmpNotNested = u;
2742 *u.pu32++ = 0;
2743 *u.pu16++ = 0;
2744
2745 /* save selector registers. */ // VBoxCall:
2746 *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1);
2747 *u.pb++ = 0x0f; // push fs
2748 *u.pb++ = 0xa0;
2749
2750 *u.pb++ = 0x1e; // push ds
2751
2752 *u.pb++ = 0x06; // push es
2753
2754 /* call frame */
2755 *u.pb++ = 0x51; // push ecx
2756
2757 *u.pb++ = 0x52; // push edx
2758
2759 *u.pb++ = 0x50; // push eax
2760
2761 /* load ds, es and perhaps fs before call. */
2762 *u.pb++ = 0xb8; // mov eax, KernelDS
2763 *u.pu32++ = ASMGetDS();
2764
2765 *u.pb++ = 0x8e; // mov ds, eax
2766 *u.pb++ = 0xd8;
2767
2768 *u.pb++ = 0x8e; // mov es, eax
2769 *u.pb++ = 0xc0;
2770
2771#ifdef RT_OS_WINDOWS
2772 *u.pb++ = 0xb8; // mov eax, KernelFS
2773 *u.pu32++ = ASMGetFS();
2774
2775 *u.pb++ = 0x8e; // mov fs, eax
2776 *u.pb++ = 0xe0;
2777#endif
2778
2779 /* do the call. */
2780 *u.pb++ = 0xe8; // call _VMMR0Entry / StubVMMR0Entry
2781 uFixCall = u;
2782 pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
2783 *u.pu32++ = 0xfffffffb;
2784
2785 *u.pb++ = 0x83; // add esp, 0ch ; cdecl
2786 *u.pb++ = 0xc4;
2787 *u.pb++ = 0x0c;
2788
2789 /* restore selector registers. */
2790 *u.pb++ = 0x07; // pop es
2791
2792 *u.pb++ = 0x1f; // pop ds
2793
2794 *u.pb++ = 0x0f; // pop fs
2795 *u.pb++ = 0xa1;
2796
2797 uNotNested = u; // NotNested:
2798 *u.pb++ = 0xcf; // iretd
2799
2800 /* the stub VMMR0Entry. */ // StubVMMR0Entry:
2801 pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
2802 *u.pb++ = 0x33; // xor eax, eax
2803 *u.pb++ = 0xc0;
2804
2805 *u.pb++ = 0x48; // dec eax
2806
2807 *u.pb++ = 0xc3; // ret
2808
2809 /* Fixup the VMMR0Entry call. */
2810 if (pDevExt->pvVMMR0)
2811 *uFixCall.pu32 = (uint32_t)pDevExt->pfnVMMR0EntryInt - (uint32_t)(uFixCall.pu32 + 1);
2812 else
2813 *uFixCall.pu32 = (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)(uFixCall.pu32 + 1);
2814
2815 /* Fixup the forward / nested far jump. */
2816 if (!pPatch->SavedIdt.u5Type2)
2817 {
2818 *uFixJmpNotNested.pu32++ = (uint32_t)uNotNested.pb;
2819 *uFixJmpNotNested.pu16++ = ASMGetCS();
2820 }
2821 else
2822 {
2823 *uFixJmpNotNested.pu32++ = ((uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16) | pPatch->SavedIdt.u16OffsetLow;
2824 *uFixJmpNotNested.pu16++ = pPatch->SavedIdt.u16SegSel;
2825 }
2826#endif /* RT_ARCH_X86 */
2827 Assert(u.pb <= &pPatch->auCode[sizeof(pPatch->auCode)]);
2828#if 0
2829 /* dump the patch code */
2830 Log2(("patch code: %p\n", &pPatch->auCode[0]));
2831 for (uFixCall.pb = &pPatch->auCode[0]; uFixCall.pb < u.pb; uFixCall.pb++)
2832 Log2(("0x%02x,\n", *uFixCall.pb));
2833#endif
2834 }
2835
2836 /*
2837 * Install the patch.
2838 */
2839 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->ChangedIdt);
2840 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The IDT entry write didn't take effect!\n"));
2841
2842 /*
2843 * Link in the patch.
2844 */
2845 pPatch->pNext = pDevExt->pIdtPatches;
2846 pDevExt->pIdtPatches = pPatch;
2847
2848 return pPatch;
2849}
2850
2851
2852/**
2853 * Removes the session's IDT references.
2854 * This will uninstall our IDT patch if it is left unreferenced.
2855 *
2856 * @returns VINF_SUCCESS.
2857 * @param pDevExt Device globals.
2858 * @param pSession Session data.
2859 */
2860static int supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
2861{
2862 PSUPDRVPATCHUSAGE pUsage;
2863 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2864 LogFlow(("supdrvIOCtl_IdtRemoveAll: pSession=%p\n", pSession));
2865
2866 /*
2867 * Take the spinlock.
2868 */
2869 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2870
2871 /*
2872 * Walk usage list, removing patches as their usage count reaches zero.
2873 */
2874 pUsage = pSession->pPatchUsage;
2875 while (pUsage)
2876 {
2877 if (pUsage->pPatch->cUsage <= pUsage->cUsage)
2878 supdrvIdtRemoveOne(pDevExt, pUsage->pPatch);
2879 else
2880 pUsage->pPatch->cUsage -= pUsage->cUsage;
2881
2882 /* next */
2883 pUsage = pUsage->pNext;
2884 }
2885
2886 /*
2887 * Empty the usage chain and we're done inside the spinlock.
2888 */
2889 pUsage = pSession->pPatchUsage;
2890 pSession->pPatchUsage = NULL;
2891
2892 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2893
2894 /*
2895 * Free usage entries.
2896 */
2897 while (pUsage)
2898 {
2899 void *pvToFree = pUsage;
2900 pUsage->cUsage = 0;
2901 pUsage->pPatch = NULL;
2902 pUsage = pUsage->pNext;
2903 RTMemFree(pvToFree);
2904 }
2905
2906 return VINF_SUCCESS;
2907}
2908
2909
2910/**
2911 * Remove one patch.
2912 *
2913 * Worker for supdrvIOCtl_IdtRemoveAll.
2914 *
2915 * @param pDevExt Device globals.
2916 * @param pPatch Patch entry to remove.
2917 * @remark Caller must own SUPDRVDEVEXT::Spinlock!
2918 */
2919static void supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2920{
2921 LogFlow(("supdrvIdtRemoveOne: pPatch=%p\n", pPatch));
2922
2923 pPatch->cUsage = 0;
2924
2925 /*
2926 * If the IDT entry was changed, it has to stick around forever!
2927 * Freeing will be attempted again; perhaps next time we'll succeed. :-)
2928 */
2929 if (memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)))
2930 {
2931 AssertMsgFailed(("The hijacked IDT entry has CHANGED!!!\n"));
2932 return;
2933 }
2934
2935 /*
2936 * Unlink it.
2937 */
2938 if (pDevExt->pIdtPatches != pPatch)
2939 {
2940 PSUPDRVPATCH pPatchPrev = pDevExt->pIdtPatches;
2941 while (pPatchPrev)
2942 {
2943 if (pPatchPrev->pNext == pPatch)
2944 {
2945 pPatchPrev->pNext = pPatch->pNext;
2946 break;
2947 }
2948 pPatchPrev = pPatchPrev->pNext;
2949 }
2950 Assert(pPatchPrev); /* the patch must be in the list when it isn't the head */
2951 }
2952 else
2953 pDevExt->pIdtPatches = pPatch->pNext;
2954 pPatch->pNext = NULL;
2955
2956
2957 /*
2958 * Verify and restore the IDT.
2959 */
2960 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
2961 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->SavedIdt);
2962 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->SavedIdt, sizeof(pPatch->SavedIdt)), ("Restoring the original IDT entry failed!\n"));
2963
2964 /*
2965 * Put it in the free list.
2966 * (This free list stuff is to calm my paranoia.)
2967 */
2968 pPatch->pvIdt = NULL;
2969 pPatch->pIdtEntry = NULL;
2970
2971 pPatch->pNext = pDevExt->pIdtPatchesFree;
2972 pDevExt->pIdtPatchesFree = pPatch;
2973}
2974
2975
2976/**
2977 * Write to an IDT entry.
2978 *
2979 * @param pvIdtEntry Where to write.
2980 * @param pNewIDTEntry What to write.
2981 */
2982static void supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry)
2983{
2984 RTUINTREG uCR0;
2985 RTUINTREG uFlags;
2986
2987 /*
2988 * On SMP machines (P4 hyperthreading included) we must perform a
2989 * 64-bit locked write when updating the IDT entry.
2990 *
2991 * The F00F bugfix for Linux (and probably other OSes) causes
2992 * the IDT to point to a read-only mapping. We get around that
2993 * by temporarily turning off WP. Since we're inside a spinlock at this
2994 * point, interrupts are disabled and there isn't any way the WP bit
2995 * flipping can cause any trouble.
2996 */
2997
2998 /* Save & Clear interrupt flag; Save & clear WP. */
2999 uFlags = ASMGetFlags();
3000 ASMSetFlags(uFlags & ~(RTUINTREG)(1 << 9)); /*X86_EFL_IF*/
3001 Assert(!(ASMGetFlags() & (1 << 9)));
3002 uCR0 = ASMGetCR0();
3003 ASMSetCR0(uCR0 & ~(RTUINTREG)(1 << 16)); /*X86_CR0_WP*/
3004
3005 /* Update IDT Entry */
3006#ifdef RT_ARCH_AMD64
3007 ASMAtomicXchgU128((volatile uint128_t *)pvIdtEntry, *(uint128_t *)(uintptr_t)pNewIDTEntry);
3008#else
3009 ASMAtomicXchgU64((volatile uint64_t *)pvIdtEntry, *(uint64_t *)(uintptr_t)pNewIDTEntry);
3010#endif
3011
3012 /* Restore CR0 & Flags */
3013 ASMSetCR0(uCR0);
3014 ASMSetFlags(uFlags);
3015}
3016#endif /* VBOX_WITH_IDT_PATCHING */
3017
3018
3019/**
3020 * Opens an image. If it's the first time it's opened, the caller must upload
3021 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
3022 *
3023 * This is the 1st step of the loading.
3024 *
3025 * @returns IPRT status code.
3026 * @param pDevExt Device globals.
3027 * @param pSession Session data.
3028 * @param pReq The open request.
3029 */
3030static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
3031{
3032 PSUPDRVLDRIMAGE pImage;
3033 unsigned cb;
3034 void *pv;
3035 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImage=%d\n", pReq->u.In.szName, pReq->u.In.cbImage));
3036
3037 /*
3038 * Check if we got an instance of the image already.
3039 */
3040 RTSemFastMutexRequest(pDevExt->mtxLdr);
3041 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
3042 {
3043 if (!strcmp(pImage->szName, pReq->u.In.szName))
3044 {
3045 pImage->cUsage++;
3046 pReq->u.Out.pvImageBase = pImage->pvImage;
3047 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
3048 supdrvLdrAddUsage(pSession, pImage);
3049 RTSemFastMutexRelease(pDevExt->mtxLdr);
3050 return VINF_SUCCESS;
3051 }
3052 }
3053 /* (not found - add it!) */
3054
3055 /*
3056 * Allocate memory.
3057 */
3058 cb = pReq->u.In.cbImage + sizeof(SUPDRVLDRIMAGE) + 31;
3059 pv = RTMemExecAlloc(cb);
3060 if (!pv)
3061 {
3062 RTSemFastMutexRelease(pDevExt->mtxLdr);
3063 Log(("supdrvIOCtl_LdrOpen: RTMemExecAlloc(%u) failed\n", cb));
3064 return VERR_NO_MEMORY;
3065 }
3066
3067 /*
3068 * Setup and link in the LDR stuff.
3069 */
3070 pImage = (PSUPDRVLDRIMAGE)pv;
3071 pImage->pvImage = RT_ALIGN_P(pImage + 1, 32);
3072 pImage->cbImage = pReq->u.In.cbImage;
3073 pImage->pfnModuleInit = NULL;
3074 pImage->pfnModuleTerm = NULL;
3075 pImage->uState = SUP_IOCTL_LDR_OPEN;
3076 pImage->cUsage = 1;
3077 strcpy(pImage->szName, pReq->u.In.szName);
3078
3079 pImage->pNext = pDevExt->pLdrImages;
3080 pDevExt->pLdrImages = pImage;
3081
3082 supdrvLdrAddUsage(pSession, pImage);
3083
3084 pReq->u.Out.pvImageBase = pImage->pvImage;
3085 pReq->u.Out.fNeedsLoading = true;
3086 RTSemFastMutexRelease(pDevExt->mtxLdr);
3087 return VINF_SUCCESS;
3088}
3089
3090
3091/**
3092 * Loads the image bits.
3093 *
3094 * This is the 2nd step of the loading.
3095 *
3096 * @returns IPRT status code.
3097 * @param pDevExt Device globals.
3098 * @param pSession Session data.
3099 * @param pReq The request.
3100 */
3101static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
3102{
3103 PSUPDRVLDRUSAGE pUsage;
3104 PSUPDRVLDRIMAGE pImage;
3105 int rc;
3106 LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImage=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImage));
3107
3108 /*
3109 * Find the ldr image.
3110 */
3111 RTSemFastMutexRequest(pDevExt->mtxLdr);
3112 pUsage = pSession->pLdrUsage;
3113 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3114 pUsage = pUsage->pNext;
3115 if (!pUsage)
3116 {
3117 RTSemFastMutexRelease(pDevExt->mtxLdr);
3118 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
3119 return VERR_INVALID_HANDLE;
3120 }
3121 pImage = pUsage->pImage;
3122 if (pImage->cbImage != pReq->u.In.cbImage)
3123 {
3124 RTSemFastMutexRelease(pDevExt->mtxLdr);
3125 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load)\n", pImage->cbImage, pReq->u.In.cbImage));
3126 return VERR_INVALID_HANDLE;
3127 }
3128 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
3129 {
3130 unsigned uState = pImage->uState;
3131 RTSemFastMutexRelease(pDevExt->mtxLdr);
3132 if (uState != SUP_IOCTL_LDR_LOAD)
3133 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
3134 return SUPDRV_ERR_ALREADY_LOADED;
3135 }
3136 switch (pReq->u.In.eEPType)
3137 {
3138 case SUPLDRLOADEP_NOTHING:
3139 break;
3140 case SUPLDRLOADEP_VMMR0:
3141 if ( !pReq->u.In.EP.VMMR0.pvVMMR0
3142 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryInt
3143 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryFast
3144 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryEx)
3145 {
3146 RTSemFastMutexRelease(pDevExt->mtxLdr);
3147 Log(("NULL pointer: pvVMMR0=%p pvVMMR0EntryInt=%p pvVMMR0EntryFast=%p pvVMMR0EntryEx=%p!\n",
3148 pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3149 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
3150 return VERR_INVALID_PARAMETER;
3151 }
3152 if ( (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryInt - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
3153 || (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryFast - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
3154 || (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryEx - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3155 {
3156 RTSemFastMutexRelease(pDevExt->mtxLdr);
3157 Log(("Out of range (%p LB %#x): pvVMMR0EntryInt=%p, pvVMMR0EntryFast=%p or pvVMMR0EntryEx=%p!\n",
3158 pImage->pvImage, pReq->u.In.cbImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3159 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
3160 return VERR_INVALID_PARAMETER;
3161 }
3162 break;
3163 default:
3164 RTSemFastMutexRelease(pDevExt->mtxLdr);
3165 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
3166 return VERR_INVALID_PARAMETER;
3167 }
3168 if ( pReq->u.In.pfnModuleInit
3169 && (uintptr_t)pReq->u.In.pfnModuleInit - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3170 {
3171 RTSemFastMutexRelease(pDevExt->mtxLdr);
3172 Log(("SUP_IOCTL_LDR_LOAD: pfnModuleInit=%p is outside the image (%p %d bytes)\n",
3173 pReq->u.In.pfnModuleInit, pImage->pvImage, pReq->u.In.cbImage));
3174 return VERR_INVALID_PARAMETER;
3175 }
3176 if ( pReq->u.In.pfnModuleTerm
3177 && (uintptr_t)pReq->u.In.pfnModuleTerm - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3178 {
3179 RTSemFastMutexRelease(pDevExt->mtxLdr);
3180 Log(("SUP_IOCTL_LDR_LOAD: pfnModuleTerm=%p is outside the image (%p %d bytes)\n",
3181 pReq->u.In.pfnModuleTerm, pImage->pvImage, pReq->u.In.cbImage));
3182 return VERR_INVALID_PARAMETER;
3183 }
3184
3185 /*
3186 * Copy the memory.
3187 */
3188 /* no need to do try/except as this is a buffered request. */
3189 memcpy(pImage->pvImage, &pReq->u.In.achImage[0], pImage->cbImage);
3190 pImage->uState = SUP_IOCTL_LDR_LOAD;
3191 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
3192 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
3193 pImage->offSymbols = pReq->u.In.offSymbols;
3194 pImage->cSymbols = pReq->u.In.cSymbols;
3195 pImage->offStrTab = pReq->u.In.offStrTab;
3196 pImage->cbStrTab = pReq->u.In.cbStrTab;
3197
3198 /*
3199 * Update any entry points.
3200 */
3201 switch (pReq->u.In.eEPType)
3202 {
3203 default:
3204 case SUPLDRLOADEP_NOTHING:
3205 rc = VINF_SUCCESS;
3206 break;
3207 case SUPLDRLOADEP_VMMR0:
3208 rc = supdrvLdrSetR0EP(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3209 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
3210 break;
3211 }
3212
3213 /*
3214 * On success call the module initialization.
3215 */
3216 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
3217 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
3218 {
3219 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
3220 rc = pImage->pfnModuleInit();
3221 if (rc && pDevExt->pvVMMR0 == pImage->pvImage)
3222 supdrvLdrUnsetR0EP(pDevExt);
3223 }
3224
3225 if (rc)
3226 pImage->uState = SUP_IOCTL_LDR_OPEN;
3227
3228 RTSemFastMutexRelease(pDevExt->mtxLdr);
3229 return rc;
3230}
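
/*
 * Sketch of the two-step load protocol as seen from the ioctl dispatcher;
 * not compiled in, and the request setup is abbreviated - in reality ring-3
 * fills the SUPLDROPEN/SUPLDRLOAD buffers, including the achImage bits
 * relocated against the returned image base.
 */
#if 0
static int exampleLoadProtocol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession,
                               PSUPLDROPEN pOpenReq, PSUPLDRLOAD pLoadReq)
{
    /* Step 1: open (or find) the image and learn where it will live. */
    int rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pOpenReq);
    if (RT_SUCCESS(rc) && pOpenReq->u.Out.fNeedsLoading)
    {
        /* Step 2: upload the bits together with entry points and tables. */
        pLoadReq->u.In.pvImageBase = pOpenReq->u.Out.pvImageBase;
        rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pLoadReq);
    }
    return rc;
}
#endif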
3231
3232
3233/**
3234 * Frees a previously loaded (prep'ed) image.
3235 *
3236 * @returns IPRT status code.
3237 * @param pDevExt Device globals.
3238 * @param pSession Session data.
3239 * @param pReq The request.
3240 */
3241static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
3242{
3243 int rc;
3244 PSUPDRVLDRUSAGE pUsagePrev;
3245 PSUPDRVLDRUSAGE pUsage;
3246 PSUPDRVLDRIMAGE pImage;
3247 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
3248
3249 /*
3250 * Find the ldr image.
3251 */
3252 RTSemFastMutexRequest(pDevExt->mtxLdr);
3253 pUsagePrev = NULL;
3254 pUsage = pSession->pLdrUsage;
3255 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3256 {
3257 pUsagePrev = pUsage;
3258 pUsage = pUsage->pNext;
3259 }
3260 if (!pUsage)
3261 {
3262 RTSemFastMutexRelease(pDevExt->mtxLdr);
3263 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
3264 return VERR_INVALID_HANDLE;
3265 }
3266
3267 /*
3268 * Check if we can remove anything.
3269 */
3270 rc = VINF_SUCCESS;
3271 pImage = pUsage->pImage;
3272 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
3273 {
3274 /*
3275 * Check if there are any objects with destructors in the image, if
3276 * so leave it for the session cleanup routine so we get a chance to
3277 * clean things up in the right order and not leave them all dangling.
3278 */
3279 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
3280 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
3281 if (pImage->cUsage <= 1)
3282 {
3283 PSUPDRVOBJ pObj;
3284 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
3285 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3286 {
3287 rc = VERR_SHARING_VIOLATION; /** @todo VERR_DANGLING_OBJECTS */
3288 break;
3289 }
3290 }
3291 else
3292 {
3293 PSUPDRVUSAGE pGenUsage;
3294 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
3295 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3296 {
3297 rc = VERR_SHARING_VIOLATION; /** @todo VERR_DANGLING_OBJECTS */
3298 break;
3299 }
3300 }
3301 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
3302 if (rc == VINF_SUCCESS)
3303 {
3304 /* unlink it */
3305 if (pUsagePrev)
3306 pUsagePrev->pNext = pUsage->pNext;
3307 else
3308 pSession->pLdrUsage = pUsage->pNext;
3309
3310 /* free it */
3311 pUsage->pImage = NULL;
3312 pUsage->pNext = NULL;
3313 RTMemFree(pUsage);
3314
3315 /*
3316 * Dereference the image.
3317 */
3318 if (pImage->cUsage <= 1)
3319 supdrvLdrFree(pDevExt, pImage);
3320 else
3321 pImage->cUsage--;
3322 }
3323 else
3324 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
3325 }
3326 else
3327 {
3328 /*
3329 * Dereference both image and usage.
3330 */
3331 pImage->cUsage--;
3332 pUsage->cUsage--;
3333 }
3334
3335 RTSemFastMutexRelease(pDevExt->mtxLdr);
3336 return VINF_SUCCESS;
3337}
3338
3339
3340/**
3341 * Gets the address of a symbol in an open image.
3342 *
3343 * @returns IPRT status code.
3345 * @param pDevExt Device globals.
3346 * @param pSession Session data.
3347 * @param pReq The request buffer.
3348 */
3349static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
3350{
3351 PSUPDRVLDRIMAGE pImage;
3352 PSUPDRVLDRUSAGE pUsage;
3353 uint32_t i;
3354 PSUPLDRSYM paSyms;
3355 const char *pchStrings;
3356 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
3357 void *pvSymbol = NULL;
3358 int rc = VERR_GENERAL_FAILURE;
3359 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
3360
3361 /*
3362 * Find the ldr image.
3363 */
3364 RTSemFastMutexRequest(pDevExt->mtxLdr);
3365 pUsage = pSession->pLdrUsage;
3366 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3367 pUsage = pUsage->pNext;
3368 if (!pUsage)
3369 {
3370 RTSemFastMutexRelease(pDevExt->mtxLdr);
3371 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
3372 return VERR_INVALID_HANDLE;
3373 }
3374 pImage = pUsage->pImage;
3375 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
3376 {
3377 unsigned uState = pImage->uState;
3378 RTSemFastMutexRelease(pDevExt->mtxLdr);
3379 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
3380 return VERR_ALREADY_LOADED;
3381 }
3382
3383 /*
3384 * Search the symbol string.
3385 */
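    /*
     * Table layout recap: paSyms is an array of SUPLDRSYM records located at
     * pvImage + offSymbols; each record's offName indexes into the string
     * table at pvImage + offStrTab, while offSymbol is the symbol's offset
     * relative to pvImage.
     */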
3386 pchStrings = (const char *)((uint8_t *)pImage->pvImage + pImage->offStrTab);
3387 paSyms = (PSUPLDRSYM)((uint8_t *)pImage->pvImage + pImage->offSymbols);
3388 for (i = 0; i < pImage->cSymbols; i++)
3389 {
3390 if ( paSyms[i].offSymbol < pImage->cbImage /* paranoia */
3391 && paSyms[i].offName + cbSymbol <= pImage->cbStrTab
3392 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
3393 {
3394 pvSymbol = (uint8_t *)pImage->pvImage + paSyms[i].offSymbol;
3395 rc = VINF_SUCCESS;
3396 break;
3397 }
3398 }
3399 RTSemFastMutexRelease(pDevExt->mtxLdr);
3400 pReq->u.Out.pvSymbol = pvSymbol;
3401 return rc;
3402}
3403
3404
3405/**
3406 * Updates the IDT patches to point to the specified VMMR0 entry
3407 * points (VMMR0EntryInt, VMMR0EntryFast and VMMR0EntryEx).
3408 *
3409 * @returns IPRT status code.
3410 * @param pDevExt Device globals.
3411 * @param pvVMMR0 VMMR0 image base address.
3413 * @param pvVMMR0EntryInt VMMR0EntryInt address.
3414 * @param pvVMMR0EntryFast VMMR0EntryFast address.
3415 * @param pvVMMR0EntryEx VMMR0EntryEx address.
3416 * @remark Caller must own the loader mutex.
3417 */
3418static int supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
3419{
3420 int rc = VINF_SUCCESS;
3421 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
3422
3423
3424 /*
3425 * Check if not yet set.
3426 */
3427 if (!pDevExt->pvVMMR0)
3428 {
3429#ifdef VBOX_WITH_IDT_PATCHING
3430 PSUPDRVPATCH pPatch;
3431#endif
3432
3433 /*
3434 * Set it and update IDT patch code.
3435 */
3436 pDevExt->pvVMMR0 = pvVMMR0;
3437 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
3438 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
3439 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
3440#ifdef VBOX_WITH_IDT_PATCHING
3441 for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
3442 {
3443# ifdef RT_ARCH_AMD64
3444 ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup], (uint64_t)pvVMMR0);
3445# else /* RT_ARCH_X86 */
3446 ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3447 (uint32_t)pvVMMR0 - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
3448# endif
3449 }
3450#endif /* VBOX_WITH_IDT_PATCHING */
3451 }
3452 else
3453 {
3454 /*
3455 * Return failure or success depending on whether the values match or not.
3456 */
3457 if ( pDevExt->pvVMMR0 != pvVMMR0
3458 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
3459 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
3460 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
3461 {
3462 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
3463 rc = VERR_INVALID_PARAMETER;
3464 }
3465 }
3466 return rc;
3467}
3468
3469
3470/**
3471 * Unsets the R0 entry point installed by supdrvLdrSetR0EP.
3472 *
3473 * @param pDevExt Device globals.
3474 */
3475static void supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt)
3476{
3477#ifdef VBOX_WITH_IDT_PATCHING
3478 PSUPDRVPATCH pPatch;
3479#endif
3480
3481 pDevExt->pvVMMR0 = NULL;
3482 pDevExt->pfnVMMR0EntryInt = NULL;
3483 pDevExt->pfnVMMR0EntryFast = NULL;
3484 pDevExt->pfnVMMR0EntryEx = NULL;
3485
3486#ifdef VBOX_WITH_IDT_PATCHING
3487 for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
3488 {
3489# ifdef RT_ARCH_AMD64
3490 ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3491 (uint64_t)&pPatch->auCode[pPatch->offStub]);
3492# else /* RT_ARCH_X86 */
3493 ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3494 (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
3495# endif
3496 }
3497#endif /* VBOX_WITH_IDT_PATCHING */
3498}
3499
3500
3501/**
3502 * Adds a usage reference in the specified session of an image.
3503 *
3504 * @param pSession Session in question.
3505 * @param pImage Image which the session is using.
3506 */
3507static void supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
3508{
3509 PSUPDRVLDRUSAGE pUsage;
3510 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
3511
3512 /*
3513 * Referenced it already?
3514 */
3515 pUsage = pSession->pLdrUsage;
3516 while (pUsage)
3517 {
3518 if (pUsage->pImage == pImage)
3519 {
3520 pUsage->cUsage++;
3521 return;
3522 }
3523 pUsage = pUsage->pNext;
3524 }
3525
3526 /*
3527 * Allocate new usage record.
3528 */
3529 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
3530 Assert(pUsage);
3531 if (pUsage)
3532 {
3533 pUsage->cUsage = 1;
3534 pUsage->pImage = pImage;
3535 pUsage->pNext = pSession->pLdrUsage;
3536 pSession->pLdrUsage = pUsage;
3537 }
3538 /* ignore errors... */
3539}
3540
3541
3542/**
3543 * Frees a load image.
3544 *
3545 * @param pDevExt Pointer to device extension.
 3546 * @param pImage        Pointer to the image we're going to free.
 3547 *                      This image must exist!
3548 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
3549 */
3550static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
3551{
3552 PSUPDRVLDRIMAGE pImagePrev;
3553 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
3554
3555 /* find it - arg. should've used doubly linked list. */
3556 Assert(pDevExt->pLdrImages);
3557 pImagePrev = NULL;
3558 if (pDevExt->pLdrImages != pImage)
3559 {
3560 pImagePrev = pDevExt->pLdrImages;
3561 while (pImagePrev->pNext != pImage)
3562 pImagePrev = pImagePrev->pNext;
3563 Assert(pImagePrev->pNext == pImage);
3564 }
3565
3566 /* unlink */
3567 if (pImagePrev)
3568 pImagePrev->pNext = pImage->pNext;
3569 else
3570 pDevExt->pLdrImages = pImage->pNext;
3571
3572 /* check if this is VMMR0.r0 and fix the Idt patches if it is. */
3573 if (pDevExt->pvVMMR0 == pImage->pvImage)
3574 supdrvLdrUnsetR0EP(pDevExt);
3575
3576 /* check for objects with destructors in this image. (Shouldn't happen.) */
3577 if (pDevExt->pObjs)
3578 {
3579 unsigned cObjs = 0;
3580 PSUPDRVOBJ pObj;
3581 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
3582 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
3583 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
3584 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3585 {
3586 pObj->pfnDestructor = NULL;
3587 cObjs++;
3588 }
3589 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
3590 if (cObjs)
3591 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
3592 }
3593
3594 /* call termination function if fully loaded. */
3595 if ( pImage->pfnModuleTerm
3596 && pImage->uState == SUP_IOCTL_LDR_LOAD)
3597 {
 3598        LogFlow(("supdrvLdrFree: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
3599 pImage->pfnModuleTerm();
3600 }
3601
3602 /* free the image */
3603 pImage->cUsage = 0;
3604 pImage->pNext = 0;
3605 pImage->uState = SUP_IOCTL_LDR_FREE;
3606 RTMemExecFree(pImage);
3607}
3608
3609
3610/**
 3611 * Gets the current paging mode of the CPU and returns it.
3612 */
3613static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void)
3614{
3615 SUPPAGINGMODE enmMode;
3616
3617 RTUINTREG cr0 = ASMGetCR0();
3618 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3619 enmMode = SUPPAGINGMODE_INVALID;
3620 else
3621 {
3622 RTUINTREG cr4 = ASMGetCR4();
3623 uint32_t fNXEPlusLMA = 0;
3624 if (cr4 & X86_CR4_PAE)
3625 {
3626 uint32_t fAmdFeatures = ASMCpuId_EDX(0x80000001);
3627 if (fAmdFeatures & (X86_CPUID_AMD_FEATURE_EDX_NX | X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
3628 {
3629 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3630 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3631 fNXEPlusLMA |= RT_BIT(0);
3632 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3633 fNXEPlusLMA |= RT_BIT(1);
3634 }
3635 }
3636
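        /* fNXEPlusLMA encoding: bit 0 = EFER.NXE active, bit 1 = EFER.LMA
           (long mode active).  Neither RT_BIT(0) nor RT_BIT(1) clashes with
           X86_CR4_PAE (bit 5) or X86_CR4_PGE (bit 7), so the three sources
           can be ORed into a single switch key below. */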
3637 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3638 {
3639 case 0:
3640 enmMode = SUPPAGINGMODE_32_BIT;
3641 break;
3642
3643 case X86_CR4_PGE:
3644 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3645 break;
3646
3647 case X86_CR4_PAE:
3648 enmMode = SUPPAGINGMODE_PAE;
3649 break;
3650
3651 case X86_CR4_PAE | RT_BIT(0):
3652 enmMode = SUPPAGINGMODE_PAE_NX;
3653 break;
3654
3655 case X86_CR4_PAE | X86_CR4_PGE:
3656 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3657 break;
3658
3659 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
 3660                enmMode = SUPPAGINGMODE_PAE_GLOBAL_NX;
3661 break;
3662
3663 case RT_BIT(1) | X86_CR4_PAE:
3664 enmMode = SUPPAGINGMODE_AMD64;
3665 break;
3666
3667 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3668 enmMode = SUPPAGINGMODE_AMD64_NX;
3669 break;
3670
3671 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3672 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3673 break;
3674
3675 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3676 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3677 break;
3678
3679 default:
3680 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3681 enmMode = SUPPAGINGMODE_INVALID;
3682 break;
3683 }
3684 }
3685 return enmMode;
3686}
3687
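/*
 * Worked example of the decoding above: a CPU in long mode with CR4.PAE and
 * CR4.PGE set and EFER.NXE enabled produces the key
 * X86_CR4_PAE | X86_CR4_PGE | RT_BIT(1) | RT_BIT(0), which the switch maps
 * to SUPPAGINGMODE_AMD64_GLOBAL_NX.
 */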
3688
3689#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
3690/**
3691 * Creates the GIP.
3692 *
3693 * @returns negative errno.
3694 * @param pDevExt Instance data. GIP stuff may be updated.
3695 */
3696static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
3697{
3698 PSUPGLOBALINFOPAGE pGip;
3699 RTHCPHYS HCPhysGip;
3700 uint32_t u32SystemResolution;
3701 uint32_t u32Interval;
3702 int rc;
3703
3704 LogFlow(("supdrvGipCreate:\n"));
3705
3706 /* assert order */
3707 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
3708 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
3709 Assert(!pDevExt->pGipTimer);
3710
3711 /*
3712 * Allocate a suitable page with a default kernel mapping.
3713 */
3714 rc = RTR0MemObjAllocLow(&pDevExt->GipMemObj, PAGE_SIZE, false);
3715 if (RT_FAILURE(rc))
3716 {
3717 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
3718 return rc;
3719 }
3720 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
3721 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
3722
3723 /*
3724 * Try bump up the system timer resolution.
3725 * The more interrupts the better...
3726 */
3727 if ( RT_SUCCESS(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
3728 || RT_SUCCESS(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
3729 || RT_SUCCESS(RTTimerRequestSystemGranularity( 3906250 /* 256 HZ */, &u32SystemResolution))
3730 || RT_SUCCESS(RTTimerRequestSystemGranularity( 4000000 /* 250 HZ */, &u32SystemResolution))
3731 || RT_SUCCESS(RTTimerRequestSystemGranularity( 7812500 /* 128 HZ */, &u32SystemResolution))
3732 || RT_SUCCESS(RTTimerRequestSystemGranularity(10000000 /* 100 HZ */, &u32SystemResolution))
3733 || RT_SUCCESS(RTTimerRequestSystemGranularity(15625000 /* 64 HZ */, &u32SystemResolution))
3734 || RT_SUCCESS(RTTimerRequestSystemGranularity(31250000 /* 32 HZ */, &u32SystemResolution))
3735 )
3736 {
3737 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
3738 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
3739 }
3740
3741 /*
3742 * Find a reasonable update interval, something close to 10ms would be nice,
3743 * and create a recurring timer.
3744 */
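    /* E.g. with the 1024 HZ grant above (976563 ns granularity) the loop below
       settles on 11 ticks = 10742193 ns, so supdrvGipInit gets an update
       frequency of 1000000000 / 10742193 = ~93 Hz. */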
3745 u32Interval = u32SystemResolution = RTTimerGetSystemGranularity();
3746 while (u32Interval < 10000000 /* 10 ms */)
3747 u32Interval += u32SystemResolution;
3748
3749 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0, supdrvGipTimer, pDevExt);
3750 if (RT_FAILURE(rc))
3751 {
3752 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %RU32 ns interval. rc=%d\n", u32Interval, rc));
3753 Assert(!pDevExt->pGipTimer);
3754 supdrvGipDestroy(pDevExt);
3755 return rc;
3756 }
3757
3758 /*
3759 * We're good.
3760 */
3761 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), 1000000000 / u32Interval /*=Hz*/);
3762 return VINF_SUCCESS;
3763}
3764
3765
3766/**
3767 * Terminates the GIP.
3768 *
3769 * @param pDevExt Instance data. GIP stuff may be updated.
3770 */
3771static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
3772{
3773 int rc;
3774#ifdef DEBUG_DARWIN_GIP
3775 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
3776 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
3777 pDevExt->pGipTimer, pDevExt->GipMemObj));
3778#endif
3779
3780 /*
 3781     * Invalidate the GIP data.
3782 */
3783 if (pDevExt->pGip)
3784 {
3785 supdrvGipTerm(pDevExt->pGip);
3786 pDevExt->pGip = NULL;
3787 }
3788
3789 /*
3790 * Destroy the timer and free the GIP memory object.
3791 */
3792 if (pDevExt->pGipTimer)
3793 {
3794 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
3795 pDevExt->pGipTimer = NULL;
3796 }
3797
3798 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
3799 {
3800 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
3801 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
3802 }
3803
3804 /*
3805 * Finally, release the system timer resolution request if one succeeded.
3806 */
3807 if (pDevExt->u32SystemTimerGranularityGrant)
3808 {
3809 rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
3810 pDevExt->u32SystemTimerGranularityGrant = 0;
3811 }
3812}
3813
3814
3815/**
3816 * Timer callback function.
3817 * @param pTimer The timer.
3818 * @param pvUser The device extension.
3819 */
3820static DECLCALLBACK(void) supdrvGipTimer(PRTTIMER pTimer, void *pvUser)
3821{
3822 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3823 supdrvGipUpdate(pDevExt->pGip, RTTimeSystemNanoTS());
3824}
3825#endif /* USE_NEW_OS_INTERFACE_FOR_GIP */
3826
3827
3828/**
3829 * Initializes the GIP data.
3830 *
3831 * @returns IPRT status code.
3832 * @param pDevExt Pointer to the device instance data.
3833 * @param pGip Pointer to the read-write kernel mapping of the GIP.
3834 * @param HCPhys The physical address of the GIP.
3835 * @param u64NanoTS The current nanosecond timestamp.
 3836 * @param uUpdateHz     The update frequency.
3837 */
3838int VBOXCALL supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS, unsigned uUpdateHz)
3839{
3840 unsigned i;
3841#ifdef DEBUG_DARWIN_GIP
3842 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
3843#else
3844 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
3845#endif
3846
3847 /*
3848 * Initialize the structure.
3849 */
3850 memset(pGip, 0, PAGE_SIZE);
3851 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
3852 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
3853 pGip->u32Mode = supdrvGipDeterminTscMode(pDevExt);
3854 pGip->u32UpdateHz = uUpdateHz;
3855 pGip->u32UpdateIntervalNS = 1000000000 / uUpdateHz;
3856 pGip->u64NanoTSLastUpdateHz = u64NanoTS;
3857
3858 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
3859 {
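        /* An even transaction id means no update is in progress; the update
           code flips it to odd when it starts touching the record and back to
           even when done, so readers can detect and retry torn reads. */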
3860 pGip->aCPUs[i].u32TransactionId = 2;
3861 pGip->aCPUs[i].u64NanoTS = u64NanoTS;
3862 pGip->aCPUs[i].u64TSC = ASMReadTSC();
3863
3864 /*
3865 * We don't know the following values until we've executed updates.
3866 * So, we'll just insert very high values.
3867 */
3868 pGip->aCPUs[i].u64CpuHz = _4G + 1;
3869 pGip->aCPUs[i].u32UpdateIntervalTSC = _2G / 4;
3870 pGip->aCPUs[i].au32TSCHistory[0] = _2G / 4;
3871 pGip->aCPUs[i].au32TSCHistory[1] = _2G / 4;
3872 pGip->aCPUs[i].au32TSCHistory[2] = _2G / 4;
3873 pGip->aCPUs[i].au32TSCHistory[3] = _2G / 4;
3874 pGip->aCPUs[i].au32TSCHistory[4] = _2G / 4;
3875 pGip->aCPUs[i].au32TSCHistory[5] = _2G / 4;
3876 pGip->aCPUs[i].au32TSCHistory[6] = _2G / 4;
3877 pGip->aCPUs[i].au32TSCHistory[7] = _2G / 4;
3878 }
3879
3880 /*
3881 * Link it to the device extension.
3882 */
3883 pDevExt->pGip = pGip;
3884 pDevExt->HCPhysGip = HCPhys;
3885 pDevExt->cGipUsers = 0;
3886
3887 return VINF_SUCCESS;
3888}
3889
3890
3891/**
 3892 * Determine the GIP TSC mode.
3893 *
3894 * @returns The most suitable TSC mode.
3895 * @param pDevExt Pointer to the device instance data.
3896 */
3897static SUPGIPMODE supdrvGipDeterminTscMode(PSUPDRVDEVEXT pDevExt)
3898{
3899#ifndef USE_NEW_OS_INTERFACE_FOR_GIP
3900 /*
3901 * The problem here is that AMD processors with power management features
3902 * may easily end up with different TSCs because the CPUs or even cores
3903 * on the same physical chip run at different frequencies to save power.
3904 *
3905 * It is rumoured that this will be corrected with Barcelona and it's
3906 * expected that this will be indicated by the TscInvariant bit in
3907 * cpuid(0x80000007). So, the "difficult" bit here is to correctly
 3908 * identify the older CPUs which don't vary the core frequency and
 3909 * can be relied upon to have a somewhat uniform TSC across the cpus.
3910 */
3911 if (supdrvOSGetCPUCount() > 1)
3912 {
3913 uint32_t uEAX, uEBX, uECX, uEDX;
3914
 3915        /* Permit the user to override. */
3916 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
3917 return SUPGIPMODE_ASYNC_TSC;
3918
3919 /* Check for "AuthenticAMD" */
3920 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
3921 if ( uEAX >= 1
3922 && uEBX == X86_CPUID_VENDOR_AMD_EBX
3923 && uECX == X86_CPUID_VENDOR_AMD_ECX
3924 && uEDX == X86_CPUID_VENDOR_AMD_EDX)
3925 {
3926 /* Check for APM support and that TscInvariant is cleared. */
3927 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
3928 if (uEAX >= 0x80000007)
3929 {
3930 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
3931 if ( !(uEDX & RT_BIT(8))/* TscInvariant */
3932 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
3933 return SUPGIPMODE_ASYNC_TSC;
3934 }
3935 }
3936 }
3937#endif
3938 return SUPGIPMODE_SYNC_TSC;
3939}
3940
3941
3942/**
3943 * Invalidates the GIP data upon termination.
3944 *
3945 * @param pGip Pointer to the read-write kernel mapping of the GIP.
3946 */
3947void VBOXCALL supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
3948{
3949 unsigned i;
3950 pGip->u32Magic = 0;
3951 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
3952 {
3953 pGip->aCPUs[i].u64NanoTS = 0;
3954 pGip->aCPUs[i].u64TSC = 0;
3955 pGip->aCPUs[i].iTSCHistoryHead = 0;
3956 }
3957}
3958
3959
3960/**
3961 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
3962 * updates all the per cpu data except the transaction id.
3963 *
3964 * @param pGip The GIP.
3965 * @param pGipCpu Pointer to the per cpu data.
3966 * @param u64NanoTS The current time stamp.
3967 */
3968static void supdrvGipDoUpdateCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3969{
3970 uint64_t u64TSC;
3971 uint64_t u64TSCDelta;
3972 uint32_t u32UpdateIntervalTSC;
3973 uint32_t u32UpdateIntervalTSCSlack;
3974 unsigned iTSCHistoryHead;
3975 uint64_t u64CpuHz;
3976
3977 /*
3978 * Update the NanoTS.
3979 */
3980 ASMAtomicXchgU64(&pGipCpu->u64NanoTS, u64NanoTS);
3981
3982 /*
3983 * Calc TSC delta.
3984 */
3985 /** @todo validate the NanoTS delta, don't trust the OS to call us when it should... */
3986 u64TSC = ASMReadTSC();
3987 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
3988 ASMAtomicXchgU64(&pGipCpu->u64TSC, u64TSC);
3989
3990 if (u64TSCDelta >> 32)
3991 {
3992 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
3993 pGipCpu->cErrors++;
3994 }
3995
3996 /*
3997 * TSC History.
3998 */
 3999    Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
4000
4001 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
4002 ASMAtomicXchgU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
4003 ASMAtomicXchgU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
4004
4005 /*
4006 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
4007 */
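    /* The >= 1000 Hz case averages all 8 history samples, summing them in two
       groups of four and halving the result; for sane interval values the
       intermediate sums stay comfortably within 32 bits. */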
4008 if (pGip->u32UpdateHz >= 1000)
4009 {
4010 uint32_t u32;
4011 u32 = pGipCpu->au32TSCHistory[0];
4012 u32 += pGipCpu->au32TSCHistory[1];
4013 u32 += pGipCpu->au32TSCHistory[2];
4014 u32 += pGipCpu->au32TSCHistory[3];
4015 u32 >>= 2;
4016 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
4017 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
4018 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
4019 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
4020 u32UpdateIntervalTSC >>= 2;
4021 u32UpdateIntervalTSC += u32;
4022 u32UpdateIntervalTSC >>= 1;
4023
 4024        /* Value chosen for a 2GHz Athlon64 running Linux 2.6.10/11. */
4025 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
4026 }
4027 else if (pGip->u32UpdateHz >= 90)
4028 {
4029 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
4030 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
4031 u32UpdateIntervalTSC >>= 1;
4032
 4033        /* Value chosen on a 2GHz ThinkPad running Windows. */
4034 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
4035 }
4036 else
4037 {
4038 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
4039
 4040        /* This value hasn't been checked yet; waiting for OS/2 and 33Hz timers. :-) */
4041 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
4042 }
4043 ASMAtomicXchgU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
4044
4045 /*
4046 * CpuHz.
4047 */
4048 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, pGip->u32UpdateHz);
4049 ASMAtomicXchgU64(&pGipCpu->u64CpuHz, u64CpuHz);
4050}
4051
4052
4053/**
4054 * Updates the GIP.
4055 *
4056 * @param pGip Pointer to the GIP.
 4057 * @param u64NanoTS The current nanosecond timestamp.
4058 */
4059void VBOXCALL supdrvGipUpdate(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS)
4060{
4061 /*
 4062     * Determine the relevant CPU data.
4063 */
4064 PSUPGIPCPU pGipCpu;
4065 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
4066 pGipCpu = &pGip->aCPUs[0];
4067 else
4068 {
4069 unsigned iCpu = ASMGetApicId();
 4070        if (RT_UNLIKELY(iCpu >= RT_ELEMENTS(pGip->aCPUs)))
4071 return;
4072 pGipCpu = &pGip->aCPUs[iCpu];
4073 }
4074
4075 /*
4076 * Start update transaction.
4077 */
4078 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
4079 {
 4080        /* This can happen on win32 if we're taking too long and there are more CPUs around. Shouldn't happen though. */
4081 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
4082 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4083 pGipCpu->cErrors++;
4084 return;
4085 }
4086
4087 /*
4088 * Recalc the update frequency every 0x800th time.
4089 */
4090 if (!(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
4091 {
4092 if (pGip->u64NanoTSLastUpdateHz)
4093 {
4094#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
4095 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
4096 uint32_t u32UpdateHz = (uint32_t)((UINT64_C(1000000000) * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
4097 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
4098 {
4099 ASMAtomicXchgU32(&pGip->u32UpdateHz, u32UpdateHz);
4100 ASMAtomicXchgU32(&pGip->u32UpdateIntervalNS, 1000000000 / u32UpdateHz);
4101 }
4102#endif
4103 }
4104 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS);
4105 }
4106
4107 /*
4108 * Update the data.
4109 */
4110 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
4111
4112 /*
4113 * Complete transaction.
4114 */
4115 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4116}
4117
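#if 0 /* Illustrative sketch only: how a GIP consumer can use the even/odd
         transaction id as a sequence lock when reading a per-cpu record.
         SUPReadGipCpuHz() is a hypothetical name, not part of this driver. */
static uint64_t SUPReadGipCpuHz(PSUPGIPCPU pGipCpu)
{
    uint32_t u32TransactionId;
    uint64_t u64CpuHz;
    do
    {
        u32TransactionId = pGipCpu->u32TransactionId;
        ASMCompilerBarrier();           /* don't let the data reads get hoisted above the id read. */
        u64CpuHz = pGipCpu->u64CpuHz;   /* ...read whatever fields are needed... */
        ASMCompilerBarrier();
    } while (   pGipCpu->u32TransactionId != u32TransactionId
             || (u32TransactionId & 1)); /* odd = update in progress, retry. */
    return u64CpuHz;
}
#endif
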
4118
4119/**
4120 * Updates the per cpu GIP data for the calling cpu.
4121 *
4122 * @param pGip Pointer to the GIP.
 4123 * @param u64NanoTS The current nanosecond timestamp.
4124 * @param iCpu The CPU index.
4125 */
4126void VBOXCALL supdrvGipUpdatePerCpu(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS, unsigned iCpu)
4127{
4128 PSUPGIPCPU pGipCpu;
4129
4130 if (RT_LIKELY(iCpu < RT_ELEMENTS(pGip->aCPUs)))
4131 {
4132 pGipCpu = &pGip->aCPUs[iCpu];
4133
4134 /*
4135 * Start update transaction.
4136 */
4137 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
4138 {
4139 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
4140 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4141 pGipCpu->cErrors++;
4142 return;
4143 }
4144
4145 /*
4146 * Update the data.
4147 */
4148 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
4149
4150 /*
4151 * Complete transaction.
4152 */
4153 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4154 }
4155}
4156
4157
4158/**
4159 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
4160 *
4161 * @param idCpu Ignored.
4162 * @param pvUser1 Where to put the TSC.
4163 * @param pvUser2 Ignored.
4164 */
4165static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4166{
4167 *(uint64_t *)pvUser1 = ASMReadTSC();
4168}
4169
4170
4171/**
4172 * Determine if Async GIP mode is required because of TSC drift.
4173 *
4174 * When using the default/normal timer code it is essential that the time stamp counter
 4175 * (TSC) never runs backwards, that is, a read operation to the counter should return
4176 * a bigger value than any previous read operation. This is guaranteed by the latest
4177 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
4178 * case we have to choose the asynchronous timer mode.
4179 *
 4180 * @param pu64DiffCores  Where to store the determined difference between cores.
 4181 * @returns false if the time stamp counters appear to be in sync, true otherwise.
4182 */
4183bool VBOXCALL supdrvDetermineAsyncTsc(uint64_t *pu64DiffCores)
4184{
4185 static uint64_t s_aTsc[8][RTCPUSET_MAX_CPUS];
4186 uint64_t u64Diff, u64DiffMin, u64DiffMax, u64TscLast;
4187 int iSlot, iCpu, cCpus;
4188 bool fBackwards;
4189 RTCPUSET OnlineCpus;
4190 int rc;
4191
4192 *pu64DiffCores = 1;
4193
4194 RTMpGetOnlineSet(&OnlineCpus);
4195 cCpus = RTCpuSetCount(&OnlineCpus);
4196 if (cCpus < 2)
4197 return false;
4198 Assert(cCpus <= RT_ELEMENTS(s_aTsc[0]));
4199
4200 /*
4201 * Collect data from the online CPUs.
4202 */
4203 for (iSlot = 0; iSlot < RT_ELEMENTS(s_aTsc); iSlot++)
4204 {
4205 RTCPUID iCpuSet = 0;
4206 for (iCpu = 0; iCpu < cCpus; iCpu++)
4207 {
4208 while (!RTCpuSetIsMember(&OnlineCpus, iCpuSet))
4209 iCpuSet++; /* skip offline CPU */
4210 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpuSet), supdrvDetermineAsyncTscWorker, &s_aTsc[iSlot][iCpu], NULL);
4211 if (rc == VERR_NOT_SUPPORTED)
4212 return false;
4213 iCpuSet++;
4214 }
4215 }
4216
4217 /*
4218 * Check that the TSC reads are strictly ascending.
4219 */
4220 fBackwards = false;
4221 u64DiffMin = (uint64_t)~0;
4222 u64TscLast = 0;
4223 for (iSlot = 0; iSlot < RT_ELEMENTS(s_aTsc); iSlot++)
4224 {
4225 uint64_t u64Tsc0 = s_aTsc[iSlot][0];
4226 u64DiffMax = 0;
4227 if (u64Tsc0 <= u64TscLast)
4228 fBackwards = true;
4229 u64TscLast = u64Tsc0;
4230 for (iCpu = 1; iCpu < cCpus; iCpu++)
4231 {
4232 uint64_t u64TscN = s_aTsc[iSlot][iCpu];
4233 if (u64TscN <= u64TscLast)
4234 fBackwards = true;
4235 u64TscLast = u64TscN;
4236
4237 u64Diff = u64TscN > u64Tsc0 ? u64TscN - u64Tsc0 : u64Tsc0 - u64TscN;
4238 if (u64DiffMax < u64Diff)
4239 u64DiffMax = u64Diff;
4240 }
4241 if (u64DiffMin > u64DiffMax)
4242 u64DiffMin = u64DiffMax;
4243 }
4244 /* informational */
4245 *pu64DiffCores = u64DiffMin;
4246
4247 return fBackwards;
4248}
4249
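#if 0 /* Illustrative sketch only: how an OS-specific host check might consult
         the test above.  pDevExt and fForcedAsyncTsc are hypothetical here,
         used purely for illustration. */
{
    uint64_t u64DiffCores;
    if (supdrvDetermineAsyncTsc(&u64DiffCores))
    {
        OSDBGPRINT(("TSCs not strictly ascending across cpus, min max-diff %RU64 ticks; forcing async GIP mode.\n",
                    u64DiffCores));
        pDevExt->fForcedAsyncTsc = true;
    }
}
#endif
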
4250
4251#ifndef DEBUG /** @todo change #ifndef DEBUG -> #ifdef LOG_ENABLED */
4252/**
4253 * Stub function for non-debug builds.
4254 */
4255RTDECL(PRTLOGGER) RTLogDefaultInstance(void)
4256{
4257 return NULL;
4258}
4259
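/**
 * Stub function for non-debug builds.
 */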
4260RTDECL(PRTLOGGER) RTLogRelDefaultInstance(void)
4261{
4262 return NULL;
4263}
4264
4265/**
4266 * Stub function for non-debug builds.
4267 */
4268RTDECL(int) RTLogSetDefaultInstanceThread(PRTLOGGER pLogger, uintptr_t uKey)
4269{
4270 return 0;
4271}
4272
4273/**
4274 * Stub function for non-debug builds.
4275 */
4276RTDECL(void) RTLogLogger(PRTLOGGER pLogger, void *pvCallerRet, const char *pszFormat, ...)
4277{
4278}
4279
4280/**
4281 * Stub function for non-debug builds.
4282 */
4283RTDECL(void) RTLogLoggerEx(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, ...)
4284{
4285}
4286
4287/**
4288 * Stub function for non-debug builds.
4289 */
4290RTDECL(void) RTLogLoggerExV(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, va_list args)
4291{
4292}
4293
4294/**
4295 * Stub function for non-debug builds.
4296 */
4297RTDECL(void) RTLogPrintf(const char *pszFormat, ...)
4298{
4299}
4300
4301/**
4302 * Stub function for non-debug builds.
4303 */
4304RTDECL(void) RTLogPrintfV(const char *pszFormat, va_list args)
4305{
4306}
4307#endif /* !DEBUG */
4308