VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c@5999

Last change on this file since 5999 was 5999, checked in by vboxsync, 17 years ago

The Giant CDDL Dual-License Header Change.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Rev
File size: 36.4 KB
1/* $Revision: 5999 $ */
2/** @file
3 * innotek Portable Runtime - Ring-0 Memory Objects, Linux.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "the-linux-kernel.h"
32
33#include <iprt/memobj.h>
34#include <iprt/alloc.h>
35#include <iprt/assert.h>
36#include <iprt/log.h>
37#include <iprt/string.h>
38#include <iprt/process.h>
39#include "internal/memobj.h"
40
41
42/*******************************************************************************
43* Structures and Typedefs *
44*******************************************************************************/
45/**
46 * The Linux version of the memory object structure.
47 */
48typedef struct RTR0MEMOBJLNX
49{
50 /** The core structure. */
51 RTR0MEMOBJINTERNAL Core;
52 /** Set if the allocation is contiguous.
53 * This means it has to be given back as one chunk. */
54 bool fContiguous;
55 /** Set if we've vmap'ed the memory into ring-0. */
56 bool fMappedToRing0;
57 /** The number of pages in the apPages array. */
58 size_t cPages;
59 /** Array of struct page pointers. (variable size) */
60 struct page *apPages[1];
61} RTR0MEMOBJLNX, *PRTR0MEMOBJLNX;
62
63
64/**
65 * Helper that converts from a RTR0PROCESS handle to a linux task.
66 *
67 * @returns The corresponding Linux task.
68 * @param R0Process IPRT ring-0 process handle.
69 */
70struct task_struct *rtR0ProcessToLinuxTask(RTR0PROCESS R0Process)
71{
72 /** @todo fix rtR0ProcessToLinuxTask!! */
73 return R0Process == RTR0ProcHandleSelf() ? current : NULL;
74}
75
76
77/**
78 * Compute order. Some functions allocate 2^order pages.
79 *
80 * @returns order.
81 * @param cPages Number of pages.
82 */
83static int rtR0MemObjLinuxOrder(size_t cPages)
84{
85 int iOrder;
86 size_t cTmp;
87
88 for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
89 ;
90 if (cPages & ~((size_t)1 << iOrder))
91 ++iOrder;
92
93 return iOrder;
94}
95
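/*
 * Illustrative sketch, not part of the checked-in revision: a few worked values
 * for rtR0MemObjLinuxOrder(). It returns the smallest order such that
 * 2^order >= cPages, which is what alloc_pages() expects. Compiled out.
 */
#if 0
static void rtR0MemObjLinuxOrderExample(void)
{
    Assert(rtR0MemObjLinuxOrder(1) == 0);   /* 2^0 = 1 page */
    Assert(rtR0MemObjLinuxOrder(2) == 1);   /* 2^1 = 2 pages */
    Assert(rtR0MemObjLinuxOrder(3) == 2);   /* rounded up to 2^2 = 4 pages */
    Assert(rtR0MemObjLinuxOrder(8) == 3);   /* exact power of two */
    Assert(rtR0MemObjLinuxOrder(9) == 4);   /* rounded up to 2^4 = 16 pages */
}
#endif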
96
97/**
98 * Converts from RTMEM_PROT_* to Linux PAGE_*.
99 *
100 * @returns Linux page protection constant.
101 * @param fProt The IPRT protection mask.
102 * @param fKernel Whether it applies to kernel or user space.
103 */
104static pgprot_t rtR0MemObjLinuxConvertProt(unsigned fProt, bool fKernel)
105{
106 switch (fProt)
107 {
108 default:
109 AssertMsgFailed(("%#x %d\n", fProt, fKernel));
110 case RTMEM_PROT_NONE:
111 return PAGE_NONE;
112
113 case RTMEM_PROT_READ:
114 return fKernel ? PAGE_KERNEL_RO : PAGE_READONLY;
115
116 case RTMEM_PROT_WRITE:
117 case RTMEM_PROT_WRITE | RTMEM_PROT_READ:
118 return fKernel ? PAGE_KERNEL : PAGE_SHARED;
119
120 case RTMEM_PROT_EXEC:
121 case RTMEM_PROT_EXEC | RTMEM_PROT_READ:
122#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
123 if (fKernel)
124 {
125 pgprot_t fPg = MY_PAGE_KERNEL_EXEC;
126 pgprot_val(fPg) &= ~_PAGE_RW;
127 return fPg;
128 }
129 return PAGE_READONLY_EXEC;
130#else
131 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_READONLY_EXEC;
132#endif
133
134 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC:
135 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_READ:
136 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_SHARED_EXEC;
137 }
138}
139
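/*
 * Illustrative sketch, not part of the checked-in revision: how the conversion
 * above is typically consumed. The kernel variant feeds vmap() further down,
 * the user variant feeds remap_pfn_range()/remap_page_range() in
 * rtR0MemObjNativeMapUser(). Compiled out.
 */
#if 0
static void rtR0MemObjLinuxConvertProtExample(void)
{
    pgprot_t fPgKrnl = rtR0MemObjLinuxConvertProt(RTMEM_PROT_READ | RTMEM_PROT_WRITE, true  /* kernel */);
    pgprot_t fPgUser = rtR0MemObjLinuxConvertProt(RTMEM_PROT_READ,                    false /* user */);
    Assert(pgprot_val(fPgKrnl) == pgprot_val(PAGE_KERNEL));
    Assert(pgprot_val(fPgUser) == pgprot_val(PAGE_READONLY));
}
#endif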
140
141/**
142 * Internal worker that allocates physical pages and creates the memory object for them.
143 *
144 * @returns IPRT status code.
145 * @param ppMemLnx Where to store the memory object pointer.
146 * @param enmType The object type.
147 * @param cb The number of bytes to allocate.
148 * @param fFlagsLnx The page allocation flags (GFPs).
149 * @param fContiguous Whether the allocation must be contiguous.
150 */
151static int rtR0MemObjLinuxAllocPages(PRTR0MEMOBJLNX *ppMemLnx, RTR0MEMOBJTYPE enmType, size_t cb, unsigned fFlagsLnx, bool fContiguous)
152{
153 size_t iPage;
154 size_t cPages = cb >> PAGE_SHIFT;
155 struct page *paPages;
156
157 /*
158 * Allocate a memory object structure that's large enough to contain
159 * the page pointer array.
160 */
161 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), enmType, NULL, cb);
162 if (!pMemLnx)
163 return VERR_NO_MEMORY;
164 pMemLnx->cPages = cPages;
165
166 /*
167 * Allocate the pages.
168 * For small allocations we'll try contiguous first and then fall back on page by page.
169 */
170#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
171 if ( fContiguous
172 || cb <= PAGE_SIZE * 2)
173 {
174 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cb >> PAGE_SHIFT));
175 if (paPages)
176 {
177 fContiguous = true;
178 for (iPage = 0; iPage < cPages; iPage++)
179 pMemLnx->apPages[iPage] = &paPages[iPage];
180 }
181 else if (fContiguous)
182 {
183 rtR0MemObjDelete(&pMemLnx->Core);
184 return VERR_NO_MEMORY;
185 }
186 }
187
188 if (!fContiguous)
189 {
190 for (iPage = 0; iPage < cPages; iPage++)
191 {
192 pMemLnx->apPages[iPage] = alloc_page(fFlagsLnx);
193 if (RT_UNLIKELY(!pMemLnx->apPages[iPage]))
194 {
195 while (iPage-- > 0)
196 __free_page(pMemLnx->apPages[iPage]);
197 rtR0MemObjDelete(&pMemLnx->Core);
198 return VERR_NO_MEMORY;
199 }
200 }
201 }
202
203#else /* < 2.4.22 */
204 /** @todo figure out why we didn't allocate page-by-page on 2.4.21 and older... */
205 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cb >> PAGE_SHIFT));
206 if (!paPages)
207 {
208 rtR0MemObjDelete(&pMemLnx->Core);
209 return VERR_NO_MEMORY;
210 }
211 for (iPage = 0; iPage < cPages; iPage++)
212 {
213 pMemLnx->apPages[iPage] = &paPages[iPage];
214 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
215 MY_CHANGE_PAGE_ATTR(pMemLnx->apPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
216 if (PageHighMem(pMemLnx->apPages[iPage]))
217 BUG();
218 }
219
220 fContiguous = true;
221#endif /* < 2.4.22 */
222 pMemLnx->fContiguous = fContiguous;
223
224 /*
225 * Reserve the pages.
226 */
227 for (iPage = 0; iPage < cPages; iPage++)
228 SetPageReserved(pMemLnx->apPages[iPage]);
229
230 *ppMemLnx = pMemLnx;
231 return VINF_SUCCESS;
232}
233
234
235/**
236 * Frees the physical pages allocated by the rtR0MemObjLinuxAllocPages() call.
237 *
238 * This method does NOT free the object.
239 *
240 * @param pMemLnx The object which physical pages should be freed.
241 */
242static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx)
243{
244 size_t iPage = pMemLnx->cPages;
245 if (iPage > 0)
246 {
247 /*
248 * Restore the page flags.
249 */
250 while (iPage-- > 0)
251 {
252 ClearPageReserved(pMemLnx->apPages[iPage]);
253#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
254#else
255 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
256 MY_CHANGE_PAGE_ATTR(pMemLnx->apPages[iPage], 1, PAGE_KERNEL);
257#endif
258 }
259
260 /*
261 * Free the pages.
262 */
263#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
264 if (!pMemLnx->fContiguous)
265 {
266 iPage = pMemLnx->cPages;
267 while (iPage-- > 0)
268 __free_page(pMemLnx->apPages[iPage]);
269 }
270 else
271#endif
272 __free_pages(pMemLnx->apPages[0], rtR0MemObjLinuxOrder(pMemLnx->cPages));
273
274 pMemLnx->cPages = 0;
275 }
276}
277
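/*
 * Illustrative sketch, not part of the checked-in revision: the typical pairing
 * of rtR0MemObjLinuxAllocPages() and rtR0MemObjLinuxFreePages(), mirroring what
 * the rtR0MemObjNativeAlloc* functions further down do. GFP_KERNEL is used here
 * so the pages are directly accessible without a vmap. Compiled out.
 */
#if 0
static int rtR0MemObjLinuxAllocFreeExample(void)
{
    PRTR0MEMOBJLNX pMemLnx;
    int rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, 4 * PAGE_SIZE,
                                       GFP_KERNEL, false /* fContiguous */);
    if (RT_SUCCESS(rc))
    {
        /* ... use pMemLnx->apPages[0 .. cPages-1] ... */
        rtR0MemObjLinuxFreePages(pMemLnx);
        rtR0MemObjDelete(&pMemLnx->Core);
    }
    return rc;
}
#endif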
278
279/**
280 * Maps the allocation into ring-0.
281 *
282 * This will update the RTR0MEMOBJLNX::Core.pv and RTR0MEMOBJLNX::fMappedToRing0 members.
283 *
284 * Contiguous mappings that aren't in 'high' memory will already be mapped into kernel
285 * space, so we'll use that mapping if possible. If execute access is required, we'll
286 * play safe and do our own mapping.
287 *
288 * @returns IPRT status code.
289 * @param pMemLnx The linux memory object to map.
290 * @param fExecutable Whether execute access is required.
291 */
292static int rtR0MemObjLinuxVMap(PRTR0MEMOBJLNX pMemLnx, bool fExecutable)
293{
294 int rc = VINF_SUCCESS;
295
296 /*
297 * Choose mapping strategy.
298 */
299 bool fMustMap = fExecutable
300 || !pMemLnx->fContiguous;
301 if (!fMustMap)
302 {
303 size_t iPage = pMemLnx->cPages;
304 while (iPage-- > 0)
305 if (PageHighMem(pMemLnx->apPages[iPage]))
306 {
307 fMustMap = true;
308 break;
309 }
310 }
311
312 Assert(!pMemLnx->Core.pv);
313 Assert(!pMemLnx->fMappedToRing0);
314
315 if (fMustMap)
316 {
317 /*
318 * Use vmap - 2.4.22 and later.
319 */
320#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
321 pgprot_t fPg;
322 pgprot_val(fPg) = _PAGE_PRESENT | _PAGE_RW;
323# ifdef _PAGE_NX
324 if (!fExecutable)
325 pgprot_val(fPg) |= _PAGE_NX;
326# endif
327
328# ifdef VM_MAP
329 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_MAP, fPg);
330# else
331 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_ALLOC, fPg);
332# endif
333 if (pMemLnx->Core.pv)
334 pMemLnx->fMappedToRing0 = true;
335 else
336 rc = VERR_MAP_FAILED;
337#else /* < 2.4.22 */
338 rc = VERR_NOT_SUPPORTED;
339#endif
340 }
341 else
342 {
343 /*
344 * Use the kernel RAM mapping.
345 */
346 pMemLnx->Core.pv = phys_to_virt(page_to_phys(pMemLnx->apPages[0]));
347 Assert(pMemLnx->Core.pv);
348 }
349
350 return rc;
351}
352
353
354/**
355 * Undoes what rtR0MemObjLinuxVMap() did.
356 *
357 * @param pMemLnx The linux memory object.
358 */
359static void rtR0MemObjLinuxVUnmap(PRTR0MEMOBJLNX pMemLnx)
360{
361#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
362 if (pMemLnx->fMappedToRing0)
363 {
364 Assert(pMemLnx->Core.pv);
365 vunmap(pMemLnx->Core.pv);
366 pMemLnx->fMappedToRing0 = false;
367 }
368#else /* < 2.4.22 */
369 Assert(!pMemLnx->fMappedToRing0);
370#endif
371 pMemLnx->Core.pv = NULL;
372}
373
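/*
 * Illustrative sketch, not part of the checked-in revision: where the vmap pair
 * fits into the lifecycle. Core.pv is only valid between rtR0MemObjLinuxVMap()
 * and rtR0MemObjLinuxVUnmap(). Compiled out.
 */
#if 0
static int rtR0MemObjLinuxVMapExample(PRTR0MEMOBJLNX pMemLnx)
{
    int rc = rtR0MemObjLinuxVMap(pMemLnx, false /* fExecutable */);
    if (RT_SUCCESS(rc))
    {
        memset(pMemLnx->Core.pv, 0, pMemLnx->Core.cb); /* the ring-0 mapping is now usable */
        rtR0MemObjLinuxVUnmap(pMemLnx);
    }
    return rc;
}
#endif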
374
375int rtR0MemObjNativeFree(RTR0MEMOBJ pMem)
376{
377 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
378
379 /*
380 * Release any memory that we've allocated or locked.
381 */
382 switch (pMemLnx->Core.enmType)
383 {
384 case RTR0MEMOBJTYPE_LOW:
385 case RTR0MEMOBJTYPE_PAGE:
386 case RTR0MEMOBJTYPE_CONT:
387 case RTR0MEMOBJTYPE_PHYS:
388 rtR0MemObjLinuxVUnmap(pMemLnx);
389 rtR0MemObjLinuxFreePages(pMemLnx);
390 break;
391
392 case RTR0MEMOBJTYPE_LOCK:
393 if (pMemLnx->Core.u.Lock.R0Process != NIL_RTR0PROCESS)
394 {
395 size_t iPage;
396 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
397 Assert(pTask);
398 if (pTask && pTask->mm)
399 down_read(&pTask->mm->mmap_sem);
400
401 iPage = pMemLnx->cPages;
402 while (iPage-- > 0)
403 {
404 if (!PageReserved(pMemLnx->apPages[iPage]))
405 SetPageDirty(pMemLnx->apPages[iPage]);
406 page_cache_release(pMemLnx->apPages[iPage]);
407 }
408
409 if (pTask && pTask->mm)
410 up_read(&pTask->mm->mmap_sem);
411 }
412 else
413 AssertFailed(); /* not implemented for R0 */
414 break;
415
416 case RTR0MEMOBJTYPE_RES_VIRT:
417 Assert(pMemLnx->Core.pv);
418 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
419 {
420 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
421 Assert(pTask);
422 if (pTask && pTask->mm)
423 {
424 down_write(&pTask->mm->mmap_sem);
425 MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
426 up_write(&pTask->mm->mmap_sem);
427 }
428 }
429 else
430 {
431 vunmap(pMemLnx->Core.pv);
432
433 Assert(pMemLnx->cPages == 1 && pMemLnx->apPages[0] != NULL);
434 __free_page(pMemLnx->apPages[0]);
435 pMemLnx->apPages[0] = NULL;
436 pMemLnx->cPages = 0;
437 }
438 pMemLnx->Core.pv = NULL;
439 break;
440
441 case RTR0MEMOBJTYPE_MAPPING:
442 Assert(pMemLnx->cPages == 0); Assert(pMemLnx->Core.pv);
443 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
444 {
445 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
446 Assert(pTask);
447 if (pTask && pTask->mm)
448 {
449 down_write(&pTask->mm->mmap_sem);
450 MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
451 up_write(&pTask->mm->mmap_sem);
452 }
453 }
454 else
455 vunmap(pMemLnx->Core.pv);
456 pMemLnx->Core.pv = NULL;
457 break;
458
459 default:
460 AssertMsgFailed(("enmType=%d\n", pMemLnx->Core.enmType));
461 return VERR_INTERNAL_ERROR;
462 }
463 return VINF_SUCCESS;
464}
465
466
467int rtR0MemObjNativeAllocPage(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
468{
469 PRTR0MEMOBJLNX pMemLnx;
470 int rc;
471
472#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
473 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, GFP_HIGHUSER, false /* non-contiguous */);
474#else
475 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, GFP_USER, false /* non-contiguous */);
476#endif
477 if (RT_SUCCESS(rc))
478 {
479 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
480 if (RT_SUCCESS(rc))
481 {
482 *ppMem = &pMemLnx->Core;
483 return rc;
484 }
485
486 rtR0MemObjLinuxFreePages(pMemLnx);
487 rtR0MemObjDelete(&pMemLnx->Core);
488 }
489
490 return rc;
491}
492
493
494int rtR0MemObjNativeAllocLow(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
495{
496 PRTR0MEMOBJLNX pMemLnx;
497 int rc;
498
499#ifdef RT_ARCH_AMD64
500# ifdef GFP_DMA32
501 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, GFP_DMA32, false /* non-contiguous */);
502 if (RT_FAILURE(rc))
503# endif
504 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, GFP_DMA, false /* non-contiguous */);
505#else
506 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, GFP_USER, false /* non-contiguous */);
507#endif
508 if (RT_SUCCESS(rc))
509 {
510 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
511 if (RT_SUCCESS(rc))
512 {
513 *ppMem = &pMemLnx->Core;
514 return rc;
515 }
516
517 rtR0MemObjLinuxFreePages(pMemLnx);
518 rtR0MemObjDelete(&pMemLnx->Core);
519 }
520
521 return rc;
522}
523
524
525int rtR0MemObjNativeAllocCont(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
526{
527 PRTR0MEMOBJLNX pMemLnx;
528 int rc;
529
530#ifdef RT_ARCH_AMD64
531# ifdef GFP_DMA32
532 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, GFP_DMA32, true /* contiguous */);
533 if (RT_FAILURE(rc))
534# endif
535 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, GFP_DMA, true /* contiguous */);
536#else
537 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, GFP_USER, true /* contiguous */);
538#endif
539 if (RT_SUCCESS(rc))
540 {
541 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
542 if (RT_SUCCESS(rc))
543 {
544#ifdef RT_STRICT
545 size_t iPage = pMemLnx->cPages;
546 while (iPage-- > 0)
547 Assert(page_to_phys(pMemLnx->apPages[iPage]) < _4G);
548#endif
549 pMemLnx->Core.u.Cont.Phys = page_to_phys(pMemLnx->apPages[0]);
550 *ppMem = &pMemLnx->Core;
551 return rc;
552 }
553
554 rtR0MemObjLinuxFreePages(pMemLnx);
555 rtR0MemObjDelete(&pMemLnx->Core);
556 }
557
558 return rc;
559}
560
561
562/**
563 * Worker for rtR0MemObjLinuxAllocPhysSub that tries one allocation strategy.
564 *
565 * @returns IPRT status.
566 * @param ppMem Where to store the memory object pointer on success.
567 * @param enmType The object type.
568 * @param cb The size of the allocation.
569 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
570 * @param fGfp The Linux GFP flags to use for the allocation.
571 */
572static int rtR0MemObjLinuxAllocPhysSub2(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType, size_t cb, RTHCPHYS PhysHighest, unsigned fGfp)
573{
574 PRTR0MEMOBJLNX pMemLnx;
575 int rc;
576
577 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, enmType, cb, fGfp,
578 enmType == RTR0MEMOBJTYPE_PHYS /* contiguous / non-contiguous */);
579 if (RT_FAILURE(rc))
580 return rc;
581
582 /*
583 * Check the addresses if necessary. (Can be optimized a bit for PHYS.)
584 */
585 if (PhysHighest != NIL_RTHCPHYS)
586 {
587 size_t iPage = pMemLnx->cPages;
588 while (iPage-- > 0)
589 if (page_to_phys(pMemLnx->apPages[iPage]) >= PhysHighest)
590 {
591 rtR0MemObjLinuxFreePages(pMemLnx);
592 rtR0MemObjDelete(&pMemLnx->Core);
593 return VERR_NO_MEMORY;
594 }
595 }
596
597 /*
598 * Complete the object.
599 */
600 if (enmType == RTR0MEMOBJTYPE_PHYS)
601 {
602 pMemLnx->Core.u.Phys.PhysBase = page_to_phys(pMemLnx->apPages[0]);
603 pMemLnx->Core.u.Phys.fAllocated = true;
604 }
605 *ppMem = &pMemLnx->Core;
606 return rc;
607}
608
609
610/**
611 * Worker for rtR0MemObjNativeAllocPhys and rtR0MemObjNativeAllocPhysNC.
612 *
613 * @returns IPRT status.
614 * @param ppMem Where to store the memory object pointer on success.
615 * @param enmType The object type.
616 * @param cb The size of the allocation.
617 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
618 */
619static int rtR0MemObjLinuxAllocPhysSub(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType, size_t cb, RTHCPHYS PhysHighest)
620{
621 int rc;
622
623 /*
624 * There are two clear cases: the <=16MB one and the anything-goes one.
625 * When the physical address limit is somewhere in between those two we'll
626 * just have to try, starting with HIGHUSER and working our way through the
627 * different types, hoping we'll get lucky.
628 *
629 * We should probably move this physical address restriction logic up to
630 * the page alloc function as it would be more efficient there. But since
631 * we don't expect this to be a performance issue just yet it can wait.
632 */
633 if (PhysHighest == NIL_RTHCPHYS)
634 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, PhysHighest, GFP_HIGHUSER);
635 else if (PhysHighest <= _1M * 16)
636 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, PhysHighest, GFP_DMA);
637 else
638 {
639 rc = VERR_NO_MEMORY;
640 if (RT_FAILURE(rc))
641 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, PhysHighest, GFP_HIGHUSER);
642 if (RT_FAILURE(rc))
643 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, PhysHighest, GFP_USER);
644#ifdef GFP_DMA32
645 if (RT_FAILURE(rc))
646 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, PhysHighest, GFP_DMA32);
647#endif
648 if (RT_FAILURE(rc))
649 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, PhysHighest, GFP_DMA);
650 }
651 return rc;
652}
653
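/*
 * Illustrative sketch, not part of the checked-in revision: what the fallback
 * chain above means for a caller needing non-contiguous pages below 4GB.
 * rtR0MemObjLinuxAllocPhysSub2() rejects an attempt if any page lands at or
 * above PhysHighest, so the chain simply retries with ever more restrictive
 * GFP zones (GFP_HIGHUSER, GFP_USER, GFP_DMA32 if defined, GFP_DMA). Compiled out.
 */
#if 0
static int rtR0MemObjLinuxAllocBelow4GExample(PPRTR0MEMOBJINTERNAL ppMem, size_t cb)
{
    return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS_NC, cb, _4G - 1);
}
#endif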
654
655int rtR0MemObjNativeAllocPhys(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
656{
657 return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS, cb, PhysHighest);
658}
659
660
661int rtR0MemObjNativeAllocPhysNC(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
662{
663 return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS_NC, cb, PhysHighest);
664}
665
666
667int rtR0MemObjNativeEnterPhys(PPRTR0MEMOBJINTERNAL ppMem, RTHCPHYS Phys, size_t cb)
668{
669 /*
670 * All we need to do here is to validate that we can use
671 * ioremap on the specified address (32/64-bit dma_addr_t).
672 */
673 PRTR0MEMOBJLNX pMemLnx;
674 dma_addr_t PhysAddr = Phys;
675 AssertMsgReturn(PhysAddr == Phys, ("%#llx\n", (unsigned long long)Phys), VERR_ADDRESS_TOO_BIG);
676
677 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_PHYS, NULL, cb);
678 if (!pMemLnx)
679 return VERR_NO_MEMORY;
680
681 pMemLnx->Core.u.Phys.PhysBase = PhysAddr;
682 pMemLnx->Core.u.Phys.fAllocated = false;
683 Assert(!pMemLnx->cPages);
684 *ppMem = &pMemLnx->Core;
685 return VINF_SUCCESS;
686}
687
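/*
 * Illustrative sketch, not part of the checked-in revision: registering an MMIO
 * range (no pages are allocated or touched) so it can later be ioremap'ed via
 * rtR0MemObjNativeMapKernel() further down. The physical address is a made-up
 * example value. Compiled out.
 */
#if 0
static int rtR0MemObjLinuxEnterPhysExample(void)
{
    PRTR0MEMOBJINTERNAL pMem;
    int rc = rtR0MemObjNativeEnterPhys(&pMem, UINT32_C(0xf0000000) /* hypothetical MMIO base */, PAGE_SIZE);
    if (RT_SUCCESS(rc))
    {
        /* ... map it and poke the device registers ... */
        rtR0MemObjNativeFree(pMem);
        rtR0MemObjDelete(pMem); /* normally done by the generic IPRT memory object code */
    }
    return rc;
}
#endif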
688
689int rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, RTR0PROCESS R0Process)
690{
691 const int cPages = cb >> PAGE_SHIFT;
692 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
693 struct vm_area_struct **papVMAs;
694 PRTR0MEMOBJLNX pMemLnx;
695 int rc;
696
697 /*
698 * Check for valid task and size overflows.
699 */
700 if (!pTask)
701 return VERR_NOT_SUPPORTED;
702 if (((size_t)cPages << PAGE_SHIFT) != cb)
703 return VERR_OUT_OF_RANGE;
704
705 /*
706 * Allocate the memory object and a temporary buffer for the VMAs.
707 */
708 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
709 if (!pMemLnx)
710 return VERR_NO_MEMORY;
711
712 papVMAs = (struct vm_area_struct **)RTMemAlloc(sizeof(*papVMAs) * cPages);
713 if (papVMAs)
714 {
715 down_read(&pTask->mm->mmap_sem);
716
717 /*
718 * Get user pages.
719 */
720 rc = get_user_pages(pTask, /* Task for fault accounting. */
721 pTask->mm, /* Whose pages. */
722 R3Ptr, /* Where from. */
723 cPages, /* How many pages. */
724 1, /* Write to memory. */
725 0, /* force. */
726 &pMemLnx->apPages[0], /* Page array. */
727 papVMAs); /* vmas */
728 if (rc == cPages)
729 {
730 /*
731 * Flush dcache (required?) and protect against fork.
732 */
733 /** @todo The Linux fork() protection will require more work if this API
734 * is to be used for anything but locking VM pages. */
735 while (rc-- > 0)
736 {
737 flush_dcache_page(pMemLnx->apPages[rc]);
738 papVMAs[rc]->vm_flags |= VM_DONTCOPY;
739 }
740
741 up_read(&pTask->mm->mmap_sem);
742
743 RTMemFree(papVMAs);
744
745 pMemLnx->Core.u.Lock.R0Process = R0Process;
746 pMemLnx->cPages = cPages;
747 Assert(!pMemLnx->fMappedToRing0);
748 *ppMem = &pMemLnx->Core;
749
750 return VINF_SUCCESS;
751 }
752
753 /*
754 * Failed - we need to unlock any pages that we managed to lock.
755 */
756 while (rc-- > 0)
757 {
758 if (!PageReserved(pMemLnx->apPages[rc]))
759 SetPageDirty(pMemLnx->apPages[rc]);
760 page_cache_release(pMemLnx->apPages[rc]);
761 }
762
763 up_read(&pTask->mm->mmap_sem);
764
765 RTMemFree(papVMAs);
766 rc = VERR_LOCK_FAILED;
767 }
768
769 rtR0MemObjDelete(&pMemLnx->Core);
770 return rc;
771}
772
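/*
 * Illustrative sketch, not part of the checked-in revision: how the native
 * lock / free pair is driven for a user buffer belonging to the calling
 * process. In reality the generic IPRT ring-0 memory object code owns the
 * object; this only shows the native call order. Compiled out.
 */
#if 0
static int rtR0MemObjLinuxLockUserExample(RTR3PTR R3Ptr, size_t cb)
{
    PRTR0MEMOBJINTERNAL pMem;
    int rc = rtR0MemObjNativeLockUser(&pMem, R3Ptr, cb, RTR0ProcHandleSelf());
    if (RT_SUCCESS(rc))
    {
        /* ... the locked pages can now be handed to the hypervisor or used for DMA ... */
        rtR0MemObjNativeFree(pMem);  /* releases the page references */
        rtR0MemObjDelete(pMem);      /* normally done by the generic IPRT code */
    }
    return rc;
}
#endif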
773
774int rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb)
775{
776 /* What is there to lock? Should/Can we fake this? */
777 return VERR_NOT_SUPPORTED;
778}
779
780
781int rtR0MemObjNativeReserveKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pvFixed, size_t cb, size_t uAlignment)
782{
783#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
784 const size_t cPages = cb >> PAGE_SHIFT;
785 struct page *pDummyPage;
786 struct page **papPages;
787
788 /* check for unsupported stuff. */
789 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
790 AssertMsgReturn(uAlignment <= PAGE_SIZE, ("%#x\n", uAlignment), VERR_NOT_SUPPORTED);
791
792 /*
793 * Allocate a dummy page and create a page pointer array for vmap such that
794 * the dummy page is mapped all over the reserved area.
795 */
796 pDummyPage = alloc_page(GFP_HIGHUSER);
797 if (!pDummyPage)
798 return VERR_NO_MEMORY;
799 papPages = RTMemAlloc(sizeof(*papPages) * cPages);
800 if (papPages)
801 {
802 void *pv;
803 size_t iPage = cPages;
804 while (iPage-- > 0)
805 papPages[iPage] = pDummyPage;
806# ifdef VM_MAP
807 pv = vmap(papPages, cPages, VM_MAP, PAGE_KERNEL_RO);
808# else
809 pv = vmap(papPages, cPages, VM_ALLOC, PAGE_KERNEL_RO);
810# endif
811 RTMemFree(papPages);
812 if (pv)
813 {
814 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
815 if (pMemLnx)
816 {
817 pMemLnx->Core.u.ResVirt.R0Process = NIL_RTR0PROCESS;
818 pMemLnx->cPages = 1;
819 pMemLnx->apPages[0] = pDummyPage;
820 *ppMem = &pMemLnx->Core;
821 return VINF_SUCCESS;
822 }
823 vunmap(pv);
824 }
825 }
826 __free_page(pDummyPage);
827 return VERR_NO_MEMORY;
828
829#else /* < 2.4.22 */
830 /*
831 * Could probably use ioremap here, but the caller is in a better position than us
832 * to select some safe physical memory.
833 */
834 return VERR_NOT_SUPPORTED;
835#endif
836}
837
838
839/**
840 * Worker for rtR0MemObjNativeReserveUser and rtR0MemObjNativeMapUser that creates
841 * an empty user space mapping.
842 *
843 * The caller takes care of acquiring the mmap_sem of the task.
844 *
845 * @returns Pointer to the mapping.
846 * (void *)-1 on failure.
847 * @param R3PtrFixed (RTR3PTR)-1 if anywhere, otherwise a specific location.
848 * @param cb The size of the mapping.
849 * @param uAlignment The alignment of the mapping.
850 * @param pTask The Linux task to create this mapping in.
851 * @param fProt The RTMEM_PROT_* mask.
852 */
853static void *rtR0MemObjLinuxDoMmap(RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, struct task_struct *pTask, unsigned fProt)
854{
855 unsigned fLnxProt;
856 unsigned long ulAddr;
857
858 /*
859 * Convert from IPRT protection to mman.h PROT_ and call do_mmap.
860 */
861 fProt &= (RTMEM_PROT_NONE | RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC);
862 if (fProt == RTMEM_PROT_NONE)
863 fLnxProt = PROT_NONE;
864 else
865 {
866 fLnxProt = 0;
867 if (fProt & RTMEM_PROT_READ)
868 fLnxProt |= PROT_READ;
869 if (fProt & RTMEM_PROT_WRITE)
870 fLnxProt |= PROT_WRITE;
871 if (fProt & RTMEM_PROT_EXEC)
872 fLnxProt |= PROT_EXEC;
873 }
874
875 if (R3PtrFixed != (RTR3PTR)-1)
876 ulAddr = do_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
877 else
878 {
879 ulAddr = do_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
880 if ( !(ulAddr & ~PAGE_MASK)
881 && (ulAddr & (uAlignment - 1)))
882 {
883 /** @todo implement uAlignment properly... We'll probably need to make some dummy mappings to fill
884 * up alignment gaps. This is of course complicated by fragmentation (which we might have caused
885 * ourselves) and further by there being two mmap strategies (top / bottom). */
886 /* For now, just ignore uAlignment requirements... */
887 }
888 }
889 if (ulAddr & ~PAGE_MASK) /* ~PAGE_MASK == PAGE_OFFSET_MASK */
890 return (void *)-1;
891 return (void *)ulAddr;
892}
893
894
895int rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, RTR0PROCESS R0Process)
896{
897 PRTR0MEMOBJLNX pMemLnx;
898 void *pv;
899 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
900 if (!pTask)
901 return VERR_NOT_SUPPORTED;
902
903 /*
904 * Let rtR0MemObjLinuxDoMmap do the difficult bits.
905 */
906 down_write(&pTask->mm->mmap_sem);
907 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, cb, uAlignment, pTask, RTMEM_PROT_NONE);
908 up_write(&pTask->mm->mmap_sem);
909 if (pv == (void *)-1)
910 return VERR_NO_MEMORY;
911
912 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
913 if (!pMemLnx)
914 {
915 down_write(&pTask->mm->mmap_sem);
916 MY_DO_MUNMAP(pTask->mm, (unsigned long)pv, cb);
917 up_write(&pTask->mm->mmap_sem);
918 return VERR_NO_MEMORY;
919 }
920
921 pMemLnx->Core.u.ResVirt.R0Process = R0Process;
922 *ppMem = &pMemLnx->Core;
923 return VINF_SUCCESS;
924}
925
926
927int rtR0MemObjNativeMapKernel(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, void *pvFixed, size_t uAlignment, unsigned fProt)
928{
929 int rc = VERR_NO_MEMORY;
930 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
931 PRTR0MEMOBJLNX pMemLnx;
932
933 /* Fail if requested to do something we can't. */
934 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
935 AssertMsgReturn(uAlignment <= PAGE_SIZE, ("%#x\n", uAlignment), VERR_NOT_SUPPORTED);
936
937 /*
938 * Create the IPRT memory object.
939 */
940 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
941 if (pMemLnx)
942 {
943 if (pMemLnxToMap->cPages)
944 {
945#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
946 /*
947 * Use vmap - 2.4.22 and later.
948 */
949 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, true /* kernel */);
950# ifdef VM_MAP
951 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_MAP, fPg);
952# else
953 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_ALLOC, fPg);
954# endif
955 if (pMemLnx->Core.pv)
956 {
957 pMemLnx->fMappedToRing0 = true;
958 rc = VINF_SUCCESS;
959 }
960 else
961 rc = VERR_MAP_FAILED;
962
963#else /* < 2.4.22 */
964 /*
965 * Only option here is to share mappings if possible and forget about fProt.
966 */
967 if (rtR0MemObjIsRing3(pMemToMap))
968 rc = VERR_NOT_SUPPORTED;
969 else
970 {
971 rc = VINF_SUCCESS;
972 if (!pMemLnxToMap->Core.pv)
973 rc = rtR0MemObjLinuxVMap(pMemLnxToMap, !!(fProt & RTMEM_PROT_EXEC));
974 if (RT_SUCCESS(rc))
975 {
976 Assert(pMemLnxToMap->Core.pv);
977 pMemLnx->Core.pv = pMemLnxToMap->Core.pv;
978 }
979 }
980#endif
981 }
982 else
983 {
984 /*
985 * MMIO / physical memory.
986 */
987 Assert(pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS && !pMemLnxToMap->Core.u.Phys.fAllocated);
988 pMemLnx->Core.pv = ioremap(pMemLnxToMap->Core.u.Phys.PhysBase, pMemLnxToMap->Core.cb);
989 if (pMemLnx->Core.pv)
990 {
991 /** @todo fix protection. */
992 rc = VINF_SUCCESS;
993 }
994 }
995 if (RT_SUCCESS(rc))
996 {
997 pMemLnx->Core.u.Mapping.R0Process = NIL_RTR0PROCESS;
998 *ppMem = &pMemLnx->Core;
999 return VINF_SUCCESS;
1000 }
1001 rtR0MemObjDelete(&pMemLnx->Core);
1002 }
1003
1004 return rc;
1005}
1006
1007
1008int rtR0MemObjNativeMapUser(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, RTR3PTR R3PtrFixed, size_t uAlignment, unsigned fProt, RTR0PROCESS R0Process)
1009{
1010 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
1011 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
1012 int rc = VERR_NO_MEMORY;
1013 PRTR0MEMOBJLNX pMemLnx;
1014
1015 /*
1016 * Check for restrictions.
1017 */
1018 if (!pTask)
1019 return VERR_NOT_SUPPORTED;
1020
1021 /*
1022 * Create the IPRT memory object.
1023 */
1024 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
1025 if (pMemLnx)
1026 {
1027 /*
1028 * Allocate user space mapping.
1029 */
1030 void *pv;
1031 down_write(&pTask->mm->mmap_sem);
1032 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, pMemLnxToMap->Core.cb, uAlignment, pTask, fProt);
1033 if (pv != (void *)-1)
1034 {
1035 /*
1036 * Map page by page into the mmap area.
1037 * This is generic, paranoid and not very efficient.
1038 */
1039 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, false /* user */);
1040 unsigned long ulAddrCur = (unsigned long)pv;
1041 const size_t cPages = pMemLnxToMap->Core.cb >> PAGE_SHIFT;
1042 size_t iPage;
1043 rc = 0;
1044 if (pMemLnxToMap->cPages)
1045 {
1046 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1047 {
1048#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1049 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1050 AssertBreak(vma, rc = VERR_INTERNAL_ERROR);
1051#endif
1052
1053#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1054 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1055#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1056 rc = remap_page_range(vma, ulAddrCur, page_to_phys(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1057#else /* 2.4 */
1058 rc = remap_page_range(ulAddrCur, page_to_phys(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1059#endif
1060 if (rc)
1061 break;
1062 }
1063 }
1064 else
1065 {
1066 RTHCPHYS Phys;
1067 if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS)
1068 Phys = pMemLnxToMap->Core.u.Phys.PhysBase;
1069 else if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_CONT)
1070 Phys = pMemLnxToMap->Core.u.Cont.Phys;
1071 else
1072 {
1073 AssertMsgFailed(("%d\n", pMemLnxToMap->Core.enmType));
1074 Phys = NIL_RTHCPHYS;
1075 }
1076 if (Phys != NIL_RTHCPHYS)
1077 {
1078 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE, Phys += PAGE_SIZE)
1079 {
1080#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1081 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1082 AssertBreak(vma, rc = VERR_INTERNAL_ERROR);
1083#endif
1084
1085#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1086 rc = remap_pfn_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1087#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1088 rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1089#else /* 2.4 */
1090 rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
1091#endif
1092 if (rc)
1093 break;
1094 }
1095 }
1096 }
1097 if (!rc)
1098 {
1099 up_write(&pTask->mm->mmap_sem);
1100
1101 pMemLnx->Core.pv = pv;
1102 pMemLnx->Core.u.Mapping.R0Process = R0Process;
1103 *ppMem = &pMemLnx->Core;
1104 return VINF_SUCCESS;
1105 }
1106
1107 /*
1108 * Bail out.
1109 */
1110 MY_DO_MUNMAP(pTask->mm, (unsigned long)pv, pMemLnxToMap->Core.cb);
1111 if (rc != VERR_INTERNAL_ERROR)
1112 rc = VERR_NO_MEMORY;
1113 }
1114
1115 up_write(&pTask->mm->mmap_sem);
1116
1117 rtR0MemObjDelete(&pMemLnx->Core);
1118 }
1119
1120 return rc;
1121}
1122
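/*
 * Illustrative sketch, not part of the checked-in revision: combining a
 * contiguous ring-0 allocation with a read/write mapping into the current user
 * process using the functions above. Cleanup and error handling are trimmed.
 * Compiled out.
 */
#if 0
static int rtR0MemObjLinuxMapIntoCallerExample(size_t cb)
{
    PRTR0MEMOBJINTERNAL pMemCont;
    int rc = rtR0MemObjNativeAllocCont(&pMemCont, cb, false /* fExecutable */);
    if (RT_SUCCESS(rc))
    {
        PRTR0MEMOBJINTERNAL pMapping;
        rc = rtR0MemObjNativeMapUser(&pMapping, pMemCont, (RTR3PTR)-1 /* anywhere */, PAGE_SIZE,
                                     RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
        /* on success the ring-3 address is pMapping->pv */
    }
    return rc;
}
#endif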
1123
1124RTHCPHYS rtR0MemObjNativeGetPagePhysAddr(PRTR0MEMOBJINTERNAL pMem, size_t iPage)
1125{
1126 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
1127
1128 if (pMemLnx->cPages)
1129 return page_to_phys(pMemLnx->apPages[iPage]);
1130
1131 switch (pMemLnx->Core.enmType)
1132 {
1133 case RTR0MEMOBJTYPE_CONT:
1134 return pMemLnx->Core.u.Cont.Phys + (iPage << PAGE_SHIFT);
1135
1136 case RTR0MEMOBJTYPE_PHYS:
1137 return pMemLnx->Core.u.Phys.PhysBase + (iPage << PAGE_SHIFT);
1138
1139 /* the parent knows */
1140 case RTR0MEMOBJTYPE_MAPPING:
1141 return rtR0MemObjNativeGetPagePhysAddr(pMemLnx->Core.uRel.Child.pParent, iPage);
1142
1143 /* cPages > 0 */
1144 case RTR0MEMOBJTYPE_LOW:
1145 case RTR0MEMOBJTYPE_LOCK:
1146 case RTR0MEMOBJTYPE_PHYS_NC:
1147 case RTR0MEMOBJTYPE_PAGE:
1148 default:
1149 AssertMsgFailed(("%d\n", pMemLnx->Core.enmType));
1150 /* fall thru */
1151
1152 case RTR0MEMOBJTYPE_RES_VIRT:
1153 return NIL_RTHCPHYS;
1154 }
1155}
1156
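/*
 * Illustrative sketch, not part of the checked-in revision: walking an object
 * and logging the physical address of every page, e.g. when building a
 * scatter/gather list for a device. Compiled out.
 */
#if 0
static void rtR0MemObjLinuxDumpPhysAddrsExample(PRTR0MEMOBJINTERNAL pMem)
{
    size_t cPages = pMem->cb >> PAGE_SHIFT;
    size_t iPage;
    for (iPage = 0; iPage < cPages; iPage++)
        Log(("page %u -> %RHp\n", (unsigned)iPage, rtR0MemObjNativeGetPagePhysAddr(pMem, iPage)));
}
#endif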