VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c@97905

Last change on this file since 97905 was 97905, checked in by vboxsync, 2 years ago

IPRT,SUPDrv: Dropping RTR0MemExecDonate and associated SUPDrv-linux code, it's not needed since linux started using RTR0MemObjAllocPage for r0 images. bugref:9801

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 14.2 KB
/* $Id: alloc-r0drv-linux.c 97905 2022-12-29 18:22:23Z vboxsync $ */
/** @file
 * IPRT - Memory Allocation, Ring-0 Driver, Linux.
 */

/*
 * Copyright (C) 2006-2022 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.215389.xyz.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * The contents of this file may alternatively be used under the terms
 * of the Common Development and Distribution License Version 1.0
 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
 * in the VirtualBox distribution, in which case the provisions of the
 * CDDL are applicable instead of those of the GPL.
 *
 * You may elect to license modified versions of this file under the
 * terms and conditions of either the GPL or the CDDL or both.
 *
 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include "the-linux-kernel.h"
#include "internal/iprt.h"
#include <iprt/mem.h>

#include <iprt/assert.h>
#include <iprt/errcore.h>
#include "r0drv/alloc-r0drv.h"

#if (defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)) && !defined(RTMEMALLOC_EXEC_HEAP)
# if RTLNX_VER_MIN(2,6,23) && RTLNX_VER_MAX(5,8,0) && !RTLNX_RHEL_MAJ_PREREQ(8,5)
/**
 * Starting with 2.6.23 we can use __get_vm_area and map_vm_area to allocate
 * memory in the module range.  This is preferable to the exec heap below.
 */
#  define RTMEMALLOC_EXEC_VM_AREA
# else
/**
 * We need memory in the module range (~2GB to ~0); this can only be obtained
 * through APIs that are not exported (see module_alloc()).
 *
 * So, we'll have to create a quick and dirty heap here using BSS memory.
 * Very annoying and it's going to restrict us!
 */
#  define RTMEMALLOC_EXEC_HEAP
# endif
#endif
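
/*
 * Illustrative note (not in the original file): on AMD64 the version gate
 * above selects RTMEMALLOC_EXEC_VM_AREA for kernels from 2.6.23 up to (but
 * not including) 5.8; RHEL 8.5+ is excluded as well, presumably because it
 * backports the 5.8 changes that removed the needed vmalloc APIs.  All other
 * AMD64 kernels fall back to the BSS-backed exec heap.
 */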

#ifdef RTMEMALLOC_EXEC_HEAP
# include <iprt/heap.h>
# include <iprt/spinlock.h>
# include <iprt/errcore.h>
#endif

#include "internal/initterm.h"


/*********************************************************************************************************************************
*   Structures and Typedefs                                                                                                      *
*********************************************************************************************************************************/
#ifdef RTMEMALLOC_EXEC_VM_AREA
/**
 * Extended header used for headers marked with RTMEMHDR_FLAG_EXEC_VM_AREA.
 *
 * This is used with allocating executable memory, for things like generated
 * code and loaded modules.
 */
typedef struct RTMEMLNXHDREX
{
    /** The VM area for this allocation. */
    struct vm_struct   *pVmArea;
    void               *pvDummy;
    /** The header we present to the generic API. */
    RTMEMHDR            Hdr;
} RTMEMLNXHDREX;
AssertCompileSize(RTMEMLNXHDREX, 32);
/** Pointer to an extended memory header. */
typedef RTMEMLNXHDREX *PRTMEMLNXHDREX;
#endif
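
/*
 * Illustrative note (not in the original file): for an EXEC_VM_AREA
 * allocation the extended header sits at the very start of the mapped
 * VM area, so the layout is:
 *
 *     pVmArea->addr -> [ RTMEMLNXHDREX: pVmArea, pvDummy, Hdr ][ user data ... ]
 *
 * The generic API only ever sees &pHdrEx->Hdr; rtR0MemFree() recovers the
 * extended header from it via RT_FROM_MEMBER(pHdr, RTMEMLNXHDREX, Hdr).
 */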


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
#ifdef RTMEMALLOC_EXEC_HEAP
/** The heap. */
static RTHEAPSIMPLE g_HeapExec = NIL_RTHEAPSIMPLE;
/** Spinlock protecting the heap. */
static RTSPINLOCK   g_HeapExecSpinlock = NIL_RTSPINLOCK;
#endif

/**
 * API for cleaning up the heap spinlock on IPRT termination.
 * Like RTMemExecDonate, this is specific to AMD64 Linux/GNU.
 */
DECLHIDDEN(void) rtR0MemExecCleanup(void)
{
#ifdef RTMEMALLOC_EXEC_HEAP
    RTSpinlockDestroy(g_HeapExecSpinlock);
    g_HeapExecSpinlock = NIL_RTSPINLOCK;
#endif
}


#ifdef RTMEMALLOC_EXEC_VM_AREA
/**
 * Allocate executable kernel memory in the module range.
 *
 * @returns Pointer to an allocation header on success.  NULL on failure.
 *
 * @param   cb      The size the user requested.
 */
static PRTMEMHDR rtR0MemAllocExecVmArea(size_t cb)
{
    size_t const        cbAlloc = RT_ALIGN_Z(sizeof(RTMEMLNXHDREX) + cb, PAGE_SIZE);
    size_t const        cPages  = cbAlloc >> PAGE_SHIFT;
    struct page       **papPages;
    struct vm_struct   *pVmArea;
    size_t              iPage;

    pVmArea = __get_vm_area(cbAlloc, VM_ALLOC, MODULES_VADDR, MODULES_END);
    if (!pVmArea)
        return NULL;
    pVmArea->nr_pages = 0;    /* paranoia? */
    pVmArea->pages    = NULL; /* paranoia? */

    papPages = (struct page **)kmalloc(cPages * sizeof(papPages[0]), GFP_KERNEL | __GFP_NOWARN);
    if (!papPages)
    {
        vunmap(pVmArea->addr);
        return NULL;
    }

    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN);
        if (!papPages[iPage])
            break;
    }
    if (iPage == cPages)
    {
        /*
         * Map the pages.
         *
         * Not entirely sure we really need to set nr_pages and pages here, but
         * they provide a very convenient place for storing something we need
         * in the free function, if nothing else...
         */
# if RTLNX_VER_MAX(3,17,0)
        struct page **papPagesIterator = papPages;
# endif
        pVmArea->nr_pages = cPages;
        pVmArea->pages    = papPages;
        if (!map_vm_area(pVmArea, PAGE_KERNEL_EXEC,
# if RTLNX_VER_MAX(3,17,0)
                         &papPagesIterator
# else
                         papPages
# endif
                         ))
        {
            PRTMEMLNXHDREX pHdrEx = (PRTMEMLNXHDREX)pVmArea->addr;
            pHdrEx->pVmArea = pVmArea;
            pHdrEx->pvDummy = NULL;
            return &pHdrEx->Hdr;
        }
        /* bail out */
# if RTLNX_VER_MAX(3,17,0)
        pVmArea->nr_pages = papPagesIterator - papPages;
# endif
    }

    vunmap(pVmArea->addr);

    while (iPage-- > 0)
        __free_page(papPages[iPage]);
    kfree(papPages);

    return NULL;
}
#endif /* RTMEMALLOC_EXEC_VM_AREA */
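
/*
 * Illustrative note (not in the original file): map_vm_area() follows the
 * usual kernel convention of returning 0 on success, which is why the
 * success path above is guarded by if (!map_vm_area(...)).  On kernels
 * older than 3.17 the function advances the page-pointer iterator as it
 * maps, so on failure the distance papPagesIterator - papPages tells the
 * error path how many pages were actually consumed.
 */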


/**
 * OS specific allocation function.
 */
DECLHIDDEN(int) rtR0MemAllocEx(size_t cb, uint32_t fFlags, PRTMEMHDR *ppHdr)
{
    PRTMEMHDR pHdr;
    IPRT_LINUX_SAVE_EFL_AC();

    /*
     * Allocate.
     */
    if (fFlags & RTMEMHDR_FLAG_EXEC)
    {
        if (fFlags & RTMEMHDR_FLAG_ANY_CTX)
            return VERR_NOT_SUPPORTED;

#if defined(RT_ARCH_AMD64)
# ifdef RTMEMALLOC_EXEC_HEAP
        if (g_HeapExec != NIL_RTHEAPSIMPLE)
        {
            RTSpinlockAcquire(g_HeapExecSpinlock);
            pHdr = (PRTMEMHDR)RTHeapSimpleAlloc(g_HeapExec, cb + sizeof(*pHdr), 0);
            RTSpinlockRelease(g_HeapExecSpinlock);
            fFlags |= RTMEMHDR_FLAG_EXEC_HEAP;
        }
        else
            pHdr = NULL;

# elif defined(RTMEMALLOC_EXEC_VM_AREA)
        pHdr = rtR0MemAllocExecVmArea(cb);
        fFlags |= RTMEMHDR_FLAG_EXEC_VM_AREA;

# else  /* !RTMEMALLOC_EXEC_HEAP && !RTMEMALLOC_EXEC_VM_AREA */
#  error "you do not want to go here..."
        pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, MY_PAGE_KERNEL_EXEC);
# endif /* !RTMEMALLOC_EXEC_HEAP && !RTMEMALLOC_EXEC_VM_AREA */

#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
# if RTLNX_VER_MIN(5,8,0)
        AssertMsgFailed(("This point should not be reached, please file a bug\n"));
        pHdr = NULL;
# else
        pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, MY_PAGE_KERNEL_EXEC);
# endif
#else
        pHdr = (PRTMEMHDR)vmalloc(cb + sizeof(*pHdr));
#endif
    }
    else
    {
        if (
#if 1 /* vmalloc has serious performance issues, avoid it. */
               cb <= PAGE_SIZE*16 - sizeof(*pHdr)
#else
               cb <= PAGE_SIZE
#endif
            || (fFlags & RTMEMHDR_FLAG_ANY_CTX)
           )
        {
            fFlags |= RTMEMHDR_FLAG_KMALLOC;
            pHdr = kmalloc(cb + sizeof(*pHdr),
                           fFlags & RTMEMHDR_FLAG_ANY_CTX_ALLOC ? GFP_ATOMIC | __GFP_NOWARN
                                                                : GFP_KERNEL | __GFP_NOWARN);
            if (RT_UNLIKELY(   !pHdr
                            && cb > PAGE_SIZE
                            && !(fFlags & RTMEMHDR_FLAG_ANY_CTX) ))
            {
                fFlags &= ~RTMEMHDR_FLAG_KMALLOC;
                pHdr = vmalloc(cb + sizeof(*pHdr));
            }
        }
        else
            pHdr = vmalloc(cb + sizeof(*pHdr));
    }
    if (RT_UNLIKELY(!pHdr))
    {
        IPRT_LINUX_RESTORE_EFL_AC();
        return VERR_NO_MEMORY;
    }

    /*
     * Initialize.
     */
    pHdr->u32Magic = RTMEMHDR_MAGIC;
    pHdr->fFlags   = fFlags;
    pHdr->cb       = cb;
    pHdr->cbReq    = cb;

    *ppHdr = pHdr;
    IPRT_LINUX_RESTORE_EFL_AC();
    return VINF_SUCCESS;
}
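
/*
 * Illustrative note (not in the original file): with 4 KiB pages the
 * non-exec path above prefers kmalloc for requests up to 16 pages minus
 * the header (64 KiB - sizeof(RTMEMHDR)) and for any-context allocations,
 * falling back to vmalloc only when a larger kmalloc fails and the caller
 * can tolerate it (i.e. RTMEMHDR_FLAG_ANY_CTX is not set).
 */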


/**
 * OS specific free function.
 */
DECLHIDDEN(void) rtR0MemFree(PRTMEMHDR pHdr)
{
    IPRT_LINUX_SAVE_EFL_AC();

    pHdr->u32Magic += 1;
    if (pHdr->fFlags & RTMEMHDR_FLAG_KMALLOC)
        kfree(pHdr);
#ifdef RTMEMALLOC_EXEC_HEAP
    else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_HEAP)
    {
        RTSpinlockAcquire(g_HeapExecSpinlock);
        RTHeapSimpleFree(g_HeapExec, pHdr);
        RTSpinlockRelease(g_HeapExecSpinlock);
    }
#endif
#ifdef RTMEMALLOC_EXEC_VM_AREA
    else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_VM_AREA)
    {
        PRTMEMLNXHDREX pHdrEx    = RT_FROM_MEMBER(pHdr, RTMEMLNXHDREX, Hdr);
        size_t         iPage     = pHdrEx->pVmArea->nr_pages;
        struct page  **papPages  = pHdrEx->pVmArea->pages;
        void          *pvMapping = pHdrEx->pVmArea->addr;

        vunmap(pvMapping);

        while (iPage-- > 0)
            __free_page(papPages[iPage]);
        kfree(papPages);
    }
#endif
    else
        vfree(pHdr);

    IPRT_LINUX_RESTORE_EFL_AC();
}


/**
 * Computes the allocation order; several of the page allocators used below
 * allocate 2^order pages.
 *
 * @returns order.
 * @param   cPages      Number of pages.
 */
static int CalcPowerOf2Order(unsigned long cPages)
{
    int           iOrder;
    unsigned long cTmp;

    for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
        ;
    if (cPages & ~(1 << iOrder))
        ++iOrder;

    return iOrder;
}
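
/*
 * Illustrative examples (not in the original file):
 *   CalcPowerOf2Order(1) -> 0   (2^0 = 1 page)
 *   CalcPowerOf2Order(2) -> 1   (2^1 = 2 pages)
 *   CalcPowerOf2Order(3) -> 2   (rounded up to 2^2 = 4 pages)
 *   CalcPowerOf2Order(4) -> 2
 * i.e. the result is ceil(log2(cPages)), so a non-power-of-two request
 * over-allocates by up to almost half of the returned block.
 */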


/**
 * Allocates physically contiguous memory (below 4GB).
 * The allocation is page aligned and the content is undefined.
 *
 * @returns Pointer to the memory block. This is page aligned.
 * @param   pPhys   Where to store the physical address.
 * @param   cb      The allocation size in bytes.  This is always
 *                  rounded up to PAGE_SIZE.
 */
RTR0DECL(void *) RTMemContAlloc(PRTCCPHYS pPhys, size_t cb)
{
    int           cOrder;
    unsigned      cPages;
    struct page  *paPages;
    void         *pvRet;
    IPRT_LINUX_SAVE_EFL_AC();

    /*
     * Validate input.
     */
    AssertPtr(pPhys);
    Assert(cb > 0);

    /*
     * Compute the order and allocate the pages.
     */
    cb     = RT_ALIGN_Z(cb, PAGE_SIZE);
    cPages = cb >> PAGE_SHIFT;
    cOrder = CalcPowerOf2Order(cPages);
#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
    /* ZONE_DMA32: 0-4GB */
    paPages = alloc_pages(GFP_DMA32 | __GFP_NOWARN, cOrder);
    if (!paPages)
#endif
#ifdef RT_ARCH_AMD64
        /* ZONE_DMA: 0-16MB */
        paPages = alloc_pages(GFP_DMA | __GFP_NOWARN, cOrder);
#else
        /* ZONE_NORMAL: 0-896MB */
        paPages = alloc_pages(GFP_USER | __GFP_NOWARN, cOrder);
#endif
    if (paPages)
    {
        /*
         * Reserve the pages.
         */
        unsigned iPage;
        for (iPage = 0; iPage < cPages; iPage++)
        {
            Assert(!PageHighMem(&paPages[iPage]));
            if (iPage + 1 < cPages)
            {
                AssertMsg(   (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage])) + PAGE_SIZE
                          == (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage + 1]))
                          &&    page_to_phys(&paPages[iPage]) + PAGE_SIZE
                             == page_to_phys(&paPages[iPage + 1]),
                          ("iPage=%i cPages=%u [0]=%#llx,%p [1]=%#llx,%p\n", iPage, cPages,
                           (long long)page_to_phys(&paPages[iPage]),     phys_to_virt(page_to_phys(&paPages[iPage])),
                           (long long)page_to_phys(&paPages[iPage + 1]), phys_to_virt(page_to_phys(&paPages[iPage + 1])) ));
            }

            SetPageReserved(&paPages[iPage]);
        }
        *pPhys = page_to_phys(paPages);
        pvRet  = phys_to_virt(page_to_phys(paPages));
    }
    else
        pvRet = NULL;

    IPRT_LINUX_RESTORE_EFL_AC();
    return pvRet;
}
RT_EXPORT_SYMBOL(RTMemContAlloc);


/**
 * Frees memory allocated using RTMemContAlloc().
 *
 * @param   pv      Pointer returned by RTMemContAlloc().
 * @param   cb      The cb parameter passed to RTMemContAlloc().
 */
RTR0DECL(void) RTMemContFree(void *pv, size_t cb)
{
    if (pv)
    {
        int          cOrder;
        unsigned     cPages;
        unsigned     iPage;
        struct page *paPages;
        IPRT_LINUX_SAVE_EFL_AC();

        /* validate */
        AssertMsg(!((uintptr_t)pv & PAGE_OFFSET_MASK), ("pv=%p\n", pv));
        Assert(cb > 0);

        /* calc order and get pages */
        cb      = RT_ALIGN_Z(cb, PAGE_SIZE);
        cPages  = cb >> PAGE_SHIFT;
        cOrder  = CalcPowerOf2Order(cPages);
        paPages = virt_to_page(pv);

        /*
         * Restore page attributes and free the pages.
         */
        for (iPage = 0; iPage < cPages; iPage++)
        {
            ClearPageReserved(&paPages[iPage]);
        }
        __free_pages(paPages, cOrder);
        IPRT_LINUX_RESTORE_EFL_AC();
    }
}
RT_EXPORT_SYMBOL(RTMemContFree);

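#if 0
/*
 * Illustrative usage sketch (not part of the original file): shows how a
 * ring-0 caller might pair RTMemContAlloc() and RTMemContFree() for a small
 * device buffer.  The function and variable names here are invented for the
 * example.
 */
static int exampleContBufferRoundTrip(void)
{
    RTCCPHYS     PhysAddr;
    size_t const cbBuf = 2 * PAGE_SIZE;   /* RTMemContAlloc rounds up to PAGE_SIZE anyway */
    void        *pvBuf = RTMemContAlloc(&PhysAddr, cbBuf);
    if (!pvBuf)
        return VERR_NO_MEMORY;

    /* ... program the device with PhysAddr and access the buffer via pvBuf ... */

    RTMemContFree(pvBuf, cbBuf);          /* pass the same cb as at allocation time */
    return VINF_SUCCESS;
}
#endif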