VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/math/bignum-amd64-x86.asm@ 52291

Last change on this file since 52291 was 52291, checked in by vboxsync, 11 years ago

RTBigNum: Forgot to add the assembly file. Oops.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 15.3 KB
Line 
1; $Id: bignum-amd64-x86.asm 52291 2014-08-06 10:23:10Z vboxsync $
2;; @file
3; IPRT - Big Integer Numbers, AMD64 and X86 Assembly Workers
4;
5
6;
7; Copyright (C) 2006-2014 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.215389.xyz. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17; The contents of this file may alternatively be used under the terms
18; of the Common Development and Distribution License Version 1.0
19; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20; VirtualBox OSE distribution, in which case the provisions of the
21; CDDL are applicable instead of those of the GPL.
22;
23; You may elect to license modified versions of this file under the
24; terms and conditions of either the GPL or the CDDL or both.
25;
26
27
28%define RT_ASM_WITH_SEH64
29%include "iprt/asmdefs.mac"
30%include "internal/bignum.mac"
31
32
33BEGINCODE
34
;;
; Subtracts a number (pauSubtrahend) from a larger number (pauMinuend) and
; stores the result in pauResult.
;
; All three numbers are zero padded such that a borrow can be carried one (or
; two for 64-bit) elements beyond the end of the largest number.
;
; On AMD64 the arrays are always processed in 64-bit words; when the element
; size is 32-bit, cUsed is therefore rounded up to an even count and halved
; first.  The bulk of the work is done eight words per round, the remaining
; (up to seven) words one at a time.  The borrow is kept in CF throughout, so
; only CF-preserving instructions (mov, lea, dec) are used inside the loops.
;
; A cUsed of zero is a no-op: nothing is read or written.
;
; @returns  nothing.
; @param    pauResult       x86:[ebp +  8]  gcc:rdi  msc:rcx
; @param    pauMinuend      x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    pauSubtrahend   x86:[ebp + 16]  gcc:rdx  msc:r8
; @param    cUsed           x86:[ebp + 20]  gcc:rcx  msc:r9
;
BEGINPROC rtBigNumMagnitudeSubAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_GCC
  %define pauResult         rdi
  %define pauMinuend        rsi
  %define pauSubtrahend     rdx
  %define cUsed             ecx
 %else
  %define pauResult         rcx
  %define pauMinuend        rdx
  %define pauSubtrahend     r8
  %define cUsed             r9d
 %endif
        xor     r11d, r11d              ; index register (byte offset).

 %if RTBIGNUM_ELEMENT_SIZE == 4
        add     cUsed, 1                ; cUsed = RT_ALIGN(cUsed, 2) / 2
        shr     cUsed, 1
 %endif
        test    cUsed, cUsed            ; Empty input: the loops below count
        jz      .done                   ; down with dec/jnz and would wrap.
        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        mov     r10d, cUsed
        shr     r10d, 3                 ; r10d = number of 8-word rounds (>= 1).
        clc                             ; No borrow going in.
.big_loop:
        mov     rax, [pauMinuend + r11]
        sbb     rax, [pauSubtrahend + r11]
        mov     [pauResult + r11], rax
        mov     rax, [pauMinuend + r11 + 8]
        sbb     rax, [pauSubtrahend + r11 + 8]
        mov     [pauResult + r11 + 8], rax
        mov     rax, [pauMinuend + r11 + 16]
        sbb     rax, [pauSubtrahend + r11 + 16]
        mov     [pauResult + r11 + 16], rax
        mov     rax, [pauMinuend + r11 + 24]
        sbb     rax, [pauSubtrahend + r11 + 24]
        mov     [pauResult + r11 + 24], rax
        mov     rax, [pauMinuend + r11 + 32]
        sbb     rax, [pauSubtrahend + r11 + 32]
        mov     [pauResult + r11 + 32], rax
        mov     rax, [pauMinuend + r11 + 40]
        sbb     rax, [pauSubtrahend + r11 + 40]
        mov     [pauResult + r11 + 40], rax
        mov     rax, [pauMinuend + r11 + 48]
        sbb     rax, [pauSubtrahend + r11 + 48]
        mov     [pauResult + r11 + 48], rax
        mov     rax, [pauMinuend + r11 + 56]
        sbb     rax, [pauSubtrahend + r11 + 56]
        mov     [pauResult + r11 + 56], rax
        lea     r11, [r11 + 64]         ; lea does not touch the flags.
        dec     r10d                    ; Does not change CF.
        jnz     .big_loop

        lahf                            ; Save CF (the 'and' below clobbers it).
        and     cUsed, 7                ; Up to seven odd rounds.
        jz      .done
        sahf                            ; Restore CF.
        jmp     .small_loop             ; Skip the clc, CF holds the borrow.

.small_job:
        clc
.small_loop:
        mov     rax, [pauMinuend + r11]
        sbb     rax, [pauSubtrahend + r11]
        mov     [pauResult + r11], rax
        lea     r11, [r11 + 8]
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        jnc     .done                   ; Strict builds: trap if a borrow is
        int3                            ; left over after the last word.
 %endif
.done:

%elifdef RT_ARCH_X86
        push    edi
        push    esi
        push    ebx

        mov     edi, [ebp + 08h]        ; pauResult
 %define pauResult          edi
        mov     ecx, [ebp + 0ch]        ; pauMinuend
 %define pauMinuend         ecx
        mov     edx, [ebp + 10h]        ; pauSubtrahend
 %define pauSubtrahend      edx
        mov     esi, [ebp + 14h]        ; cUsed
 %define cUsed              esi

        xor     ebx, ebx                ; index register (byte offset).

        test    cUsed, cUsed            ; Empty input: the loops below count
        jz      .done                   ; down with dec/jnz and would wrap.
        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        shr     cUsed, 3                ; cUsed = number of 8-word rounds (>= 1).
        clc                             ; No borrow going in.
.big_loop:
        mov     eax, [pauMinuend + ebx]
        sbb     eax, [pauSubtrahend + ebx]
        mov     [pauResult + ebx], eax
        mov     eax, [pauMinuend + ebx + 4]
        sbb     eax, [pauSubtrahend + ebx + 4]
        mov     [pauResult + ebx + 4], eax
        mov     eax, [pauMinuend + ebx + 8]
        sbb     eax, [pauSubtrahend + ebx + 8]
        mov     [pauResult + ebx + 8], eax
        mov     eax, [pauMinuend + ebx + 12]
        sbb     eax, [pauSubtrahend + ebx + 12]
        mov     [pauResult + ebx + 12], eax
        mov     eax, [pauMinuend + ebx + 16]
        sbb     eax, [pauSubtrahend + ebx + 16]
        mov     [pauResult + ebx + 16], eax
        mov     eax, [pauMinuend + ebx + 20]
        sbb     eax, [pauSubtrahend + ebx + 20]
        mov     [pauResult + ebx + 20], eax
        mov     eax, [pauMinuend + ebx + 24]
        sbb     eax, [pauSubtrahend + ebx + 24]
        mov     [pauResult + ebx + 24], eax
        mov     eax, [pauMinuend + ebx + 28]
        sbb     eax, [pauSubtrahend + ebx + 28]
        mov     [pauResult + ebx + 28], eax
        lea     ebx, [ebx + 32]         ; lea does not touch the flags.
        dec     cUsed                   ; Does not change CF.
        jnz     .big_loop

        lahf                            ; Save CF (the 'and' below clobbers it).
        mov     cUsed, [ebp + 14h]      ; Reload the original count.
        and     cUsed, 7                ; Up to seven final rounds.
        jz      .done
        sahf                            ; Restore CF.
        jmp     .small_loop             ; Skip the clc, CF holds the borrow.

.small_job:
        clc
.small_loop:
        mov     eax, [pauMinuend + ebx]
        sbb     eax, [pauSubtrahend + ebx]
        mov     [pauResult + ebx], eax
        lea     ebx, [ebx + 4]
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        jnc     .done                   ; Strict builds: trap if a borrow is
        int3                            ; left over after the last word.
 %endif
.done:

        pop     ebx
        pop     esi
        pop     edi
%else
 %error "Unsupported arch"
%endif

        leave
        ret
%undef pauResult
%undef pauMinuend
%undef pauSubtrahend
%undef cUsed
ENDPROC   rtBigNumMagnitudeSubAssemblyWorker
215
216
217
;;
; Subtracts a number (pauSubtrahend) from a larger number (pauResultMinuend),
; storing the result in place in pauResultMinuend.
;
; Both numbers are zero padded such that a borrow can be carried one (or
; two for 64-bit) elements beyond the end of the largest number.
;
; On AMD64 the arrays are always processed in 64-bit words; when the element
; size is 32-bit, cUsed is therefore rounded up to an even count and halved
; first.  The bulk of the work is done eight words per round, the remaining
; (up to seven) words one at a time.  The borrow is kept in CF throughout, so
; only CF-preserving instructions (mov, lea, dec) are used inside the loops.
;
; A cUsed of zero is a no-op: nothing is read or written.
;
; @returns  nothing.
; @param    pauResultMinuend    x86:[ebp +  8]  gcc:rdi  msc:rcx
; @param    pauSubtrahend       x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    cUsed               x86:[ebp + 16]  gcc:rdx  msc:r8
;
BEGINPROC rtBigNumMagnitudeSubThisAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_GCC
  %define pauResultMinuend  rdi
  %define pauSubtrahend     rsi
  %define cUsed             edx
 %else
  %define pauResultMinuend  rcx
  %define pauSubtrahend     rdx
  %define cUsed             r8d
 %endif
        xor     r11d, r11d              ; index register (byte offset).

 %if RTBIGNUM_ELEMENT_SIZE == 4
        add     cUsed, 1                ; cUsed = RT_ALIGN(cUsed, 2) / 2
        shr     cUsed, 1
 %endif
        test    cUsed, cUsed            ; Empty input: the loops below count
        jz      .done                   ; down with dec/jnz and would wrap.
        ; The big loop handles 8 words per round and runs cUsed / 8 rounds, so
        ; it must only be entered with at least 8 words; with fewer the round
        ; counter would start at zero and wrap on the first dec.
        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        mov     r10d, cUsed
        shr     r10d, 3                 ; r10d = number of 8-word rounds (>= 1).
        clc                             ; No borrow going in.
.big_loop:
        mov     rax, [pauSubtrahend + r11]
        sbb     [pauResultMinuend + r11], rax
        mov     rax, [pauSubtrahend + r11 + 8]
        sbb     [pauResultMinuend + r11 + 8], rax
        mov     rax, [pauSubtrahend + r11 + 16]
        sbb     [pauResultMinuend + r11 + 16], rax
        mov     rax, [pauSubtrahend + r11 + 24]
        sbb     [pauResultMinuend + r11 + 24], rax
        mov     rax, [pauSubtrahend + r11 + 32]
        sbb     [pauResultMinuend + r11 + 32], rax
        mov     rax, [pauSubtrahend + r11 + 40]
        sbb     [pauResultMinuend + r11 + 40], rax
        mov     rax, [pauSubtrahend + r11 + 48]
        sbb     [pauResultMinuend + r11 + 48], rax
        mov     rax, [pauSubtrahend + r11 + 56]
        sbb     [pauResultMinuend + r11 + 56], rax
        lea     r11, [r11 + 64]         ; lea does not touch the flags.
        dec     r10d                    ; Does not change CF.
        jnz     .big_loop

        lahf                            ; Save CF (the 'and' below clobbers it).
        and     cUsed, 7                ; Up to seven odd rounds.
        jz      .done
        sahf                            ; Restore CF.
        jmp     .small_loop             ; Skip the clc, CF holds the borrow.

.small_job:
        clc
.small_loop:
        mov     rax, [pauSubtrahend + r11]
        sbb     [pauResultMinuend + r11], rax
        lea     r11, [r11 + 8]
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        jnc     .done                   ; Strict builds: trap if a borrow is
        int3                            ; left over after the last word.
 %endif
.done:

%elifdef RT_ARCH_X86
        push    edi
        push    ebx

        mov     edi, [ebp + 08h]        ; pauResultMinuend
 %define pauResultMinuend   edi
        mov     edx, [ebp + 0ch]        ; pauSubtrahend
 %define pauSubtrahend      edx
        mov     ecx, [ebp + 10h]        ; cUsed
 %define cUsed              ecx

        xor     ebx, ebx                ; index register (byte offset).

        test    cUsed, cUsed            ; Empty input: the loops below count
        jz      .done                   ; down with dec/jnz and would wrap.
        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        shr     cUsed, 3                ; cUsed = number of 8-word rounds (>= 1).
        clc                             ; No borrow going in.
.big_loop:
        mov     eax, [pauSubtrahend + ebx]
        sbb     [pauResultMinuend + ebx], eax
        mov     eax, [pauSubtrahend + ebx + 4]
        sbb     [pauResultMinuend + ebx + 4], eax
        mov     eax, [pauSubtrahend + ebx + 8]
        sbb     [pauResultMinuend + ebx + 8], eax
        mov     eax, [pauSubtrahend + ebx + 12]
        sbb     [pauResultMinuend + ebx + 12], eax
        mov     eax, [pauSubtrahend + ebx + 16]
        sbb     [pauResultMinuend + ebx + 16], eax
        mov     eax, [pauSubtrahend + ebx + 20]
        sbb     [pauResultMinuend + ebx + 20], eax
        mov     eax, [pauSubtrahend + ebx + 24]
        sbb     [pauResultMinuend + ebx + 24], eax
        mov     eax, [pauSubtrahend + ebx + 28]
        sbb     [pauResultMinuend + ebx + 28], eax
        lea     ebx, [ebx + 32]         ; lea does not touch the flags.
        dec     cUsed                   ; Does not change CF.
        jnz     .big_loop

        lahf                            ; Save CF (the 'and' below clobbers it).
        mov     cUsed, [ebp + 10h]      ; Reload the original count.
        and     cUsed, 7                ; Up to seven odd rounds.
        jz      .done
        sahf                            ; Restore CF.
        jmp     .small_loop             ; Skip the clc, CF holds the borrow.

.small_job:
        clc
.small_loop:
        mov     eax, [pauSubtrahend + ebx]
        sbb     [pauResultMinuend + ebx], eax
        lea     ebx, [ebx + 4]
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        jnc     .done                   ; Strict builds: trap if a borrow is
        int3                            ; left over after the last word.
 %endif
.done:

        pop     ebx
        pop     edi
%else
 %error "Unsupported arch"
%endif

        leave
        ret
%undef pauResultMinuend
%undef pauSubtrahend
%undef cUsed
ENDPROC   rtBigNumMagnitudeSubThisAssemblyWorker
369
370
;;
; Shifts an element array one bit to the left, returning the final carry value.
;
; On 64-bit hosts the array is always zero padded to a multiple of 8 bytes, so
; we can use 64-bit operand sizes even if the element type is 32-bit.
;
; The bit travelling between elements lives in CF: it is seeded from uCarry
; (any non-zero value counts as one), rotated through every element with rcl,
; and finally converted to 0 or 1 in eax.  For an empty array uCarry itself is
; returned unmodified.
;
; @returns  The final carry value.
; @param    pauElements     x86:[ebp +  8]  gcc:rdi  msc:rcx
; @param    cUsed           x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    uCarry          x86:[ebp + 16]  gcc:rdx  msc:r8
;
BEGINPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_GCC
  %define pauElements       rdi
  %define cUsed             esi
  %define uCarry            edx
 %else
  %define pauElements       rcx
  %define cUsed             edx
  %define uCarry            r8d
 %endif
%elifdef RT_ARCH_X86
  %define pauElements       ecx
  %define cUsed             edx
  %define uCarry            eax
        mov     pauElements, [ebp + 08h]
        mov     cUsed, [ebp + 0ch]
        mov     uCarry, [ebp + 10h]
%else
 %error "Unsupported arch."
%endif

        ; Nothing to shift?  Then the incoming carry is also the outgoing one.
        test    cUsed, cUsed
        jz      .no_elements

        ; Fewer than eight elements go straight to the one-at-a-time loop.
        cmp     cUsed, 8
        jb      .small_loop_init

        ;
        ; Big loop - eight elements per round.
        ;
%ifdef RT_ARCH_AMD64
        mov     r11d, cUsed             ; Keep the full count for the tail.
%endif
        shr     cUsed, 3                ; Number of eight element rounds.
        neg     uCarry                  ; CF = (uCarry != 0); uCarry is dead now.
.big_loop:
%if RTBIGNUM_ELEMENT_SIZE == 8
        rcl     qword [pauElements], 1
        rcl     qword [pauElements + 8], 1
        rcl     qword [pauElements + 16], 1
        rcl     qword [pauElements + 24], 1
        rcl     qword [pauElements + 32], 1
        rcl     qword [pauElements + 40], 1
        rcl     qword [pauElements + 48], 1
        rcl     qword [pauElements + 56], 1
        lea     pauElements, [pauElements + 64]
%else
        rcl     dword [pauElements], 1
        rcl     dword [pauElements + 4], 1
        rcl     dword [pauElements + 8], 1
        rcl     dword [pauElements + 12], 1
        rcl     dword [pauElements + 16], 1
        rcl     dword [pauElements + 20], 1
        rcl     dword [pauElements + 24], 1
        rcl     dword [pauElements + 28], 1
        lea     pauElements, [pauElements + 32]
%endif
        dec     cUsed                   ; Leaves CF alone.
        jnz     .big_loop

        ;
        ; Work out how many elements are left (0..7).  The 'and' destroys CF,
        ; so it is parked in ah meanwhile.  sahf also overwrites ZF, which is
        ; why the branch on the 'and' result has to come before it.
        ;
        lahf
%ifdef RT_ARCH_AMD64
        mov     cUsed, r11d
%else
        mov     cUsed, [ebp + 0ch]
%endif
        and     cUsed, 7
        jnz     .tail_after_big_loop
        sahf                            ; No tail: get CF back and return it.
        jmp     .carry_to_eax

.tail_after_big_loop:
        sahf                            ; Get CF back for the odd rounds.
        jmp     .small_loop

        ;
        ; Small loop - one element per round.
        ;
.small_loop_init:
        neg     uCarry                  ; CF = (uCarry != 0); uCarry is dead now.
.small_loop:
%if RTBIGNUM_ELEMENT_SIZE == 8
        rcl     qword [pauElements], 1
        lea     pauElements, [pauElements + 8]
%else
        rcl     dword [pauElements], 1
        lea     pauElements, [pauElements + 4]
%endif
        dec     cUsed                   ; Leaves CF alone.
        jnz     .small_loop

        ; Turn CF into the 0/1 return value.
.carry_to_eax:
        setc    al
        movzx   eax, al
.return:
        leave
        ret

.no_elements:
        mov     eax, uCarry
        jmp     .return
ENDPROC   rtBigNumMagnitudeShiftLeftOneAssemblyWorker
496
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette