VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/base64-utf16.cpp@ 84293

Last change on this file since 84293 was 84293, checked in by vboxsync, 5 years ago

IPRT/base64: Put the UTF-16 code in separate file. Implemented decoding of UTF-16 strings. bugref:9224

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 12.6 KB
Line 
1/* $Id: base64-utf16.cpp 84293 2020-05-13 16:23:25Z vboxsync $ */
2/** @file
3 * IPRT - Base64, MIME content transfer encoding.
4 *
5 * @note The base64.cpp file must be diffable with this one.
6 * Fixes typically apply to both files.
7 */
8
9/*
10 * Copyright (C) 2009-2020 Oracle Corporation
11 *
12 * This file is part of VirtualBox Open Source Edition (OSE), as
13 * available from http://www.215389.xyz. This file is free software;
14 * you can redistribute it and/or modify it under the terms of the GNU
15 * General Public License (GPL) as published by the Free Software
16 * Foundation, in version 2 as it comes in the "COPYING" file of the
17 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
18 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
19 *
20 * The contents of this file may alternatively be used under the terms
21 * of the Common Development and Distribution License Version 1.0
22 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
23 * VirtualBox OSE distribution, in which case the provisions of the
24 * CDDL are applicable instead of those of the GPL.
25 *
26 * You may elect to license modified versions of this file under the
27 * terms and conditions of either the GPL or the CDDL or both.
28 */
29
30
31/*********************************************************************************************************************************
32* Header Files *
33*********************************************************************************************************************************/
34#include <iprt/base64.h>
35#include "internal/iprt.h"
36
37#include <iprt/assert.h>
38#include <iprt/err.h>
39#include <iprt/string.h>
40#include <iprt/uni.h>
41#ifdef RT_STRICT
42# include <iprt/asm.h>
43#endif
44
45#include "base64.h"
46
47
48/** Translates the given character. */
49DECL_FORCE_INLINE(uint8_t) rtBase64TranslateUtf16(RTUTF16 wc)
50{
51 if (wc < RT_ELEMENTS(g_au8RTBase64CharToVal))
52 return g_au8RTBase64CharToVal[wc];
53 if (RTUniCpIsSpace(wc))
54 return BASE64_SPACE;
55 return BASE64_INVALID;
56}
57
58
59/** Fetched the next character in the string and translates it. */
60DECL_FORCE_INLINE(uint8_t) rtBase64TranslateNextUtf16(PCRTUTF16 pwszString, size_t cwcStringMax)
61{
62 if (cwcStringMax > 0)
63 return rtBase64TranslateUtf16(*pwszString);
64 return BASE64_INVALID;
65}
66
67
68/*
69 * Mostly the same as RTBase64DecodedSizeEx, except for the wider character
70 * type and therefore more careful handling of g_szRTBase64ValToChar and additional
71 * space characters. Fixes must be applied to both copies of the code.
72 */
73RTDECL(ssize_t) RTBase64DecodedUtf16SizeEx(PCRTUTF16 pwszString, size_t cwcStringMax, PRTUTF16 *ppwszEnd)
74{
75#ifdef RT_STRICT
76 rtBase64Sanity();
77#endif
78
79 /*
80 * Walk the string until a non-encoded or non-space character is encountered.
81 */
82 uint32_t c6Bits = 0;
83 uint8_t u8 = BASE64_INVALID;
84 RTUTF16 wc = 0;
85
86 while (cwcStringMax > 0 && (wc = *pwszString))
87 {
88 u8 = rtBase64TranslateUtf16(wc);
89 if (u8 < 64)
90 c6Bits++;
91 else if (RT_UNLIKELY(u8 != BASE64_SPACE))
92 break;
93
94 /* advance */
95 pwszString++;
96 cwcStringMax--;
97 }
98
99 /*
100 * Padding can only be found at the end and there is
101 * only 1 or 2 padding chars. Deal with it first.
102 */
103 unsigned cbPad = 0;
104 if (u8 == BASE64_PAD)
105 {
106 cbPad = 1;
107 c6Bits++;
108 pwszString++;
109 cwcStringMax--;
110 while (cwcStringMax > 0 && (wc = *pwszString))
111 {
112 u8 = rtBase64TranslateUtf16(wc);
113 if (u8 != BASE64_SPACE)
114 {
115 if (u8 != BASE64_PAD)
116 break;
117 c6Bits++;
118 cbPad++;
119 }
120 pwszString++;
121 cwcStringMax--;
122 }
123 if (cbPad >= 3)
124 return -1;
125 }
126
127 /*
128 * Invalid char and no where to indicate where the
129 * Base64 text ends? Return failure.
130 */
131 if ( u8 == BASE64_INVALID
132 && !ppwszEnd
133 && wc)
134 return -1;
135
136 /*
137 * Recalc 6-bit to 8-bit and adjust for padding.
138 */
139 if (ppwszEnd)
140 *ppwszEnd = (PRTUTF16)pwszString;
141 return rtBase64DecodedSizeRecalc(c6Bits, cbPad);
142}
143RT_EXPORT_SYMBOL(RTBase64DecodedUtf16SizeEx);
144
145
146RTDECL(ssize_t) RTBase64DecodedUtf16Size(PCRTUTF16 pwszString, PRTUTF16 *ppwszEnd)
147{
148 return RTBase64DecodedUtf16SizeEx(pwszString, RTSTR_MAX, ppwszEnd);
149}
150RT_EXPORT_SYMBOL(RTBase64DecodedUtf16Size);
151
152
153RTDECL(int) RTBase64DecodeUtf16Ex(PCRTUTF16 pwszString, size_t cwcStringMax, void *pvData, size_t cbData,
154 size_t *pcbActual, PRTUTF16 *ppwszEnd)
155{
156#ifdef RT_STRICT
157 rtBase64Sanity();
158#endif
159
160 /*
161 * Process input in groups of 4 input / 3 output chars.
162 */
163 uint8_t u8Trio[3] = { 0, 0, 0 }; /* shuts up gcc */
164 uint8_t *pbData = (uint8_t *)pvData;
165 uint8_t u8;
166 unsigned c6Bits = 0;
167 AssertCompile(sizeof(char) == sizeof(uint8_t));
168
169 for (;;)
170 {
171 /* The first 6-bit group. */
172 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) == BASE64_SPACE)
173 pwszString++, cwcStringMax--;
174 if (u8 >= 64)
175 {
176 c6Bits = 0;
177 break;
178 }
179 u8Trio[0] = u8 << 2;
180 pwszString++;
181 cwcStringMax--;
182
183 /* The second 6-bit group. */
184 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) == BASE64_SPACE)
185 pwszString++, cwcStringMax--;
186 if (u8 >= 64)
187 {
188 c6Bits = 1;
189 break;
190 }
191 u8Trio[0] |= u8 >> 4;
192 u8Trio[1] = u8 << 4;
193 pwszString++;
194 cwcStringMax--;
195
196 /* The third 6-bit group. */
197 u8 = BASE64_INVALID;
198 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) == BASE64_SPACE)
199 pwszString++, cwcStringMax--;
200 if (u8 >= 64)
201 {
202 c6Bits = 2;
203 break;
204 }
205 u8Trio[1] |= u8 >> 2;
206 u8Trio[2] = u8 << 6;
207 pwszString++;
208 cwcStringMax--;
209
210 /* The fourth 6-bit group. */
211 u8 = BASE64_INVALID;
212 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) == BASE64_SPACE)
213 pwszString++, cwcStringMax--;
214 if (u8 >= 64)
215 {
216 c6Bits = 3;
217 break;
218 }
219 u8Trio[2] |= u8;
220 pwszString++;
221 cwcStringMax--;
222
223 /* flush the trio */
224 if (cbData < 3)
225 return VERR_BUFFER_OVERFLOW;
226 cbData -= 3;
227 pbData[0] = u8Trio[0];
228 pbData[1] = u8Trio[1];
229 pbData[2] = u8Trio[2];
230 pbData += 3;
231 }
232
233 /*
234 * Padding can only be found at the end and there is
235 * only 1 or 2 padding chars. Deal with it first.
236 */
237 unsigned cbPad = 0;
238 if (u8 == BASE64_PAD)
239 {
240 cbPad = 1;
241 pwszString++;
242 cwcStringMax--;
243 RTUTF16 wc;
244 while (cwcStringMax > 0 && (wc = *pwszString))
245 {
246 u8 = rtBase64TranslateUtf16(wc);
247 if (u8 != BASE64_SPACE)
248 {
249 if (u8 != BASE64_PAD)
250 break;
251 cbPad++;
252 }
253 pwszString++;
254 cwcStringMax--;
255 }
256 if (cbPad >= 3)
257 return VERR_INVALID_BASE64_ENCODING;
258 }
259
260 /*
261 * Invalid char and no where to indicate where the
262 * Base64 text ends? Return failure.
263 */
264 if ( u8 == BASE64_INVALID
265 && !ppwszEnd
266 && cwcStringMax != 0
267 && *pwszString != '\0')
268 return VERR_INVALID_BASE64_ENCODING;
269
270 /*
271 * Check padding vs. pending sextets, if anything left to do finish it off.
272 */
273 if (c6Bits || cbPad)
274 {
275 if (c6Bits + cbPad != 4)
276 return VERR_INVALID_BASE64_ENCODING;
277
278 switch (c6Bits)
279 {
280 case 1:
281 u8Trio[1] = u8Trio[2] = 0;
282 break;
283 case 2:
284 u8Trio[2] = 0;
285 break;
286 case 3:
287 default:
288 break;
289 }
290 switch (3 - cbPad)
291 {
292 case 1:
293 if (cbData < 1)
294 return VERR_BUFFER_OVERFLOW;
295 cbData--;
296 pbData[0] = u8Trio[0];
297 pbData++;
298 break;
299
300 case 2:
301 if (cbData < 2)
302 return VERR_BUFFER_OVERFLOW;
303 cbData -= 2;
304 pbData[0] = u8Trio[0];
305 pbData[1] = u8Trio[1];
306 pbData += 2;
307 break;
308
309 default:
310 break;
311 }
312 }
313
314 /*
315 * Set optional return values and return successfully.
316 */
317 if (ppwszEnd)
318 *ppwszEnd = (PRTUTF16)pwszString;
319 if (pcbActual)
320 *pcbActual = pbData - (uint8_t *)pvData;
321 return VINF_SUCCESS;
322}
323RT_EXPORT_SYMBOL(RTBase64DecodeUtf16Ex);
324
325
326RTDECL(int) RTBase64DecodeUtf16(PCRTUTF16 pwszString, void *pvData, size_t cbData, size_t *pcbActual, PRTUTF16 *ppwszEnd)
327{
328 return RTBase64DecodeUtf16Ex(pwszString, RTSTR_MAX, pvData, cbData, pcbActual, ppwszEnd);
329}
330RT_EXPORT_SYMBOL(RTBase64DecodeUtf16);
331
332
333RTDECL(size_t) RTBase64EncodedUtf16Length(size_t cbData)
334{
335 return RTBase64EncodedLengthEx(cbData, 0);
336}
337RT_EXPORT_SYMBOL(RTBase64EncodedUtf16Length);
338
339
340RTDECL(size_t) RTBase64EncodedUtf16LengthEx(size_t cbData, uint32_t fFlags)
341{
342 return RTBase64EncodedLengthEx(cbData, fFlags);
343}
344RT_EXPORT_SYMBOL(RTBase64EncodedUtf16LengthEx);
345
346
347RTDECL(int) RTBase64EncodeUtf16(const void *pvData, size_t cbData, PRTUTF16 pwszBuf, size_t cwcBuf, size_t *pcwcActual)
348{
349 return RTBase64EncodeUtf16Ex(pvData, cbData, 0, pwszBuf, cwcBuf, pcwcActual);
350}
351RT_EXPORT_SYMBOL(RTBase64EncodeUtf16);
352
353
354/*
355 * Please note that RTBase64EncodeEx contains an almost exact copy of
356 * this code, just using different output character type and variable prefixes.
357 * So, all fixes must be applied to both versions of the code.
358 */
359RTDECL(int) RTBase64EncodeUtf16Ex(const void *pvData, size_t cbData, uint32_t fFlags,
360 PRTUTF16 pwszBuf, size_t cwcBuf, size_t *pcwcActual)
361{
362 /* Expand the EOL style flags: */
363 size_t const cchEol = g_acchRTBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK];
364 char const chEol0 = g_aachRTBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][0];
365 char const chEol1 = g_aachRTBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][1];
366 Assert(cchEol == (chEol0 != '\0' ? 1U : 0U) + (chEol1 != '\0' ? 1U : 0U));
367
368 /*
369 * Process whole "trios" of input data.
370 */
371 uint8_t u8A;
372 uint8_t u8B;
373 uint8_t u8C;
374 size_t cwcLineFeed = cchEol ? cwcBuf - RTBASE64_LINE_LEN : ~(size_t)0;
375 const uint8_t *pbSrc = (const uint8_t *)pvData;
376 PRTUTF16 pwcDst = pwszBuf;
377 while (cbData >= 3)
378 {
379 if (cwcBuf < 4 + 1)
380 return VERR_BUFFER_OVERFLOW;
381
382 /* encode */
383 u8A = pbSrc[0];
384 pwcDst[0] = g_szRTBase64ValToChar[u8A >> 2];
385 u8B = pbSrc[1];
386 pwcDst[1] = g_szRTBase64ValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
387 u8C = pbSrc[2];
388 pwcDst[2] = g_szRTBase64ValToChar[((u8B << 2) & 0x3f) | (u8C >> 6)];
389 pwcDst[3] = g_szRTBase64ValToChar[u8C & 0x3f];
390
391 /* advance */
392 cwcBuf -= 4;
393 pwcDst += 4;
394 cbData -= 3;
395 pbSrc += 3;
396
397 /* deal out end-of-line */
398 if (cwcBuf == cwcLineFeed && cbData && cchEol)
399 {
400 if (cwcBuf < cchEol + 1)
401 return VERR_BUFFER_OVERFLOW;
402 cwcBuf -= cchEol;
403 *pwcDst++ = chEol0;
404 if (chEol1)
405 *pwcDst++ = chEol1;
406 cwcLineFeed = cwcBuf - RTBASE64_LINE_LEN;
407 }
408 }
409
410 /*
411 * Deal with the odd bytes and string termination.
412 */
413 if (cbData)
414 {
415 if (cwcBuf < 4 + 1)
416 return VERR_BUFFER_OVERFLOW;
417 switch (cbData)
418 {
419 case 1:
420 u8A = pbSrc[0];
421 pwcDst[0] = g_szRTBase64ValToChar[u8A >> 2];
422 pwcDst[1] = g_szRTBase64ValToChar[(u8A << 4) & 0x3f];
423 pwcDst[2] = '=';
424 pwcDst[3] = '=';
425 break;
426 case 2:
427 u8A = pbSrc[0];
428 pwcDst[0] = g_szRTBase64ValToChar[u8A >> 2];
429 u8B = pbSrc[1];
430 pwcDst[1] = g_szRTBase64ValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
431 pwcDst[2] = g_szRTBase64ValToChar[(u8B << 2) & 0x3f];
432 pwcDst[3] = '=';
433 break;
434 }
435 pwcDst += 4;
436 }
437
438 *pwcDst = '\0';
439
440 if (pcwcActual)
441 *pcwcActual = pwcDst - pwszBuf;
442 return VINF_SUCCESS;
443}
444RT_EXPORT_SYMBOL(RTBase64EncodeUtf16Ex);
445
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette