VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/utf8-posix.cpp@ 40651

Last change on this file since 40651 was 40651, checked in by vboxsync, 13 years ago

Runtime/r3/posix, RDP/client: Solaris 11 iconv change. Avoid referencing xpg5_iconv symbol by circumventing pragma redefine_extname.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 16.4 KB
Line 
1/* $Id: utf8-posix.cpp 40651 2012-03-26 16:29:58Z vboxsync $ */
2/** @file
3 * IPRT - UTF-8 helpers, POSIX.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.215389.xyz. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include <iprt/string.h>
32#include "internal/iprt.h"
33
34#include <iprt/alloc.h>
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include <iprt/string.h>
38
39#include <errno.h>
40#include <locale.h>
41
42#if defined(RT_OS_SOLARIS)
43# if !defined(_XPG6)
44# define VBOX_XPG6_TMP_DEF
45# define _XPG6
46# endif
47# if defined(__USE_LEGACY_PROTOTYPES__)
48# define VBOX_LEGACY_PROTO_TMP_DEF
49# undef __USE_LEGACY_PROTOTYPES__
50# endif
51#endif /* RT_OS_SOLARIS */
52
53# include <iconv.h>
54
55#if defined(RT_OS_SOLARIS)
56# if defined(VBOX_XPG6_TMP_DEF)
57# undef _XPG6
58# undef VBOX_XPG6_TMP_DEF
59# endif
60# if defined(VBOX_LEGACY_PROTO_TMP_DEF)
61# define __USE_LEGACY_PROTOTYPES__
62# undef VBOX_LEGACY_PROTO_TMP_DEF
63# endif
64#endif /* RT_OS_SOLARIS */
65
66#include <wctype.h>
67
68#include <langinfo.h>
69
70#include "internal/alignmentchecks.h"
71#include "internal/string.h"
72#ifdef RT_WITH_ICONV_CACHE
73# include "internal/thread.h"
74AssertCompile(sizeof(iconv_t) <= sizeof(void *));
75#endif
76
77
78/**
79 * Gets the codeset of the current locale (LC_CTYPE).
80 *
81 * @returns Pointer to read-only string with the codeset name.
82 */
83DECLHIDDEN(const char *) rtStrGetLocaleCodeset(void)
84{
85 return nl_langinfo(CODESET);
86}
87
88
89#ifdef RT_WITH_ICONV_CACHE
90
91/**
92 * Initializes the iconv handle cache associated with a thread.
93 *
94 * @param pThread The thread in question.
95 */
96DECLHIDDEN(void) rtStrIconvCacheInit(PRTTHREADINT pThread)
97{
98 for (size_t i = 0; i < RT_ELEMENTS(pThread->ahIconvs); i++)
99 pThread->ahIconvs[i] = (iconv_t)-1;
100}
101
102/**
103 * Destroys the iconv handle cache associated with a thread.
104 *
105 * @param pThread The thread in question.
106 */
107DECLHIDDEN(void) rtStrIconvCacheDestroy(PRTTHREADINT pThread)
108{
109 for (size_t i = 0; i < RT_ELEMENTS(pThread->ahIconvs); i++)
110 {
111 iconv_t hIconv = (iconv_t)pThread->ahIconvs[i];
112 pThread->ahIconvs[i] = (iconv_t)-1;
113 if (hIconv != (iconv_t)-1)
114 iconv_close(hIconv);
115 }
116}
117
118
119/**
120 * Converts a string from one charset to another.
121 *
122 * @returns iprt status code.
123 * @param pvInput Pointer to intput string.
124 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
125 * @param pszInputCS Codeset of the input string.
126 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
127 * If cbOutput is 0 this is where the pointer to the allocated
128 * buffer is stored.
129 * @param cbOutput Size of the passed in buffer.
130 * @param pszOutputCS Codeset of the input string.
131 * @param cFactor Input vs. output size factor.
132 * @param phIconv Pointer to the cache entry.
133 */
134static int rtstrConvertCached(const void *pvInput, size_t cbInput, const char *pszInputCS,
135 void **ppvOutput, size_t cbOutput, const char *pszOutputCS,
136 unsigned cFactor, iconv_t *phIconv)
137{
138 /*
139 * Allocate buffer
140 */
141 bool fUcs2Term;
142 void *pvOutput;
143 size_t cbOutput2;
144 if (!cbOutput)
145 {
146 cbOutput2 = cbInput * cFactor;
147 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
148 if (!pvOutput)
149 return VERR_NO_TMP_MEMORY;
150 fUcs2Term = true;
151 }
152 else
153 {
154 pvOutput = *ppvOutput;
155 fUcs2Term = !strcmp(pszOutputCS, "UCS-2")
156 || !strcmp(pszOutputCS, "UTF-16")
157 || !strcmp(pszOutputCS, "ucs-2")
158 || !strcmp(pszOutputCS, "utf-16");
159 cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
160 if (cbOutput2 > cbOutput)
161 return VERR_BUFFER_OVERFLOW;
162 }
163
164 /*
165 * Use a loop here to retry with bigger buffers.
166 */
167 for (unsigned cTries = 10; cTries > 0; cTries--)
168 {
169 /*
170 * Create conversion object if necessary.
171 */
172 iconv_t hIconv = (iconv_t)*phIconv;
173 if (hIconv == (iconv_t)-1)
174 {
175#ifdef RT_OS_SOLARIS
176 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
177 if (!*pszInputCS)
178 pszInputCS = rtStrGetLocaleCodeset();
179 if (!*pszOutputCS)
180 pszOutputCS = rtStrGetLocaleCodeset();
181#endif
182 IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
183 *phIconv = hIconv = iconv_open(pszOutputCS, pszInputCS);
184 IPRT_ALIGNMENT_CHECKS_ENABLE();
185 }
186 if (hIconv != (iconv_t)-1)
187 {
188 /*
189 * Do the conversion.
190 */
191 size_t cbInLeft = cbInput;
192 size_t cbOutLeft = cbOutput2;
193 const void *pvInputLeft = pvInput;
194 void *pvOutputLeft = pvOutput;
195#if defined(RT_OS_LINUX) || (defined(RT_OS_SOLARIS) && !defined(VBOX_SOLARIS_11_LEGACY_ICONV)) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
196 if (iconv(hIconv, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
197#else
198 if (iconv(hIconv, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
199#endif
200 {
201 if (!cbInLeft)
202 {
203 /*
204 * We're done, just add the terminator and return.
205 * (Two terminators to support UCS-2 output, too.)
206 */
207 ((char *)pvOutputLeft)[0] = '\0';
208 if (fUcs2Term)
209 ((char *)pvOutputLeft)[1] = '\0';
210 *ppvOutput = pvOutput;
211 return VINF_SUCCESS;
212 }
213 errno = E2BIG;
214 }
215
216 /*
217 * If we failed because of output buffer space we'll
218 * increase the output buffer size and retry.
219 */
220 if (errno == E2BIG)
221 {
222 if (!cbOutput)
223 {
224 RTMemTmpFree(pvOutput);
225 cbOutput2 *= 2;
226 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
227 if (!pvOutput)
228 return VERR_NO_TMP_MEMORY;
229 continue;
230 }
231 return VERR_BUFFER_OVERFLOW;
232 }
233
234 /*
235 * Close the handle on all other errors to make sure we won't carry
236 * any bad state with us.
237 */
238 *phIconv = (iconv_t)-1;
239 iconv_close(hIconv);
240 }
241 break;
242 }
243
244 /* failure */
245 if (!cbOutput)
246 RTMemTmpFree(pvOutput);
247 return VERR_NO_TRANSLATION;
248}
249
250#endif /* RT_WITH_ICONV_CACHE */
251
252/**
253 * Converts a string from one charset to another without using the handle cache.
254 *
255 * @returns IPRT status code.
256 *
257 * @param pvInput Pointer to intput string.
258 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
259 * @param pszInputCS Codeset of the input string.
260 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
261 * If cbOutput is 0 this is where the pointer to the allocated
262 * buffer is stored.
263 * @param cbOutput Size of the passed in buffer.
264 * @param pszOutputCS Codeset of the input string.
265 * @param cFactor Input vs. output size factor.
266 */
267static int rtStrConvertUncached(const void *pvInput, size_t cbInput, const char *pszInputCS,
268 void **ppvOutput, size_t cbOutput, const char *pszOutputCS,
269 unsigned cFactor)
270{
271 /*
272 * Allocate buffer
273 */
274 bool fUcs2Term;
275 void *pvOutput;
276 size_t cbOutput2;
277 if (!cbOutput)
278 {
279 cbOutput2 = cbInput * cFactor;
280 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
281 if (!pvOutput)
282 return VERR_NO_TMP_MEMORY;
283 fUcs2Term = true;
284 }
285 else
286 {
287 pvOutput = *ppvOutput;
288 fUcs2Term = !strcmp(pszOutputCS, "UCS-2");
289 cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
290 if (cbOutput2 > cbOutput)
291 return VERR_BUFFER_OVERFLOW;
292 }
293
294 /*
295 * Use a loop here to retry with bigger buffers.
296 */
297 for (unsigned cTries = 10; cTries > 0; cTries--)
298 {
299 /*
300 * Create conversion object.
301 */
302#ifdef RT_OS_SOLARIS
303 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
304 if (!*pszInputCS)
305 pszInputCS = rtStrGetLocaleCodeset();
306 if (!*pszOutputCS)
307 pszOutputCS = rtStrGetLocaleCodeset();
308#endif
309 IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
310 iconv_t icHandle = iconv_open(pszOutputCS, pszInputCS);
311 IPRT_ALIGNMENT_CHECKS_ENABLE();
312 if (icHandle != (iconv_t)-1)
313 {
314 /*
315 * Do the conversion.
316 */
317 size_t cbInLeft = cbInput;
318 size_t cbOutLeft = cbOutput2;
319 const void *pvInputLeft = pvInput;
320 void *pvOutputLeft = pvOutput;
321#if defined(RT_OS_LINUX) || (defined(RT_OS_SOLARIS) && !defined(VBOX_SOLARIS_11_LEGACY_ICONV)) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
322 if (iconv(icHandle, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
323#else
324 if (iconv(icHandle, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
325#endif
326 {
327 if (!cbInLeft)
328 {
329 /*
330 * We're done, just add the terminator and return.
331 * (Two terminators to support UCS-2 output, too.)
332 */
333 iconv_close(icHandle);
334 ((char *)pvOutputLeft)[0] = '\0';
335 if (fUcs2Term)
336 ((char *)pvOutputLeft)[1] = '\0';
337 *ppvOutput = pvOutput;
338 return VINF_SUCCESS;
339 }
340 errno = E2BIG;
341 }
342 iconv_close(icHandle);
343
344 /*
345 * If we failed because of output buffer space we'll
346 * increase the output buffer size and retry.
347 */
348 if (errno == E2BIG)
349 {
350 if (!cbOutput)
351 {
352 RTMemTmpFree(pvOutput);
353 cbOutput2 *= 2;
354 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
355 if (!pvOutput)
356 return VERR_NO_TMP_MEMORY;
357 continue;
358 }
359 return VERR_BUFFER_OVERFLOW;
360 }
361 }
362 break;
363 }
364
365 /* failure */
366 if (!cbOutput)
367 RTMemTmpFree(pvOutput);
368 return VERR_NO_TRANSLATION;
369}
370
371
372/**
373 * Wrapper that selects rtStrConvertCached or rtStrConvertUncached.
374 *
375 * @returns IPRT status code.
376 *
377 * @param pszInput Pointer to intput string.
378 * @param cchInput Size (in bytes) of input string. Excludes any
379 * terminators.
380 * @param pszInputCS Codeset of the input string.
381 * @param ppszOutput Pointer to pointer to output buffer if cbOutput > 0.
382 * If cbOutput is 0 this is where the pointer to the
383 * allocated buffer is stored.
384 * @param cbOutput Size of the passed in buffer.
385 * @param pszOutputCS Codeset of the input string.
386 * @param cFactor Input vs. output size factor.
387 * @param enmCacheIdx The iconv cache index.
388 */
389DECLINLINE(int) rtStrConvertWrapper(const char *pchInput, size_t cchInput, const char *pszInputCS,
390 char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
391 unsigned cFactor, RTSTRICONV enmCacheIdx)
392{
393#ifdef RT_WITH_ICONV_CACHE
394 RTTHREAD hSelf = RTThreadSelf();
395 if (hSelf != NIL_RTTHREAD)
396 {
397 PRTTHREADINT pThread = rtThreadGet(hSelf);
398 if (pThread)
399 {
400 if ((pThread->fIntFlags & (RTTHREADINT_FLAGS_ALIEN | RTTHREADINT_FLAGS_MAIN)) != RTTHREADINT_FLAGS_ALIEN)
401 {
402 int rc = rtstrConvertCached(pchInput, cchInput, pszInputCS,
403 (void **)ppszOutput, cbOutput, pszOutputCS,
404 cFactor, (iconv_t *)&pThread->ahIconvs[enmCacheIdx]);
405 rtThreadRelease(pThread);
406 return rc;
407 }
408 rtThreadRelease(pThread);
409 }
410 }
411#endif
412 return rtStrConvertUncached(pchInput, cchInput, pszInputCS,
413 (void **)ppszOutput, cbOutput, pszOutputCS,
414 cFactor);
415}
416
417
418/**
419 * Internal API for use by the path conversion code.
420 *
421 * @returns IPRT status code.
422 *
423 * @param pszInput Pointer to intput string.
424 * @param cchInput Size (in bytes) of input string. Excludes any
425 * terminators.
426 * @param pszInputCS Codeset of the input string.
427 * @param ppszOutput Pointer to pointer to output buffer if cbOutput > 0.
428 * If cbOutput is 0 this is where the pointer to the
429 * allocated buffer is stored.
430 * @param cbOutput Size of the passed in buffer.
431 * @param pszOutputCS Codeset of the input string.
432 * @param cFactor Input vs. output size factor.
433 * @param enmCacheIdx The iconv cache index.
434 */
435DECLHIDDEN(int) rtStrConvert(const char *pchInput, size_t cchInput, const char *pszInputCS,
436 char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
437 unsigned cFactor, RTSTRICONV enmCacheIdx)
438{
439 Assert(enmCacheIdx >= 0 && enmCacheIdx < RTSTRICONV_END);
440 return rtStrConvertWrapper(pchInput, cchInput, pszInputCS,
441 ppszOutput, cbOutput, pszOutputCS,
442 cFactor, enmCacheIdx);
443}
444
445
446RTR3DECL(int) RTStrUtf8ToCurrentCPTag(char **ppszString, const char *pszString, const char *pszTag)
447{
448 Assert(ppszString);
449 Assert(pszString);
450 *ppszString = NULL;
451
452 /*
453 * Assume result string length is not longer than UTF-8 string.
454 */
455 size_t cch = strlen(pszString);
456 if (cch <= 0)
457 {
458 /* zero length string passed. */
459 *ppszString = (char *)RTMemTmpAllocZTag(sizeof(char), pszTag);
460 if (*ppszString)
461 return VINF_SUCCESS;
462 return VERR_NO_TMP_MEMORY;
463 }
464 return rtStrConvertWrapper(pszString, cch, "UTF-8", ppszString, 0, "", 1, RTSTRICONV_UTF8_TO_LOCALE);
465}
466
467
468RTR3DECL(int) RTStrCurrentCPToUtf8Tag(char **ppszString, const char *pszString, const char *pszTag)
469{
470 Assert(ppszString);
471 Assert(pszString);
472 *ppszString = NULL;
473
474 /*
475 * Attempt with UTF-8 length of 2x the native length.
476 */
477 size_t cch = strlen(pszString);
478 if (cch <= 0)
479 {
480 /* zero length string passed. */
481 *ppszString = (char *)RTMemTmpAllocZTag(sizeof(char), pszTag);
482 if (*ppszString)
483 return VINF_SUCCESS;
484 return VERR_NO_TMP_MEMORY;
485 }
486 return rtStrConvertWrapper(pszString, cch, "", ppszString, 0, "UTF-8", 2, RTSTRICONV_LOCALE_TO_UTF8);
487}
488
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette