VirtualBox

source: vbox/trunk/include/VBox/com/string.h@ 26587

Last change on this file since 26587 was 26587, checked in by vboxsync, 15 years ago

Main: Bstr makeover (second attempt) -- make Bstr(NULL) and Bstr() behave the same; resulting cleanup; make some more internal methods use Utf8Str instead of Bstr; fix a lot of CheckComArgNotNull?() usage

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 22.2 KB
Line 
1/* $Id: string.h 26587 2010-02-16 16:57:09Z vboxsync $ */
2
3/** @file
4 * MS COM / XPCOM Abstraction Layer:
5 * UTF-8 and UTF-16 string classes
6 */
7
8/*
9 * Copyright (C) 2006-2009 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.215389.xyz. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * The contents of this file may alternatively be used under the terms
20 * of the Common Development and Distribution License Version 1.0
21 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
22 * VirtualBox OSE distribution, in which case the provisions of the
23 * CDDL are applicable instead of those of the GPL.
24 *
25 * You may elect to license modified versions of this file under the
26 * terms and conditions of either the GPL or the CDDL or both.
27 *
28 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
29 * Clara, CA 95054 USA or visit http://www.sun.com if you need
30 * additional information or have any questions.
31 */
32
33#ifndef ___VBox_com_string_h
34#define ___VBox_com_string_h
35
36/* Make sure all the stdint.h macros are included - must come first! */
37#ifndef __STDC_LIMIT_MACROS
38# define __STDC_LIMIT_MACROS
39#endif
40#ifndef __STDC_CONSTANT_MACROS
41# define __STDC_CONSTANT_MACROS
42#endif
43
44#if defined (VBOX_WITH_XPCOM)
45# include <nsMemory.h>
46#endif
47
48#include "VBox/com/defs.h"
49#include "VBox/com/assert.h"
50
51#include <iprt/cpp/utils.h>
52#include <iprt/alloc.h>
53#include <iprt/cpp/ministring.h>
54
55namespace com
56{
57
58class Utf8Str;
59
60/**
61 * String class used universally in Main for COM-style Utf-16 strings.
62 * Unfortunately COM on Windows uses UTF-16 everywhere, requiring conversions
63 * back and forth since most of VirtualBox and our libraries use UTF-8.
64 * The Bstr class makes such conversions easier.
65 *
66 * Whereas the BSTR identifier is a typedef for a pointer to a wide character
67 * array (effectively a uint16_t *, depending on the platform),
68 * the Bstr class is a fully featured string class with memory management
69 * for such strings.
70 *
71 * Bstr uses COM/XPCOM-provided memory management routines (SysAlloc* etc.)
72 * to allocate and free string buffers. This makes it possible to use it as
73 * a type of member variables of COM/XPCOM components and pass their values
74 * to callers through component methods' output parameters using the #cloneTo()
75 * operation. Also, the class can adopt (take ownership of) string buffers
76 * returned in output parameters of COM methods using the #asOutParam()
77 * operation and correctly free them afterwards.
78 *
79 * As opposed to the Utf8Str class, which is very efficient, Bstr does not
80 * cache the length of its member string. As a result, copying Bstr's is
81 * more expensive, and Bstr really should only be used to capture arguments
82 * from and return data to public COM methods.
83 *
84 * Starting with VirtualBox 3.2, like Utf8Str, Bstr no longer differentiates
85 * between NULL strings and empty strings. In other words, Bstr("") and
86 * Bstr(NULL) behave the same. In both cases, Bstr allocates no memory,
87 * reports a zero length and zero allocated bytes for both, and returns an
88 * empty C wide string from raw().
89 */
90class Bstr
91{
92public:
93
94 /**
95 * Creates an empty string that has no memory allocated.
96 */
97 Bstr()
98 : m_bstr(NULL)
99 {
100 }
101
102 /**
103 * Creates a copy of another Bstr.
104 *
105 * This allocates s.length() + 1 wide characters for the new instance, unless s is empty.
106 *
107 * @param s The source string.
108 *
109 * @throws std::bad_alloc
110 */
111 Bstr(const Bstr &s)
112 {
113 copyFrom(s);
114 }
115
116 /**
117 * Creates a copy of a wide char string buffer.
118 *
119 * This allocates SysStringLen(pw) + 1 wide characters for the new instance, unless s is empty.
120 *
121 * @param pcsz The source string.
122 *
123 * @throws std::bad_alloc
124 */
125 Bstr(CBSTR pw)
126 {
127 copyFrom(pw);
128 }
129
130#if defined (VBOX_WITH_XPCOM)
131 Bstr(const wchar_t *pw)
132 {
133 AssertCompile(sizeof(wchar_t) == sizeof(OLECHAR));
134 copyFrom((CBSTR)pw);
135 }
136#endif
137
138
139 /**
140 * Creates a copy of an IPRT MiniString (which includes Utf8Str).
141 *
142 * This allocates s.length() + 1 wide characters for the new instance, unless s is empty.
143 *
144 * @param pcsz The source string.
145 *
146 * @throws std::bad_alloc
147 */
148 Bstr(const iprt::MiniString &s)
149 {
150 copyFrom(s.c_str()); // @todo the source string length is know, we can probably speed this up
151 }
152
153 /**
154 * Creates a copy of a C string.
155 *
156 * This allocates strlen(pcsz) + 1 bytes for the new instance, unless s is empty.
157 *
158 * @param pcsz The source string.
159 *
160 * @throws std::bad_alloc
161 */
162 Bstr(const char *pcsz)
163 {
164 copyFrom(pcsz);
165 }
166
167 /**
168 * String length in wide characters.
169 *
170 * Returns the length of the member string, which is equal to SysStringLen(raw()).
171 * In other words, this returns neither bytes nor the number of unicode codepoints.
172 *
173 * As opposed to Utf8Str::length(), this is _not_ cached and expensive.
174 */
175 size_t length() const
176 {
177 return ::SysStringLen(m_bstr);
178 }
179
180 /**
181 * Deallocates all memory.
182 */
183 inline void setNull()
184 {
185 cleanup();
186 }
187
188 /**
189 * Returns a const pointer to the member string. If the member string is empty,
190 * returns a pointer to a static null character.
191 * @return
192 */
193 CBSTR raw() const
194 {
195 return (m_bstr) ? m_bstr : (CBSTR)L"";
196 }
197
198 /**
199 * Empty string or not?
200 *
201 * Returns true if the member string has no length.
202 *
203 * @returns true if empty, false if not.
204 */
205 bool isEmpty() const
206 {
207 return (m_bstr == NULL);
208 }
209
210 operator bool() const
211 {
212 return !isEmpty();
213 }
214
215 bool operator!() const
216 {
217 return isEmpty();
218 }
219
220 /** Case sensitivity selector. */
221 enum CaseSensitivity
222 {
223 CaseSensitive,
224 CaseInsensitive
225 };
226
227 int compare(CBSTR str, CaseSensitivity cs = CaseSensitive) const
228 {
229 if (m_bstr == str)
230 return 0;
231 if (m_bstr == NULL)
232 return -1;
233 if (str == NULL)
234 return 1;
235
236 if (cs == CaseSensitive)
237 return ::RTUtf16Cmp((PRTUTF16)m_bstr, (PRTUTF16)str);
238 else
239 return ::RTUtf16ICmp((PRTUTF16)m_bstr, (PRTUTF16)str);
240 }
241
242 int compare(const Bstr &bstr, CaseSensitivity cs = CaseSensitive) const
243 {
244 return compare(bstr.raw(), cs);
245 }
246
247 /** @name Comparison operators.
248 * @{ */
249 bool operator==(const Bstr &that) const { return !compare(that.raw()); }
250 bool operator!=(const Bstr &that) const { return !!compare(that.raw()); }
251 bool operator<( const Bstr &that) const { return compare(that.raw()) < 0; }
252 bool operator>( const Bstr &that) const { return compare(that.raw()) > 0; }
253
254 bool operator==(CBSTR that) const { return !compare(that); }
255 bool operator!=(CBSTR that) const { return !!compare(that); }
256 bool operator<( CBSTR that) const { return compare(that) < 0; }
257 bool operator>( CBSTR that) const { return compare(that) > 0; }
258
259 // the following two are necessary or stupid MSVC will complain with "ambiguous operator=="
260 bool operator==( BSTR that) const { return !compare(that); }
261 bool operator!=( BSTR that) const { return !!compare(that); }
262
263 /** @} */
264
265 /** Intended to to pass instances as |CBSTR| input parameters to methods. */
266 operator CBSTR() const { return raw(); }
267
268 /**
269 * Intended to to pass instances as |BSTR| input parameters to methods.
270 * Note that we have to provide this mutable BSTR operator since in MS COM
271 * input BSTR parameters of interface methods are not const.
272 */
273 operator BSTR() { return (BSTR)raw(); }
274
275 /**
276 * Intended to assign copies of instances to |BSTR| out parameters from
277 * within the interface method. Transfers the ownership of the duplicated
278 * string to the caller.
279 *
280 * This allocates a single 0 byte in the target if the member string is empty.
281 */
282 const Bstr& cloneTo(BSTR *pstr) const
283 {
284 if (pstr)
285 *pstr = ::SysAllocString((const OLECHAR*)raw());
286 // raw() never returns NULL, so we always allocate something here
287 return *this;
288 }
289
290 /**
291 * Intended to assign instances to |BSTR| out parameters from within the
292 * interface method. Transfers the ownership of the original string to the
293 * caller and resets the instance to null.
294 *
295 * As opposed to cloneTo(), this method doesn't create a copy of the
296 * string.
297 *
298 * This allocates a single 0 byte in the target if the member string is empty.
299 */
300 Bstr& detachTo(BSTR *pstr)
301 {
302 *pstr = (m_bstr) ? m_bstr : ::SysAllocString((const OLECHAR*)"");
303 m_bstr = NULL;
304 return *this;
305 }
306
307 /**
308 * Intended to pass instances as |BSTR| out parameters to methods.
309 * Takes the ownership of the returned data.
310 */
311 BSTR* asOutParam()
312 {
313 cleanup();
314 return &m_bstr;
315 }
316
317 /**
318 * Static immutable null object. May be used for comparison purposes.
319 */
320 static const Bstr Null;
321
322protected:
323
324 /**
325 * Destructor implementation, also used to clean up in operator=() before
326 * assigning a new string.
327 */
328 void cleanup()
329 {
330 if (m_bstr)
331 {
332 ::SysFreeString(m_bstr);
333 m_bstr = NULL;
334 }
335 }
336
337 /**
338 * Protected internal helper which allocates memory for a string capable of
339 * storing \a aSize - 1 characters (not bytes, not codepoints); in other words,
340 * aSize includes the terminating null character.
341 *
342 * Does NOT call cleanup() before allocating!
343 *
344 * @throws std::bad_alloc On allocation failure. The object is left describing
345 * a NULL string.
346 */
347 void alloc(size_t cw)
348 {
349 if (cw)
350 {
351 m_bstr = ::SysAllocStringLen(NULL, (unsigned int)cw - 1);
352#ifdef RT_EXCEPTIONS_ENABLED
353 if (!m_bstr)
354 throw std::bad_alloc();
355#endif
356 }
357 }
358
359 /**
360 * Protected internal helper to copy a string, ignoring the previous object state.
361 *
362 * copyFrom() unconditionally sets the members to a copy of the given other
363 * strings and makes no assumptions about previous contents. Can therefore be
364 * used both in copy constructors, when member variables have no defined value,
365 * and in assignments after having called cleanup().
366 *
367 * This variant copies from another Bstr. Since Bstr does _not_ cache string lengths,
368 * this is not fast.
369 *
370 * @param s The source string.
371 *
372 * @throws std::bad_alloc On allocation failure. The object is left describing
373 * a NULL string.
374 */
375 void copyFrom(const Bstr &s)
376 {
377 copyFrom(s.raw());
378 }
379
380 /**
381 * Protected internal helper to copy a string, ignoring the previous object state.
382 *
383 * See copyFrom() above.
384 *
385 * This variant copies from a wide char C string.
386 *
387 * @param pcsz The source string.
388 *
389 * @throws std::bad_alloc On allocation failure. The object is left describing
390 * a NULL string.
391 */
392 void copyFrom(CBSTR pw)
393 {
394 size_t cwLength;
395 if ( (pw)
396 && ((cwLength = ::SysStringLen((BSTR)pw)))
397 )
398 {
399 size_t cwAllocated = cwLength + 1;
400 alloc(cwAllocated);
401 memcpy(m_bstr, pw, cwAllocated * sizeof(OLECHAR)); // include 0 terminator
402 }
403 else
404 m_bstr = NULL;
405 }
406
407 /**
408 * Protected internal helper to copy a string, ignoring the previous object state.
409 *
410 * See copyFrom() above.
411 *
412 * This variant converts from a Utf-8 C string.
413 *
414 * @param pcsz The source string.
415 *
416 * @throws std::bad_alloc On allocation failure. The object is left describing
417 * a NULL string.
418 */
419 void copyFrom(const char *pcsz)
420 {
421 if (pcsz && *pcsz)
422 {
423 // @todo r=dj apparently this was copied twice in the original because our buffers
424 // use memory from SysAllocMem and IPRT doesn't, but check if this can be made faster
425 PRTUTF16 s = NULL;
426 ::RTStrToUtf16(pcsz, &s);
427 copyFrom((BSTR)s); // @todo r=dj this is not exception safe
428 ::RTUtf16Free(s);
429 }
430 else
431 m_bstr = NULL;
432 }
433
434 BSTR m_bstr; /**< The string buffer. */
435};
436
437/* symmetric compare operators */
438// inline bool operator==(CBSTR l, const Bstr &r) { return r.operator==(l); }
439// inline bool operator!=(CBSTR l, const Bstr &r) { return r.operator!=(l); }
440// inline bool operator==(BSTR l, const Bstr &r) { return r.operator==(l); }
441// inline bool operator!=(BSTR l, const Bstr &r) { return r.operator!=(l); }
442
443////////////////////////////////////////////////////////////////////////////////
444
445/**
446 * String class used universally in Main for Utf-8 strings.
447 *
448 * This is based on iprt::MiniString, to which some functionality has been
449 * moved. Here we keep things that are specific to Main, such as conversions
450 * with UTF-16 strings (Bstr).
451 *
452 * Like iprt::MiniString, Utf8Str does not differentiate between NULL strings
453 * and empty strings. In other words, Utf8Str("") and Utf8Str(NULL)
454 * behave the same. In both cases, MiniString allocates no memory, reports
455 * a zero length and zero allocated bytes for both, and returns an empty
456 * C string from c_str().
457 */
458class Utf8Str : public iprt::MiniString
459{
460public:
461
462 Utf8Str() {}
463
464 Utf8Str(const MiniString &that)
465 : MiniString(that)
466 {}
467
468 Utf8Str(const char *that)
469 : MiniString(that)
470 {}
471
472 Utf8Str(const Bstr &that)
473 {
474 copyFrom(that.raw());
475 }
476
477 Utf8Str(CBSTR that)
478 {
479 copyFrom(that);
480 }
481
482 Utf8Str& operator=(const MiniString &that)
483 {
484 MiniString::operator=(that);
485 return *this;
486 }
487
488 Utf8Str& operator=(const char *that)
489 {
490 MiniString::operator=(that);
491 return *this;
492 }
493
494 Utf8Str& operator=(const Bstr &that)
495 {
496 cleanup();
497 copyFrom(that.raw());
498 return *this;
499 }
500
501 Utf8Str& operator=(CBSTR that)
502 {
503 cleanup();
504 copyFrom(that);
505 return *this;
506 }
507
508 /**
509 * Intended to assign instances to |char *| out parameters from within the
510 * interface method. Transfers the ownership of the duplicated string to the
511 * caller.
512 *
513 * This allocates a single 0 byte in the target if the member string is empty.
514 *
515 * @remarks The returned string must be freed by RTStrFree, not RTMemFree.
516 */
517 const Utf8Str& cloneTo(char **pstr) const
518 {
519 if (pstr)
520 {
521 *pstr = RTStrDup(raw());
522#ifdef RT_EXCEPTIONS_ENABLED
523 if (!*pstr)
524 throw std::bad_alloc();
525#endif
526 }
527 return *this;
528 }
529
530 /**
531 * Intended to assign instances to |BSTR| out parameters from within the
532 * interface method. Transfers the ownership of the duplicated string to the
533 * caller.
534 *
535 * This allocates a single 0 byte in the target if the member string is empty.
536 */
537 const Utf8Str& cloneTo(BSTR *pstr) const
538 {
539 if (pstr)
540 {
541 Bstr bstr(c_str());
542 *pstr = NULL;
543 bstr.detachTo(pstr);
544 }
545 return *this;
546 }
547
548 /**
549 * Converts "this" to lower case by calling RTStrToLower().
550 * @return
551 */
552 Utf8Str& toLower();
553
554 /**
555 * Converts "this" to upper case by calling RTStrToUpper().
556 * @return
557 */
558 Utf8Str& toUpper();
559
560 /**
561 * Removes a trailing slash from the member string, if present.
562 * Calls RTPathStripTrailingSlash() without having to mess with mutableRaw().
563 */
564 void stripTrailingSlash();
565
566 /**
567 * Removes a trailing filename from the member string, if present.
568 * Calls RTPathStripFilename() without having to mess with mutableRaw().
569 */
570 void stripFilename();
571
572 /**
573 * Removes a trailing file name extension from the member string, if present.
574 * Calls RTPathStripExt() without having to mess with mutableRaw().
575 */
576 void stripExt();
577
578 /**
579 * Attempts to convert the member string into a 32-bit integer.
580 *
581 * @returns 32-bit unsigned number on success.
582 * @returns 0 on failure.
583 */
584 int toInt32() const
585 {
586 return RTStrToInt32(m_psz);
587 }
588
589 /**
590 * Attempts to convert the member string into an unsigned 32-bit integer.
591 *
592 * @returns 32-bit unsigned number on success.
593 * @returns 0 on failure.
594 */
595 int toUInt32() const
596 {
597 return RTStrToUInt32(m_psz);
598 }
599
600 /**
601 * Intended to pass instances as out (|char **|) parameters to methods. Takes
602 * the ownership of the returned data.
603 *
604 * @remarks See ministring::jolt().
605 */
606 char **asOutParam()
607 {
608 cleanup();
609 return &m_psz;
610 }
611
612 /**
613 * Static immutable null object. May be used for comparison purposes.
614 */
615 static const Utf8Str Null;
616
617protected:
618
619 /**
620 * As with the ministring::copyFrom() variants, this unconditionally
621 * sets the members to a copy of the given other strings and makes
622 * no assumptions about previous contents. This can therefore be used
623 * both in copy constructors, when member variables have no defined
624 * value, and in assignments after having called cleanup().
625 *
626 * This variant converts from a UTF-16 string, most probably from
627 * a Bstr assignment.
628 *
629 * @param rs
630 */
631 void copyFrom(CBSTR s)
632 {
633 if (s)
634 {
635 RTUtf16ToUtf8((PRTUTF16)s, &m_psz); /** @todo r=bird: This technically requires using RTStrFree, ministring::cleanup() uses RTMemFree. */
636#ifdef RT_EXCEPTIONS_ENABLED
637 if (!m_psz)
638 throw std::bad_alloc();
639#endif
640 m_cbLength = strlen(m_psz); /** @todo optimize by using a different RTUtf* function */
641 m_cbAllocated = m_cbLength + 1;
642 }
643 else
644 {
645 m_cbLength = 0;
646 m_cbAllocated = 0;
647 m_psz = NULL;
648 }
649 }
650
651 friend class Bstr; /* to access our raw_copy() */
652};
653
654// work around error C2593 of the stupid MSVC 7.x ambiguity resolver
655// @todo r=dj if I enable this I get about five warnings every time this header
656// is included, figure out what that is... for now I have modified the calling code instead
657// WORKAROUND_MSVC7_ERROR_C2593_FOR_BOOL_OP(Bstr)
658
659////////////////////////////////////////////////////////////////////////////////
660
661/**
662 * This class is a printf-like formatter for Utf8Str strings. Its purpose is
663 * to construct Utf8Str objects from a format string and a list of arguments
664 * for the format string.
665 *
666 * The usage of this class is like the following:
667 * <code>
668 * Utf8StrFmt string ("program name = %s", argv[0]);
669 * </code>
670 */
671class Utf8StrFmt : public Utf8Str
672{
673public:
674
675 /**
676 * Constructs a new string given the format string and the list
677 * of the arguments for the format string.
678 *
679 * @param format printf-like format string (in UTF-8 encoding)
680 * @param ... list of the arguments for the format string
681 */
682 explicit Utf8StrFmt(const char *format, ...)
683 {
684 va_list args;
685 va_start(args, format);
686 init(format, args);
687 va_end(args);
688 }
689
690protected:
691
692 Utf8StrFmt() {}
693
694 void init(const char *format, va_list args);
695
696private:
697
698 static DECLCALLBACK(size_t) strOutput(void *pvArg, const char *pachChars,
699 size_t cbChars);
700};
701
702/**
703 * This class is a vprintf-like formatter for Utf8Str strings. It is
704 * identical to Utf8StrFmt except that its constructor takes a va_list
705 * argument instead of ellipsis.
706 *
707 * Note that a separate class is necessary because va_list is defined as
708 * |char *| on most platforms. For this reason, if we had two overloaded
709 * constructors in Utf8StrFmt (one taking ellipsis and another one taking
710 * va_list) then composing a constructor call using exactly two |char *|
711 * arguments would cause the compiler to use the va_list overload instead of
712 * the ellipsis one which is obviously wrong. The compiler would choose
713 * va_list because ellipsis has the lowest rank when it comes to resolving
714 * overloads, as opposed to va_list which is an exact match for |char *|.
715 */
716class Utf8StrFmtVA : public Utf8StrFmt
717{
718public:
719
720 /**
721 * Constructs a new string given the format string and the list
722 * of the arguments for the format string.
723 *
724 * @param format printf-like format string (in UTF-8 encoding)
725 * @param args list of arguments for the format string
726 */
727 Utf8StrFmtVA(const char *format, va_list args) { init(format, args); }
728};
729
730/**
731 * The BstrFmt class is a shortcut to <tt>Bstr(Utf8StrFmt(...))</tt>.
732 */
733class BstrFmt : public Bstr
734{
735public:
736
737 /**
738 * Constructs a new string given the format string and the list of the
739 * arguments for the format string.
740 *
741 * @param aFormat printf-like format string (in UTF-8 encoding).
742 * @param ... List of the arguments for the format string.
743 */
744 explicit BstrFmt(const char *aFormat, ...)
745 {
746 va_list args;
747 va_start(args, aFormat);
748 copyFrom(Utf8StrFmtVA(aFormat, args).c_str());
749 va_end(args);
750 }
751};
752
753/**
754 * The BstrFmtVA class is a shortcut to <tt>Bstr(Utf8StrFmtVA(...))</tt>.
755 */
756class BstrFmtVA : public Bstr
757{
758public:
759
760 /**
761 * Constructs a new string given the format string and the list of the
762 * arguments for the format string.
763 *
764 * @param aFormat printf-like format string (in UTF-8 encoding).
765 * @param aArgs List of arguments for the format string
766 */
767 BstrFmtVA(const char *aFormat, va_list aArgs)
768 {
769 copyFrom(Utf8StrFmtVA(aFormat, aArgs).c_str());
770 }
771};
772
773} /* namespace com */
774
775#endif /* !___VBox_com_string_h */
776
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette