00001 #ifndef FDO_STRINGP_H 00002 #define FDO_STRINGP_H 1 00003 00004 // 00005 // Copyright (C) 2004-2006 Autodesk, Inc. 00006 // 00007 // This library is free software; you can redistribute it and/or 00008 // modify it under the terms of version 2.1 of the GNU Lesser 00009 // General Public License as published by the Free Software Foundation. 00010 // 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 // Lesser General Public License for more details. 00015 // 00016 // You should have received a copy of the GNU Lesser General Public 00017 // License along with this library; if not, write to the Free Software 00018 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00019 // 00020 00021 #ifdef _WIN32 00022 #pragma once 00023 #endif 00024 00025 /// \brief 00026 /// FdoStringP is smart pointer wrapper around strings. 00027 /// Provides memory management,plus conversion of strings between unicode and utf8 00028 /// and various string manipulation functions 00029 class FdoStringP 00030 { 00031 public: 00032 /// \brief 00033 /// Creates an empty string with value L"". 00034 /// 00035 FDO_API_COMMON FdoStringP(void); 00036 00037 /// \brief 00038 /// Creates a string from another string pointer. 00039 /// 00040 /// \param oValue 00041 /// Source string to set this string from. 00042 /// 00043 FDO_API_COMMON FdoStringP(const FdoStringP& oValue); 00044 00045 /// \brief 00046 /// Creates a string from a unicode (FdoString) string 00047 /// 00048 /// \param wValue 00049 /// the unicode string 00050 /// \param bAttach 00051 /// true: just point to the given string without copying it to an 00052 /// internal buffer. The caller is responsible for ensuring that 00053 /// the given string is not deleted before this object. 00054 /// false: copy the given string to an internal buffer 00055 /// 00056 FDO_API_COMMON FdoStringP( FdoString* wValue, FdoBoolean bAttach = false ); 00057 00058 /// \brief 00059 /// Creates a string from a utf8 string 00060 /// 00061 /// \param sValue 00062 /// the utf8 string 00063 /// 00064 FDO_API_COMMON FdoStringP( const char* sValue ); 00065 00066 /// \brief 00067 /// Destroys this string and releases its contents 00068 /// 00069 FDO_API_COMMON ~FdoStringP(void); 00070 00071 /// Operators to copy from other strings in various forms 00072 00073 /// \brief 00074 /// Copies a string from a string pointer. 00075 /// 00076 /// \param oString 00077 /// Source string to copy from. 00078 /// 00079 /// \return 00080 /// Returns the copied string 00081 /// 00082 FDO_API_COMMON FdoStringP& operator=( const FdoStringP& oString ); 00083 00084 /// \brief 00085 /// Copies a string from a utf8 string. 00086 /// 00087 /// \param sString 00088 /// Utf8 string to copy from. 00089 /// 00090 /// \return 00091 /// Returns the copied string 00092 /// 00093 FDO_API_COMMON FdoStringP& operator=( const char* sString ); 00094 00095 /// \brief 00096 /// Copies a string from a unicode string. 00097 /// 00098 /// \param wString 00099 /// Unicode string to copy from. 00100 /// 00101 /// \return 00102 /// Returns the copied string 00103 /// 00104 FDO_API_COMMON FdoStringP& operator=( FdoString* wString ); 00105 00106 /// Various operators for concatenating str2 to this. 00107 00108 /// \brief 00109 /// Appends a unicode string onto this string. 00110 /// When performance is critical, += should be used whenever possible 00111 /// since it is up to 5X faster than this operator. 00112 /// 00113 /// \param str2 00114 /// Unicode string to concatenate 00115 /// 00116 /// \return 00117 /// Returns the concatenation of this string plus str2 00118 /// 00119 FDO_API_COMMON const FdoStringP operator+( FdoString* str2 ) const; 00120 00121 /// \brief 00122 /// Appends the contents of another string onto this string. 00123 /// When performance is critical, += should be used whenever possible 00124 /// since it is up to 5X faster than this operator. 00125 /// 00126 /// \param str2 00127 /// string to concatenate 00128 /// 00129 /// \return 00130 /// Returns the concatenation of this string plus str2 00131 /// 00132 FDO_API_COMMON const FdoStringP operator+( const FdoStringP str2 ) const 00133 { 00134 return( (*this) + (FdoString*) str2 ); 00135 } 00136 00137 /// \brief 00138 /// Appends a unicode string onto this string. 00139 /// 00140 /// \param str2 00141 /// Unicode string to concatenate 00142 /// 00143 /// \return 00144 /// Returns the concatenation of this string plus str2 00145 /// 00146 FDO_API_COMMON FdoStringP operator+=( FdoString* str2 ); 00147 00148 /// \brief 00149 /// Appends the contents of another string onto this string. 00150 /// 00151 /// \param str2 00152 /// string to concatenate 00153 /// 00154 /// \return 00155 /// Returns the concatenation of this string plus str2 00156 /// 00157 FDO_API_COMMON FdoStringP operator+=( const FdoStringP str2 ) 00158 { 00159 (*this) = (*this) + (FdoString*) str2; 00160 return( *this ); 00161 } 00162 00163 /// Various comparison operators. 00164 00165 /// \brief 00166 /// Greater than comparison operator 00167 /// 00168 /// \param str2 00169 /// string to compare 00170 /// 00171 /// \return 00172 /// Returns true if this string is lexically greater than str2 00173 /// 00174 FDO_API_COMMON bool operator>( const FdoStringP str2 ) const 00175 { 00176 return( wcscmp( *this, str2 ) > 0 ); 00177 } 00178 00179 /// \brief 00180 /// Greater than or equal comparison operator 00181 /// 00182 /// \param str2 00183 /// string to compare 00184 /// 00185 /// \return 00186 /// Returns true if this string is lexically greater or equal to str2 00187 /// 00188 FDO_API_COMMON bool operator>=( const FdoStringP str2 ) const 00189 { 00190 return( wcscmp( *this, str2 ) >= 0 ); 00191 } 00192 00193 /// \brief 00194 /// Equals comparison operator 00195 /// 00196 /// \param str2 00197 /// string to compare 00198 /// 00199 /// \return 00200 /// Returns true if this string is lexically equal to str2 00201 /// 00202 FDO_API_COMMON bool operator==( const FdoStringP str2 ) const 00203 { 00204 return( wcscmp( *this, str2 ) == 0 ); 00205 } 00206 00207 /// \brief 00208 /// Equals comparison operator 00209 /// 00210 /// \param str2 00211 /// Unicode string to compare 00212 /// 00213 /// \return 00214 /// Returns true if this string is lexically equal to str2 00215 /// 00216 FDO_API_COMMON bool operator==( FdoString* str2 ) const 00217 { 00218 return( wcscmp( *this, str2 ? str2 : L"" ) == 0 ); 00219 } 00220 00221 /// \brief 00222 /// Not Equals comparison operator 00223 /// 00224 /// \param str2 00225 /// Unicode string to compare 00226 /// 00227 /// \return 00228 /// Returns true if this string is not lexically equal to str2 00229 /// 00230 FDO_API_COMMON bool operator!=( FdoString* str2 ) const 00231 { 00232 return( !((*this) == str2) ); 00233 } 00234 00235 /// \brief 00236 /// Less than or Equals comparison operator 00237 /// 00238 /// \param str2 00239 /// String to compare 00240 /// 00241 /// \return 00242 /// Returns true if this string is lexically less than or equal to str2 00243 /// 00244 FDO_API_COMMON bool operator<=( const FdoStringP str2 ) const 00245 { 00246 return( wcscmp( *this, str2 ) <= 0 ); 00247 } 00248 00249 /// \brief 00250 /// Less than comparison operator 00251 /// 00252 /// \param str2 00253 /// String to compare 00254 /// 00255 /// \return 00256 /// Returns true if this string is lexically less than str2 00257 /// 00258 FDO_API_COMMON bool operator<( const FdoStringP str2 ) const 00259 { 00260 return( wcscmp( *this, str2 ) < 0 ); 00261 } 00262 00263 /// \brief 00264 /// Case-insensitive string comparison. 00265 /// 00266 /// \param str2 00267 /// String to compare. 00268 /// 00269 /// \return 00270 /// Returns: 00271 /// <ul> 00272 /// <li>-1 if this is less than str2 00273 /// <li>0 if this is equal to str2 00274 /// <li>1 if this is greater than str2 00275 /// </ul> 00276 /// 00277 /// 00278 FDO_API_COMMON int ICompare( const FdoStringP str2 ) const; 00279 00280 00281 /// \return 00282 /// Returns the Unicode version of this string that the caller does not have 00283 /// to destroy 00284 /// 00285 FDO_API_COMMON operator FdoString*( ) const; 00286 00287 00288 00289 /// \return 00290 /// Returns the UTF8 version of this string that the caller does not have 00291 /// to destroy 00292 /// 00293 FDO_API_COMMON operator const char*( ) const; 00294 00295 /// \return 00296 /// Returns the length (in wide characters) of the Unicode version of this string 00297 /// 00298 FDO_API_COMMON size_t GetLength() const; 00299 00300 /// \brief 00301 /// Gets the characters to the left of the given sub-string. 00302 /// 00303 /// \param delimiter 00304 /// The substring. 00305 /// 00306 /// \return 00307 /// Returns all of the characters 00308 /// to the left of the first occurance of the delimiter string. 00309 /// All of this string is returned if the delimiter is 00310 /// not in this string. An empty string(L"") is returned if the delimiter is 00311 /// is NULL or L"". 00312 /// 00313 00314 FDO_API_COMMON FdoStringP Left( FdoString* delimiter ) const; 00315 00316 /// \brief 00317 /// Gets the characters to the right of the given sub-string. 00318 /// 00319 /// \param delimiter 00320 /// The substring. 00321 /// 00322 /// \return 00323 /// Returns all of the characters 00324 /// to the right of the first occurance of the delimiter string. 00325 /// an empty string(L"") is returned if the delimiter is 00326 /// not in this string. All of this string is returned if the delimiter is 00327 /// NULL or L"". 00328 /// 00329 00330 FDO_API_COMMON FdoStringP Right( FdoString* delimiter ) const; 00331 00332 /// \brief 00333 /// Extracts a sub-string of this string 00334 /// 00335 /// \param first 00336 /// the 0-based position of the first character to return. 00337 /// Negative values are treated as 0. 00338 /// \param count 00339 /// the number of characters to return. 00340 /// If negative then all characters up to the end of this string are returned. 00341 /// \param useUTF8 00342 /// true: perform Mid against the UTF8 representation of this string. 00343 /// false (default): perform it against the Unicode representation. 00344 /// 00345 /// \return 00346 /// The extracted sub-string 00347 /// 00348 FDO_API_COMMON FdoStringP Mid( size_t first, size_t count, bool useUTF8 = false ); 00349 00350 /// \brief 00351 /// Replaces sub-strings. 00352 /// 00353 /// \param pOld 00354 /// the sub-string to replace 00355 /// \param pNew 00356 /// the string to replace pOld by 00357 /// 00358 /// \return 00359 /// Returns a copy of this string, with all occurrences of pOld 00360 /// replaced by pNew. This string itself is not modified 00361 /// 00362 FDO_API_COMMON FdoStringP Replace( FdoString* pOld, FdoString* pNew ) const; 00363 00364 /// \return 00365 /// Returns a copy of this string with all characters in upper case. 00366 /// 00367 FDO_API_COMMON FdoStringP Upper() const; 00368 00369 /// \return 00370 /// Returns a copy of this string with all characters in lower case. 00371 /// 00372 FDO_API_COMMON FdoStringP Lower() const; 00373 00374 /// \brief 00375 /// Checks for a sub-string. 00376 /// 00377 /// \param subString 00378 /// the sub-string to check 00379 /// 00380 /// \return 00381 /// Returns true if this string has at least 1 occurrence of subString 00382 /// 00383 FDO_API_COMMON bool Contains( FdoString* subString ) const; 00384 00385 /// \return 00386 /// Returns true if this string represents a number. 00387 /// 00388 FDO_API_COMMON bool IsNumber() const; 00389 00390 /// \brief 00391 /// Converts this string to a long integer. 00392 /// 00393 /// \return 00394 /// Returns the long integer (0 if the string is not numeric). 00395 /// 00396 FDO_API_COMMON long ToLong() const; 00397 00398 /// \brief 00399 /// Converts this string to a double precision number. 00400 /// 00401 /// \return 00402 /// Returns double (0 if the string is not numeric). 00403 /// 00404 FDO_API_COMMON FdoDouble ToDouble() const; 00405 00406 /// \brief 00407 /// Converts this string to a boolean. 00408 /// 00409 /// \param defaultValue 00410 /// value returned when string is not recognized as boolean 00411 /// 00412 /// \return 00413 /// Returns true if the string in lower case is "t", "true", "y", "yes", or "1". 00414 /// Returns false if the string in lower case is "f", "false", "n", "no", or "0". 00415 /// Otherwise returns defaultValue. 00416 /// 00417 FDO_API_COMMON FdoBoolean ToBoolean(FdoBoolean defaultValue=false ) const; 00418 00419 /// \brief 00420 /// Create a formatted string. 00421 /// 00422 /// \param wValue 00423 /// the formatting template. Can contain 00424 /// "sprintf" style formatting specs. 00425 /// <param name="..."> 00426 /// substitution parms to format into string. 00427 /// wValue must have one formatting spec per parm. 00428 /// </param> 00429 /// 00430 /// \return 00431 /// Returns the formatted string. 00432 /// 00433 FDO_API_COMMON static FdoStringP Format( FdoString* wValue, ... ); 00434 00435 /// \brief 00436 /// Constant representing a zero-length string. 00437 /// 00438 FDO_API_COMMON static const wchar_t* mEmptyString; 00439 00440 /// \brief 00441 /// Constant representing a null string. 00442 /// 00443 FDO_API_COMMON static const wchar_t* mNullString; 00444 00445 /// \cond DOXYGEN-IGNORE 00446 00447 /// Utility function for converting from unicode to utf8. 00448 /// returned value is allocated and must be deleted by caller. 00449 FDO_API_COMMON static int Utf8FromUnicode( 00450 const wchar_t * Wtext, 00451 int in_size, 00452 char * str_out, 00453 int out_max_size, 00454 bool thrown_exception = true 00455 ); 00456 00457 /// Utility function for converting from utf8 to unicode. 00458 /// returned value is allocated and must be deleted by caller. 00459 FDO_API_COMMON static int Utf8ToUnicode( 00460 const char * str_in, 00461 int in_size, 00462 wchar_t * Wtext, 00463 int out_size, 00464 bool thrown_exception = true 00465 ); 00466 00467 /// Utility function for converting from unicode to utf8. 00468 /// returned value is allocated and must be deleted by caller. 00469 FDO_API_COMMON static int Utf8FromUnicode( 00470 const wchar_t * Wtext, 00471 char * str_out, 00472 int out_max_size, 00473 bool thrown_exception = true 00474 ); 00475 00476 /// Utility function for converting from utf8 to unicode. 00477 /// returned value is allocated and must be deleted by caller. 00478 FDO_API_COMMON static int Utf8ToUnicode( 00479 const char * str_in, 00480 wchar_t * Wtext, 00481 int out_size, 00482 bool thrown_exception = true 00483 ); 00484 00485 /// Return the length (in Unicode characters) of a UTF8 string 00486 static int Utf8Len(const char *utf8String); 00487 /// \endcond 00488 00489 private: 00490 00491 // Creates an FdoStringP from a null-terminated list of strings. 00492 // The strings are concatenated together 00493 FdoStringP( FdoString** values ); 00494 00495 /// structure used to convert between Unicode and UTF8 00496 typedef struct 00497 { 00498 int cmask; 00499 int cval; 00500 int shift; 00501 long lmask; 00502 long lval; 00503 } Tab; 00504 00505 // Descriptor containing refcount and size for the string buffer 00506 typedef struct 00507 { 00508 long mRefCount; 00509 size_t mBufSize; // size in # of wide characters 00510 } Descriptor; 00511 00512 /// General functions to set this object's string value. 00513 void SetString(const FdoStringP& oValue); 00514 void SetString(FdoString* wValue, FdoBoolean bAttach = false); 00515 void SetString(const char* sValue); 00516 void SetString(FdoString** values); 00517 00518 void SetSingle() const; 00519 00520 /// Caller is responsible for deleting the returned string 00521 wchar_t* _copyAsWChar( ) const; 00522 char* _copyAsChar( ) const; 00523 00524 /// Refcount functions. Refcount is the number of 00525 /// FdoStringP objects that point to a particular 00526 /// string buffer. 00527 void AddRef(); 00528 void Release(); 00529 00530 // Allocate a fresh buffer big enough to hold the given number of characters. 00531 // Current buffer is re-used if big enough and not referenced by any other FdoStringP 00532 void AllocateBuffer( size_t bufSize ); 00533 00534 // Refcount setter and getter 00535 void SetRefCount( long refCount ); 00536 long GetRefCount(); 00537 00538 // Buffer size (in characters) setter and getter. 00539 void SetBufSize( size_t bufSize ); 00540 size_t GetBufSize(); 00541 00542 /// String buffer 00543 00544 /// wide-char string 00545 wchar_t* mwString; 00546 /// utf8 string 00547 char* msString; 00548 /// Buffer - descriptor + wide-char string 00549 Descriptor* mBuffer; 00550 00551 /// table for converting between UTF8 and Unicode. 00552 static Tab tab[]; 00553 }; 00554 00555 #endif 00556 00557