vcs.maemo.org Git - someplayer/blob - src/taglib/toolkit/tstring.h

   1 /***************************************************************************
   2     copyright            : (C) 2002 - 2008 by Scott Wheeler
   3     email                : wheeler@kde.org
   4  ***************************************************************************/
   5
   6 /***************************************************************************
   7  *   This library is free software; you can redistribute it and/or modify  *
   8  *   it under the terms of the GNU Lesser General Public License version   *
   9  *   2.1 as published by the Free Software Foundation.                     *
  10  *                                                                         *
  11  *   This library is distributed in the hope that it will be useful, but   *
  12  *   WITHOUT ANY WARRANTY; without even the implied warranty of            *
  13  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *
  14  *   Lesser General Public License for more details.                       *
  15  *                                                                         *
  16  *   You should have received a copy of the GNU Lesser General Public      *
  17  *   License along with this library; if not, write to the Free Software   *
  18  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  *
  19  *   USA                                                                   *
  20  *                                                                         *
  21  *   Alternatively, this file is available under the Mozilla Public        *
  22  *   License Version 1.1.  You may obtain a copy of the License at         *
  23  *   http://www.mozilla.org/MPL/                                           *
  24  ***************************************************************************/
  25
  26 #ifndef TAGLIB_STRING_H
  27 #define TAGLIB_STRING_H
  28
  29 #include "taglib_export.h"
  30 #include "taglib.h"
  31 #include "tbytevector.h"
  32
  33 #include <string>
  34 #include <iostream>
  35
  36 /*!
  37  * \relates TagLib::String
  38  *
  39  * Converts a TagLib::String to a QString without a requirement to link to Qt.
  40  */
  41 #define QStringToTString(s) TagLib::String(s.utf8().data(), TagLib::String::UTF8)
  42
  43 /*!
  44  * \relates TagLib::String
  45  *
  46  * Converts a TagLib::String to a QString without a requirement to link to Qt.
  47  */
  48 #define TStringToQString(s) QString::fromUtf8(s.toCString(true))
  49
  50 namespace TagLib {
  51
  52   //! A \e wide string class suitable for unicode.
  53
  54   /*!
  55    * This is an implicitly shared \e wide string.  For storage it uses
  56    * TagLib::wstring, but as this is an <i>implementation detail</i> this of
  57    * course could change.  Strings are stored internally as UTF-16BE (without
  58    * the BOM, i.e. the Byte Order Mark).
  59    *
  60    * The use of implicit sharing means that copying a string is cheap, the only
  61    * \e cost comes into play when the copy is modified.  Prior to that the string
  62    * just has a pointer to the data of the \e parent String.  This also makes
  63    * this class suitable as a function return type.
  64    *
  65    * In addition to adding implicit sharing, this class keeps track of four
  66    * possible encodings, which are the four supported by the ID3v2 standard.
  67    */
  68
  69   class TAGLIB_EXPORT String
  70   {
  71   public:
  72
  73 #ifndef DO_NOT_DOCUMENT
  74     typedef std::basic_string<wchar>::iterator Iterator;
  75     typedef std::basic_string<wchar>::const_iterator ConstIterator;
  76 #endif
  77
  78     /**
  79      * The four types of string encodings supported by the ID3v2 specification.
  80      * ID3v1 is assumed to be Latin1 and Ogg Vorbis comments use UTF8.
  81      */
  82     enum Type {
  83       /*!
  84        * ISO-8859-1, or <i>Latin1</i> encoding.  8 bit characters.
  85        */
  86       Latin1 = 0,
  87       /*!
  88        * UTF16 with a <i>byte order mark</i>.  16 bit characters.
  89        */
  90       UTF16 = 1,
  91       /*!
  92        * UTF16 <i>big endian</i>.  16 bit characters.  This is the encoding used
  93        * internally by TagLib.
  94        */
  95       UTF16BE = 2,
  96       /*!
  97        * UTF8 encoding.  Characters are usually 8 bits but can be up to 32.
  98        */
  99       UTF8 = 3,
 100       /*!
 101        * UTF16 <i>little endian</i>.  16 bit characters.
 102        */
 103       UTF16LE = 4
 104     };
 105
 106     /*!
 107      * Constructs an empty String.
 108      */
 109     String();
 110
 111     /*!
 112      * Make a shallow, implicitly shared, copy of \a s.  Because this is
 113      * implicitly shared, this method is lightweight and suitable for
 114      * pass-by-value usage.
 115      */
 116     String(const String &s);
 117
 118     /*!
 119      * Makes a deep copy of the data in \a s.
 120      *
 121      * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
 122      * used with other codecs it will simply print a warning and exit.
 123      */
 124     String(const std::string &s, Type t = Latin1);
 125
 126     /*!
 127      * Makes a deep copy of the data in \a s.
 128      */
 129     String(const wstring &s, Type t = UTF16BE);
 130
 131     /*!
 132      * Makes a deep copy of the data in \a s.
 133      */
 134     String(const wchar_t *s, Type t = UTF16BE);
 135
 136     /*!
 137      * Makes a deep copy of the data in \a c.
 138      *
 139      * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
 140      * used with other codecs it will simply print a warning and exit.
 141      */
 142     String(char c, Type t = Latin1);
 143
 144     /*!
 145      * Makes a deep copy of the data in \a c.
 146      */
 147     String(wchar_t c, Type t = Latin1);
 148
 149
 150     /*!
 151      * Makes a deep copy of the data in \a s.
 152      *
 153      * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
 154      * used with other codecs it will simply print a warning and exit.
 155      */
 156     String(const char *s, Type t = Latin1);
 157
 158     /*!
 159      * Makes a deep copy of the data in \a v.
 160      *
 161      * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
 162      * used with other codecs it will simply print a warning and exit.
 163      */
 164     String(const ByteVector &v, Type t = Latin1);
 165
 166     /*!
 167      * Destroys this String instance.
 168      */
 169     virtual ~String();
 170
 171     /*!
 172      * If \a unicode is false (the default) this will return a \e Latin1 encoded
 173      * std::string.  If it is true the returned std::string will be UTF-8
 174      * encoded.
 175      */
 176     std::string to8Bit(bool unicode = false) const;
 177
 178     /*!
 179      * Returns the contents of the TagLib string as a wide string (wstring).
 180      */
 181     wstring toWString() const;
 182
 183     /*!
 184      * Creates and returns a C-String based on the data.  This string is still
 185      * owned by the String (class) and as such should not be deleted by the user.
 186      *
 187      * If \a unicode is false (the default) this string will be encoded in
 188      * \e Latin1.  If it is true the returned C-String will be UTF-8 encoded.
 189      *
 190      * This string remains valid until the String instance is destroyed or
 191      * another export method is called.
 192      *
 193      * \warning This however has the side effect that this C-String will remain
 194      * in memory <b>in addition to</b> other memory that is consumed by the
 195      * String instance.  So, this method should not be used on large strings or
 196      * where memory is critical.
 197      */
 198     const char *toCString(bool unicode = false) const;
 199
 200     /*!
 201      * Returns an iterator pointing to the beginning of the string.
 202      */
 203     Iterator begin();
 204
 205     /*!
 206      * Returns a const iterator pointing to the beginning of the string.
 207      */
 208     ConstIterator begin() const;
 209
 210     /*!
 211      * Returns an iterator pointing to the end of the string (the position
 212      * after the last character).
 213      */
 214     Iterator end();
 215
 216     /*!
 217      * Returns a const iterator pointing to the end of the string (the position
 218      * after the last character).
 219      */
 220     ConstIterator end() const;
 221
 222     /*!
 223      * Finds the first occurrence of pattern \a s in this string starting from
 224      * \a offset.  If the pattern is not found, -1 is returned.
 225      */
 226     int find(const String &s, int offset = 0) const;
 227
 228     /*!
 229      * Finds the last occurrence of pattern \a s in this string, searched backwards,
 230      * either from the end of the string or starting from \a offset. If the pattern
 231      * is not found, -1 is returned.
 232      */
 233     int rfind(const String &s, int offset = -1) const;
 234
 235     /*!
 236      * Returns true if the string starts with the substring \a s.
 237      */
 238     bool startsWith(const String &s) const;
 239
 240     /*!
 241      * Extract a substring from this string starting at \a position and
 242      * continuing for \a n characters.
 243      */
 244     String substr(uint position, uint n = 0xffffffff) const;
 245
 246     /*!
 247      * Append \a s to the current string and return a reference to the current
 248      * string.
 249      */
 250     String &append(const String &s);
 251
 252     /*!
 253      * Returns an upper case version of the string.
 254      *
 255      * \warning This only works for the characters in US-ASCII, i.e. A-Z.
 256      */
 257     String upper() const;
 258
 259     /*!
 260      * Returns the size of the string.
 261      */
 262     uint size() const;
 263
 264     /*!
 265      * Returns the length of the string.  Equivalent to size().
 266      */
 267     uint length() const;
 268
 269     /*!
 270      * Returns true if the string is empty.
 271      *
 272      * \see isNull()
 273      */
 274     bool isEmpty() const;
 275
 276     /*!
 277      * Returns true if this string is null -- i.e. it is a copy of the
 278      * String::null string.
 279      *
 280      * \note A string can be empty and not null.
 281      * \see isEmpty()
 282      */
 283     bool isNull() const;
 284
 285     /*!
 286      * Returns a ByteVector containing the string's data.  If \a t is Latin1 or
 287      * UTF8, this will return a vector of 8 bit characters, otherwise it will use
 288      * 16 bit characters.
 289      */
 290     ByteVector data(Type t) const;
 291
 292     /*!
 293      * Convert the string to an integer.
 294      *
 295      * Returns the integer if the conversion was successful or 0 if the
 296      * string does not represent a number.
 297      */
 298     // BIC: merge with the method below
 299     int toInt() const;
 300
 301     /*!
 302      * Convert the string to an integer.
 303      *
 304      * If the conversion was successful, it sets the value of \a *ok to
 305      * true and returns the integer. Otherwise it sets \a *ok to false
 306      * and the result is undefined.
 307      */
 308     int toInt(bool *ok) const;
 309
 310     /*!
 311      * Returns a string with the leading and trailing whitespace stripped.
 312      */
 313     String stripWhiteSpace() const;
 314
 315     /*!
 316      * Returns true if the string only uses characters required by Latin1.
 317      */
 318     bool isLatin1() const;
 319
 320     /*!
 321      * Returns true if the string only uses characters required by (7-bit) ASCII.
 322      */
 323     bool isAscii() const;
 324
 325     /*!
 326      * Converts the base-10 integer \a n to a string.
 327      */
 328     static String number(int n);
 329
 330     /*!
 331      * Returns a reference to the character at position \a i.
 332      */
 333     wchar &operator[](int i);
 334
 335     /*!
 336      * Returns a const reference to the character at position \a i.
 337      */
 338     const wchar &operator[](int i) const;
 339
 340     /*!
 341      * Compares each character of the String with each character of \a s and
 342      * returns true if the strings match.
 343      */
 344     bool operator==(const String &s) const;
 345
 346     /*!
 347      * Appends \a s to the end of the String.
 348      */
 349     String &operator+=(const String &s);
 350
 351     /*!
 352      * Appends \a s to the end of the String.
 353      */
 354     String &operator+=(const wchar_t* s);
 355
 356     /*!
 357      * Appends \a s to the end of the String.
 358      */
 359     String &operator+=(const char* s);
 360
 361     /*!
 362      * Appends \a c to the end of the String.
 363      */
 364     String &operator+=(wchar_t c);
 365
 366     /*!
 367      * Appends \a c to the end of the String.
 368      */
 369     String &operator+=(char c);
 370
 371     /*!
 372      * Performs a shallow, implicitly shared, copy of \a s, overwriting the
 373      * String's current data.
 374      */
 375     String &operator=(const String &s);
 376
 377     /*!
 378      * Performs a deep copy of the data in \a s.
 379      */
 380     String &operator=(const std::string &s);
 381
 382     /*!
 383      * Performs a deep copy of the data in \a s.
 384      */
 385     String &operator=(const wstring &s);
 386
 387     /*!
 388      * Performs a deep copy of the data in \a s.
 389      */
 390     String &operator=(const wchar_t *s);
 391
 392     /*!
 393      * Performs a deep copy of the data in \a c.
 394      */
 395     String &operator=(char c);
 396
 397     /*!
 398      * Performs a deep copy of the data in \a c.
 399      */
 400     String &operator=(wchar_t c);
 401
 402     /*!
 403      * Performs a deep copy of the data in \a s.
 404      */
 405     String &operator=(const char *s);
 406
 407     /*!
 408      * Performs a deep copy of the data in \a v.
 409      */
 410     String &operator=(const ByteVector &v);
 411
 412     /*!
 413      * To be able to use this class in a Map, this operator needed to be
 414      * implemented.  Performs a bytewise less-than comparison against \a s.
 415      * NOTE(review): presumably true when this string sorts before \a s -- confirm.
 416      */
 417     bool operator<(const String &s) const;
 418
 419     /*!
 420      * A null string provided for convenience.
 421      */
 422     static String null;
 423
 424   protected:
 425     /*!
 426      * If this String is being shared via implicit sharing, do a deep copy of the
 427      * data and separate from the shared members.  This should be called by all
 428      * non-const subclass members.
 429      */
 430     void detach();
 431
 432   private:
 433     /*!
 434      * This checks to see if the string is in \e UTF-16 (with BOM) or \e UTF-8
 435      * format and if so converts it to \e UTF-16BE for internal use.  \e Latin1
 436      * does not require conversion since it is a subset of \e UTF-16BE and
 437      * \e UTF-16BE requires no conversion since it is used internally.
 438      */
 439     void prepare(Type t);
 440
 441     class StringPrivate;
 442     StringPrivate *d;
 443   };
 444
 445 }
 446
 447 /*!
 448  * \relates TagLib::String
 449  *
 450  * Concatenates the strings \a s1 and \a s2 and returns the result as a new string.
 451  */
 452 TAGLIB_EXPORT const TagLib::String operator+(const TagLib::String &s1, const TagLib::String &s2);
 453
 454 /*!
 455  * \relates TagLib::String
 456  *
 457  * Concatenates the C-string \a s1 and the string \a s2 and returns the result as a new string.
 458  */
 459 TAGLIB_EXPORT const TagLib::String operator+(const char *s1, const TagLib::String &s2);
 460
 461 /*!
 462  * \relates TagLib::String
 463  *
 464  * Concatenates the string \a s1 and the C-string \a s2 and returns the result as a new string.
 465  */
 466 TAGLIB_EXPORT const TagLib::String operator+(const TagLib::String &s1, const char *s2);
 467
 468
 469 /*!
 470  * \relates TagLib::String
 471  *
 472  * Sends the string \a str to the output stream \a s (the returned stream reference is presumably \a s, for chaining -- confirm).
 473  */
 474 TAGLIB_EXPORT std::ostream &operator<<(std::ostream &s, const TagLib::String &str);
 475
 476 #endif