1 /***************************************************************************
2 copyright : (C) 2002 - 2008 by Scott Wheeler
3 email : wheeler@kde.org
4 ***************************************************************************/
6 /***************************************************************************
7 * This library is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU Lesser General Public License version *
9 * 2.1 as published by the Free Software Foundation. *
11 * This library is distributed in the hope that it will be useful, but *
12 * WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
14 * Lesser General Public License for more details. *
16 * You should have received a copy of the GNU Lesser General Public *
17 * License along with this library; if not, write to the Free Software *
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *
21 * Alternatively, this file is available under the Mozilla Public *
22 * License Version 1.1. You may obtain a copy of the License at *
23 * http://www.mozilla.org/MPL/ *
24 ***************************************************************************/
26 #ifndef TAGLIB_STRING_H
27 #define TAGLIB_STRING_H
29 #include "taglib_export.h"
31 #include "tbytevector.h"
37 * \relates TagLib::String
* Converts a QString to a TagLib::String without a requirement to link to Qt.
// Builds a TagLib::String from the UTF-8 bytes returned by s.utf8().data().
// The argument is parenthesized so that a compound expression (for example
// a + b) is evaluated as a whole before .utf8() is applied to it.
#define QStringToTString(s) TagLib::String((s).utf8().data(), TagLib::String::UTF8)
44 * \relates TagLib::String
46 * Converts a TagLib::String to a QString without a requirement to link to Qt.
// Builds a QString from the UTF-8 C-string returned by s.toCString(true).
// The argument is parenthesized so that a compound expression (for example
// a + b) is evaluated as a whole before .toCString() is applied to it.
#define TStringToQString(s) QString::fromUtf8((s).toCString(true))
52 //! A \e wide string class suitable for unicode.
55 * This is an implicitly shared \e wide string. For storage it uses
56 * TagLib::wstring, but as this is an <i>implementation detail</i> this of
* course could change. Strings are stored internally as UTF-16BE (without
* the BOM (Byte Order Mark)).
60 * The use of implicit sharing means that copying a string is cheap, the only
61 * \e cost comes into play when the copy is modified. Prior to that the string
62 * just has a pointer to the data of the \e parent String. This also makes
63 * this class suitable as a function return type.
65 * In addition to adding implicit sharing, this class keeps track of four
66 * possible encodings, which are the four supported by the ID3v2 standard.
69 class TAGLIB_EXPORT String
73 #ifndef DO_NOT_DOCUMENT
74 typedef std::basic_string<wchar>::iterator Iterator;
75 typedef std::basic_string<wchar>::const_iterator ConstIterator;
79 * The four types of string encodings supported by the ID3v2 specification.
80 * ID3v1 is assumed to be Latin1 and Ogg Vorbis comments use UTF8.
* ISO-8859-1, or <i>Latin1</i> encoding. 8 bit characters.
88 * UTF16 with a <i>byte order mark</i>. 16 bit characters.
92 * UTF16 <i>big endian</i>. 16 bit characters. This is the encoding used
93 * internally by TagLib.
97 * UTF8 encoding. Characters are usually 8 bits but can be up to 32.
101 * UTF16 <i>little endian</i>. 16 bit characters.
107 * Constructs an empty String.
112 * Make a shallow, implicitly shared, copy of \a s. Because this is
113 * implicitly shared, this method is lightweight and suitable for
114 * pass-by-value usage.
116 String(const String &s);
119 * Makes a deep copy of the data in \a s.
121 * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
122 * used with other codecs it will simply print a warning and exit.
124 String(const std::string &s, Type t = Latin1);
127 * Makes a deep copy of the data in \a s.
129 String(const wstring &s, Type t = UTF16BE);
132 * Makes a deep copy of the data in \a s.
134 String(const wchar_t *s, Type t = UTF16BE);
137 * Makes a deep copy of the data in \a c.
139 * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
140 * used with other codecs it will simply print a warning and exit.
142 String(char c, Type t = Latin1);
145 * Makes a deep copy of the data in \a c.
147 String(wchar_t c, Type t = Latin1);
151 * Makes a deep copy of the data in \a s.
153 * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
154 * used with other codecs it will simply print a warning and exit.
156 String(const char *s, Type t = Latin1);
159 * Makes a deep copy of the data in \a s.
161 * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
162 * used with other codecs it will simply print a warning and exit.
164 String(const ByteVector &v, Type t = Latin1);
167 * Destroys this String instance.
* If \a unicode is false (the default) this will return a \e Latin1 encoded
* std::string. If it is true the returned std::string will be UTF-8 encoded.
176 std::string to8Bit(bool unicode = false) const;
* Returns a wstring (wide string) version of the TagLib string.
181 wstring toWString() const;
184 * Creates and returns a C-String based on the data. This string is still
185 * owned by the String (class) and as such should not be deleted by the user.
* If \a unicode is false (the default) this string will be encoded in
188 * \e Latin1. If it is true the returned C-String will be UTF-8 encoded.
190 * This string remains valid until the String instance is destroyed or
191 * another export method is called.
193 * \warning This however has the side effect that this C-String will remain
194 * in memory <b>in addition to</b> other memory that is consumed by the
195 * String instance. So, this method should not be used on large strings or
196 * where memory is critical.
198 const char *toCString(bool unicode = false) const;
201 * Returns an iterator pointing to the beginning of the string.
206 * Returns a const iterator pointing to the beginning of the string.
208 ConstIterator begin() const;
211 * Returns an iterator pointing to the end of the string (the position
212 * after the last character).
217 * Returns a const iterator pointing to the end of the string (the position
218 * after the last character).
220 ConstIterator end() const;
223 * Finds the first occurrence of pattern \a s in this string starting from
224 * \a offset. If the pattern is not found, -1 is returned.
226 int find(const String &s, int offset = 0) const;
229 * Finds the last occurrence of pattern \a s in this string, searched backwards,
230 * either from the end of the string or starting from \a offset. If the pattern
231 * is not found, -1 is returned.
233 int rfind(const String &s, int offset = -1) const;
* Returns true if the string starts with the substring \a s.
238 bool startsWith(const String &s) const;
241 * Extract a substring from this string starting at \a position and
242 * continuing for \a n characters.
244 String substr(uint position, uint n = 0xffffffff) const;
* Append \a s to the current string and return a reference to the current
* string.
250 String &append(const String &s);
253 * Returns an upper case version of the string.
255 * \warning This only works for the characters in US-ASCII, i.e. A-Z.
257 String upper() const;
260 * Returns the size of the string.
265 * Returns the length of the string. Equivalent to size().
270 * Returns true if the string is empty.
274 bool isEmpty() const;
277 * Returns true if this string is null -- i.e. it is a copy of the
278 * String::null string.
280 * \note A string can be empty and not null.
* Returns a ByteVector containing the string's data. If \a t is Latin1 or
* UTF8, this will return a vector of 8 bit characters, otherwise it will use
* 16 bit characters.
290 ByteVector data(Type t) const;
293 * Convert the string to an integer.
* Returns the integer if the conversion was successful or 0 if the
296 * string does not represent a number.
298 // BIC: merge with the method below
302 * Convert the string to an integer.
* If the conversion was successful, it sets the value of \a *ok to
305 * true and returns the integer. Otherwise it sets \a *ok to false
306 * and the result is undefined.
308 int toInt(bool *ok) const;
311 * Returns a string with the leading and trailing whitespace stripped.
313 String stripWhiteSpace() const;
* Returns true if the string only uses characters required by Latin1.
318 bool isLatin1() const;
* Returns true if the string only uses characters required by (7-bit) ASCII.
323 bool isAscii() const;
326 * Converts the base-10 integer \a n to a string.
328 static String number(int n);
331 * Returns a reference to the character at position \a i.
333 wchar &operator[](int i);
336 * Returns a const reference to the character at position \a i.
338 const wchar &operator[](int i) const;
341 * Compares each character of the String with each character of \a s and
342 * returns true if the strings match.
344 bool operator==(const String &s) const;
347 * Appends \a s to the end of the String.
349 String &operator+=(const String &s);
352 * Appends \a s to the end of the String.
354 String &operator+=(const wchar_t* s);
357 * Appends \a s to the end of the String.
359 String &operator+=(const char* s);
* Appends \a c to the end of the String.
364 String &operator+=(wchar_t c);
367 * Appends \a c to the end of the String.
369 String &operator+=(char c);
372 * Performs a shallow, implicitly shared, copy of \a s, overwriting the
373 * String's current data.
375 String &operator=(const String &s);
378 * Performs a deep copy of the data in \a s.
380 String &operator=(const std::string &s);
383 * Performs a deep copy of the data in \a s.
385 String &operator=(const wstring &s);
388 * Performs a deep copy of the data in \a s.
390 String &operator=(const wchar_t *s);
* Performs a deep copy of the data in \a c.
395 String &operator=(char c);
* Performs a deep copy of the data in \a c.
400 String &operator=(wchar_t c);
403 * Performs a deep copy of the data in \a s.
405 String &operator=(const char *s);
408 * Performs a deep copy of the data in \a v.
410 String &operator=(const ByteVector &v);
* To be able to use this class in a Map, this operator needed to be
* implemented. Returns true if this string is less than \a s in a bytewise
* comparison.
417 bool operator<(const String &s) const;
420 * A null string provided for convenience.
426 * If this String is being shared via implicit sharing, do a deep copy of the
427 * data and separate from the shared members. This should be called by all
428 * non-const subclass members.
434 * This checks to see if the string is in \e UTF-16 (with BOM) or \e UTF-8
435 * format and if so converts it to \e UTF-16BE for internal use. \e Latin1
436 * does not require conversion since it is a subset of \e UTF-16BE and
* \e UTF-16BE requires no conversion since it is used internally.
439 void prepare(Type t);
448 * \relates TagLib::String
450 * Concatenates \a s1 and \a s2 and returns the result as a string.
452 TAGLIB_EXPORT const TagLib::String operator+(const TagLib::String &s1, const TagLib::String &s2);
455 * \relates TagLib::String
457 * Concatenates \a s1 and \a s2 and returns the result as a string.
459 TAGLIB_EXPORT const TagLib::String operator+(const char *s1, const TagLib::String &s2);
462 * \relates TagLib::String
464 * Concatenates \a s1 and \a s2 and returns the result as a string.
466 TAGLIB_EXPORT const TagLib::String operator+(const TagLib::String &s1, const char *s2);
470 * \relates TagLib::String
472 * Send the string to an output stream.
474 TAGLIB_EXPORT std::ostream &operator<<(std::ostream &s, const TagLib::String &str);