2 * Copyright (C) 2007 by INdT
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * @author Gustavo Sverzut Barbieri <gustavo.barbieri@openbossa.org>
21 #include "lightmediascanner_charset_conv.h"
/*
 * State of a charset conversion tool. The functions in this file access
 * the members check, fallback, convs, names and size; the member
 * declarations themselves are not visible in this excerpt.
 */
28 struct lms_charset_conv {
37 * Create a new charset conversion tool.
39 * The conversion tool will try to convert provided strings to UTF-8; you just need
40 * to register known charsets with lms_charset_conv_add() and then call
43 * @return newly allocated conversion tool or NULL on error.
46 lms_charset_conv_new(void)
48 lms_charset_conv_t *lcc;
50 lcc = malloc(sizeof(*lcc));
/*
 * iconv_open() takes (tocode, fromcode), so this is a UTF-8 -> UTF-8
 * descriptor. _check() feeds input through it.
 */
56 lcc->check = iconv_open("UTF-8", "UTF-8");
/* iconv_open() signals failure by returning (iconv_t)-1 and setting errno. */
57 if (lcc->check == (iconv_t)-1) {
58 perror("ERROR: could not create conversion checker");
/*
 * Same encodings with the "//IGNORE" suffix on the target, which asks
 * iconv to drop what it cannot convert. lms_charset_conv() uses this
 * descriptor after every registered charset has been tried.
 */
62 lcc->fallback = iconv_open("UTF-8//IGNORE", "UTF-8");
63 if (lcc->fallback == (iconv_t)-1) {
64 perror("ERROR: could not create conversion fallback");
/*
 * Releases the checker descriptor. Presumably part of the error unwind
 * taken when the fallback could not be opened; the surrounding control
 * flow is not visible in this excerpt.
 */
74 iconv_close(lcc->check);
82 * Free existing charset conversion tool.
84 * @param lcc existing Light Media Scanner charset conversion.
87 lms_charset_conv_free(lms_charset_conv_t *lcc)
/* Close the two built-in descriptors (checker and fallback). */
94 iconv_close(lcc->check);
95 iconv_close(lcc->fallback);
/* Close each registered descriptor; lcc->size is the number of entries. */
97 for (i = 0; i < lcc->size; i++) {
98 iconv_close(lcc->convs[i]);
110 * Register new charset to conversion tool.
112 * @param lcc existing Light Media Scanner charset conversion.
113 * @param charset charset name as understood by iconv_open(3).
115 * @return On success 0 is returned.
118 lms_charset_conv_add(lms_charset_conv_t *lcc, const char *charset)
/* Open a descriptor that converts from 'charset' to UTF-8. */
130 cd = iconv_open("UTF-8", charset);
131 if (cd == (iconv_t)-1) {
/* iconv_open() sets errno on failure; report it next to the charset name. */
132 fprintf(stderr, "ERROR: could not add conversion charset '%s': %s\n",
133 charset, strerror(errno));
/*
 * Resize the descriptor array to ns elements. The result goes into a
 * temporary, so lcc->convs is not overwritten by this statement.
 */
140 convs = realloc(lcc->convs, ns * sizeof(*convs));
/*
 * NOTE(review): this writes through lcc->convs, so lcc->convs must
 * already point at the resized array. That assignment is not visible
 * in this excerpt; confirm.
 */
144 lcc->convs[idx] = cd;
/* Resize the parallel array of charset names the same way. */
146 names = realloc(lcc->names, ns * sizeof(*names));
/*
 * Keep a private copy of the name. strdup() returns NULL when it
 * cannot allocate, which is what the check below tests.
 */
150 lcc->names[idx] = strdup(charset);
151 if (!lcc->names[idx])
/*
 * Scan lcc->names for an entry equal to 'charset', using an exact
 * strcmp() comparison. The return statements are not visible in this
 * excerpt; lms_charset_conv_del() uses the result as an array index, so
 * presumably the matching index is returned. TODO confirm, including
 * the value used for "not found".
 */
164 _find(const lms_charset_conv_t *lcc, const char *charset)
168 for (i = 0; i < lcc->size; i++)
169 if (strcmp(lcc->names[i], charset) == 0)
176 * Forget about previously registered charset in conversion tool.
178 * @param lcc existing Light Media Scanner charset conversion.
179 * @param charset charset name.
181 * @return On success 0 is returned.
184 lms_charset_conv_del(lms_charset_conv_t *lcc, const char *charset)
/* Look up the registered entry by name. */
196 idx = _find(lcc, charset);
198 fprintf(stderr, "ERROR: could not find charset '%s'\n", charset);
/* Release the descriptor and the name string held by the removed entry. */
202 iconv_close(lcc->convs[idx]);
203 free(lcc->names[idx]);
/*
 * Shift the following entries down one slot to close the gap.
 * NOTE(review): the loop reads element idx + 1, so it stays in bounds
 * only if lcc->size was decremented before the loop starts. That
 * decrement is not visible in this excerpt; confirm.
 */
206 for (; idx < lcc->size; idx++) {
207 lcc->convs[idx] = lcc->convs[idx + 1];
208 lcc->names[idx] = lcc->names[idx + 1];
/*
 * Shrink both arrays to lcc->size elements.
 * NOTE(review): when lcc->size is 0 these are zero-byte realloc() calls.
 * The result is implementation-defined and may be NULL even though the
 * old block was released. Confirm that the error handling (only the
 * perror() calls are visible here) does not keep using the old pointer.
 */
211 convs = realloc(lcc->convs, lcc->size * sizeof(*convs));
215 perror("could not realloc 'convs'");
217 names = realloc(lcc->names, lcc->size * sizeof(*names));
221 perror("could not realloc 'names'");
/*
 * Run a string through the UTF-8 -> UTF-8 descriptor lcc->check.
 * How the iconv() result is turned into this function's return value
 * is not visible in this excerpt.
 */
227 _check(lms_charset_conv_t *lcc, const char *istr, unsigned int ilen, char *ostr, unsigned int olen)
229 char *inbuf, *outbuf;
230 size_t r, inlen, outlen;
/* iconv() takes a non-const char **, hence the cast that drops const. */
232 inbuf = (char *)istr;
/* An all-NULL call puts the descriptor back into its initial state. */
237 iconv(lcc->check, NULL, NULL, NULL, NULL);
/* iconv() returns (size_t)-1 on error and advances the buffer cursors. */
238 r = iconv(lcc->check, &inbuf, &inlen, &outbuf, &outlen);
/*
 * Convert a string with descriptor 'cd' and store the result back
 * through p_str / p_len. Presumably *p_str is the input and ostr (olen
 * bytes) the scratch output; the buffer setup lines are not visible in
 * this excerpt. TODO confirm.
 */
246 _conv(iconv_t cd, char **p_str, unsigned int *p_len, char *ostr, unsigned int olen)
248 char *inbuf, *outbuf;
249 size_t r, inlen, outlen;
/* Put the descriptor back into its initial state before converting. */
256 iconv(cd, NULL, NULL, NULL, NULL);
257 r = iconv(cd, &inbuf, &inlen, &outbuf, &outlen);
/*
 * iconv() decreases outlen by the number of bytes it wrote, so this is
 * the converted length, assuming outlen started at olen (that
 * initialisation is not visible here).
 */
261 *p_len = olen - outlen;
/*
 * Resize the caller's string to the new length plus one byte for the
 * terminator. outbuf is reused here to receive realloc()'s result.
 */
265 outbuf = realloc(*p_str, *p_len + 1);
/* NUL-terminate the converted string. */
271 (*p_str)[*p_len] = '\0';
277 * If required, do charset conversion to UTF-8.
279 * @param lcc existing Light Media Scanner charset conversion.
280 * @param p_str string to be converted.
281 * @param p_len string size.
283 * @return On success 0 is returned.
286 lms_charset_conv(lms_charset_conv_t *lcc, char **p_str, unsigned int *p_len)
/* A NULL string or a zero length takes this early branch. */
297 if (!*p_str || !*p_len)
/* Scratch output buffer, one byte larger than outlen. */
301 outstr = malloc(outlen + 1);
/*
 * A zero result from _check() presumably means the input is already
 * valid UTF-8; the body of this branch is not visible here.
 */
307 if (_check(lcc, *p_str, *p_len, outstr, outlen) == 0) {
/* Try each registered descriptor in array order; 0 from _conv() is success. */
312 for (i = 0; i < lcc->size; i++)
313 if (_conv(lcc->convs[i], p_str, p_len, outstr, outlen) == 0)
/*
 * NOTE(review): "%*s" sets a minimum field width, not a maximum length,
 * so printf still reads the string up to its NUL terminator. If the
 * arguments (not visible here) are the length and the string,
 * "%.*s" is probably what was intended. Confirm.
 */
317 "WARNING: could not convert '%*s' to any charset, use fallback\n",
/* Last resort: the "UTF-8//IGNORE" fallback descriptor. */
319 i = _conv(lcc->fallback, p_str, p_len, outstr, outlen);
/*
 * Overwrite the whole string with '?' placeholders. Presumably this runs
 * only when the fallback failed too; the guarding condition is not
 * visible in this excerpt.
 */
321 memset(*p_str, '?', *p_len);
328 * Check if string is not UTF-8 and conversion is required.
330 * @param lcc existing Light Media Scanner charset conversion.
331 * @param str string to be analysed.
332 * @param len string size.
334 * @return 0 if string is already UTF-8.
337 lms_charset_conv_check(lms_charset_conv_t *lcc, const char *str, unsigned int len)
/* Scratch buffer of outlen bytes, passed to _check() as its output area. */
348 outstr = malloc(outlen);
/* Run the string through the UTF-8 checker; r receives _check()'s result. */
354 r = _check(lcc, str, len, outstr, outlen);