1 #include "lightmediascanner_charset_conv.h"
8 struct lms_charset_conv {
17 * Create a new charset conversion tool.
19 * The conversion tool will try to convert provided strings to UTF-8; you just need
20 * to register known charsets with lms_charset_conv_add() and then call
23 * @return newly allocated conversion tool or NULL on error.
26 lms_charset_conv_new(void)
28 lms_charset_conv_t *lcc;
30 lcc = malloc(sizeof(*lcc));
/* UTF-8 -> UTF-8 descriptor: _check() pushes input through it to test
 * whether the string already converts cleanly as UTF-8. */
36 lcc->check = iconv_open("UTF-8", "UTF-8");
/* iconv_open() reports failure as (iconv_t)-1 and sets errno, hence perror(). */
37 if (lcc->check == (iconv_t)-1) {
38 perror("ERROR: could not create conversion checker");
/* Last-resort descriptor used by lms_charset_conv(); the "//IGNORE" suffix
 * asks iconv to discard what it cannot convert instead of failing. */
42 lcc->fallback = iconv_open("UTF-8//IGNORE", "UTF-8");
43 if (lcc->fallback == (iconv_t)-1) {
44 perror("ERROR: could not create conversion fallback");
/* Releases the checker descriptor opened above -- presumably part of the
 * error cleanup path; confirm against the surrounding labels. */
54 iconv_close(lcc->check);
62 * Free existing charset conversion tool.
64 * @param lcc existing Light Media Scanner charset conversion.
67 lms_charset_conv_free(lms_charset_conv_t *lcc)
/* Close the two built-in descriptors (UTF-8 checker and fallback). */
74 iconv_close(lcc->check);
75 iconv_close(lcc->fallback);
/* Close one descriptor per registered charset: lcc->size entries in lcc->convs. */
77 for (i = 0; i < lcc->size; i++) {
78 iconv_close(lcc->convs[i]);
90 * Register new charset to conversion tool.
92 * @param lcc existing Light Media Scanner charset conversion.
93 * @param charset charset name as understood by iconv_open(3).
95 * @return On success 0 is returned.
98 lms_charset_conv_add(lms_charset_conv_t *lcc, const char *charset)
/* Open a descriptor converting from the given charset to UTF-8. */
110 cd = iconv_open("UTF-8", charset);
/* (iconv_t)-1 means iconv_open() rejected the charset; errno holds the reason. */
111 if (cd == (iconv_t)-1) {
112 fprintf(stderr, "ERROR: could not add conversion charset '%s': %s\n",
113 charset, strerror(errno));
/* Grow the descriptor array to ns entries; the result is taken in a local
 * first rather than written straight into lcc->convs. */
120 convs = realloc(lcc->convs, ns * sizeof(*convs));
124 lcc->convs[idx] = cd;
/* lcc->names is resized to the same ns entries and indexed by the same idx
 * as lcc->convs. */
126 names = realloc(lcc->names, ns * sizeof(*names));
/* Store a private heap copy of the charset name; strdup() yields NULL when
 * the allocation fails. */
130 lcc->names[idx] = strdup(charset);
131 if (!lcc->names[idx])
144 _find(const lms_charset_conv_t *lcc, const char *charset)
/* Linear scan of the registered names for an exact, case-sensitive
 * strcmp() match with charset. */
148 for (i = 0; i < lcc->size; i++)
149 if (strcmp(lcc->names[i], charset) == 0)
156 * Forget about previously registered charset in conversion tool.
158 * @param lcc existing Light Media Scanner charset conversion.
159 * @param charset charset name.
161 * @return On success 0 is returned.
164 lms_charset_conv_del(lms_charset_conv_t *lcc, const char *charset)
/* Look up the slot of the charset among the registered names. */
176 idx = _find(lcc, charset);
178 fprintf(stderr, "ERROR: could not find charset '%s'\n", charset);
/* Release what the removed entry owns: its iconv descriptor and the name
 * string that lms_charset_conv_add() allocated with strdup(). */
182 iconv_close(lcc->convs[idx]);
183 free(lcc->names[idx]);
/* Shift the following entries down by one slot in both arrays.
 * NOTE(review): the loop reads index idx + 1, which is only in bounds if
 * lcc->size was decremented before this point -- confirm. */
186 for (; idx < lcc->size; idx++) {
187 lcc->convs[idx] = lcc->convs[idx + 1];
188 lcc->names[idx] = lcc->names[idx + 1];
/* Resize both arrays to lcc->size entries; each realloc() result is taken
 * in a local and a failure is reported with perror(). */
191 convs = realloc(lcc->convs, lcc->size * sizeof(*convs));
195 perror("could not realloc 'convs'");
197 names = realloc(lcc->names, lcc->size * sizeof(*names));
201 perror("could not realloc 'names'");
207 _check(lms_charset_conv_t *lcc, const char *istr, unsigned int ilen, char *ostr, unsigned int olen)
209 char *inbuf, *outbuf;
210 size_t r, inlen, outlen;
/* iconv() takes a non-const char ** input pointer, so const is cast away here. */
212 inbuf = (char *)istr;
/* An iconv() call with all-NULL buffers resets the descriptor to its
 * initial conversion state before it is reused. */
217 iconv(lcc->check, NULL, NULL, NULL, NULL);
/* Run the input through the UTF-8 -> UTF-8 checker descriptor; r receives
 * iconv()'s result. */
218 r = iconv(lcc->check, &inbuf, &inlen, &outbuf, &outlen);
226 _conv(iconv_t cd, char **p_str, unsigned int *p_len, char *ostr, unsigned int olen)
228 char *inbuf, *outbuf;
229 size_t r, inlen, outlen;
/* Reset cd to its initial conversion state; the same descriptor is reused
 * for every string. */
236 iconv(cd, NULL, NULL, NULL, NULL);
237 r = iconv(cd, &inbuf, &inlen, &outbuf, &outlen);
/* iconv() decreases outlen by the number of bytes it wrote, so the
 * difference is the converted length (assuming outlen started at olen --
 * TODO confirm). */
241 *p_len = olen - outlen;
/* Resize the caller's string to the converted length plus one byte for the
 * terminator written below. */
245 outbuf = realloc(*p_str, *p_len + 1);
251 (*p_str)[*p_len] = '\0';
257 * If required, do charset conversion to UTF-8.
259 * @param lcc existing Light Media Scanner charset conversion.
260 * @param p_str string to be converted.
261 * @param p_len string size.
263 * @return On success 0 is returned.
266 lms_charset_conv(lms_charset_conv_t *lcc, char **p_str, unsigned int *p_len)
/* A NULL string or a zero length is handled as a special case up front. */
277 if (!*p_str || !*p_len)
/* Scratch output buffer shared by the check and conversion attempts; one
 * byte more than outlen is allocated. */
281 outstr = malloc(outlen + 1);
/* _check() returning 0 means the input passed the UTF-8 -> UTF-8 check. */
287 if (_check(lcc, *p_str, *p_len, outstr, outlen) == 0) {
/* Try the registered converters in index order, looking for one for which
 * _conv() returns 0. */
292 for (i = 0; i < lcc->size; i++)
293 if (_conv(lcc->convs[i], p_str, p_len, outstr, outlen) == 0)
/* NOTE(review): "%*s" sets a minimum field width, not a maximum; if the
 * intent is to print at most *p_len bytes, "%.*s" is the form that limits
 * the length -- confirm against the arguments passed. */
297 "WARNING: could not convert '%*s' to any charset, use fallback\n",
/* Convert with the "UTF-8//IGNORE" fallback descriptor. */
299 i = _conv(lcc->fallback, p_str, p_len, outstr, outlen);
/* Overwrite every byte of the string with '?' -- presumably only when the
 * fallback conversion failed as well; confirm the guarding condition. */
301 memset(*p_str, '?', *p_len);
308 * Check if string is not UTF-8 and conversion is required.
310 * @param lcc existing Light Media Scanner charset conversion.
311 * @param str string to be analysed.
312 * @param len string size.
314 * @return 0 if string is already UTF-8.
317 lms_charset_conv_check(lms_charset_conv_t *lcc, const char *str, unsigned int len)
/* Scratch buffer that receives the checker's output. */
328 outstr = malloc(outlen);
/* r is the outcome of the UTF-8 -> UTF-8 check performed by _check(). */
334 r = _check(lcc, str, len, outstr, outlen);