Contents of /trunk/src/qnd_xml.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 39 - (show annotations)
Fri Jan 16 20:01:07 2009 UTC (15 years, 5 months ago) by harbaum
File MIME type: text/plain
File size: 12676 byte(s)
Quick'n dirty XML parser
1 /*
2 * Copyright (C) 2008 Till Harbaum <till@harbaum.org>.
3 *
4 * This file is part of OSM2Go.
5 *
6 * OSM2Go is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * OSM2Go is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with OSM2Go. If not, see <http://www.gnu.org/licenses/>.
18 */
19
/*
 * qnd_xml - quick'n dirty xml is a very small and very fast implementation
 * of an XML parser. The idea is to replace the usage of libxml2
 * with this parser whenever performance is an issue. This is the case
 * when reading the *.osm files on mobile devices. A powerful
 * desktop will likely still use libxml2 as it's just "better".
 */
27
28 #include "appdata.h"
29
30 #include <ctype.h>
31 int isblank(int c);
32
33 #define QND_XML_BUFFER_SIZE 1024
34 typedef struct {
35 gpointer userdata;
36
37 FILE *file;
38 int total, bytes_read;
39
40 char buffer[QND_XML_BUFFER_SIZE], *cur;
41 int fill;
42
43 qnd_xml_stack_t *stack, *sp;
44 int mod; // modifier (?, !, /) in element
45 gboolean done;
46
47 qnd_xml_attribute_t *attributes;
48
49 } qnd_xml_context_t;
50
51
52 void stack_dump(qnd_xml_context_t *context) {
53 qnd_xml_stack_t *stack = context->stack;
54
55 printf("Stack:\n");
56 while(stack) {
57 if(stack == context->sp) printf(" *");
58 else printf(" ");
59
60 printf("%s\n", stack->entry->name);
61 stack = stack->next;
62 }
63 }
64
65 void stack_push(qnd_xml_context_t *context, qnd_xml_entry_t *entry) {
66 // printf("push %s\n", entry->name);
67
68 context->sp->next = g_new0(qnd_xml_stack_t, 1);
69 context->sp->next->prev = context->sp;
70 context->sp = context->sp->next;
71 context->sp->entry = entry;
72
73 // stack_dump(context);
74 }
75
76 qnd_xml_entry_t *stack_pop(qnd_xml_context_t *context) {
77 qnd_xml_entry_t *cur = context->sp->entry;
78
79 context->sp = context->sp->prev;
80 g_free(context->sp->next);
81 context->sp->next = NULL;
82
83 /* did we just empty the stack? if yes, we're done parsing */
84 if(context->sp == context->stack) {
85 printf("done parsing\n");
86 context->done = TRUE;
87 }
88
89 // printf("popped %s\n", cur->name);
90 // stack_dump(context);
91 return cur;
92 }
93
94 gboolean update_buffer(qnd_xml_context_t *context) {
95
96 /* if buffer is empty just fill it */
97 if(!context->fill) {
98 context->cur = context->buffer;
99 context->fill = fread(context->buffer, 1l,
100 QND_XML_BUFFER_SIZE, context->file);
101
102 if(context->fill < 0) {
103 printf("read error\n");
104 context->fill = 0;
105 return FALSE;
106 }
107 context->bytes_read += context->fill;
108 return TRUE;
109 }
110
111 /* shift remaining data down */
112 int offset = context->cur - context->buffer;
113 g_memmove(context->buffer, context->cur, QND_XML_BUFFER_SIZE - offset);
114 context->fill -= offset;
115 int bytes_read = fread(context->buffer + QND_XML_BUFFER_SIZE -
116 offset, 1l, offset, context->file);
117
118 context->cur = context->buffer;
119 if(bytes_read < 0) {
120 printf("read error\n");
121 return FALSE;
122 }
123
124 context->bytes_read += bytes_read;
125 context->fill += bytes_read;
126 return TRUE;
127 }
128
/*
  utf8:
  0xxxxxxx
  110xxxxx 10xxxxxx
  1110xxxx 10xxxxxx 10xxxxxx
  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

  Do we really need to handle this? Internally we only
  handle ASCII characters (e.g. '<', '>', '/', '?' etc.),
  thus it's only important to be able to skip utf8 characters
  correctly. Since no byte of a multi-byte utf8 sequence ever equals
  an ASCII character, it should be possible to parse the file correctly
  while ignoring utf8.
*/
142
143 /* TODO: this needs to be updated to cope with utf8 */
144 inline char current_char(qnd_xml_context_t *context) {
145 return *context->cur;
146 }
147
148 /* TODO: this needs to be updated to cope with utf8 */
149 inline gboolean skip_char(qnd_xml_context_t *context) {
150 context->cur++;
151 /* TODO: check buffer range */
152 return TRUE;
153 }
154
155 gboolean skip_to_char(qnd_xml_context_t *context, char *chrs) {
156 do {
157 while(context->cur < context->buffer + context->fill) {
158 if(strchr(chrs, current_char(context))) {
159 return skip_char(context);
160 }
161 if(!skip_char(context)) return FALSE;
162 }
163
164 /* try to get more data */
165 if(!update_buffer(context))
166 return FALSE;
167
168 } while(context->fill);
169
170 /* if we get here the system was unable to fill the buffer */
171 return FALSE;
172 }
173
174 gboolean buffer_overflow(qnd_xml_context_t *context) {
175 return(!(context->cur < context->buffer + context->fill));
176 }
177
178 gboolean get_element_name(qnd_xml_context_t *context) {
179
180 /* drop everything before element from buffer */
181 if(!update_buffer(context)) return FALSE;
182
183 char *start = context->cur;
184
185 if(buffer_overflow(context) || !isalpha(current_char(context))) {
186 printf("invalid element name #1 (%c)\n", current_char(context));
187 return FALSE;
188 }
189
190 while(!buffer_overflow(context) && !isblank(current_char(context)) &&
191 (current_char(context) != '>')) {
192 if(!isalnum(current_char(context))) {
193 printf("invalid element name #2 (%c)\n", current_char(context));
194 return FALSE;
195 }
196 if(!skip_char(context)) return FALSE;
197 }
198
199 #if 0
200 char *format = g_strdup_printf("Element name = %%.%ds\n",
201 context->cur-start);
202 printf(format, start);
203 g_free(format);
204 #endif
205
206 /* handle special elements locally */
207 if(context->mod) {
208
209 } else {
210 qnd_xml_entry_t *entry = context->sp->entry, *hit = NULL;
211
212 int i=0;
213 for(i=0;!hit && i<entry->num_children;i++)
214 if(strncmp(entry->children[i]->name, start,
215 strlen(entry->children[i]->name)) == 0)
216 hit = entry->children[i];
217
218 if(hit)
219 stack_push(context, hit);
220 else {
221 printf("element search failed\n");
222 return FALSE;
223 }
224 }
225
226 return TRUE;
227 }
228
229 gboolean get_attribute_name(qnd_xml_context_t *context) {
230
231 char *start = context->cur;
232
233 if(buffer_overflow(context) || !isalpha(current_char(context))) {
234 printf("invalid attribute name\n");
235 return FALSE;
236 }
237
238 while(!buffer_overflow(context) && !isblank(current_char(context)) &&
239 !(current_char(context) == '=')) {
240 if(!isalnum(current_char(context))) {
241 printf("invalid attribute name\n");
242 return FALSE;
243 }
244 if(!skip_char(context)) return FALSE;
245 }
246
247 /* attach a new attribute to chain */
248 qnd_xml_attribute_t **attr = &context->attributes;
249 while(*attr) attr = &(*attr)->next;
250
251 /* terminate name at closing '=' */
252 *context->cur = '\0';
253
254 *attr = g_new0(qnd_xml_attribute_t, 1);
255 (*attr)->name = start;
256
257 return TRUE;
258 }
259
260 gboolean get_attribute_value(qnd_xml_context_t *context) {
261
262 char *start = context->cur;
263
264 while(!buffer_overflow(context) && !(current_char(context) == '\"'))
265 if(!skip_char(context)) return FALSE;
266
267 /* attach a new attribute to chain */
268 qnd_xml_attribute_t **attr = &context->attributes;
269 while((*attr) && (*attr)->next) attr = &(*attr)->next;
270
271 if(!(*attr) || (*attr)->value) {
272 printf("error storing attribute value\n");
273 return FALSE;
274 }
275
276 /* terminate value at closing '\"' */
277 *context->cur = '\0';
278 (*attr)->value = start;
279
280 return TRUE;
281 }
282
283 gboolean skip_white(qnd_xml_context_t *context) {
284 /* skip all white space */
285 while(!buffer_overflow(context) && isblank(current_char(context)))
286 if(!skip_char(context)) return FALSE;
287
288 if(isblank(current_char(context))) {
289 printf("error skipping white space\n");
290 return FALSE;
291 }
292
293 return TRUE;
294 }
295
296 gboolean get_attributes(qnd_xml_context_t *context) {
297 /* drop everything before element from buffer */
298
299 if(!update_buffer(context)) return FALSE;
300 if(!skip_white(context)) return FALSE;
301
302 while(isalpha(current_char(context))) {
303
304 /* get attribute name */
305 if(!get_attribute_name(context)) return FALSE;
306
307 if(!skip_to_char(context, "=")) return FALSE;
308 if(!skip_to_char(context, "\"")) return FALSE;
309
310 if(!get_attribute_value(context)) return FALSE;
311 if(!skip_to_char(context, "\"")) return FALSE;
312
313 if(!skip_white(context)) return FALSE;
314 }
315 return TRUE;
316 }
317
318 void attributes_free(qnd_xml_context_t *context) {
319 qnd_xml_attribute_t *attr = context->attributes;
320
321 while(attr) {
322 qnd_xml_attribute_t *next = attr->next;
323 g_free(attr);
324 attr = next;
325 }
326
327 context->attributes = NULL;
328 }
329
330 void qnd_xml_cleanup(qnd_xml_context_t *context) {
331 /* todo: clean stack */
332
333 if(context->file) fclose(context->file);
334 g_free(context);
335 }
336
337 gboolean get_element(qnd_xml_context_t *context) {
338
339 /* skip all text */
340 if(!skip_to_char(context, "<")) return FALSE;
341
342 /* handle optional modifier */
343 if(current_char(context) == '?' || current_char(context) == '!') {
344 context->mod = current_char(context);
345 if(!skip_char(context)) return FALSE;
346 } else
347 context->mod = 0;
348
349 /* check for closing element */
350 if(current_char(context) == '/') {
351 context->mod = '/';
352 if(!skip_char(context)) return FALSE;
353 }
354
355 if(!get_element_name(context)) return FALSE;
356 if(!get_attributes(context)) return FALSE;
357
358 if(context->mod && context->mod != '/') {
359 if(current_char(context) != context->mod) {
360 printf("modifier mismatch\n");
361 return FALSE;
362 }
363
364 /* skip the modifier */
365 if(!skip_char(context)) return FALSE;
366 }
367
368 if(!skip_white(context)) return FALSE;
369
370 /* call callback now since the entry may be taken from stack */
371 if(!context->mod && context->sp->entry->cb)
372 if(!context->sp->entry->cb(context->sp,
373 context->attributes, context->userdata))
374 return FALSE;
375
376 if(context->mod == '/')
377 stack_pop(context);
378 else {
379 /* if this element closes here it's cleaned up immediately */
380 if(current_char(context) == '/') {
381 if(!skip_char(context)) return FALSE;
382 stack_pop(context);
383 }
384 }
385
386 if(current_char(context) != '>') {
387 printf("element closing error\n");
388 return FALSE;
389 }
390
391 if(!skip_char(context)) return FALSE;
392
393 attributes_free(context);
394
395 return TRUE;
396 }
397
398 gpointer qnd_xml_parse(char *name, qnd_xml_entry_t *root, gpointer userdata) {
399 qnd_xml_context_t *context = g_new0(qnd_xml_context_t, 1);
400 context->cur = context->buffer;
401 context->userdata = userdata;
402
403 /* init stack by adding root entry */
404 context->sp = context->stack = g_new0(qnd_xml_stack_t, 1);
405 context->sp->entry = root;
406
407 /* check if file exists and is a regular file */
408 if(!g_file_test(name, G_FILE_TEST_IS_REGULAR)) {
409 printf("file doesn't exist or is not a regular file\n");
410 qnd_xml_cleanup(context);
411 return FALSE;
412 }
413
414 /* open file */
415 context->file = g_fopen(name, "r");
416 if(!context->file) {
417 printf("unable to open file\n");
418 qnd_xml_cleanup(context);
419 return FALSE;
420 }
421
422 printf("file is open\n");
423
424 /* get file length */
425 fseek(context->file, 0l, SEEK_END);
426 context->total = ftell(context->file);
427 fseek(context->file, 0l, SEEK_SET);
428
429 printf("file length is %d bytes\n", context->total);
430
431 gboolean error = FALSE;
432 do
433 error = !get_element(context);
434 while(!error && !context->done);
435
436 if(error) printf("parser ended with error\n");
437 else printf("parser ended successfully\n");
438
439 printf("current bytes read: %d of %d\n",
440 context->bytes_read, context->total);
441 printf("current buffer offset: %d\n", context->cur - context->buffer);
442
443 /* user pointer[0] of root element is retval */
444 gpointer retval = error?NULL:context->stack->userdata[0];
445
446 /* close file and cleanup */
447 qnd_xml_cleanup(context);
448
449 return retval;
450 }
451
452 char *qnd_xml_get_prop(qnd_xml_attribute_t *attr, char *name) {
453 while(attr) {
454 if(strcasecmp(name, attr->name) == 0)
455 return attr->value;
456
457 attr = attr->next;
458 }
459 return NULL;
460 }
461
462 char *qnd_xml_get_prop_str(qnd_xml_attribute_t *attr, char *name) {
463 char *value = qnd_xml_get_prop(attr, name);
464 if(value) return g_strdup(value);
465 return NULL;
466 }
467
468 gboolean qnd_xml_get_prop_double(qnd_xml_attribute_t *attr, char *name,
469 double *dest) {
470 char *value = qnd_xml_get_prop(attr, name);
471 if(!value) return FALSE;
472
473 *dest = g_ascii_strtod(value, NULL);
474 return TRUE;
475 }
476
477 gboolean qnd_xml_get_prop_gulong(qnd_xml_attribute_t *attr, char *name,
478 gulong *dest) {
479 char *value = qnd_xml_get_prop(attr, name);
480 if(!value) return FALSE;
481
482 *dest = strtoul(value, NULL, 10);
483 return TRUE;
484 }
485
486 gboolean qnd_xml_get_prop_is(qnd_xml_attribute_t *attr, char *name,
487 char *ref) {
488 char *value = qnd_xml_get_prop(attr, name);
489 if(!value) return FALSE;
490
491 return g_strcasecmp(ref, value);
492 }