Parent Directory | Revision Log
Quick'n dirty XML parser
1 | harbaum | 39 | /* |
2 | * Copyright (C) 2008 Till Harbaum <till@harbaum.org>. | ||
3 | * | ||
4 | * This file is part of OSM2Go. | ||
5 | * | ||
6 | * OSM2Go is free software: you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation, either version 3 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * OSM2Go is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with OSM2Go. If not, see <http://www.gnu.org/licenses/>. | ||
18 | */ | ||
19 | |||
/*
 * qnd_xml - quick'n dirty xml is a very small and very fast implementation
 *           of an XML parser. The idea is to use it in place of libxml2
 *           wherever performance is an issue. This is the case
 *           when reading the *.osm files on mobile devices. A powerful
 *           desktop will likely still use libxml2 as it's just "better".
 */
27 | |||
28 | #include "appdata.h" | ||
29 | |||
30 | #include <ctype.h> | ||
31 | int isblank(int c); | ||
32 | |||
33 | #define QND_XML_BUFFER_SIZE 1024 | ||
34 | typedef struct { | ||
35 | gpointer userdata; | ||
36 | |||
37 | FILE *file; | ||
38 | int total, bytes_read; | ||
39 | |||
40 | char buffer[QND_XML_BUFFER_SIZE], *cur; | ||
41 | int fill; | ||
42 | |||
43 | qnd_xml_stack_t *stack, *sp; | ||
44 | int mod; // modifier (?, !, /) in element | ||
45 | gboolean done; | ||
46 | |||
47 | qnd_xml_attribute_t *attributes; | ||
48 | |||
49 | } qnd_xml_context_t; | ||
50 | |||
51 | |||
52 | void stack_dump(qnd_xml_context_t *context) { | ||
53 | qnd_xml_stack_t *stack = context->stack; | ||
54 | |||
55 | printf("Stack:\n"); | ||
56 | while(stack) { | ||
57 | if(stack == context->sp) printf(" *"); | ||
58 | else printf(" "); | ||
59 | |||
60 | printf("%s\n", stack->entry->name); | ||
61 | stack = stack->next; | ||
62 | } | ||
63 | } | ||
64 | |||
65 | void stack_push(qnd_xml_context_t *context, qnd_xml_entry_t *entry) { | ||
66 | // printf("push %s\n", entry->name); | ||
67 | |||
68 | context->sp->next = g_new0(qnd_xml_stack_t, 1); | ||
69 | context->sp->next->prev = context->sp; | ||
70 | context->sp = context->sp->next; | ||
71 | context->sp->entry = entry; | ||
72 | |||
73 | // stack_dump(context); | ||
74 | } | ||
75 | |||
76 | qnd_xml_entry_t *stack_pop(qnd_xml_context_t *context) { | ||
77 | qnd_xml_entry_t *cur = context->sp->entry; | ||
78 | |||
79 | context->sp = context->sp->prev; | ||
80 | g_free(context->sp->next); | ||
81 | context->sp->next = NULL; | ||
82 | |||
83 | /* did we just empty the stack? if yes, we're done parsing */ | ||
84 | if(context->sp == context->stack) { | ||
85 | printf("done parsing\n"); | ||
86 | context->done = TRUE; | ||
87 | } | ||
88 | |||
89 | // printf("popped %s\n", cur->name); | ||
90 | // stack_dump(context); | ||
91 | return cur; | ||
92 | } | ||
93 | |||
94 | gboolean update_buffer(qnd_xml_context_t *context) { | ||
95 | |||
96 | /* if buffer is empty just fill it */ | ||
97 | if(!context->fill) { | ||
98 | context->cur = context->buffer; | ||
99 | context->fill = fread(context->buffer, 1l, | ||
100 | QND_XML_BUFFER_SIZE, context->file); | ||
101 | |||
102 | if(context->fill < 0) { | ||
103 | printf("read error\n"); | ||
104 | context->fill = 0; | ||
105 | return FALSE; | ||
106 | } | ||
107 | context->bytes_read += context->fill; | ||
108 | return TRUE; | ||
109 | } | ||
110 | |||
111 | /* shift remaining data down */ | ||
112 | int offset = context->cur - context->buffer; | ||
113 | g_memmove(context->buffer, context->cur, QND_XML_BUFFER_SIZE - offset); | ||
114 | context->fill -= offset; | ||
115 | int bytes_read = fread(context->buffer + QND_XML_BUFFER_SIZE - | ||
116 | offset, 1l, offset, context->file); | ||
117 | |||
118 | context->cur = context->buffer; | ||
119 | if(bytes_read < 0) { | ||
120 | printf("read error\n"); | ||
121 | return FALSE; | ||
122 | } | ||
123 | |||
124 | context->bytes_read += bytes_read; | ||
125 | context->fill += bytes_read; | ||
126 | return TRUE; | ||
127 | } | ||
128 | |||
/*
  utf8:
  0xxxxxxx
  110xxxxx 10xxxxxx
  1110xxxx 10xxxxxx 10xxxxxx
  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

  Do we really need to handle this? Internally we only act on
  ASCII characters (e.g. '<', '>', '/', '?' etc.), so it is only
  important to be able to skip utf8 characters correctly. Every byte
  of a multi-byte utf8 sequence has its top bit set and therefore never
  equals an ASCII character, so it should be possible to parse the file
  correctly while ignoring utf8.
*/
142 | |||
143 | /* TODO: this needs to be updated to cope with utf8 */ | ||
144 | inline char current_char(qnd_xml_context_t *context) { | ||
145 | return *context->cur; | ||
146 | } | ||
147 | |||
148 | /* TODO: this needs to be updated to cope with utf8 */ | ||
149 | inline gboolean skip_char(qnd_xml_context_t *context) { | ||
150 | context->cur++; | ||
151 | /* TODO: check buffer range */ | ||
152 | return TRUE; | ||
153 | } | ||
154 | |||
155 | gboolean skip_to_char(qnd_xml_context_t *context, char *chrs) { | ||
156 | do { | ||
157 | while(context->cur < context->buffer + context->fill) { | ||
158 | if(strchr(chrs, current_char(context))) { | ||
159 | return skip_char(context); | ||
160 | } | ||
161 | if(!skip_char(context)) return FALSE; | ||
162 | } | ||
163 | |||
164 | /* try to get more data */ | ||
165 | if(!update_buffer(context)) | ||
166 | return FALSE; | ||
167 | |||
168 | } while(context->fill); | ||
169 | |||
170 | /* if we get here the system was unable to fill the buffer */ | ||
171 | return FALSE; | ||
172 | } | ||
173 | |||
174 | gboolean buffer_overflow(qnd_xml_context_t *context) { | ||
175 | return(!(context->cur < context->buffer + context->fill)); | ||
176 | } | ||
177 | |||
178 | gboolean get_element_name(qnd_xml_context_t *context) { | ||
179 | |||
180 | /* drop everything before element from buffer */ | ||
181 | if(!update_buffer(context)) return FALSE; | ||
182 | |||
183 | char *start = context->cur; | ||
184 | |||
185 | if(buffer_overflow(context) || !isalpha(current_char(context))) { | ||
186 | printf("invalid element name #1 (%c)\n", current_char(context)); | ||
187 | return FALSE; | ||
188 | } | ||
189 | |||
190 | while(!buffer_overflow(context) && !isblank(current_char(context)) && | ||
191 | (current_char(context) != '>')) { | ||
192 | if(!isalnum(current_char(context))) { | ||
193 | printf("invalid element name #2 (%c)\n", current_char(context)); | ||
194 | return FALSE; | ||
195 | } | ||
196 | if(!skip_char(context)) return FALSE; | ||
197 | } | ||
198 | |||
199 | #if 0 | ||
200 | char *format = g_strdup_printf("Element name = %%.%ds\n", | ||
201 | context->cur-start); | ||
202 | printf(format, start); | ||
203 | g_free(format); | ||
204 | #endif | ||
205 | |||
206 | /* handle special elements locally */ | ||
207 | if(context->mod) { | ||
208 | |||
209 | } else { | ||
210 | qnd_xml_entry_t *entry = context->sp->entry, *hit = NULL; | ||
211 | |||
212 | int i=0; | ||
213 | for(i=0;!hit && i<entry->num_children;i++) | ||
214 | if(strncmp(entry->children[i]->name, start, | ||
215 | strlen(entry->children[i]->name)) == 0) | ||
216 | hit = entry->children[i]; | ||
217 | |||
218 | if(hit) | ||
219 | stack_push(context, hit); | ||
220 | else { | ||
221 | printf("element search failed\n"); | ||
222 | return FALSE; | ||
223 | } | ||
224 | } | ||
225 | |||
226 | return TRUE; | ||
227 | } | ||
228 | |||
229 | gboolean get_attribute_name(qnd_xml_context_t *context) { | ||
230 | |||
231 | char *start = context->cur; | ||
232 | |||
233 | if(buffer_overflow(context) || !isalpha(current_char(context))) { | ||
234 | printf("invalid attribute name\n"); | ||
235 | return FALSE; | ||
236 | } | ||
237 | |||
238 | while(!buffer_overflow(context) && !isblank(current_char(context)) && | ||
239 | !(current_char(context) == '=')) { | ||
240 | if(!isalnum(current_char(context))) { | ||
241 | printf("invalid attribute name\n"); | ||
242 | return FALSE; | ||
243 | } | ||
244 | if(!skip_char(context)) return FALSE; | ||
245 | } | ||
246 | |||
247 | /* attach a new attribute to chain */ | ||
248 | qnd_xml_attribute_t **attr = &context->attributes; | ||
249 | while(*attr) attr = &(*attr)->next; | ||
250 | |||
251 | /* terminate name at closing '=' */ | ||
252 | *context->cur = '\0'; | ||
253 | |||
254 | *attr = g_new0(qnd_xml_attribute_t, 1); | ||
255 | (*attr)->name = start; | ||
256 | |||
257 | return TRUE; | ||
258 | } | ||
259 | |||
260 | gboolean get_attribute_value(qnd_xml_context_t *context) { | ||
261 | |||
262 | char *start = context->cur; | ||
263 | |||
264 | while(!buffer_overflow(context) && !(current_char(context) == '\"')) | ||
265 | if(!skip_char(context)) return FALSE; | ||
266 | |||
267 | /* attach a new attribute to chain */ | ||
268 | qnd_xml_attribute_t **attr = &context->attributes; | ||
269 | while((*attr) && (*attr)->next) attr = &(*attr)->next; | ||
270 | |||
271 | if(!(*attr) || (*attr)->value) { | ||
272 | printf("error storing attribute value\n"); | ||
273 | return FALSE; | ||
274 | } | ||
275 | |||
276 | /* terminate value at closing '\"' */ | ||
277 | *context->cur = '\0'; | ||
278 | (*attr)->value = start; | ||
279 | |||
280 | return TRUE; | ||
281 | } | ||
282 | |||
283 | gboolean skip_white(qnd_xml_context_t *context) { | ||
284 | /* skip all white space */ | ||
285 | while(!buffer_overflow(context) && isblank(current_char(context))) | ||
286 | if(!skip_char(context)) return FALSE; | ||
287 | |||
288 | if(isblank(current_char(context))) { | ||
289 | printf("error skipping white space\n"); | ||
290 | return FALSE; | ||
291 | } | ||
292 | |||
293 | return TRUE; | ||
294 | } | ||
295 | |||
296 | gboolean get_attributes(qnd_xml_context_t *context) { | ||
297 | /* drop everything before element from buffer */ | ||
298 | |||
299 | if(!update_buffer(context)) return FALSE; | ||
300 | if(!skip_white(context)) return FALSE; | ||
301 | |||
302 | while(isalpha(current_char(context))) { | ||
303 | |||
304 | /* get attribute name */ | ||
305 | if(!get_attribute_name(context)) return FALSE; | ||
306 | |||
307 | if(!skip_to_char(context, "=")) return FALSE; | ||
308 | if(!skip_to_char(context, "\"")) return FALSE; | ||
309 | |||
310 | if(!get_attribute_value(context)) return FALSE; | ||
311 | if(!skip_to_char(context, "\"")) return FALSE; | ||
312 | |||
313 | if(!skip_white(context)) return FALSE; | ||
314 | } | ||
315 | return TRUE; | ||
316 | } | ||
317 | |||
318 | void attributes_free(qnd_xml_context_t *context) { | ||
319 | qnd_xml_attribute_t *attr = context->attributes; | ||
320 | |||
321 | while(attr) { | ||
322 | qnd_xml_attribute_t *next = attr->next; | ||
323 | g_free(attr); | ||
324 | attr = next; | ||
325 | } | ||
326 | |||
327 | context->attributes = NULL; | ||
328 | } | ||
329 | |||
330 | void qnd_xml_cleanup(qnd_xml_context_t *context) { | ||
331 | /* todo: clean stack */ | ||
332 | |||
333 | if(context->file) fclose(context->file); | ||
334 | g_free(context); | ||
335 | } | ||
336 | |||
337 | gboolean get_element(qnd_xml_context_t *context) { | ||
338 | |||
339 | /* skip all text */ | ||
340 | if(!skip_to_char(context, "<")) return FALSE; | ||
341 | |||
342 | /* handle optional modifier */ | ||
343 | if(current_char(context) == '?' || current_char(context) == '!') { | ||
344 | context->mod = current_char(context); | ||
345 | if(!skip_char(context)) return FALSE; | ||
346 | } else | ||
347 | context->mod = 0; | ||
348 | |||
349 | /* check for closing element */ | ||
350 | if(current_char(context) == '/') { | ||
351 | context->mod = '/'; | ||
352 | if(!skip_char(context)) return FALSE; | ||
353 | } | ||
354 | |||
355 | if(!get_element_name(context)) return FALSE; | ||
356 | if(!get_attributes(context)) return FALSE; | ||
357 | |||
358 | if(context->mod && context->mod != '/') { | ||
359 | if(current_char(context) != context->mod) { | ||
360 | printf("modifier mismatch\n"); | ||
361 | return FALSE; | ||
362 | } | ||
363 | |||
364 | /* skip the modifier */ | ||
365 | if(!skip_char(context)) return FALSE; | ||
366 | } | ||
367 | |||
368 | if(!skip_white(context)) return FALSE; | ||
369 | |||
370 | /* call callback now since the entry may be taken from stack */ | ||
371 | if(!context->mod && context->sp->entry->cb) | ||
372 | if(!context->sp->entry->cb(context->sp, | ||
373 | context->attributes, context->userdata)) | ||
374 | return FALSE; | ||
375 | |||
376 | if(context->mod == '/') | ||
377 | stack_pop(context); | ||
378 | else { | ||
379 | /* if this element closes here it's cleaned up immediately */ | ||
380 | if(current_char(context) == '/') { | ||
381 | if(!skip_char(context)) return FALSE; | ||
382 | stack_pop(context); | ||
383 | } | ||
384 | } | ||
385 | |||
386 | if(current_char(context) != '>') { | ||
387 | printf("element closing error\n"); | ||
388 | return FALSE; | ||
389 | } | ||
390 | |||
391 | if(!skip_char(context)) return FALSE; | ||
392 | |||
393 | attributes_free(context); | ||
394 | |||
395 | return TRUE; | ||
396 | } | ||
397 | |||
398 | gpointer qnd_xml_parse(char *name, qnd_xml_entry_t *root, gpointer userdata) { | ||
399 | qnd_xml_context_t *context = g_new0(qnd_xml_context_t, 1); | ||
400 | context->cur = context->buffer; | ||
401 | context->userdata = userdata; | ||
402 | |||
403 | /* init stack by adding root entry */ | ||
404 | context->sp = context->stack = g_new0(qnd_xml_stack_t, 1); | ||
405 | context->sp->entry = root; | ||
406 | |||
407 | /* check if file exists and is a regular file */ | ||
408 | if(!g_file_test(name, G_FILE_TEST_IS_REGULAR)) { | ||
409 | printf("file doesn't exist or is not a regular file\n"); | ||
410 | qnd_xml_cleanup(context); | ||
411 | return FALSE; | ||
412 | } | ||
413 | |||
414 | /* open file */ | ||
415 | context->file = g_fopen(name, "r"); | ||
416 | if(!context->file) { | ||
417 | printf("unable to open file\n"); | ||
418 | qnd_xml_cleanup(context); | ||
419 | return FALSE; | ||
420 | } | ||
421 | |||
422 | printf("file is open\n"); | ||
423 | |||
424 | /* get file length */ | ||
425 | fseek(context->file, 0l, SEEK_END); | ||
426 | context->total = ftell(context->file); | ||
427 | fseek(context->file, 0l, SEEK_SET); | ||
428 | |||
429 | printf("file length is %d bytes\n", context->total); | ||
430 | |||
431 | gboolean error = FALSE; | ||
432 | do | ||
433 | error = !get_element(context); | ||
434 | while(!error && !context->done); | ||
435 | |||
436 | if(error) printf("parser ended with error\n"); | ||
437 | else printf("parser ended successfully\n"); | ||
438 | |||
439 | printf("current bytes read: %d of %d\n", | ||
440 | context->bytes_read, context->total); | ||
441 | printf("current buffer offset: %d\n", context->cur - context->buffer); | ||
442 | |||
443 | /* user pointer[0] of root element is retval */ | ||
444 | gpointer retval = error?NULL:context->stack->userdata[0]; | ||
445 | |||
446 | /* close file and cleanup */ | ||
447 | qnd_xml_cleanup(context); | ||
448 | |||
449 | return retval; | ||
450 | } | ||
451 | |||
452 | char *qnd_xml_get_prop(qnd_xml_attribute_t *attr, char *name) { | ||
453 | while(attr) { | ||
454 | if(strcasecmp(name, attr->name) == 0) | ||
455 | return attr->value; | ||
456 | |||
457 | attr = attr->next; | ||
458 | } | ||
459 | return NULL; | ||
460 | } | ||
461 | |||
462 | char *qnd_xml_get_prop_str(qnd_xml_attribute_t *attr, char *name) { | ||
463 | char *value = qnd_xml_get_prop(attr, name); | ||
464 | if(value) return g_strdup(value); | ||
465 | return NULL; | ||
466 | } | ||
467 | |||
468 | gboolean qnd_xml_get_prop_double(qnd_xml_attribute_t *attr, char *name, | ||
469 | double *dest) { | ||
470 | char *value = qnd_xml_get_prop(attr, name); | ||
471 | if(!value) return FALSE; | ||
472 | |||
473 | *dest = g_ascii_strtod(value, NULL); | ||
474 | return TRUE; | ||
475 | } | ||
476 | |||
477 | gboolean qnd_xml_get_prop_gulong(qnd_xml_attribute_t *attr, char *name, | ||
478 | gulong *dest) { | ||
479 | char *value = qnd_xml_get_prop(attr, name); | ||
480 | if(!value) return FALSE; | ||
481 | |||
482 | *dest = strtoul(value, NULL, 10); | ||
483 | return TRUE; | ||
484 | } | ||
485 | |||
486 | gboolean qnd_xml_get_prop_is(qnd_xml_attribute_t *attr, char *name, | ||
487 | char *ref) { | ||
488 | char *value = qnd_xml_get_prop(attr, name); | ||
489 | if(!value) return FALSE; | ||
490 | |||
491 | return g_strcasecmp(ref, value); | ||
492 | } |