Parent Directory
|
Revision Log
Quick'n dirty XML parser
1 | /* |
2 | * Copyright (C) 2008 Till Harbaum <till@harbaum.org>. |
3 | * |
4 | * This file is part of OSM2Go. |
5 | * |
6 | * OSM2Go is free software: you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by |
8 | * the Free Software Foundation, either version 3 of the License, or |
9 | * (at your option) any later version. |
10 | * |
11 | * OSM2Go is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | * GNU General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU General Public License |
17 | * along with OSM2Go. If not, see <http://www.gnu.org/licenses/>. |
18 | */ |
19 | |
20 | /* |
21 | * qnd_xml - "quick'n dirty xml" is a very small and very fast implementation |
22 | * of an XML parser. The idea is to use it in place of libxml2 |
23 | * wherever performance is an issue. This is the case |
24 | * when reading the *.osm files on mobile devices. A powerful |
25 | * desktop will likely still use libxml2 as it's just "better". |
26 | */ |
27 | |
28 | #include "appdata.h" |
29 | |
30 | #include <ctype.h> |
31 | int isblank(int c); |
32 | |
33 | #define QND_XML_BUFFER_SIZE 1024 |
34 | typedef struct { |
35 | gpointer userdata; |
36 | |
37 | FILE *file; |
38 | int total, bytes_read; |
39 | |
40 | char buffer[QND_XML_BUFFER_SIZE], *cur; |
41 | int fill; |
42 | |
43 | qnd_xml_stack_t *stack, *sp; |
44 | int mod; // modifier (?, !, /) in element |
45 | gboolean done; |
46 | |
47 | qnd_xml_attribute_t *attributes; |
48 | |
49 | } qnd_xml_context_t; |
50 | |
51 | |
52 | void stack_dump(qnd_xml_context_t *context) { |
53 | qnd_xml_stack_t *stack = context->stack; |
54 | |
55 | printf("Stack:\n"); |
56 | while(stack) { |
57 | if(stack == context->sp) printf(" *"); |
58 | else printf(" "); |
59 | |
60 | printf("%s\n", stack->entry->name); |
61 | stack = stack->next; |
62 | } |
63 | } |
64 | |
65 | void stack_push(qnd_xml_context_t *context, qnd_xml_entry_t *entry) { |
66 | // printf("push %s\n", entry->name); |
67 | |
68 | context->sp->next = g_new0(qnd_xml_stack_t, 1); |
69 | context->sp->next->prev = context->sp; |
70 | context->sp = context->sp->next; |
71 | context->sp->entry = entry; |
72 | |
73 | // stack_dump(context); |
74 | } |
75 | |
76 | qnd_xml_entry_t *stack_pop(qnd_xml_context_t *context) { |
77 | qnd_xml_entry_t *cur = context->sp->entry; |
78 | |
79 | context->sp = context->sp->prev; |
80 | g_free(context->sp->next); |
81 | context->sp->next = NULL; |
82 | |
83 | /* did we just empty the stack? if yes, we're done parsing */ |
84 | if(context->sp == context->stack) { |
85 | printf("done parsing\n"); |
86 | context->done = TRUE; |
87 | } |
88 | |
89 | // printf("popped %s\n", cur->name); |
90 | // stack_dump(context); |
91 | return cur; |
92 | } |
93 | |
94 | gboolean update_buffer(qnd_xml_context_t *context) { |
95 | |
96 | /* if buffer is empty just fill it */ |
97 | if(!context->fill) { |
98 | context->cur = context->buffer; |
99 | context->fill = fread(context->buffer, 1l, |
100 | QND_XML_BUFFER_SIZE, context->file); |
101 | |
102 | if(context->fill < 0) { |
103 | printf("read error\n"); |
104 | context->fill = 0; |
105 | return FALSE; |
106 | } |
107 | context->bytes_read += context->fill; |
108 | return TRUE; |
109 | } |
110 | |
111 | /* shift remaining data down */ |
112 | int offset = context->cur - context->buffer; |
113 | g_memmove(context->buffer, context->cur, QND_XML_BUFFER_SIZE - offset); |
114 | context->fill -= offset; |
115 | int bytes_read = fread(context->buffer + QND_XML_BUFFER_SIZE - |
116 | offset, 1l, offset, context->file); |
117 | |
118 | context->cur = context->buffer; |
119 | if(bytes_read < 0) { |
120 | printf("read error\n"); |
121 | return FALSE; |
122 | } |
123 | |
124 | context->bytes_read += bytes_read; |
125 | context->fill += bytes_read; |
126 | return TRUE; |
127 | } |
128 | |
129 | /* |
130 | utf8: |
131 | 0xxxxxxx |
132 | 110xxxxx 10xxxxxx |
133 | 1110xxxx 10xxxxxx 10xxxxxx |
134 | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
135 | |
136 | Do we really need to handle this? Internally we are only |
137 | handling ASCII characters (e.g. '<', '>', '/', '?' etc.), |
138 | thus it's only important to be able to skip UTF-8 characters |
139 | correctly. Since no byte of a multi-byte UTF-8 sequence ever equals an ASCII character, |
140 | it should be possible to parse the file correctly while ignoring UTF-8. |
141 | */ |
142 | |
143 | /* TODO: this needs to be updated to cope with utf8 */ |
144 | inline char current_char(qnd_xml_context_t *context) { |
145 | return *context->cur; |
146 | } |
147 | |
148 | /* TODO: this needs to be updated to cope with utf8 */ |
149 | inline gboolean skip_char(qnd_xml_context_t *context) { |
150 | context->cur++; |
151 | /* TODO: check buffer range */ |
152 | return TRUE; |
153 | } |
154 | |
155 | gboolean skip_to_char(qnd_xml_context_t *context, char *chrs) { |
156 | do { |
157 | while(context->cur < context->buffer + context->fill) { |
158 | if(strchr(chrs, current_char(context))) { |
159 | return skip_char(context); |
160 | } |
161 | if(!skip_char(context)) return FALSE; |
162 | } |
163 | |
164 | /* try to get more data */ |
165 | if(!update_buffer(context)) |
166 | return FALSE; |
167 | |
168 | } while(context->fill); |
169 | |
170 | /* if we get here the system was unable to fill the buffer */ |
171 | return FALSE; |
172 | } |
173 | |
174 | gboolean buffer_overflow(qnd_xml_context_t *context) { |
175 | return(!(context->cur < context->buffer + context->fill)); |
176 | } |
177 | |
178 | gboolean get_element_name(qnd_xml_context_t *context) { |
179 | |
180 | /* drop everything before element from buffer */ |
181 | if(!update_buffer(context)) return FALSE; |
182 | |
183 | char *start = context->cur; |
184 | |
185 | if(buffer_overflow(context) || !isalpha(current_char(context))) { |
186 | printf("invalid element name #1 (%c)\n", current_char(context)); |
187 | return FALSE; |
188 | } |
189 | |
190 | while(!buffer_overflow(context) && !isblank(current_char(context)) && |
191 | (current_char(context) != '>')) { |
192 | if(!isalnum(current_char(context))) { |
193 | printf("invalid element name #2 (%c)\n", current_char(context)); |
194 | return FALSE; |
195 | } |
196 | if(!skip_char(context)) return FALSE; |
197 | } |
198 | |
199 | #if 0 |
200 | char *format = g_strdup_printf("Element name = %%.%ds\n", |
201 | context->cur-start); |
202 | printf(format, start); |
203 | g_free(format); |
204 | #endif |
205 | |
206 | /* handle special elements locally */ |
207 | if(context->mod) { |
208 | |
209 | } else { |
210 | qnd_xml_entry_t *entry = context->sp->entry, *hit = NULL; |
211 | |
212 | int i=0; |
213 | for(i=0;!hit && i<entry->num_children;i++) |
214 | if(strncmp(entry->children[i]->name, start, |
215 | strlen(entry->children[i]->name)) == 0) |
216 | hit = entry->children[i]; |
217 | |
218 | if(hit) |
219 | stack_push(context, hit); |
220 | else { |
221 | printf("element search failed\n"); |
222 | return FALSE; |
223 | } |
224 | } |
225 | |
226 | return TRUE; |
227 | } |
228 | |
229 | gboolean get_attribute_name(qnd_xml_context_t *context) { |
230 | |
231 | char *start = context->cur; |
232 | |
233 | if(buffer_overflow(context) || !isalpha(current_char(context))) { |
234 | printf("invalid attribute name\n"); |
235 | return FALSE; |
236 | } |
237 | |
238 | while(!buffer_overflow(context) && !isblank(current_char(context)) && |
239 | !(current_char(context) == '=')) { |
240 | if(!isalnum(current_char(context))) { |
241 | printf("invalid attribute name\n"); |
242 | return FALSE; |
243 | } |
244 | if(!skip_char(context)) return FALSE; |
245 | } |
246 | |
247 | /* attach a new attribute to chain */ |
248 | qnd_xml_attribute_t **attr = &context->attributes; |
249 | while(*attr) attr = &(*attr)->next; |
250 | |
251 | /* terminate name at closing '=' */ |
252 | *context->cur = '\0'; |
253 | |
254 | *attr = g_new0(qnd_xml_attribute_t, 1); |
255 | (*attr)->name = start; |
256 | |
257 | return TRUE; |
258 | } |
259 | |
260 | gboolean get_attribute_value(qnd_xml_context_t *context) { |
261 | |
262 | char *start = context->cur; |
263 | |
264 | while(!buffer_overflow(context) && !(current_char(context) == '\"')) |
265 | if(!skip_char(context)) return FALSE; |
266 | |
267 | /* attach a new attribute to chain */ |
268 | qnd_xml_attribute_t **attr = &context->attributes; |
269 | while((*attr) && (*attr)->next) attr = &(*attr)->next; |
270 | |
271 | if(!(*attr) || (*attr)->value) { |
272 | printf("error storing attribute value\n"); |
273 | return FALSE; |
274 | } |
275 | |
276 | /* terminate value at closing '\"' */ |
277 | *context->cur = '\0'; |
278 | (*attr)->value = start; |
279 | |
280 | return TRUE; |
281 | } |
282 | |
283 | gboolean skip_white(qnd_xml_context_t *context) { |
284 | /* skip all white space */ |
285 | while(!buffer_overflow(context) && isblank(current_char(context))) |
286 | if(!skip_char(context)) return FALSE; |
287 | |
288 | if(isblank(current_char(context))) { |
289 | printf("error skipping white space\n"); |
290 | return FALSE; |
291 | } |
292 | |
293 | return TRUE; |
294 | } |
295 | |
296 | gboolean get_attributes(qnd_xml_context_t *context) { |
297 | /* drop everything before element from buffer */ |
298 | |
299 | if(!update_buffer(context)) return FALSE; |
300 | if(!skip_white(context)) return FALSE; |
301 | |
302 | while(isalpha(current_char(context))) { |
303 | |
304 | /* get attribute name */ |
305 | if(!get_attribute_name(context)) return FALSE; |
306 | |
307 | if(!skip_to_char(context, "=")) return FALSE; |
308 | if(!skip_to_char(context, "\"")) return FALSE; |
309 | |
310 | if(!get_attribute_value(context)) return FALSE; |
311 | if(!skip_to_char(context, "\"")) return FALSE; |
312 | |
313 | if(!skip_white(context)) return FALSE; |
314 | } |
315 | return TRUE; |
316 | } |
317 | |
318 | void attributes_free(qnd_xml_context_t *context) { |
319 | qnd_xml_attribute_t *attr = context->attributes; |
320 | |
321 | while(attr) { |
322 | qnd_xml_attribute_t *next = attr->next; |
323 | g_free(attr); |
324 | attr = next; |
325 | } |
326 | |
327 | context->attributes = NULL; |
328 | } |
329 | |
330 | void qnd_xml_cleanup(qnd_xml_context_t *context) { |
331 | /* todo: clean stack */ |
332 | |
333 | if(context->file) fclose(context->file); |
334 | g_free(context); |
335 | } |
336 | |
337 | gboolean get_element(qnd_xml_context_t *context) { |
338 | |
339 | /* skip all text */ |
340 | if(!skip_to_char(context, "<")) return FALSE; |
341 | |
342 | /* handle optional modifier */ |
343 | if(current_char(context) == '?' || current_char(context) == '!') { |
344 | context->mod = current_char(context); |
345 | if(!skip_char(context)) return FALSE; |
346 | } else |
347 | context->mod = 0; |
348 | |
349 | /* check for closing element */ |
350 | if(current_char(context) == '/') { |
351 | context->mod = '/'; |
352 | if(!skip_char(context)) return FALSE; |
353 | } |
354 | |
355 | if(!get_element_name(context)) return FALSE; |
356 | if(!get_attributes(context)) return FALSE; |
357 | |
358 | if(context->mod && context->mod != '/') { |
359 | if(current_char(context) != context->mod) { |
360 | printf("modifier mismatch\n"); |
361 | return FALSE; |
362 | } |
363 | |
364 | /* skip the modifier */ |
365 | if(!skip_char(context)) return FALSE; |
366 | } |
367 | |
368 | if(!skip_white(context)) return FALSE; |
369 | |
370 | /* call callback now since the entry may be taken from stack */ |
371 | if(!context->mod && context->sp->entry->cb) |
372 | if(!context->sp->entry->cb(context->sp, |
373 | context->attributes, context->userdata)) |
374 | return FALSE; |
375 | |
376 | if(context->mod == '/') |
377 | stack_pop(context); |
378 | else { |
379 | /* if this element closes here it's cleaned up immediately */ |
380 | if(current_char(context) == '/') { |
381 | if(!skip_char(context)) return FALSE; |
382 | stack_pop(context); |
383 | } |
384 | } |
385 | |
386 | if(current_char(context) != '>') { |
387 | printf("element closing error\n"); |
388 | return FALSE; |
389 | } |
390 | |
391 | if(!skip_char(context)) return FALSE; |
392 | |
393 | attributes_free(context); |
394 | |
395 | return TRUE; |
396 | } |
397 | |
398 | gpointer qnd_xml_parse(char *name, qnd_xml_entry_t *root, gpointer userdata) { |
399 | qnd_xml_context_t *context = g_new0(qnd_xml_context_t, 1); |
400 | context->cur = context->buffer; |
401 | context->userdata = userdata; |
402 | |
403 | /* init stack by adding root entry */ |
404 | context->sp = context->stack = g_new0(qnd_xml_stack_t, 1); |
405 | context->sp->entry = root; |
406 | |
407 | /* check if file exists and is a regular file */ |
408 | if(!g_file_test(name, G_FILE_TEST_IS_REGULAR)) { |
409 | printf("file doesn't exist or is not a regular file\n"); |
410 | qnd_xml_cleanup(context); |
411 | return FALSE; |
412 | } |
413 | |
414 | /* open file */ |
415 | context->file = g_fopen(name, "r"); |
416 | if(!context->file) { |
417 | printf("unable to open file\n"); |
418 | qnd_xml_cleanup(context); |
419 | return FALSE; |
420 | } |
421 | |
422 | printf("file is open\n"); |
423 | |
424 | /* get file length */ |
425 | fseek(context->file, 0l, SEEK_END); |
426 | context->total = ftell(context->file); |
427 | fseek(context->file, 0l, SEEK_SET); |
428 | |
429 | printf("file length is %d bytes\n", context->total); |
430 | |
431 | gboolean error = FALSE; |
432 | do |
433 | error = !get_element(context); |
434 | while(!error && !context->done); |
435 | |
436 | if(error) printf("parser ended with error\n"); |
437 | else printf("parser ended successfully\n"); |
438 | |
439 | printf("current bytes read: %d of %d\n", |
440 | context->bytes_read, context->total); |
441 | printf("current buffer offset: %d\n", context->cur - context->buffer); |
442 | |
443 | /* user pointer[0] of root element is retval */ |
444 | gpointer retval = error?NULL:context->stack->userdata[0]; |
445 | |
446 | /* close file and cleanup */ |
447 | qnd_xml_cleanup(context); |
448 | |
449 | return retval; |
450 | } |
451 | |
452 | char *qnd_xml_get_prop(qnd_xml_attribute_t *attr, char *name) { |
453 | while(attr) { |
454 | if(strcasecmp(name, attr->name) == 0) |
455 | return attr->value; |
456 | |
457 | attr = attr->next; |
458 | } |
459 | return NULL; |
460 | } |
461 | |
462 | char *qnd_xml_get_prop_str(qnd_xml_attribute_t *attr, char *name) { |
463 | char *value = qnd_xml_get_prop(attr, name); |
464 | if(value) return g_strdup(value); |
465 | return NULL; |
466 | } |
467 | |
468 | gboolean qnd_xml_get_prop_double(qnd_xml_attribute_t *attr, char *name, |
469 | double *dest) { |
470 | char *value = qnd_xml_get_prop(attr, name); |
471 | if(!value) return FALSE; |
472 | |
473 | *dest = g_ascii_strtod(value, NULL); |
474 | return TRUE; |
475 | } |
476 | |
477 | gboolean qnd_xml_get_prop_gulong(qnd_xml_attribute_t *attr, char *name, |
478 | gulong *dest) { |
479 | char *value = qnd_xml_get_prop(attr, name); |
480 | if(!value) return FALSE; |
481 | |
482 | *dest = strtoul(value, NULL, 10); |
483 | return TRUE; |
484 | } |
485 | |
486 | gboolean qnd_xml_get_prop_is(qnd_xml_attribute_t *attr, char *name, |
487 | char *ref) { |
488 | char *value = qnd_xml_get_prop(attr, name); |
489 | if(!value) return FALSE; |
490 | |
491 | return g_strcasecmp(ref, value); |
492 | } |