Contents of /trunk/src/qnd_xml.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 39 - (hide annotations)
Fri Jan 16 20:01:07 2009 UTC (15 years, 4 months ago) by harbaum
File MIME type: text/plain
File size: 12676 byte(s)
Quick'n dirty XML parser
1 harbaum 39 /*
2     * Copyright (C) 2008 Till Harbaum <till@harbaum.org>.
3     *
4     * This file is part of OSM2Go.
5     *
6     * OSM2Go is free software: you can redistribute it and/or modify
7     * it under the terms of the GNU General Public License as published by
8     * the Free Software Foundation, either version 3 of the License, or
9     * (at your option) any later version.
10     *
11     * OSM2Go is distributed in the hope that it will be useful,
12     * but WITHOUT ANY WARRANTY; without even the implied warranty of
13     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14     * GNU General Public License for more details.
15     *
16     * You should have received a copy of the GNU General Public License
17     * along with OSM2Go. If not, see <http://www.gnu.org/licenses/>.
18     */
19    
20     /*
21     * qnd_xml - quick'n dirty XML is a very small and very fast implementation
22     * of an XML parser. The idea is to replace the usage of libxml2
23     * by this whenever performance is an issue. This is the case
24     * with reading the *.osm files on mobile devices. A powerful
25     * desktop will likely still use the libxml as it's just "better"
26     */
27    
28     #include "appdata.h"
29    
30     #include <ctype.h>
31     int isblank(int c);
32    
33     #define QND_XML_BUFFER_SIZE 1024
34     typedef struct {
35     gpointer userdata;
36    
37     FILE *file;
38     int total, bytes_read;
39    
40     char buffer[QND_XML_BUFFER_SIZE], *cur;
41     int fill;
42    
43     qnd_xml_stack_t *stack, *sp;
44     int mod; // modifier (?, !, /) in element
45     gboolean done;
46    
47     qnd_xml_attribute_t *attributes;
48    
49     } qnd_xml_context_t;
50    
51    
52     void stack_dump(qnd_xml_context_t *context) {
53     qnd_xml_stack_t *stack = context->stack;
54    
55     printf("Stack:\n");
56     while(stack) {
57     if(stack == context->sp) printf(" *");
58     else printf(" ");
59    
60     printf("%s\n", stack->entry->name);
61     stack = stack->next;
62     }
63     }
64    
65     void stack_push(qnd_xml_context_t *context, qnd_xml_entry_t *entry) {
66     // printf("push %s\n", entry->name);
67    
68     context->sp->next = g_new0(qnd_xml_stack_t, 1);
69     context->sp->next->prev = context->sp;
70     context->sp = context->sp->next;
71     context->sp->entry = entry;
72    
73     // stack_dump(context);
74     }
75    
76     qnd_xml_entry_t *stack_pop(qnd_xml_context_t *context) {
77     qnd_xml_entry_t *cur = context->sp->entry;
78    
79     context->sp = context->sp->prev;
80     g_free(context->sp->next);
81     context->sp->next = NULL;
82    
83     /* did we just empty the stack? if yes, we're done parsing */
84     if(context->sp == context->stack) {
85     printf("done parsing\n");
86     context->done = TRUE;
87     }
88    
89     // printf("popped %s\n", cur->name);
90     // stack_dump(context);
91     return cur;
92     }
93    
94     gboolean update_buffer(qnd_xml_context_t *context) {
95    
96     /* if buffer is empty just fill it */
97     if(!context->fill) {
98     context->cur = context->buffer;
99     context->fill = fread(context->buffer, 1l,
100     QND_XML_BUFFER_SIZE, context->file);
101    
102     if(context->fill < 0) {
103     printf("read error\n");
104     context->fill = 0;
105     return FALSE;
106     }
107     context->bytes_read += context->fill;
108     return TRUE;
109     }
110    
111     /* shift remaining data down */
112     int offset = context->cur - context->buffer;
113     g_memmove(context->buffer, context->cur, QND_XML_BUFFER_SIZE - offset);
114     context->fill -= offset;
115     int bytes_read = fread(context->buffer + QND_XML_BUFFER_SIZE -
116     offset, 1l, offset, context->file);
117    
118     context->cur = context->buffer;
119     if(bytes_read < 0) {
120     printf("read error\n");
121     return FALSE;
122     }
123    
124     context->bytes_read += bytes_read;
125     context->fill += bytes_read;
126     return TRUE;
127     }
128    
129     /*
130     utf8:
131     0xxxxxxx
132     110xxxxx 10xxxxxx
133     1110xxxx 10xxxxxx 10xxxxxx
134     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
135    
136     Do we really need to handle this? Internally we are only
137     handling ascii characters (e.g. '<', '>', '/', '?' etc.)
138     thus it's only important to be able to skip utf8 characters
139     correctly. Since no byte of a multi-byte utf8 sequence ever equals an
140     ascii character, it should be possible to parse the file correctly when ignoring utf8
141     */
142    
143     /* TODO: this needs to be updated to cope with utf8 */
144     inline char current_char(qnd_xml_context_t *context) {
145     return *context->cur;
146     }
147    
148     /* TODO: this needs to be updated to cope with utf8 */
149     inline gboolean skip_char(qnd_xml_context_t *context) {
150     context->cur++;
151     /* TODO: check buffer range */
152     return TRUE;
153     }
154    
155     gboolean skip_to_char(qnd_xml_context_t *context, char *chrs) {
156     do {
157     while(context->cur < context->buffer + context->fill) {
158     if(strchr(chrs, current_char(context))) {
159     return skip_char(context);
160     }
161     if(!skip_char(context)) return FALSE;
162     }
163    
164     /* try to get more data */
165     if(!update_buffer(context))
166     return FALSE;
167    
168     } while(context->fill);
169    
170     /* if we get here the system was unable to fill the buffer */
171     return FALSE;
172     }
173    
174     gboolean buffer_overflow(qnd_xml_context_t *context) {
175     return(!(context->cur < context->buffer + context->fill));
176     }
177    
178     gboolean get_element_name(qnd_xml_context_t *context) {
179    
180     /* drop everything before element from buffer */
181     if(!update_buffer(context)) return FALSE;
182    
183     char *start = context->cur;
184    
185     if(buffer_overflow(context) || !isalpha(current_char(context))) {
186     printf("invalid element name #1 (%c)\n", current_char(context));
187     return FALSE;
188     }
189    
190     while(!buffer_overflow(context) && !isblank(current_char(context)) &&
191     (current_char(context) != '>')) {
192     if(!isalnum(current_char(context))) {
193     printf("invalid element name #2 (%c)\n", current_char(context));
194     return FALSE;
195     }
196     if(!skip_char(context)) return FALSE;
197     }
198    
199     #if 0
200     char *format = g_strdup_printf("Element name = %%.%ds\n",
201     context->cur-start);
202     printf(format, start);
203     g_free(format);
204     #endif
205    
206     /* handle special elements locally */
207     if(context->mod) {
208    
209     } else {
210     qnd_xml_entry_t *entry = context->sp->entry, *hit = NULL;
211    
212     int i=0;
213     for(i=0;!hit && i<entry->num_children;i++)
214     if(strncmp(entry->children[i]->name, start,
215     strlen(entry->children[i]->name)) == 0)
216     hit = entry->children[i];
217    
218     if(hit)
219     stack_push(context, hit);
220     else {
221     printf("element search failed\n");
222     return FALSE;
223     }
224     }
225    
226     return TRUE;
227     }
228    
229     gboolean get_attribute_name(qnd_xml_context_t *context) {
230    
231     char *start = context->cur;
232    
233     if(buffer_overflow(context) || !isalpha(current_char(context))) {
234     printf("invalid attribute name\n");
235     return FALSE;
236     }
237    
238     while(!buffer_overflow(context) && !isblank(current_char(context)) &&
239     !(current_char(context) == '=')) {
240     if(!isalnum(current_char(context))) {
241     printf("invalid attribute name\n");
242     return FALSE;
243     }
244     if(!skip_char(context)) return FALSE;
245     }
246    
247     /* attach a new attribute to chain */
248     qnd_xml_attribute_t **attr = &context->attributes;
249     while(*attr) attr = &(*attr)->next;
250    
251     /* terminate name at closing '=' */
252     *context->cur = '\0';
253    
254     *attr = g_new0(qnd_xml_attribute_t, 1);
255     (*attr)->name = start;
256    
257     return TRUE;
258     }
259    
260     gboolean get_attribute_value(qnd_xml_context_t *context) {
261    
262     char *start = context->cur;
263    
264     while(!buffer_overflow(context) && !(current_char(context) == '\"'))
265     if(!skip_char(context)) return FALSE;
266    
267     /* attach a new attribute to chain */
268     qnd_xml_attribute_t **attr = &context->attributes;
269     while((*attr) && (*attr)->next) attr = &(*attr)->next;
270    
271     if(!(*attr) || (*attr)->value) {
272     printf("error storing attribute value\n");
273     return FALSE;
274     }
275    
276     /* terminate value at closing '\"' */
277     *context->cur = '\0';
278     (*attr)->value = start;
279    
280     return TRUE;
281     }
282    
283     gboolean skip_white(qnd_xml_context_t *context) {
284     /* skip all white space */
285     while(!buffer_overflow(context) && isblank(current_char(context)))
286     if(!skip_char(context)) return FALSE;
287    
288     if(isblank(current_char(context))) {
289     printf("error skipping white space\n");
290     return FALSE;
291     }
292    
293     return TRUE;
294     }
295    
296     gboolean get_attributes(qnd_xml_context_t *context) {
297     /* drop everything before element from buffer */
298    
299     if(!update_buffer(context)) return FALSE;
300     if(!skip_white(context)) return FALSE;
301    
302     while(isalpha(current_char(context))) {
303    
304     /* get attribute name */
305     if(!get_attribute_name(context)) return FALSE;
306    
307     if(!skip_to_char(context, "=")) return FALSE;
308     if(!skip_to_char(context, "\"")) return FALSE;
309    
310     if(!get_attribute_value(context)) return FALSE;
311     if(!skip_to_char(context, "\"")) return FALSE;
312    
313     if(!skip_white(context)) return FALSE;
314     }
315     return TRUE;
316     }
317    
318     void attributes_free(qnd_xml_context_t *context) {
319     qnd_xml_attribute_t *attr = context->attributes;
320    
321     while(attr) {
322     qnd_xml_attribute_t *next = attr->next;
323     g_free(attr);
324     attr = next;
325     }
326    
327     context->attributes = NULL;
328     }
329    
330     void qnd_xml_cleanup(qnd_xml_context_t *context) {
331     /* todo: clean stack */
332    
333     if(context->file) fclose(context->file);
334     g_free(context);
335     }
336    
337     gboolean get_element(qnd_xml_context_t *context) {
338    
339     /* skip all text */
340     if(!skip_to_char(context, "<")) return FALSE;
341    
342     /* handle optional modifier */
343     if(current_char(context) == '?' || current_char(context) == '!') {
344     context->mod = current_char(context);
345     if(!skip_char(context)) return FALSE;
346     } else
347     context->mod = 0;
348    
349     /* check for closing element */
350     if(current_char(context) == '/') {
351     context->mod = '/';
352     if(!skip_char(context)) return FALSE;
353     }
354    
355     if(!get_element_name(context)) return FALSE;
356     if(!get_attributes(context)) return FALSE;
357    
358     if(context->mod && context->mod != '/') {
359     if(current_char(context) != context->mod) {
360     printf("modifier mismatch\n");
361     return FALSE;
362     }
363    
364     /* skip the modifier */
365     if(!skip_char(context)) return FALSE;
366     }
367    
368     if(!skip_white(context)) return FALSE;
369    
370     /* call callback now since the entry may be taken from stack */
371     if(!context->mod && context->sp->entry->cb)
372     if(!context->sp->entry->cb(context->sp,
373     context->attributes, context->userdata))
374     return FALSE;
375    
376     if(context->mod == '/')
377     stack_pop(context);
378     else {
379     /* if this element closes here it's cleaned up immediately */
380     if(current_char(context) == '/') {
381     if(!skip_char(context)) return FALSE;
382     stack_pop(context);
383     }
384     }
385    
386     if(current_char(context) != '>') {
387     printf("element closing error\n");
388     return FALSE;
389     }
390    
391     if(!skip_char(context)) return FALSE;
392    
393     attributes_free(context);
394    
395     return TRUE;
396     }
397    
398     gpointer qnd_xml_parse(char *name, qnd_xml_entry_t *root, gpointer userdata) {
399     qnd_xml_context_t *context = g_new0(qnd_xml_context_t, 1);
400     context->cur = context->buffer;
401     context->userdata = userdata;
402    
403     /* init stack by adding root entry */
404     context->sp = context->stack = g_new0(qnd_xml_stack_t, 1);
405     context->sp->entry = root;
406    
407     /* check if file exists and is a regular file */
408     if(!g_file_test(name, G_FILE_TEST_IS_REGULAR)) {
409     printf("file doesn't exist or is not a regular file\n");
410     qnd_xml_cleanup(context);
411     return FALSE;
412     }
413    
414     /* open file */
415     context->file = g_fopen(name, "r");
416     if(!context->file) {
417     printf("unable to open file\n");
418     qnd_xml_cleanup(context);
419     return FALSE;
420     }
421    
422     printf("file is open\n");
423    
424     /* get file length */
425     fseek(context->file, 0l, SEEK_END);
426     context->total = ftell(context->file);
427     fseek(context->file, 0l, SEEK_SET);
428    
429     printf("file length is %d bytes\n", context->total);
430    
431     gboolean error = FALSE;
432     do
433     error = !get_element(context);
434     while(!error && !context->done);
435    
436     if(error) printf("parser ended with error\n");
437     else printf("parser ended successfully\n");
438    
439     printf("current bytes read: %d of %d\n",
440     context->bytes_read, context->total);
441     printf("current buffer offset: %d\n", context->cur - context->buffer);
442    
443     /* user pointer[0] of root element is retval */
444     gpointer retval = error?NULL:context->stack->userdata[0];
445    
446     /* close file and cleanup */
447     qnd_xml_cleanup(context);
448    
449     return retval;
450     }
451    
452     char *qnd_xml_get_prop(qnd_xml_attribute_t *attr, char *name) {
453     while(attr) {
454     if(strcasecmp(name, attr->name) == 0)
455     return attr->value;
456    
457     attr = attr->next;
458     }
459     return NULL;
460     }
461    
462     char *qnd_xml_get_prop_str(qnd_xml_attribute_t *attr, char *name) {
463     char *value = qnd_xml_get_prop(attr, name);
464     if(value) return g_strdup(value);
465     return NULL;
466     }
467    
468     gboolean qnd_xml_get_prop_double(qnd_xml_attribute_t *attr, char *name,
469     double *dest) {
470     char *value = qnd_xml_get_prop(attr, name);
471     if(!value) return FALSE;
472    
473     *dest = g_ascii_strtod(value, NULL);
474     return TRUE;
475     }
476    
477     gboolean qnd_xml_get_prop_gulong(qnd_xml_attribute_t *attr, char *name,
478     gulong *dest) {
479     char *value = qnd_xml_get_prop(attr, name);
480     if(!value) return FALSE;
481    
482     *dest = strtoul(value, NULL, 10);
483     return TRUE;
484     }
485    
486     gboolean qnd_xml_get_prop_is(qnd_xml_attribute_t *attr, char *name,
487     char *ref) {
488     char *value = qnd_xml_get_prop(attr, name);
489     if(!value) return FALSE;
490    
491     return g_strcasecmp(ref, value);
492     }