Working multifeed RSS - needs testing
[monky] / src / prss.c
1 /*
2  * Copyright (c) 2007 Mikko Sysikaski <mikko.sysikaski@gmail.com>
3  *                    Toni Spets <toni.spets@gmail.com>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "prss.h"
24
/* Option flags handed to xmlReadMemory()/xmlReadFile() below.  Defaults to 0;
 * define PARSE_OPTIONS before this point (e.g. on the compiler command line)
 * to override. */
#if !defined(PARSE_OPTIONS)
#define PARSE_OPTIONS 0
#endif
28
29 static PRSS* get_data(xmlDocPtr doc);
30
31 PRSS* prss_parse_data(const char* xml_data)
32 {
33         xmlDocPtr doc = xmlReadMemory(xml_data, strlen(xml_data), "", NULL, PARSE_OPTIONS);
34         if (!doc)
35                 return NULL;
36         
37         return get_data(doc);
38 }
39 PRSS* prss_parse_file(const char* xml_file)
40 {
41         xmlDocPtr doc = xmlReadFile(xml_file, NULL, PARSE_OPTIONS);
42         if (!doc)
43                 return NULL;
44         
45         return get_data(doc);
46 }
47 void prss_free(PRSS* data)
48 {
49         if (!data)
50                 return;
51         xmlFreeDoc(data->_data);
52         free(data->items);
53         free(data);
54 }
55
56 static inline void prss_null(PRSS* p)
57 {
58         p->title = p->link = p->description = p->language = NULL;
59         p->items = NULL;
60         p->item_count = 0;
61 }
62 static inline void prss_null_item(PRSS_Item* i)
63 {
64         i->title = i->link = i->description = i->category = i->pubdate = NULL;
65 }
66
67 static inline void read_item(PRSS_Item* res, xmlNodePtr data)
68 {
69         prss_null_item(res);
70         
71         res->title = res->link = res->description = NULL;
72         for(; data; data = data->next) {
73                 if (data->type != XML_ELEMENT_NODE)
74                         continue;
75                 xmlNodePtr child = data->children;
76                 if (!child)
77                         continue;
78         
79                 if (!strcmp((char*)data->name, "title")) {
80                         res->title = (char*)child->content;
81                 } else if (!strcmp((char*)data->name, "link")) {
82                         res->link = (char*)child->content;
83                 } else if (!strcmp((char*)data->name, "description")) {
84                         res->description = (char*)child->content;
85                 } else if (!strcmp((char*)data->name, "category")) {
86                         res->category = (char*)child->content;
87                 } else if (!strcmp((char*)data->name, "pubDate")) {
88                         res->pubdate = (char*)child->content;
89                 } else if (!strcmp((char*)data->name, "guid")) {
90                         res->guid = (char*)child->content;
91                 }
92         }
93 }
94 static inline void read_element(PRSS* res, xmlNodePtr n)
95 {
96         if (n->type != XML_ELEMENT_NODE)
97                 return;
98         xmlNodePtr child = n->children;
99         if (!child)
100                 return;
101                 
102         if (!strcmp((char*)n->name, "title")) {
103                 res->title = (char*)child->content;
104         } else if (!strcmp((char*)n->name, "link")) {
105                 res->link = (char*)child->content;
106         } else if (!strcmp((char*)n->name, "description")) {
107                 res->description = (char*)child->content;
108         } else if (!strcmp((char*)n->name, "language")) {
109                 res->language = (char*)child->content;
110         } else if (!strcmp((char*)n->name, "pubDate")) {
111                 res->pubdate = (char*)child->content;
112         } else if (!strcmp((char*)n->name, "lastBuildDate")) {
113                 res->lastbuilddate = (char*)child->content;
114         } else if (!strcmp((char*)n->name, "generator")) {
115                 res->generator = (char*)child->content;
116         } else if (!strcmp((char*)n->name, "docs")) {
117                 res->docs = (char*)child->content;
118         } else if (!strcmp((char*)n->name, "managingEditor")) {
119                 res->managingeditor = (char*)child->content;
120         } else if (!strcmp((char*)n->name, "webMaster")) {
121                 res->webmaster = (char*)child->content;
122         } else if (!strcmp((char*)n->name, "item")) {
123                 read_item(&res->items[res->item_count++], n->children);
124         }
125 }
126
127 static inline int parse_rss_2_0(PRSS* res, xmlNodePtr root)
128 {
129         xmlNodePtr channel = root->children;
130         while(channel && (channel->type!=XML_ELEMENT_NODE || strcmp((char*)channel->name, "channel")))
131                 channel = channel->next;
132         if (!channel)
133                 return 0;
134
135         int items = 0;
136         xmlNodePtr n;
137         for(n = channel->children; n; n = n->next)
138                 if (n->type==XML_ELEMENT_NODE && !strcmp((char*)n->name, "item"))
139                         ++items;
140         
141         res->items = malloc(items*sizeof(PRSS_Item));
142         res->item_count = 0;
143
144         for(n = channel->children; n; n = n->next) {
145                 read_element(res, n);
146         }
147
148         return 1;
149 }
150 static inline int parse_rss_1_0(PRSS* res, xmlNodePtr root)
151 {
152         int items = 0;
153         xmlNodePtr n;
154         for(n = root->children; n; n = n->next) {
155                 if (n->type==XML_ELEMENT_NODE) {
156                         if (!strcmp((char*)n->name, "item"))
157                                 ++items;
158                         else if (!strcmp((char*)n->name, "channel")) {
159                                 xmlNodePtr i;
160                                 for(i = n->children; i; i = i->next) {
161                                         read_element(res, i);
162                                 }
163                         }
164                 }
165         }
166         
167         res->items = malloc(items*sizeof(PRSS_Item));
168         res->item_count = 0;
169
170         for(n = root->children; n; n = n->next) {
171                 if (n->type==XML_ELEMENT_NODE && !strcmp((char*)n->name, "item"))
172                         read_item(&res->items[res->item_count++], n->children);
173         }
174         
175         return 1;
176 }
177 static inline int parse_rss_0_9x(PRSS* res, xmlNodePtr root)
178 {
179         // almost same...
180         return parse_rss_2_0(res, root);
181 }
182
183 PRSS* get_data(xmlDocPtr doc)
184 {
185         xmlNodePtr root = xmlDocGetRootElement(doc);
186         PRSS* result = malloc(sizeof(PRSS));
187         prss_null(result);
188         result->_data = doc;
189         do {
190                 if (root->type == XML_ELEMENT_NODE) {
191                         if (!strcmp((char*)root->name, "RDF")) {
192                                 // RSS 1.0 document
193                                 if (!parse_rss_1_0(result, root)) {
194                                         free(result);
195                                         return NULL;
196                                 }
197                                 return result;
198                         } else if (!strcmp((char*)root->name, "rss")) {
199                                 // RSS 2.0 or <1.0 document
200                                 if (!parse_rss_2_0(result, root)) {
201                                         free(result);
202                                         return NULL;
203                                 }
204                                 return result;
205                         }
206                 }
207                 root = root->next;
208         } while(root);
209         free(result);
210         return NULL;
211 }