1 /* Queries Google movies for the theaters of a city and parses
2 * the response using libxml2.
5 #include <libxml/HTMLparser.h>
6 #include <libxml/tree.h>
7 #include <libsoup/soup.h>
12 #define BASE_URL "http://www.google.com/movies?near=%s"
/* Theater record. Only the 'data' member is visible in this view; other code
 * in the file reads and writes 'name' and 'address' through a Theater
 * pointer, so those members and the typedef name 'Theater' are presumably
 * declared on the lines not shown here -- TODO confirm. */
14 typedef struct struct_theater
18 xmlNodePtr data; //points to the tree node with the theater info, including movies (getTheaterMovies() searches its children for the "showtimes" div)
/* Show time record; its members are not visible in this view.
 * getShowtimes() stores a g_strndup()'d string in the 'time' member of a
 * ShowTime, which is presumably this struct's typedef name -- TODO confirm. */
22 typedef struct struct_showtime
/* Movie record; its members are not visible in this view.
 * getMovieList() fills the 'title', 'info' and 'showTimes' members of a
 * Movie, which is presumably this struct's typedef name -- TODO confirm. */
28 typedef struct struct_movie
37 /** Search for a sibling node by the name of the sibling node.
 *
 * The scan starts at node->next, so 'node' itself is never tested, and it
 * only moves forward through the ->next links.
 * Names are compared with strncmp() over at most 'nameLen' bytes, so this is
 * a prefix match unless 'nameLen' also covers the terminating NUL of 'name'.
 *
 * 'node' is dereferenced without a NULL check in the visible lines.
 * NOTE(review): the lines after the loop are not visible in this view;
 * presumably the function returns 'sibling' (NULL when nothing matched) --
 * confirm. */
38 xmlNodePtr getSiblingByName(xmlNodePtr node, xmlChar* name, int nameLen)
41 xmlNodePtr sibling = node->next; /* first candidate is the node after 'node' */
42 while((sibling != NULL) && (strncmp(sibling->name, name, nameLen) != 0)) {
43 sibling = sibling->next;
50 /** Search a child node by its node name.
 *
 * Delegates to getSiblingByName(node->children, name, nameLen). That helper
 * starts scanning at the node after the one it is given, so the first child
 * itself is not compared here.
 * NOTE(review): confirm that skipping the first child is intended.
 * node->children is also passed on without a NULL check, and
 * getSiblingByName() dereferences its argument. */
51 xmlNodePtr getChildByName(xmlNodePtr node, xmlChar* name, int nameLen)
53 return getSiblingByName(node->children, name, nameLen);
56 /** Search the first sibling node that has an attribute 'attr'
57 * with the value 'attrValue'.
 *
 * Unlike getSiblingByName(), the scan starts at 'sibling' itself and then
 * follows the ->next links; a NULL 'sibling' simply skips the loop.
 * The attribute value is compared with strncmp() over at most
 * 'attrValueLen' bytes, so this is a prefix match unless 'attrValueLen'
 * also covers the terminating NUL of 'attrValue'.
 *
 * NOTE(review): per the libxml2 documentation xmlGetProp() returns NULL when
 * the attribute is absent and otherwise a copy the caller must xmlFree().
 * The lines between the visible ones (66, 68-71) are not shown here, so
 * confirm that they guard strncmp() against a NULL 'value', set 'result',
 * and free 'value'. The return statement is not visible either; presumably
 * 'result' is returned (NULL when nothing matched). */
58 xmlNodePtr getFirstSiblingByAttributeValue(
59 xmlNodePtr sibling, xmlChar* attr, xmlChar * attrValue, int attrValueLen)
61 xmlNodePtr tempNode = sibling;
62 xmlNodePtr result = NULL;
64 while ((tempNode != NULL) && (result == NULL)) { /* stop at end of list or at first hit */
65 xmlChar* value = xmlGetProp(tempNode, attr);
67 if (strncmp(value, attrValue, attrValueLen) == 0) {
72 tempNode = tempNode->next;
78 /** Search the first child node that has an attribute 'attr' with
79 * value 'attrValue'.
 *
 * Thin wrapper around getFirstSiblingByAttributeValue(): it passes
 * node->children, and that helper starts at the node it is given, so the
 * first child is a candidate too. The visible part of the helper only
 * follows ->next, so only direct children are examined.
 * 'node' is dereferenced without a NULL check in the visible lines; callers
 * must pass a non-NULL node. */
80 xmlNodePtr getFirstChildByAttributeValue(
81 xmlNodePtr node, xmlChar* attr, xmlChar * attrValue, int attrValueLen)
83 return getFirstSiblingByAttributeValue(node->children, attr, attrValue, attrValueLen);
86 /** Advances N sibling nodes in the node list.
 *
 * Only the signature and the loop header are visible in this view: the loop
 * counts 'i' up to 'siblings', with 'i' declared and initialised on a line
 * not shown here.
 * NOTE(review): the loop body and the return are not visible; presumably it
 * steps through ->next and returns the node reached -- confirm, including
 * what happens when the list ends before 'siblings' steps. */
87 xmlNodePtr jumpXSiblings(xmlNodePtr node, int siblings)
92 for(; i<siblings; i++) {
/* Presumably counts the direct children of 'node', judging by the name and
 * the starting cursor; the counting loop and the return are not visible in
 * this view -- TODO confirm. */
100 int childrenCount(xmlNodePtr node)
103 xmlNodePtr nav = node->children; /* cursor starts at the first child; 'node' is dereferenced here */
113 /** Search the <div> with the results and returns it, or NULL
114 * if it couldn't be found.
 *
 * Path followed from 'root' (the <html> element):
 *   <body> -> child with id="results" -> child with id="movie_results"
 *          -> child with class="movie_results".
 *
 * The lengths 8 and 14 are one more than the length of "results" and
 * "movie_results", so strncmp() also compares the terminating NUL and the
 * attribute value must match exactly. "body" is compared over 4 bytes only,
 * i.e. as a prefix.
 * getSiblingByName() starts at the node after the one it is given, so the
 * first child of 'root' is never tested for being <body>.
 *
 * NOTE(review): whether 'body' is checked for NULL before it is used, and
 * what the NULL branches and the final return do, is on lines not visible
 * in this view -- confirm. */
115 xmlNodePtr getMovieResultsDiv(xmlNodePtr root)
118 xmlNodePtr body = getSiblingByName(root->children, "body", 4);
121 xmlNodePtr tempNode = getFirstChildByAttributeValue(body, "id", "results", 8);
123 if (tempNode == NULL) {
128 //<div id="movie_results">
129 tempNode = getFirstChildByAttributeValue(tempNode, "id", "movie_results", 14);
131 if (tempNode == NULL) {
136 //<div class="movie_results">
137 tempNode = getFirstChildByAttributeValue(tempNode, "class", "movie_results", 14);
139 if (tempNode == NULL) {
149 /** Parses the results and returns a list with all the theaters.
150 * Theater info is parsed and returned as Theater 'objects', the movie
151 * info is not parsed.
 *
 * For each node found with a class attribute starting with "theater" (the
 * compare length 7 excludes the NUL, so it is a prefix match) a Theater is
 * malloc()'d and appended to the result list. Its 'name' is the text content
 * of desc/name/<first child>/<first child> and its 'address' the text
 * content of desc/info. Per the libxml2 documentation xmlNodeGetContent()
 * returns a newly allocated string that must be released with xmlFree().
 *
 * NOTE(review): the enclosing loop that advances 'nav', the assignment of
 * t->data and the return are on lines not visible in this view.
 * NOTE(review): t->name and t->address are assigned only inside their
 * respective branches; unless hidden lines give them defaults they stay
 * uninitialised when the "desc", "name" or "info" div is missing -- confirm.
 * NOTE(review): the malloc() result is used on the following lines without
 * a NULL check. */
152 GList* getTheaterList(xmlNodePtr movieResults)
154 //<div class="movie_results"><div class="theater"/><div class="theater"/>...
155 xmlNodePtr nav = movieResults->children;
156 xmlNodePtr tmp1, tmp2, tmp3 = NULL; /* only tmp3 is initialised by this declaration */
158 GList* resultList = NULL;
161 tmp1 = getFirstSiblingByAttributeValue(nav, "class", "theater", 7);
164 * <div class="theater">
166 * <div class="name"/><div class="info"/>
168 * <div class="showtimes"/>
172 if (tmp1 != NULL) { //its theater data
173 Theater* t = malloc(sizeof(Theater));
174 tmp2 = getFirstChildByAttributeValue(tmp1, "class", "desc", 4);
175 if (tmp2 != NULL) { //has desc
176 tmp3 = getFirstChildByAttributeValue(tmp2, "class", "name", 4);
177 if ((tmp3 != NULL) && (tmp3->children != NULL) && (tmp3->children->children != NULL)) { //<div class="name"><a><span/>
178 t->name = xmlNodeGetContent(tmp3->children->children);
180 tmp3 = getFirstChildByAttributeValue(tmp2, "class", "info", 4);
181 if (tmp3 != NULL) { //<div class="info">
182 t->address = xmlNodeGetContent(tmp3);
186 resultList = g_list_append(resultList, t);
/* Splits 'times' on single spaces and builds a list with one ShowTime per
 * token. Each ShowTime is malloc()'d and its 'time' member is a
 * g_strndup() copy of at most the first 5 bytes of the token (enough for an
 * "HH:MM" string, presumably the expected format -- TODO confirm).
 * The token array is released with g_strfreev() once the list is built.
 *
 * Assumes 'times' is a non-NULL string.
 * NOTE(review): tokens are not filtered in the visible lines, so an empty
 * token would still produce a ShowTime with an empty string; the malloc()
 * result is used without a NULL check. The declaration of 'i' and the
 * return are on lines not visible in this view (presumably 'resultList' is
 * returned). */
195 GList* getShowtimes(gchar* times)
197 GList* resultList = NULL;
199 gchar** timesArray = g_strsplit(times, " ", -1); /* -1: no limit on the number of tokens */
202 for(i=0; timesArray[i] != NULL; i++) { /* the array is NULL-terminated */
203 ShowTime* st = malloc(sizeof(ShowTime));
204 st->time = g_strndup(timesArray[i], 5);
205 resultList = g_list_append(resultList, st);
208 g_strfreev(timesArray);
/* Builds a list of Movie records from the children of 'movieSideDiv'.
 *
 * For each node found with a class attribute starting with "movie" a Movie
 * is malloc()'d and appended to the result list:
 *   - 'title'     : text content of name/<first child>/<first child>
 *   - 'info'      : text content of the "info" child
 *   - 'showTimes' : list built by getShowtimes() from the text content of
 *                   the "times" child
 * All class compares use the bare string length, so they are prefix matches.
 *
 * NOTE(review): the enclosing loop that advances 'nav', any guards around
 * the "info"/"times" lookups, the release of 'showtimesStr' and the return
 * are on lines not visible in this view -- confirm. 'm->title' is assigned
 * only inside its branch, and the malloc() result is used without a NULL
 * check. 'tmp3' is not used in the visible lines. */
214 GList* getMovieList(xmlNodePtr movieSideDiv)
217 xmlNodePtr nav = movieSideDiv->children;
218 xmlNodePtr tmp1, tmp2, tmp3 = NULL; /* only tmp3 is initialised by this declaration */
220 GList* resultList = NULL;
223 tmp1 = getFirstSiblingByAttributeValue(nav, "class", "movie", 5);
224 if (tmp1 != NULL) { //is a movie
225 Movie* m = malloc(sizeof(Movie));
226 tmp2 = getFirstChildByAttributeValue(tmp1, "class", "name", 4);
227 if ((tmp2 != NULL) && (tmp2->children != NULL) && (tmp2->children->children != NULL)) { //<div class="name"><a><span/>
228 m->title = xmlNodeGetContent(tmp2->children->children);
230 tmp2 = getFirstChildByAttributeValue(tmp1, "class", "info", 4);
232 m->info = xmlNodeGetContent(tmp2);
234 tmp2 = getFirstChildByAttributeValue(tmp1, "class", "times", 5);
236 gchar* showtimesStr = xmlNodeGetContent(tmp2);
237 GList* showtimes = getShowtimes(showtimesStr);
239 m->showTimes = showtimes;
241 resultList = g_list_append(resultList, m);
/* Returns the movies of theater 't' as one list.
 *
 * Looks under t->data for the child whose class starts with "showtimes",
 * then parses its "show_left" and "show_right" children with
 * getMovieList() and joins the two lists with g_list_concat().
 *
 * NOTE(review): the declarations and initial values of 'left' and 'right',
 * and what is returned when the "showtimes" div is missing, are on lines
 * not visible in this view -- confirm. */
250 GList* getTheaterMovies(Theater* t)
254 * <div class="theater">
255 * <div class="desc"/>
256 * <div class="showtimes">
257 * <div class="show_left">
258 * <div class="movie/>
261 * <div class="show_right">
262 * <div class="movie"/>
271 xmlNodePtr showtimesDiv = getFirstChildByAttributeValue(t->data, "class", "showtimes", 9);
272 if (showtimesDiv != NULL) {
274 xmlNodePtr showLeft = getFirstChildByAttributeValue(showtimesDiv, "class", "show_left", 9);
275 if (showLeft != NULL)
276 left = getMovieList(showLeft);
278 xmlNodePtr showRight = getFirstChildByAttributeValue(showtimesDiv, "class", "show_right", 10);
279 if (showRight != NULL)
280 right = getMovieList(showRight);
282 return g_list_concat(left, right); /* left-column movies first, then right-column */
/* Releases one ShowTime; the body is not visible in this view.
 * Used as the per-element callback of deleteShowTimeList(). */
290 void deleteShowTime(ShowTime* st)
/* Releases a list of ShowTime records: calls deleteShowTime() on every
 * element, then frees the list cells themselves with g_list_free().
 * deleteShowTime() takes a single argument while GFunc passes two; the cast
 * relies on the extra user-data argument (NULL here) being ignored. */
297 void deleteShowTimeList(GList * showTimeList)
299 g_list_foreach(showTimeList, (GFunc) deleteShowTime, NULL);
300 g_list_free(showTimeList);
/* Releases one Movie. The visible part frees its show-time list through
 * deleteShowTimeList(); the remaining lines are not visible in this view
 * (presumably they release the title/info strings and the struct itself --
 * TODO confirm). */
304 void deleteMovie(Movie* m)
308 deleteShowTimeList(m->showTimes);
/* Releases a list of Movie records: calls deleteMovie() on every element,
 * then frees the list cells themselves with g_list_free().
 * deleteMovie() takes a single argument while GFunc passes two; the cast
 * relies on the extra user-data argument (NULL here) being ignored. */
313 void deleteMovieList(GList* movieList)
315 g_list_foreach(movieList, (GFunc) deleteMovie, NULL);
316 g_list_free(movieList);
/* Releases one Theater; the body is not visible in this view.
 * Used as the per-element callback of deleteTheaterList(). */
320 void deleteTheater(Theater* t)
/* Releases a list of Theater records: calls deleteTheater() on every
 * element, then frees the list cells themselves with g_list_free().
 * deleteTheater() takes a single argument while GFunc passes two; the cast
 * relies on the extra user-data argument (NULL here) being ignored. */
327 void deleteTheaterList(GList* theaterList)
329 g_list_foreach(theaterList, (GFunc) deleteTheater, NULL);
330 g_list_free(theaterList);
/* g_list_foreach() callback: prints one show time followed by a space.
 * 'nothing' is the user-data slot of the callback signature and is not used
 * in the visible line. */
334 void showTime(ShowTime* st, gpointer nothing)
336 printf("%s ", st->time);
/* g_list_foreach() callback: prints a movie's title and info on separate
 * lines, then the "Schedule" label followed by every show time (each printed
 * by showTime()). 'nothing' is the user-data slot of the callback signature
 * and is not used in the visible lines. */
339 void showMovie(Movie* m, gpointer nothing)
341 printf(" Title = %s\n", m->title);
342 printf(" Info = %s\n", m->info);
343 printf(" Schedule = ");
344 g_list_foreach(m->showTimes, (GFunc) showTime, NULL);
/* g_list_foreach() callback: prints a theater's name and address, then
 * parses its movies on demand with getTheaterMovies(), prints each one with
 * showMovie() and releases the temporary movie list again.
 * 'nothing' is the user-data slot of the callback signature and is not used
 * in the visible lines. */
349 void showTheater(Theater * t, gpointer nothing)
351 printf("Name = %s\n", t->name);
352 printf("Info = %s\n", t->address);
353 GList* movieList = getTheaterMovies(t); /* built per call, freed below */
354 g_list_foreach(movieList, (GFunc) showMovie, NULL);
355 deleteMovieList(movieList);
/* Builds the query URI for 'city' from the BASE_URL format string with
 * g_strdup_printf() and logs it through g_message().
 * The return type, the format argument and the return statement are on
 * lines not visible in this view; main() assigns the result to 'query_uri',
 * so presumably the newly allocated string is returned and must be released
 * with g_free() by the caller -- TODO confirm.
 * NOTE(review): whether 'city' is URI-escaped before being inserted is not
 * visible here. */
360 get_query_uri (const char *city)
362 char *query_uri = g_strdup_printf (BASE_URL,
364 g_message ("%s\n", query_uri);
/* Program entry point: takes the city name from argv[1], downloads the
 * result page for it with libsoup, parses the body with libxml2's HTML
 * parser, locates the results <div> and prints every theater together with
 * its movies.
 *
 * Flow in the visible lines:
 *   1. create the session, build the query URI and a GET message, send it;
 *   2. parse the response body with htmlReadMemory() (parser errors and
 *      warnings suppressed);
 *   3. getMovieResultsDiv() -> getTheaterList() -> showTheater() per entry;
 *   4. release the theater list and the session.
 *
 * NOTE(review): the argument check guarding the usage message, the
 * declarations of 'query_uri' and 'len', any checks on 'doc'/'root', and the
 * release of 'query_uri', 'message' and 'doc' are on lines not visible in
 * this view -- confirm they exist. The value returned by
 * soup_session_send_message() is discarded on the visible line, so a failed
 * request is not detected there. */
369 int main (int argc, char ** argv)
373 printf("usage: gmovies city_name\n");
377 SoupSession *session;
378 SoupMessage *message;
380 const gchar *payload;
381 const char *city = argv[1];
387 session = soup_session_async_new ();
388 query_uri = get_query_uri (city);
389 message = soup_message_new ("GET", query_uri);
391 soup_session_send_message (session, message);
393 payload = message->response_body->data;
394 len = message->response_body->length;
396 htmlDocPtr doc = htmlReadMemory(payload, len, "http://movies.google.com", "UTF-8", HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
398 xmlNodePtr root = xmlDocGetRootElement(doc);//html
401 xmlNodePtr movieResults = getMovieResultsDiv(root);
403 if (movieResults == NULL) {
404 printf("NO RESULTS.\n");
408 GList* theaterList = getTheaterList(movieResults);
410 g_list_foreach(theaterList, (GFunc) showTheater, NULL);
412 deleteTheaterList(theaterList);
417 g_object_unref(session);