-/* Parses a Google movies web (previously downloaded on a file)
- * using libxml2. Examples of Google movies web files can be
- * found in the gmovies_data directory.
+/* Queries Google movies for the theaters of a city and parses
+ * the response using libxml2.
*/
#include <libxml/HTMLparser.h>
#include <libxml/tree.h>
-#include <rest/rest/rest-proxy.h>
+#include <rest/rest-proxy.h>
#include <glib.h>
#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
#include <unistd.h>
+/** One theater found in the results page. Instances are built by
+ * getTheaterList() and released with deleteTheater(). */
+typedef struct struct_theater
+{
+    /* text of the theater's "name" div, obtained with xmlNodeGetContent() */
+    gchar* name;
+    /* text of the theater's "info" div, obtained with xmlNodeGetContent() */
+    gchar* address;
+    /* tree node with the theater info, including its movies; it belongs
+     * to the parsed document and deleteTheater() does not free it */
+    xmlNodePtr data;
+} Theater;
+
+
+/** A single showtime of a movie. Built by getShowtimes() and released
+ * with deleteShowTime(). */
+typedef struct struct_showtime
+{
+    /* copy of at most the first 5 bytes of one showtime token */
+    gchar* time;
+} ShowTime;
+
+
+/** A movie shown in a theater. Built by getMovieList() and released
+ * with deleteMovie(). */
+typedef struct struct_movie
+{
+    /* text of the movie's "name" div */
+    gchar* title;
+    /* NOTE(review): never assigned by getMovieList() in this file */
+    int rating;
+    /* text of the movie's "info" div */
+    gchar* info;
+    /* list of ShowTime*, as produced by getShowtimes() */
+    GList* showTimes;
+} Movie;
+
+
+/** Search for a sibling node by the name of the sibling node */
xmlNodePtr getSiblingByName(xmlNodePtr node, xmlChar* name, int nameLen)
{
}
+/** Search a child node by its node name.
+ *
+ * Thin wrapper: hands node->children, name and nameLen straight to
+ * getSiblingByName() and returns whatever that call returns.
+ * 'node' is dereferenced unconditionally, so it must not be NULL. */
xmlNodePtr getChildByName(xmlNodePtr node, xmlChar* name, int nameLen)
{
return getSiblingByName(node->children, name, nameLen);
}
+/** Search the first sibling node that has an attribute 'attr'
+ * with the value 'attrValue'.
+ *
+ * The scan starts at 'sibling' itself and follows the ->next links.
+ * At most 'attrValueLen' bytes are compared with strncmp(): pass
+ * strlen(attrValue) + 1 for an exact match, strlen(attrValue) for a
+ * prefix match.
+ *
+ * Returns the first matching node, or NULL if 'sibling' is NULL or no
+ * node in the chain matches. */
xmlNodePtr getFirstSiblingByAttributeValue(
xmlNodePtr sibling, xmlChar* attr, xmlChar * attrValue, int attrValueLen)
{
xmlNodePtr tempNode = sibling;
+ xmlNodePtr result = NULL;
- while(tempNode != NULL) {
+ while ((tempNode != NULL) && (result == NULL)) {
xmlChar* value = xmlGetProp(tempNode, attr);
- if ((value != NULL) && (strncmp(value, attrValue, attrValueLen)) == 0)
- return tempNode;
+ if (value != NULL) {
+ if (strncmp(value, attrValue, attrValueLen) == 0) {
+ result = tempNode;
+ }
+ //xmlGetProp() hands back a copy that has to be released with xmlFree()
+ xmlFree(value);
+ }
tempNode = tempNode->next;
}
- return NULL;
+ return result;
}
-
+/** Search the first child node that has an attribute 'attr' with
+ * value 'attrValue'.
+ *
+ * Thin wrapper: runs getFirstSiblingByAttributeValue() over the
+ * children of 'node', forwarding the other arguments unchanged, and
+ * returns its result. 'node' is dereferenced unconditionally, so it
+ * must not be NULL. */
xmlNodePtr getFirstChildByAttributeValue(
xmlNodePtr node, xmlChar* attr, xmlChar * attrValue, int attrValueLen)
{
return getFirstSiblingByAttributeValue(node->children, attr, attrValue, attrValueLen);
}
-
+/** Advances N sibling nodes in the node list */
xmlNodePtr jumpXSiblings(xmlNodePtr node, int siblings)
{
xmlNodePtr r = node;
return r;
}
-int isSeparatorTR(xmlNodePtr node)
-{
- return ((node != NULL) && (childrenCount(node) == 1));
-}
int childrenCount(xmlNodePtr node)
{
}
-int startsTheatherData(xmlNodePtr node)
+/** Search the <div> with the results and return it, or NULL
+ * if it couldn't be found.
+ *
+ * Walks <body> -> <div id="results"> -> <div id="movie_results">
+ * -> <div class="movie_results">. The lengths passed with "results" and
+ * "movie_results" count the terminating NUL, so the strncmp() done by
+ * getFirstSiblingByAttributeValue() only accepts an exact match.
+ *
+ * root: root element of the parsed page; NULL is accepted and yields NULL.
+ * Returns a node that belongs to the document (do not free it), or NULL
+ * when any step of the path is missing. */
+xmlNodePtr getMovieResultsDiv(xmlNodePtr root)
+{
+    if (root == NULL) {
+        //nothing was parsed
+        return NULL;
+    }
+
+    //<body>
+    xmlNodePtr body = getSiblingByName(root->children, "body", 4);
+    if (body == NULL) {
+        //no <body> to look into; the lookups below dereference their node
+        return NULL;
+    }
+
+    //<div id="results">
+    xmlNodePtr resultsDiv = getFirstChildByAttributeValue(body, "id", "results", 8);
+    if (resultsDiv == NULL) {
+        //no results
+        return NULL;
+    }
+
+    //<div id="movie_results">
+    xmlNodePtr movieResultsDiv = getFirstChildByAttributeValue(resultsDiv, "id", "movie_results", 14);
+    if (movieResultsDiv == NULL) {
+        //no results
+        return NULL;
+    }
+
+    //<div class="movie_results">, NULL if it is not there
+    return getFirstChildByAttributeValue(movieResultsDiv, "class", "movie_results", 14);
+}
+
+
+/** Parses the results and returns a list with all the theaters.
+ * Theater info is parsed and returned as Theater 'objects'; the movie
+ * info is not parsed here, it stays reachable through Theater.data.
+ *
+ * movieResults: the <div class="movie_results"> node; NULL yields an
+ *               empty list.
+ * Returns a GList of Theater* (NULL when there are none). Each theater
+ * appears exactly once. 'name' and 'address' are NULL when the page
+ * does not carry them. Release the list with deleteTheaterList(). */
+GList* getTheaterList(xmlNodePtr movieResults)
{
+    //<div class="movie_results"><div class="theater"/><div class="theater"/>...
+    GList* resultList = NULL;
+    xmlNodePtr nav = (movieResults != NULL) ? movieResults->children : NULL;
- if (strncmp(node->name, "tr", 2) == 0) {
- xmlNodePtr td = node->children;
- if ((td != NULL) && (strncmp(td->name, "td", 2) == 0)) {
- xmlChar* value = xmlGetProp(td, "colspan");
- return ((value != NULL) && (strncmp(value, "4", 1)) == 0);
+
+    while (nav != NULL) {
+        //next node, from nav onwards, whose class starts with "theater"
+        xmlNodePtr theaterDiv = getFirstSiblingByAttributeValue(nav, "class", "theater", 7);
+        if (theaterDiv == NULL) {
+            //no more theaters among the remaining siblings
+            break;
+        }
+
+        /*
+         * <div class="theater">
+         *   <div class="desc">
+         *     <div class="name"/><div class="info"/>
+         *   </div>
+         *   <div class="showtimes"/>
+         * </div>
+         */
+
+        //zero-filled so that missing fields are NULL instead of garbage
+        Theater* t = calloc(1, sizeof *t);
+        if (t == NULL) {
+            //out of memory: return what was collected so far
+            break;
+        }
+
+        xmlNodePtr desc = getFirstChildByAttributeValue(theaterDiv, "class", "desc", 4);
+        if (desc != NULL) { //has desc
+            xmlNodePtr field = getFirstChildByAttributeValue(desc, "class", "name", 4);
+            if ((field != NULL) && (field->children != NULL) && (field->children->children != NULL)) { //<div class="name"><a><span/>
+                t->name = (gchar*) xmlNodeGetContent(field->children->children);
+            }
+            field = getFirstChildByAttributeValue(desc, "class", "info", 4);
+            if (field != NULL) { //<div class="info">
+                t->address = (gchar*) xmlNodeGetContent(field);
+            }
+        }
+        t->data = theaterDiv;
+        resultList = g_list_append(resultList, t);
+
+        //resume after the theater just stored; stepping only one sibling
+        //from nav would find this same theater again
+        nav = theaterDiv->next;
     }
+
+    return resultList;
+}
+
+/** Splits a showtimes string into a list of ShowTime 'objects'.
+ *
+ * times: showtimes separated by single spaces; NULL yields an empty list.
+ * Empty tokens (leading, trailing or repeated spaces) are skipped.
+ * Each ShowTime keeps at most the first 5 bytes of its token
+ * (presumably "HH:MM" -- confirm against the page format).
+ *
+ * Returns a GList of ShowTime* (NULL when there are none); release it
+ * with deleteShowTimeList(). */
+GList* getShowtimes(gchar* times)
+{
+    GList* resultList = NULL;
+
+    if (times == NULL) {
+        //g_strsplit() would return NULL here and the loop would crash
+        return NULL;
+    }
+
+    gchar** timesArray = g_strsplit(times, " ", -1);
+
+    for (int i = 0; timesArray[i] != NULL; i++) {
+        if (timesArray[i][0] == '\0') {
+            //produced by consecutive separators, not a showtime
+            continue;
+        }
+        ShowTime* st = malloc(sizeof *st);
+        if (st == NULL) {
+            //out of memory: return what was collected so far
+            break;
+        }
+        st->time = g_strndup(timesArray[i], 5);
+        resultList = g_list_append(resultList, st);
     }
- return -1;
+    g_strfreev(timesArray);
+
+    return resultList;
+}
+
+
+/** Parses the movies listed in one column of a theater's showtimes.
+ *
+ * movieSideDiv: the <div class="show_left"> or <div class="show_right">
+ *               node; NULL yields an empty list.
+ * Returns a GList of Movie* (NULL when there are none). Each movie
+ * appears exactly once. 'title', 'info' and 'showTimes' are NULL when
+ * the page does not carry them; 'rating' is left at 0. Release the list
+ * with deleteMovieList(). */
+GList* getMovieList(xmlNodePtr movieSideDiv)
+{
+    GList* resultList = NULL;
+    xmlNodePtr nav = (movieSideDiv != NULL) ? movieSideDiv->children : NULL;
+
+    while (nav != NULL) {
+        //next node, from nav onwards, whose class starts with "movie"
+        xmlNodePtr movieDiv = getFirstSiblingByAttributeValue(nav, "class", "movie", 5);
+        if (movieDiv == NULL) {
+            //no more movies among the remaining siblings
+            break;
+        }
+
+        //zero-filled so that missing fields are NULL/0 instead of garbage
+        Movie* m = calloc(1, sizeof *m);
+        if (m == NULL) {
+            //out of memory: return what was collected so far
+            break;
+        }
+
+        xmlNodePtr field = getFirstChildByAttributeValue(movieDiv, "class", "name", 4);
+        if ((field != NULL) && (field->children != NULL) && (field->children->children != NULL)) { //<div class="name"><a><span/>
+            m->title = (gchar*) xmlNodeGetContent(field->children->children);
+        }
+
+        field = getFirstChildByAttributeValue(movieDiv, "class", "info", 4);
+        if (field != NULL) {
+            m->info = (gchar*) xmlNodeGetContent(field);
+        }
+
+        field = getFirstChildByAttributeValue(movieDiv, "class", "times", 5);
+        if (field != NULL) {
+            xmlChar* showtimesStr = xmlNodeGetContent(field);
+            if (showtimesStr != NULL) {
+                m->showTimes = getShowtimes((gchar*) showtimesStr);
+                //libxml2 strings are released with xmlFree()
+                xmlFree(showtimesStr);
+            }
+        }
+
+        resultList = g_list_append(resultList, m);
+
+        //resume after the movie just stored; stepping only one sibling
+        //from nav would find this same movie again
+        nav = movieDiv->next;
+    }
+
+    return resultList;
+}
+
+/** Parses the movies of a theater from the tree node kept in t->data.
+ *
+ * Returns a GList of Movie*: the movies of the left column followed by
+ * those of the right column. Returns NULL when 't' or t->data is NULL,
+ * when the theater has no showtimes div, or when no movie is found.
+ * Release the list with deleteMovieList(). */
+GList* getTheaterMovies(Theater* t)
+{
+
+    /*
+     * <div class="theater">
+     *   <div class="desc"/>
+     *   <div class="showtimes">
+     *     <div class="show_left">
+     *       <div class="movie"/>
+     *       ...
+     *     </div>
+     *     <div class="show_right">
+     *       <div class="movie"/>
+     *       ...
+     *     </div>
+     *   </div>
+     * </div>
+     */
+
+    if ((t == NULL) || (t->data == NULL)) {
+        return NULL;
+    }
+
+    xmlNodePtr showtimesDiv = getFirstChildByAttributeValue(t->data, "class", "showtimes", 9);
+    if (showtimesDiv == NULL) {
+        //no showtimes: an empty list, every path must return a value
+        return NULL;
+    }
+
+    GList* left = NULL;
+    GList* right = NULL;
+
+    xmlNodePtr showLeft = getFirstChildByAttributeValue(showtimesDiv, "class", "show_left", 9);
+    if (showLeft != NULL) {
+        left = getMovieList(showLeft);
+    }
+
+    xmlNodePtr showRight = getFirstChildByAttributeValue(showtimesDiv, "class", "show_right", 10);
+    if (showRight != NULL) {
+        right = getMovieList(showRight);
+    }
+
+    return g_list_concat(left, right);
+}
+
+
+/** Releases a ShowTime created by getShowtimes(). NULL is ignored. */
+void deleteShowTime(ShowTime* st)
+{
+    if (st == NULL) {
+        return;
+    }
+    g_free(st->time); //allocated with g_strndup(), so released with g_free()
+    free(st);         //allocated with malloc()
+}
+
+
+/** Releases every ShowTime of the list and then the list itself.
+ * An empty (NULL) list is accepted. */
+void deleteShowTimeList(GList * showTimeList)
+{
+    //plain loop: deleteShowTime() takes one argument, so it must not be
+    //called through a two-argument GFunc pointer
+    for (GList* it = showTimeList; it != NULL; it = it->next) {
+        deleteShowTime(it->data);
+    }
+    g_list_free(showTimeList);
+}
+
+
+/** Releases a Movie created by getMovieList(), including its title,
+ * info and showtimes. NULL is ignored. */
+void deleteMovie(Movie* m)
+{
+    if (m == NULL) {
+        return;
+    }
+    //title and info come from xmlNodeGetContent(): released with xmlFree()
+    if (m->title != NULL) {
+        xmlFree(m->title);
+    }
+    if (m->info != NULL) {
+        xmlFree(m->info);
+    }
+    deleteShowTimeList(m->showTimes);
+    free(m);
+}
+
+
+/** Releases every Movie of the list and then the list itself.
+ * An empty (NULL) list is accepted. */
+void deleteMovieList(GList* movieList)
+{
+    //plain loop: deleteMovie() takes one argument, so it must not be
+    //called through a two-argument GFunc pointer
+    for (GList* it = movieList; it != NULL; it = it->next) {
+        deleteMovie(it->data);
+    }
+    g_list_free(movieList);
+}
+
+
+/** Releases a Theater created by getTheaterList(). The tree node in
+ * t->data is not touched: it belongs to the parsed document.
+ * NULL is ignored. */
+void deleteTheater(Theater* t)
+{
+    if (t == NULL) {
+        return;
+    }
+    //name and address come from xmlNodeGetContent(): released with xmlFree()
+    if (t->name != NULL) {
+        xmlFree(t->name);
+    }
+    if (t->address != NULL) {
+        xmlFree(t->address);
+    }
+    free(t);
+}
+
+/** Releases every Theater of the list and then the list itself.
+ * An empty (NULL) list is accepted. */
+void deleteTheaterList(GList* theaterList)
+{
+    //plain loop: deleteTheater() takes one argument, so it must not be
+    //called through a two-argument GFunc pointer
+    for (GList* it = theaterList; it != NULL; it = it->next) {
+        deleteTheater(it->data);
+    }
+    g_list_free(theaterList);
+}
+
+
+/** List callback: prints one showtime followed by a space on stdout.
+ * The second argument is not used. */
+void showTime(ShowTime* st, gpointer nothing)
+{
+    (void) nothing; //unused
+    const gchar* text = st->time;
+    printf("%s ", text);
+}
+
+/** List callback: prints title, info and schedule of a movie on stdout.
+ * A missing title or info is printed as "(null)" instead of handing a
+ * NULL pointer to printf(). The second argument is not used. */
+void showMovie(Movie* m, gpointer nothing)
+{
+    (void) nothing; //unused
+
+    const char* title = (m->title != NULL) ? m->title : "(null)";
+    const char* info = (m->info != NULL) ? m->info : "(null)";
+
+    printf(" Title = %s\n", title);
+    printf(" Info = %s\n", info);
+    printf(" Schedule = ");
+    //plain loop instead of casting showTime() to GFunc
+    for (GList* it = m->showTimes; it != NULL; it = it->next) {
+        showTime(it->data, NULL);
+    }
+    printf("\n");
+}
+
+
+/** List callback: prints name and address of a theater on stdout,
+ * followed by its movies, which are parsed with getTheaterMovies() and
+ * released again before returning. A missing name or address is printed
+ * as "(null)" instead of handing a NULL pointer to printf().
+ * The second argument is not used. */
+void showTheater(Theater * t, gpointer nothing)
+{
+    (void) nothing; //unused
+
+    const char* name = (t->name != NULL) ? t->name : "(null)";
+    const char* address = (t->address != NULL) ? t->address : "(null)";
+
+    printf("Name = %s\n", name);
+    printf("Info = %s\n", address);
+
+    GList* movieList = getTheaterMovies(t);
+    //plain loop instead of casting showMovie() to GFunc
+    for (GList* it = movieList; it != NULL; it = it->next) {
+        showMovie(it->data, NULL);
+    }
+    deleteMovieList(movieList);
+    printf("\n");
}
{
if (argc != 2) {
- printf("usage: gmovies file.html\n");
+ printf("usage: gmovies city_name\n");
exit(-1);
}
payload = rest_proxy_call_get_payload(call);
len = rest_proxy_call_get_payload_length(call);
-
- //write(1, payload, len);
- //printf("\n\n");
htmlDocPtr doc = htmlReadMemory(payload, len, "http://movies.google.com", "UTF-8", HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
xmlNodePtr root = xmlDocGetRootElement(doc);//html
//get the body node
- xmlNodePtr body = getSiblingByName(root->children, "body", 4);
-
- xmlNodePtr tempNode = getFirstChildByAttributeValue(body, "id", "results", 8); //the data is a div with id = results
-
- if (tempNode == NULL) {
- printf("results div not found.\n");
+ xmlNodePtr movieResults = getMovieResultsDiv(root);
+
+ if (movieResults == NULL) {
+ printf("NO RESULTS.\n");
exit(-1);
}
- tempNode = getFirstChildByAttributeValue(tempNode, "id", "movie_results", 14);
+ GList* theaterList = getTheaterList(movieResults);
- if (tempNode == NULL) {
- printf("movie_results div not found.\n");
- exit(-1);
- }
+ g_list_foreach(theaterList, (GFunc) showTheater, NULL);
- tempNode = getFirstChildByAttributeValue(tempNode, "class", "movie_results", 14);
-
- if (tempNode == NULL) {
- printf("movie_results class not found.\n");
- exit(-1);
- }
+ deleteTheaterList(theaterList);
- //look for theaters
- xmlNodePtr nav = tempNode->children;
- while(nav != NULL) {
- tempNode = getFirstSiblingByAttributeValue(nav, "class", "theater", 7);
- tempNode = getFirstChildByAttributeValue(tempNode, "class", "desc", 4);
- if (tempNode != NULL) {
- tempNode = getFirstChildByAttributeValue(tempNode, "class", "name", 4);
- printf("Info = %s\n", xmlNodeGetContent(tempNode->children->children));
- }
- nav = nav->next;
- }
+ xmlFreeDoc(doc);
+
+ g_object_unref(call);
+ g_object_unref(proxy);
exit(0);
-
- //get the form node inside body, the data is in the next node (a table)
- xmlNodePtr dataTable = getChildByName(body, "form", 4)->next;
-
-/*
- //tbody
- xmlNodePtr elem = dataTable;
- //xmlNodePtr nav = dataTable->children;
- int i = 0;
- while(nav != NULL) {
- elem = nav;
- if (startsTheatherData(elem)) {
- elem = elem->children; //td
- elem = elem->children; //a
- elem = elem->children; //b
-
- printf("Theather %d = %s\n", i++, xmlNodeGetContent(elem));
- printf("-------------------------------------------------\n");
-
- xmlNodePtr n1 = nav->next; //in this tr there is 4 td with 2 film data
- while(!startsTheatherData(n1) && !isSeparatorTR(n1)) {
- elem = n1->children->next; //the first td is for rating
- printf("%s\n", xmlNodeGetContent(elem->children->children));
- if (childrenCount(n1->children) > 2) {
- elem = elem->next->next; //the first td is for rating
- printf("%s\n", xmlNodeGetContent(elem->children->children));
- }
- n1 = n1->next;
- }
-
- printf("\n\n");
- }
- nav = nav->next;
- }
-*/
}
+