X-Git-Url: https://vcs.maemo.org/git/?a=blobdiff_plain;f=examples%2Fgmovies.c;h=577139195d731c4d995d233c05b7a999c98c480b;hb=b771798e893986c83f6f884a95040d6174e61602;hp=673004952c6f7db70229d576d038befc2d5c95a4;hpb=0d1ae54a26dee626392d2094afec47c0dac855e0;p=maevies diff --git a/examples/gmovies.c b/examples/gmovies.c index 6730049..5771391 100644 --- a/examples/gmovies.c +++ b/examples/gmovies.c @@ -1,6 +1,5 @@ -/* Parses a Google movies web (previously downloaded on a file) - * using libxml2. Examples of Google movies web files can be - * found in the gmovies_data directory. +/* Queries Google movies for the theaters of a city and parses + * the response using libxml2. */ #include @@ -11,6 +10,30 @@ #include +typedef struct struct_theater +{ + gchar* name; + gchar* address; + xmlNodePtr data; //points to the tree node with the theater info, including movies +} Theater; + + +typedef struct struct_showtime +{ + gchar* time; +} ShowTime; + + +typedef struct struct_movie +{ + gchar* title; + int rating; + gchar* info; + GList* showTimes; +} Movie; + + +/** Search for a sibling node by the name of the sibling node */ xmlNodePtr getSiblingByName(xmlNodePtr node, xmlChar* name, int nameLen) { @@ -23,34 +46,43 @@ xmlNodePtr getSiblingByName(xmlNodePtr node, xmlChar* name, int nameLen) } +/** Search a child node by its node name */ xmlNodePtr getChildByName(xmlNodePtr node, xmlChar* name, int nameLen) { return getSiblingByName(node->children, name, nameLen); } +/** Search the first sibling node that has an attribute 'attr' + * with the value 'attrValue' */ xmlNodePtr getFirstSiblingByAttributeValue( xmlNodePtr sibling, xmlChar* attr, xmlChar * attrValue, int attrValueLen) { xmlNodePtr tempNode = sibling; + xmlNodePtr result = NULL; - while(tempNode != NULL) { + while ((tempNode != NULL) && (result == NULL)) { xmlChar* value = xmlGetProp(tempNode, attr); - if ((value != NULL) && (strncmp(value, attrValue, attrValueLen)) == 0) - return tempNode; + if (value != NULL) { + if (strncmp(value, attrValue, attrValueLen) == 0) { + result = tempNode; + } + free(value); + } tempNode = tempNode->next; } - return NULL; + return result; } - +/** Search the first child node that has an attribute 'attr' with + * value 'attrValue' */ xmlNodePtr getFirstChildByAttributeValue( xmlNodePtr node, xmlChar* attr, xmlChar * attrValue, int attrValueLen) { return getFirstSiblingByAttributeValue(node->children, attr, attrValue, attrValueLen); } - +/** Advances N sibling nodes in the node list */ xmlNodePtr jumpXSiblings(xmlNodePtr node, int siblings) { xmlNodePtr r = node; @@ -63,10 +95,6 @@ xmlNodePtr jumpXSiblings(xmlNodePtr node, int siblings) return r; } -int isSeparatorTR(xmlNodePtr node) -{ - return ((node != NULL) && (childrenCount(node) == 1)); -} int childrenCount(xmlNodePtr node) { @@ -81,18 +109,250 @@ int childrenCount(xmlNodePtr node) } -int startsTheatherData(xmlNodePtr node) +/** Search the
with the results and returns it, or NULL + * if it couldn't be found */ +xmlNodePtr getMovieResultsDiv(xmlNodePtr root) +{ + // + xmlNodePtr body = getSiblingByName(root->children, "body", 4); + + //
+ xmlNodePtr tempNode = getFirstChildByAttributeValue(body, "id", "results", 8); + + if (tempNode == NULL) { + //no results + return NULL; + } + + //
+ tempNode = getFirstChildByAttributeValue(tempNode, "id", "movie_results", 14); + + if (tempNode == NULL) { + //no results + return NULL; + } + + //
+ tempNode = getFirstChildByAttributeValue(tempNode, "class", "movie_results", 14); + + if (tempNode == NULL) { + //no results + return NULL; + } + + return tempNode; + +} + + +/** Parses the results and returns a list with all the theaters. + * Theater info is parsed and returned as Theater 'objects", the movie + * info is not parsed */ +GList* getTheaterList(xmlNodePtr movieResults) { + //
... + xmlNodePtr nav = movieResults->children; + xmlNodePtr tmp1, tmp2, tmp3 = NULL; - if (strncmp(node->name, "tr", 2) == 0) { - xmlNodePtr td = node->children; - if ((td != NULL) && (strncmp(td->name, "td", 2) == 0)) { - xmlChar* value = xmlGetProp(td, "colspan"); - return ((value != NULL) && (strncmp(value, "4", 1)) == 0); + GList* resultList = NULL; + + while(nav != NULL) { + tmp1 = getFirstSiblingByAttributeValue(nav, "class", "theater", 7); + + /* + *
+ *
+ *
+ *
+ *
+ *
+ */ + + if (tmp1 != NULL) { //its theater data + Theater* t = malloc(sizeof(Theater)); + tmp2 = getFirstChildByAttributeValue(tmp1, "class", "desc", 4); + if (tmp2 != NULL) { //has desc + tmp3 = getFirstChildByAttributeValue(tmp2, "class", "name", 4); + if ((tmp3 != NULL) && (tmp3->children != NULL) && (tmp3->children->children != NULL)) { //
+ t->name = xmlNodeGetContent(tmp3->children->children); + } + tmp3 = getFirstChildByAttributeValue(tmp2, "class", "info", 4); + if (tmp3 != NULL) { //
+ t->address = xmlNodeGetContent(tmp3); + } + } + t->data = tmp1; + resultList = g_list_append(resultList, t); } + nav = nav->next; + } + + return resultList; + +} + +GList* getShowtimes(gchar* times) +{ + GList* resultList = NULL; + + gchar** timesArray = g_strsplit(times, " ", -1); + + int i = 0; + for(i=0; timesArray[i] != NULL; i++) { + ShowTime* st = malloc(sizeof(ShowTime)); + st->time = g_strndup(timesArray[i], 5); + resultList = g_list_append(resultList, st); } - return -1; + g_strfreev(timesArray); + + return resultList; +} + + +GList* getMovieList(xmlNodePtr movieSideDiv) +{ + + xmlNodePtr nav = movieSideDiv->children; + xmlNodePtr tmp1, tmp2, tmp3 = NULL; + + GList* resultList = NULL; + + while(nav != NULL) { + tmp1 = getFirstSiblingByAttributeValue(nav, "class", "movie", 5); + if (tmp1 != NULL) { //is a movie + Movie* m = malloc(sizeof(Movie)); + tmp2 = getFirstChildByAttributeValue(tmp1, "class", "name", 4); + if ((tmp2 != NULL) && (tmp2->children != NULL) && (tmp2->children->children != NULL)) { //
+ m->title = xmlNodeGetContent(tmp2->children->children); + } + tmp2 = getFirstChildByAttributeValue(tmp1, "class", "info", 4); + if (tmp2 != NULL) { + m->info = xmlNodeGetContent(tmp2); + } + tmp2 = getFirstChildByAttributeValue(tmp1, "class", "times", 5); + if (tmp2 != NULL) { + gchar* showtimesStr = xmlNodeGetContent(tmp2); + GList* showtimes = getShowtimes(showtimesStr); + free(showtimesStr); + m->showTimes = showtimes; + } + resultList = g_list_append(resultList, m); + } + + nav = nav->next; + } + + return resultList; +} + +GList* getTheaterMovies(Theater* t) +{ + + /* + *
+ *
+ *
+ *
+ *
+ *
+ * ... + *
+ *
+ *
+ */ + + GList* left = NULL; + GList* right = NULL; + xmlNodePtr showtimesDiv = getFirstChildByAttributeValue(t->data, "class", "showtimes", 9); + if (showtimesDiv != NULL) { + + xmlNodePtr showLeft = getFirstChildByAttributeValue(showtimesDiv, "class", "show_left", 9); + if (showLeft != NULL) + left = getMovieList(showLeft); + + xmlNodePtr showRight = getFirstChildByAttributeValue(showtimesDiv, "class", "show_right", 10); + if (showRight != NULL) + right = getMovieList(showRight); + + return g_list_concat(left, right); + + } + + +} + + +void deleteShowTime(ShowTime* st) +{ + free(st->time); + free(st); +} + + +void deleteShowTimeList(GList * showTimeList) +{ + g_list_foreach(showTimeList, (GFunc) deleteShowTime, NULL); + g_list_free(showTimeList); +} + + +void deleteMovie(Movie* m) +{ + free(m->title); + free(m->info); + deleteShowTimeList(m->showTimes); + free(m); +} + + +void deleteMovieList(GList* movieList) +{ + g_list_foreach(movieList, (GFunc) deleteMovie, NULL); + g_list_free(movieList); +} + + +void deleteTheater(Theater* t) +{ + free(t->name); + free(t->address); + free(t); +} + +void deleteTheaterList(GList* theaterList) +{ + g_list_foreach(theaterList, (GFunc) deleteTheater, NULL); + g_list_free(theaterList); +} + + +void showTime(ShowTime* st, gpointer nothing) +{ + printf("%s ", st->time); +} + +void showMovie(Movie* m, gpointer nothing) +{ + printf(" Title = %s\n", m->title); + printf(" Info = %s\n", m->info); + printf(" Schedule = "); + g_list_foreach(m->showTimes, (GFunc) showTime, NULL); + printf("\n"); +} + + +void showTheater(Theater * t, gpointer nothing) +{ + printf("Name = %s\n", t->name); + printf("Info = %s\n", t->address); + GList* movieList = getTheaterMovies(t); + g_list_foreach(movieList, (GFunc) showMovie, NULL); + deleteMovieList(movieList); + printf("\n"); } @@ -100,7 +360,7 @@ int main (int argc, char ** argv) { if (argc != 2) { - printf("usage: gmovies file.html\n"); + printf("usage: gmovies city_name\n"); exit(-1); } @@ -125,85 +385,31 @@ int main (int argc, char ** argv) payload = rest_proxy_call_get_payload(call); len = rest_proxy_call_get_payload_length(call); - - //write(1, payload, len); - //printf("\n\n"); htmlDocPtr doc = htmlReadMemory(payload, len, "http://movies.google.com", "UTF-8", HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING); xmlNodePtr root = xmlDocGetRootElement(doc);//html //get the body node - xmlNodePtr body = getSiblingByName(root->children, "body", 4); - - xmlNodePtr tempNode = getFirstChildByAttributeValue(body, "id", "results", 8); //the data is a div with id = results - - if (tempNode == NULL) { - printf("results div not found.\n"); + xmlNodePtr movieResults = getMovieResultsDiv(root); + + if (movieResults == NULL) { + printf("NO RESULTS.\n"); exit(-1); } - tempNode = getFirstChildByAttributeValue(tempNode, "id", "movie_results", 14); + GList* theaterList = getTheaterList(movieResults); - if (tempNode == NULL) { - printf("movie_results div not found.\n"); - exit(-1); - } + g_list_foreach(theaterList, (GFunc) showTheater, NULL); - tempNode = getFirstChildByAttributeValue(tempNode, "class", "movie_results", 14); - - if (tempNode == NULL) { - printf("movie_results class not found.\n"); - exit(-1); - } + deleteTheaterList(theaterList); - //look for theaters - xmlNodePtr nav = tempNode->children; - while(nav != NULL) { - tempNode = getFirstSiblingByAttributeValue(nav, "class", "theater", 7); - tempNode = getFirstChildByAttributeValue(tempNode, "class", "desc", 4); - if (tempNode != NULL) { - tempNode = getFirstChildByAttributeValue(tempNode, "class", "name", 4); - printf("Info = %s\n", xmlNodeGetContent(tempNode->children->children)); - } - nav = nav->next; - } + xmlFreeDoc(doc); + + g_object_unref(call); + g_object_unref(proxy); exit(0); - - //get the form node inside body, the data is in the next node (a table) - xmlNodePtr dataTable = getChildByName(body, "form", 4)->next; - -/* - //tbody - xmlNodePtr elem = dataTable; - //xmlNodePtr nav = dataTable->children; - int i = 0; - while(nav != NULL) { - elem = nav; - if (startsTheatherData(elem)) { - elem = elem->children; //td - elem = elem->children; //a - elem = elem->children; //b - - printf("Theather %d = %s\n", i++, xmlNodeGetContent(elem)); - printf("-------------------------------------------------\n"); - - xmlNodePtr n1 = nav->next; //in this tr there is 4 td with 2 film data - while(!startsTheatherData(n1) && !isSeparatorTR(n1)) { - elem = n1->children->next; //the first td is for rating - printf("%s\n", xmlNodeGetContent(elem->children->children)); - if (childrenCount(n1->children) > 2) { - elem = elem->next->next; //the first td is for rating - printf("%s\n", xmlNodeGetContent(elem->children->children)); - } - n1 = n1->next; - } - - printf("\n\n"); - } - nav = nav->next; - } -*/ } +