/*
 * Provenance: this class comes from the git patch
 *   d937a4994b54e88ce8cf875e4b30b13112ecac6a
 *   Philipp Zabel, Fri, 13 Aug 2010 22:28:56 +0200
 *   "IMDb line parser: add parser for actors and actresses lists"
 * which appends it to src/imdb/imdb-line-parser.vala.
 */

/**
 * Line parser for the IMDb person lists (actors, actresses, directors, writers).
 *
 * The parser is fed one line at a time. It first waits for one of the list
 * banners followed by the column underline. After that, a person block starts
 * with a line holding the name, one or more tabs and the first credit; further
 * credits follow on lines that begin with three tabs; an empty line ends the
 * block.
 *
 * Names and titles are converted from latin1 to UTF-8 before they are handed
 * to the database layer. Only actor and actress credits are stored; for the
 * other list types the lines are recognised but nothing is written per credit.
 */
class PersonParser : LineParser {
    enum PersonState {
        HEADER, // still looking for the list banner and the column underline
        NONE,   // between two person blocks
        PERSON  // inside a person block, further credits are expected
    }

    enum PersonType {
        NONE,
        ACTOR,
        ACTRESS,
        DIRECTOR,
        WRITER
    }

    // Separator between the title and each following attribute of a credit.
    // It is two characters wide: the parser always skipped two characters
    // after a match, and titles themselves contain single spaces.
    private const string FIELD_SEPARATOR = "  ";

    // Prefix of every credit line after the first one of a person.
    private const string CONTINUATION_PREFIX = "\t\t\t";

    PersonState state;
    PersonType type;
    string? name;

    public PersonParser (IMDbSqlite _sqlite) {
        base (_sqlite);
        reset ();
    }

    /**
     * Puts the parser back into its initial state so that it waits for a
     * list banner again.
     */
    public void reset () {
        state = PersonState.HEADER;
        type = PersonType.NONE;
        name = null;
    }

    /**
     * Consumes one line of the list file.
     *
     * @param line the raw line without its line terminator; it is only read,
     *             never modified
     */
    public override void parse_line (string line) {
        if (state == PersonState.HEADER) {
            parse_header_line (line);
            return;
        }

        // An empty line closes the current person block.
        if (line == "") {
            state = PersonState.NONE;
            name = null;
            return;
        }

        if (state == PersonState.NONE) {
            begin_person (line);
            return;
        }

        if (state == PersonState.PERSON) {
            if (line.has_prefix (CONTINUATION_PREFIX)) {
                // Continuation credits get the same latin1 -> UTF-8 treatment
                // as the first credit of the person.
                string? title = latin1_to_utf8 (line.substring (CONTINUATION_PREFIX.length), "title");
                if (title != null) {
                    parse_title (title);
                }
            } else {
                stderr.printf ("\t???: %s\n", line);
            }
        }
    }

    // Recognises the list banner (which selects the person type) and the
    // column underline (which ends the header once a banner has been seen).
    private void parse_header_line (string line) {
        switch (line) {
        case "THE ACTORS LIST":
            type = PersonType.ACTOR;
            break;
        case "THE ACTRESSES LIST":
            type = PersonType.ACTRESS;
            break;
        case "THE DIRECTORS LIST":
            type = PersonType.DIRECTOR;
            break;
        case "THE WRITERS LIST":
            type = PersonType.WRITER;
            break;
        case "----\t\t\t------":
            if (type != PersonType.NONE) {
                state = PersonState.NONE;
            }
            break;
        default:
            break;
        }
    }

    // Handles the first line of a person block: registers the person and
    // parses the credit that shares the line with the name.
    private void begin_person (string line) {
        if (line.has_prefix ("\t")) {
            // A credit line without a preceding name line: the input is not
            // in the expected shape, so give up (error () does not return).
            stderr.printf ("Invalid person entry: %s\n", line);
            error ("EXIT\n");
        }

        // Lines without any tab carry no credit and are ignored.
        int last_tab = line.last_index_of ("\t");
        if (last_tab < 0) {
            return;
        }
        int first_tab = line.index_of ("\t");

        // The name is everything before the first tab, the credit everything
        // after the last one.
        name = latin1_to_utf8 (line.substring (0, first_tab), "name");
        if (name == null) {
            return;
        }

        sqlite.add_person (name);

        string? title = latin1_to_utf8 (line.substring (last_tab + 1), "title");
        if (title != null) {
            parse_title (title);
        }

        state = PersonState.PERSON;
    }

    // Converts raw latin1 text to UTF-8. On failure a message naming `what`
    // is printed to stderr and null is returned.
    private string? latin1_to_utf8 (string raw, string what) {
        try {
            return convert (raw, -1, "utf-8", "latin1");
        } catch (ConvertError e) {
            stderr.printf ("Error converting %s to UTF-8\n", what);
            return null;
        }
    }

    // Parses one credit ("Title<sep>attribute<sep>attribute...") and, for the
    // actor and actress lists, stores it for the current person.
    private void parse_title (string title) {
        // Skip series episodes
        if (title.has_prefix ("\"")) {
            return;
        }

        // NOTE(review): a credit that consists of nothing but a title is
        // dropped here, exactly as before - confirm that this is intended.
        int separator_at = title.index_of (FIELD_SEPARATOR);
        if (separator_at < 0) {
            return;
        }

        string bare_title = title.substring (0, separator_at);
        if (skip_title (bare_title)) {
            return;
        }

        if (type != PersonType.ACTOR && type != PersonType.ACTRESS) {
            return;
        }

        string? character = null;
        int number = 0;
        string? info = null;

        string attributes = title.substring (separator_at + FIELD_SEPARATOR.length);
        foreach (unowned string field in attributes.split (FIELD_SEPARATOR)) {
            if (field.has_prefix ("[")) {
                // "[Character]" - strip the brackets; tolerate a missing "]".
                character = field.has_suffix ("]")
                    ? field.substring (1, field.length - 2)
                    : field.substring (1);
            } else if (field.has_prefix ("<")) {
                // "<N>" - int.parse stops at the first non-digit character.
                number = int.parse (field.substring (1));
            } else if (field.has_prefix ("(")) {
                // Parenthesised note; when several are present the last wins.
                info = field;
            }
        }

        sqlite.add_actor (name, bare_title, info, character, number);
    }
}