/**
 * Base class for parsers that are fed a list file one line at a time and
 * store what they recognise through an IMDbSqlite instance.
 */
abstract class LineParser {
    // Borrowed reference; the parser does not keep the database object alive.
    internal unowned IMDbSqlite sqlite;

    public LineParser (IMDbSqlite _sqlite) {
        sqlite = _sqlite;
    }

    /**
     * Consumes one line of input.
     *
     * @param line a single line, without its line terminator
     */
    public abstract void parse_line (string line);

    /**
     * Tells whether a title must be ignored.
     *
     * @param title the title to test
     * @return true when the title ends in "(TV)", "(V)" or "(VG)"
     */
    internal bool skip_title (string title) {
        return title.has_suffix ("(TV)")
            || title.has_suffix ("(V)")
            || title.has_suffix ("(VG)");
    }

    /**
     * Converts text from Latin-1 to UTF-8.
     *
     * @param raw the Latin-1 encoded text
     * @return the UTF-8 text, or null when the conversion fails
     */
    internal string? latin1_to_utf8 (string raw) {
        try {
            return convert (raw, -1, "utf-8", "latin1");
        } catch (ConvertError e) {
            return null;
        }
    }
}

/**
 * Parses lines of the form "<title><tabs><year>" and registers each movie.
 */
class MovieLineParser : LineParser {
    // Stays null when the pattern fails to compile; parse_line then ignores input.
    Regex? re_movie;

    public MovieLineParser (IMDbSqlite _sqlite) {
        base (_sqlite);
        try {
            re_movie = new Regex ("^([^\t]+)\t+([0-9]+)$");
        } catch (RegexError e) {
            critical ("Failed to initialize regex: %s\n", e.message);
        }
    }

    /**
     * Adds the movie described by //line//; lines that do not match the
     * pattern, start with a double quote or carry a skipped title are ignored.
     */
    public override void parse_line (string line) {
        // Skip series episodes
        if (line[0] == '"') {
            return;
        }
        if (re_movie == null) {
            return;
        }

        MatchInfo matchinfo;
        if (!re_movie.match (line, 0, out matchinfo)) {
            return;
        }

        string? title = latin1_to_utf8 (matchinfo.fetch (1));
        if (title == null) {
            return;
        }
        if (skip_title (title)) {
            return;
        }

        string year = matchinfo.fetch (2);
        sqlite.add_movie (title, int.parse (year));
    }
}

/**
 * Parses lines of the form "<title><tabs><genre>" and attaches the genre
 * to the title.
 */
class GenreLineParser : LineParser {
    // Stays null when the pattern fails to compile; parse_line then ignores input.
    Regex? re_genre;

    public GenreLineParser (IMDbSqlite _sqlite) {
        base (_sqlite);
        try {
            re_genre = new Regex ("^([^\t]+)\t+([A-Za-z-]+)$");
        } catch (RegexError e) {
            critical ("Failed to initialize regex: %s\n", e.message);
        }
    }

    /**
     * Records the genre found on //line//. Unlike the other parsers this one
     * does not filter titles through skip_title ().
     */
    public override void parse_line (string line) {
        // Skip series episodes
        if (line[0] == '"') {
            return;
        }
        if (re_genre == null) {
            return;
        }

        MatchInfo matchinfo;
        if (!re_genre.match (line, 0, out matchinfo)) {
            return;
        }

        string? title = latin1_to_utf8 (matchinfo.fetch (1));
        if (title == null) {
            return;
        }

        string genre = matchinfo.fetch (2);
        sqlite.movie_add_genre (title, genre);
    }
}

/**
 * Parses the ratings report: everything up to the "MOVIE RATINGS REPORT"
 * line is ignored, after that each matching line yields votes, rating and
 * title.
 */
class RatingLineParser : LineParser {
    enum RatingState {
        HEADER,
        NONE
    }

    RatingState state;
    // Stays null when the pattern fails to compile; parse_line then ignores input.
    Regex? re_rating;

    public RatingLineParser (IMDbSqlite _sqlite) {
        base (_sqlite);
        state = RatingState.HEADER;
        try {
            re_rating = new Regex ("^ .+ +([0-9]+) +([0-9.]+) +(.+)$");
        } catch (RegexError e) {
            critical ("Failed to initialize regex: %s\n", e.message);
        }
    }

    /**
     * Stores the rating found on //line//. The rating is passed on multiplied
     * by ten and truncated to an integer.
     */
    public override void parse_line (string line) {
        if (state == RatingState.HEADER) {
            if (line == "MOVIE RATINGS REPORT") {
                state = RatingState.NONE;
            }
            return;
        }

        // Skip series episodes
        if (line[0] == '"') {
            return;
        }
        if (re_rating == null) {
            return;
        }

        MatchInfo matchinfo;
        if (!re_rating.match (line, 0, out matchinfo)) {
            return;
        }

        string? title = latin1_to_utf8 (matchinfo.fetch (3));
        if (title == null) {
            return;
        }

        // Skip series episodes
        if (title[0] == '"') {
            return;
        }
        if (skip_title (title)) {
            return;
        }

        string votes = matchinfo.fetch (1);
        string rating = matchinfo.fetch (2);
        sqlite.movie_set_rating (title, (int) (double.parse (rating) * 10), int.parse (votes));
    }
}

/**
 * Parses the alternative-titles list: a title line followed by "(aka ...)"
 * lines, each group terminated by an empty line.
 */
class AkaLineParser : LineParser {
    enum AkaState {
        HEADER,
        NONE,
        TITLE
    }

    // Marker that precedes an alternative title; the text starts right after it.
    // NOTE(review): the code skipped 8 bytes after testing a shorter prefix;
    // the three-space indent is assumed from that offset - confirm against the
    // list file.
    const string AKA_PREFIX = "   (aka ";

    AkaState state;
    // Doubles as "header seen" flag while in the HEADER state.
    string? title;

    public AkaLineParser (IMDbSqlite _sqlite) {
        base (_sqlite);
        state = AkaState.HEADER;
        title = null;
    }

    /**
     * Advances the state machine by one line and stores every alternative
     * title found for a non-skipped title.
     */
    public override void parse_line (string line) {
        if (state == AkaState.HEADER) {
            if (line == "AKA TITLES LIST") {
                title = line;
            }
            if (line == "===============" && title != null) {
                state = AkaState.NONE;
            }
            return;
        }

        if (state == AkaState.NONE) {
            // Skip empty lines
            if (line == "") {
                return;
            }
            // Skip series episodes
            if (line[0] == '"') {
                return;
            }
            // Parse error: an indented line without a preceding title
            if (line[0] == ' ') {
                return;
            }

            title = latin1_to_utf8 (line);
            if (title == null) {
                return;
            }
            if (skip_title (title)) {
                return;
            }
            state = AkaState.TITLE;
            // Falls through: the title line itself is neither empty nor an
            // aka line, so the TITLE branch below ignores it.
        }

        if (state == AkaState.TITLE) {
            // Empty lines mark end of title
            if (line == "") {
                state = AkaState.NONE;
                return;
            }
            if (!line.has_prefix (AKA_PREFIX)) {
                return;
            }
            if (skip_title (title)) {
                return;
            }

            // Keep the text up to and including the first ')' of the closing
            // "))"; without that marker the rest of the line is used as is.
            string raw_aka = line.substring (AKA_PREFIX.length);
            int closing = raw_aka.index_of ("))");
            if (closing >= 0) {
                raw_aka = raw_aka.substring (0, closing + 1);
            }

            string? aka = latin1_to_utf8 (raw_aka);
            if (aka == null) {
                return;
            }
            sqlite.add_aka (title, aka);
        }
    }
}

/**
 * Parses the plot summaries list: an "MV: " line names the title, "PL: "
 * lines carry the plot text and a "BY: " line ends the plot.
 */
class PlotLineParser : LineParser {
    enum PlotState {
        HEADER,
        NONE,
        TITLE
    }

    // Doubles as "header seen" flag while in the HEADER state.
    string? title;
    string plot;
    PlotState state;

    public PlotLineParser (IMDbSqlite _sqlite) {
        base (_sqlite);
        state = PlotState.HEADER;
        title = null;
    }

    /**
     * Advances the state machine by one line. The plot is stored when its
     * "BY: " line is reached; the parser then waits for the next "MV: " line.
     */
    public override void parse_line (string line) {
        if (state == PlotState.HEADER) {
            if (line == "PLOT SUMMARIES LIST") {
                title = line;
            }
            if (line == "===================" && title != null) {
                state = PlotState.NONE;
            }
            return;
        }

        // Skip empty lines
        if (line == "") {
            return;
        }

        if (state == PlotState.NONE) {
            if (!line.has_prefix ("MV: ")) {
                return;
            }
            // Skip series episodes
            if (line[4] == '"') {
                return;
            }

            title = latin1_to_utf8 (line.substring (4));
            if (title == null) {
                stderr.printf ("Error converting title to UTF-8\n");
                return;
            }
            if (skip_title (title)) {
                return;
            }
            state = PlotState.TITLE;
            plot = "";
            return;
        }

        if (state == PlotState.TITLE) {
            if (line.has_prefix ("PL: ")) {
                if (skip_title (title)) {
                    return;
                }
                string? piece = latin1_to_utf8 (line.substring (4));
                if (piece == null) {
                    stderr.printf ("Error converting plot for \"%s\" to UTF-8\n", title);
                    // Discard what was collected so far; later PL lines start over.
                    plot = "";
                    return;
                }
                // Consecutive plot lines are joined with a single space.
                if (plot != "") {
                    plot += " ";
                }
                plot += piece;
            } else if (line.has_prefix ("BY: ")) {
                // BY: tag marks end of plot
                string? author = latin1_to_utf8 (line.substring (4));
                if (author == null) {
                    stderr.printf ("Error converting plot author for \"%s\" to UTF-8\n", title);
                }
                // A null author is still passed on, as before.
                sqlite.add_plot (title, plot, author);
                state = PlotState.NONE;
            }
        }
    }
}

/**
 * Parses the actors, actresses, directors and writers lists. A person
 * starts on a "<name><tabs><title>" line; further titles follow on lines
 * indented by three tabs, and an empty line ends the person.
 */
class PersonParser : LineParser {
    enum PersonState {
        HEADER,
        NONE,
        PERSON
    }

    enum PersonType {
        NONE,
        ACTOR,
        ACTRESS,
        DIRECTOR,
        WRITER
    }

    // Separator between the title and the optional fields that follow it.
    // NOTE(review): the code skipped 2 bytes after finding the separator, so
    // a two-space separator is assumed - confirm against the list file.
    const string FIELD_SEPARATOR = "  ";

    PersonState state;
    PersonType type;
    string? name;

    public PersonParser (IMDbSqlite _sqlite) {
        base (_sqlite);
        reset ();
    }

    /**
     * Returns the parser to its initial state so that another list can be fed.
     */
    public void reset () {
        state = PersonState.HEADER;
        type = PersonType.NONE;
        name = null;
    }

    /**
     * Advances the state machine by one line, adding persons and, for actor
     * and actress lists, their roles.
     */
    public override void parse_line (string line) {
        if (state == PersonState.HEADER) {
            if (line == "THE ACTORS LIST") {
                type = PersonType.ACTOR;
            } else if (line == "THE ACTRESSES LIST") {
                type = PersonType.ACTRESS;
            } else if (line == "THE DIRECTORS LIST") {
                type = PersonType.DIRECTOR;
            } else if (line == "THE WRITERS LIST") {
                type = PersonType.WRITER;
            } else if (line == "----\t\t\t------" && type != PersonType.NONE) {
                state = PersonState.NONE;
            }
            return;
        }

        // Empty lines end the current person
        if (line == "") {
            state = PersonState.NONE;
            name = null;
            return;
        }

        if (state == PersonState.NONE) {
            if (line.has_prefix ("\t")) {
                // A title line without a person, e.g. after a name that could
                // not be converted. Report and skip it instead of aborting.
                stderr.printf ("Invalid person entry: %s\n", line);
                return;
            }

            int last_tab = line.last_index_of ("\t");
            if (last_tab < 0) {
                return;
            }
            // The name ends at the first tab, the title starts after the last.
            int first_tab = line.index_of ("\t");

            name = latin1_to_utf8 (line.substring (0, first_tab));
            if (name == null) {
                stderr.printf ("Error converting name to UTF-8\n");
                return;
            }

            sqlite.add_person (name);
            parse_raw_title (line.substring (last_tab + 1));
            state = PersonState.PERSON;
            return;
        }

        if (state == PersonState.PERSON) {
            if (line.has_prefix ("\t\t\t")) {
                parse_raw_title (line.substring (3));
            } else {
                stderr.printf ("\t???: %s\n", line);
            }
        }
    }

    // Converts a Latin-1 title entry to UTF-8 and hands it to parse_title ().
    private void parse_raw_title (string raw_title) {
        string? title = latin1_to_utf8 (raw_title);
        if (title == null) {
            stderr.printf ("Error converting title to UTF-8\n");
            return;
        }
        parse_title (title);
    }

    /**
     * Splits a title entry into the title and its trailing fields and, for
     * actor and actress lists, stores the role.
     *
     * Recognised fields: "[...]" is the character (first and last byte
     * stripped), "<n>" the number, "(...)" additional info kept verbatim.
     *
     * @param title the UTF-8 title entry
     */
    private void parse_title (string title) {
        // Skip series episodes
        if (title[0] == '"') {
            return;
        }

        string[] fields = title.split (FIELD_SEPARATOR);
        // NOTE(review): entries without any field after the title are
        // dropped, as in the previous implementation - confirm this is intended.
        if (fields.length < 2) {
            return;
        }

        unowned string movie = fields[0];
        if (skip_title (movie)) {
            return;
        }
        if (type != PersonType.ACTOR && type != PersonType.ACTRESS) {
            return;
        }

        string? character = null;
        int number = 0;
        string? info = null;

        for (int i = 1; i < fields.length; i++) {
            unowned string field = fields[i];
            if (field.has_prefix ("[")) {
                character = field.substring (1, field.length - 2);
            }
            if (field.has_prefix ("<")) {
                number = int.parse (field.substring (1));
            }
            if (field.has_prefix ("(")) {
                info = field;
            }
        }

        sqlite.add_actor (name, movie, info, character, number);
    }
}