From ec13e711f3c3797eb222cdf4d1788030efbf3400 Mon Sep 17 00:00:00 2001
From: Philipp Zabel
Date: Sun, 3 Jan 2010 17:05:07 +0100
Subject: [PATCH] IMDb plugin + downloader: parse IMDb alternative titles and use to match

Using the international alternative titles to match the title filter makes
finding foreign films much easier (or in some cases possible at all).
If the movie is returned via a match of the alternative title, display
it in the user interface as secondary text.
---
 src/imdb/imdb-line-parser.vala               |   99 ++++++++++++++++++++++
 src/imdb/imdb-plaintext-downloader.vala      |   14 ++-
 src/imdb/imdb-sqlite.vala                    |  137 ++++++++++++++++++++++++++++--
 src/imdb/plaintext-downloader-interface.vala |    3 +-
 src/plugins/imdb-download-dialog.vala        |    2 +-
 src/plugins/imdb-plugin.vala                 |    8 +-
 6 files changed, 249 insertions(+), 14 deletions(-)

diff --git a/src/imdb/imdb-line-parser.vala b/src/imdb/imdb-line-parser.vala
index fd3875a..25d1658 100644
--- a/src/imdb/imdb-line-parser.vala
+++ b/src/imdb/imdb-line-parser.vala
@@ -133,3 +133,102 @@ class RatingLineParser : LineParser {
 		sqlite.movie_set_rating (title, (int) (rating.to_double () * 10), votes.to_int ());
 	}
 }
+
+/**
+ * Parses the IMDb alternative titles list (aka-titles.list).
+ *
+ * Every "(aka ...)" entry found below a movie title is handed to
+ * IMDbSqlite.add_aka () together with that title.
+ */
+class AkaLineParser : LineParser {
+	enum AkaState {
+		HEADER,
+		NONE,
+		TITLE
+	}
+	AkaState state;
+	string title;
+
+	public AkaLineParser (IMDbSqlite _sqlite) {
+		base (_sqlite);
+		state = AkaState.HEADER;
+		title = null;
+	}
+
+	/**
+	 * Consumes one line of the list.
+	 *
+	 * HEADER skips everything up to the "AKA TITLES LIST" banner and its
+	 * underline, NONE waits for the next movie title and TITLE collects
+	 * the alternative titles that follow it.
+	 *
+	 * @param line one line of the list, still latin1 encoded
+	 */
+	public override void parse_line (string line) {
+		if (state == AkaState.HEADER) {
+			// title doubles as "banner seen" marker while in HEADER
+			if (line == "AKA TITLES LIST")
+				title = line;
+			if (line == "===============" && title != null)
+				state = AkaState.NONE;
+			return;
+		}
+
+		if (state == AkaState.NONE) {
+			// Skip empty lines
+			if (line == "")
+				return;
+
+			// Skip series episodes
+			if (line[0] == '"')
+				return;
+
+			// Parse error
+			if (line[0] == ' ')
+				return;
+
+			try {
+				title = convert (line, -1, "utf-8", "latin1");
+			} catch (ConvertError e) {
+				title = null;
+				return;
+			}
+
+			if (skip_title (title))
+				return;
+
+			state = AkaState.TITLE;
+		}
+
+		if (state == AkaState.TITLE) {
+			// Empty lines mark end of title
+			if (line == "") {
+				state = AkaState.NONE;
+				return;
+			}
+
+			// Three spaces of indent plus "(aka " are exactly the 8 bytes
+			// skipped by offset () below; keep the two in sync.
+			if (line.has_prefix ("   (aka ")) {
+				if (skip_title (title))
+					return;
+
+				char* start = line.offset (8);
+				// Terminate after the first ')' of "))": drops the parenthesis
+				// closing "(aka" and anything behind it. Truncates line in place.
+				char* end = ((string) start).str ("))");
+				if (end != null)
+					end[1] = '\0';
+
+				string aka;
+				try {
+					aka = convert ((string) start, -1, "utf-8", "latin1");
+				} catch (ConvertError e) {
+					return;
+				}
+
+				sqlite.add_aka (title, aka);
+			}
+		}
+	}
+}
diff --git a/src/imdb/imdb-plaintext-downloader.vala b/src/imdb/imdb-plaintext-downloader.vala
index 821c9a2..a978527 100644
--- a/src/imdb/imdb-plaintext-downloader.vala
+++ b/src/imdb/imdb-plaintext-downloader.vala
@@ -72,6 +72,7 @@ class IMDbDownloadServer : Object, IMDbDownloader {
 		var movie_parser = new MovieLineParser (sqlite);
 		var genre_parser = new GenreLineParser (sqlite);
 		var rating_parser = new RatingLineParser (sqlite);
+		var aka_parser = new AkaLineParser (sqlite);
 
 		var downloader = new IMDbFtpDownloader (cancellable);
 		downloader.progress_changed.connect (on_progress_changed);
@@ -80,16 +81,21 @@ class IMDbDownloadServer : Object, IMDbDownloader {
 				description_changed ("Downloading movie list ...");
 				downloader.download (url + "movies.list.gz", movie_parser);
 			}
-			percent_finished = 33;
+			percent_finished = 25;
 			if (GENRES in flags) {
 				description_changed ("Downloading genre data ...");
 				downloader.download (url + "genres.list.gz", genre_parser);
 			}
-			percent_finished = 66;
+			percent_finished = 50;
 			if (RATINGS in flags) {
 				description_changed ("Downloading rating data ...");
 				downloader.download (url + "ratings.list.gz", rating_parser);
 			}
+			percent_finished = 75;
+			if (AKAS in flags) {
+				description_changed ("Downloading alternative titles ...");
+				downloader.download (url + "aka-titles.list.gz", aka_parser);
+			}
 		} catch (Error e2) {
 			if (e2 is IOError.CANCELLED)
 				stdout.printf ("Download cancelled.\n");
@@ -98,6 +104,8 @@ class IMDbDownloadServer : Object, IMDbDownloader {
 		}
 
 		description_changed ("Creating indices ...");
+		if (AKAS in flags)
+			sqlite.create_aka_index ();
 		if (MOVIES in flags)
 			sqlite.create_votes_index ();
 
@@ -115,7 +123,7 @@ class IMDbDownloadServer : Object, IMDbDownloader {
 	}
 
 	private void on_progress_changed (int percent) {
-		progress (percent_finished + percent / 3);
+		progress (percent_finished + percent / 4);
 	}
 
 	private void timeout_quit () {
diff --git a/src/imdb/imdb-sqlite.vala b/src/imdb/imdb-sqlite.vala
index d26564b..ee655d5 100644
--- a/src/imdb/imdb-sqlite.vala
+++ b/src/imdb/imdb-sqlite.vala
@@ -22,7 +22,7 @@ class IMDbSqlite : Object {
 	Database db;
 	List genres;
 
-	public delegate void ReceiveMovieFunction (string title, int year, int rating, int genres);
+	public delegate void ReceiveMovieFunction (string title, string? aka, int year, int rating, int genres);
 
 	public IMDbSqlite (string filename) {
 		int rc;
@@ -126,6 +126,74 @@ class IMDbSqlite : Object {
 		return 0;
 	}
 
+	/**
+	 * Stores an alternative title for a movie.
+	 *
+	 * Both values are bound as statement parameters instead of being
+	 * pasted into the SQL text, so titles containing quote characters
+	 * are stored intact.
+	 *
+	 * @param title full title of the movie, as stored in Movies.Title
+	 * @param aka alternative title to add
+	 * @return 0 on success, 1 if the movie is unknown or the insert failed
+	 */
+	public int add_aka (string title, string aka) {
+		int rowid;
+
+		if (!movie_exists (title, out rowid))
+			return 1;
+
+		Statement stmt;
+		int rc;
+
+		rc = db.prepare_v2 ("INSERT INTO Akas(Aka, TitleID) VALUES (?1, ?2);", -1, out stmt);
+		if (rc != Sqlite.OK) {
+			stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());
+			return 1;
+		}
+		stmt.bind_text (1, aka);
+		stmt.bind_int (2, rowid);
+
+		// A successful INSERT steps straight to DONE
+		rc = stmt.step ();
+		if (rc != Sqlite.DONE) {
+			stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());
+			return 1;
+		}
+
+		return 0;
+	}
+
+	/**
+	 * Checks whether a movie is in the database.
+	 *
+	 * @param title full title to look for, as stored in Movies.Title
+	 * @param rowid if given, receives the rowid of the movie; it is left
+	 *              untouched when the movie is not found
+	 * @return true if the movie exists
+	 */
+	public bool movie_exists (string title, out int rowid = null) {
+		Statement stmt;
+		int rc;
+
+		rc = db.prepare_v2 ("SELECT rowid FROM Movies WHERE Title = ?1;", -1, out stmt);
+		if (rc != Sqlite.OK) {
+			stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());
+			return false;
+		}
+		stmt.bind_text (1, title);
+
+		// Title is the primary key of Movies: at most one row can match
+		if (stmt.step () != Sqlite.ROW)
+			return false;
+
+		// rowid is optional, callers may leave it out
+		if (&rowid != null) {
+			rowid = stmt.column_int (0);
+		}
+		return true;
+	}
+
 	public int clear () {
 		int rc;
 
@@ -133,7 +201,9 @@ class IMDbSqlite : Object {
 			"DROP TABLE IF EXISTS Movies;" +
 			"CREATE TABLE Movies (Title TEXT PRIMARY KEY COLLATE NOCASE, Year INTEGER, Rating INTEGER, Votes INTEGER NOT NULL DEFAULT 0, Genres INTEGER NOT NULL DEFAULT 0);" +
 			"DROP TABLE IF EXISTS Genres;" +
-			"CREATE TABLE Genres (Bit INTEGER PRIMARY KEY, Genre TEXT NOT NULL);",
+			"CREATE TABLE Genres (Bit INTEGER PRIMARY KEY, Genre TEXT NOT NULL);" +
+			"DROP TABLE IF EXISTS Akas;" +
+			"CREATE TABLE Akas (Aka TEXT NOT NULL COLLATE NOCASE, TitleID INTEGER NOT NULL);",
 			callback, null);
 		if (rc != Sqlite.OK) {
 			stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());
@@ -143,6 +213,23 @@ class IMDbSqlite : Object {
 		return 0;
 	}
 
+	/**
+	 * Creates the AkasAka index on the Aka column of the Akas table.
+	 *
+	 * @return 0 on success, 1 on failure
+	 */
+	public int create_aka_index () {
+		int rc;
+
+		rc = db.exec ("CREATE INDEX AkasAka ON Akas(Aka);", callback, null);
+		if (rc != Sqlite.OK) {
+			stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());
+			return 1;
+		}
+
+		return 0;
+	}
+
 	public int create_votes_index () {
 		int rc;
 
@@ -159,6 +246,7 @@ class IMDbSqlite : Object {
 	public async int query (MovieFilter filter, ReceiveMovieFunction receive_movie, int limit, Cancellable? cancellable) {
 		var sql = "SELECT Title, Year, Rating, Genres FROM Movies";
 		var sep = " WHERE ";
+		string? match = null;
 		Statement stmt;
 		int rc;
 
@@ -167,10 +255,12 @@ class IMDbSqlite : Object {
 		db.progress_handler (1000, progress_handler);
 
 		if (filter.title != null && filter.title != "") {
-			if ("*" in filter.title)
-				sql += sep + "Title GLOB \"%s (*)\"".printf (filter.title);
-			else
-				sql += sep + "Title LIKE \"%s%%\"".printf (filter.title);
+			if ("*" in filter.title) {
+				match = "GLOB \"%s (*)\"".printf (filter.title);
+			} else {
+				match = "LIKE \"%s%%\"".printf (filter.title);
+			}
+			sql += sep + "(Title %s OR rowid IN (SELECT TitleID FROM Akas WHERE Aka %s))".printf (match, match);
 			sep = " AND ";
 		}
 		if (filter.year_min > 0) {
@@ -208,7 +298,14 @@ class IMDbSqlite : Object {
 				string title = stmt.column_text (0);
 				int rating = stmt.column_int (2);
 				int genres = stmt.column_int (3);
-				receive_movie (strip_year (title, year), year, rating, genres);
+				string? aka = null;
+				// Matched through an alternative title only: look it up for display
+				if (match != null && !(filter.matches_title (strip_year (title, year)))) {
+					aka = movie_aka (title, match);
+					if (aka != null)
+						aka = strip_year (aka, year);
+				}
+				receive_movie (strip_year (title, year), aka, year, rating, genres);
 			}
 		} while (rc == Sqlite.ROW);
 
@@ -216,6 +313,32 @@ class IMDbSqlite : Object {
 		return 0;
 	}
 
+	/**
+	 * Looks up the alternative title through which a movie matched.
+	 *
+	 * @param title full title of the movie, as stored in Movies.Title
+	 * @param match SQL comparison ("LIKE ..." or "GLOB ...") built by query ()
+	 * @return the first alternative title satisfying match, or null
+	 */
+	private string? movie_aka (string title, string match) {
+		string sql = "SELECT Aka FROM Akas WHERE (TitleID = (SELECT rowid FROM Movies WHERE Title = ?1) AND Aka %s) LIMIT 1;".printf (match);
+		Statement stmt;
+		int rc;
+
+		rc = db.prepare_v2 (sql, -1, out stmt);
+		if (rc != Sqlite.OK) {
+			stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());
+			return null;
+		}
+		stmt.bind_text (1, title);
+
+		// LIMIT 1: there is at most one row to fetch
+		if (stmt.step () != Sqlite.ROW)
+			return null;
+
+		return stmt.column_text (0);
+	}
+
 	private int progress_handler () {
 		((MainContext) null).iteration (false);
 		return (int) _cancellable.is_cancelled ();
diff --git a/src/imdb/plaintext-downloader-interface.vala b/src/imdb/plaintext-downloader-interface.vala
index 0aa840a..af7eae3 100644
--- a/src/imdb/plaintext-downloader-interface.vala
+++ b/src/imdb/plaintext-downloader-interface.vala
@@ -8,7 +8,8 @@ public interface IMDbDownloader {
 	public const int MOVIES = 0x1;
 	public const int GENRES = 0x2;
 	public const int RATINGS = 0x4;
-	public const int PLOTS = 0x8;
+	public const int AKAS = 0x8;
+	public const int PLOTS = 0x10;
 	public const int SERIES = 0x100;
 	public const int TV = 0x200;
 	public const int VIDEO = 0x400;
diff --git a/src/plugins/imdb-download-dialog.vala b/src/plugins/imdb-download-dialog.vala
index ba52c8d..701195c 100644
--- a/src/plugins/imdb-download-dialog.vala
+++ b/src/plugins/imdb-download-dialog.vala
@@ -35,7 +35,7 @@ class IMDbDownloadDialog : Note {
 			server.Progress += this.on_progress;
 			server.DescriptionChanged += this.on_description_changed;
 
-			server.download (mirror, IMDbDownloader.MOVIES | IMDbDownloader.GENRES | IMDbDownloader.RATINGS);
+			server.download (mirror, IMDbDownloader.MOVIES | IMDbDownloader.GENRES | IMDbDownloader.RATINGS | IMDbDownloader.AKAS);
 		} catch (DBus.Error e) {
 			warning ("Failed to invoke IMDb downloader: %s", e.message);
 		}
diff --git a/src/plugins/imdb-plugin.vala b/src/plugins/imdb-plugin.vala
index d03a0eb..b8b33ea 100644
--- a/src/plugins/imdb-plugin.vala
+++ b/src/plugins/imdb-plugin.vala
@@ -174,14 +174,18 @@ class IMDBSource : MovieSource {
 		yield sqlite.query (filter, receive_movie, limit, cancellable);
 	}
 
-	private void receive_movie (string title, int year, int rating, int genres) {
+	private void receive_movie (string title, string? aka, int year, int rating, int genres) {
 		Movie movie = new Movie ();
 		movie.title = title;
 		movie.year = year;
 		movie.rating = rating;
 		movie.genres.field = genres;
 		// TODO - depending on settings, this could be something else, like director info or runtime
-		movie.secondary = movie.genres.to_string ();
+		if (aka != null) {
+			movie.secondary = "aka \"%s\" - %s".printf (aka, movie.genres.to_string ());
+		} else {
+			movie.secondary = movie.genres.to_string ();
+		}
 		_get_callback (movie);
 	}
 }
-- 
1.7.9.5