IMDb plugin + downloader: parse IMDb alternative titles and use to match
[cinaest] / src / imdb / imdb-line-parser.vala
/**
 * Common base for the parsers that consume a list file one line at a time.
 *
 * Subclasses implement parse_line() and hand whatever they extract to the
 * IMDbSqlite instance received by the constructor.
 */
abstract class LineParser {
        // Borrowed reference: the parser does not keep the database object alive.
        internal unowned IMDbSqlite sqlite;

        /**
         * Stores the database wrapper used by the concrete parsers.
         *
         * @param _sqlite object that receives the parsed records
         */
        public LineParser (IMDbSqlite _sqlite) {
                sqlite = _sqlite;
        }

        /**
         * Processes a single line of input.
         *
         * @param line the line to parse
         */
        public abstract void parse_line (string line);

        /**
         * Checks whether a title ends in one of the markers "(TV)", "(V)" or
         * "(VG)"; the parsers use this to decide that an entry is ignored.
         *
         * @param title title string to inspect
         * @return true if the title carries one of those suffixes
         */
        internal bool skip_title (string title) {
                return title.has_suffix ("(TV)")
                    || title.has_suffix ("(V)")
                    || title.has_suffix ("(VG)");
        }
}
23
/**
 * Parser for lines of the form "<title><tabs><digits>".
 *
 * Every accepted line leads to exactly one IMDbSqlite.add_movie() call.
 */
class MovieLineParser : LineParser {
        // Title, one or more tabs, then a purely numeric year; compiled once.
        Regex re_movie;

        /**
         * Creates the parser and compiles its line pattern.
         *
         * @param _sqlite object that receives the parsed movies
         */
        public MovieLineParser (IMDbSqlite _sqlite) {
                base (_sqlite);
                try {
                        re_movie = new Regex ("^([^\t]+)\t+([0-9]+)$");
                } catch (RegexError err) {
                        critical ("Failed to initialize regex: %s\n", err.message);
                }
        }

        /**
         * Extracts title and year from a line and stores them.
         *
         * The line is silently dropped when it starts with a double quote,
         * does not match the pattern, cannot be converted from Latin-1 to
         * UTF-8, or has a title rejected by skip_title().
         *
         * @param line the line to parse
         */
        public override void parse_line (string line) {
                // A leading double quote marks a series episode; those are skipped.
                if (line[0] == '"') {
                        return;
                }

                MatchInfo groups;
                if (!re_movie.match (line, 0, out groups)) {
                        return;
                }

                string movie_title;
                try {
                        // The title is converted from Latin-1 to UTF-8 before it is stored.
                        movie_title = convert (groups.fetch (1), -1, "utf-8", "latin1");
                } catch (ConvertError err) {
                        return;
                }

                if (skip_title (movie_title)) {
                        return;
                }

                // Group 2 consists of digits only, as enforced by the pattern.
                string year_text = groups.fetch (2);
                sqlite.add_movie (movie_title, year_text.to_int ());
        }
}
60
/**
 * Parser for lines of the form "<title><tabs><genre>".
 *
 * Every accepted line leads to one IMDbSqlite.movie_add_genre() call.
 */
class GenreLineParser : LineParser {
        // Title, one or more tabs, then a genre made of letters and dashes.
        Regex re_genre;

        /**
         * Creates the parser and compiles its line pattern.
         *
         * @param _sqlite object that receives the parsed genres
         */
        public GenreLineParser (IMDbSqlite _sqlite) {
                base (_sqlite);
                try {
                        re_genre = new Regex ("^([^\t]+)\t+([A-Za-z-]+)$");
                } catch (RegexError err) {
                        critical ("Failed to initialize regex: %s\n", err.message);
                }
        }

        /**
         * Extracts title and genre from a line and stores the pair.
         *
         * The line is silently dropped when it starts with a double quote,
         * does not match the pattern, or cannot be converted from Latin-1
         * to UTF-8.
         *
         * @param line the line to parse
         */
        public override void parse_line (string line) {
                // A leading double quote marks a series episode; those are skipped.
                if (line[0] == '"') {
                        return;
                }

                MatchInfo groups;
                if (!re_genre.match (line, 0, out groups)) {
                        return;
                }

                string movie_title;
                try {
                        // Only the title is converted; the genre matched ASCII characters only.
                        movie_title = convert (groups.fetch (1), -1, "utf-8", "latin1");
                } catch (ConvertError err) {
                        return;
                }

                // NOTE(review): unlike the movie and rating parsers, skip_title()
                // is not consulted here - confirm that this is intended.
                string genre_name = groups.fetch (2);
                sqlite.movie_add_genre (movie_title, genre_name);
        }
}
94
/**
 * Parser for rating lines: six leading spaces, an uncaptured field, then
 * vote count, rating and title separated by spaces.
 *
 * Every accepted line leads to one IMDbSqlite.movie_set_rating() call.
 */
class RatingLineParser : LineParser {
        // Group 1: votes (digits), group 2: rating (digits and dots), group 3: title.
        Regex re_rating;

        /**
         * Creates the parser and compiles its line pattern.
         *
         * @param _sqlite object that receives the parsed ratings
         */
        public RatingLineParser (IMDbSqlite _sqlite) {
                base (_sqlite);
                try {
                        re_rating = new Regex ("^      .+ +([0-9]+) +([0-9.]+) +(.+)$");
                } catch (RegexError err) {
                        critical ("Failed to initialize regex: %s\n", err.message);
                }
        }

        /**
         * Extracts votes, rating and title from a line and stores them.
         *
         * The line is silently dropped when it or its title starts with a
         * double quote, when it does not match the pattern, when the title
         * cannot be converted from Latin-1 to UTF-8, or when skip_title()
         * rejects the title.
         *
         * @param line the line to parse
         */
        public override void parse_line (string line) {
                // A leading double quote marks a series episode; those are skipped.
                if (line[0] == '"') {
                        return;
                }

                MatchInfo groups;
                if (!re_rating.match (line, 0, out groups)) {
                        return;
                }

                string movie_title;
                try {
                        movie_title = convert (groups.fetch (3), -1, "utf-8", "latin1");
                } catch (ConvertError err) {
                        return;
                }

                // The title is the last column, so the episode check is repeated on it;
                // skip_title() filters the suffix-marked entries.
                if (movie_title[0] == '"' || skip_title (movie_title)) {
                        return;
                }

                string votes_text = groups.fetch (1);
                string rating_text = groups.fetch (2);

                // The rating is stored as an integer: the parsed value times ten, truncated.
                int rating_times_ten = (int) (rating_text.to_double () * 10);
                sqlite.movie_set_rating (movie_title, rating_times_ten, votes_text.to_int ());
        }
}
136
/**
 * Parser for the alternative-title ("aka") list.
 *
 * The parser is a small state machine:
 *
 *  - HEADER: everything is ignored until the "AKA TITLES LIST" heading and,
 *    after it, the "===============" underline have been seen;
 *  - NONE: waiting for the next title line;
 *  - TITLE: inside the block of a title, collecting its "   (aka ...)" lines
 *    until an empty line closes the block.
 *
 * Each alternative title found is passed to IMDbSqlite.add_aka() together
 * with the title of the block it appeared in.
 */
class AkaLineParser : LineParser {
        enum AkaState {
                HEADER,
                NONE,
                TITLE
        }

        // Text that introduces an alternative title inside a title block.
        private const string AKA_PREFIX = "   (aka ";

        AkaState state;
        // True once the "AKA TITLES LIST" heading has been seen.
        bool header_seen;
        // UTF-8 title of the current block; null before the first title and
        // after a failed conversion.
        string? title;

        /**
         * Creates the parser in the HEADER state.
         *
         * @param _sqlite object that receives the parsed alternative titles
         */
        public AkaLineParser (IMDbSqlite _sqlite) {
                base (_sqlite);
                state = AkaState.HEADER;
                header_seen = false;
                title = null;
        }

        /**
         * Feeds one line into the state machine.
         *
         * The line itself is never modified.
         *
         * @param line the line to parse
         */
        public override void parse_line (string line) {
                switch (state) {
                case AkaState.HEADER:
                        parse_header_line (line);
                        break;
                case AkaState.NONE:
                        // A line accepted as a title can never be an aka line as well
                        // (it neither is empty nor starts with a space), so there is
                        // no need to run the TITLE handling on the same line.
                        parse_title_line (line);
                        break;
                case AkaState.TITLE:
                        parse_aka_line (line);
                        break;
                }
        }

        // HEADER state: wait for the list heading followed by its underline.
        private void parse_header_line (string line) {
                if (line == "AKA TITLES LIST") {
                        header_seen = true;
                }
                if (line == "===============" && header_seen) {
                        state = AkaState.NONE;
                }
        }

        // NONE state: accept the next usable title line and enter the TITLE state.
        private void parse_title_line (string line) {
                // Skip empty lines, series episodes (leading double quote) and
                // indented lines, which are not title lines.
                if (line == "" || line[0] == '"' || line[0] == ' ') {
                        return;
                }

                try {
                        title = convert (line, -1, "utf-8", "latin1");
                } catch (ConvertError e) {
                        title = null;
                        return;
                }

                // Stay in NONE for rejected titles so that their aka lines, which
                // start with a space, are ignored by the check above.
                if (skip_title (title)) {
                        return;
                }

                state = AkaState.TITLE;
        }

        // TITLE state: store aka lines until an empty line ends the block.
        private void parse_aka_line (string line) {
                // Empty lines mark end of title
                if (line == "") {
                        state = AkaState.NONE;
                        return;
                }

                if (!line.has_prefix (AKA_PREFIX)) {
                        return;
                }

                // Work on a copy: the text after the prefix is cut right after the
                // first ')' of the first "))", e.g. "Foo (1999)) bar" -> "Foo (1999)".
                // Without a "))" the whole remainder is used unchanged.
                string aka_raw = line.substring (AKA_PREFIX.length);
                int closing = aka_raw.index_of ("))");
                if (closing >= 0) {
                        aka_raw = aka_raw.substring (0, closing + 1);
                }

                string aka;
                try {
                        aka = convert (aka_raw, -1, "utf-8", "latin1");
                } catch (ConvertError e) {
                        return;
                }

                // The TITLE state is only entered with a title that passed
                // skip_title(), so no further filtering is needed here.
                sqlite.add_aka (title, aka);
        }
}