7963fda139926b6e11dedfbe5694466a90ed2ff5
[cinaest] / src / imdb / imdb-line-parser.vala
/**
 * Base class for line-oriented parsers that store their results through
 * an IMDbSqlite instance.
 */
abstract class LineParser {
        // Borrowed reference: the parser does not keep the database object alive.
        internal unowned IMDbSqlite sqlite;

        public LineParser (IMDbSqlite _sqlite) {
                sqlite = _sqlite;
        }

        /**
         * Processes a single input line.
         *
         * @param line the line to process
         */
        public abstract void parse_line (string line);

        /**
         * Tells whether a title has to be ignored because it ends in one of
         * the markers "(TV)", "(V)" or "(VG)".
         *
         * @param title the title to check
         * @return true if the title ends in one of the markers
         */
        internal bool skip_title (string title) {
                return title.has_suffix ("(TV)")
                    || title.has_suffix ("(V)")
                    || title.has_suffix ("(VG)");
        }
}
23
/**
 * Parses lines of the form "title, one or more tabs, digits" and passes
 * the title and the number (used as the year) to IMDbSqlite.add_movie ().
 */
class MovieLineParser : LineParser {
        Regex re_movie;

        public MovieLineParser (IMDbSqlite _sqlite) {
                base (_sqlite);
                try {
                        // The lines are matched before they are converted from
                        // Latin-1, so they are not guaranteed to be valid UTF-8.
                        // RAW makes GRegex treat the subject byte-wise instead of
                        // decoding it as UTF-8.
                        re_movie = new Regex ("^([^\t]+)\t+([0-9]+)$", RegexCompileFlags.RAW);
                } catch (RegexError e) {
                        critical ("Failed to initialize regex: %s\n", e.message);
                }
        }

        /**
         * Extracts title and year from a line and adds the movie to the
         * database. Lines that do not match, fail the charset conversion or
         * are rejected by skip_title () are silently ignored.
         *
         * @param line one line of input, expected in Latin-1 encoding
         */
        public override void parse_line (string line) {
                // Skip series episodes (their lines start with a double quote)
                if (line[0] == '"')
                        return;

                MatchInfo matchinfo;
                if (!re_movie.match (line, 0, out matchinfo))
                        return;

                string year = matchinfo.fetch (2);
                string title;
                try {
                        title = convert (matchinfo.fetch (1), -1, "utf-8", "latin1");
                } catch (ConvertError e) {
                        return;
                }

                if (skip_title (title))
                        return;

                sqlite.add_movie (title, year.to_int ());
        }
}
60
/**
 * Parses lines of the form "title, one or more tabs, genre" and passes
 * both values to IMDbSqlite.movie_add_genre ().
 */
class GenreLineParser : LineParser {
        Regex re_genre;

        public GenreLineParser (IMDbSqlite _sqlite) {
                base (_sqlite);
                try {
                        // The lines are matched before they are converted from
                        // Latin-1, so they are not guaranteed to be valid UTF-8.
                        // RAW makes GRegex treat the subject byte-wise instead of
                        // decoding it as UTF-8.
                        re_genre = new Regex ("^([^\t]+)\t+([A-Za-z-]+)$", RegexCompileFlags.RAW);
                } catch (RegexError e) {
                        critical ("Failed to initialize regex: %s\n", e.message);
                }
        }

        /**
         * Extracts title and genre from a line and stores the genre.
         * Lines that do not match, fail the charset conversion or are
         * rejected by skip_title () are silently ignored.
         *
         * @param line one line of input, expected in Latin-1 encoding
         */
        public override void parse_line (string line) {
                // Skip series episodes (their lines start with a double quote)
                if (line[0] == '"')
                        return;

                MatchInfo matchinfo;
                if (!re_genre.match (line, 0, out matchinfo))
                        return;

                string genre = matchinfo.fetch (2);
                string title;
                try {
                        title = convert (matchinfo.fetch (1), -1, "utf-8", "latin1");
                } catch (ConvertError e) {
                        return;
                }

                // Apply the same title filter as the other parsers in this
                // file: MovieLineParser never passes such titles to
                // add_movie (), so there is no point in submitting genres
                // for them.
                if (skip_title (title))
                        return;

                sqlite.movie_add_genre (title, genre);
        }
}
94
/**
 * Parses the ratings list: everything up to and including the line
 * "MOVIE RATINGS REPORT" is ignored, afterwards each matching line yields
 * a vote count, a rating and a title that are passed to
 * IMDbSqlite.movie_set_rating ().
 */
class RatingLineParser : LineParser {
        enum RatingState {
                HEADER,
                NONE
        }
        RatingState state;
        Regex re_rating;

        public RatingLineParser (IMDbSqlite _sqlite) {
                base (_sqlite);
                state = RatingState.HEADER;
                try {
                        // The lines are matched before they are converted from
                        // Latin-1, so they are not guaranteed to be valid UTF-8.
                        // RAW makes GRegex treat the subject byte-wise instead of
                        // decoding it as UTF-8.
                        re_rating = new Regex ("^      .+ +([0-9]+) +([0-9.]+) +(.+)$", RegexCompileFlags.RAW);
                } catch (RegexError e) {
                        critical ("Failed to initialize regex: %s\n", e.message);
                }
        }

        /**
         * Handles one line of the ratings list. Lines that do not match,
         * fail the charset conversion or are rejected by skip_title () are
         * silently ignored.
         *
         * @param line one line of input, expected in Latin-1 encoding
         */
        public override void parse_line (string line) {
                if (state == RatingState.HEADER) {
                        // Wait for the report heading; the heading line itself
                        // carries no data.
                        if (line == "MOVIE RATINGS REPORT")
                                state = RatingState.NONE;
                        return;
                }

                // From here on the state can only be RatingState.NONE.

                // Skip series episodes
                if (line[0] == '"')
                        return;

                MatchInfo matchinfo;
                if (!re_rating.match (line, 0, out matchinfo))
                        return;

                // Group 1: votes, group 2: rating, group 3: title
                string votes = matchinfo.fetch (1);
                string rating = matchinfo.fetch (2);
                string title;
                try {
                        title = convert (matchinfo.fetch (3), -1, "utf-8", "latin1");
                } catch (ConvertError e) {
                        return;
                }

                // Skip series episodes
                if (title[0] == '"')
                        return;

                if (skip_title (title))
                        return;

                // The rating is stored as an integer: ten times the parsed value.
                sqlite.movie_set_rating (title, (int) (rating.to_double () * 10), votes.to_int ());
        }
}
151
/**
 * Parses the alternative-titles list. After the heading
 * ("AKA TITLES LIST" followed by a line of "=" characters) every block
 * consists of a title line followed by "   (aka ...)" lines and is
 * terminated by an empty line. Each alias is passed to
 * IMDbSqlite.add_aka () together with the title of its block.
 */
class AkaLineParser : LineParser {
        enum AkaState {
                HEADER,
                NONE,
                TITLE
        }
        AkaState state;
        string title;

        public AkaLineParser (IMDbSqlite _sqlite) {
                base (_sqlite);
                state = AkaState.HEADER;
                title = null;
        }

        /**
         * Handles one line of the list. The line is only read, never
         * modified.
         *
         * @param line one line of input, expected in Latin-1 encoding
         */
        public override void parse_line (string line) {
                if (state == AkaState.HEADER) {
                        // While in the header, a non-null title only records that
                        // the list heading has been seen.
                        if (line == "AKA TITLES LIST") title = line;
                        if (line == "===============" && title != null)
                                state = AkaState.NONE;
                        return;
                }

                if (state == AkaState.NONE) {
                        // Skip empty lines
                        if (line == "")
                                return;

                        // Skip series episodes
                        if (line[0] == '"')
                                return;

                        // Parse error
                        if (line[0] == ' ')
                                return;

                        try {
                                title = convert (line, -1, "utf-8", "latin1");
                        } catch (ConvertError e) {
                                title = null;
                                return;
                        }

                        if (skip_title (title))
                                return;

                        // The title line itself falls through to the TITLE
                        // branch below, where it matches neither condition.
                        state = AkaState.TITLE;
                }

                if (state == AkaState.TITLE) {
                        // Empty lines mark end of title
                        if (line == "") {
                                state = AkaState.NONE;
                                return;
                        }

                        if (line.has_prefix ("   (aka ")) {
                                if (skip_title (title))
                                        return;

                                // The alias starts right after the 8-byte prefix and
                                // ends with the first ')' of the first "))". Instead
                                // of writing a terminator into the caller's line,
                                // hand the byte count to convert (); -1 converts
                                // the whole remainder when no "))" is present.
                                char* start = line.offset (8);
                                char* end = ((string) start).str ("))");
                                ssize_t len = -1;
                                if (end != null)
                                        len = (ssize_t) (end - start) + 1;

                                string aka;
                                try {
                                        aka = convert ((string) start, len, "utf-8", "latin1");
                                } catch (ConvertError e) {
                                        return;
                                }

                                sqlite.add_aka (title, aka);
                        }
                }
        }
}
229
/**
 * Parses the plot summaries list. After the heading
 * ("PLOT SUMMARIES LIST" followed by a line of "=" characters) a
 * "MV: " line selects a title, "PL: " lines are joined with single spaces
 * into the plot text and a "BY: " line names the author and causes the
 * plot to be passed to IMDbSqlite.add_plot ().
 */
class PlotLineParser : LineParser {
        enum PlotState {
                HEADER,
                NONE,
                TITLE
        }
        string title;
        string plot;
        PlotState state;

        public PlotLineParser (IMDbSqlite _sqlite) {
                base (_sqlite);
                title = null;
                state = PlotState.HEADER;
        }

        /**
         * Handles one line of the list.
         *
         * @param line one line of input, expected in Latin-1 encoding
         */
        public override void parse_line (string line) {
                if (state == PlotState.HEADER) {
                        // While in the header, a non-null title only records that
                        // the list heading has been seen.
                        if (line == "PLOT SUMMARIES LIST") {
                                title = line;
                        } else if (line == "===================" && title != null) {
                                state = PlotState.NONE;
                        }
                        return;
                }

                // Empty lines carry no information once the header is done
                if (line == "")
                        return;

                switch (state) {
                case PlotState.NONE:
                        read_title_line (line);
                        break;
                case PlotState.TITLE:
                        read_plot_line (line);
                        break;
                default:
                        break;
                }
        }

        // Waits for a "MV: " line and switches to PlotState.TITLE if its
        // title is accepted.
        private void read_title_line (string line) {
                if (!line.has_prefix ("MV: "))
                        return;

                // Skip series episodes
                if (line[4] == '"')
                        return;

                try {
                        title = convert (line.offset (4), -1, "utf-8", "latin1");
                } catch (ConvertError e) {
                        stderr.printf ("Error converting title to UTF-8\n");
                        title = null;
                        return;
                }

                if (skip_title (title))
                        return;

                plot = "";
                state = PlotState.TITLE;
        }

        // Collects "PL: " lines for the current title and stores the plot
        // when the closing "BY: " line arrives.
        private void read_plot_line (string line) {
                if (line.has_prefix ("PL: ")) {
                        if (skip_title (title))
                                return;

                        string chunk;
                        try {
                                chunk = convert (line.offset (4), -1, "utf-8", "latin1");
                        } catch (ConvertError e) {
                                stderr.printf ("Error converting plot for \"%s\" to UTF-8\n", title);
                                plot = "";
                                return;
                        }

                        // Separate consecutive plot lines by a single space
                        plot = (plot == "") ? chunk : plot + " " + chunk;
                        return;
                }

                // BY: tag marks end of plot
                if (!line.has_prefix ("BY: "))
                        return;

                string author;
                try {
                        author = convert (line.offset (4), -1, "utf-8", "latin1");
                } catch (ConvertError e) {
                        stderr.printf ("Error converting plot author for \"%s\" to UTF-8\n", title);
                        // The plot is still stored, just without an author
                        author = null;
                }

                sqlite.add_plot (title, plot, author);
                state = PlotState.NONE;
        }
}