// Constructor for TTableInfo. Its body is not visible in this listing.
// NOTE(review): presumably initialises the tr_open / td_open flags that
// close(), new_row() and new_cell() test - confirm.
4 TTableInfo::TTableInfo ()
// Builds the markup that closes this table: "</wikitablecell>" if a cell is
// open, "</wikitablerow>" if a row is open, and finally "</wikitable>".
// Elements are closed innermost first.
// NOTE(review): the declaration and the return of `ret` are not visible in
// this listing - presumably the accumulated string is returned; confirm.
10 string TTableInfo::close ()
13 if ( td_open ) ret += "</wikitablecell>" ;
14 if ( tr_open ) ret += "</wikitablerow>" ;
15 ret += "</wikitable>" ;
// Builds the markup that starts a new table row: closes an open cell and an
// open row (in that order), then appends "<wikitablerow>".
// NOTE(review): any update of tr_open / td_open and the return of `ret` are
// not visible in this listing - confirm against the full source.
19 string TTableInfo::new_row ()
22 if ( td_open ) ret += "</wikitablecell>" ;
23 if ( tr_open ) ret += "</wikitablerow>" ;
24 ret += "<wikitablerow>" ;
// Builds the markup that starts a new table cell.
//   type : cell type; written upper-cased (via upper()) into the `type`
//          attribute of the <wikitablecell> element.
// NOTE(review): any update of td_open and the return of `ret` are not
// visible in this listing - confirm against the full source.
30 string TTableInfo::new_cell ( string type )
// A cell needs an enclosing row, so open one first when none is open.
33 if ( !tr_open ) ret += new_row () ;
// Close the previous cell of the row, if there is one.
34 if ( td_open ) ret += "</wikitablecell>" ;
35 ret += "<wikitablecell type=\"" + upper ( type ) + "\">" ;
41 // *****************************************************************************
42 // *****************************************************************************
46 // *****************************************************************************
47 // *****************************************************************************
// Handles a symmetric markup pair in `l`: an opening marker s1 at `from` and
// a later closing marker s2. The line is rewritten in place.
//   l      : line being edited.
//   from   : position where s1 must match; nothing happens otherwise.
//   s1, s2 : opening and closing markers to look for.
//   r1, r2 : replacement strings. NOTE(review): the lines that use them are
//            not visible in this listing - presumably they take the place of
//            s1 and s2 in the rebuilt line; confirm.
// NOTE(review): the last parameter (used below as `extend`) and the
// declarations of a and b are not visible in this listing.
49 void WIKI2XML::parse_symmetric ( string &l , size_t &from ,
50 string s1 , string s2 ,
51 string r1 , string r2 ,
55 if ( !submatch ( l , s1 , from ) ) return ; // Left does not match
// Look for the closing marker, starting right after the opening one.
56 for ( a = from + s1.length() ; a + s2.length() <= l.length() ; a++ )
58 if ( !submatch ( l , s2 , a ) ) continue ;
// With `extend` set, keep advancing b for as long as s2 still matches at b.
59 for ( b = a+1 ; extend && submatch ( l , s2 , b ) ; b++ ) ;
// Rebuild the line from: the text before `from`, the text between the end of
// s1 and position b, and the text that follows s2 at position b.
61 l = l.substr ( 0 , from ) +
63 l.substr ( from + s1.length() , b - from - s1.length() ) +
65 l.substr ( b + s2.length() , l.length() ) ;
66 if ( debug ) cout << "newl : " << l << endl ;
// Converts a double-delimited construct in `l` into an XML element and
// splices the result back into the line.
//   l    : line being edited in place.
//   from : index into `l` where the construct starts; reassigned at the end
//          from the length of the replacement text.
//   mode : 'L' uses '[' and ']' as delimiters; 'T' uses '{' and '}' and names
//          the element "wikitemplate".
// NOTE(review): several statements of this function (declarations of a, cnt,
// x, key, value, trail; updates of cnt; braces) are not visible in this
// listing, so the comments below describe only the visible lines.
71 void WIKI2XML::parse_link ( string &l , size_t &from , char mode )
75 chart par_open = '[' ; // mode 'L'
76 chart par_close = ']' ; // mode 'L'
77 if ( mode == 'T' ) { par_open = '{' ; par_close = '}' ; }
// Scan forward while cnt is positive. NOTE(review): cnt looks like a nesting
// counter for open delimiter pairs - its updates are not visible; confirm.
78 for ( a = from ; cnt > 0 && a+1 < l.length() ; a++ )
// A doubled opening delimiter inside the construct is handled recursively.
// The mode argument is omitted here, so the declared default (not visible in
// this listing) applies.
80 if ( l[a] == par_open && l[a+1] == par_open )
81 parse_link ( l , a ) ;
82 else if ( l[a] == par_close && l[a+1] == par_close )
85 if ( cnt > 0 ) return ; // Not a valid link
87 int to = a-1 ; // Without "]]"
88 string link = l.substr ( from+1 , to-from-1 ) ;
// The inner text is split on '|' into its parts.
91 vector <string> parts ;
92 explode ( '|' , link , parts ) ;
96 x.add_key_value ( "type" , "internal" ) ;
98 else if ( mode == 'T' ) x.name = "wikitemplate" ;
100 for ( a = 0 ; a < parts.size() ; a++ )
102 bool last = ( a + 1 == parts.size() ) ;
103 string p = parts[a] ;
// Markup nested inside the part is converted first.
104 parse_line_sub ( p ) ;
// Every part after the first - except the last part of a link (mode 'L') -
// is split on '=' below.
106 if ( a > 0 && ( mode != 'L' || !last ) )
109 vector <string> subparts ;
110 explode ( '=' , p , subparts ) ;
// No '=' present: the whole part is the value.
111 if ( subparts.size() == 1 )
// NOTE(review): g_markup_escape_text() returns a newly allocated string that
// the caller must release with g_free(); no such call is visible in this
// listing for any of the three uses in this function - confirm.
113 char *str = g_markup_escape_text(p.c_str(), p.length());
114 value = xml_embed ( str , "value" ) ;
// Otherwise the text before the first '=' is the key and the remaining
// subparts, re-joined with '=', form the value.
119 key = xml_embed ( subparts[0] , "key" ) ;
120 subparts.erase ( subparts.begin() ) ;
121 string itmp = implode ( "=" , subparts );
122 char *str = g_markup_escape_text(itmp.c_str(), itmp.length());
123 value = xml_embed ( str , "value" ) ;
129 char *str = g_markup_escape_text(p.c_str(), p.length());
130 p = xml_embed ( str , "value" ) ;
// Each part becomes a <wikiparameter> carrying its index and, for the final
// part, last="1".
134 string param = "number=\"" + val ( a ) + "\"" ;
135 if ( last ) param += " last=\"1\"" ;
136 x.text += xml_embed ( p , "wikiparameter" , param ) ;
139 if ( mode == 'L' ) // Try link trail
// The scan starts right after the closing pair and runs over characters
// accepted by is_text_char().
142 for ( a = to+2 ; a < l.length() && is_text_char ( l[a] ) ; a++ )
145 if ( trail != "" ) x.text += xml_embed ( trail , "trail" ) ;
148 x.add_key_value ( "parameters" , val ( parts.size() ) ) ;
// Serialise the element and run the inline parser over the result.
149 string replacement = x.get_string () ;
150 parse_line_sub ( replacement ) ;
// Swap the original construct in `l` for the generated text.
152 l.erase ( from-1 , to-from+3 ) ;
153 l.insert ( from-1 , replacement ) ;
154 if ( debug ) cout << "Link : " << link << endl << "Replacement : " << replacement << endl ;
155 if ( debug ) cout << "Result : " << l << endl << endl ;
156 from = from + replacement.length() - 2 ;
// Tells whether `c` is one of the list prefix characters '*', '#' or ':'.
// NOTE(review): the result for any other character is not visible in this
// listing - presumably false; confirm.
159 bool WIKI2XML::is_list_char ( chart c ) // For now...
161 if ( c == '*' ) return true ;
162 if ( c == '#' ) return true ;
163 if ( c == ':' ) return true ;
// Builds the opening or closing tag pair for one list level.
//   c    : list prefix character; '*' selects "ul", '#' selects "ol" and ':'
//          selects "dl".
//   open : true for the opening pair, false for the closing pair.
// NOTE(review): the declaration and the return of `ret`, and whatever happens
// for an unrecognised `c`, are not visible in this listing - confirm.
167 string WIKI2XML::get_list_tag ( chart c , bool open )
170 if ( debug ) cout << "get_list_tag : " << c << endl ;
171 if ( c == '*' ) ret = "ul" ;
172 if ( c == '#' ) ret = "ol" ;
173 if ( c == ':' ) ret = "dl" ;
// Items are "li", except in a ':' list where they are "dd".
176 string itemname = "li" ;
177 if ( c == ':' ) itemname = "dd" ;
// Opening: list tag then item tag. Closing: item tag then list tag.
178 if ( open ) ret = "<" + ret + "><" + itemname + ">" ;
179 else ret = "</" + itemname + "></" + ret + ">" ;
// Strips the list prefix from `l` and builds the tags needed to move from the
// current list state (`list`) to the state given by that prefix.
//   l : line; its leading list characters and the blanks after them are
//       removed in place.
// Returns "" when neither the old nor the new state has any list level.
// NOTE(review): the declarations of a and b, any update of `list`, and the
// final return are not visible in this listing - presumably `pre` is
// returned and `list` is set to `newlist`; confirm.
184 string WIKI2XML::fix_list ( string &l )
// Count the leading list characters.
187 for ( a = 0 ; a < l.length() && is_list_char ( l[a] ) ; a++ ) ;
188 string newlist , pre ;
191 newlist = left ( l , a ) ;
192 while ( a < l.length() && l[a] == ' ' ) a++ ; // Removing leading blanks
193 l = l.substr ( a , l.length() ) ;
195 if ( debug ) cout << "fix_list : " << l << endl ;
196 if ( list == "" && newlist == "" ) return "" ;
197 for ( a = 0 ; a < list.length() &&
198 a < newlist.length() &&
199 list[a] == newlist[a] ; a++ ) ; // The common part, if any
// Each closing pair is prepended, so the deepest old level ends up first.
201 for ( b = a ; b < list.length() ; b++ )
202 pre = get_list_tag ( list[b] , false ) + pre ; // Close old list tags
203 for ( b = a ; b < newlist.length() ; b++ )
204 pre += get_list_tag ( newlist[b] , true ) ; // Open new ones
206 if ( debug ) cout << "pre : " << pre << endl ;
207 if ( debug ) cout << "newlist : " << newlist << endl ;
// Converts one line of wiki text in place. Block-level constructs (list
// prefix, empty line, horizontal rule, heading, pre-formatted text, table
// markup) are recognised first; the generated block-level markup is collected
// in `pre` and prepended to the line at the end.
// NOTE(review): the declarations of a, pre and h, the body of the paragraph
// branch, and the braces/else structure are not visible in this listing.
212 void WIKI2XML::parse_line ( string &l )
215 if ( debug ) cout << l << endl ;
// fix_list() removes the list prefix from `l`; the previous list state is
// kept for the comparison below.
217 string oldlist = list ;
218 pre += fix_list ( l ) ;
// Same non-empty list state as before: end the previous item and start a new
// one ("dd" when the state ends in ':', "li" otherwise).
219 if ( list != "" && list == oldlist )
221 string itemname = "li" ;
222 if ( right ( list , 1 ) == ":" ) itemname = "dd" ;
223 pre = "</" + itemname + "><" + itemname + ">" + pre ;
226 if ( l == "" ) // Paragraph
230 else if ( left ( l , 4 ) == "----" ) // <hr>
// Skip the whole run of the leading character; what follows stays in `l`.
232 for ( a = 0 ; a < l.length() && l[a] == l[0] ; a++ ) ;
233 pre += "<wikiurlcounter action=\"reset\"/><hr/>" ;
234 l = l.substr ( a , l.length() - a ) ;
236 else if ( l != "" && l[0] == '=' ) // Heading
// Count how many '=' characters match at both ends of the line.
238 for ( a = 0 ; a < l.length() && l[a] == '=' && l[l.length()-a-1] == '=' ; a++ ) ;
240 if ( a >= l.length() ) h = "" ; // No heading
241 // else if ( l[a] != ' ' ) h = "" ;
242 // else if ( l[l.length()-a-1] != ' ' ) h = "" ;
243 else if ( a < 1 || a > 9 ) h = "" ;
// Drop `a` characters from each end and wrap the rest in element `h`.
// NOTE(review): how `h` is built for a valid heading is not visible here.
246 l = l.substr ( a , l.length() - a*2 ) ;
248 l = xml_embed ( l , h ) ;
251 else if ( l != "" && l[0] == ' ' ) // Pre-formatted text
253 for ( a = 0 ; a < l.length() && l[a] == ' ' ; a++ ) ;
254 l = l.substr ( a , l.length() ) ;
257 pre += "<pre>" + l + "</pre>" ;
// Table markup: a line starting with "{|", a line starting with "|}" whose
// third character is not '}', or - while a table is open - a line starting
// with '|' or '!'.
261 else if ( left ( l , 2 ) == "{|" || (left ( l , 2 ) == "|}" && l[2] != '}' ) ||
262 ( tables.size() > 0 && l != "" && ( l[0] == '|' || l[0] == '!' ) ) )
264 pre += table_markup ( l ) ;
269 if ( l != "" ) parse_line_sub ( l ) ;
271 if ( pre != "" ) l = pre + l ;
// Tells whether `protocol` is one of "HTTP", "FTP" or "MAILTO". The
// comparison is exact, so the argument must already be upper case.
// NOTE(review): the result for any other value is not visible in this
// listing - presumably false; confirm.
274 bool WIKI2XML::is_external_link_protocol ( string protocol )
276 if ( protocol == "HTTP" ) return true ;
277 if ( protocol == "FTP" ) return true ;
278 if ( protocol == "MAILTO" ) return true ;
// Scans `l` forward from `from` over characters accepted as part of a URL:
// ':', '/', '.', decimal digits and anything is_text_char() accepts. The scan
// stops at the first other character or at the end of the line.
// NOTE(review): the declaration of `a` and the return statement are not
// visible in this listing - presumably the stop position is returned; confirm.
282 int WIKI2XML::scan_url ( string &l , size_t from )
285 for ( a = from ; a < l.length() ; a++ )
287 if ( l[a] == ':' || l[a] == '/' || l[a] == '.' ) continue ;
288 if ( l[a] >= '0' && l[a] <= '9' ) continue ;
289 if ( is_text_char ( l[a] ) ) continue ;
290 break ; // End of URL
// Converts a bare URL in `l` into XML, using the URL text as both the url and
// the title.
//   l    : line being edited in place.
//   from : position of the ':' of a "://" sequence (that is how
//          parse_line_sub() calls this); moved to the end of the replacement
//          on success.
// NOTE(review): the declarations of `a` and `replacement` and any adjustment
// of `a` after the backward scan are not visible in this listing.
295 void WIKI2XML::parse_external_freelink ( string &l , size_t &from )
// Walk backwards over the characters in front of the ':'.
// NOTE(review): `a >= 0` and the `a == -1` test below only work as intended
// if `a` is a signed type - its declaration is not visible; confirm.
298 for ( a = from - 1 ; a >= 0 && is_text_char ( l[a] ) ; a-- ) ;
299 if ( a == -1 ) return ;
301 string protocol = upper ( l.substr ( a , from - a ) ) ;
302 if ( debug ) cout << "protocol : " << protocol << endl ;
303 if ( !is_external_link_protocol ( protocol ) ) return ;
// scan_url() gives the end of the URL; the URL is the text from a up to it.
304 int to = scan_url ( l , a ) ;
305 string url = l.substr ( a , to - a ) ;
307 replacement += xml_embed ( url , "url" ) ;
308 replacement += xml_embed ( url , "title" ) ;
// Splice the replacement in place of the URL text.
309 l = left ( l , a ) + replacement + l.substr ( to , l.length() - to ) ;
310 from = a + replacement.length() - 1 ;
// Converts a single-bracket external link in `l` into a <wikilink> element
// with type 'external'.
//   l    : line being edited in place.
//   from : position of the opening '[' (that is how parse_line_sub() calls
//          this); moved to the end of the replacement on success.
// Returns without changes when the text before the first ':' is not an
// accepted protocol or when no ']' follows.
// NOTE(review): the declarations of `to` and `replacement` and the condition
// guarding the url-counter title are not visible in this listing.
313 void WIKI2XML::parse_external_link ( string &l , size_t &from )
315 string protocol = upper ( before_first ( ':' , l.substr ( from + 1 , l.length() - from ) ) ) ;
316 if ( !is_external_link_protocol ( protocol ) ) return ;
// Find the closing bracket.
318 for ( to = from + 1 ; to < l.length() && l[to] != ']' ; to++ ) ;
319 if ( to == l.length() ) return ;
// Text between the brackets: the part before the first blank is the URL, the
// part after it is the title.
320 string url = l.substr ( from + 1 , to - from - 1 ) ;
321 string title = after_first ( ' ' , url ) ;
322 url = before_first ( ' ' , url ) ;
324 replacement += xml_embed ( url , "url" ) ;
// NOTE(review): presumably used when no title was given - confirm.
326 replacement += xml_embed ( "<wikiurlcounter action=\"add\"/>" , "title" ) ;
327 else replacement += xml_embed ( title , "title" ) ;
328 replacement = xml_embed ( replacement , "wikilink" , "type='external' protocol='" + protocol + "'" ) ;
// Splice the element in place of the bracketed text.
329 l = left ( l , from ) + replacement + l.substr ( to + 1 , l.length() - to ) ;
330 from = from + replacement.length() - 1 ;
// Converts the inline markup of `l` in place. The line is scanned character
// by character and each recognised start sequence is handed to its parser.
// The parsers take the scan position by reference, so they can move it.
// NOTE(review): the declaration of `a` is not visible in this listing.
333 void WIKI2XML::parse_line_sub ( string &l )
336 for ( a = 0 ; a < l.length() ; a++ )
338 if ( l[a] == '[' && a+1 < l.length() && l[a+1] == '[' ) // [[Link]]
339 parse_link ( l , a , 'L' ) ;
340 else if ( l[a] == '{' && a+1 < l.length() && l[a+1] == '{' ) // {{Template}}
341 parse_link ( l , a , 'T' ) ;
342 else if ( l[a] == '[' ) // External link
343 parse_external_link ( l , a ) ;
344 else if ( a+2 < l.length() && l[a] == ':' && l[a+1] == '/' && l[a+2] == '/' ) // External freelink
345 parse_external_freelink ( l , a ) ;
346 else if ( l[a] == SINGLE_QUOTE ) // Bold and italics
// The three-quote form is tried before the two-quote form.
348 parse_symmetric ( l , a , "'''" , "'''" , "<b>" , "</b>" , true ) ;
349 parse_symmetric ( l , a , "''" , "''" , "<i>" , "</i>" ) ;
// Converts every entry of `lines` in place with parse_line(), then appends
// extra lines holding the markup that fix_list() yields for `end` and the
// closing markup of all tables that are still open.
// NOTE(review): the declarations of `a` and `end`, the value `end` holds
// before the fix_list() call, and the statement that shrinks `tables` inside
// the loop are not visible in this listing.
354 void WIKI2XML::parse_lines ( vector <string> &lines )
357 for ( a = 0 ; a < lines.size() ; a++ )
359 parse_line ( lines[a] ) ;
// NOTE(review): presumably `end` is empty here, so that fix_list() returns
// the closing tags of any list still open - confirm.
365 end = fix_list ( end ) ;
366 if ( end != "" ) lines.push_back ( end ) ;
// Close the remaining tables, taking the last entry of `tables` each time.
370 while ( tables.size() )
372 end += tables[tables.size()-1].close () ;
375 if ( end != "" ) lines.push_back ( end ) ;
// Prepares the converter for the text `s`: rebuilds the list of permitted
// HTML tag names, runs make_tag_list() and remove_evil_html() over the text,
// and splits the result into `lines` at '\n'.
// NOTE(review): the statements between the signature and the first comment,
// and the declaration of `a`, are not visible in this listing.
378 void WIKI2XML::init ( string s )
383 // Now we remove evil HTML
384 allowed_html.clear () ;
385 allowed_html.push_back ( "b" ) ;
386 allowed_html.push_back ( "i" ) ;
387 allowed_html.push_back ( "p" ) ;
// NOTE(review): "b" is already in the list (first entry above); this second
// entry is redundant.
388 allowed_html.push_back ( "b" ) ;
389 allowed_html.push_back ( "br" ) ;
390 allowed_html.push_back ( "hr" ) ;
391 allowed_html.push_back ( "tt" ) ;
392 allowed_html.push_back ( "pre" ) ;
393 allowed_html.push_back ( "nowiki" ) ;
394 allowed_html.push_back ( "math" ) ;
395 allowed_html.push_back ( "strike" ) ;
396 allowed_html.push_back ( "u" ) ;
397 allowed_html.push_back ( "table" ) ;
398 allowed_html.push_back ( "caption" ) ;
399 allowed_html.push_back ( "tr" ) ;
400 allowed_html.push_back ( "td" ) ;
401 allowed_html.push_back ( "th" ) ;
402 allowed_html.push_back ( "li" ) ;
403 allowed_html.push_back ( "ul" ) ;
404 allowed_html.push_back ( "ol" ) ;
405 allowed_html.push_back ( "dl" ) ;
406 allowed_html.push_back ( "dd" ) ;
407 allowed_html.push_back ( "dt" ) ;
408 allowed_html.push_back ( "div" ) ;
409 allowed_html.push_back ( "h1" ) ;
410 allowed_html.push_back ( "h2" ) ;
411 allowed_html.push_back ( "h3" ) ;
412 allowed_html.push_back ( "h4" ) ;
413 allowed_html.push_back ( "h5" ) ;
414 allowed_html.push_back ( "h6" ) ;
415 allowed_html.push_back ( "h7" ) ;
416 allowed_html.push_back ( "h8" ) ;
417 allowed_html.push_back ( "h9" ) ;
418 allowed_html.push_back ( "small" ) ;
419 allowed_html.push_back ( "center" ) ;
420 // allowed_html.push_back ( "" ) ;
// The names are stored upper-cased; remove_evil_html() upper-cases a tag name
// before comparing it against this list.
422 for ( a = 0 ; a < allowed_html.size() ; a++ )
423 allowed_html[a] = upper ( allowed_html[a] ) ;
// Collect the tags found in `s`, then neutralise those not in the list.
425 vector <TXML> taglist ;
426 make_tag_list ( s , taglist ) ;
427 remove_evil_html ( s , taglist ) ;
429 // Now evaluate each line
430 explode ( '\n' , s , lines ) ;
// Assembles the result string: it starts with "<text>" followed by all
// entries of `lines` joined with "\n".
// NOTE(review): the rest of the function (closing markup, return statement)
// is not visible in this listing. The mdash handling at the end is commented
// out in the original, and the end of that comment is not visible here.
433 string WIKI2XML::get_xml ()
435 string ret = "<text>";
436 ret += implode ( "\n" , lines );
439 // Invalidating mdash
440 /*size_t a = ret.find ( "—" ) ;
441 while ( a >= 0 && a < ret.length() )
444 a = ret.find ( "—" , a ) ;
// Replaces the characters of `s` at positions from..to (both inclusive) with
// the string `with`.
//   s    : string edited in place.
//   from : index of the first character to replace.
//   to   : index of the last character to replace.
//   with : text inserted in place of the removed range.
450 void WIKI2XML::replace_part ( string &s , size_t from , size_t to , string with )
// Result = text before `from` + `with` + text after `to`.
452 s = s.substr ( 0 , from ) + with + s.substr ( to + 1 , s.length() - to - 1 ) ;
// Like replace_part(), and additionally notifies every TXML entry of `list`
// about the edit: insert_at ( from ) is called once per character of `with`,
// and remove_at ( from ) once per replaced character (positions from..to).
// NOTE(review): presumably this keeps the positions stored in the entries in
// step with the edited string - insert_at()/remove_at() are not visible in
// this listing, nor are the declarations of a and b; confirm.
455 void WIKI2XML::replace_part_sync ( string &s , size_t from , size_t to , string with , vector <TXML> &list )
458 replace_part ( s , from , to , with ) ;
459 for ( a = 0 ; a < list.size() ; a++ )
461 for ( b = 0 ; b < with.length() ; b++ ) list[a].insert_at ( from ) ;
462 for ( b = from ; b <= to ; b++ ) list[a].remove_at ( from ) ;
// Scans `s` for tags delimited by '<' and '>' and appends one TXML entry per
// tag to `list`. Stray '>' and '<' characters that do not belong to a tag are
// rewritten inside `s`, so the string itself may change.
// NOTE(review): the declarations of a and b and the statements that complete
// the two stray-character branches are not visible in this listing. As
// listed, those branches insert a literal ">" / "<" - presumably the original
// inserts an escaped form; confirm against the full source.
466 // ATTENTION : this doesn't handle all HTML comments correctly!
467 void WIKI2XML::make_tag_list ( string &s , vector <TXML> &list )
472 for ( a = 0 ; a < s.length() ; a++ )
474 if ( s[a] == '>' ) // Rogue >
477 s.insert ( a , ">" ) ;
480 else if ( s[a] != '<' ) continue ;
// Look for the '>' that ends the tag opened at a.
481 b = find_next_unquoted ( '>' , s , a ) ;
482 if ( b == -1 ) // Rogue <
485 s.insert ( a , "<" ) ;
// Record the tag and continue scanning from its `to` position.
488 list.push_back ( TXML ( a , b , s ) ) ;
489 a = list[list.size()-1].to ;
// Neutralises every tag in `taglist` whose upper-cased name is not found in
// allowed_html, by replacing the tag's first and last character (positions
// `from` and `to`) in `s` through replace_part_sync().
// NOTE(review): as listed, the replacement strings are a literal "<" and ">"
// - presumably the original uses an escaped form; confirm against the full
// source. The declarations of a and b are not visible in this listing.
493 void WIKI2XML::remove_evil_html ( string &s , vector <TXML> &taglist )
496 for ( a = 0 ; a < taglist.size() ; a++ )
498 string tag = upper ( taglist[a].name ) ;
// Linear search; b stops short of allowed_html.size() when the tag is found.
499 for ( b = 0 ; b < allowed_html.size() && tag != allowed_html[b] ; b++ ) ;
500 if ( b < allowed_html.size() ) continue ;
501 replace_part_sync ( s , taglist[a].from , taglist[a].from , "<" , taglist ) ;
502 replace_part_sync ( s , taglist[a].to , taglist[a].to , ">" , taglist ) ;
506 string WIKI2XML::table_markup ( string &l )
510 if ( left ( l , 2 ) == "{|" ) // Open table
512 ret = "<wikitable>" ;
513 ret += xml_embed ( l.substr ( 2 , l.length() - 2 ) , "wikiparameter" ) ;
514 tables.push_back ( TTableInfo () ) ;
516 else if ( left ( l , 2 ) == "|}" )
518 ret = tables[tables.size()-1].close () ;
521 else if ( left ( l , 2 ) == "|-" )
523 ret = tables[tables.size()-1].new_row () ;
524 for ( a = 1 ; a < l.length() && l[a] == '-' ; a++ ) ;
525 ret += xml_params ( l.substr ( a , l.length() - a ) ) ;
530 if ( left ( l , 2 ) == "|+" )
533 l = l.substr ( 2 , l.length() - 2 ) ;
535 else if ( l[0] == '!' )
538 l = l.substr ( 1 , l.length() - 1 ) ;
540 else if ( l[0] == '|' )
543 l = l.substr ( 1 , l.length() - 1 ) ;
545 vector <string> sublines ;
546 for ( a = 0 ; a + 1 < l.length() ; a++ )
548 if ( l[a] == '|' && l[a+1] == '|' )
550 sublines.push_back ( left ( l , a ) ) ;
551 l = l.substr ( a + 2 , l.length() - a ) ;
555 if ( l != "" ) sublines.push_back ( l ) ;
556 for ( a = 0 ; a < sublines.size() ; a++ )
559 parse_line_sub ( l ) ;
561 int b = find_next_unquoted ( '|' , l ) ;
564 params = left ( l , b ) ;
565 l = l.substr ( b + 1 , l.length() - b ) ;
567 if ( params != "" ) l = xml_params ( params ) + l ;
568 ret += tables[tables.size()-1].new_cell ( init ) ;