1 #include "stardict_powerword_parsedata.h"
2 #include <glib/gi18n.h>
// Measures a markup string in display positions rather than bytes: the
// visible loop header adds one to cur_pos per iteration while q may move
// forward by several bytes in that iteration.
// NOTE(review): only part of the body is visible in this view; the test that
// detects the start of an entity (presumably an ampersand) is not shown.
8 static size_t xml_strlen(const char *xmlstr)
// Entity names without their leading character. xml_ent_len holds the byte
// length of the entry at the same index; the name table ends with 0.
11 static const char* xml_entrs[] = { "lt;", "gt;", "amp;", "apos;", "quot;", 0 };
12 static const int xml_ent_len[] = { 3, 3, 4, 5, 5 };
16 for (cur_pos = 0, q = xmlstr; *q; ++cur_pos) {
// Compare the text that follows the current character with each known name.
18 for (i = 0; xml_entrs[i]; ++i)
19 if (strncmp(xml_entrs[i], q + 1,
20 xml_ent_len[i]) == 0) {
// Match: step over the leading character plus the entity name.
21 q += xml_ent_len[i] + 1;
// The scan reached the terminating 0, so no known entity matched.
24 if (xml_entrs[i] == NULL)
26 } else if (*q == '<') {
// Search for the closing bracket of the tag that starts at q.
27 const char *p = strchr(q+1, '>');
// Step forward by one UTF-8 character.
34 q = g_utf8_next_char(q);
// Takes a phonetic string of len bytes and returns a gchar* produced by
// g_markup_escape_text from the local string p, so the result is a newly
// allocated, markup-escaped copy.
// NOTE(review): how p is built from text is not visible in this view.
40 static gchar* toUtfPhonetic(const gchar *text, gsize len)
83 return g_markup_escape_text(p.c_str(), -1);
// Second phonetic converter. Unlike toUtfPhonetic it takes a signed length
// and walks the input one UTF-8 character at a time; the result is again the
// markup-escaped form of the local string p.
// NOTE(review): the per-character mapping is not visible in this view.
86 static gchar* toUtfPhonetic2(const gchar *text, glong len)
// Iterate while the cursor s is still within the first len bytes of text.
92 while (s-text < len) {
// n marks the start of the character after the one at s.
93 n = g_utf8_next_char(s);
179 return g_markup_escape_text(p.c_str(), -1);
// Converts PowerWord inline tags found in text into Pango markup collected in
// the GString str. Link spans are recorded in links_list, and cur_pos is
// advanced by the xml_strlen of the text emitted so far.
// NOTE(review): large parts of the body are not visible in this view,
// including where str is handed over to pango - confirm against the full file.
182 static void powerword_markup_add_text(const gchar *text, gssize length, std::string *pango, std::string::size_type &cur_pos, LinksPosList *links_list)
// Pre-size the work buffer to the input length.
190 str = g_string_sized_new (length);
// previous_islink is set to true only where a link span has just been closed.
194 bool previous_islink = false;
// marktags stores one tag letter per nesting level and currentmarktag is the
// current depth; together they appear to act as a stack of open tags.
195 std::string marktags;
196 guint currentmarktag = 0;
199 next = g_utf8_next_char (p);
// With no open tag, a closing brace is emitted as a literal character.
202 if (currentmarktag==0) {
203 g_string_append (str, "}");
204 previous_islink = false;
// Otherwise emit the closing markup that matches the recorded tag letter.
208 switch (marktags[currentmarktag]) {
211 g_string_append (str, "</b>");
212 previous_islink = false;
215 g_string_append (str, "</i>");
216 previous_islink = false;
219 g_string_append (str, "</sup>");
220 previous_islink = false;
223 g_string_append (str, "</sub>");
224 previous_islink = false;
227 g_string_append (str, "</span>");
228 previous_islink = false;
234 g_string_append (str, "</span>");
235 previous_islink = true;
238 previous_islink = false;
// A tag letter counts as an opening tag only when an opening brace follows it.
246 n = g_utf8_next_char(next);
247 if (n!=end && *n == '{') {
250 if (marktags.length()<currentmarktag)
// Remember the tag letter for the level that was just entered.
253 marktags[currentmarktag-1]=*next;
257 g_string_append (str, "<b>");
261 g_string_append (str, "<i>");
265 g_string_append (str, "<sup>");
269 g_string_append (str, "<sub>");
273 g_string_append (str, "<span foreground=\"blue\" underline=\"single\">");
// Phonetic tags: scan forward for the end of the tag body, then convert the
// enclosed bytes with one of the two phonetic converters.
279 const gchar *tag_end = n+1;
280 while (tag_end!=end) {
286 g_string_append (str, "<span foreground=\"blue\">");
289 tag_str = toUtfPhonetic(n+1, tag_end - (n+1));
291 tag_str = toUtfPhonetic2(n+1, tag_end - (n+1));
293 g_string_append (str, tag_str);
295 g_string_append (str, "</span>");
301 previous_islink = false;
// Link tags: a tab is appended first, and the span colour depends on the
// tag letter.
309 g_string_append (str, "\t");
310 if (*next == 'l' || *next == 'D')
311 g_string_append (str, "<span foreground=\"blue\" underline=\"single\">");
313 g_string_append (str, "<span foreground=\"#008080\" underline=\"single\">");
// Account for everything buffered so far and empty the buffer, so that
// cur_pos holds the link start position when the link is recorded below.
315 cur_pos += xml_strlen(str->str);
316 g_string_erase(str, 0, -1);
318 const gchar *tag_end = n+1;
319 while (tag_end!=end) {
// The link length is measured on the escaped text, while the link target is
// built from the raw bytes.
// NOTE(review): confirm tmpstr is released with g_free in lines not shown.
325 char *tmpstr = g_markup_escape_text(n+1, tag_end - (n+1));
326 size_t xml_len = xml_strlen(tmpstr);
327 std::string link("query://");
328 link.append(n+1, tag_end - (n+1));
329 links_list->push_back(LinkDesc(cur_pos, xml_len, link));
333 g_string_append (str, "</span>");
339 previous_islink = true;
// NOTE(review): the five appends below write raw characters, yet xml_strlen
// recognizes the entity names amp, lt, gt, apos and quot. These literals were
// presumably entity strings before this listing was produced - verify against
// the real file.
352 previous_islink = false;
353 g_string_append (str, "&");
357 previous_islink = false;
358 g_string_append (str, "<");
361 previous_islink = false;
362 g_string_append (str, ">");
365 previous_islink = false;
366 g_string_append (str, "'");
369 previous_islink = false;
// NOTE(review): as written, the next call has three double-quote characters
// in a row, which is not a valid string literal - confirm the intended text.
370 g_string_append (str, """);
// Any other character is copied through unchanged.
373 previous_islink = false;
374 g_string_append_len (str, p, next - p);
// At end of input, emit closing markup for tags that are still open, looping
// until the depth reaches zero.
379 if (currentmarktag>0) {
382 switch (marktags[currentmarktag]) {
385 g_string_append (str, "</b>");
388 g_string_append (str, "</i>");
391 g_string_append (str, "</sup>");
394 g_string_append (str, "</sub>");
401 g_string_append (str, "</span>");
406 } while (currentmarktag>0);
// Count the remaining buffered text, then free the GString and its buffer.
409 cur_pos += xml_strlen(str->str);
410 g_string_free (str, TRUE);
// State shared with the GMarkup callbacks through their user_data pointer.
// NOTE(review): the callbacks also read pango, oword and first_jbcy from this
// struct; those members are not visible in this view.
413 typedef struct _PwUserData {
// Destination list for the link descriptors found while parsing.
415 LinksPosList *links_list;
// Running position that the callbacks advance with xml_strlen.
416 std::string::size_type cur_pos;
// GMarkup passthrough callback. It acts only on CDATA sections: the wrapper
// and surrounding whitespace are stripped, then the text is formatted
// according to the name of the enclosing element and appended to the pango
// string held in user_data.
// NOTE(review): many lines of the body are not visible in this view.
421 static void func_parse_passthrough(GMarkupParseContext *context, const gchar *passthrough_text, gsize text_len, gpointer user_data, GError **error)
// Passthrough text that does not start with the CDATA opener is not handled.
423 if (!g_str_has_prefix(passthrough_text, "<![CDATA["))
425 const gchar *element = g_markup_parse_context_get_element(context);
// Skip the 9-byte CDATA opener; the extra 3 bytes removed from the length
// are presumably the CDATA terminator.
428 const gchar *text = passthrough_text+9;
429 gsize len = text_len-9-3;
// Trim ASCII whitespace at the front and at the back of the payload.
430 while (g_ascii_isspace(*text)) {
434 while (len>0 && g_ascii_isspace(*(text+len-1))) {
439 std::string *pango = ((PwUserData*)user_data)->pango;
440 std::string::size_type &cur_pos = ((PwUserData*)user_data)->cur_pos;
// Phonetic element: converted with toUtfPhonetic inside a blue span.
441 if (strcmp(element, "词典音标")==0) {
// In each branch a non-empty pango gets extra handling first; those lines
// are not visible here (presumably a separator).
442 if (!pango->empty()) {
446 *pango+="[<span foreground=\"blue\">";
448 gchar *str = toUtfPhonetic(text, len);
450 cur_pos+=xml_strlen(str);
454 } else if (strcmp(element, "单词原型")==0) {
// Emitted only when the first len bytes differ from the queried word oword.
455 const gchar *oword = ((PwUserData*)user_data)->oword;
456 if (strncmp(oword, text, len)) {
457 if (!pango->empty()) {
462 gchar *str = g_markup_escape_text(text, len);
464 cur_pos+=xml_strlen(str);
468 } else if (strcmp(element, "单词词性")==0) {
469 if (!pango->empty()) {
474 powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list);
476 } else if (strcmp(element, "汉语拼音")==0) {
477 if (!pango->empty()) {
481 *pango+="<span foreground=\"blue\" underline=\"single\">";
482 powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list);
484 } else if (strcmp(element, "例句原型")==0) {
485 if (!pango->empty()) {
489 *pango+="<span foreground=\"#008080\">";
490 powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list);
492 } else if (strcmp(element, "例句解释")==0) {
493 if (!pango->empty()) {
497 *pango+="<span foreground=\"#01259A\">";
498 powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list);
// The next visible line opens a block comment that disables an older branch;
// the end of that comment is not visible in this view.
500 /*} else if (strcmp(element, "相关词")==0) {
505 for (gsize i=1;i<len;i++) {
511 gchar *str = powerword_markup_escape_text(tabstr.c_str(), tabstr.length());
516 strcmp(element, "解释项")==0 ||
517 strcmp(element, "跟随解释")==0 ||
518 strcmp(element, "相关词")==0 ||
519 strcmp(element, "预解释")==0 ||
520 strcmp(element, "繁体写法")==0 ||
521 strcmp(element, "台湾音标")==0 ||
522 strcmp(element, "图片名称")==0 ||
523 strcmp(element, "跟随注释")==0 ||
524 strcmp(element, "音节分段")==0 ||
525 strcmp(element, "AHD音标")==0 ||
526 strcmp(element, "国际音标")==0 ||
527 strcmp(element, "美国音标")==0 ||
528 strcmp(element, "子解释项")==0 ||
529 strcmp(element, "同义词")==0 ||
530 strcmp(element, "日文发音")==0 ||
531 strcmp(element, "惯用型原型")==0 ||
532 strcmp(element, "惯用型解释")==0 ||
533 strcmp(element, "另见")==0
536 if (!pango->empty()) {
540 powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list);
// GMarkup start-element callback. For a fixed set of section element names it
// builds a blue heading line in res, appends it to the pango string held in
// user_data, and advances cur_pos by the heading length from xml_strlen.
// NOTE(review): the heading literals show bare angle brackets around the
// section name inside Pango markup; they were presumably entity-escaped in
// the real file - verify.
544 static void func_parse_start_element(GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, gpointer user_data, GError **error)
547 if (strcmp(element_name, "基本词义")==0) {
// first_jbcy is cleared the first time this element is seen; the heading
// used for that first occurrence is not visible in this view.
548 if (((PwUserData*)user_data)->first_jbcy) {
549 ((PwUserData*)user_data)->first_jbcy = false;
551 res="\n<span foreground=\"blue\"><基本词义></span>";
553 } else if (strcmp(element_name, "继承用法")==0) {
554 res="\n<span foreground=\"blue\"><继承用法></span>";
555 } else if (strcmp(element_name, "习惯用语")==0) {
556 res="\n<span foreground=\"blue\"><习惯用语></span>";
557 } else if (strcmp(element_name, "词性变化")==0) {
558 res="\n<span foreground=\"blue\"><词性变化></span>";
559 } else if (strcmp(element_name, "特殊用法")==0) {
560 res="\n<span foreground=\"blue\"><特殊用法></span>";
561 } else if (strcmp(element_name, "参考词汇")==0) {
562 res="\n<span foreground=\"blue\"><参考词汇></span>";
563 } else if (strcmp(element_name, "常用词组")==0) {
564 res="\n<span foreground=\"blue\"><常用词组></span>";
565 } else if (strcmp(element_name, "语源")==0) {
566 res="\n<span foreground=\"blue\"><语源></span>";
567 } else if (strcmp(element_name, "派生")==0) {
568 res="\n<span foreground=\"blue\"><派生></span>";
569 } else if (strcmp(element_name, "用法")==0) {
570 res="\n<span foreground=\"blue\"><用法></span>";
571 } else if (strcmp(element_name, "注释")==0) {
572 res="\n<span foreground=\"blue\"><注释></span>";
// Append the heading and keep the running position in step with it.
575 *(((PwUserData*)user_data)->pango) += res;
576 ((PwUserData*)user_data)->cur_pos += xml_strlen(res.c_str());
// Runs a GMarkup parse over sec_size bytes at p, using the start-element and
// passthrough callbacks of this file with Data as their user_data.
// NOTE(review): the remaining fields of Data and of parser (for example text
// and error) are set on lines not visible here - confirm all are initialized.
580 static void powerword2link(const char *p, guint32 sec_size, const gchar *oword, std::string *pango, LinksPosList *links_list)
584 Data.links_list = links_list;
// Consumed and cleared by func_parse_start_element.
587 Data.first_jbcy = true;
589 GMarkupParser parser;
590 parser.start_element = func_parse_start_element;
591 parser.end_element = NULL;
592 parser.passthrough = func_parse_passthrough;
// No GError is requested from the parse calls, so the visible code does not
// inspect parse failures.
595 GMarkupParseContext* context = g_markup_parse_context_new(&parser, (GMarkupParseFlags)0, &Data, NULL);
596 g_markup_parse_context_parse(context, p, sec_size, NULL);
597 g_markup_parse_context_end_parse(context, NULL);
598 g_markup_parse_context_free(context);
// Parse entry point, registered as obj->parse_func by
// stardict_parsedata_plugin_init. It converts the NUL-terminated data at p
// into one link-type result item and reports the consumed size.
// NOTE(review): checks made before these lines (for example on a type byte)
// are not visible in this view.
601 static bool parse(const char *p, unsigned int *parsed_size, ParseResult &result, const char *oword)
606 size_t len = strlen(p);
609 LinksPosList links_list;
610 powerword2link(p, len, oword, &pango, &links_list);
611 ParseResultItem item;
612 item.type = ParseResultItemType_link;
// Allocated with new here; presumably owned by result afterwards - TODO confirm.
613 item.link = new ParseResultLinkItem;
614 item.link->pango = pango;
615 item.link->links_list = links_list;
616 result.item_list.push_back(item);
// The payload length plus one byte on each side; presumably a leading type
// byte and the trailing NUL - confirm against the caller.
618 *parsed_size = 1 + len + 1;
// Configuration callback; stardict_plugin_init stores it in
// obj->configure_func. Its body is not visible in this view.
622 static void configure()
// Plugin entry point: checks the host plugin-system version, then fills in
// the plugin type, the descriptive XML and the configure callback.
// NOTE(review): the return statements are not visible in this view.
626 DLLIMPORT bool stardict_plugin_init(StarDictPlugInObject *obj)
// A version string that differs from PLUGIN_SYSTEM_VERSION is reported as an
// error.
628 if (strcmp(obj->version_str, PLUGIN_SYSTEM_VERSION)!=0) {
629 g_print("Error: PowerWord data parsing plugin version doesn't match!\n");
632 obj->type = StarDictPlugInType_PARSEDATA;
// NOTE(review): the author element holds a bare angle-bracketed e-mail
// address inside XML; it was presumably entity-escaped in the real file.
633 obj->info_xml = g_strdup_printf("<plugin_info><name>%s</name><version>1.0</version><short_desc>%s</short_desc><long_desc>%s</long_desc><author>Hu Zheng <huzheng_001@163.com></author><website>http://stardict.sourceforge.net</website></plugin_info>", _("PowerWord data parsing"), _("PowerWord data parsing engine."), _("Parse the PowerWord data."));
634 obj->configure_func = configure;
// Plugin exit entry point; its body is not visible in this view.
638 DLLIMPORT void stardict_plugin_exit(void)
// Parse-data plugin entry point: registers parse as the parsing callback and
// prints a translated message saying the plug-in was loaded.
// NOTE(review): the return statement is not visible in this view.
642 DLLIMPORT bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj)
644 obj->parse_func = parse;
645 g_print(_("PowerWord data parsing plug-in loaded.\n"));
// Windows DLL entry point. The visible part lists one case label for each of
// the four notification reasons.
// NOTE(review): the case bodies and the return statement are not visible in
// this view.
650 BOOL APIENTRY DllMain (HINSTANCE hInst /* Library instance handle. */ ,
651 DWORD reason /* Reason this function is being called. */ ,
652 LPVOID reserved /* Not used. */ )
656 case DLL_PROCESS_ATTACH:
659 case DLL_PROCESS_DETACH:
662 case DLL_THREAD_ATTACH:
665 case DLL_THREAD_DETACH:
669 /* Returns TRUE on success, FALSE on failure */