From: Roman Moravcik Date: Thu, 18 Feb 2010 16:40:38 +0000 (+0100) Subject: Imported version 0.4-1 X-Git-Tag: v0.4-1 X-Git-Url: https://vcs.maemo.org/git/?p=mstardict;a=commitdiff_plain;h=918852f5133cec813a5b7ed602c6c76bdcb0bc3f Imported version 0.4-1 --- diff --git a/Makefile.am b/Makefile.am index d582cb5..136767b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = data m4 src po +SUBDIRS = data m4 po src stardict-plugins EXTRA_DIST = config.rpath mkinstalldirs BUGS config.rpath diff --git a/configure.ac b/configure.ac index 4a5f8e4..9a6910c 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ dnl Process this file with autoconf to produce a configure script. AC_PREREQ(2.52) -AC_INIT(mstardict, 0.3, roman.moravcik@gmail.com) +AC_INIT(mstardict, 0.4, roman.moravcik@gmail.com) AC_CONFIG_SRCDIR(src/mstardict.cpp) dnl Don't include maintainer make-rules by default @@ -17,6 +17,7 @@ AC_LANG([C++]) AC_PROG_CXXCPP AC_PROG_MAKE_SET AC_PROG_RANLIB +AM_PROG_LIBTOOL dnl ================================================================ dnl Gettext stuff. @@ -49,9 +50,14 @@ AC_OUTPUT([ data/Makefile data/mstardict.desktop.in m4/Makefile - src/lib/Makefile - src/Makefile po/Makefile.in + src/Makefile + src/lib/Makefile + stardict-plugins/Makefile + stardict-plugins/stardict-html-parsedata-plugin/Makefile + stardict-plugins/stardict-powerword-parsedata-plugin/Makefile + stardict-plugins/stardict-wiki-parsedata-plugin/Makefile + stardict-plugins/stardict-xdxf-parsedata-plugin/Makefile ]) echo " diff --git a/debian/changelog b/debian/changelog index 00620c4..4df50fd 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +mstardict (0.4-1) unstable; urgency=low + + * Fixed searching of synonyms. + * Added preliminary support of Stardict plugins. + * Added HTML, PowerWord, Wiki and XDXF parse plugins. + + -- Roman Moravcik Thu, 4 Feb 2010 18:01:14 +0100 + mstardict (0.3-2) unstable; urgency=low * Splited code of dictionary management. diff --git a/debian/rules b/debian/rules index 2deb34f..e530b11 100755 --- a/debian/rules +++ b/debian/rules @@ -24,7 +24,7 @@ configure-stamp: dh_testdir ./autogen.sh - ./configure --host=$(DEB_HOST_GNU_TYPE) --build=$(DEB_BUILD_GNU_TYPE) --prefix=/usr --disable-static CFLAGS="$(CFLAGS)" LDFLAGS="-Wl,-z,defs" + ./configure --host=$(DEB_HOST_GNU_TYPE) --build=$(DEB_BUILD_GNU_TYPE) --prefix=/usr --disable-static touch $@ @@ -44,7 +44,7 @@ clean: rm -f build-stamp configure-stamp [ ! -f Makefile ] || $(MAKE) distclean - rm -f Makefile.in aclocal.m4 compile config.guess config.h.in config.rpath config.sub configure data/Makefile.in depcomp install-sh intltool-extract.in intltool-merge.in intltool-update.in ltmain.sh m4/Makefile.in missing mkinstalldirs po/Makefile.in.in src/Makefile.in src/lib/Makefile.in + rm -f Makefile.in aclocal.m4 compile config.guess config.h.in config.rpath config.sub configure data/Makefile.in depcomp install-sh intltool-extract.in intltool-merge.in intltool-update.in ltmain.sh m4/Makefile.in missing mkinstalldirs po/Makefile.in.in src/Makefile.in src/lib/Makefile.in stardict-plugins/Makefile.in stardict-plugins/stardict-html-parsedata-plugin/Makefile.in stardict-plugins/stardict-powerword-parsedata-plugin/Makefile.in stardict-plugins/stardict-wiki-parsedata-plugin/Makefile.in stardict-plugins/stardict-xdxf-parsedata-plugin/Makefile.in dh_clean diff --git a/src/dictmngr.cpp b/src/dictmngr.cpp index 6c346d5..2dd7350 100644 --- a/src/dictmngr.cpp +++ b/src/dictmngr.cpp @@ -105,7 +105,12 @@ DictMngr::CreateDictMngrDialog() gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), selector); renderer = gtk_cell_renderer_text_new(); - g_object_set(G_OBJECT(renderer), "xpad", 10, NULL); + g_object_set(G_OBJECT(renderer), + "xpad", 10, + "ellipsize", PANGO_ELLIPSIZE_END, + "ellipsize-set", TRUE, + NULL); + column = hildon_touch_selector_append_column(HILDON_TOUCH_SELECTOR (selector), diff --git a/src/libwrapper.cpp b/src/libwrapper.cpp index a10c287..0cc7001 100644 --- a/src/libwrapper.cpp +++ b/src/libwrapper.cpp @@ -107,27 +107,82 @@ std::string xdxf2text(const char *p) } static -string parse_data(const gchar *data) +string parse_data(const gchar *data, + const gchar *oword) { if (!data) return ""; - string res; + string mark; guint32 data_size, sec_size = 0; gchar *m_str; const gchar *p = data; data_size = *((guint32 *) p); p += sizeof(guint32); + size_t iPlugin; + size_t nPlugins = pMStarDict->oStarDictPlugins->ParseDataPlugins.nplugins(); + unsigned int parsed_size; + ParseResult parse_result; + while (guint32(p - data) < data_size) { + for (iPlugin = 0; iPlugin < nPlugins; iPlugin++) { + parse_result.clear(); + if (pMStarDict->oStarDictPlugins->ParseDataPlugins.parse(iPlugin, p, &parsed_size, parse_result, oword)) { + p += parsed_size; + break; + } + } + if (iPlugin != nPlugins) { + for (std::list::iterator it = parse_result.item_list.begin(); it != parse_result.item_list.end(); ++it) { + switch (it->type) { + case ParseResultItemType_mark: + g_debug("ParseResultItemType_mark"); + mark += it->mark->pango; + break; + case ParseResultItemType_link: +// g_debug("ParseResultItemType_link: %s", it->mark->pango.c_str()); + mark += it->mark->pango; + break; + case ParseResultItemType_res: + { + g_debug("ParseResultItemType_res"); + bool loaded = false; + if (it->res->type == "image") { + } else if (it->res->type == "sound") { + } else if (it->res->type == "video") { + } else { + } + if (!loaded) { + mark += ""; + gchar *m_str = g_markup_escape_text(it->res->key.c_str(), -1); + mark += m_str; + g_free(m_str); + mark += ""; + } + break; + } + case ParseResultItemType_widget: + g_debug("ParseResultItemType_widget"); + break; + default: + g_debug("ParseResultItemType_default"); + break; + } + } + parse_result.clear(); + continue; + } + switch (*p++) { case 'g': + case 'h': case 'm': case 'l': //need more work... sec_size = strlen(p); if (sec_size) { - res += "\n"; + mark += "\n"; m_str = g_strndup(p, sec_size); - res += m_str; + mark += m_str; g_free(m_str); } sec_size++; @@ -135,9 +190,9 @@ string parse_data(const gchar *data) case 'x': sec_size = strlen(p); if (sec_size) { - res += "\n"; + mark += "\n"; m_str = g_strndup(p, sec_size); - res += xdxf2text(m_str); + mark += xdxf2text(m_str); g_free(m_str); } sec_size++; @@ -145,9 +200,9 @@ string parse_data(const gchar *data) case 't': sec_size = strlen(p); if (sec_size) { - res += "\n"; + mark += "\n"; m_str = g_strndup(p, sec_size); - res += "[" + string(m_str) + "]"; + mark += "[" + string(m_str) + "]"; g_free(m_str); } sec_size++; @@ -165,7 +220,7 @@ string parse_data(const gchar *data) p += sec_size; } - return res; + return mark; } void @@ -204,6 +259,7 @@ Library::BuildResultData(std::vector < InstantDictIndex > &dictmask, int iRealLib; bool bFound = false, bLookupWord = false, bLookupSynonymWord = false; gint nWord = 0, count = 0, i = 0, j = 0; + glong iWordIdx; iRealLib = dictmask[iLib].index; @@ -237,22 +293,20 @@ Library::BuildResultData(std::vector < InstantDictIndex > &dictmask, count = GetOrigWordCount(iIndex[iLib].idx, iRealLib, true); for (i = 0; i < count; i++) { res_list.push_back(TSearchResult(dict_name(iLib), - poGetWord(iIndex[iLib].idx, iRealLib, - 0), - parse_data - (poGetOrigWordData - (iIndex[iLib].idx + i, iRealLib)))); + poGetOrigWord(iIndex[iLib].idx, iRealLib), + parse_data(poGetOrigWordData(iIndex[iLib].idx + i, iRealLib), + poGetOrigWord(iIndex[iLib].idx, iRealLib)))); } i = 1; } else { i = 0; } for (j = 0; i < nWord; i++, j++) { + iWordIdx = poGetOrigSynonymWordIdx(iIndex[iLib].synidx + j, iRealLib); res_list.push_back(TSearchResult(dict_name(iLib), - poGetWord(iIndex[iLib].synidx + j, - iRealLib, 0), - parse_data(poGetOrigWordData - (iIndex[iLib].synidx + j, iRealLib)))); + poGetOrigWord(iWordIdx, iRealLib), + parse_data(poGetOrigWordData(iWordIdx, iRealLib), + poGetOrigWord(iWordIdx, iRealLib)))); } bFound = true; @@ -369,7 +423,7 @@ LookupProgressDialogUpdate(gpointer data, GtkWidget *dialog = GTK_WIDGET(data); GtkWidget *progress; - progress = GTK_WIDGET(g_object_get_data(G_OBJECT(dialog), "progress")); + progress = GTK_WIDGET(g_object_get_data(G_OBJECT(dialog), "progress_bar")); gtk_progress_bar_set_fraction(GTK_PROGRESS_BAR(progress), fraction); while (gtk_events_pending()) diff --git a/src/mstardict.cpp b/src/mstardict.cpp index 1ea7cb4..4e71e2d 100644 --- a/src/mstardict.cpp +++ b/src/mstardict.cpp @@ -51,6 +51,8 @@ #include "libwrapper.hpp" #include "mstardict.hpp" +MStarDict *pMStarDict; + enum { DEF_COLUMN, N_COLUMNS @@ -71,6 +73,13 @@ MStarDict::MStarDict() /* initialize configuration */ oConf = new Conf(); + /* initialize stardict plugins */ + std::list < std::string > plugin_order_list; + std::list < std::string > plugin_disable_list; + oStarDictPlugins = new StarDictPlugins("/usr/lib/mstardict/plugins", + plugin_order_list, + plugin_disable_list); + /* initialize dict manager */ oDict = new DictMngr(this); @@ -89,6 +98,9 @@ MStarDict::~MStarDict() /* deinitialize dict manager */ delete oDict; + /* deinitialize stardict plugins */ + delete oStarDictPlugins; + /* deinitialize configuration */ delete oConf; } @@ -138,7 +150,7 @@ MStarDict::onResultsViewSelectionChanged(GtkTreeSelection *selection, } /* grab focus to search entry */ - gtk_widget_grab_focus(GTK_WIDGET(mStarDict->search)); + mStarDict->GrabFocus(); return true; } @@ -240,6 +252,8 @@ MStarDict::onMainWindowKeyPressEvent(GtkWidget *window, { if (event->type == GDK_KEY_PRESS && event->keyval == GDK_KP_Enter) { mStarDict->SearchWord(); + } else if (event->type == GDK_KEY_PRESS && event->keyval >= 0x21 && event->keyval <= 0x7E) { + mStarDict->GrabFocus(); } return false; } @@ -260,7 +274,7 @@ MStarDict::CreateLookupProgressDialog(bool *cancel) /* add progress bar */ progress = gtk_progress_bar_new(); gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), progress); - g_object_set_data(G_OBJECT(dialog), "progress", progress); + g_object_set_data(G_OBJECT(dialog), "progress_bar", progress); /* show dialog */ gtk_widget_show_all(dialog); @@ -374,7 +388,11 @@ MStarDict::CreateMainWindow() gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW (results_view), -1, "Def", renderer, "text", DEF_COLUMN, NULL); - g_object_set(G_OBJECT(renderer), "xpad", 10, NULL); + g_object_set(G_OBJECT(renderer), + "xpad", 10, + "ellipsize", PANGO_ELLIPSIZE_END, + "ellipsize-set", TRUE, + NULL); /* search entry */ search = hildon_entry_new(HILDON_SIZE_FINGER_HEIGHT); @@ -389,7 +407,7 @@ MStarDict::CreateMainWindow() gtk_widget_show_all(GTK_WIDGET(main_window)); /* grab focus to search entry */ - gtk_widget_grab_focus(GTK_WIDGET(search)); + GrabFocus(); } void @@ -517,6 +535,12 @@ MStarDict::ShowProgressIndicator(bool bShow) hildon_gtk_window_set_progress_indicator(GTK_WINDOW(main_window), 0); } +void +MStarDict::GrabFocus() +{ + gtk_widget_grab_focus(GTK_WIDGET(search)); +} + int main(int argc, char **argv) @@ -532,6 +556,7 @@ main(int argc, /* create main window */ MStarDict mStarDict; + pMStarDict = &mStarDict; mStarDict.CreateMainWindow(); mStarDict.CreateMainMenu(); mStarDict.ShowNoResults(true); diff --git a/src/mstardict.hpp b/src/mstardict.hpp index f2af68a..42c2655 100644 --- a/src/mstardict.hpp +++ b/src/mstardict.hpp @@ -26,6 +26,10 @@ #include #include +#include "lib/pluginmanager.h" + +extern MStarDict *pMStarDict; + class Conf; class DictMngr; class Library; @@ -66,6 +70,7 @@ class MStarDict { Conf *oConf; DictMngr *oDict; Library *oLibs; + StarDictPlugins *oStarDictPlugins; GtkWidget *CreateLookupProgressDialog(bool *cancel); void DestroyLookupProgressDialog(GtkWidget *dialog); @@ -84,4 +89,5 @@ class MStarDict { void ShowNoResults(bool bNoResults); void ShowNoDictionary(bool bNoDictionary); void ShowProgressIndicator(bool bShow); + void GrabFocus(); }; diff --git a/stardict-plugins/Makefile.am b/stardict-plugins/Makefile.am new file mode 100644 index 0000000..e41a48b --- /dev/null +++ b/stardict-plugins/Makefile.am @@ -0,0 +1,3 @@ +DIST_SUBDIRS = stardict-html-parsedata-plugin stardict-powerword-parsedata-plugin stardict-wiki-parsedata-plugin stardict-xdxf-parsedata-plugin + +SUBDIRS = stardict-html-parsedata-plugin stardict-powerword-parsedata-plugin stardict-wiki-parsedata-plugin stardict-xdxf-parsedata-plugin diff --git a/stardict-plugins/stardict-html-parsedata-plugin/Makefile.am b/stardict-plugins/stardict-html-parsedata-plugin/Makefile.am new file mode 100644 index 0000000..9e9b31a --- /dev/null +++ b/stardict-plugins/stardict-html-parsedata-plugin/Makefile.am @@ -0,0 +1,18 @@ +EXTRA_DIST = stardict_html_parsedata.v stardict_html_parsedata.dev + + +LD_VERSION_SCRIPT_OPTION="-Wl,--version-script=stardict_html_parsedata.v" + +noinst_HEADERS = stardict_html_parsedata.h + +stardict_html_parsedata_LTLIBRARIES = stardict_html_parsedata.la + +stardict_html_parsedatadir = $(libdir)/mstardict/plugins + +stardict_html_parsedata_la_SOURCES = stardict_html_parsedata.cpp + +stardict_html_parsedata_la_LDFLAGS = -avoid-version \ + -module \ + $(LD_VERSION_SCRIPT_OPTION) + +INCLUDES = @LIB_STARDICT_CFLAGS@ -I$(top_builddir)/src diff --git a/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.cpp b/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.cpp new file mode 100644 index 0000000..8bb78e8 --- /dev/null +++ b/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.cpp @@ -0,0 +1,498 @@ +#include "stardict_html_parsedata.h" +#include + +#ifdef _WIN32 +#include + +#ifdef _MSC_VER +# define strncasecmp _strnicmp +#endif + +static char *strcasestr (const char *phaystack, const char *pneedle) +{ + register const unsigned char *haystack, *needle; + register char b, c; + + haystack = (const unsigned char *) phaystack; + needle = (const unsigned char *) pneedle; + + b = tolower(*needle); + if (b != '\0') { + haystack--; /* possible ANSI violation */ + do { + c = *++haystack; + if (c == '\0') + goto ret0; + } while (tolower(c) != (int) b); + + c = tolower(*++needle); + if (c == '\0') + goto foundneedle; + ++needle; + goto jin; + + for (;;) { + register char a; + register const unsigned char *rhaystack, *rneedle; + + do { + a = *++haystack; + if (a == '\0') + goto ret0; + if (tolower(a) == (int) b) + break; + a = *++haystack; + if (a == '\0') + goto ret0; + shloop: + ; + } + while (tolower(a) != (int) b); + + jin: a = *++haystack; + if (a == '\0') + goto ret0; + + if (tolower(a) != (int) c) + goto shloop; + + rhaystack = haystack-- + 1; + rneedle = needle; + a = tolower(*rneedle); + + if (tolower(*rhaystack) == (int) a) + do { + if (a == '\0') + goto foundneedle; + ++rhaystack; + a = tolower(*++needle); + if (tolower(*rhaystack) != (int) a) + break; + if (a == '\0') + goto foundneedle; + ++rhaystack; + a = tolower(*++needle); + } while (tolower (*rhaystack) == (int) a); + + needle = rneedle; /* took the register-poor approach */ + + if (a == '\0') + break; + } + } + foundneedle: + return (char*) haystack; + ret0: + return 0; +} +#endif + +static void html_topango(const std::string& str, std::string &pango, size_t &pango_len) +{ + const char *q, *p; + static const char* xml_entrs[] = { "lt;", "gt;", "amp;", "apos;", "quot;", 0 }; + static const int xml_ent_len[] = { 3, 3, 4, 5, 5 }; + static const char* html_entries[] = {"nbsp;", 0}; + static const int html_entry_len[] = {5}; + static const char* html_values[] = {" "}; + static const int html_value_len[] = {1}; + size_t cur_pos; + int i; + char *etext; + + pango.clear(); + for (cur_pos = 0, q = str.c_str(); *q; ++cur_pos) { + if (*q == '&') { + for (i = 0; xml_entrs[i]; ++i) { + if (strncasecmp(xml_entrs[i], q + 1, + xml_ent_len[i]) == 0) { + q += xml_ent_len[i] + 1; + pango += '&'; + pango += xml_entrs[i]; + break; + } + } + if (xml_entrs[i] == NULL) { + for (i = 0; html_entries[i]; ++i) { + if (strncasecmp(html_entries[i], q+1, html_entry_len[i])==0) { + q += html_entry_len[i] + 1; + pango += html_values[i]; + cur_pos += (html_value_len[i] -1); + break; + } + } + if (html_entries[i] == NULL) { + if (*(q+1)=='#' && (p = strchr(q+2, ';'))) { + std::string str(q+2, p-(q+2)); + gunichar uc; + uc = atoi(str.c_str()); + gchar utf8[7]; + gint n = g_unichar_to_utf8(uc, utf8); + utf8[n] = '\0'; + pango += utf8; + q = p+1; + } else { + ++q; + pango += "&"; + } + } + } + } else if (*q == '\r' || *q == '\n') { + q++; + cur_pos--; + } else { + p = g_utf8_next_char(q); + etext = g_markup_escape_text(q, p-q); + pango += etext; + g_free(etext); + q = p; + } + } + + pango_len = cur_pos; +} + +static void xml_decode(const char *str, std::string& decoded) +{ + static const char raw_entrs[] = { + '<', '>', '&', '\'', '\"', 0 + }; + static const char* xml_entrs[] = { + "lt;", "gt;", "amp;", "apos;", "quot;", 0 + }; + static const int xml_ent_len[] = { + 3, 3, 4, 5, 5 + }; + int ient; + const char *amp = strchr(str, '&'); + + if (amp == NULL) { + decoded = str; + return; + } + decoded.assign(str, amp - str); + + while (*amp) + if (*amp == '&') { + for (ient = 0; xml_entrs[ient] != 0; ++ient) + if (strncmp(amp + 1, xml_entrs[ient], + xml_ent_len[ient]) == 0) { + decoded += raw_entrs[ient]; + amp += xml_ent_len[ient]+1; + break; + } + if (xml_entrs[ient] == 0) // unrecognized sequence + decoded += *amp++; + + } else { + decoded += *amp++; + } +} + +static void html2result(const char *p, ParseResult &result) +{ + LinksPosList links_list; + std::string res; + const char *tag, *next; + std::string name; + std::string::size_type cur_pos; + int i; + + struct ReplaceTag { + const char *match_; + int match_len_; + const char *replace_; + int char_len_; + }; + static const ReplaceTag replace_arr[] = { + { "b>", 2, "", 0 }, + { "/b>", 3, "", 0 }, + { "big>", 4, "", 0}, + { "/big>", 5, "", 0}, + { "i>", 2, "", 0 }, + { "/i>", 3, "", 0 }, + { "s>", 2, "", 0 }, + { "/s>", 3, "", 0 }, + { "sub>", 4, "", 0 }, + { "/sub>", 5, "", 0}, + { "sup>", 4, "", 0}, + { "/sup>", 5, "", 0}, + { "small>", 6, "", 0}, + { "/small>", 7, "", 0}, + { "tt>", 3, "", 0}, + { "/tt>", 4, "", 0}, + { "u>", 2, "", 0 }, + { "/u>", 3, "", 0 }, + { "br>", 3, "\n", 1 }, + { "nl>", 3, "", 0 }, + { "hr>", 3, "\n \n", 7 }, + { "/font>", 6, "", 0 }, + { NULL, 0, NULL }, + }; + + for (cur_pos = 0; *p && (tag = strchr(p, '<')) != NULL;) { + std::string chunk(p, tag - p); + size_t pango_len; + std::string pango; + html_topango(chunk, pango, pango_len); + res += pango; + cur_pos += pango_len; + + p = tag; + for (i = 0; replace_arr[i].match_; ++i) + if (strncasecmp(replace_arr[i].match_, p + 1, + replace_arr[i].match_len_) == 0) { + res += replace_arr[i].replace_; + p += 1 + replace_arr[i].match_len_; + cur_pos += replace_arr[i].char_len_; + goto cycle_end; + } + + if (strncasecmp(p+1, "font ", 5)==0) { + next = strchr(p, '>'); + if (!next) { + ++p; + continue; + } + res += "') + break; + p2++; + } + if (p2) { + std::string color(p1, p2-p1); + if (pango_color_parse(NULL, color.c_str())) { + res += " foreground=\""; + res += color; + res += "\""; + } + } + } + res += ">"; + p = next + 1; + } else if ((*(p + 1) == 'a' || *(p + 1) == 'A') && *(p + 2) == ' ') { + next = strchr(p, '>'); + if (!next) { + p++; + continue; + } + p+=3; + name.assign(p, next - p); + const char *p1 = strcasestr(name.c_str(), "href="); + std::string link; + if (p1) { + p1 += sizeof("href=") -1 +1; + const char *p2 = p1; + while (true) { + if (*p2 == '\0') { + p2 = NULL; + break; + } + if (*p2 == '\'' || *p2 == '"') + break; + p2++; + } + if (p2) { + link.assign(p1, p2-p1); + } + } + p = next + 1; + next = strcasestr(p, ""); + if (!next) { + continue; + } + res += ""; + std::string::size_type link_len = next - p; + std::string chunk(p, link_len); + html_topango(chunk, pango, pango_len); + links_list.push_back(LinkDesc(cur_pos, pango_len, link)); + res += pango; + cur_pos += pango_len; + res += ""; + p = next + sizeof("") - 1; + } else if (strncasecmp(p+1, "ref>", 4)==0) { + next = strcasestr(p, ""); + if (!next) { + p++; + continue; + } + p+=5; + res += ""; + std::string::size_type link_len = next - p; + std::string chunk(p, link_len); + html_topango(chunk, pango, pango_len); + std::string xml_enc; + xml_decode(chunk.c_str(), xml_enc); + std::string link; + link = "query://"; + link += xml_enc; + links_list.push_back(LinkDesc(cur_pos, pango_len, link)); + res += pango; + cur_pos += pango_len; + res += ""; + p = next + sizeof("") - 1; + } else if (strncasecmp(p+1, "img ", 4)==0) { + next = strchr(p+5, '>'); + if (!next) { + p++; + continue; + } + name.assign(p+5, next - (p+5)); + p = next + 1; + const char *p1 = strcasestr(name.c_str(), "src="); + std::string src; + if (p1) { + p1 += sizeof("src=") -1 +1; + const char *p2 = p1; + while (true) { + if (*p2 == '\0') { + p2 = NULL; + break; + } + if (*p2 == '\'' || *p2 == '"') + break; + p2++; + } + if (p2) { + src.assign(p1, p2-p1); + } + } + if (!src.empty()) { + ParseResultItem item; + item.type = ParseResultItemType_link; + item.link = new ParseResultLinkItem; + item.link->pango = res; + item.link->links_list = links_list; + result.item_list.push_back(item); + res.clear(); + cur_pos = 0; + links_list.clear(); + item.type = ParseResultItemType_res; + item.res = new ParseResultResItem; + item.res->type = "image"; + int n = src.length(); + if (src[0]==0x1e && src[n-1]==0x1f) { + item.res->key.assign(src.c_str()+1, n-2); + } else { + item.res->key = src; + } + result.item_list.push_back(item); + } + } else { + next = strchr(p+1, '>'); + if (!next) { + p++; + res += "<"; + cur_pos++; + continue; + } + p = next + 1; + } +cycle_end: + ; + } + res += p; + ParseResultItem item; + item.type = ParseResultItemType_link; + item.link = new ParseResultLinkItem; + item.link->pango = res; + item.link->links_list = links_list; + result.item_list.push_back(item); +} + +static bool parse(const char *p, unsigned int *parsed_size, ParseResult &result, const char *oword) +{ + if (*p != 'h') + return false; + p++; + size_t len = strlen(p); + if (len) { + html2result(p, result); + } + *parsed_size = 1 + len + 1; + return true; +} + +static void configure() +{ +} + +DLLIMPORT bool stardict_plugin_init(StarDictPlugInObject *obj) +{ + if (strcmp(obj->version_str, PLUGIN_SYSTEM_VERSION)!=0) { + g_print("Error: HTML data parsing plugin version doesn't match!\n"); + return true; + } + obj->type = StarDictPlugInType_PARSEDATA; + obj->info_xml = g_strdup_printf("%s1.0%s%sHu Zheng <huzheng_001@163.com>http://stardict.sourceforge.net", _("HTML data parsing"), _("HTML data parsing engine."), _("Parse the HTML data.")); + obj->configure_func = configure; + return false; +} + +DLLIMPORT void stardict_plugin_exit(void) +{ +} + +DLLIMPORT bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj) +{ + obj->parse_func = parse; + g_print(_("HTML data parsing plug-in loaded.\n")); + return false; +} + +#ifdef _WIN32 +BOOL APIENTRY DllMain (HINSTANCE hInst /* Library instance handle. */ , + DWORD reason /* Reason this function is being called. */ , + LPVOID reserved /* Not used. */ ) +{ + switch (reason) + { + case DLL_PROCESS_ATTACH: + break; + + case DLL_PROCESS_DETACH: + break; + + case DLL_THREAD_ATTACH: + break; + + case DLL_THREAD_DETACH: + break; + } + + /* Returns TRUE on success, FALSE on failure */ + return TRUE; +} +#endif diff --git a/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.dev b/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.dev new file mode 100644 index 0000000..04de82f --- /dev/null +++ b/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.dev @@ -0,0 +1,69 @@ +[Project] +FileName=stardict_html_parsedata.dev +Name=stardict_html_parsedata +UnitCount=2 +Type=3 +Ver=1 +ObjFiles= +Includes= +Libs= +PrivateResource= +ResourceIncludes= +MakeIncludes= +Compiler=-DBUILDING_DLL=1 -I"\cairo" -I"\gtk-2.0" -I"\gtkdeps-2.0" -I"\gtk-2.0\include" -I"\atk-1.0" -I"\pango-1.0" -I"\glib-2.0" -I"\glib-2.0" -I"\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_ +CppCompiler=-DBUILDING_DLL=1 -I"\cairo" -I"\gtk-2.0" -I"\gtkdeps-2.0" -I"\gtk-2.0\include" -I"\atk-1.0" -I"\pango-1.0" -I"\glib-2.0" -I"\glib-2.0" -I"\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_ +Linker=--no-export-all-symbols --add-stdcall-alias -lcairo -lgtk-win32-2.0 -lgdk-win32-2.0 -latk-1.0 -lgdk_pixbuf-2.0 -lm -lpangowin32-1.0 -lpango-1.0 -lgobject-2.0 -lgmodule-2.0 -lglib-2.0 -lgthread-2.0 -lintl -lwinmm -mno-cygwin -mwindows -mms-bitfields_@@_ +IsCpp=1 +Icon= +ExeOutput= +ObjectOutput= +OverrideOutput=0 +OverrideOutputName=stardict_html_parsedata.dll +HostApplication= +Folders= +CommandLine= +UseCustomMakefile=0 +CustomMakefile= +IncludeVersionInfo=0 +SupportXPThemes=0 +CompilerSet=0 +CompilerSettings=0000000000000000000100 + +[Unit1] +FileName=stardict_html_parsedata.h +CompileCpp=1 +Folder=stardict_html_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[Unit2] +FileName=stardict_html_parsedata.cpp +CompileCpp=1 +Folder=stardict_html_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[VersionInfo] +Major=0 +Minor=1 +Release=1 +Build=1 +LanguageID=1033 +CharsetID=1252 +CompanyName= +FileVersion= +FileDescription=Developed using the Dev-C++ IDE +InternalName= +LegalCopyright= +LegalTrademarks= +OriginalFilename= +ProductName= +ProductVersion= +AutoIncBuildNr=0 + diff --git a/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.h b/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.h new file mode 100644 index 0000000..6f837e6 --- /dev/null +++ b/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.h @@ -0,0 +1,23 @@ +#ifndef _STARDICT_XDXF_PARSEDATA_PLUGIN_H_ +#define _STARDICT_XDXF_PARSEDATA_PLUGIN_H_ + +#ifdef _WIN32 +#if BUILDING_DLL +# define DLLIMPORT __declspec (dllexport) +#else /* Not BUILDING_DLL */ +# define DLLIMPORT __declspec (dllimport) +#endif /* Not BUILDING_DLL */ +#else +# define DLLIMPORT +#endif + +#include "../../src/lib/plugin.h" +#include "../../src/lib/parsedata_plugin.h" + +extern "C" { + DLLIMPORT extern bool stardict_plugin_init(StarDictPlugInObject *obj); + DLLIMPORT extern void stardict_plugin_exit(void); + DLLIMPORT extern bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj); +} + +#endif diff --git a/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.v b/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.v new file mode 100644 index 0000000..683abb3 --- /dev/null +++ b/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.v @@ -0,0 +1,10 @@ +{ + global: + extern "C" { + stardict_plugin_init; + stardict_plugin_exit; + stardict_parsedata_plugin_init; + }; + local: + *; +}; diff --git a/stardict-plugins/stardict-powerword-parsedata-plugin/Makefile.am b/stardict-plugins/stardict-powerword-parsedata-plugin/Makefile.am new file mode 100644 index 0000000..f849cc0 --- /dev/null +++ b/stardict-plugins/stardict-powerword-parsedata-plugin/Makefile.am @@ -0,0 +1,18 @@ +EXTRA_DIST = stardict_powerword_parsedata.v stardict_powerword_parsedata.dev + + +LD_VERSION_SCRIPT_OPTION="-Wl,--version-script=stardict_powerword_parsedata.v" + +noinst_HEADERS = stardict_powerword_parsedata.h + +stardict_powerword_parsedata_LTLIBRARIES = stardict_powerword_parsedata.la + +stardict_powerword_parsedatadir = $(libdir)/mstardict/plugins + +stardict_powerword_parsedata_la_SOURCES = stardict_powerword_parsedata.cpp + +stardict_powerword_parsedata_la_LDFLAGS = -avoid-version \ + -module \ + $(LD_VERSION_SCRIPT_OPTION) + +INCLUDES = @LIB_STARDICT_CFLAGS@ -I$(top_builddir)/src diff --git a/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.cpp b/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.cpp new file mode 100644 index 0000000..068ebbe --- /dev/null +++ b/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.cpp @@ -0,0 +1,672 @@ +#include "stardict_powerword_parsedata.h" +#include + +#ifdef _WIN32 +#include +#endif + +static size_t xml_strlen(const char *xmlstr) +{ + const char *q; + static const char* xml_entrs[] = { "lt;", "gt;", "amp;", "apos;", "quot;", 0 }; + static const int xml_ent_len[] = { 3, 3, 4, 5, 5 }; + size_t cur_pos; + int i; + + for (cur_pos = 0, q = xmlstr; *q; ++cur_pos) { + if (*q == '&') { + for (i = 0; xml_entrs[i]; ++i) + if (strncmp(xml_entrs[i], q + 1, + xml_ent_len[i]) == 0) { + q += xml_ent_len[i] + 1; + break; + } + if (xml_entrs[i] == NULL) + ++q; + } else if (*q == '<') { + const char *p = strchr(q+1, '>'); + if (p) + q = p + 1; + else + ++q; + --cur_pos; + } else + q = g_utf8_next_char(q); + } + + return cur_pos; +} + +static gchar* toUtfPhonetic(const gchar *text, gsize len) +{ + std::string p; + gsize i; + for (i=0;i"); + previous_islink = false; + break; + case 'I': + g_string_append (str, ""); + previous_islink = false; + break; + case '+': + g_string_append (str, ""); + previous_islink = false; + break; + case '-': + g_string_append (str, ""); + previous_islink = false; + break; + case 'x': + g_string_append (str, ""); + previous_islink = false; + break; + case 'l': + case 'D': + case 'L': + case 'U': + g_string_append (str, ""); + previous_islink = true; + break; + default: + previous_islink = false; + break; + } + } + break; + case '&': + find = false; + if (next!=end) { + n = g_utf8_next_char(next); + if (n!=end && *n == '{') { + find=true; + currentmarktag++; + if (marktags.length()"); + next = n+1; + break; + case 'I': + g_string_append (str, ""); + next = n+1; + break; + case '+': + g_string_append (str, ""); + next = n+1; + break; + case '-': + g_string_append (str, ""); + next = n+1; + break; + case 'x': + g_string_append (str, ""); + next = n+1; + break; + case 'X': + case '2': + { + const gchar *tag_end = n+1; + while (tag_end!=end) { + if (*tag_end=='}') + break; + else + tag_end++; + } + g_string_append (str, ""); + gchar *tag_str; + if (*next == 'X') { + tag_str = toUtfPhonetic(n+1, tag_end - (n+1)); + } else { + tag_str = toUtfPhonetic2(n+1, tag_end - (n+1)); + } + g_string_append (str, tag_str); + g_free(tag_str); + g_string_append (str, ""); + currentmarktag--; + if (tag_end!=end) + next = tag_end+1; + else + next = end; + previous_islink = false; + break; + } + case 'l': + case 'D': + case 'L': + case 'U': + if (previous_islink) + g_string_append (str, "\t"); + if (*next == 'l' || *next == 'D') + g_string_append (str, ""); + else + g_string_append (str, ""); + *pango += str->str; + cur_pos += xml_strlen(str->str); + g_string_erase(str, 0, -1); + { + const gchar *tag_end = n+1; + while (tag_end!=end) { + if (*tag_end=='}') + break; + else + tag_end++; + } + char *tmpstr = g_markup_escape_text(n+1, tag_end - (n+1)); + size_t xml_len = xml_strlen(tmpstr); + std::string link("query://"); + link.append(n+1, tag_end - (n+1)); + links_list->push_back(LinkDesc(cur_pos, xml_len, link)); + *pango += tmpstr; + cur_pos += xml_len; + g_free(tmpstr); + g_string_append (str, ""); + currentmarktag--; + if (tag_end!=end) + next = tag_end+1; + else + next = end; + previous_islink = true; + break; + } + /*case ' ': + case '9': + case 'S':*/ + default: + next = n+1; + break; + } + } + } + if (!find) { + previous_islink = false; + g_string_append (str, "&"); + } + break; + case '<': + previous_islink = false; + g_string_append (str, "<"); + break; + case '>': + previous_islink = false; + g_string_append (str, ">"); + break; + case '\'': + previous_islink = false; + g_string_append (str, "'"); + break; + case '"': + previous_islink = false; + g_string_append (str, """); + break; + default: + previous_islink = false; + g_string_append_len (str, p, next - p); + break; + } + p = next; + } + if (currentmarktag>0) { + do { + currentmarktag--; + switch (marktags[currentmarktag]) { + case 'b': + case 'B': + g_string_append (str, ""); + break; + case 'I': + g_string_append (str, ""); + break; + case '+': + g_string_append (str, ""); + break; + case '-': + g_string_append (str, ""); + break; + case 'x': + case 'l': + case 'D': + case 'L': + case 'U': + g_string_append (str, ""); + break; + default: + break; + } + } while (currentmarktag>0); + } + *pango += str->str; + cur_pos += xml_strlen(str->str); + g_string_free (str, TRUE); +} + +typedef struct _PwUserData { + std::string *pango; + LinksPosList *links_list; + std::string::size_type cur_pos; + const gchar *oword; + bool first_jbcy; +} PwUserData; + +static void func_parse_passthrough(GMarkupParseContext *context, const gchar *passthrough_text, gsize text_len, gpointer user_data, GError **error) +{ + if (!g_str_has_prefix(passthrough_text, "0 && g_ascii_isspace(*(text+len-1))) { + len--; + } + if (len==0) + return; + std::string *pango = ((PwUserData*)user_data)->pango; + std::string::size_type &cur_pos = ((PwUserData*)user_data)->cur_pos; + if (strcmp(element, "词典音标")==0) { + if (!pango->empty()) { + *pango+='\n'; + cur_pos++; + } + *pango+="["; + cur_pos++; + gchar *str = toUtfPhonetic(text, len); + *pango+=str; + cur_pos+=xml_strlen(str); + g_free(str); + *pango+="]"; + cur_pos++; + } else if (strcmp(element, "单词原型")==0) { + const gchar *oword = ((PwUserData*)user_data)->oword; + if (strncmp(oword, text, len)) { + if (!pango->empty()) { + *pango+='\n'; + cur_pos++; + } + *pango+=""; + gchar *str = g_markup_escape_text(text, len); + pango->append(str); + cur_pos+=xml_strlen(str); + g_free(str); + *pango+=""; + } + } else if (strcmp(element, "单词词性")==0) { + if (!pango->empty()) { + *pango+='\n'; + cur_pos++; + } + *pango+=""; + powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list); + *pango+=""; + } else if (strcmp(element, "汉语拼音")==0) { + if (!pango->empty()) { + *pango+='\n'; + cur_pos++; + } + *pango+=""; + powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list); + *pango+=""; + } else if (strcmp(element, "例句原型")==0) { + if (!pango->empty()) { + *pango+='\n'; + cur_pos++; + } + *pango+=""; + powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list); + *pango+=""; + } else if (strcmp(element, "例句解释")==0) { + if (!pango->empty()) { + *pango+='\n'; + cur_pos++; + } + *pango+=""; + powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list); + *pango+=""; + /*} else if (strcmp(element, "相关词")==0) { + if (!res->empty()) + *res+='\n'; + std::string tabstr; + tabstr+=text[0]; + for (gsize i=1;iappend(str); + g_free(str);*/ + } else + /*} else if ( + strcmp(element, "解释项")==0 || + strcmp(element, "跟随解释")==0 || + strcmp(element, "相关词")==0 || + strcmp(element, "预解释")==0 || + strcmp(element, "繁体写法")==0 || + strcmp(element, "台湾音标")==0 || + strcmp(element, "图片名称")==0 || + strcmp(element, "跟随注释")==0 || + strcmp(element, "音节分段")==0 || + strcmp(element, "AHD音标")==0 || + strcmp(element, "国际音标")==0 || + strcmp(element, "美国音标")==0 || + strcmp(element, "子解释项")==0 || + strcmp(element, "同义词")==0 || + strcmp(element, "日文发音")==0 || + strcmp(element, "惯用型原型")==0 || + strcmp(element, "惯用型解释")==0 || + strcmp(element, "另见")==0 + ) {*/ + { + if (!pango->empty()) { + *pango+='\n'; + cur_pos++; + } + powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list); + } +} + +static void func_parse_start_element(GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, gpointer user_data, GError **error) +{ + std::string res; + if (strcmp(element_name, "基本词义")==0) { + if (((PwUserData*)user_data)->first_jbcy) { + ((PwUserData*)user_data)->first_jbcy = false; + } else { + res="\n<基本词义>"; + } + } else if (strcmp(element_name, "继承用法")==0) { + res="\n<继承用法>"; + } else if (strcmp(element_name, "习惯用语")==0) { + res="\n<习惯用语>"; + } else if (strcmp(element_name, "词性变化")==0) { + res="\n<词性变化>"; + } else if (strcmp(element_name, "特殊用法")==0) { + res="\n<特殊用法>"; + } else if (strcmp(element_name, "参考词汇")==0) { + res="\n<参考词汇>"; + } else if (strcmp(element_name, "常用词组")==0) { + res="\n<常用词组>"; + } else if (strcmp(element_name, "语源")==0) { + res="\n<语源>"; + } else if (strcmp(element_name, "派生")==0) { + res="\n<派生>"; + } else if (strcmp(element_name, "用法")==0) { + res="\n<用法>"; + } else if (strcmp(element_name, "注释")==0) { + res="\n<注释>"; + } + if (!res.empty()) { + *(((PwUserData*)user_data)->pango) += res; + ((PwUserData*)user_data)->cur_pos += xml_strlen(res.c_str()); + } +} + +static void powerword2link(const char *p, guint32 sec_size, const gchar *oword, std::string *pango, LinksPosList *links_list) +{ + PwUserData Data; + Data.pango = pango; + Data.links_list = links_list; + Data.cur_pos = 0; + Data.oword = oword; + Data.first_jbcy = true; + + GMarkupParser parser; + parser.start_element = func_parse_start_element; + parser.end_element = NULL; + parser.text = NULL; + parser.passthrough = func_parse_passthrough; + parser.error = NULL; + GMarkupParseContext* context = g_markup_parse_context_new(&parser, (GMarkupParseFlags)0, &Data, NULL); + g_markup_parse_context_parse(context, p, sec_size, NULL); + g_markup_parse_context_end_parse(context, NULL); + g_markup_parse_context_free(context); +} + +static bool parse(const char *p, unsigned int *parsed_size, ParseResult &result, const char *oword) +{ + if (*p != 'k') + return false; + p++; + size_t len = strlen(p); + if (len) { + std::string pango; + LinksPosList links_list; + powerword2link(p, len, oword, &pango, &links_list); + ParseResultItem item; + item.type = ParseResultItemType_link; + item.link = new ParseResultLinkItem; + item.link->pango = pango; + item.link->links_list = links_list; + result.item_list.push_back(item); + } + *parsed_size = 1 + len + 1; + return true; +} + +static void configure() +{ +} + +DLLIMPORT bool stardict_plugin_init(StarDictPlugInObject *obj) +{ + if (strcmp(obj->version_str, PLUGIN_SYSTEM_VERSION)!=0) { + g_print("Error: PowerWord data parsing plugin version doesn't match!\n"); + return true; + } + obj->type = StarDictPlugInType_PARSEDATA; + obj->info_xml = g_strdup_printf("%s1.0%s%sHu Zheng <huzheng_001@163.com>http://stardict.sourceforge.net", _("PowerWord data parsing"), _("PowerWord data parsing engine."), _("Parse the PowerWord data.")); + obj->configure_func = configure; + return false; +} + +DLLIMPORT void stardict_plugin_exit(void) +{ +} + +DLLIMPORT bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj) +{ + obj->parse_func = parse; + g_print(_("PowerWord data parsing plug-in loaded.\n")); + return false; +} + +#ifdef _WIN32 +BOOL APIENTRY DllMain (HINSTANCE hInst /* Library instance handle. */ , + DWORD reason /* Reason this function is being called. */ , + LPVOID reserved /* Not used. */ ) +{ + switch (reason) + { + case DLL_PROCESS_ATTACH: + break; + + case DLL_PROCESS_DETACH: + break; + + case DLL_THREAD_ATTACH: + break; + + case DLL_THREAD_DETACH: + break; + } + + /* Returns TRUE on success, FALSE on failure */ + return TRUE; +} +#endif diff --git a/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.dev b/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.dev new file mode 100644 index 0000000..2c0cab4 --- /dev/null +++ b/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.dev @@ -0,0 +1,69 @@ +[Project] +FileName=stardict_powerword_parsedata.dev +Name=stardict_powerword_parsedata +UnitCount=2 +Type=3 +Ver=1 +ObjFiles= +Includes= +Libs= +PrivateResource= +ResourceIncludes= +MakeIncludes= +Compiler=-DBUILDING_DLL=1 -I"\cairo" -I"\gtk-2.0" -I"\gtkdeps-2.0" -I"\gtk-2.0\include" -I"\atk-1.0" -I"\pango-1.0" -I"\glib-2.0" -I"\glib-2.0" -I"\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_ +CppCompiler=-DBUILDING_DLL=1 -I"\cairo" -I"\gtk-2.0" -I"\gtkdeps-2.0" -I"\gtk-2.0\include" -I"\atk-1.0" -I"\pango-1.0" -I"\glib-2.0" -I"\glib-2.0" -I"\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_ +Linker=--no-export-all-symbols --add-stdcall-alias -lcairo -lgtk-win32-2.0 -lgdk-win32-2.0 -latk-1.0 -lgdk_pixbuf-2.0 -lm -lpangowin32-1.0 -lpango-1.0 -lgobject-2.0 -lgmodule-2.0 -lglib-2.0 -lgthread-2.0 -lintl -lwinmm -mno-cygwin -mwindows -mms-bitfields_@@_ +IsCpp=1 +Icon= +ExeOutput= +ObjectOutput= +OverrideOutput=0 +OverrideOutputName=stardict_powerword_parsedata.dll +HostApplication= +Folders= +CommandLine= +UseCustomMakefile=0 +CustomMakefile= +IncludeVersionInfo=0 +SupportXPThemes=0 +CompilerSet=0 +CompilerSettings=0000000000000000000100 + +[Unit1] +FileName=stardict_powerword_parsedata.h +CompileCpp=1 +Folder=stardict_powerword_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[Unit2] +FileName=stardict_powerword_parsedata.cpp +CompileCpp=1 +Folder=stardict_powerword_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[VersionInfo] +Major=0 +Minor=1 +Release=1 +Build=1 +LanguageID=1033 +CharsetID=1252 +CompanyName= +FileVersion= +FileDescription=Developed using the Dev-C++ IDE +InternalName= +LegalCopyright= +LegalTrademarks= +OriginalFilename= +ProductName= +ProductVersion= +AutoIncBuildNr=0 + diff --git a/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.h b/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.h new file mode 100644 index 0000000..fe56acf --- /dev/null +++ b/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.h @@ -0,0 +1,23 @@ +#ifndef _STARDICT_POWERWORD_PARSEDATA_PLUGIN_H_ +#define _STARDICT_POWERWORD_PARSEDATA_PLUGIN_H_ + +#ifdef _WIN32 +#if BUILDING_DLL +# define DLLIMPORT __declspec (dllexport) +#else /* Not BUILDING_DLL */ +# define DLLIMPORT __declspec (dllimport) +#endif /* Not BUILDING_DLL */ +#else +# define DLLIMPORT +#endif + +#include "../../src/lib/plugin.h" +#include "../../src/lib/parsedata_plugin.h" + +extern "C" { + DLLIMPORT extern bool stardict_plugin_init(StarDictPlugInObject *obj); + DLLIMPORT extern void stardict_plugin_exit(void); + DLLIMPORT extern bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj); +} + +#endif diff --git a/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.v b/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.v new file mode 100644 index 0000000..683abb3 --- /dev/null +++ b/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.v @@ -0,0 +1,10 @@ +{ + global: + extern "C" { + stardict_plugin_init; + stardict_plugin_exit; + stardict_parsedata_plugin_init; + }; + local: + *; +}; diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/Makefile.am b/stardict-plugins/stardict-wiki-parsedata-plugin/Makefile.am new file mode 100644 index 0000000..3f824b7 --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/Makefile.am @@ -0,0 +1,22 @@ +EXTRA_DIST = stardict_wiki_parsedata.v stardict_wiki_parsedata.dev + + +LD_VERSION_SCRIPT_OPTION="-Wl,--version-script=stardict_wiki_parsedata.v" + +noinst_HEADERS = stardict_wiki_parsedata.h + +stardict_wiki_parsedata_LTLIBRARIES = stardict_wiki_parsedata.la + +stardict_wiki_parsedatadir = $(libdir)/mstardict/plugins + +stardict_wiki_parsedata_la_SOURCES = stardict_wiki_parsedata.cpp \ + global.cpp global.h \ + stardict_wiki2xml.cpp stardict_wiki2xml.h \ + TXML.cpp TXML.h \ + WIKI2XML.cpp WIKI2XML.h + +stardict_wiki_parsedata_la_LDFLAGS = -avoid-version \ + -module \ + $(LD_VERSION_SCRIPT_OPTION) + +INCLUDES = @LIB_STARDICT_CFLAGS@ -I$(top_builddir)/src diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/TXML.cpp b/stardict-plugins/stardict-wiki-parsedata-plugin/TXML.cpp new file mode 100644 index 0000000..1306c57 --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/TXML.cpp @@ -0,0 +1,82 @@ +#include "TXML.h" + +// ***************************************************************************** +// ***************************************************************************** +// +// TXML +// +// ***************************************************************************** +// ***************************************************************************** + +TXML::TXML ( int f , int t , string &s , bool fix_comments ) + { + from = f ; + to = t ; + name = s.substr ( from + 1 , to - (from+1) ) ; + name = trim ( name ) ; + name = before_first ( ' ' , name ) ; + closing = selfclosing = false ; + if ( left ( name , 1 ) == "/" ) + { + closing = true ; + name = name.substr ( 1 , name.length()-1 ) ; + } + if ( right ( name , 1 ) == "/" ) + { + selfclosing = true ; + name = name.substr ( 0 , name.length()-1 ) ; + } + name = trim ( name ) ; + + // This will replace < and > within a comment with the appropriate HTML entities + if ( fix_comments && left ( name , 1 ) == "!" ) + { + int a ; + for ( a = from+1 ; a < to ; a++ ) + { + if ( s[a] != '>' && s[a] != '<' ) continue ; + to += 3 ; + if ( s[a] == '>' ) s.insert ( a , ">" ) ; + if ( s[a] == '<' ) s.insert ( a , "<" ) ; + s[a+3] = ';' ; + } + } + } + +void TXML::remove_at ( int pos ) + { + if ( pos < from ) from-- ; + if ( pos < to ) to-- ; + } + +void TXML::insert_at ( int pos ) + { + if ( pos < from ) from++ ; + if ( pos < to ) to++ ; + } + +void TXML::add_key_value ( string k , string v ) + { + key.push_back ( trim ( k ) ) ; + value.push_back ( trim ( v ) ) ; + } + +string TXML::get_string () + { + string ret ; + ret = "<" + name ; + for ( size_t a = 0 ; a < key.size() ; a++ ) + { + for ( size_t b = 0 ; b < key[a].length() ; b++ ) + { + if ( key[a][b] == ' ' ) key[a][b] = '_' ; + } + ret += " " + key[a] ; + if ( value[a] != "" ) ret += "=\"" + unquote ( SINGLE_QUOTE , value[a] ) + "\"" ; + } + if ( text == "" ) ret += "/>" ; + else ret += ">" + text + "" ; + return ret ; + } + + diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/TXML.h b/stardict-plugins/stardict-wiki-parsedata-plugin/TXML.h new file mode 100644 index 0000000..4055c98 --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/TXML.h @@ -0,0 +1,25 @@ +#ifndef _TXML_H_ +#define _TXML_H_ + +#include "global.h" + +class TXML + { + public : + TXML () {} ; + virtual ~TXML() {}; + TXML ( int f , int t , string &s , bool fix_comments = true ) ; + virtual void remove_at ( int pos ) ; + virtual void insert_at ( int pos ) ; + + virtual void add_key_value ( string k , string v = "" ) ; + virtual string get_string () ; + + // Variables + int from , to ; + bool closing , selfclosing ; + string name , text ; + vector key , value ; + } ; + +#endif diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/WIKI2XML.cpp b/stardict-plugins/stardict-wiki-parsedata-plugin/WIKI2XML.cpp new file mode 100644 index 0000000..de7e9b0 --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/WIKI2XML.cpp @@ -0,0 +1,573 @@ +#include "WIKI2XML.h" +#include + +TTableInfo::TTableInfo () + { + tr_open = false ; + td_open = false ; + } + +string TTableInfo::close () + { + string ret ; + if ( td_open ) ret += "" ; + if ( tr_open ) ret += "" ; + ret += "" ; + return ret ; + } + +string TTableInfo::new_row () + { + string ret ; + if ( td_open ) ret += "" ; + if ( tr_open ) ret += "" ; + ret += "" ; + td_open = false ; + tr_open = true ; + return ret ; + } + +string TTableInfo::new_cell ( string type ) + { + string ret ; + if ( !tr_open ) ret += new_row () ; + if ( td_open ) ret += "" ; + ret += "" ; + td_type = type ; + td_open = true ; + return ret ; + } + +// ***************************************************************************** +// ***************************************************************************** +// +// WIKI2XML +// +// ***************************************************************************** +// ***************************************************************************** + +void WIKI2XML::parse_symmetric ( string &l , size_t &from , + string s1 , string s2 , + string r1 , string r2 , + bool extend ) + { + int a , b ; + if ( !submatch ( l , s1 , from ) ) return ; // Left does not match + for ( a = from + s1.length() ; a + s2.length() <= l.length() ; a++ ) + { + if ( !submatch ( l , s2 , a ) ) continue ; + for ( b = a+1 ; extend && submatch ( l , s2 , b ) ; b++ ) ; + b-- ; + l = l.substr ( 0 , from ) + + r1 + + l.substr ( from + s1.length() , b - from - s1.length() ) + + r2 + + l.substr ( b + s2.length() , l.length() ) ; + if ( debug ) cout << "newl : " << l << endl ; + break ; + } + } + +void WIKI2XML::parse_link ( string &l , size_t &from , char mode ) + { + from += 1 ; + size_t a , cnt = 1 ; + chart par_open = '[' ; // mode 'L' + chart par_close = ']' ; // mode 'L' + if ( mode == 'T' ) { par_open = '{' ; par_close = '}' ; } + for ( a = from ; cnt > 0 && a+1 < l.length() ; a++ ) + { + if ( l[a] == par_open && l[a+1] == par_open ) + parse_link ( l , a ) ; + else if ( l[a] == par_close && l[a+1] == par_close ) + cnt-- ; + } + if ( cnt > 0 ) return ; // Not a valid link + + int to = a-1 ; // Without "]]" + string link = l.substr ( from+1 , to-from-1 ) ; + + TXML x ; + vector parts ; + explode ( '|' , link , parts ) ; + if ( mode == 'L' ) + { + x.name = "wikilink" ; + x.add_key_value ( "type" , "internal" ) ; + } + else if ( mode == 'T' ) x.name = "wikitemplate" ; + + for ( a = 0 ; a < parts.size() ; a++ ) + { + bool last = ( a + 1 == parts.size() ) ; + string p = parts[a] ; + parse_line_sub ( p ) ; + + if ( a > 0 && ( mode != 'L' || !last ) ) + { + string key , value ; + vector subparts ; + explode ( '=' , p , subparts ) ; + if ( subparts.size() == 1 ) + { + char *str = g_markup_escape_text(p.c_str(), p.length()); + value = xml_embed ( str , "value" ) ; + g_free(str); + } + else + { + key = xml_embed ( subparts[0] , "key" ) ; + subparts.erase ( subparts.begin() ) ; + string itmp = implode ( "=" , subparts ); + char *str = g_markup_escape_text(itmp.c_str(), itmp.length()); + value = xml_embed ( str , "value" ) ; + g_free(str); + } + p = key + value ; + } + else { + char *str = g_markup_escape_text(p.c_str(), p.length()); + p = xml_embed ( str , "value" ) ; + g_free(str); + } + + string param = "number=\"" + val ( a ) + "\"" ; + if ( last ) param += " last=\"1\"" ; + x.text += xml_embed ( p , "wikiparameter" , param ) ; + } + + if ( mode == 'L' ) // Try link trail + { + string trail ; + for ( a = to+2 ; a < l.length() && is_text_char ( l[a] ) ; a++ ) + trail += l[a] ; + to = a-2 ; + if ( trail != "" ) x.text += xml_embed ( trail , "trail" ) ; + } + + x.add_key_value ( "parameters" , val ( parts.size() ) ) ; + string replacement = x.get_string () ; + parse_line_sub ( replacement ) ; + + l.erase ( from-1 , to-from+3 ) ; + l.insert ( from-1 , replacement ) ; + if ( debug ) cout << "Link : " << link << endl << "Replacement : " << replacement << endl ; + if ( debug ) cout << "Result : " << l << endl << endl ; + from = from + replacement.length() - 2 ; + } + +bool WIKI2XML::is_list_char ( chart c ) // For now... + { + if ( c == '*' ) return true ; + if ( c == '#' ) return true ; + if ( c == ':' ) return true ; + return false ; + } + +string WIKI2XML::get_list_tag ( chart c , bool open ) + { + string ret ; + if ( debug ) cout << "get_list_tag : " << c << endl ; + if ( c == '*' ) ret = "ul" ; + if ( c == '#' ) ret = "ol" ; + if ( c == ':' ) ret = "dl" ; + if ( ret != "" ) + { + string itemname = "li" ; + if ( c == ':' ) itemname = "dd" ; + if ( open ) ret = "<" + ret + "><" + itemname + ">" ; + else ret = "" ; + } + return ret ; + } + +string WIKI2XML::fix_list ( string &l ) + { + size_t a , b ; + for ( a = 0 ; a < l.length() && is_list_char ( l[a] ) ; a++ ) ; + string newlist , pre ; + if ( a > 0 ) + { + newlist = left ( l , a ) ; + while ( a < l.length() && l[a] == ' ' ) a++ ; // Removing leading blanks + l = l.substr ( a , l.length() ) ; + } + if ( debug ) cout << "fix_list : " << l << endl ; + if ( list == "" && newlist == "" ) return "" ; + for ( a = 0 ; a < list.length() && + a < newlist.length() && + list[a] == newlist[a] ; a++ ) ; // The common part, if any + + for ( b = a ; b < list.length() ; b++ ) + pre = get_list_tag ( list[b] , false ) + pre ; // Close old list tags + for ( b = a ; b < newlist.length() ; b++ ) + pre += get_list_tag ( newlist[b] , true ) ; // Open new ones + + if ( debug ) cout << "pre : " << pre << endl ; + if ( debug ) cout << "newlist : " << newlist << endl ; + list = newlist ; + return pre ; + } + +void WIKI2XML::parse_line ( string &l ) + { + size_t a; + if ( debug ) cout << l << endl ; + string pre ; + string oldlist = list ; + pre += fix_list ( l ) ; + if ( list != "" && list == oldlist ) + { + string itemname = "li" ; + if ( right ( list , 1 ) == ":" ) itemname = "dd" ; + pre = "<" + itemname + ">" + pre ; + } + + if ( l == "" ) // Paragraph + { + l = "

" ; + } + else if ( left ( l , 4 ) == "----" ) //


+ { + for ( a = 0 ; a < l.length() && l[a] == l[0] ; a++ ) ; + pre += "
" ; + l = l.substr ( a , l.length() - a ) ; + } + else if ( l != "" && l[0] == '=' ) // Heading + { + for ( a = 0 ; a < l.length() && l[a] == '=' && l[l.length()-a-1] == '=' ; a++ ) ; + string h = "h0" ; + if ( a >= l.length() ) h = "" ; // No heading +// else if ( l[a] != ' ' ) h = "" ; +// else if ( l[l.length()-a-1] != ' ' ) h = "" ; + else if ( a < 1 || a > 9 ) h = "" ; + if ( h != "" ) + { + l = l.substr ( a , l.length() - a*2 ) ; + h[1] += a ; + l = xml_embed ( l , h ) ; + } + } + else if ( l != "" && l[0] == ' ' ) // Pre-formatted text + { + for ( a = 0 ; a < l.length() && l[a] == ' ' ; a++ ) ; + l = l.substr ( a , l.length() ) ; + if ( l != "" ) + { + pre += "
" + l + "
" ; + l = "" ; + } + } + else if ( left ( l , 2 ) == "{|" || (left ( l , 2 ) == "|}" && l[2] != '}' ) || + ( tables.size() > 0 && l != "" && ( l[0] == '|' || l[0] == '!' ) ) ) + { + pre += table_markup ( l ) ; + l = "" ; + } + + + if ( l != "" ) parse_line_sub ( l ) ; + + if ( pre != "" ) l = pre + l ; + } + +bool WIKI2XML::is_external_link_protocol ( string protocol ) + { + if ( protocol == "HTTP" ) return true ; + if ( protocol == "FTP" ) return true ; + if ( protocol == "MAILTO" ) return true ; + return false ; + } + +int WIKI2XML::scan_url ( string &l , size_t from ) + { + size_t a ; + for ( a = from ; a < l.length() ; a++ ) + { + if ( l[a] == ':' || l[a] == '/' || l[a] == '.' ) continue ; + if ( l[a] >= '0' && l[a] <= '9' ) continue ; + if ( is_text_char ( l[a] ) ) continue ; + break ; // End of URL + } + return a ; + } + +void WIKI2XML::parse_external_freelink ( string &l , size_t &from ) + { + int a ; + for ( a = from - 1 ; a >= 0 && is_text_char ( l[a] ) ; a-- ) ; + if ( a == -1 ) return ; + a++ ; + string protocol = upper ( l.substr ( a , from - a ) ) ; + if ( debug ) cout << "protocol : " << protocol << endl ; + if ( !is_external_link_protocol ( protocol ) ) return ; + int to = scan_url ( l , a ) ; + string url = l.substr ( a , to - a ) ; + string replacement ; + replacement += xml_embed ( url , "url" ) ; + replacement += xml_embed ( url , "title" ) ; + l = left ( l , a ) + replacement + l.substr ( to , l.length() - to ) ; + from = a + replacement.length() - 1 ; + } + +void WIKI2XML::parse_external_link ( string &l , size_t &from ) + { + string protocol = upper ( before_first ( ':' , l.substr ( from + 1 , l.length() - from ) ) ) ; + if ( !is_external_link_protocol ( protocol ) ) return ; + size_t to ; + for ( to = from + 1 ; to < l.length() && l[to] != ']' ; to++ ) ; + if ( to == l.length() ) return ; + string url = l.substr ( from + 1 , to - from - 1 ) ; + string title = after_first ( ' ' , url ) ; + url = before_first ( ' ' , url ) ; + string replacement ; + replacement += xml_embed ( url , "url" ) ; + if ( title == "" ) + replacement += xml_embed ( "" , "title" ) ; + else replacement += xml_embed ( title , "title" ) ; + replacement = xml_embed ( replacement , "wikilink" , "type='external' protocol='" + protocol + "'" ) ; + l = left ( l , from ) + replacement + l.substr ( to + 1 , l.length() - to ) ; + from = from + replacement.length() - 1 ; + } + +void WIKI2XML::parse_line_sub ( string &l ) + { + size_t a ; + for ( a = 0 ; a < l.length() ; a++ ) + { + if ( l[a] == '[' && a+1 < l.length() && l[a+1] == '[' ) // [[Link]] + parse_link ( l , a , 'L' ) ; + else if ( l[a] == '{' && a+1 < l.length() && l[a+1] == '{' ) // {{Template}} + parse_link ( l , a , 'T' ) ; + else if ( l[a] == '[' ) // External link + parse_external_link ( l , a ) ; + else if ( a+2 < l.length() && l[a] == ':' && l[a+1] == '/' && l[a+2] == '/' ) // External freelink + parse_external_freelink ( l , a ) ; + else if ( l[a] == SINGLE_QUOTE ) // Bold and italics + { + parse_symmetric ( l , a , "'''" , "'''" , "" , "" , true ) ; + parse_symmetric ( l , a , "''" , "''" , "" , "" ) ; + } + } + } + +void WIKI2XML::parse_lines ( vector &lines ) + { + size_t a ; + for ( a = 0 ; a < lines.size() ; a++ ) + { + parse_line ( lines[a] ) ; + } + + string end ; + + // Cleanup lists + end = fix_list ( end ) ; + if ( end != "" ) lines.push_back ( end ) ; + + // Cleanup tables + end = "" ; + while ( tables.size() ) + { + end += tables[tables.size()-1].close () ; + tables.pop_back () ; + } + if ( end != "" ) lines.push_back ( end ) ; + } + +void WIKI2XML::init ( string s ) + { + list = "" ; + lines.clear () ; + + // Now we remove evil HTML + allowed_html.clear () ; + allowed_html.push_back ( "b" ) ; + allowed_html.push_back ( "i" ) ; + allowed_html.push_back ( "p" ) ; + allowed_html.push_back ( "b" ) ; + allowed_html.push_back ( "br" ) ; + allowed_html.push_back ( "hr" ) ; + allowed_html.push_back ( "tt" ) ; + allowed_html.push_back ( "pre" ) ; + allowed_html.push_back ( "nowiki" ) ; + allowed_html.push_back ( "math" ) ; + allowed_html.push_back ( "strike" ) ; + allowed_html.push_back ( "u" ) ; + allowed_html.push_back ( "table" ) ; + allowed_html.push_back ( "caption" ) ; + allowed_html.push_back ( "tr" ) ; + allowed_html.push_back ( "td" ) ; + allowed_html.push_back ( "th" ) ; + allowed_html.push_back ( "li" ) ; + allowed_html.push_back ( "ul" ) ; + allowed_html.push_back ( "ol" ) ; + allowed_html.push_back ( "dl" ) ; + allowed_html.push_back ( "dd" ) ; + allowed_html.push_back ( "dt" ) ; + allowed_html.push_back ( "div" ) ; + allowed_html.push_back ( "h1" ) ; + allowed_html.push_back ( "h2" ) ; + allowed_html.push_back ( "h3" ) ; + allowed_html.push_back ( "h4" ) ; + allowed_html.push_back ( "h5" ) ; + allowed_html.push_back ( "h6" ) ; + allowed_html.push_back ( "h7" ) ; + allowed_html.push_back ( "h8" ) ; + allowed_html.push_back ( "h9" ) ; + allowed_html.push_back ( "small" ) ; + allowed_html.push_back ( "center" ) ; +// allowed_html.push_back ( "" ) ; + size_t a ; + for ( a = 0 ; a < allowed_html.size() ; a++ ) + allowed_html[a] = upper ( allowed_html[a] ) ; + + vector taglist ; + make_tag_list ( s , taglist ) ; + remove_evil_html ( s , taglist ) ; + + // Now evaluate each line + explode ( '\n' , s , lines ) ; + } + +string WIKI2XML::get_xml () + { + string ret = ""; + ret += implode ( "\n" , lines ); + ret += ""; + + // Invalidating mdash + /*size_t a = ret.find ( "—" ) ; + while ( a >= 0 && a < ret.length() ) + { + ret[a] = '!' ; + a = ret.find ( "—" , a ) ; + }*/ + + return ret ; + } + +void WIKI2XML::replace_part ( string &s , size_t from , size_t to , string with ) + { + s = s.substr ( 0 , from ) + with + s.substr ( to + 1 , s.length() - to - 1 ) ; + } + +void WIKI2XML::replace_part_sync ( string &s , size_t from , size_t to , string with , vector &list ) + { + size_t a , b ; + replace_part ( s , from , to , with ) ; + for ( a = 0 ; a < list.size() ; a++ ) + { + for ( b = 0 ; b < with.length() ; b++ ) list[a].insert_at ( from ) ; + for ( b = from ; b <= to ; b++ ) list[a].remove_at ( from ) ; + } + } + +// ATTENTION : this doesn't handle all HTML comments correctly! +void WIKI2XML::make_tag_list ( string &s , vector &list ) + { + list.clear () ; + size_t a; + int b; + for ( a = 0 ; a < s.length() ; a++ ) + { + if ( s[a] == '>' ) // Rouge > + { + s[a] = ';' ; + s.insert ( a , ">" ) ; + continue ; + } + else if ( s[a] != '<' ) continue ; + b = find_next_unquoted ( '>' , s , a ) ; + if ( b == -1 ) // Rouge < + { + s[a] = ';' ; + s.insert ( a , "<" ) ; + continue ; + } + list.push_back ( TXML ( a , b , s ) ) ; + a = list[list.size()-1].to ; + } + } + +void WIKI2XML::remove_evil_html ( string &s , vector &taglist ) + { + size_t a , b ; + for ( a = 0 ; a < taglist.size() ; a++ ) + { + string tag = upper ( taglist[a].name ) ; + for ( b = 0 ; b < allowed_html.size() && tag != allowed_html[b] ; b++ ) ; + if ( b < allowed_html.size() ) continue ; + replace_part_sync ( s , taglist[a].from , taglist[a].from , "<" , taglist ) ; + replace_part_sync ( s , taglist[a].to , taglist[a].to , ">" , taglist ) ; + } + } + +string WIKI2XML::table_markup ( string &l ) + { + size_t a ; + string ret ; + if ( left ( l , 2 ) == "{|" ) // Open table + { + ret = "" ; + ret += xml_embed ( l.substr ( 2 , l.length() - 2 ) , "wikiparameter" ) ; + tables.push_back ( TTableInfo () ) ; + } + else if ( left ( l , 2 ) == "|}" ) + { + ret = tables[tables.size()-1].close () ; + tables.pop_back () ; + } + else if ( left ( l , 2 ) == "|-" ) + { + ret = tables[tables.size()-1].new_row () ; + for ( a = 1 ; a < l.length() && l[a] == '-' ; a++ ) ; + ret += xml_params ( l.substr ( a , l.length() - a ) ) ; + } + else + { + string init ; + if ( left ( l , 2 ) == "|+" ) + { + init = "caption" ; + l = l.substr ( 2 , l.length() - 2 ) ; + } + else if ( l[0] == '!' ) + { + init = "header" ; + l = l.substr ( 1 , l.length() - 1 ) ; + } + else if ( l[0] == '|' ) + { + init = "cell" ; + l = l.substr ( 1 , l.length() - 1 ) ; + } + vector sublines ; + for ( a = 0 ; a + 1 < l.length() ; a++ ) + { + if ( l[a] == '|' && l[a+1] == '|' ) + { + sublines.push_back ( left ( l , a ) ) ; + l = l.substr ( a + 2 , l.length() - a ) ; + a = (size_t)(-1) ; + } + } + if ( l != "" ) sublines.push_back ( l ) ; + for ( a = 0 ; a < sublines.size() ; a++ ) + { + l = sublines[a] ; + parse_line_sub ( l ) ; + string params ; + int b = find_next_unquoted ( '|' , l ) ; + if ( b != -1 ) + { + params = left ( l , b ) ; + l = l.substr ( b + 1 , l.length() - b ) ; + } + if ( params != "" ) l = xml_params ( params ) + l ; + ret += tables[tables.size()-1].new_cell ( init ) ; + ret += l ; + } + } + return ret ; + } diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/WIKI2XML.h b/stardict-plugins/stardict-wiki-parsedata-plugin/WIKI2XML.h new file mode 100644 index 0000000..e86f35b --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/WIKI2XML.h @@ -0,0 +1,58 @@ +#ifndef _WIKI2XML_H_ +#define _WIKI2XML_H_ + +#include "global.h" +#include "TXML.h" + +class TTableInfo + { + public : + TTableInfo () ; + virtual ~TTableInfo () {}; + virtual string new_cell ( string type ) ; + virtual string new_row () ; + virtual string close () ; + bool tr_open , td_open ; + string td_type ; + } ; + +class WIKI2XML + { + public : + WIKI2XML () {} ; + virtual ~WIKI2XML () {}; + WIKI2XML ( string &s ) { init ( s ) ; } + WIKI2XML ( vector &l ) { init ( l ) ; } + virtual void init ( string s ) ; + virtual void init ( vector &l ) { init ( implode ( "\n" , l ) ) ; } + virtual void parse () { parse_lines ( lines ) ; } + virtual string get_xml () ; + + private : + virtual void make_tag_list ( string &s , vector &list ) ; + virtual void parse_symmetric ( string &l , size_t &from , + string s1 , string s2 , + string r1 , string r2 , bool extend = false ) ; + virtual void parse_link ( string &l , size_t &from , char mode = 'L' ) ; + virtual void parse_line_sub ( string &l ) ; + virtual void parse_line ( string &l ) ; + virtual void parse_lines ( vector &lines ) ; + virtual string fix_list ( string &l ) ; + virtual string get_list_tag ( chart c , bool open ) ; + virtual bool is_list_char ( chart c ) ; + virtual void remove_evil_html ( string &s , vector &taglist ) ; + virtual void replace_part ( string &s , size_t from , size_t to , string with ) ; + virtual void replace_part_sync ( string &s , size_t from , size_t to , string with , vector &list ) ; + virtual void parse_external_freelink ( string &l , size_t &from ) ; + virtual void parse_external_link ( string &l , size_t &from ) ; + virtual bool is_external_link_protocol ( string protocol ) ; + virtual int scan_url ( string &l , size_t from ) ; + virtual string table_markup ( string &l ) ; + + // Variables + vector lines , allowed_html ; + vector tables ; + string list ; + } ; + +#endif diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/global.cpp b/stardict-plugins/stardict-wiki-parsedata-plugin/global.cpp new file mode 100644 index 0000000..f2afd9d --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/global.cpp @@ -0,0 +1,227 @@ +#include "global.h" + +// ***************************************************************************** +// ***************************************************************************** +// +// global string functions +// +// ***************************************************************************** +// ***************************************************************************** + +// The following functions should be language specific +bool is_text_char ( chart ch ) + { + if ( ch >= 'a' && ch <= 'z' ) return true ; + if ( ch >= 'A' && ch <= 'Z' ) return true ; + return false ; + } + + +// These are not : + +string left ( string &s , size_t num ) + { + if ( num <= 0 ) return "" ; + if ( num >= s.length() ) return s ; + return s.substr ( 0 , num ) ; + } + +string right ( string &s , int num ) + { + if ( num <= 0 ) return "" ; + int from = s.length() - num ; + string ret ; + if ( from <= 0 ) ret = s ; + else ret = s.substr ( from , s.length() ) ; + return ret ; + } + +string upper ( string s ) // For internal purposes, will do... + { + size_t a ; + for ( a = 0 ; a < s.length() ; a++ ) + { + if ( s[a] >= 'a' && s[a] <= 'z' ) s[a] = s[a] - 'a' + 'A' ; + } + return s ; + } + +void explode ( chart ch , string &l , vector &parts ) + { + parts.clear () ; + size_t a , b ; + for ( a = b = 0 ; a < l.length() ; a++ ) + { + if ( l[a] == ch ) + { + parts.push_back ( l.substr ( b , a - b ) ) ; + b = a+1 ; + } + } + parts.push_back ( l.substr ( b , a - b ) ) ; + + if ( debug ) cout << "Explode : " << l << endl ; + for ( a = 0 ; a < parts.size() ; a++ ) + if ( debug ) cout << a << " " << parts[a] << endl ; + if ( debug ) cout << endl ; + } + +string implode ( string mid , vector &parts ) + { + if ( parts.size() == 0 ) return "" ; + if ( parts.size() == 1 ) return parts[0] ; + string ret = parts[0] ; + for ( size_t a = 1 ; a < parts.size() ; a++ ) + ret += mid + parts[a] ; + return ret ; + } + +string unquote ( chart quote , string &s ) + { + size_t a ; + for ( a = 0 ; a < s.length() ; a++ ) + { + if ( s[a] == quote && ( a == 0 || ( a > 0 && s[a-1] != '\\' ) ) ) + { + s.insert ( a , "\\" ) ; + a++ ; + } + } + return s ; + } + +bool submatch ( string &main , string &sub , int from ) + { + if ( from + sub.length() > main.length() ) return false ; + size_t a ; + for ( a = 0 ; a < sub.length() ; a++ ) + { + if ( sub[a] != main[a+from] ) return false ; + } + return true ; + } + +int find_first ( chart c , string &s ) + { + size_t a ; + for ( a = 0 ; a < s.length() && s[a] != c ; a++ ) ; + if ( a == s.length() ) return -1 ; + return a ; + } + +int find_last ( chart c , string &s ) + { + size_t a; + int b = -1 ; + for ( a = 0 ; a < s.length() ; a++ ) + { + if ( s[a] == c ) b = a ; + } + return b ; + } + +string before_first ( chart c , string s ) + { + int pos = find_first ( c , s ) ; + if ( pos == -1 ) return s ; + return s.substr ( 0 , pos ) ; + } + +string before_last ( chart c , string s ) + { + int pos = find_last ( c , s ) ; + if ( pos == -1 ) return "" ; + return s.substr ( 0 , pos ) ; + } + +string after_first ( chart c , string s ) + { + int pos = find_first ( c , s ) ; + if ( pos == -1 ) return "" ; + return s.substr ( pos+1 , s.length() ) ; + } + +string after_last ( chart c , string s ) + { + int pos = find_last ( c , s ) ; + if ( pos == -1 ) return s ; + return s.substr ( pos+1 , s.length() ) ; + } + +string trim ( string &s ) + { + if ( s.length() == 0 ) return s ; + if ( s[0] != ' ' && s[s.length()-1] != ' ' ) return s ; + size_t a; + int b ; + for ( a = 0 ; a < s.length() && s[a] == ' ' ; a++ ) ; + for ( b = s.length()-1 ; b >= 0 && s[b] == ' ' ; b-- ) ; + return s.substr ( a , b - a + 1 ) ; + } + +int find_next_unquoted ( chart c , string &s , int start ) + { + size_t a ; + chart lastquote = ' ' ; + for ( a = start ; a < s.length() ; a++ ) + { + if ( s[a] == c && lastquote == ' ' ) return a ; // Success! + if ( s[a] != SINGLE_QUOTE && s[a] != DOUBLE_QUOTE ) continue ; // No quotes, next + if ( a > 0 && s[a-1] == '\\' ) continue ; // Ignore \' and \" + if ( lastquote == ' ' ) lastquote = s[a] ; // Remember opening quote, text now quoted + else if ( lastquote == s[a] ) lastquote = ' ' ; // Close quote, not quoted anymore + } + return -1 ; + } + +string val ( int a ) + { + char t[20] ; + sprintf ( t , "%d" , a ) ; + return string ( t ) ; + } + +string xml_embed ( string inside , string tag , string param ) + { + string ret ; + ret = "<" + tag ; + if ( param != "" ) ret += " " + param ; + if ( inside == "" ) return ret + "/>" ; + return ret + ">" + trim ( inside ) + "" ; + } + +string xml_params ( string l ) // Yes, this function is thin... + { + string ret ; + vector params ; + while ( l != "" ) + { + int p = find_next_unquoted ( ' ' , l ) ; + string first ; + if ( p == -1 ) + { + first = l ; + l = "" ; + } + else + { + first = left ( l , p ) ; + l = l.substr ( p , l.length() - p ) ; + } + first = trim ( first ) ; + l = trim ( l ) ; + if ( first == "" ) continue ; + + p = find_next_unquoted ( '=' , first ) ; + if ( p == -1 ) first = xml_embed ( first , "value" ) ; + else + { + first = xml_embed ( left ( first , p ) , "key" ) + + xml_embed ( first.substr ( p + 1 , first.length() - p ) , "value" ) ; + } + first = xml_embed ( first , "wikiparameter" ) ; + ret += first ; + } + return ret ; + } + diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/global.h b/stardict-plugins/stardict-wiki-parsedata-plugin/global.h new file mode 100644 index 0000000..c9c053d --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/global.h @@ -0,0 +1,38 @@ +#ifndef _GLOBAL_FUNCTIONS_H_ +#define _GLOBAL_FUNCTIONS_H_ + +#define debug 0 + +#include +#include +#include +#include +#include + +using namespace std; + +#define SINGLE_QUOTE 39 +#define DOUBLE_QUOTE '"' + +typedef string::value_type chart ; // Char type + +string right ( string &s , int num ) ; +string left ( string &s , size_t num ) ; +string upper ( string s ) ; +bool is_text_char ( chart ch ) ; +void explode ( chart ch , string &l , vector &parts ) ; +string implode ( string mid , vector &parts ) ; +string unquote ( chart quote , string &s ) ; +bool submatch ( string &main , string &sub , int from ) ; +string before_first ( chart c , string s ) ; +string before_last ( chart c , string s ) ; +string after_first ( chart c , string s ) ; +string after_last ( chart c , string s ) ; +string trim ( string &s ) ; +string val ( int a ) ; +int find_next_unquoted ( chart c , string &s , int start = 0 ) ; +string xml_embed ( string inside , string tag , string param = "" ) ; +string xml_params ( string l ) ; + +#endif + diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki2xml.cpp b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki2xml.cpp new file mode 100644 index 0000000..0cfade9 --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki2xml.cpp @@ -0,0 +1,56 @@ +#include "stardict_wiki2xml.h" +#include "WIKI2XML.h" +#include + +std::string wiki2xml(std::string &str) +{ + WIKI2XML w2x(str); + w2x.parse () ; + return w2x.get_xml (); +} + +struct WikiXmlParseUserData { + std::string *res; +}; + +static void wikixml_parse_start_element(GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, gpointer user_data, GError **error) +{ + WikiXmlParseUserData *Data = (WikiXmlParseUserData *)user_data; + if (strcmp(element_name, "wikilink")==0) { + Data->res->append(""); + } +} + +static void wikixml_parse_end_element(GMarkupParseContext *context, const gchar *element_name, gpointer user_data, GError **error) +{ + WikiXmlParseUserData *Data = (WikiXmlParseUserData *)user_data; + if (strcmp(element_name, "wikilink")==0) { + Data->res->append(""); + } +} + +static void wikixml_parse_text(GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error) +{ + WikiXmlParseUserData *Data = (WikiXmlParseUserData *)user_data; + char *estr = g_markup_escape_text(text, text_len); + Data->res->append(estr); + g_free(estr); +} + +std::string wikixml2pango(std::string &str) +{ + std::string res; + WikiXmlParseUserData Data; + Data.res = &res; + GMarkupParser parser; + parser.start_element = wikixml_parse_start_element; + parser.end_element = wikixml_parse_end_element; + parser.text = wikixml_parse_text; + parser.passthrough = NULL; + parser.error = NULL; + GMarkupParseContext* context = g_markup_parse_context_new(&parser, (GMarkupParseFlags)0, &Data, NULL); + g_markup_parse_context_parse(context, str.c_str(), str.length(), NULL); + g_markup_parse_context_end_parse(context, NULL); + g_markup_parse_context_free(context); + return res; +} diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki2xml.h b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki2xml.h new file mode 100644 index 0000000..b3133c0 --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki2xml.h @@ -0,0 +1,9 @@ +#ifndef _STARDICT_WIKI2XML_H_ +#define _STARDICT_WIKI2XML_H_ + +#include + +extern std::string wiki2xml(std::string &str); +extern std::string wikixml2pango(std::string &str); + +#endif diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.cpp b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.cpp new file mode 100644 index 0000000..fdf5270 --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.cpp @@ -0,0 +1,78 @@ +#include "stardict_wiki_parsedata.h" +#include "stardict_wiki2xml.h" +#include + +#ifdef _WIN32 +#include +#endif + +static bool parse(const char *p, unsigned int *parsed_size, ParseResult &result, const char *oword) +{ + if (*p != 'w') + return false; + p++; + size_t len = strlen(p); + if (len) { + ParseResultItem item; + item.type = ParseResultItemType_mark; + item.mark = new ParseResultMarkItem; + std::string res(p, len); + std::string xml = wiki2xml(res); + item.mark->pango = wikixml2pango(xml); + result.item_list.push_back(item); + } + *parsed_size = 1 + len + 1; + return true; +} + +static void configure() +{ +} + +DLLIMPORT bool stardict_plugin_init(StarDictPlugInObject *obj) +{ + if (strcmp(obj->version_str, PLUGIN_SYSTEM_VERSION)!=0) { + g_print("Error: Wiki data parsing plugin version doesn't match!\n"); + return true; + } + obj->type = StarDictPlugInType_PARSEDATA; + obj->info_xml = g_strdup_printf("%s1.0%s%sHu Zheng <huzheng_001@163.com>http://stardict.sourceforge.net", _("Wiki data parsing"), _("Wiki data parsing engine."), _("Parse the wiki data.")); + obj->configure_func = configure; + return false; +} + +DLLIMPORT void stardict_plugin_exit(void) +{ +} + +DLLIMPORT bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj) +{ + obj->parse_func = parse; + g_print(_("Wiki data parsing plug-in loaded.\n")); + return false; +} + +#ifdef _WIN32 +BOOL APIENTRY DllMain (HINSTANCE hInst /* Library instance handle. */ , + DWORD reason /* Reason this function is being called. */ , + LPVOID reserved /* Not used. */ ) +{ + switch (reason) + { + case DLL_PROCESS_ATTACH: + break; + + case DLL_PROCESS_DETACH: + break; + + case DLL_THREAD_ATTACH: + break; + + case DLL_THREAD_DETACH: + break; + } + + /* Returns TRUE on success, FALSE on failure */ + return TRUE; +} +#endif diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.dev b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.dev new file mode 100644 index 0000000..1213950 --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.dev @@ -0,0 +1,149 @@ +[Project] +FileName=stardict_wiki_parsedata.dev +Name=stardict_wiki_parsedata +UnitCount=10 +Type=3 +Ver=1 +ObjFiles= +Includes= +Libs= +PrivateResource= +ResourceIncludes= +MakeIncludes= +Compiler=-DBUILDING_DLL=1 -I"\cairo" -I"\gtk-2.0" -I"\gtkdeps-2.0" -I"\gtk-2.0\include" -I"\atk-1.0" -I"\pango-1.0" -I"\glib-2.0" -I"\glib-2.0" -I"\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_ +CppCompiler=-DBUILDING_DLL=1 -I"\cairo" -I"\gtk-2.0" -I"\gtkdeps-2.0" -I"\gtk-2.0\include" -I"\atk-1.0" -I"\pango-1.0" -I"\glib-2.0" -I"\glib-2.0" -I"\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_ +Linker=--no-export-all-symbols --add-stdcall-alias -lcairo -lgtk-win32-2.0 -lgdk-win32-2.0 -latk-1.0 -lgdk_pixbuf-2.0 -lm -lpangowin32-1.0 -lpango-1.0 -lgobject-2.0 -lgmodule-2.0 -lglib-2.0 -lgthread-2.0 -lintl -lwinmm -mno-cygwin -mwindows -mms-bitfields_@@_ +IsCpp=1 +Icon= +ExeOutput= +ObjectOutput= +OverrideOutput=0 +OverrideOutputName=stardict_wiki_parsedata.dll +HostApplication= +Folders= +CommandLine= +UseCustomMakefile=0 +CustomMakefile= +IncludeVersionInfo=0 +SupportXPThemes=0 +CompilerSet=0 +CompilerSettings=0000000000000000000100 + +[Unit1] +FileName=WIKI2XML.h +CompileCpp=1 +Folder=stardict_wiki_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[Unit2] +FileName=global.cpp +CompileCpp=1 +Folder=stardict_wiki_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[Unit3] +FileName=global.h +CompileCpp=1 +Folder=stardict_wiki_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[Unit4] +FileName=stardict_wiki2xml.cpp +CompileCpp=1 +Folder=stardict_wiki_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[Unit5] +FileName=stardict_wiki2xml.h +CompileCpp=1 +Folder=stardict_wiki_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[Unit6] +FileName=stardict_wiki_parsedata.cpp +CompileCpp=1 +Folder=stardict_wiki_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[Unit7] +FileName=stardict_wiki_parsedata.h +CompileCpp=1 +Folder=stardict_wiki_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[Unit8] +FileName=TXML.cpp +CompileCpp=1 +Folder=stardict_wiki_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[Unit9] +FileName=TXML.h +CompileCpp=1 +Folder=stardict_wiki_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[Unit10] +FileName=WIKI2XML.cpp +CompileCpp=1 +Folder=stardict_wiki_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[VersionInfo] +Major=0 +Minor=1 +Release=1 +Build=1 +LanguageID=1033 +CharsetID=1252 +CompanyName= +FileVersion= +FileDescription=Developed using the Dev-C++ IDE +InternalName= +LegalCopyright= +LegalTrademarks= +OriginalFilename= +ProductName= +ProductVersion= +AutoIncBuildNr=0 + diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.h b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.h new file mode 100644 index 0000000..7a9faf6 --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.h @@ -0,0 +1,23 @@ +#ifndef _STARDICT_WIKI_PARSEDATA_PLUGIN_H_ +#define _STARDICT_WIKI_PARSEDATA_PLUGIN_H_ + +#ifdef _WIN32 +#if BUILDING_DLL +# define DLLIMPORT __declspec (dllexport) +#else /* Not BUILDING_DLL */ +# define DLLIMPORT __declspec (dllimport) +#endif /* Not BUILDING_DLL */ +#else +# define DLLIMPORT +#endif + +#include "../../src/lib/plugin.h" +#include "../../src/lib/parsedata_plugin.h" + +extern "C" { + DLLIMPORT extern bool stardict_plugin_init(StarDictPlugInObject *obj); + DLLIMPORT extern void stardict_plugin_exit(void); + DLLIMPORT extern bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj); +} + +#endif diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.v b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.v new file mode 100644 index 0000000..683abb3 --- /dev/null +++ b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.v @@ -0,0 +1,10 @@ +{ + global: + extern "C" { + stardict_plugin_init; + stardict_plugin_exit; + stardict_parsedata_plugin_init; + }; + local: + *; +}; diff --git a/stardict-plugins/stardict-xdxf-parsedata-plugin/Makefile.am b/stardict-plugins/stardict-xdxf-parsedata-plugin/Makefile.am new file mode 100644 index 0000000..f8e944b --- /dev/null +++ b/stardict-plugins/stardict-xdxf-parsedata-plugin/Makefile.am @@ -0,0 +1,18 @@ +EXTRA_DIST = stardict_xdxf_parsedata.v stardict_xdxf_parsedata.dev + + +LD_VERSION_SCRIPT_OPTION="-Wl,--version-script=stardict_xdxf_parsedata.v" + +noinst_HEADERS = stardict_xdxf_parsedata.h + +stardict_xdxf_parsedata_LTLIBRARIES = stardict_xdxf_parsedata.la + +stardict_xdxf_parsedatadir = $(libdir)/mstardict/plugins + +stardict_xdxf_parsedata_la_SOURCES = stardict_xdxf_parsedata.cpp + +stardict_xdxf_parsedata_la_LDFLAGS = -avoid-version \ + -module \ + $(LD_VERSION_SCRIPT_OPTION) + +INCLUDES = @LIB_STARDICT_CFLAGS@ -I$(top_builddir)/src diff --git a/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.cpp b/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.cpp new file mode 100644 index 0000000..905a01d --- /dev/null +++ b/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.cpp @@ -0,0 +1,360 @@ +#include "stardict_xdxf_parsedata.h" +#include + +#ifdef _WIN32 +#include +#endif + +static size_t xml_strlen(const std::string& str) +{ + const char *q; + static const char* xml_entrs[] = { "lt;", "gt;", "amp;", "apos;", "quot;", 0 }; + static const int xml_ent_len[] = { 3, 3, 4, 5, 5 }; + size_t cur_pos; + int i; + + for (cur_pos = 0, q = str.c_str(); *q; ++cur_pos) { + if (*q == '&') { + for (i = 0; xml_entrs[i]; ++i) + if (strncmp(xml_entrs[i], q + 1, + xml_ent_len[i]) == 0) { + q += xml_ent_len[i] + 1; + break; + } + if (xml_entrs[i] == NULL) + ++q; + } else if (*q == '<') { + const char *p = strchr(q+1, '>'); + if (p) + q = p + 1; + else + ++q; + --cur_pos; + } else + q = g_utf8_next_char(q); + } + + return cur_pos; +} + +static void xml_decode(const char *str, std::string& decoded) +{ + static const char raw_entrs[] = { + '<', '>', '&', '\'', '\"', 0 + }; + static const char* xml_entrs[] = { + "lt;", "gt;", "amp;", "apos;", "quot;", 0 + }; + static const int xml_ent_len[] = { + 3, 3, 4, 5, 5 + }; + int ient; + const char *amp = strchr(str, '&'); + + if (amp == NULL) { + decoded = str; + return; + } + decoded.assign(str, amp - str); + + while (*amp) + if (*amp == '&') { + for (ient = 0; xml_entrs[ient] != 0; ++ient) + if (strncmp(amp + 1, xml_entrs[ient], + xml_ent_len[ient]) == 0) { + decoded += raw_entrs[ient]; + amp += xml_ent_len[ient]+1; + break; + } + if (xml_entrs[ient] == 0) // unrecognized sequence + decoded += *amp++; + + } else { + decoded += *amp++; + } +} + +static void xdxf2result(const char *p, ParseResult &result) +{ + LinksPosList links_list; + std::string res; + const char *tag, *next; + std::string name; + std::string::size_type cur_pos; + int i; + + struct ReplaceTag { + const char *match_; + int match_len_; + const char *replace_; + int char_len_; + }; + static const ReplaceTag replace_arr[] = { + { "abr>", 4, "", 0 }, + { "/abr>", 5, "", 0 }, + { "b>", 2, "", 0 }, + { "/b>", 3, "", 0 }, + { "i>", 2, "", 0 }, + { "/i>", 3, "", 0 }, + { "sub>", 4, "", 0 }, + { "/sub>", 5, "", 0}, + { "sup>", 4, "", 0}, + { "/sup>", 5, "", 0}, + { "tt>", 3, "", 0}, + { "/tt>", 4, "", 0}, + { "big>", 4, "", 0}, + { "/big>", 5, "", 0}, + { "small>", 6, "", 0}, + { "/small>", 7, "", 0}, + { "tr>", 3, "[", 1 }, + { "/tr>", 4, "]", 1 }, + { "ex>", 3, "", 0 }, + { "/ex>", 4, "", 0 }, + { "/c>", 3, "", 0 }, + { NULL, 0, NULL }, + }; + + bool is_first_k = true; + for (cur_pos = 0; *p && (tag = strchr(p, '<')) != NULL;) { + //TODO: do not create chunk + std::string chunk(p, tag - p); + res += chunk; + cur_pos += xml_strlen(chunk); + + p = tag; + for (i = 0; replace_arr[i].match_; ++i) + if (strncmp(replace_arr[i].match_, p + 1, + replace_arr[i].match_len_) == 0) { + res += replace_arr[i].replace_; + p += 1 + replace_arr[i].match_len_; + cur_pos += replace_arr[i].char_len_; + goto cycle_end; + } + + if (strncmp("k>", p + 1, 2) == 0) { + next = strstr(p + 3, ""); + if (next) { + if (is_first_k) { + is_first_k = false; + if (*(next + 4) == '\n') + next++; + } else { + res += ""; + std::string chunk(p+3, next-(p+3)); + res += chunk; + size_t xml_len = xml_strlen(chunk); + cur_pos += xml_len; + res += ""; + } + p = next + sizeof("") - 1; + } else + p += sizeof("") - 1; + } else if (*(p + 1) == 'c' && (*(p + 2) == ' ' || *(p + 2) == '>')) { + next = strchr(p, '>'); + if (!next) { + ++p; + continue; + } + name.assign(p + 1, next - p - 1); + std::string::size_type pos = name.find("c=\""); + if (pos != std::string::npos) { + pos += sizeof("c=\"") - 1; + std::string::size_type end_pos = name.find("\"", pos); + if (end_pos == std::string::npos) + end_pos = name.length(); + + std::string color(name, pos, end_pos - pos); + if (pango_color_parse(NULL, color.c_str())) + res += ""; + else + res += ""; + } else + res += ""; + p = next + 1; + } else if (*(p + 1) == 'r' && *(p + 2) == 'r' && *(p + 3) == 'e' && *(p + 4) == 'f' && (*(p + 5) == ' ' || *(p + 5) == '>')) { + next = strchr(p, '>'); + if (!next) { + ++p; + continue; + } + name.assign(p + 1, next - p - 1); + std::string type; + std::string::size_type pos = name.find("type=\""); + if (pos != std::string::npos) { + pos += sizeof("type=\"") - 1; + std::string::size_type end_pos = name.find("\"", pos); + if (end_pos == std::string::npos) + end_pos = name.length(); + type.assign(name, pos, end_pos - pos); + } + p = next + 1; + next = strstr(p, ""); + if (!next) + continue; + std::string chunk(p, next - p); + p = next + sizeof("") - 1; + if (type.empty()) { + if (g_str_has_suffix(chunk.c_str(), ".jpg") || g_str_has_suffix(chunk.c_str(), ".png")) { + type = "image"; + } else if (g_str_has_suffix(chunk.c_str(), ".wav") || g_str_has_suffix(chunk.c_str(), ".mp3") || g_str_has_suffix(chunk.c_str(), ".ogg")) { + type = "sound"; + } else if (g_str_has_suffix(chunk.c_str(), ".avi") || g_str_has_suffix(chunk.c_str(), ".mpeg")) { + type = "video"; + } else { + type = "attach"; + } + } + ParseResultItem item; + item.type = ParseResultItemType_link; + item.link = new ParseResultLinkItem; + item.link->pango = res; + item.link->links_list = links_list; + result.item_list.push_back(item); + res.clear(); + cur_pos = 0; + links_list.clear(); + item.type = ParseResultItemType_res; + item.res = new ParseResultResItem; + item.res->type = type; + item.res->key = chunk; + result.item_list.push_back(item); + } else if ((*(p + 1) == 'k' || *(p + 1) == 'i') && *(p + 2) == 'r' && *(p + 3) == 'e' && *(p + 4) == 'f' && (*(p + 5) == ' ' || *(p + 5) == '>')) { + bool is_k_or_i = (*(p + 1) == 'k'); + next = strchr(p, '>'); + if (!next) { + ++p; + continue; + } + name.assign(p + 1, next - p - 1); + std::string key; + std::string::size_type pos; + if (is_k_or_i) + pos = name.find("k=\""); + else + pos = name.find("href=\""); + if (pos != std::string::npos) { + if (is_k_or_i) + pos += sizeof("k=\"") - 1; + else + pos += sizeof("href=\"") - 1; + std::string::size_type end_pos = name.find("\"", pos); + if (end_pos == std::string::npos) + end_pos = name.length(); + key.assign(name, pos, end_pos - pos); + } + + p = next + 1; + if (is_k_or_i) + next = strstr(p, ""); + else + next = strstr(p, ""); + if (!next) + continue; + + res += ""; + std::string::size_type link_len = next - p; + std::string chunk(p, link_len); + size_t xml_len = xml_strlen(chunk); + std::string xml_enc; + if (key.empty()) + xml_decode(chunk.c_str(), xml_enc); + else + xml_decode(key.c_str(), xml_enc); + std::string link; + if (is_k_or_i) + link = "query://"; + link += xml_enc; + links_list.push_back(LinkDesc(cur_pos, xml_len, link)); + res += chunk; + cur_pos += xml_len; + res += ""; + if (is_k_or_i) + p = next + sizeof("") - 1; + else + p = next + sizeof("") - 1; + } else { + next = strchr(p+1, '>'); + if (!next) { + p++; + res += "<"; + cur_pos++; + continue; + } + p = next + 1; + } +cycle_end: + ; + } + res += p; + ParseResultItem item; + item.type = ParseResultItemType_link; + item.link = new ParseResultLinkItem; + item.link->pango = res; + item.link->links_list = links_list; + result.item_list.push_back(item); +} + +static bool parse(const char *p, unsigned int *parsed_size, ParseResult &result, const char *oword) +{ + if (*p != 'x') + return false; + p++; + size_t len = strlen(p); + if (len) { + xdxf2result(p, result); + } + *parsed_size = 1 + len + 1; + return true; +} + +static void configure() +{ +} + +DLLIMPORT bool stardict_plugin_init(StarDictPlugInObject *obj) +{ + if (strcmp(obj->version_str, PLUGIN_SYSTEM_VERSION)!=0) { + g_print("Error: XDXF data parsing plugin version doesn't match!\n"); + return true; + } + obj->type = StarDictPlugInType_PARSEDATA; + obj->info_xml = g_strdup_printf("%s1.0%s%sHu Zheng <huzheng_001@163.com>http://stardict.sourceforge.net", _("XDXF data parsing"), _("XDXF data parsing engine."), _("Parse the XDXF data.")); + obj->configure_func = configure; + return false; +} + +DLLIMPORT void stardict_plugin_exit(void) +{ +} + +DLLIMPORT bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj) +{ + obj->parse_func = parse; + g_print(_("XDXF data parsing plug-in loaded.\n")); + return false; +} + +#ifdef _WIN32 +BOOL APIENTRY DllMain (HINSTANCE hInst /* Library instance handle. */ , + DWORD reason /* Reason this function is being called. */ , + LPVOID reserved /* Not used. */ ) +{ + switch (reason) + { + case DLL_PROCESS_ATTACH: + break; + + case DLL_PROCESS_DETACH: + break; + + case DLL_THREAD_ATTACH: + break; + + case DLL_THREAD_DETACH: + break; + } + + /* Returns TRUE on success, FALSE on failure */ + return TRUE; +} +#endif diff --git a/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.dev b/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.dev new file mode 100644 index 0000000..c54bcfe --- /dev/null +++ b/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.dev @@ -0,0 +1,69 @@ +[Project] +FileName=stardict_xdxf_parsedata.dev +Name=stardict_xdxf_parsedata +UnitCount=2 +Type=3 +Ver=1 +ObjFiles= +Includes= +Libs= +PrivateResource= +ResourceIncludes= +MakeIncludes= +Compiler=-DBUILDING_DLL=1 -I"\cairo" -I"\gtk-2.0" -I"\gtkdeps-2.0" -I"\gtk-2.0\include" -I"\atk-1.0" -I"\pango-1.0" -I"\glib-2.0" -I"\glib-2.0" -I"\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_ +CppCompiler=-DBUILDING_DLL=1 -I"\cairo" -I"\gtk-2.0" -I"\gtkdeps-2.0" -I"\gtk-2.0\include" -I"\atk-1.0" -I"\pango-1.0" -I"\glib-2.0" -I"\glib-2.0" -I"\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_ +Linker=--no-export-all-symbols --add-stdcall-alias -lcairo -lgtk-win32-2.0 -lgdk-win32-2.0 -latk-1.0 -lgdk_pixbuf-2.0 -lm -lpangowin32-1.0 -lpango-1.0 -lgobject-2.0 -lgmodule-2.0 -lglib-2.0 -lgthread-2.0 -lintl -lwinmm -mno-cygwin -mwindows -mms-bitfields_@@_ +IsCpp=1 +Icon= +ExeOutput= +ObjectOutput= +OverrideOutput=0 +OverrideOutputName=stardict_xdxf_parsedata.dll +HostApplication= +Folders= +CommandLine= +UseCustomMakefile=0 +CustomMakefile= +IncludeVersionInfo=0 +SupportXPThemes=0 +CompilerSet=0 +CompilerSettings=0000000000000000000100 + +[Unit1] +FileName=stardict_xdxf_parsedata.cpp +CompileCpp=1 +Folder=stardict_xdxf_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[Unit2] +FileName=stardict_xdxf_parsedata.h +CompileCpp=1 +Folder=stardict_xdxf_parsedata +Compile=1 +Link=1 +Priority=1000 +OverrideBuildCmd=0 +BuildCmd= + +[VersionInfo] +Major=0 +Minor=1 +Release=1 +Build=1 +LanguageID=1033 +CharsetID=1252 +CompanyName= +FileVersion= +FileDescription=Developed using the Dev-C++ IDE +InternalName= +LegalCopyright= +LegalTrademarks= +OriginalFilename= +ProductName= +ProductVersion= +AutoIncBuildNr=0 + diff --git a/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.h b/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.h new file mode 100644 index 0000000..6f837e6 --- /dev/null +++ b/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.h @@ -0,0 +1,23 @@ +#ifndef _STARDICT_XDXF_PARSEDATA_PLUGIN_H_ +#define _STARDICT_XDXF_PARSEDATA_PLUGIN_H_ + +#ifdef _WIN32 +#if BUILDING_DLL +# define DLLIMPORT __declspec (dllexport) +#else /* Not BUILDING_DLL */ +# define DLLIMPORT __declspec (dllimport) +#endif /* Not BUILDING_DLL */ +#else +# define DLLIMPORT +#endif + +#include "../../src/lib/plugin.h" +#include "../../src/lib/parsedata_plugin.h" + +extern "C" { + DLLIMPORT extern bool stardict_plugin_init(StarDictPlugInObject *obj); + DLLIMPORT extern void stardict_plugin_exit(void); + DLLIMPORT extern bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj); +} + +#endif diff --git a/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.v b/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.v new file mode 100644 index 0000000..683abb3 --- /dev/null +++ b/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.v @@ -0,0 +1,10 @@ +{ + global: + extern "C" { + stardict_plugin_init; + stardict_plugin_exit; + stardict_parsedata_plugin_init; + }; + local: + *; +};