Imported version 0.4-1 v0.4-1
authorRoman Moravcik <roman.moravcik@gmail.com>
Thu, 18 Feb 2010 16:40:38 +0000 (17:40 +0100)
committerRoman Moravcik <roman.moravcik@gmail.com>
Thu, 18 Feb 2010 16:40:38 +0000 (17:40 +0100)
37 files changed:
Makefile.am
configure.ac
debian/changelog
debian/rules
src/dictmngr.cpp
src/libwrapper.cpp
src/mstardict.cpp
src/mstardict.hpp
stardict-plugins/Makefile.am [new file with mode: 0644]
stardict-plugins/stardict-html-parsedata-plugin/Makefile.am [new file with mode: 0644]
stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.cpp [new file with mode: 0644]
stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.dev [new file with mode: 0644]
stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.h [new file with mode: 0644]
stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.v [new file with mode: 0644]
stardict-plugins/stardict-powerword-parsedata-plugin/Makefile.am [new file with mode: 0644]
stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.cpp [new file with mode: 0644]
stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.dev [new file with mode: 0644]
stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.h [new file with mode: 0644]
stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.v [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/Makefile.am [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/TXML.cpp [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/TXML.h [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/WIKI2XML.cpp [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/WIKI2XML.h [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/global.cpp [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/global.h [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki2xml.cpp [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki2xml.h [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.cpp [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.dev [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.h [new file with mode: 0644]
stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.v [new file with mode: 0644]
stardict-plugins/stardict-xdxf-parsedata-plugin/Makefile.am [new file with mode: 0644]
stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.cpp [new file with mode: 0644]
stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.dev [new file with mode: 0644]
stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.h [new file with mode: 0644]
stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.v [new file with mode: 0644]

index d582cb5..136767b 100644 (file)
@@ -1,4 +1,4 @@
-SUBDIRS = data m4 src po
+SUBDIRS = data m4 po src stardict-plugins
 
 EXTRA_DIST = config.rpath mkinstalldirs BUGS config.rpath 
 
index 4a5f8e4..9a6910c 100644 (file)
@@ -1,7 +1,7 @@
 dnl Process this file with autoconf to produce a configure script.
 AC_PREREQ(2.52)
 
-AC_INIT(mstardict, 0.3, roman.moravcik@gmail.com)
+AC_INIT(mstardict, 0.4, roman.moravcik@gmail.com)
 AC_CONFIG_SRCDIR(src/mstardict.cpp)
 
 dnl Don't include maintainer make-rules by default
@@ -17,6 +17,7 @@ AC_LANG([C++])
 AC_PROG_CXXCPP
 AC_PROG_MAKE_SET
 AC_PROG_RANLIB
+AM_PROG_LIBTOOL
 
 dnl ================================================================
 dnl Gettext stuff.
@@ -49,9 +50,14 @@ AC_OUTPUT([
  data/Makefile
  data/mstardict.desktop.in
  m4/Makefile
- src/lib/Makefile
- src/Makefile
  po/Makefile.in
+ src/Makefile
+ src/lib/Makefile
+ stardict-plugins/Makefile
+ stardict-plugins/stardict-html-parsedata-plugin/Makefile
+ stardict-plugins/stardict-powerword-parsedata-plugin/Makefile
+ stardict-plugins/stardict-wiki-parsedata-plugin/Makefile
+ stardict-plugins/stardict-xdxf-parsedata-plugin/Makefile
 ])
 
 echo "
index 00620c4..4df50fd 100644 (file)
@@ -1,3 +1,11 @@
+mstardict (0.4-1) unstable; urgency=low
+
+  * Fixed searching of synonyms.
+  * Added preliminary support of Stardict plugins.
+  * Added HTML, PowerWord, Wiki and XDXF parse plugins.
+
+ -- Roman Moravcik <roman.moravcik@gmail.com>  Thu,  4 Feb 2010 18:01:14 +0100
+
 mstardict (0.3-2) unstable; urgency=low
 
   * Splited code of dictionary management.
index 2deb34f..e530b11 100755 (executable)
@@ -24,7 +24,7 @@ configure-stamp:
        dh_testdir
 
        ./autogen.sh
-       ./configure --host=$(DEB_HOST_GNU_TYPE) --build=$(DEB_BUILD_GNU_TYPE) --prefix=/usr --disable-static CFLAGS="$(CFLAGS)" LDFLAGS="-Wl,-z,defs"
+       ./configure --host=$(DEB_HOST_GNU_TYPE) --build=$(DEB_BUILD_GNU_TYPE) --prefix=/usr --disable-static
 
        touch $@
 
@@ -44,7 +44,7 @@ clean:
        rm -f build-stamp configure-stamp
 
        [ ! -f Makefile ] || $(MAKE) distclean
-       rm -f Makefile.in aclocal.m4 compile config.guess config.h.in config.rpath config.sub configure data/Makefile.in depcomp install-sh intltool-extract.in intltool-merge.in intltool-update.in ltmain.sh m4/Makefile.in missing mkinstalldirs po/Makefile.in.in src/Makefile.in src/lib/Makefile.in
+       rm -f Makefile.in aclocal.m4 compile config.guess config.h.in config.rpath config.sub configure data/Makefile.in depcomp install-sh intltool-extract.in intltool-merge.in intltool-update.in ltmain.sh m4/Makefile.in missing mkinstalldirs po/Makefile.in.in src/Makefile.in src/lib/Makefile.in stardict-plugins/Makefile.in stardict-plugins/stardict-html-parsedata-plugin/Makefile.in stardict-plugins/stardict-powerword-parsedata-plugin/Makefile.in stardict-plugins/stardict-wiki-parsedata-plugin/Makefile.in stardict-plugins/stardict-xdxf-parsedata-plugin/Makefile.in
 
        dh_clean 
 
index 6c346d5..2dd7350 100644 (file)
@@ -105,7 +105,12 @@ DictMngr::CreateDictMngrDialog()
     gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), selector);
 
     renderer = gtk_cell_renderer_text_new();
-    g_object_set(G_OBJECT(renderer), "xpad", 10, NULL);
+    g_object_set(G_OBJECT(renderer),
+                "xpad", 10,
+                "ellipsize", PANGO_ELLIPSIZE_END,
+                "ellipsize-set", TRUE,
+                NULL);
+
     column =
        hildon_touch_selector_append_column(HILDON_TOUCH_SELECTOR
                                            (selector),
index a10c287..0cc7001 100644 (file)
@@ -107,27 +107,82 @@ std::string xdxf2text(const char *p)
 }
 
 static
-string parse_data(const gchar *data)
+string parse_data(const gchar *data,
+                 const gchar *oword)
 {
     if (!data)
        return "";
 
-    string res;
+    string mark;
     guint32 data_size, sec_size = 0;
     gchar *m_str;
     const gchar *p = data;
     data_size = *((guint32 *) p);
     p += sizeof(guint32);
+    size_t iPlugin;
+    size_t nPlugins = pMStarDict->oStarDictPlugins->ParseDataPlugins.nplugins();
+    unsigned int parsed_size;
+    ParseResult parse_result;
+
     while (guint32(p - data) < data_size) {
+       for (iPlugin = 0; iPlugin < nPlugins; iPlugin++) {
+           parse_result.clear();
+           if (pMStarDict->oStarDictPlugins->ParseDataPlugins.parse(iPlugin, p, &parsed_size, parse_result, oword)) {
+               p += parsed_size;
+               break;
+           }
+       }
+       if (iPlugin != nPlugins) {
+           for (std::list<ParseResultItem>::iterator it = parse_result.item_list.begin(); it != parse_result.item_list.end(); ++it) {
+               switch (it->type) {
+                   case ParseResultItemType_mark:
+                       g_debug("ParseResultItemType_mark");
+                       mark += it->mark->pango;
+                       break;
+                   case ParseResultItemType_link:
+//                     g_debug("ParseResultItemType_link: %s", it->mark->pango.c_str());
+                       mark += it->mark->pango;
+                       break;
+                   case ParseResultItemType_res:
+                   {
+                       g_debug("ParseResultItemType_res");
+                       bool loaded = false;
+                       if (it->res->type == "image") {
+                       } else if (it->res->type == "sound") {
+                       } else if (it->res->type == "video") {
+                       } else {
+                       }
+                       if (!loaded) {
+                           mark += "<span foreground=\"red\">";
+                           gchar *m_str = g_markup_escape_text(it->res->key.c_str(), -1);
+                           mark += m_str;
+                           g_free(m_str);
+                           mark += "</span>";
+                       }
+                       break;
+                   }
+                   case ParseResultItemType_widget:
+                       g_debug("ParseResultItemType_widget");
+                       break;
+                   default:
+                       g_debug("ParseResultItemType_default");
+                       break;
+               }
+           }
+           parse_result.clear();
+           continue;
+       }
+
        switch (*p++) {
        case 'g':
+       case 'h':
        case 'm':
        case 'l':               //need more work...
            sec_size = strlen(p);
            if (sec_size) {
-               res += "\n";
+               mark += "\n";
                m_str = g_strndup(p, sec_size);
-               res += m_str;
+               mark += m_str;
                g_free(m_str);
            }
            sec_size++;
@@ -135,9 +190,9 @@ string parse_data(const gchar *data)
        case 'x':
            sec_size = strlen(p);
            if (sec_size) {
-               res += "\n";
+               mark += "\n";
                m_str = g_strndup(p, sec_size);
-               res += xdxf2text(m_str);
+               mark += xdxf2text(m_str);
                g_free(m_str);
            }
            sec_size++;
@@ -145,9 +200,9 @@ string parse_data(const gchar *data)
        case 't':
            sec_size = strlen(p);
            if (sec_size) {
-               res += "\n";
+               mark += "\n";
                m_str = g_strndup(p, sec_size);
-               res += "[" + string(m_str) + "]";
+               mark += "[" + string(m_str) + "]";
                g_free(m_str);
            }
            sec_size++;
@@ -165,7 +220,7 @@ string parse_data(const gchar *data)
        p += sec_size;
     }
 
-    return res;
+    return mark;
 }
 
 void
@@ -204,6 +259,7 @@ Library::BuildResultData(std::vector < InstantDictIndex > &dictmask,
     int iRealLib;
     bool bFound = false, bLookupWord = false, bLookupSynonymWord = false;
     gint nWord = 0, count = 0, i = 0, j = 0;
+    glong iWordIdx;
 
     iRealLib = dictmask[iLib].index;
 
@@ -237,22 +293,20 @@ Library::BuildResultData(std::vector < InstantDictIndex > &dictmask,
            count = GetOrigWordCount(iIndex[iLib].idx, iRealLib, true);
            for (i = 0; i < count; i++) {
                res_list.push_back(TSearchResult(dict_name(iLib),
-                                                poGetWord(iIndex[iLib].idx, iRealLib,
-                                                          0),
-                                                parse_data
-                                                (poGetOrigWordData
-                                                 (iIndex[iLib].idx + i, iRealLib))));
+                                                poGetOrigWord(iIndex[iLib].idx, iRealLib),
+                                                parse_data(poGetOrigWordData(iIndex[iLib].idx + i, iRealLib),
+                                                           poGetOrigWord(iIndex[iLib].idx, iRealLib))));
            }
            i = 1;
        } else {
            i = 0;
        }
        for (j = 0; i < nWord; i++, j++) {
+           iWordIdx = poGetOrigSynonymWordIdx(iIndex[iLib].synidx + j, iRealLib);
            res_list.push_back(TSearchResult(dict_name(iLib),
-                                            poGetWord(iIndex[iLib].synidx + j,
-                                                      iRealLib, 0),
-                                            parse_data(poGetOrigWordData
-                                                       (iIndex[iLib].synidx + j, iRealLib))));
+                                            poGetOrigWord(iWordIdx, iRealLib),
+                                            parse_data(poGetOrigWordData(iWordIdx, iRealLib),
+                                                       poGetOrigWord(iWordIdx, iRealLib))));
        }
 
        bFound = true;
@@ -369,7 +423,7 @@ LookupProgressDialogUpdate(gpointer data,
     GtkWidget *dialog = GTK_WIDGET(data);
     GtkWidget *progress;
 
-    progress = GTK_WIDGET(g_object_get_data(G_OBJECT(dialog), "progress"));
+    progress = GTK_WIDGET(g_object_get_data(G_OBJECT(dialog), "progress_bar"));
     gtk_progress_bar_set_fraction(GTK_PROGRESS_BAR(progress), fraction);
 
     while (gtk_events_pending())
index 1ea7cb4..4e71e2d 100644 (file)
@@ -51,6 +51,8 @@
 #include "libwrapper.hpp"
 #include "mstardict.hpp"
 
+MStarDict *pMStarDict;
+
 enum {
     DEF_COLUMN,
     N_COLUMNS
@@ -71,6 +73,13 @@ MStarDict::MStarDict()
     /* initialize configuration */
     oConf = new Conf();
 
+    /* initialize stardict plugins */
+    std::list < std::string > plugin_order_list;
+    std::list < std::string > plugin_disable_list;
+    oStarDictPlugins = new StarDictPlugins("/usr/lib/mstardict/plugins",
+                                          plugin_order_list,
+                                          plugin_disable_list);
+
     /* initialize dict manager */
     oDict = new DictMngr(this);
 
@@ -89,6 +98,9 @@ MStarDict::~MStarDict()
     /* deinitialize dict manager */
     delete oDict;
 
+    /* deinitialize stardict plugins */
+    delete oStarDictPlugins;
+
     /* deinitialize configuration */
     delete oConf;
 }
@@ -138,7 +150,7 @@ MStarDict::onResultsViewSelectionChanged(GtkTreeSelection *selection,
     }
 
     /* grab focus to search entry */
-    gtk_widget_grab_focus(GTK_WIDGET(mStarDict->search));
+    mStarDict->GrabFocus();
 
     return true;
 }
@@ -240,6 +252,8 @@ MStarDict::onMainWindowKeyPressEvent(GtkWidget *window,
 {
     if (event->type == GDK_KEY_PRESS && event->keyval == GDK_KP_Enter) {
        mStarDict->SearchWord();
+    } else if (event->type == GDK_KEY_PRESS && event->keyval >= 0x21 && event->keyval <= 0x7E) {
+       mStarDict->GrabFocus();
     }
     return false;
 }
@@ -260,7 +274,7 @@ MStarDict::CreateLookupProgressDialog(bool *cancel)
     /* add progress bar */
     progress = gtk_progress_bar_new();
     gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), progress);
-    g_object_set_data(G_OBJECT(dialog), "progress", progress);
+    g_object_set_data(G_OBJECT(dialog), "progress_bar", progress);
 
     /* show dialog */
     gtk_widget_show_all(dialog);
@@ -374,7 +388,11 @@ MStarDict::CreateMainWindow()
     gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW
                                                (results_view), -1, "Def",
                                                renderer, "text", DEF_COLUMN, NULL);
-    g_object_set(G_OBJECT(renderer), "xpad", 10, NULL);
+    g_object_set(G_OBJECT(renderer),
+                "xpad", 10,
+                "ellipsize", PANGO_ELLIPSIZE_END,
+                "ellipsize-set", TRUE,
+                NULL);
 
     /* search entry */
     search = hildon_entry_new(HILDON_SIZE_FINGER_HEIGHT);
@@ -389,7 +407,7 @@ MStarDict::CreateMainWindow()
     gtk_widget_show_all(GTK_WIDGET(main_window));
 
     /* grab focus to search entry */
-    gtk_widget_grab_focus(GTK_WIDGET(search));
+    GrabFocus();
 }
 
 void
@@ -517,6 +535,12 @@ MStarDict::ShowProgressIndicator(bool bShow)
        hildon_gtk_window_set_progress_indicator(GTK_WINDOW(main_window), 0);
 }
 
+void
+MStarDict::GrabFocus()
+{
+    gtk_widget_grab_focus(GTK_WIDGET(search)); /* give keyboard focus to the search entry widget */
+}
+
 int
 main(int argc,
      char **argv)
@@ -532,6 +556,7 @@ main(int argc,
 
     /* create main window */
     MStarDict mStarDict;
+    pMStarDict = &mStarDict;
     mStarDict.CreateMainWindow();
     mStarDict.CreateMainMenu();
     mStarDict.ShowNoResults(true);
index f2af68a..42c2655 100644 (file)
 #include <gtk/gtk.h>
 #include <hildon/hildon.h>
 
+#include "lib/pluginmanager.h"
+
+extern MStarDict *pMStarDict;
+
 class Conf;
 class DictMngr;
 class Library;
@@ -66,6 +70,7 @@ class MStarDict {
     Conf *oConf;
     DictMngr *oDict;
     Library *oLibs;
+    StarDictPlugins *oStarDictPlugins;
 
     GtkWidget *CreateLookupProgressDialog(bool *cancel);
     void DestroyLookupProgressDialog(GtkWidget *dialog);
@@ -84,4 +89,5 @@ class MStarDict {
     void ShowNoResults(bool bNoResults);
     void ShowNoDictionary(bool bNoDictionary);
     void ShowProgressIndicator(bool bShow);
+    void GrabFocus();
 };
diff --git a/stardict-plugins/Makefile.am b/stardict-plugins/Makefile.am
new file mode 100644 (file)
index 0000000..e41a48b
--- /dev/null
@@ -0,0 +1,3 @@
+DIST_SUBDIRS = stardict-html-parsedata-plugin stardict-powerword-parsedata-plugin stardict-wiki-parsedata-plugin stardict-xdxf-parsedata-plugin
+
+SUBDIRS = stardict-html-parsedata-plugin stardict-powerword-parsedata-plugin stardict-wiki-parsedata-plugin stardict-xdxf-parsedata-plugin
diff --git a/stardict-plugins/stardict-html-parsedata-plugin/Makefile.am b/stardict-plugins/stardict-html-parsedata-plugin/Makefile.am
new file mode 100644 (file)
index 0000000..9e9b31a
--- /dev/null
@@ -0,0 +1,18 @@
+EXTRA_DIST = stardict_html_parsedata.v stardict_html_parsedata.dev
+
+
+LD_VERSION_SCRIPT_OPTION="-Wl,--version-script=stardict_html_parsedata.v"
+
+noinst_HEADERS = stardict_html_parsedata.h
+
+stardict_html_parsedata_LTLIBRARIES = stardict_html_parsedata.la
+
+stardict_html_parsedatadir = $(libdir)/mstardict/plugins
+
+stardict_html_parsedata_la_SOURCES = stardict_html_parsedata.cpp
+
+stardict_html_parsedata_la_LDFLAGS =   -avoid-version \
+                                       -module \
+                                       $(LD_VERSION_SCRIPT_OPTION)
+
+INCLUDES = @LIB_STARDICT_CFLAGS@ -I$(top_builddir)/src
diff --git a/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.cpp b/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.cpp
new file mode 100644 (file)
index 0000000..8bb78e8
--- /dev/null
@@ -0,0 +1,498 @@
+#include "stardict_html_parsedata.h"
+#include <glib/gi18n.h>
+
+#ifdef _WIN32
+#include <windows.h>
+
+#ifdef _MSC_VER
+#  define strncasecmp _strnicmp
+#endif
+/*
+ * Fallback strcasestr() used only when building for _WIN32.
+ *
+ * Returns a pointer to the first position in phaystack at which pneedle
+ * occurs, comparing characters through tolower(). Returns phaystack itself
+ * when pneedle is empty, and 0 when there is no match.
+ *
+ * NOTE(review): the goto-based layout, which tests two haystack characters
+ * per scan iteration, looks adapted from a libc strstr() implementation -
+ * confirm the origin before restructuring; the statement order matters.
+ */
+static char *strcasestr (const char *phaystack, const char *pneedle)
+{
+       register const unsigned char *haystack, *needle;
+       register char b, c; /* b, c: lowercased first and second needle characters */
+
+       haystack = (const unsigned char *) phaystack;
+       needle = (const unsigned char *) pneedle;
+
+       b = tolower(*needle);
+       if (b != '\0') { /* an empty needle skips straight to foundneedle */
+               haystack--;             /* possible ANSI violation: points one before the buffer so the pre-increment below starts at index 0 */
+               do { /* scan for the first haystack character equal to b */
+                       c = *++haystack;
+                       if (c == '\0')
+                               goto ret0;
+               } while (tolower(c) != (int) b);
+
+               c = tolower(*++needle);
+               if (c == '\0') /* single-character needle: already matched */
+                       goto foundneedle;
+               ++needle; /* needle now points at the third needle character */
+               goto jin; /* enter the main loop at the second-character check */
+
+               for (;;) {
+                       register char a;
+                       register const unsigned char *rhaystack, *rneedle;
+
+                       do { /* advance to the next match for b, two characters per iteration */
+                               a = *++haystack;
+                               if (a == '\0')
+                                       goto ret0;
+                               if (tolower(a) == (int) b)
+                                       break;
+                               a = *++haystack;
+                               if (a == '\0')
+                                       goto ret0;
+                       shloop:
+                               ; /* re-entry point after a mismatch on c; a is re-tested against b below */
+                       }
+                       while (tolower(a) != (int) b);
+
+               jin:      a = *++haystack; /* haystack was on a match for b; fetch the following character */
+                       if (a == '\0')
+                               goto ret0;
+
+                       if (tolower(a) != (int) c)
+                               goto shloop; /* second character differs: resume the scan for b */
+
+                       rhaystack = haystack-- + 1; /* rhaystack: first unverified character; haystack steps back onto the match for b */
+                       rneedle = needle; /* remember the third-character position */
+                       a = tolower(*rneedle);
+
+                       if (tolower(*rhaystack) == (int) a)
+                               do { /* compare the remaining characters, two per iteration */
+                                       if (a == '\0')
+                                               goto foundneedle;
+                                       ++rhaystack;
+                                       a = tolower(*++needle);
+                                       if (tolower(*rhaystack) != (int) a)
+                                               break;
+                                       if (a == '\0')
+                                               goto foundneedle;
+                                       ++rhaystack;
+                                       a = tolower(*++needle);
+                               } while (tolower (*rhaystack) == (int) a);
+
+                       needle = rneedle;             /* took the register-poor approach: restore the position saved in rneedle */
+
+                       if (a == '\0') /* the mismatch was the needle terminator, so the whole needle matched */
+                               break;
+               }
+       }
+ foundneedle:
+       return (char*) haystack; /* haystack points at the start of the match */
+ ret0:
+       return 0;
+}
+#endif
+
+static void html_topango(const std::string& str, std::string &pango, size_t &pango_len)
+{
+       const char *q, *p;
+       static const char* xml_entrs[] = { "lt;", "gt;", "amp;", "apos;", "quot;", 0 };
+       static const int xml_ent_len[] = { 3,     3,     4,      5,       5 };
+       static const char* html_entries[] = {"nbsp;", 0};
+       static const int html_entry_len[] = {5};
+       static const char* html_values[] = {" "};
+       static const int html_value_len[] = {1};
+       size_t cur_pos;
+       int i;
+       char *etext;
+
+       pango.clear();
+       for (cur_pos = 0, q = str.c_str(); *q; ++cur_pos) {
+               if (*q == '&') {
+                       for (i = 0; xml_entrs[i]; ++i) {
+                               if (strncasecmp(xml_entrs[i], q + 1,
+                                           xml_ent_len[i]) == 0) {
+                                       q += xml_ent_len[i] + 1;
+                                       pango += '&';
+                                       pango += xml_entrs[i];
+                                       break;
+                               }
+                       }
+                       if (xml_entrs[i] == NULL) {
+                               for (i = 0; html_entries[i]; ++i) {
+                                       if (strncasecmp(html_entries[i], q+1, html_entry_len[i])==0) {
+                                               q += html_entry_len[i] + 1;
+                                               pango += html_values[i];
+                                               cur_pos += (html_value_len[i] -1);
+                                               break;
+                                       }
+                               }
+                               if (html_entries[i] == NULL) {
+                                       if (*(q+1)=='#' && (p = strchr(q+2, ';'))) {
+                                               std::string str(q+2, p-(q+2));
+                                               gunichar uc;
+                                               uc = atoi(str.c_str());
+                                               gchar utf8[7];
+                                               gint n = g_unichar_to_utf8(uc, utf8);
+                                               utf8[n] = '\0';
+                                               pango += utf8;
+                                               q = p+1;
+                                       } else {
+                                               ++q;
+                                               pango += "&amp;";
+                                       }
+                               }
+                       }
+               } else if (*q == '\r' || *q == '\n') {
+                       q++;
+                       cur_pos--;
+               } else {
+                       p = g_utf8_next_char(q);
+                       etext = g_markup_escape_text(q, p-q);
+                       pango += etext;
+                       g_free(etext);
+                       q = p;
+               }
+       }
+
+       pango_len = cur_pos;
+}
+/*
+ * Replace the five predefined XML entities in str (&lt; &gt; &amp; &apos;
+ * &quot;) with their literal characters and store the result in decoded.
+ *
+ * Matching is case-sensitive (strncmp). An ampersand that does not start
+ * one of these entities is copied through unchanged. Any previous content
+ * of decoded is overwritten.
+ */
+static void xml_decode(const char *str, std::string& decoded)
+{
+       static const char raw_entrs[] = { /* literal characters, index-aligned with xml_entrs */
+               '<',   '>',   '&',    '\'',    '\"',    0 
+       };
+       static const char* xml_entrs[] = { /* entity names without the leading ampersand, 0-terminated */
+               "lt;", "gt;", "amp;", "apos;", "quot;", 0 
+       };
+       static const int xml_ent_len[] = { /* length of each xml_entrs item */
+               3,     3,     4,      5,       5 
+       };
+       int ient;
+        const char *amp = strchr(str, '&');
+
+        if (amp == NULL) { /* fast path: no ampersand, nothing to decode */
+               decoded = str;
+                return;
+        }
+        decoded.assign(str, amp - str); /* copy the prefix before the first ampersand verbatim */
+        
+        while (*amp) /* amp walks the rest of the input one character or entity at a time */
+                if (*amp == '&') {
+                        for (ient = 0; xml_entrs[ient] != 0; ++ient)
+                                if (strncmp(amp + 1, xml_entrs[ient],
+                                           xml_ent_len[ient]) == 0) {
+                                        decoded += raw_entrs[ient];
+                                        amp += xml_ent_len[ient]+1; /* skip the ampersand plus the entity name */
+                                        break;
+                                }
+                        if (xml_entrs[ient] == 0)    // unrecognized sequence: copy the ampersand as is
+                                decoded += *amp++;
+
+                } else {
+                        decoded += *amp++;
+                }        
+}
+
+static void html2result(const char *p, ParseResult &result)
+{
+       LinksPosList links_list;
+       std::string res;
+       const char *tag, *next;
+       std::string name;
+       std::string::size_type cur_pos;
+       int i;
+
+       struct ReplaceTag {
+               const char *match_;
+               int match_len_;
+               const char *replace_;
+               int char_len_;
+       };
+       static const ReplaceTag replace_arr[] = {
+               { "b>", 2, "<b>", 0 },
+               { "/b>", 3, "</b>", 0 },
+               { "big>", 4, "<big>", 0},
+               { "/big>", 5, "</big>", 0},
+               { "i>", 2, "<i>", 0  },
+               { "/i>", 3, "</i>", 0 },
+               { "s>", 2, "<s>", 0  },
+               { "/s>", 3, "</s>", 0 },
+               { "sub>", 4, "<sub>", 0 },
+               { "/sub>", 5, "</sub>", 0},
+               { "sup>", 4, "<sup>", 0},
+               { "/sup>", 5, "</sup>", 0},
+               { "small>", 6, "<small>", 0},
+               { "/small>", 7, "</small>", 0},
+               { "tt>", 3, "<tt>", 0},
+               { "/tt>", 4, "</tt>", 0},
+               { "u>", 2, "<u>", 0  },
+               { "/u>", 3, "</u>", 0 },
+               { "br>", 3, "\n", 1 },
+               { "nl>", 3, "", 0 },
+               { "hr>", 3, "\n<span foreground=\"gray\"><s>     </s></span>\n", 7 },
+               { "/font>", 6, "</span>", 0 },
+               { NULL, 0, NULL },
+       };
+
+       for (cur_pos = 0; *p && (tag = strchr(p, '<')) != NULL;) {
+               std::string chunk(p, tag - p);
+               size_t pango_len;
+               std::string pango;
+               html_topango(chunk, pango, pango_len);
+               res += pango;
+               cur_pos += pango_len;
+
+               p = tag;
+               for (i = 0; replace_arr[i].match_; ++i)
+                       if (strncasecmp(replace_arr[i].match_, p + 1,
+                                               replace_arr[i].match_len_) == 0) {
+                               res += replace_arr[i].replace_;
+                               p += 1 + replace_arr[i].match_len_;
+                               cur_pos += replace_arr[i].char_len_;
+                               goto cycle_end;
+                       }
+
+               if (strncasecmp(p+1, "font ", 5)==0) {
+                       next = strchr(p, '>');
+                       if (!next) {
+                               ++p;
+                               continue;
+                       }
+                       res += "<span";
+                       name.assign(p + 6, next - (p + 6));
+                       const char *p1 = strcasestr(name.c_str(), "face=");
+                       if (p1) {
+                               p1 += sizeof("face=") -1 +1;
+                               const char *p2 = p1;
+                               while (true) {
+                                       if (*p2 == '\0') {
+                                               p2 = NULL;
+                                               break;
+                                       }
+                                       if (*p2 == '\'' || *p2 == '"')
+                                               break;
+                                       p2++;
+                               }
+                               if (p2) {
+                                       std::string face(p1, p2-p1);
+                                       res += " face=\"";
+                                       res += face;
+                                       res += "\"";
+                               }
+                       }
+                       p1 = strcasestr(name.c_str(), "color=");
+                       if (p1) {
+                               p1 += sizeof("color=") -1;
+                               if (*p1 == '\'' || *p1 == '\"')
+                                       p1++;
+                               const char *p2 = p1;
+                               while (true) {
+                                       if (*p2 == '\0') {
+                                               p2 = NULL;
+                                               break;
+                                       }
+                                       if (*p2 == '\'' || *p2 == '"' || *p2 == ' ' || *p2 == '>')
+                                               break;
+                                       p2++;
+                               }
+                               if (p2) {
+                                       std::string color(p1, p2-p1);
+                                       if (pango_color_parse(NULL, color.c_str())) {
+                                               res += " foreground=\"";
+                                               res += color;
+                                               res += "\"";
+                                       }
+                               }
+                       }
+                       res += ">";
+                       p = next + 1;
+               } else if ((*(p + 1) == 'a' || *(p + 1) == 'A') && *(p + 2) == ' ') {
+                       next = strchr(p, '>');
+                       if (!next) {
+                               p++;
+                               continue;
+                       }
+                       p+=3;
+                       name.assign(p, next - p);
+                       const char *p1 = strcasestr(name.c_str(), "href=");
+                       std::string link;
+                       if (p1) {
+                               p1 += sizeof("href=") -1 +1;
+                               const char *p2 = p1;
+                               while (true) {
+                                       if (*p2 == '\0') {
+                                               p2 = NULL;
+                                               break;
+                                       }
+                                       if (*p2 == '\'' || *p2 == '"')
+                                               break;
+                                       p2++;
+                               }
+                               if (p2) {
+                                       link.assign(p1, p2-p1);
+                               }
+                       }
+                       p = next + 1;
+                       next = strcasestr(p, "</a>");
+                       if (!next) {
+                               continue;
+                       }
+                       res += "<span foreground=\"blue\" underline=\"single\">";
+                       std::string::size_type link_len = next - p;
+                       std::string chunk(p, link_len);
+                       html_topango(chunk, pango, pango_len);
+                       links_list.push_back(LinkDesc(cur_pos, pango_len, link));
+                       res += pango;
+                       cur_pos += pango_len;
+                       res += "</span>";
+                       p = next + sizeof("</a>") - 1;
+               } else if (strncasecmp(p+1, "ref>", 4)==0) {
+                       next = strcasestr(p, "</ref>");
+                       if (!next) {
+                               p++;
+                               continue;
+                       }
+                       p+=5;
+                       res += "<span foreground=\"blue\" underline=\"single\">";
+                       std::string::size_type link_len = next - p;
+                       std::string chunk(p, link_len);
+                       html_topango(chunk, pango, pango_len);
+                       std::string xml_enc;
+                       xml_decode(chunk.c_str(), xml_enc);
+                       std::string link;
+                       link = "query://";
+                       link += xml_enc;
+                       links_list.push_back(LinkDesc(cur_pos, pango_len, link));
+                       res += pango;
+                       cur_pos += pango_len;
+                       res += "</span>";
+                       p = next + sizeof("</ref>") - 1;
+               } else if (strncasecmp(p+1, "img ", 4)==0) {
+                       next = strchr(p+5, '>');
+                       if (!next) {
+                               p++;
+                               continue;
+                       }
+                       name.assign(p+5, next - (p+5));
+                       p = next + 1;
+                       const char *p1 = strcasestr(name.c_str(), "src=");
+                       std::string src;
+                       if (p1) {
+                               p1 += sizeof("src=") -1 +1;
+                               const char *p2 = p1;
+                               while (true) {
+                                       if (*p2 == '\0') {
+                                               p2 = NULL;
+                                               break;
+                                       }
+                                       if (*p2 == '\'' || *p2 == '"')
+                                               break;
+                                       p2++;
+                               }
+                               if (p2) {
+                                       src.assign(p1, p2-p1);
+                               }
+                       }
+                       if (!src.empty()) {
+                               ParseResultItem item;
+                               item.type = ParseResultItemType_link;
+                               item.link = new ParseResultLinkItem;
+                               item.link->pango = res;
+                               item.link->links_list = links_list;
+                               result.item_list.push_back(item);
+                               res.clear();
+                               cur_pos = 0;
+                               links_list.clear();
+                               item.type = ParseResultItemType_res;
+                               item.res = new ParseResultResItem;
+                               item.res->type = "image";
+                               int n = src.length();
+                               if (src[0]==0x1e && src[n-1]==0x1f) {
+                                       item.res->key.assign(src.c_str()+1, n-2);
+                               } else {
+                                       item.res->key = src;
+                               }
+                               result.item_list.push_back(item);
+                       }
+               } else {
+                       next = strchr(p+1, '>');
+                       if (!next) {
+                               p++;
+                               res += "&lt;";
+                               cur_pos++;
+                               continue;
+                       }
+                       p = next + 1;
+               }
+cycle_end:
+               ;
+       }
+       res += p;
+       ParseResultItem item;
+       item.type = ParseResultItemType_link;
+       item.link = new ParseResultLinkItem;
+       item.link->pango = res;
+       item.link->links_list = links_list;
+       result.item_list.push_back(item);
+}
+
+static bool parse(const char *p, unsigned int *parsed_size, ParseResult &result, const char *oword)
+{
+       if (*p != 'h')
+               return false;
+       p++;
+       size_t len = strlen(p);
+       if (len) {
+               html2result(p, result);
+       }
+       *parsed_size = 1 + len + 1;
+       return true;
+}
+
+/* Callback stored in obj->configure_func by stardict_plugin_init(); it does nothing. */
+static void configure()
+{
+}
+
+DLLIMPORT bool stardict_plugin_init(StarDictPlugInObject *obj)
+{
+       if (strcmp(obj->version_str, PLUGIN_SYSTEM_VERSION)!=0) {
+               g_print("Error: HTML data parsing plugin version doesn't match!\n");
+               return true;
+       }
+       obj->type = StarDictPlugInType_PARSEDATA;
+       obj->info_xml = g_strdup_printf("<plugin_info><name>%s</name><version>1.0</version><short_desc>%s</short_desc><long_desc>%s</long_desc><author>Hu Zheng &lt;huzheng_001@163.com&gt;</author><website>http://stardict.sourceforge.net</website></plugin_info>", _("HTML data parsing"), _("HTML data parsing engine."), _("Parse the HTML data."));
+       obj->configure_func = configure;
+       return false;
+}
+
+/* Exported plugin shutdown entry point; this plugin performs no work here. */
+DLLIMPORT void stardict_plugin_exit(void)
+{
+}
+
+/*
+ * Parse-data specific entry point: installs parse() as obj->parse_func and
+ * prints a (translated) load notice.  Always returns false.
+ */
+DLLIMPORT bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj)
+{
+       obj->parse_func = parse;
+       g_print(_("HTML data parsing plug-in loaded.\n"));
+       return false;
+}
+
+#ifdef _WIN32
+BOOL APIENTRY DllMain (HINSTANCE hInst     /* Library instance handle. */ ,
+                       DWORD reason        /* Reason this function is being called. */ ,
+                       LPVOID reserved     /* Not used. */ )
+{
+    switch (reason)
+    {
+      case DLL_PROCESS_ATTACH:
+        break;
+
+      case DLL_PROCESS_DETACH:
+        break;
+
+      case DLL_THREAD_ATTACH:
+        break;
+
+      case DLL_THREAD_DETACH:
+        break;
+    }
+
+    /* Returns TRUE on success, FALSE on failure */
+    return TRUE;
+}
+#endif
diff --git a/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.dev b/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.dev
new file mode 100644 (file)
index 0000000..04de82f
--- /dev/null
@@ -0,0 +1,69 @@
+[Project]\r
+FileName=stardict_html_parsedata.dev\r
+Name=stardict_html_parsedata\r
+UnitCount=2\r
+Type=3\r
+Ver=1\r
+ObjFiles=\r
+Includes=\r
+Libs=\r
+PrivateResource=\r
+ResourceIncludes=\r
+MakeIncludes=\r
+Compiler=-DBUILDING_DLL=1 -I"<INCLUDE>\cairo" -I"<INCLUDE>\gtk-2.0" -I"<INCLUDE>\gtkdeps-2.0" -I"<LIB>\gtk-2.0\include" -I"<INCLUDE>\atk-1.0" -I"<INCLUDE>\pango-1.0" -I"<INCLUDE>\glib-2.0" -I"<LIB>\glib-2.0" -I"<LIB>\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_\r
+CppCompiler=-DBUILDING_DLL=1 -I"<INCLUDE>\cairo" -I"<INCLUDE>\gtk-2.0" -I"<INCLUDE>\gtkdeps-2.0" -I"<LIB>\gtk-2.0\include" -I"<INCLUDE>\atk-1.0" -I"<INCLUDE>\pango-1.0" -I"<INCLUDE>\glib-2.0" -I"<LIB>\glib-2.0" -I"<LIB>\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_\r
+Linker=--no-export-all-symbols --add-stdcall-alias -lcairo -lgtk-win32-2.0 -lgdk-win32-2.0 -latk-1.0 -lgdk_pixbuf-2.0 -lm -lpangowin32-1.0 -lpango-1.0 -lgobject-2.0 -lgmodule-2.0 -lglib-2.0 -lgthread-2.0 -lintl -lwinmm -mno-cygwin -mwindows -mms-bitfields_@@_\r
+IsCpp=1\r
+Icon=\r
+ExeOutput=\r
+ObjectOutput=\r
+OverrideOutput=0\r
+OverrideOutputName=stardict_html_parsedata.dll\r
+HostApplication=\r
+Folders=\r
+CommandLine=\r
+UseCustomMakefile=0\r
+CustomMakefile=\r
+IncludeVersionInfo=0\r
+SupportXPThemes=0\r
+CompilerSet=0\r
+CompilerSettings=0000000000000000000100\r
+\r
+[Unit1]\r
+FileName=stardict_html_parsedata.h\r
+CompileCpp=1\r
+Folder=stardict_html_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[Unit2]\r
+FileName=stardict_html_parsedata.cpp\r
+CompileCpp=1\r
+Folder=stardict_html_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[VersionInfo]\r
+Major=0\r
+Minor=1\r
+Release=1\r
+Build=1\r
+LanguageID=1033\r
+CharsetID=1252\r
+CompanyName=\r
+FileVersion=\r
+FileDescription=Developed using the Dev-C++ IDE\r
+InternalName=\r
+LegalCopyright=\r
+LegalTrademarks=\r
+OriginalFilename=\r
+ProductName=\r
+ProductVersion=\r
+AutoIncBuildNr=0\r
+\r
diff --git a/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.h b/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.h
new file mode 100644 (file)
index 0000000..6f837e6
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * Exported entry points of the HTML parse-data plugin.
+ *
+ * The include guard is named after this header (HTML); it previously reused
+ * the XDXF plugin's guard name, which would silently mask one of the two
+ * headers if both were ever included in the same translation unit.
+ */
+#ifndef _STARDICT_HTML_PARSEDATA_PLUGIN_H_
+#define _STARDICT_HTML_PARSEDATA_PLUGIN_H_
+
+/* DLLIMPORT expands to the Windows import/export attribute, or to nothing elsewhere. */
+#ifdef _WIN32
+#if BUILDING_DLL
+# define DLLIMPORT __declspec (dllexport)
+#else /* Not BUILDING_DLL */
+# define DLLIMPORT __declspec (dllimport)
+#endif /* Not BUILDING_DLL */
+#else
+# define DLLIMPORT
+#endif
+
+#include "../../src/lib/plugin.h"
+#include "../../src/lib/parsedata_plugin.h"
+
+/* C linkage so the entry points keep unmangled names. */
+extern "C" {
+       DLLIMPORT extern bool stardict_plugin_init(StarDictPlugInObject *obj);
+       DLLIMPORT extern void stardict_plugin_exit(void);
+       DLLIMPORT extern bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj);
+}
+
+#endif
diff --git a/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.v b/stardict-plugins/stardict-html-parsedata-plugin/stardict_html_parsedata.v
new file mode 100644 (file)
index 0000000..683abb3
--- /dev/null
@@ -0,0 +1,10 @@
+/* Linker version script: the three plugin entry points listed under
+   "global" stay visible, every other symbol is made local. */
+{
+       global:
+               extern "C" {
+                       stardict_plugin_init;
+                       stardict_plugin_exit;
+                       stardict_parsedata_plugin_init;
+               };
+       local:
+               *;
+};
diff --git a/stardict-plugins/stardict-powerword-parsedata-plugin/Makefile.am b/stardict-plugins/stardict-powerword-parsedata-plugin/Makefile.am
new file mode 100644 (file)
index 0000000..f849cc0
--- /dev/null
@@ -0,0 +1,18 @@
+# Automake input for the PowerWord parse-data plugin, built as a libtool
+# module and installed into $(libdir)/mstardict/plugins.
+
+EXTRA_DIST = stardict_powerword_parsedata.v stardict_powerword_parsedata.dev
+
+
+# The version script lives in the source tree, so it is referenced through
+# $(srcdir); a bare relative path is not found when building out of tree
+# (VPATH build, e.g. "make distcheck").
+LD_VERSION_SCRIPT_OPTION="-Wl,--version-script=$(srcdir)/stardict_powerword_parsedata.v"
+
+noinst_HEADERS = stardict_powerword_parsedata.h
+
+stardict_powerword_parsedata_LTLIBRARIES = stardict_powerword_parsedata.la
+
+stardict_powerword_parsedatadir = $(libdir)/mstardict/plugins
+
+stardict_powerword_parsedata_la_SOURCES = stardict_powerword_parsedata.cpp
+
+# -module / -avoid-version: build a dlopen-able plugin without a versioned soname.
+stardict_powerword_parsedata_la_LDFLAGS =      -avoid-version \
+                                       -module \
+                                       $(LD_VERSION_SCRIPT_OPTION)
+
+INCLUDES = @LIB_STARDICT_CFLAGS@ -I$(top_builddir)/src
diff --git a/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.cpp b/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.cpp
new file mode 100644 (file)
index 0000000..068ebbe
--- /dev/null
@@ -0,0 +1,672 @@
+#include "stardict_powerword_parsedata.h"
+#include <glib/gi18n.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+static size_t xml_strlen(const char *xmlstr)
+{
+       const char *q;
+       static const char* xml_entrs[] = { "lt;", "gt;", "amp;", "apos;", "quot;", 0 };
+       static const int xml_ent_len[] = { 3,     3,     4,      5,       5 };
+       size_t cur_pos;
+       int i;
+
+       for (cur_pos = 0, q = xmlstr; *q; ++cur_pos) {
+               if (*q == '&') {
+                       for (i = 0; xml_entrs[i]; ++i)
+                               if (strncmp(xml_entrs[i], q + 1,
+                                           xml_ent_len[i]) == 0) {
+                                       q += xml_ent_len[i] + 1;
+                                       break;
+                               }
+                       if (xml_entrs[i] == NULL)
+                               ++q;
+               } else if (*q == '<') {
+                       const char *p = strchr(q+1, '>');
+                       if (p)
+                               q = p + 1;
+                       else
+                               ++q;
+                       --cur_pos;
+               } else
+                       q = g_utf8_next_char(q);
+       }
+
+       return cur_pos;
+}
+
+static gchar* toUtfPhonetic(const gchar *text, gsize len)
+{
+       std::string p;
+       gsize i;
+       for (i=0;i<len;i++) {
+               switch (text[i]) {
+                       case 'A':
+                               p+="æ"; break;
+                       case 'B':
+                               p+="ɑ"; break;
+                       case 'C':
+                               p+="ɔ"; break;
+                       case 'Q':
+                               p+="ʌ"; break;
+                       case 'E':
+                               p+="ә"; break;
+                       case 'Z':
+                               p+="є"; break;
+                       case 'N':
+                               p+="ŋ"; break;
+                       case 'W':
+                               p+="θ"; break;
+                       case 'T':
+                               p+="ð"; break;
+                       case 'F':
+                               p+="ʃ"; break;
+                       case 'V':
+                               p+="ʒ"; break;
+                       case 'L':
+                               p+="ɚ"; break;
+                       case 'I':
+                               p+="i"; break;
+                       case '^':
+                               p+="ɡ"; break;
+                       case '9':
+                               p+="ˏ"; break;
+                       case '5':
+                               p+="'"; break;
+                       default:
+                               p+=text[i];
+                               break;
+               }
+       }
+       return g_markup_escape_text(p.c_str(), -1);
+}
+
+static gchar* toUtfPhonetic2(const gchar *text, glong len)
+{
+       std::string p;
+       const char *s = text;
+       const char *n;
+       std::string uc;
+       while (s-text < len) {
+               n = g_utf8_next_char(s);
+               uc.assign(s, n-s);
+               if (uc == "8")
+                       p+=":";
+               else if (uc == "0")
+                       p+="Ŋ";
+               else if (uc == "¾")
+                       p+="ǔ";
+               else if (uc == "%")
+                       p+="ɔ";
+               else if (uc == "µ")
+                       p+="ě";
+               else if (uc == "³")
+                       p+="ā";
+               else if (uc == "!")
+                       p+="I";
+               else if (uc == "W")
+                       p+="ɛ";
+               else if (uc == "&")
+                       p+="U";
+               else if (uc == "…")
+                       p+="ə";
+               else if (uc == "¹")
+                       p+="ǐ";
+               else if (uc == "“")
+                       p+="′";
+               else if (uc == "*")
+                       p+="ə";
+               else if (uc == "6")
+                       p+="ˋ";
+               else if (uc == "+")
+                       p+="ɚ";
+               else if (uc == "”")
+                       p+="´";
+               else if (uc == "‘")
+                       p+="KH";
+               else if (uc == "$")
+                       p+="ɑ";
+               else if (uc == "7")
+                       p+="͵";
+               else if (uc == "'")
+                       p+="KH";
+               else if (uc == "½")
+                       p+="ō";
+               else if (uc == "¼")
+                       p+="ǒ";
+               else if (uc == "¶")
+                       p+="ē";
+               else if (uc == "º")
+                       p+="ī";
+               else if (uc == "G")
+                       p+="θ";
+               else if (uc == "9")
+                       p+="ʒ";
+               else if (uc == ".")
+                       p+="ʃ";
+               else if (uc == "/")
+                       p+="ʒ";
+               else if (uc == "²")
+                       p+="ǎ";
+               else if (uc == "#")
+                       p+="æ";
+               else if (uc == "’")
+                       p+="N";
+               else if (uc == "Y")
+                       p+="t";
+               else if (uc == "H")
+                       p+="ð";
+               else if (uc == "÷")
+                       p+="ń";
+               else if (uc == "é")
+                       p+="ê";
+               else if (uc == "¿")
+                       p+="ū";
+               else if (uc == ")")
+                       p+="ɜ";
+               else if (uc == "Ó")
+                       p+="ǒ";
+               else if (uc == "ï")
+                       p+="Ś";
+               else if (uc == "Ä")
+                       p+="ǐ";
+               else
+                       p+= uc;
+               s = n;
+       }
+       return g_markup_escape_text(p.c_str(), -1);
+}
+
+/*
+ * Convert a run of PowerWord text into Pango markup and append it to *pango.
+ *
+ * text/length - the bytes to convert (the end pointer is text + length).
+ * pango       - output markup buffer, appended to.
+ * cur_pos     - advanced by xml_strlen() of everything appended to *pango.
+ * links_list  - receives one LinkDesc (position, length, "query://..." target)
+ *               for every &l{..}, &D{..}, &L{..} or &U{..} tag.
+ *
+ * Inline tags have the form "&<c>{...}" where <c> selects the style:
+ *   b, B -> <b>       I -> <i>       + -> <sup>       - -> <sub>
+ *   x    -> blue underlined span
+ *   X, 2 -> contents converted by toUtfPhonetic() / toUtfPhonetic2() and
+ *           wrapped in a blue span
+ *   l, D, L, U -> link span (see links_list above)
+ *   anything else -> the tag opener is dropped and nothing is emitted for
+ *                    the matching '}'
+ * Outside of tags, the characters & < > ' " are escaped as XML entities.
+ * Tags still open when the input ends are closed automatically.
+ */
+static void powerword_markup_add_text(const gchar *text, gssize length, std::string *pango, std::string::size_type &cur_pos, LinksPosList *links_list)
+{
+       const gchar *p;
+       const gchar *end;
+       p = text;
+       end = text + length;
+
+       GString *str;
+       str = g_string_sized_new (length);
+
+       const gchar *n;
+       bool find;
+       /* True right after a link tag was closed; used to separate adjacent links with a tab. */
+       bool previous_islink = false;
+       /* Stack of open tag letters; currentmarktag is the current depth. */
+       std::string marktags;
+       guint currentmarktag = 0;
+       while (p != end) {
+               const gchar *next;
+               next = g_utf8_next_char (p);
+               switch (*p) {
+                       case '}':
+                               if (currentmarktag==0) {
+                                       /* No tag is open: keep the brace as literal text. */
+                                       g_string_append (str, "}");
+                                       previous_islink = false;
+                               }
+                               else {
+                                       /* Pop the innermost tag and emit its closing markup. */
+                                       currentmarktag--;
+                                       switch (marktags[currentmarktag]) {
+                                               case 'b':
+                                               case 'B':
+                                                       g_string_append (str, "</b>");
+                                                       previous_islink = false;
+                                                       break;
+                                               case 'I':
+                                                       g_string_append (str, "</i>");
+                                                       previous_islink = false;
+                                                       break;
+                                               case '+':
+                                                       g_string_append (str, "</sup>");
+                                                       previous_islink = false;
+                                                       break;
+                                               case '-':
+                                                       g_string_append (str, "</sub>");
+                                                       previous_islink = false;
+                                                       break;
+                                               case 'x':
+                                                       g_string_append (str, "</span>");
+                                                       previous_islink = false;
+                                                       break;
+                                               case 'l':
+                                               case 'D':
+                                               case 'L':
+                                               case 'U':
+                                                       g_string_append (str, "</span>");
+                                                       previous_islink = true;
+                                                       break;
+                                               default:
+                                                       previous_islink = false;
+                                                       break;
+                                       }
+                               }
+                               break;
+                       case '&':
+                               /* A tag opener is '&', one character, then '{'; otherwise '&' is plain text. */
+                               find = false;
+                               if (next!=end) {
+                                       n = g_utf8_next_char(next);
+                                       if (n!=end && *n == '{') {
+                                               find=true;
+                                               /* Push the tag letter (first byte at next) onto the stack. */
+                                               currentmarktag++;
+                                               if (marktags.length()<currentmarktag)
+                                                       marktags+=*next;
+                                               else
+                                                       marktags[currentmarktag-1]=*next;
+                                               switch (*next) {
+                                                       case 'b':
+                                                       case 'B':
+                                                               g_string_append (str, "<b>");
+                                                               next = n+1;
+                                                               break;
+                                                       case 'I':
+                                                               g_string_append (str, "<i>");
+                                                               next = n+1;
+                                                               break;
+                                                       case '+':
+                                                               g_string_append (str, "<sup>");
+                                                               next = n+1;
+                                                               break;
+                                                       case '-':
+                                                               g_string_append (str, "<sub>");
+                                                               next = n+1;
+                                                               break;
+                                                       case 'x':
+                                                               g_string_append (str, "<span foreground=\"blue\" underline=\"single\">");
+                                                               next = n+1;
+                                                               break;
+                                                       case 'X':
+                                                       case '2':
+                                                               {
+                                                               /* Phonetic tag: consume everything up to the next '}' (or the end) in one go. */
+                                                               const gchar *tag_end = n+1;
+                                                               while (tag_end!=end) {
+                                                                       if (*tag_end=='}')
+                                                                               break;
+                                                                       else
+                                                                               tag_end++;
+                                                               }
+                                                               g_string_append (str, "<span foreground=\"blue\">");
+                                                               gchar *tag_str;
+                                                               if (*next == 'X') {
+                                                                       tag_str = toUtfPhonetic(n+1, tag_end - (n+1));
+                                                               } else {
+                                                                       tag_str = toUtfPhonetic2(n+1, tag_end - (n+1));
+                                                               }
+                                                               g_string_append (str, tag_str);
+                                                               g_free(tag_str);
+                                                               g_string_append (str, "</span>");
+                                                               /* The closing brace is consumed here, so pop the tag immediately. */
+                                                               currentmarktag--;
+                                                               if (tag_end!=end)
+                                                                       next = tag_end+1;
+                                                               else
+                                                                       next = end;
+                                                               previous_islink = false;
+                                                               break;
+                                                               }
+                                                       case 'l':
+                                                       case 'D':
+                                                       case 'L':
+                                                       case 'U':
+                                                               if (previous_islink)
+                                                                       g_string_append (str, "\t");
+                                                               if (*next == 'l' || *next == 'D')
+                                                                       g_string_append (str, "<span foreground=\"blue\" underline=\"single\">");
+                                                               else
+                                                                       g_string_append (str, "<span foreground=\"#008080\" underline=\"single\">");
+                                                               /* Flush the pending markup so cur_pos is the position where the link text starts. */
+                                                               *pango += str->str;
+                                                               cur_pos += xml_strlen(str->str);
+                                                               g_string_erase(str, 0, -1);
+                                                               {
+                                                               const gchar *tag_end = n+1;
+                                                               while (tag_end!=end) {
+                                                                       if (*tag_end=='}')
+                                                                               break;
+                                                                       else
+                                                                               tag_end++;
+                                                               }
+                                                               /* The escaped text is displayed; the raw text becomes the query:// target. */
+                                                               char *tmpstr = g_markup_escape_text(n+1, tag_end - (n+1));
+                                                               size_t xml_len = xml_strlen(tmpstr);
+                                                               std::string link("query://");
+                                                               link.append(n+1, tag_end - (n+1));
+                                                               links_list->push_back(LinkDesc(cur_pos, xml_len, link));
+                                                               *pango += tmpstr;
+                                                               cur_pos += xml_len;
+                                                               g_free(tmpstr);
+                                                               g_string_append (str, "</span>");
+                                                               /* The closing brace is consumed here, so pop the tag immediately. */
+                                                               currentmarktag--;
+                                                               if (tag_end!=end)
+                                                                       next = tag_end+1;
+                                                               else
+                                                                       next = end;
+                                                               previous_islink = true;
+                                                               break;
+                                                               }
+                                                       /*case ' ':
+                                                       case '9':
+                                                       case 'S':*/
+                                                       default:
+                                                               /* Unrecognised tag letter: drop the opener, keep the contents. */
+                                                               next = n+1;
+                                                               break;
+                                               }
+                                       }
+                               }
+                               if (!find) {
+                                       previous_islink = false;
+                                       g_string_append (str, "&amp;");
+                               }
+                               break;
+                       case '<':
+                               previous_islink = false;
+                               g_string_append (str, "&lt;");
+                               break;
+                       case '>':
+                               previous_islink = false;
+                               g_string_append (str, "&gt;");
+                               break;
+                       case '\'':
+                               previous_islink = false;
+                               g_string_append (str, "&apos;");
+                               break;
+                       case '"':
+                               previous_islink = false;
+                               g_string_append (str, "&quot;");
+                               break;
+                       default:
+                               previous_islink = false;
+                               g_string_append_len (str, p, next - p);
+                               break;
+               }
+               p = next;
+       }
+       /* Close any tags that were still open when the input ended, innermost first. */
+       if (currentmarktag>0) {
+               do {
+                       currentmarktag--;
+                       switch (marktags[currentmarktag]) {
+                               case 'b':
+                               case 'B':
+                                       g_string_append (str, "</b>");
+                                       break;
+                               case 'I':
+                                       g_string_append (str, "</i>");
+                                       break;
+                               case '+':
+                                       g_string_append (str, "</sup>");
+                                       break;
+                               case '-':
+                                       g_string_append (str, "</sub>");
+                                       break;
+                               case 'x':
+                               case 'l':
+                               case 'D':
+                               case 'L':
+                               case 'U':
+                                       g_string_append (str, "</span>");
+                                       break;
+                               default:
+                                       break;
+                       }
+               } while (currentmarktag>0);
+       }
+       /* Flush whatever markup is still pending and account for its visible length. */
+       *pango += str->str;
+       cur_pos += xml_strlen(str->str);
+       g_string_free (str, TRUE);
+}
+
+/* State handed to the GMarkup callbacks (as user_data) while one PowerWord
+ * entry is converted. */
+typedef struct _PwUserData {
+       /* Output buffer the callbacks append Pango markup to. */
+       std::string *pango;
+       /* Link list forwarded to powerword_markup_add_text(). */
+       LinksPosList *links_list;
+       /* Running position counter; advanced by one per emitted '\n'/bracket and
+        * by xml_strlen() of each emitted markup fragment. */
+       std::string::size_type cur_pos;
+       /* The word being looked up; compared against the entry's own headword. */
+       const gchar *oword;
+       /* True until the first "基本词义" element is seen; its heading is suppressed. */
+       bool first_jbcy;
+} PwUserData;
+
+static void func_parse_passthrough(GMarkupParseContext *context, const gchar *passthrough_text, gsize text_len, gpointer user_data, GError **error)
+{
+       if (!g_str_has_prefix(passthrough_text, "<![CDATA["))
+               return;
+       const gchar *element = g_markup_parse_context_get_element(context);
+       if (!element)
+               return;
+       const gchar *text = passthrough_text+9;
+       gsize len = text_len-9-3;
+       while (g_ascii_isspace(*text)) {
+               text++;
+               len--;
+       }
+       while (len>0 && g_ascii_isspace(*(text+len-1))) {
+               len--;
+       }
+       if (len==0)
+               return;
+       std::string *pango = ((PwUserData*)user_data)->pango;
+       std::string::size_type &cur_pos = ((PwUserData*)user_data)->cur_pos;
+       if (strcmp(element, "词典音标")==0) {
+               if (!pango->empty()) {
+                       *pango+='\n';
+                       cur_pos++;
+               }
+               *pango+="[<span foreground=\"blue\">";
+               cur_pos++;
+               gchar *str = toUtfPhonetic(text, len);
+               *pango+=str;
+               cur_pos+=xml_strlen(str);
+               g_free(str);
+               *pango+="</span>]";
+               cur_pos++;
+       } else if (strcmp(element, "单词原型")==0) {
+               const gchar *oword = ((PwUserData*)user_data)->oword;
+               if (strncmp(oword, text, len)) {
+                       if (!pango->empty()) {
+                               *pango+='\n';
+                               cur_pos++;
+                       }
+                       *pango+="<b>";
+                       gchar *str = g_markup_escape_text(text, len);
+                       pango->append(str);
+                       cur_pos+=xml_strlen(str);
+                       g_free(str);
+                       *pango+="</b>";
+               }
+       } else if (strcmp(element, "单词词性")==0) {
+               if (!pango->empty()) {
+                       *pango+='\n';
+                       cur_pos++;
+               }
+               *pango+="<i>";
+               powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list);
+               *pango+="</i>";
+       } else if (strcmp(element, "汉语拼音")==0) {
+               if (!pango->empty()) {
+                       *pango+='\n';
+                       cur_pos++;
+               }
+               *pango+="<span foreground=\"blue\" underline=\"single\">";
+               powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list);
+               *pango+="</span>";
+       } else if (strcmp(element, "例句原型")==0) {
+               if (!pango->empty()) {
+                       *pango+='\n';
+                       cur_pos++;
+               }
+               *pango+="<span foreground=\"#008080\">";
+               powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list);
+               *pango+="</span>";
+       } else if (strcmp(element, "例句解释")==0) {
+               if (!pango->empty()) {
+                       *pango+='\n';
+                       cur_pos++;
+               }
+               *pango+="<span foreground=\"#01259A\">";
+               powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list);
+               *pango+="</span>";
+       /*} else if (strcmp(element, "相关词")==0) {
+               if (!res->empty())
+                       *res+='\n';
+               std::string tabstr;
+               tabstr+=text[0];
+               for (gsize i=1;i<len;i++) {
+                       if (text[i]=='&')
+                               tabstr+="\t&";
+                       else
+                               tabstr+=text[i];
+               }
+               gchar *str = powerword_markup_escape_text(tabstr.c_str(), tabstr.length());
+               res->append(str);
+               g_free(str);*/
+       } else
+       /*} else if (
+       strcmp(element, "解释项")==0 ||
+       strcmp(element, "跟随解释")==0 ||
+       strcmp(element, "相关词")==0 ||
+       strcmp(element, "预解释")==0 ||
+       strcmp(element, "繁体写法")==0 ||
+       strcmp(element, "台湾音标")==0 ||
+       strcmp(element, "图片名称")==0 ||
+       strcmp(element, "跟随注释")==0 ||
+       strcmp(element, "音节分段")==0 ||
+       strcmp(element, "AHD音标")==0 ||
+       strcmp(element, "国际音标")==0 ||
+       strcmp(element, "美国音标")==0 ||
+       strcmp(element, "子解释项")==0 ||
+       strcmp(element, "同义词")==0 ||
+       strcmp(element, "日文发音")==0 ||
+       strcmp(element, "惯用型原型")==0 ||
+       strcmp(element, "惯用型解释")==0 ||
+       strcmp(element, "另见")==0
+       ) {*/
+       {
+               if (!pango->empty()) {
+                       *pango+='\n';
+                       cur_pos++;
+               }
+               powerword_markup_add_text(text, len, pango, cur_pos, ((PwUserData*)user_data)->links_list);
+       }
+}
+
+static void func_parse_start_element(GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, gpointer user_data, GError **error)
+{
+       std::string res;
+       if (strcmp(element_name, "基本词义")==0) {
+               if (((PwUserData*)user_data)->first_jbcy) {
+                       ((PwUserData*)user_data)->first_jbcy = false;
+               } else {
+                       res="\n<span foreground=\"blue\">&lt;基本词义&gt;</span>";
+               }
+       } else if (strcmp(element_name, "继承用法")==0) {
+               res="\n<span foreground=\"blue\">&lt;继承用法&gt;</span>";
+       } else if (strcmp(element_name, "习惯用语")==0) {
+               res="\n<span foreground=\"blue\">&lt;习惯用语&gt;</span>";
+       } else if (strcmp(element_name, "词性变化")==0) {
+               res="\n<span foreground=\"blue\">&lt;词性变化&gt;</span>";
+       } else if (strcmp(element_name, "特殊用法")==0) {
+               res="\n<span foreground=\"blue\">&lt;特殊用法&gt;</span>";
+       } else if (strcmp(element_name, "参考词汇")==0) {
+               res="\n<span foreground=\"blue\">&lt;参考词汇&gt;</span>";
+       } else if (strcmp(element_name, "常用词组")==0) {
+               res="\n<span foreground=\"blue\">&lt;常用词组&gt;</span>";
+       } else if (strcmp(element_name, "语源")==0) {
+               res="\n<span foreground=\"blue\">&lt;语源&gt;</span>";
+       } else if (strcmp(element_name, "派生")==0) {
+               res="\n<span foreground=\"blue\">&lt;派生&gt;</span>";
+       } else if (strcmp(element_name, "用法")==0) {
+               res="\n<span foreground=\"blue\">&lt;用法&gt;</span>";
+       } else if (strcmp(element_name, "注释")==0) {
+               res="\n<span foreground=\"blue\">&lt;注释&gt;</span>";
+       }
+       if (!res.empty()) {
+               *(((PwUserData*)user_data)->pango) += res;
+               ((PwUserData*)user_data)->cur_pos += xml_strlen(res.c_str());
+       }
+}
+
+static void powerword2link(const char *p, guint32 sec_size, const gchar *oword, std::string *pango, LinksPosList *links_list)
+{
+       PwUserData Data;
+       Data.pango = pango;
+       Data.links_list = links_list;
+       Data.cur_pos = 0;
+       Data.oword = oword;
+       Data.first_jbcy = true;
+
+       GMarkupParser parser;
+       parser.start_element = func_parse_start_element;
+       parser.end_element = NULL;
+       parser.text = NULL;
+       parser.passthrough = func_parse_passthrough;
+       parser.error = NULL;
+       GMarkupParseContext* context = g_markup_parse_context_new(&parser, (GMarkupParseFlags)0, &Data, NULL);
+       g_markup_parse_context_parse(context, p, sec_size, NULL);
+       g_markup_parse_context_end_parse(context, NULL);
+       g_markup_parse_context_free(context);
+}
+
+static bool parse(const char *p, unsigned int *parsed_size, ParseResult &result, const char *oword)
+{
+       if (*p != 'k')
+               return false;
+       p++;
+       size_t len = strlen(p);
+       if (len) {
+               std::string pango;
+               LinksPosList links_list;
+               powerword2link(p, len, oword, &pango, &links_list);
+               ParseResultItem item;
+               item.type = ParseResultItemType_link;
+               item.link = new ParseResultLinkItem;
+               item.link->pango = pango;
+               item.link->links_list = links_list;
+               result.item_list.push_back(item);
+       }
+       *parsed_size = 1 + len + 1;
+       return true;
+}
+
+/* Configuration hook registered in stardict_plugin_init(); intentionally a no-op. */
+static void configure()
+{
+}
+
+DLLIMPORT bool stardict_plugin_init(StarDictPlugInObject *obj)
+{
+       if (strcmp(obj->version_str, PLUGIN_SYSTEM_VERSION)!=0) {
+               g_print("Error: PowerWord data parsing plugin version doesn't match!\n");
+               return true;
+       }
+       obj->type = StarDictPlugInType_PARSEDATA;
+       obj->info_xml = g_strdup_printf("<plugin_info><name>%s</name><version>1.0</version><short_desc>%s</short_desc><long_desc>%s</long_desc><author>Hu Zheng &lt;huzheng_001@163.com&gt;</author><website>http://stardict.sourceforge.net</website></plugin_info>", _("PowerWord data parsing"), _("PowerWord data parsing engine."), _("Parse the PowerWord data."));
+       obj->configure_func = configure;
+       return false;
+}
+
+/* Plug-in exit hook; this plug-in holds no global state, so there is nothing to release. */
+DLLIMPORT void stardict_plugin_exit(void)
+{
+}
+
+/*
+ * Parse-data plug-in entry point: registers parse() as the data parser and
+ * prints a "loaded" message. Always returns false.
+ */
+DLLIMPORT bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj)
+{
+       obj->parse_func = parse;
+       g_print(_("PowerWord data parsing plug-in loaded.\n"));
+       return false;
+}
+
+#ifdef _WIN32
+BOOL APIENTRY DllMain (HINSTANCE hInst     /* Library instance handle. */ ,
+                       DWORD reason        /* Reason this function is being called. */ ,
+                       LPVOID reserved     /* Not used. */ )
+{
+    switch (reason)
+    {
+      case DLL_PROCESS_ATTACH:
+        break;
+
+      case DLL_PROCESS_DETACH:
+        break;
+
+      case DLL_THREAD_ATTACH:
+        break;
+
+      case DLL_THREAD_DETACH:
+        break;
+    }
+
+    /* Returns TRUE on success, FALSE on failure */
+    return TRUE;
+}
+#endif
diff --git a/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.dev b/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.dev
new file mode 100644 (file)
index 0000000..2c0cab4
--- /dev/null
@@ -0,0 +1,69 @@
+[Project]\r
+FileName=stardict_powerword_parsedata.dev\r
+Name=stardict_powerword_parsedata\r
+UnitCount=2\r
+Type=3\r
+Ver=1\r
+ObjFiles=\r
+Includes=\r
+Libs=\r
+PrivateResource=\r
+ResourceIncludes=\r
+MakeIncludes=\r
+Compiler=-DBUILDING_DLL=1 -I"<INCLUDE>\cairo" -I"<INCLUDE>\gtk-2.0" -I"<INCLUDE>\gtkdeps-2.0" -I"<LIB>\gtk-2.0\include" -I"<INCLUDE>\atk-1.0" -I"<INCLUDE>\pango-1.0" -I"<INCLUDE>\glib-2.0" -I"<LIB>\glib-2.0" -I"<LIB>\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_\r
+CppCompiler=-DBUILDING_DLL=1 -I"<INCLUDE>\cairo" -I"<INCLUDE>\gtk-2.0" -I"<INCLUDE>\gtkdeps-2.0" -I"<LIB>\gtk-2.0\include" -I"<INCLUDE>\atk-1.0" -I"<INCLUDE>\pango-1.0" -I"<INCLUDE>\glib-2.0" -I"<LIB>\glib-2.0" -I"<LIB>\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_\r
+Linker=--no-export-all-symbols --add-stdcall-alias -lcairo -lgtk-win32-2.0 -lgdk-win32-2.0 -latk-1.0 -lgdk_pixbuf-2.0 -lm -lpangowin32-1.0 -lpango-1.0 -lgobject-2.0 -lgmodule-2.0 -lglib-2.0 -lgthread-2.0 -lintl -lwinmm -mno-cygwin -mwindows -mms-bitfields_@@_\r
+IsCpp=1\r
+Icon=\r
+ExeOutput=\r
+ObjectOutput=\r
+OverrideOutput=0\r
+OverrideOutputName=stardict_powerword_parsedata.dll\r
+HostApplication=\r
+Folders=\r
+CommandLine=\r
+UseCustomMakefile=0\r
+CustomMakefile=\r
+IncludeVersionInfo=0\r
+SupportXPThemes=0\r
+CompilerSet=0\r
+CompilerSettings=0000000000000000000100\r
+\r
+[Unit1]\r
+FileName=stardict_powerword_parsedata.h\r
+CompileCpp=1\r
+Folder=stardict_powerword_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[Unit2]\r
+FileName=stardict_powerword_parsedata.cpp\r
+CompileCpp=1\r
+Folder=stardict_powerword_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[VersionInfo]\r
+Major=0\r
+Minor=1\r
+Release=1\r
+Build=1\r
+LanguageID=1033\r
+CharsetID=1252\r
+CompanyName=\r
+FileVersion=\r
+FileDescription=Developed using the Dev-C++ IDE\r
+InternalName=\r
+LegalCopyright=\r
+LegalTrademarks=\r
+OriginalFilename=\r
+ProductName=\r
+ProductVersion=\r
+AutoIncBuildNr=0\r
+\r
diff --git a/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.h b/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.h
new file mode 100644 (file)
index 0000000..fe56acf
--- /dev/null
@@ -0,0 +1,23 @@
+/* Public interface of the PowerWord parse-data plug-in. */
+#ifndef _STARDICT_POWERWORD_PARSEDATA_PLUGIN_H_
+#define _STARDICT_POWERWORD_PARSEDATA_PLUGIN_H_
+
+/* DLLIMPORT marks the plug-in entry points: dllexport while building the DLL
+ * on Windows, dllimport when consuming it, and nothing on other platforms. */
+#ifdef _WIN32
+#if BUILDING_DLL
+# define DLLIMPORT __declspec (dllexport)
+#else /* Not BUILDING_DLL */
+# define DLLIMPORT __declspec (dllimport)
+#endif /* Not BUILDING_DLL */
+#else
+# define DLLIMPORT
+#endif
+
+#include "../../src/lib/plugin.h"
+#include "../../src/lib/parsedata_plugin.h"
+
+/* Entry points are declared with C linkage so their symbol names are unmangled. */
+extern "C" {
+       DLLIMPORT extern bool stardict_plugin_init(StarDictPlugInObject *obj);
+       DLLIMPORT extern void stardict_plugin_exit(void);
+       DLLIMPORT extern bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj);
+}
+
+#endif
diff --git a/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.v b/stardict-plugins/stardict-powerword-parsedata-plugin/stardict_powerword_parsedata.v
new file mode 100644 (file)
index 0000000..683abb3
--- /dev/null
@@ -0,0 +1,10 @@
+/* Linker version script: export only the three plug-in entry points and keep
+ * every other symbol local to the shared object. */
+{
+       global:
+               extern "C" {
+                       stardict_plugin_init;
+                       stardict_plugin_exit;
+                       stardict_parsedata_plugin_init;
+               };
+       local:
+               *;
+};
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/Makefile.am b/stardict-plugins/stardict-wiki-parsedata-plugin/Makefile.am
new file mode 100644 (file)
index 0000000..3f824b7
--- /dev/null
@@ -0,0 +1,22 @@
+# Files shipped in the distribution tarball but not compiled.
+EXTRA_DIST = stardict_wiki_parsedata.v stardict_wiki_parsedata.dev
+
+
+# Linker version script for the plug-in. It lives in the source directory, so
+# it is referenced through $(srcdir) to keep out-of-tree (VPATH) builds working.
+LD_VERSION_SCRIPT_OPTION="-Wl,--version-script=$(srcdir)/stardict_wiki_parsedata.v"
+
+noinst_HEADERS = stardict_wiki_parsedata.h
+
+stardict_wiki_parsedata_LTLIBRARIES = stardict_wiki_parsedata.la
+
+# Installation directory of the plug-in module.
+stardict_wiki_parsedatadir = $(libdir)/mstardict/plugins
+
+stardict_wiki_parsedata_la_SOURCES = stardict_wiki_parsedata.cpp       \
+                                       global.cpp global.h     \
+                                       stardict_wiki2xml.cpp stardict_wiki2xml.h       \
+                                       TXML.cpp TXML.h \
+                                       WIKI2XML.cpp WIKI2XML.h
+
+# Build a dlopen()able module without a version suffix.
+stardict_wiki_parsedata_la_LDFLAGS =   -avoid-version \
+                                       -module \
+                                       $(LD_VERSION_SCRIPT_OPTION)
+
+INCLUDES = @LIB_STARDICT_CFLAGS@ -I$(top_builddir)/src
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/TXML.cpp b/stardict-plugins/stardict-wiki-parsedata-plugin/TXML.cpp
new file mode 100644 (file)
index 0000000..1306c57
--- /dev/null
@@ -0,0 +1,82 @@
+#include "TXML.h"
+
+// *****************************************************************************
+// *****************************************************************************
+//
+// TXML
+//
+// *****************************************************************************
+// *****************************************************************************
+
+// Builds a tag descriptor from the text between s[f] and s[t] (presumably the
+// positions of the tag's '<' and '>' - confirm against the caller).
+//   f, t         : stored as from / to.
+//   s            : the string containing the tag; modified when fix_comments applies.
+//   fix_comments : when true and the tag name starts with '!', every '<' or '>'
+//                  strictly between from and to is replaced in s by an entity.
+// A leading '/' in the name marks a closing tag, a trailing '/' a self-closing one.
+TXML::TXML ( int f , int t , string &s , bool fix_comments )
+       {
+       from = f ;
+       to = t ;
+       name = s.substr ( from + 1 , to - (from+1) ) ;
+       name = trim ( name ) ;
+       name = before_first ( ' ' , name ) ;
+       closing = selfclosing = false ;
+       if ( left ( name , 1 ) == "/" )
+          {
+          closing = true ;
+          name = name.substr ( 1 , name.length()-1 ) ;
+          }
+       if ( right ( name , 1 ) == "/" )
+          {
+          selfclosing = true ;
+          name = name.substr ( 0 , name.length()-1 ) ;
+          }
+    name = trim ( name ) ;
+       
+       // This will replace < and > within a comment with the appropriate HTML entities
+       if ( fix_comments && left ( name , 1 ) == "!" )
+               {
+               int a ;
+               for ( a = from+1 ; a < to ; a++ )
+                       {
+                       if ( s[a] != '>' && s[a] != '<' ) continue ;
+                       // The replacement is three characters longer, so the end position moves.
+                       to += 3 ;
+                       // Insert "&gt"/"&lt" in front of the bracket; after the first insert
+                       // s[a] is '&', so at most one of the two inserts happens.
+                       if ( s[a] == '>' ) s.insert ( a , "&gt" ) ;
+                       if ( s[a] == '<' ) s.insert ( a , "&lt" ) ;
+                       // Overwrite the original bracket (now at a+3) to complete the entity.
+                       s[a+3] = ';' ;
+                       }    
+               }    
+       }    
+
+// Adjusts the stored tag positions after one character was removed at pos:
+// each of from / to that lies behind pos moves one step to the left.
+void TXML::remove_at ( int pos )
+       {
+       if ( from > pos ) --from ;
+       if ( to > pos ) --to ;
+       }    
+
+// Adjusts the stored tag positions after one character was inserted at pos:
+// each of from / to that lies behind pos moves one step to the right.
+void TXML::insert_at ( int pos )
+       {
+       if ( from > pos ) ++from ;
+       if ( to > pos ) ++to ;
+       }    
+
+// Appends an attribute; key and value are trimmed and stored at the same
+// index of the parallel key / value vectors.
+void TXML::add_key_value ( string k , string v )
+       {
+    const string trimmed_key = trim ( k ) ;
+    const string trimmed_value = trim ( v ) ;
+    key.push_back ( trimmed_key ) ;
+    value.push_back ( trimmed_value ) ;
+       }    
+       
+string TXML::get_string ()
+       {
+    string ret ;
+    ret = "<" + name ;
+    for ( size_t a = 0 ; a < key.size() ; a++ )
+       {
+           for ( size_t b = 0 ; b < key[a].length() ; b++ )
+               {
+               if ( key[a][b] == ' ' ) key[a][b] = '_' ;
+               }               
+           ret += " " + key[a] ;
+           if ( value[a] != "" ) ret += "=\"" + unquote ( SINGLE_QUOTE , value[a] ) + "\"" ;
+       }    
+       if ( text == "" ) ret += "/>" ;
+       else ret += ">" + text + "</" + name + ">" ;
+       return ret ;
+       }
+
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/TXML.h b/stardict-plugins/stardict-wiki-parsedata-plugin/TXML.h
new file mode 100644 (file)
index 0000000..4055c98
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef _TXML_H_
+#define _TXML_H_
+
+#include "global.h"
+
+class TXML
+       {
+    public :
+    TXML () {} ;
+    virtual ~TXML() {};
+       TXML ( int f , int t , string &s , bool fix_comments = true ) ;
+       virtual void remove_at ( int pos ) ;
+       virtual void insert_at ( int pos ) ;
+       
+    virtual void add_key_value ( string k , string v = "" ) ;
+    virtual string get_string () ;
+    
+       // Variables
+       int from , to ;
+       bool closing , selfclosing ;
+    string name , text ;
+    vector <string> key , value ;
+       } ;    
+       
+#endif
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/WIKI2XML.cpp b/stardict-plugins/stardict-wiki-parsedata-plugin/WIKI2XML.cpp
new file mode 100644 (file)
index 0000000..de7e9b0
--- /dev/null
@@ -0,0 +1,573 @@
+#include "WIKI2XML.h"
+#include <glib.h>
+
+// A fresh table has neither a row nor a cell open.
+TTableInfo::TTableInfo ()
+       {
+       td_open = tr_open = false ;
+       }    
+
+// Returns the markup that closes the table: any open cell and row first
+// (innermost first), then the table itself. The open flags are left untouched.
+string TTableInfo::close ()
+       {
+       string closing_tags ;
+       if ( td_open ) closing_tags += "</wikitablecell>" ;
+       if ( tr_open ) closing_tags += "</wikitablerow>" ;
+       return closing_tags + "</wikitable>" ;
+       }    
+       
+string TTableInfo::new_row ()
+       {
+       string ret ;
+       if ( td_open ) ret += "</wikitablecell>" ;
+       if ( tr_open ) ret += "</wikitablerow>" ;
+       ret += "<wikitablerow>" ;
+       td_open = false ;
+       tr_open = true ;
+       return ret ;
+       }    
+
+string TTableInfo::new_cell ( string type )
+       {
+       string ret ;
+       if ( !tr_open ) ret += new_row () ;
+       if ( td_open ) ret += "</wikitablecell>" ;
+       ret += "<wikitablecell type=\"" + upper ( type ) + "\">" ;
+       td_type = type ;
+       td_open = true ;
+       return ret ;
+       }    
+
+// *****************************************************************************
+// *****************************************************************************
+//
+// WIKI2XML
+//
+// *****************************************************************************
+// *****************************************************************************
+
+// Replaces one delimited span in l: if s1 matches at position from, the first
+// following occurrence of s2 is looked up and "s1 ... s2" is rewritten as
+// "r1 ... r2". Nothing happens when s1 does not match or no s2 follows.
+//   l        : line being rewritten (modified in place)
+//   from     : position where s1 must start (taken by reference, not changed here)
+//   s1 , s2  : opening and closing delimiter
+//   r1 , r2  : their replacements
+//   extend   : when true, the closing match is pushed right for as long as s2
+//              also matches one position further on
+void WIKI2XML::parse_symmetric ( string &l , size_t &from , 
+                                                               string s1 , string s2 ,
+                                                               string r1 , string r2 ,
+                                    bool extend )
+       {
+       int a , b ;
+       if ( !submatch ( l , s1 , from ) ) return ; // Left does not match
+       for ( a = from + s1.length() ; a + s2.length() <= l.length() ; a++ )
+               {
+               if ( !submatch ( l , s2 , a ) ) continue ;
+               // b ends up as the start of the closing delimiter that is actually used.
+               for ( b = a+1 ; extend && submatch ( l , s2 , b ) ; b++ ) ;
+               b-- ;
+               // prefix + r1 + inner text + r2 + remainder after the closing delimiter
+               l = l.substr ( 0 , from ) +
+                       r1 +
+                       l.substr ( from + s1.length() , b - from - s1.length() )  +
+                       r2 +
+                       l.substr ( b + s2.length() , l.length() ) ;
+               if ( debug ) cout << "newl : " << l << endl ;
+               break ;
+               }    
+       }
+     
+// Converts a "[[...]]" link (mode 'L') or "{{...}}" template (mode 'T') that
+// starts at l[from] into a <wikilink>/<wikitemplate> element, in place.
+//   l    : line being rewritten
+//   from : position of the first opening bracket on entry; on success it is
+//          moved towards the end of the inserted replacement
+//   mode : 'L' for links, 'T' for templates
+// Returns without changing l when no matching closing pair is found; note that
+// from has already been advanced by one in that case.
+void WIKI2XML::parse_link ( string &l , size_t &from , char mode )
+       {
+    from += 1 ;
+    size_t a , cnt = 1 ;
+    chart par_open = '[' ; // mode 'L'
+    chart par_close = ']' ; // mode 'L'
+    if ( mode == 'T' ) { par_open = '{' ; par_close = '}' ; }
+    // Look for the closing pair; nested openers are converted recursively first.
+    // NOTE(review): the recursive call passes no mode and so relies on the
+    // default declared in the header - confirm that this is right for mode 'T'.
+    for ( a = from ; cnt > 0 && a+1 < l.length() ; a++ )
+       {
+           if ( l[a] == par_open && l[a+1] == par_open )
+               parse_link ( l , a ) ;
+       else if ( l[a] == par_close && l[a+1] == par_close )
+               cnt-- ;
+       }    
+       if ( cnt > 0 ) return ; // Not a valid link
+       
+       int to = a-1 ; // Without "]]"
+       string link = l.substr ( from+1 , to-from-1 ) ;
+       
+       TXML x ;
+       vector <string> parts ;
+       explode ( '|' , link , parts ) ;
+       if ( mode == 'L' )
+        {
+        x.name = "wikilink" ;
+        x.add_key_value ( "type" , "internal" ) ;
+        }
+       else if ( mode == 'T' ) x.name = "wikitemplate" ;
+       
+       // Each '|'-separated part becomes one <wikiparameter> child.
+       for ( a = 0 ; a < parts.size() ; a++ )
+           {
+           bool last = ( a + 1 == parts.size() ) ;
+           string p = parts[a] ;
+           parse_line_sub ( p ) ;
+
+           // Every part but the first (and, for links, but the last) may be "key=value".
+           if ( a > 0 && ( mode != 'L' || !last ) )
+           {
+           string key , value ;
+           vector <string> subparts ;
+           explode ( '=' , p , subparts ) ;
+           if ( subparts.size() == 1 )
+              {
+              char *str = g_markup_escape_text(p.c_str(), p.length());
+              value = xml_embed ( str , "value" ) ;
+              g_free(str);
+              }
+           else
+              {
+              // The first piece is the key; the rest is re-joined with '=' as the value.
+              key = xml_embed ( subparts[0] , "key" ) ;
+              subparts.erase ( subparts.begin() ) ;
+              string itmp = implode ( "=" , subparts );
+              char *str = g_markup_escape_text(itmp.c_str(), itmp.length());
+              value = xml_embed ( str , "value" ) ;
+              g_free(str);
+              }
+           p = key + value ;
+           }
+        else {
+               char *str = g_markup_escape_text(p.c_str(), p.length());
+               p = xml_embed ( str , "value" ) ;
+               g_free(str);
+       }
+
+           string param = "number=\"" + val ( a ) + "\"" ;
+           if ( last ) param += " last=\"1\"" ;
+           x.text += xml_embed ( p , "wikiparameter" , param ) ;
+           }
+
+       // Text characters directly after "]]" are attached to the link as <trail>
+       // and included in the range that gets replaced.
+       if ( mode == 'L' ) // Try link trail
+          {
+          string trail ;
+          for ( a = to+2 ; a < l.length() && is_text_char ( l[a] ) ; a++ )
+              trail += l[a] ;
+       to = a-2 ;
+       if ( trail != "" ) x.text += xml_embed ( trail , "trail" ) ;
+       }
+       
+       x.add_key_value ( "parameters" , val ( parts.size() ) ) ;
+       string replacement = x.get_string () ;
+       parse_line_sub ( replacement ) ;
+       
+       // Swap the original markup (from the first opening bracket on) for the element.
+       l.erase ( from-1 , to-from+3 ) ;
+       l.insert ( from-1 , replacement ) ;
+       if ( debug ) cout << "Link : " << link << endl << "Replacement : " << replacement << endl ;
+       if ( debug ) cout << "Result : " << l << endl << endl ;
+       from = from + replacement.length() - 2 ;
+       }    
+       
+// True for the characters that introduce a list item: '*', '#' and ':'.
+bool WIKI2XML::is_list_char ( chart c ) // For now...
+       {
+       return c == '*' || c == '#' || c == ':' ;
+       }    
+
+string WIKI2XML::get_list_tag ( chart c , bool open )
+    {
+    string ret ;
+    if ( debug ) cout << "get_list_tag : " << c << endl ;
+    if ( c == '*' ) ret = "ul" ;
+    if ( c == '#' ) ret = "ol" ;
+    if ( c == ':' ) ret = "dl" ;
+    if ( ret != "" )
+       {
+           string itemname = "li" ;
+           if ( c == ':' ) itemname = "dd" ;
+           if ( open ) ret = "<" + ret + "><" + itemname + ">" ;
+           else ret = "</" + itemname + "></" + ret + ">" ;
+       }    
+       return ret ;
+    }
+        
+// Handles the list prefix of line l and returns the tags needed to get from
+// the previous list nesting (member list) to the nesting of this line.
+// The leading list characters and the blanks after them are removed from l,
+// and list is updated to the new prefix.
+string WIKI2XML::fix_list ( string &l )
+    {
+    size_t a , b ;
+    // a = length of the leading run of list characters
+    for ( a = 0 ; a < l.length() && is_list_char ( l[a] ) ; a++ ) ;
+    string newlist , pre ;
+    if ( a > 0 )
+       {
+        newlist = left ( l , a ) ;
+        while ( a < l.length() && l[a] == ' ' ) a++ ; // Removing leading blanks
+        l = l.substr ( a , l.length() ) ;
+        }
+    if ( debug ) cout << "fix_list : " << l << endl ;
+    if ( list == "" && newlist == "" ) return "" ;
+    for ( a = 0 ; a < list.length() && 
+                               a < newlist.length() && 
+                               list[a] == newlist[a] ; a++ ) ; // The common part, if any
+                               
+    // Closing tags are prepended so that the innermost level is closed first.
+    for ( b = a ; b < list.length() ; b++ )
+       pre = get_list_tag ( list[b] , false ) + pre ; // Close old list tags
+    for ( b = a ; b < newlist.length() ; b++ )
+       pre += get_list_tag ( newlist[b] , true ) ; // Open new ones
+       
+    if ( debug ) cout << "pre : " << pre << endl ;
+    if ( debug ) cout << "newlist : " << newlist << endl ;
+    list = newlist ;
+    return pre ;
+    }    
+
+// Converts one line of wiki text to XML in place: list prefix, then exactly
+// one block-level construct (paragraph break, horizontal rule, heading,
+// pre-formatted text or table markup), then inline markup on what is left.
+void WIKI2XML::parse_line ( string &l )
+    {
+    size_t a;
+    if ( debug ) cout << l << endl ;
+    string pre ;
+    string oldlist = list ;
+    pre += fix_list ( l ) ;
+    // Same list nesting as the previous line: close that item and open a new one.
+    if ( list != "" && list == oldlist )
+       {
+           string itemname = "li" ;
+           if ( right ( list , 1 ) == ":" ) itemname = "dd" ;
+        pre = "</" + itemname + "><" + itemname + ">" + pre ;
+        }
+    
+    if ( l == "" ) // Paragraph
+       {
+           l = "<p/>" ;
+       }
+       else if ( left ( l , 4 ) == "----" ) // <hr>
+           {
+           // Skip the whole run of dashes; the rest of the line is kept.
+           for ( a = 0 ; a < l.length() && l[a] == l[0] ; a++ ) ;
+           pre += "<wikiurlcounter action=\"reset\"/><hr/>" ;
+           l = l.substr ( a , l.length() - a ) ;
+           }
+       else if ( l != "" && l[0] == '=' ) // Heading
+               {
+           // a = number of '=' found symmetrically at both ends of the line
+           for ( a = 0 ; a < l.length() && l[a] == '=' && l[l.length()-a-1] == '=' ; a++ ) ;
+           string h = "h0" ;
+           if ( a >= l.length() ) h = "" ; // No heading
+//         else if ( l[a] != ' ' ) h = "" ;
+//         else if ( l[l.length()-a-1] != ' ' ) h = "" ;
+           else if ( a < 1 || a > 9 ) h = "" ;
+           if ( h != "" )
+               {
+               l = l.substr ( a , l.length() - a*2 ) ;
+           // Turns "h0" into "h<a>"; a is within 1..9 here.
+           h[1] += a ;
+           l = xml_embed ( l , h ) ;
+           }    
+               }    
+    else if ( l != "" && l[0] == ' ' ) // Pre-formatted text
+       {
+           for ( a = 0 ; a < l.length() && l[a] == ' ' ; a++ ) ;
+           l = l.substr ( a , l.length() ) ;
+           // The content goes into pre, so it is not parsed for inline markup below.
+           if ( l != "" )
+               {
+            pre += "<pre>" + l + "</pre>" ;
+            l = "" ;
+            }    
+       }
+       // Table start, table end (but not "|}}"), or a row/cell line inside an open table.
+       else if ( left ( l , 2 ) == "{|" || (left ( l , 2 ) == "|}" && l[2] != '}' ) ||
+                               ( tables.size() > 0 && l != "" && ( l[0] == '|' || l[0] == '!' ) ) )
+        {
+        pre += table_markup ( l ) ;
+        l = "" ;
+        }    
+               
+       
+       if ( l != "" ) parse_line_sub ( l ) ;
+    
+    if ( pre != "" ) l = pre + l ;   
+    }    
+
+// True when protocol is one of the recognised external link schemes.
+// The comparison is exact, so callers pass the name in upper case.
+bool WIKI2XML::is_external_link_protocol ( string protocol )
+    {
+    return protocol == "HTTP" || protocol == "FTP" || protocol == "MAILTO" ;
+    }
+    
+int WIKI2XML::scan_url ( string &l , size_t from )
+    {
+    size_t a ;
+    for ( a = from ; a < l.length() ; a++ )
+        {
+        if ( l[a] == ':' || l[a] == '/' || l[a] == '.' ) continue ;
+        if ( l[a] >= '0' && l[a] <= '9' ) continue ;
+        if ( is_text_char ( l[a] ) ) continue ;
+        break ; // End of URL
+        }
+    return a ;
+    }
+       
+// Converts a bare URL into <url>/<title> elements, in place.
+//   l    : line being rewritten
+//   from : position of the ':' of "://" on entry; moved to the last character
+//          of the replacement when a URL was converted
+// Nothing is changed when the word in front of "://" is not a known protocol.
+void WIKI2XML::parse_external_freelink ( string &l , size_t &from )
+       {
+       int a ;
+       // Walk back over the text characters in front of ':' to find where the protocol starts.
+       for ( a = from - 1 ; a >= 0 && is_text_char ( l[a] ) ; a-- ) ;
+       // NOTE(review): this also returns when the protocol starts at the very
+       // beginning of the line, so such URLs are not converted - confirm intent.
+       if ( a == -1 ) return ;
+       a++ ;
+       string protocol = upper ( l.substr ( a , from - a ) ) ;
+       if ( debug ) cout << "protocol : " << protocol << endl ;
+       if ( !is_external_link_protocol ( protocol ) ) return ; 
+       int to = scan_url ( l , a ) ;
+       string url = l.substr ( a , to - a ) ;
+       // The URL doubles as its own title.
+       string replacement ;
+    replacement += xml_embed ( url , "url" ) ;
+    replacement += xml_embed ( url , "title" ) ;
+       l = left ( l , a ) + replacement + l.substr ( to , l.length() - to ) ;
+       from = a + replacement.length() - 1 ;
+       }
+       
+// Converts a bracketed external link "[url title]" into a <wikilink
+// type='external'> element, in place.
+//   l    : line being rewritten
+//   from : position of '[' on entry; moved to the last character of the
+//          replacement when a link was converted
+// Nothing is changed when the text after '[' does not start with a known
+// protocol or when no closing ']' follows.
+void WIKI2XML::parse_external_link ( string &l , size_t &from )
+       {
+       string protocol = upper ( before_first ( ':' , l.substr ( from + 1 , l.length() - from ) ) ) ;
+       if ( !is_external_link_protocol ( protocol ) ) return ;
+    size_t to ;
+    for ( to = from + 1 ; to < l.length() && l[to] != ']' ; to++ ) ;
+    if ( to == l.length() ) return ;
+    // Bracket content: the URL, optionally followed by a blank and the title.
+    string url = l.substr ( from + 1 , to - from - 1 ) ;
+    string title = after_first ( ' ' , url ) ;
+    url = before_first ( ' ' , url ) ;
+    string replacement ;
+    replacement += xml_embed ( url , "url" ) ;
+    // Links without a title get a <wikiurlcounter action="add"/> element as title.
+    if ( title == "" )
+        replacement += xml_embed ( "<wikiurlcounter action=\"add\"/>" , "title" ) ;
+    else replacement += xml_embed ( title , "title" ) ;
+    replacement = xml_embed ( replacement , "wikilink" , "type='external' protocol='" + protocol + "'" ) ;
+    l = left ( l , from ) + replacement + l.substr ( to + 1 , l.length() - to ) ;
+    from = from + replacement.length() - 1 ;
+       }
+       
+// Converts the inline markup of l in place: internal links, templates,
+// external links, bare URLs, bold and italics. The handlers take the scan
+// position a by reference and may move it past the text they inserted.
+void WIKI2XML::parse_line_sub ( string &l )
+       {
+       size_t a ;
+    for ( a = 0 ; a < l.length() ; a++ )
+        {
+        if ( l[a] == '[' && a+1 < l.length() && l[a+1] == '[' ) // [[Link]]
+               parse_link ( l , a , 'L' ) ;
+        else if ( l[a] == '{' && a+1 < l.length() && l[a+1] == '{' ) // {{Template}}
+               parse_link ( l , a , 'T' ) ;
+               else if ( l[a] == '[' ) // External link
+            parse_external_link ( l , a ) ;
+        else if ( a+2 < l.length() && l[a] == ':' && l[a+1] == '/' && l[a+2] == '/' ) // External freelink
+            parse_external_freelink ( l , a ) ;
+       else if ( l[a] == SINGLE_QUOTE ) // Bold and italics
+                       {
+               // Bold (''') is tried before italics ('') because it is the longer delimiter.
+               parse_symmetric ( l , a , "'''" , "'''" , "<b>" , "</b>" , true ) ; 
+               parse_symmetric ( l , a , "''" , "''" , "<i>" , "</i>" ) ; 
+               }
+        } 
+       }
+     
+void WIKI2XML::parse_lines ( vector <string> &lines )
+    {
+    size_t a ;
+    for ( a = 0 ; a < lines.size() ; a++ )
+        {
+        parse_line ( lines[a] ) ;
+        }
+        
+    string end ;
+    
+    // Cleanup lists
+    end = fix_list ( end ) ;
+    if ( end != "" ) lines.push_back ( end ) ;
+    
+    // Cleanup tables
+    end = "" ;
+    while ( tables.size() )
+       {
+           end += tables[tables.size()-1].close () ;
+           tables.pop_back () ;
+       }    
+       if ( end != "" ) lines.push_back ( end ) ;
+    }    
+
+void WIKI2XML::init ( string s )
+       {
+       list = "" ;
+       lines.clear () ;
+       
+       // Now we remove evil HTML
+       allowed_html.clear () ;
+       allowed_html.push_back ( "b" ) ;
+       allowed_html.push_back ( "i" ) ;
+       allowed_html.push_back ( "p" ) ;
+       allowed_html.push_back ( "b" ) ;
+       allowed_html.push_back ( "br" ) ;
+       allowed_html.push_back ( "hr" ) ;
+       allowed_html.push_back ( "tt" ) ;
+       allowed_html.push_back ( "pre" ) ;
+       allowed_html.push_back ( "nowiki" ) ;
+       allowed_html.push_back ( "math" ) ;
+       allowed_html.push_back ( "strike" ) ;
+       allowed_html.push_back ( "u" ) ;
+       allowed_html.push_back ( "table" ) ;
+       allowed_html.push_back ( "caption" ) ;
+       allowed_html.push_back ( "tr" ) ;
+       allowed_html.push_back ( "td" ) ;
+       allowed_html.push_back ( "th" ) ;
+       allowed_html.push_back ( "li" ) ;
+       allowed_html.push_back ( "ul" ) ;
+       allowed_html.push_back ( "ol" ) ;
+       allowed_html.push_back ( "dl" ) ;
+       allowed_html.push_back ( "dd" ) ;
+       allowed_html.push_back ( "dt" ) ;
+       allowed_html.push_back ( "div" ) ;
+       allowed_html.push_back ( "h1" ) ;
+       allowed_html.push_back ( "h2" ) ;
+       allowed_html.push_back ( "h3" ) ;
+       allowed_html.push_back ( "h4" ) ;
+       allowed_html.push_back ( "h5" ) ;
+       allowed_html.push_back ( "h6" ) ;
+       allowed_html.push_back ( "h7" ) ;
+       allowed_html.push_back ( "h8" ) ;
+       allowed_html.push_back ( "h9" ) ;
+       allowed_html.push_back ( "small" ) ;
+       allowed_html.push_back ( "center" ) ;
+//     allowed_html.push_back ( "" ) ;
+       size_t a ;
+       for ( a = 0 ; a < allowed_html.size() ; a++ )
+               allowed_html[a] = upper ( allowed_html[a] ) ;
+       
+       vector <TXML> taglist ;
+       make_tag_list ( s , taglist ) ;
+       remove_evil_html ( s , taglist ) ;
+       
+       // Now evaluate each line
+       explode ( '\n' , s , lines ) ;
+       }    
+
+string WIKI2XML::get_xml ()
+       {
+       string ret = "<text>";
+       ret += implode ( "\n" , lines );
+       ret += "</text>";
+       
+       // Invalidating mdash
+       /*size_t a = ret.find ( "&mdash;" ) ;
+       while ( a >= 0 && a < ret.length() )
+               {
+               ret[a] = '!' ;
+               a = ret.find ( "&mdash;" , a ) ;
+               }*/
+               
+       return ret ;
+       }
+       
+void WIKI2XML::replace_part ( string &s , size_t from , size_t to , string with )
+       {
+       s = s.substr ( 0 , from ) + with + s.substr ( to + 1 , s.length() - to - 1 ) ;
+       }    
+    
+void WIKI2XML::replace_part_sync ( string &s , size_t from , size_t to , string with , vector <TXML> &list )
+       {
+       size_t a , b ;
+       replace_part ( s , from , to , with ) ;
+       for ( a = 0 ; a < list.size() ; a++ )
+               {
+               for ( b = 0 ; b < with.length() ; b++ ) list[a].insert_at ( from ) ;
+               for ( b = from ; b <= to ; b++ ) list[a].remove_at ( from ) ;
+               }    
+       }    
+    
+// Collects every "<...>" span of s into `list` (one TXML per span) and, while
+// scanning, escapes angle brackets that do not belong to such a span:
+// a '>' met outside a tag becomes "&gt;", a '<' without a matching unquoted
+// '>' becomes "&lt;". s is modified in place.
+// ATTENTION : this doesn't handle all HTML comments correctly!
+void WIKI2XML::make_tag_list ( string &s , vector <TXML> &list )
+       {
+       list.clear () ;
+       size_t a;
+       int b;
+       for ( a = 0 ; a < s.length() ; a++ )
+               {
+               if ( s[a] == '>' ) // Rogue >
+                       {
+               // Overwrite the '>' with ';' and put "&gt" in front of it => "&gt;"
+               s[a] = ';' ;
+               s.insert ( a , "&gt" ) ;
+                       continue ;
+                       }
+               else if ( s[a] != '<' ) continue ;
+               // Search for the closing '>' that is not inside a quoted string
+               b = find_next_unquoted ( '>' , s , a ) ;
+               if ( b == -1 ) // Rogue <
+                       {
+               // Same trick as above => "&lt;"
+               s[a] = ';' ;
+               s.insert ( a , "&lt" ) ;
+               continue ;
+               }       
+               list.push_back ( TXML ( a , b , s ) ) ;
+               // Resume scanning at the `to` offset of the tag just recorded
+               a = list[list.size()-1].to ;
+               }    
+       }
+void WIKI2XML::remove_evil_html ( string &s , vector <TXML> &taglist )
+       {
+       size_t a , b ;
+       for ( a = 0 ; a < taglist.size() ; a++ )
+               {
+               string tag = upper ( taglist[a].name ) ;
+               for ( b = 0 ; b < allowed_html.size() && tag != allowed_html[b] ; b++ ) ;
+               if ( b < allowed_html.size() ) continue ;
+               replace_part_sync ( s , taglist[a].from , taglist[a].from , "&lt;" , taglist ) ;
+               replace_part_sync ( s , taglist[a].to , taglist[a].to , "&gt;" , taglist ) ;
+               }    
+       }
+
+string WIKI2XML::table_markup ( string &l )
+       {
+       size_t a ;
+       string ret ;
+       if ( left ( l , 2 ) == "{|" ) // Open table
+               {
+               ret = "<wikitable>" ;
+               ret += xml_embed ( l.substr ( 2 , l.length() - 2 ) , "wikiparameter" ) ;
+               tables.push_back ( TTableInfo () ) ;
+               }
+       else if ( left ( l , 2 ) == "|}" ) 
+               {
+               ret = tables[tables.size()-1].close () ;
+               tables.pop_back () ;
+               }
+       else if ( left ( l , 2 ) == "|-" ) 
+               {
+               ret = tables[tables.size()-1].new_row () ;
+               for ( a = 1 ; a < l.length() && l[a] == '-' ; a++ ) ;
+               ret += xml_params ( l.substr ( a , l.length() - a ) ) ;
+               }
+       else
+               {
+               string init ;
+               if ( left ( l , 2 ) == "|+" )
+                       {
+                       init = "caption" ;
+                       l = l.substr ( 2 , l.length() - 2 ) ;
+                       }    
+               else if ( l[0] == '!' )
+                       {
+                       init = "header" ;
+                       l = l.substr ( 1 , l.length() - 1 ) ;
+                       }    
+               else if ( l[0] == '|' )
+                       {
+                       init = "cell" ;
+                       l = l.substr ( 1 , l.length() - 1 ) ;
+                       }
+               vector <string> sublines ;
+               for ( a = 0 ; a + 1 < l.length() ; a++ )
+                       {
+                       if ( l[a] == '|' && l[a+1] == '|' )
+                          {
+                          sublines.push_back ( left ( l , a ) ) ;
+                          l = l.substr ( a + 2 , l.length() - a ) ;
+                          a = (size_t)(-1) ;
+                          }    
+                       }    
+               if ( l != "" ) sublines.push_back ( l ) ;
+               for ( a = 0 ; a < sublines.size() ; a++ )
+                       {
+                       l = sublines[a] ;
+                       parse_line_sub ( l ) ;
+                       string params ;
+                       int b = find_next_unquoted ( '|' , l ) ;
+                       if ( b != -1 )
+                               {
+                           params = left ( l , b ) ;
+                           l = l.substr ( b + 1 , l.length() - b ) ;
+                               }        
+                       if ( params != "" ) l = xml_params ( params ) + l ;
+                       ret += tables[tables.size()-1].new_cell ( init ) ;
+                       ret += l ;
+                       }    
+               }    
+       return ret ;
+       }    
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/WIKI2XML.h b/stardict-plugins/stardict-wiki-parsedata-plugin/WIKI2XML.h
new file mode 100644 (file)
index 0000000..e86f35b
--- /dev/null
@@ -0,0 +1,58 @@
+#ifndef _WIKI2XML_H_
+#define _WIKI2XML_H_
+
+#include "global.h"
+#include "TXML.h"
+
+// Per-table state kept on WIKI2XML::tables while a wiki table is open.
+// Each method returns the markup that WIKI2XML::table_markup appends to its
+// output for the corresponding event.
+class TTableInfo
+       {
+       public :
+       TTableInfo () ;
+       virtual ~TTableInfo () {};
+       virtual string new_cell ( string type ) ; // type is "caption", "header" or "cell" (may be "")
+       virtual string new_row () ;
+       virtual string close () ;
+       bool tr_open , td_open ; // presumably: a row / a cell is currently open - see the .cpp
+       string td_type ;
+       } ;    
+
+// Converts wiki markup into an XML representation.
+// Usage as seen in wiki2xml(): construct from the text, call parse(), then
+// fetch the result with get_xml().
+class WIKI2XML
+       {
+       public :
+       WIKI2XML () {} ;
+       virtual ~WIKI2XML () {};
+       // NOTE: init() is virtual, but a call made from a constructor always
+       // runs WIKI2XML::init, never an override of a derived class.
+       WIKI2XML ( string &s ) { init ( s ) ; }
+       WIKI2XML ( vector <string> &l ) { init ( l ) ; }
+       virtual void init ( string s ) ;
+       // Convenience overload: joins the lines with '\n' and forwards to init(string).
+       virtual void init ( vector <string> &l ) { init ( implode ( "\n" , l ) ) ; }
+       virtual void parse () { parse_lines ( lines ) ; }
+       virtual string get_xml () ;
+       
+       private :
+       virtual void make_tag_list ( string &s , vector <TXML> &list ) ;
+       virtual void parse_symmetric ( string &l , size_t &from , 
+                                               string s1 , string s2 , 
+                        string r1 , string r2 , bool extend = false ) ;
+       // mode is 'L' for [[links]] and 'T' for {{templates}} (see parse_line_sub)
+       virtual void parse_link ( string &l , size_t &from , char mode = 'L' ) ;
+       virtual void parse_line_sub ( string &l ) ;
+       virtual void parse_line ( string &l ) ;
+       virtual void parse_lines ( vector <string> &lines ) ;
+       virtual string fix_list ( string &l ) ;
+       virtual string get_list_tag ( chart c , bool open ) ;
+       virtual bool is_list_char ( chart c ) ;
+       virtual void remove_evil_html ( string &s , vector <TXML> &taglist ) ;
+       virtual void replace_part ( string &s , size_t from , size_t to , string with ) ;
+       virtual void replace_part_sync ( string &s , size_t from , size_t to , string with , vector <TXML> &list ) ;
+       virtual void parse_external_freelink ( string &l , size_t &from ) ;
+       virtual void parse_external_link ( string &l , size_t &from ) ;
+       virtual bool is_external_link_protocol ( string protocol ) ;
+       virtual int scan_url ( string &l , size_t from ) ;
+       virtual string table_markup ( string &l ) ;
+               
+       // Variables
+       vector <string> lines , allowed_html ; // text split into lines ; upper-cased tag whitelist
+       vector <TTableInfo> tables ; // stack of currently open tables, innermost last
+       string list ; // reset to "" by init(); presumably the open list prefix - see fix_list
+    } ;             
+
+#endif
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/global.cpp b/stardict-plugins/stardict-wiki-parsedata-plugin/global.cpp
new file mode 100644 (file)
index 0000000..f2afd9d
--- /dev/null
@@ -0,0 +1,227 @@
+#include "global.h"
+
+// *****************************************************************************
+// *****************************************************************************
+//
+// global string functions
+//
+// *****************************************************************************
+// *****************************************************************************
+
+// The following functions should be language specific
+// True for the ASCII letters a-z and A-Z only.
+bool is_text_char ( chart ch )
+       {
+       const bool lower = ( ch >= 'a' && ch <= 'z' ) ;
+       const bool capital = ( ch >= 'A' && ch <= 'Z' ) ;
+       return lower || capital ;
+       }
+
+
+// These are not :
+       
+// Returns the first `num` characters of s (all of s when it is shorter).
+string left ( string &s , size_t num )
+       {
+       return s.substr ( 0 , num ) ; // substr clamps the count to the string length
+       }
+     
+string right ( string &s , int num )
+       {
+       if ( num <= 0 ) return "" ;
+    int from = s.length() - num ;
+    string ret ;
+    if ( from <= 0 ) ret = s ;
+    else ret = s.substr ( from , s.length() ) ;
+    return ret ;
+       }    
+
+string upper ( string s ) // For internal purposes, will do...
+       {
+       size_t a ;
+       for ( a = 0 ; a < s.length() ; a++ )
+               {
+        if ( s[a] >= 'a' && s[a] <= 'z' ) s[a] = s[a] - 'a' + 'A' ;
+               }    
+    return s ;
+       }    
+
+void explode ( chart ch , string &l , vector <string> &parts )
+       {
+    parts.clear () ;
+    size_t a , b ;
+    for ( a = b = 0 ; a < l.length() ; a++ )
+       {
+           if ( l[a] == ch )
+              {
+           parts.push_back ( l.substr ( b , a - b ) ) ;
+           b = a+1 ;
+              }    
+       }    
+       parts.push_back ( l.substr ( b , a - b ) ) ;
+
+       if ( debug ) cout << "Explode : " << l << endl ;
+       for ( a = 0 ; a < parts.size() ; a++ )
+               if ( debug ) cout << a << " " << parts[a] << endl ;
+    if ( debug ) cout << endl ;        
+       }    
+       
+string implode ( string mid , vector <string> &parts )
+       {
+    if ( parts.size() == 0 ) return "" ;
+    if ( parts.size() == 1 ) return parts[0] ;
+    string ret = parts[0] ;
+    for ( size_t a = 1 ; a < parts.size() ; a++ )
+       ret += mid + parts[a] ;
+       return ret ;
+       }    
+
+string unquote ( chart quote , string &s )
+       {
+       size_t a ;
+       for ( a = 0 ; a < s.length() ; a++ )
+               {
+               if ( s[a] == quote && ( a == 0 || ( a > 0 && s[a-1] != '\\' ) ) )
+                  {
+                  s.insert ( a , "\\" ) ;
+                  a++ ;
+                  }    
+               }    
+    return s ;
+       }    
+       
+bool submatch ( string &main , string &sub , int from )
+       {
+       if ( from + sub.length() > main.length() ) return false ;
+       size_t a ;
+       for ( a = 0 ; a < sub.length() ; a++ )
+               {
+               if ( sub[a] != main[a+from] ) return false ;
+               }    
+       return true ;
+       }
+     
+int find_first ( chart c , string &s )
+       {
+       size_t a ;
+       for ( a = 0 ; a < s.length() && s[a] != c ; a++ ) ;
+       if ( a == s.length() ) return -1 ;
+    return a ;
+       }    
+     
+int find_last ( chart c , string &s )
+       {
+       size_t a;
+       int b = -1 ;
+       for ( a = 0 ; a < s.length() ; a++ )
+               {
+               if ( s[a] == c ) b = a ;
+               }    
+       return b ;
+       }    
+     
+// Part of s in front of the first c; all of s when c does not occur.
+string before_first ( chart c , string s )
+       {
+       const size_t hit = s.find ( c ) ;
+       if ( hit == string::npos ) return s ;
+       return s.substr ( 0 , hit ) ;
+       }
+
+// Part of s in front of the last c; "" when c does not occur.
+string before_last ( chart c , string s )
+       {
+       const size_t hit = s.rfind ( c ) ;
+       if ( hit == string::npos ) return "" ;
+       return s.substr ( 0 , hit ) ;
+       }
+
+// Part of s behind the first c; "" when c does not occur.
+string after_first ( chart c , string s )
+       {
+       const size_t hit = s.find ( c ) ;
+       if ( hit == string::npos ) return "" ;
+       return s.substr ( hit + 1 ) ;
+       }
+
+// Part of s behind the last c; all of s when c does not occur.
+string after_last ( chart c , string s )
+       {
+       const size_t hit = s.rfind ( c ) ;
+       if ( hit == string::npos ) return s ;
+       return s.substr ( hit + 1 ) ;
+       }
+     
+string trim ( string &s )
+       {
+       if ( s.length() == 0 ) return s ;
+       if ( s[0] != ' ' && s[s.length()-1] != ' ' ) return s ;
+       size_t a;
+       int b ;
+       for ( a = 0 ; a < s.length() && s[a] == ' ' ; a++ ) ;
+       for ( b = s.length()-1 ; b >= 0 && s[b] == ' ' ; b-- ) ;
+       return s.substr ( a , b - a + 1 ) ;
+       }
+
+// Returns the index of the first c at or after `start` that is not inside a
+// single- or double-quoted section, or -1 when there is none.
+// A quote character preceded by a backslash neither opens nor closes a
+// section. lastquote holds the quote character that opened the current
+// section, or ' ' while outside of one.
+int find_next_unquoted ( chart c , string &s , int start )
+       {
+       size_t a ;
+       chart lastquote = ' ' ;
+       for ( a = start ; a < s.length() ; a++ )
+               {
+               if ( s[a] == c && lastquote == ' ' ) return a ; // Success!
+               if ( s[a] != SINGLE_QUOTE && s[a] != DOUBLE_QUOTE ) continue ; // No quotes, next
+               if ( a > 0 && s[a-1] == '\\' ) continue ; // Ignore \' and \"
+               if ( lastquote == ' ' ) lastquote = s[a] ; // Remember opening quote, text now quoted
+               else if ( lastquote == s[a] ) lastquote = ' ' ; // Close quote, not quoted anymore
+               }
+       return -1 ;
+       }
+    
+// Decimal text representation of a.
+string val ( int a )
+    {
+    char digits[20] ; // ample for a sign plus the digits of an int
+    sprintf ( digits , "%d" , a ) ;
+    string text ( digits ) ;
+    return text ;
+    }
+
+string xml_embed ( string inside , string tag , string param )
+    {
+    string ret ;
+    ret = "<" + tag ;
+    if ( param != "" ) ret += " " + param ;
+    if ( inside == "" ) return ret + "/>" ;
+    return ret + ">" + trim ( inside ) + "</" + tag + ">" ;
+    }
+
+string xml_params ( string l ) // Yes, this function is thin...
+       {
+       string ret ;
+       vector <string> params ;
+       while ( l != "" )
+               {
+               int p = find_next_unquoted ( ' ' , l ) ;
+               string first ;
+               if ( p == -1 )
+                       {
+                       first = l ;
+                       l = "" ;
+                       }
+               else
+                       {
+                   first = left ( l , p ) ;
+                   l = l.substr ( p , l.length() - p ) ;
+                       }        
+               first = trim ( first ) ;
+               l = trim ( l ) ;
+               if ( first == "" ) continue ;
+               
+               p = find_next_unquoted ( '=' , first ) ;
+               if ( p == -1 ) first = xml_embed ( first , "value" ) ;
+               else
+                       {
+                       first = xml_embed ( left ( first , p ) , "key" ) +
+                                       xml_embed ( first.substr ( p + 1 , first.length() - p ) , "value" ) ;
+                       }    
+               first = xml_embed ( first , "wikiparameter" ) ;
+               ret += first ;
+               }    
+       return ret ;
+       }
+    
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/global.h b/stardict-plugins/stardict-wiki-parsedata-plugin/global.h
new file mode 100644 (file)
index 0000000..c9c053d
--- /dev/null
@@ -0,0 +1,38 @@
+#ifndef _GLOBAL_FUNCTIONS_H_
+#define _GLOBAL_FUNCTIONS_H_
+
+#define debug 0
+
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+#include <stdlib.h>
+
+using namespace std;
+
+#define SINGLE_QUOTE 39
+#define DOUBLE_QUOTE '"'
+
+typedef string::value_type chart ; // Char type
+
+string right ( string &s , int num ) ;
+string left ( string &s , size_t num ) ;
+string upper ( string s ) ;
+bool is_text_char ( chart ch ) ;
+void explode ( chart ch , string &l , vector <string> &parts ) ;
+string implode ( string mid , vector <string> &parts ) ;
+string unquote ( chart quote , string &s ) ;
+bool submatch ( string &main , string &sub , int from ) ;
+string before_first ( chart c , string s ) ;
+string before_last ( chart c , string s ) ;
+string after_first ( chart c , string s ) ;
+string after_last ( chart c , string s ) ;
+string trim ( string &s ) ;
+string val ( int a ) ;
+int find_next_unquoted ( chart c , string &s , int start = 0 ) ;
+string xml_embed ( string inside , string tag , string param = "" ) ;
+string xml_params ( string l ) ;
+
+#endif
+
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki2xml.cpp b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki2xml.cpp
new file mode 100644 (file)
index 0000000..0cfade9
--- /dev/null
@@ -0,0 +1,56 @@
+#include "stardict_wiki2xml.h"
+#include "WIKI2XML.h"
+#include <glib.h>
+
+// Runs the complete wiki -> XML conversion on str and returns the XML text.
+std::string wiki2xml(std::string &str)
+{
+       WIKI2XML converter(str);
+       converter.parse();
+       return converter.get_xml();
+}
+
+// State handed to the GMarkup callbacks through their user_data pointer.
+struct WikiXmlParseUserData {
+       std::string *res; // output buffer the callbacks append Pango markup to
+};
+
+// GMarkup start-tag callback: opens a blue, underlined <span> for every
+// <wikilink>; all other elements produce no output.
+static void wikixml_parse_start_element(GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, gpointer user_data, GError **error)
+{
+       if (strcmp(element_name, "wikilink") != 0)
+               return;
+       std::string *out = static_cast<WikiXmlParseUserData *>(user_data)->res;
+       out->append("<span foreground=\"blue\" underline=\"single\">");
+}
+
+// GMarkup end-tag callback: closes the <span> opened for a <wikilink>.
+static void wikixml_parse_end_element(GMarkupParseContext *context, const gchar *element_name, gpointer user_data, GError **error)
+{
+       if (strcmp(element_name, "wikilink") != 0)
+               return;
+       std::string *out = static_cast<WikiXmlParseUserData *>(user_data)->res;
+       out->append("</span>");
+}
+
+// GMarkup text callback: appends the text, re-escaped for markup, to the
+// output buffer.
+static void wikixml_parse_text(GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error)
+{
+       std::string *out = static_cast<WikiXmlParseUserData *>(user_data)->res;
+       gchar *escaped = g_markup_escape_text(text, text_len);
+       out->append(escaped);
+       g_free(escaped);
+}
+
+std::string wikixml2pango(std::string &str)
+{
+       std::string res;
+       WikiXmlParseUserData Data;
+       Data.res = &res;
+       GMarkupParser parser;
+       parser.start_element = wikixml_parse_start_element;
+       parser.end_element = wikixml_parse_end_element;
+       parser.text =  wikixml_parse_text;
+       parser.passthrough = NULL;
+       parser.error = NULL;
+       GMarkupParseContext* context = g_markup_parse_context_new(&parser, (GMarkupParseFlags)0, &Data, NULL);
+       g_markup_parse_context_parse(context, str.c_str(), str.length(), NULL);
+       g_markup_parse_context_end_parse(context, NULL);
+       g_markup_parse_context_free(context);
+       return res;
+}
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki2xml.h b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki2xml.h
new file mode 100644 (file)
index 0000000..b3133c0
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _STARDICT_WIKI2XML_H_
+#define _STARDICT_WIKI2XML_H_
+
+#include <string>
+
+extern std::string wiki2xml(std::string &str);
+extern std::string wikixml2pango(std::string &str);
+
+#endif
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.cpp b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.cpp
new file mode 100644 (file)
index 0000000..fdf5270
--- /dev/null
@@ -0,0 +1,78 @@
+#include "stardict_wiki_parsedata.h"
+#include "stardict_wiki2xml.h"
+#include <glib/gi18n.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+static bool parse(const char *p, unsigned int *parsed_size, ParseResult &result, const char *oword)
+{
+       if (*p != 'w')
+               return false;
+       p++;
+       size_t len = strlen(p);
+       if (len) {
+               ParseResultItem item;
+               item.type = ParseResultItemType_mark;
+               item.mark = new ParseResultMarkItem;
+               std::string res(p, len);
+               std::string xml = wiki2xml(res);
+               item.mark->pango = wikixml2pango(xml);
+               result.item_list.push_back(item);
+       }
+       *parsed_size = 1 + len + 1;
+       return true;
+}
+
+// Configure callback registered in stardict_plugin_init; this plugin has
+// nothing to configure, so it does nothing.
+static void configure()
+{
+}
+
+DLLIMPORT bool stardict_plugin_init(StarDictPlugInObject *obj)
+{
+       if (strcmp(obj->version_str, PLUGIN_SYSTEM_VERSION)!=0) {
+               g_print("Error: Wiki data parsing plugin version doesn't match!\n");
+               return true;
+       }
+       obj->type = StarDictPlugInType_PARSEDATA;
+       obj->info_xml = g_strdup_printf("<plugin_info><name>%s</name><version>1.0</version><short_desc>%s</short_desc><long_desc>%s</long_desc><author>Hu Zheng &lt;huzheng_001@163.com&gt;</author><website>http://stardict.sourceforge.net</website></plugin_info>", _("Wiki data parsing"), _("Wiki data parsing engine."), _("Parse the wiki data."));
+       obj->configure_func = configure;
+       return false;
+}
+
+// Plugin exit hook; nothing to release.
+DLLIMPORT void stardict_plugin_exit(void)
+{
+}
+
+// Registers parse() as this plugin's data parser and prints a notice.
+// Always returns false.
+DLLIMPORT bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj)
+{
+       obj->parse_func = parse;
+       g_print(_("Wiki data parsing plug-in loaded.\n"));
+       return false;
+}
+
+#ifdef _WIN32
+BOOL APIENTRY DllMain (HINSTANCE hInst     /* Library instance handle. */ ,
+                       DWORD reason        /* Reason this function is being called. */ ,
+                       LPVOID reserved     /* Not used. */ )
+{
+    switch (reason)
+    {
+      case DLL_PROCESS_ATTACH:
+        break;
+
+      case DLL_PROCESS_DETACH:
+        break;
+
+      case DLL_THREAD_ATTACH:
+        break;
+
+      case DLL_THREAD_DETACH:
+        break;
+    }
+
+    /* Returns TRUE on success, FALSE on failure */
+    return TRUE;
+}
+#endif
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.dev b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.dev
new file mode 100644 (file)
index 0000000..1213950
--- /dev/null
@@ -0,0 +1,149 @@
+[Project]\r
+FileName=stardict_wiki_parsedata.dev\r
+Name=stardict_wiki_parsedata\r
+UnitCount=10\r
+Type=3\r
+Ver=1\r
+ObjFiles=\r
+Includes=\r
+Libs=\r
+PrivateResource=\r
+ResourceIncludes=\r
+MakeIncludes=\r
+Compiler=-DBUILDING_DLL=1 -I"<INCLUDE>\cairo" -I"<INCLUDE>\gtk-2.0" -I"<INCLUDE>\gtkdeps-2.0" -I"<LIB>\gtk-2.0\include" -I"<INCLUDE>\atk-1.0" -I"<INCLUDE>\pango-1.0" -I"<INCLUDE>\glib-2.0" -I"<LIB>\glib-2.0" -I"<LIB>\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_\r
+CppCompiler=-DBUILDING_DLL=1 -I"<INCLUDE>\cairo" -I"<INCLUDE>\gtk-2.0" -I"<INCLUDE>\gtkdeps-2.0" -I"<LIB>\gtk-2.0\include" -I"<INCLUDE>\atk-1.0" -I"<INCLUDE>\pango-1.0" -I"<INCLUDE>\glib-2.0" -I"<LIB>\glib-2.0" -I"<LIB>\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_\r
+Linker=--no-export-all-symbols --add-stdcall-alias -lcairo -lgtk-win32-2.0 -lgdk-win32-2.0 -latk-1.0 -lgdk_pixbuf-2.0 -lm -lpangowin32-1.0 -lpango-1.0 -lgobject-2.0 -lgmodule-2.0 -lglib-2.0 -lgthread-2.0 -lintl -lwinmm -mno-cygwin -mwindows -mms-bitfields_@@_\r
+IsCpp=1\r
+Icon=\r
+ExeOutput=\r
+ObjectOutput=\r
+OverrideOutput=0\r
+OverrideOutputName=stardict_wiki_parsedata.dll\r
+HostApplication=\r
+Folders=\r
+CommandLine=\r
+UseCustomMakefile=0\r
+CustomMakefile=\r
+IncludeVersionInfo=0\r
+SupportXPThemes=0\r
+CompilerSet=0\r
+CompilerSettings=0000000000000000000100\r
+\r
+[Unit1]\r
+FileName=WIKI2XML.h\r
+CompileCpp=1\r
+Folder=stardict_wiki_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[Unit2]\r
+FileName=global.cpp\r
+CompileCpp=1\r
+Folder=stardict_wiki_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[Unit3]\r
+FileName=global.h\r
+CompileCpp=1\r
+Folder=stardict_wiki_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[Unit4]\r
+FileName=stardict_wiki2xml.cpp\r
+CompileCpp=1\r
+Folder=stardict_wiki_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[Unit5]\r
+FileName=stardict_wiki2xml.h\r
+CompileCpp=1\r
+Folder=stardict_wiki_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[Unit6]\r
+FileName=stardict_wiki_parsedata.cpp\r
+CompileCpp=1\r
+Folder=stardict_wiki_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[Unit7]\r
+FileName=stardict_wiki_parsedata.h\r
+CompileCpp=1\r
+Folder=stardict_wiki_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[Unit8]\r
+FileName=TXML.cpp\r
+CompileCpp=1\r
+Folder=stardict_wiki_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[Unit9]\r
+FileName=TXML.h\r
+CompileCpp=1\r
+Folder=stardict_wiki_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[Unit10]\r
+FileName=WIKI2XML.cpp\r
+CompileCpp=1\r
+Folder=stardict_wiki_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[VersionInfo]\r
+Major=0\r
+Minor=1\r
+Release=1\r
+Build=1\r
+LanguageID=1033\r
+CharsetID=1252\r
+CompanyName=\r
+FileVersion=\r
+FileDescription=Developed using the Dev-C++ IDE\r
+InternalName=\r
+LegalCopyright=\r
+LegalTrademarks=\r
+OriginalFilename=\r
+ProductName=\r
+ProductVersion=\r
+AutoIncBuildNr=0\r
+\r
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.h b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.h
new file mode 100644 (file)
index 0000000..7a9faf6
--- /dev/null
@@ -0,0 +1,23 @@
+#ifndef _STARDICT_WIKI_PARSEDATA_PLUGIN_H_
+#define _STARDICT_WIKI_PARSEDATA_PLUGIN_H_
+
+#ifdef _WIN32
+#if BUILDING_DLL
+# define DLLIMPORT __declspec (dllexport)
+#else /* Not BUILDING_DLL */
+# define DLLIMPORT __declspec (dllimport)
+#endif /* Not BUILDING_DLL */
+#else
+# define DLLIMPORT
+#endif
+
+#include "../../src/lib/plugin.h"
+#include "../../src/lib/parsedata_plugin.h"
+
+extern "C" {
+       DLLIMPORT extern bool stardict_plugin_init(StarDictPlugInObject *obj);
+       DLLIMPORT extern void stardict_plugin_exit(void);
+       DLLIMPORT extern bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj);
+}
+
+#endif
diff --git a/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.v b/stardict-plugins/stardict-wiki-parsedata-plugin/stardict_wiki_parsedata.v
new file mode 100644 (file)
index 0000000..683abb3
--- /dev/null
@@ -0,0 +1,10 @@
+{
+       global:
+               extern "C" {
+                       stardict_plugin_init;
+                       stardict_plugin_exit;
+                       stardict_parsedata_plugin_init;
+               };
+       local:
+               *;
+};
diff --git a/stardict-plugins/stardict-xdxf-parsedata-plugin/Makefile.am b/stardict-plugins/stardict-xdxf-parsedata-plugin/Makefile.am
new file mode 100644 (file)
index 0000000..f8e944b
--- /dev/null
@@ -0,0 +1,18 @@
+# Automake rules for the XDXF parse-data plugin (a libtool module).
+
+# Shipped in the tarball but not compiled: linker version script and the
+# Dev-C++ project file.
+EXTRA_DIST = stardict_xdxf_parsedata.v stardict_xdxf_parsedata.dev
+
+
+# Passes the version script to the linker.
+LD_VERSION_SCRIPT_OPTION="-Wl,--version-script=stardict_xdxf_parsedata.v"
+
+noinst_HEADERS = stardict_xdxf_parsedata.h
+
+stardict_xdxf_parsedata_LTLIBRARIES = stardict_xdxf_parsedata.la
+
+# Installation directory of the module.
+stardict_xdxf_parsedatadir = $(libdir)/mstardict/plugins
+
+stardict_xdxf_parsedata_la_SOURCES = stardict_xdxf_parsedata.cpp
+
+# -module: build a dlopen-able plugin; -avoid-version: no version suffix on
+# the installed file name.
+stardict_xdxf_parsedata_la_LDFLAGS =   -avoid-version \
+                                       -module \
+                                       $(LD_VERSION_SCRIPT_OPTION)
+
+# NOTE(review): newer Automake releases warn that INCLUDES is the old name
+# for AM_CPPFLAGS - consider renaming together with the sibling plugins.
+INCLUDES = @LIB_STARDICT_CFLAGS@ -I$(top_builddir)/src
diff --git a/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.cpp b/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.cpp
new file mode 100644 (file)
index 0000000..905a01d
--- /dev/null
@@ -0,0 +1,360 @@
+#include "stardict_xdxf_parsedata.h"
+#include <glib/gi18n.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+static size_t xml_strlen(const std::string& str)
+{
+       const char *q;
+       static const char* xml_entrs[] = { "lt;", "gt;", "amp;", "apos;", "quot;", 0 };
+       static const int xml_ent_len[] = { 3,     3,     4,      5,       5 };
+       size_t cur_pos;
+       int i;
+
+       for (cur_pos = 0, q = str.c_str(); *q; ++cur_pos) {
+               if (*q == '&') {
+                       for (i = 0; xml_entrs[i]; ++i)
+                               if (strncmp(xml_entrs[i], q + 1,
+                                           xml_ent_len[i]) == 0) {
+                                       q += xml_ent_len[i] + 1;
+                                       break;
+                               }
+                       if (xml_entrs[i] == NULL)
+                               ++q;
+               } else if (*q == '<') {
+                       const char *p = strchr(q+1, '>');
+                       if (p)
+                               q = p + 1;
+                       else
+                               ++q;
+                       --cur_pos;
+               } else
+                       q = g_utf8_next_char(q);
+       }
+
+       return cur_pos;
+}
+
+static void xml_decode(const char *str, std::string& decoded)
+{
+       static const char raw_entrs[] = { 
+               '<',   '>',   '&',    '\'',    '\"',    0 
+       };
+       static const char* xml_entrs[] = { 
+               "lt;", "gt;", "amp;", "apos;", "quot;", 0 
+       };
+       static const int xml_ent_len[] = { 
+               3,     3,     4,      5,       5 
+       };
+       int ient;
+        const char *amp = strchr(str, '&');
+
+        if (amp == NULL) {
+               decoded = str;
+                return;
+        }
+        decoded.assign(str, amp - str);
+        
+        while (*amp)
+                if (*amp == '&') {
+                        for (ient = 0; xml_entrs[ient] != 0; ++ient)
+                                if (strncmp(amp + 1, xml_entrs[ient],
+                                           xml_ent_len[ient]) == 0) {
+                                        decoded += raw_entrs[ient];
+                                        amp += xml_ent_len[ient]+1;
+                                        break;
+                                }
+                        if (xml_entrs[ient] == 0)    // unrecognized sequence
+                                decoded += *amp++;
+
+                } else {
+                        decoded += *amp++;
+                }        
+}
+
+static void xdxf2result(const char *p, ParseResult &result)
+{ /*
+   * Translate the NUL-terminated XDXF text at `p` into Pango markup and
+   * append the outcome to result.item_list.
+   *  - Plain text and converted tags accumulate in `res`; `res` is pushed as
+   *    a ParseResultItemType_link item together with `links_list`, which
+   *    holds one LinkDesc per <kref>/<iref> found in that markup.
+   *  - Every complete <rref>...</rref> first flushes the pending markup as a
+   *    link item and then pushes a ParseResultItemType_res item (type + key).
+   *  - Text outside of tags is copied verbatim; no escaping is applied here.
+   *  - The pushed items carry `new`-allocated payloads; nothing in this
+   *    function frees them.
+   */
+       LinksPosList links_list; // links belonging to the markup currently held in `res`
+       std::string res; // Pango markup built since the last flushed item
+       const char *tag, *next;
+       std::string name; // text between '<' and '>' of the tag being examined
+       std::string::size_type cur_pos; // count of visible characters in `res`; recorded as the link start in LinkDesc
+       int i;
+
+       struct ReplaceTag {
+               const char *match_; // tag text that follows '<', closing '>' included
+               int match_len_; // number of characters in match_
+               const char *replace_; // Pango markup emitted in place of the tag
+               int char_len_; // visible characters the replacement adds (the '[' / ']' of <tr>)
+       };
+       static const ReplaceTag replace_arr[] = { // fixed one-to-one substitutions, terminated by a NULL match_
+               { "abr>", 4, "<span foreground=\"green\" style=\"italic\">", 0 },
+               { "/abr>", 5, "</span>", 0 },
+               { "b>", 2, "<b>", 0 },
+               { "/b>", 3, "</b>", 0 },
+               { "i>", 2, "<i>", 0  },
+               { "/i>", 3, "</i>", 0 },
+               { "sub>", 4, "<sub>", 0 },
+               { "/sub>", 5, "</sub>", 0},
+               { "sup>", 4, "<sup>", 0},
+               { "/sup>", 5, "</sup>", 0},
+               { "tt>", 3, "<tt>", 0},
+               { "/tt>", 4, "</tt>", 0},
+               { "big>", 4, "<big>", 0},
+               { "/big>", 5, "</big>", 0},
+               { "small>", 6, "<small>", 0},
+               { "/small>", 7, "</small>", 0},
+               { "tr>", 3, "<b>[", 1 },
+               { "/tr>", 4, "]</b>", 1 },
+               { "ex>", 3, "<span foreground=\"violet\">", 0 },
+               { "/ex>", 4, "</span>", 0 },
+               { "/c>", 3, "</span>", 0 },
+               { NULL, 0, NULL },
+       };
+
+       bool is_first_k = true; // the first <k>...</k> is left out of the output, later ones are shown
+       for (cur_pos = 0; *p && (tag = strchr(p, '<')) != NULL;) { // one iteration per '<' found in the input
+               //TODO: do not create chunk
+               std::string chunk(p, tag - p); // text in front of the tag, copied unchanged
+               res += chunk;
+               cur_pos += xml_strlen(chunk);
+
+               p = tag; // p now points at the '<'
+               for (i = 0; replace_arr[i].match_; ++i) // simple substitutions are tried first
+                       if (strncmp(replace_arr[i].match_, p + 1,
+                                               replace_arr[i].match_len_) == 0) {
+                               res += replace_arr[i].replace_;
+                               p += 1 + replace_arr[i].match_len_; // skip '<' plus the matched tag text
+                               cur_pos += replace_arr[i].char_len_;
+                               goto cycle_end;
+                       }
+
+               if (strncmp("k>", p + 1, 2) == 0) { // <k>: key element
+                       next = strstr(p + 3, "</k>");
+                       if (next) {
+                               if (is_first_k) {
+                                       is_first_k = false;
+                                       if (*(next + 4) == '\n') // also swallow a newline directly after the first </k>
+                                               next++;
+                               } else {
+                                       res += "<span foreground=\"blue\">";
+                                       std::string chunk(p+3, next-(p+3)); // text between <k> and </k>
+                                       res += chunk;
+                                       size_t xml_len = xml_strlen(chunk);
+                                       cur_pos += xml_len;
+                                       res += "</span>";
+                               }
+                               p = next + sizeof("</k>") - 1;
+                       } else
+                               p += sizeof("<k>") - 1; // no closing tag: drop only the "<k>"
+               } else if (*(p + 1) == 'c' && (*(p + 2) == ' ' || *(p + 2) == '>')) { // <c> or <c c="...">: colored text
+                       next = strchr(p, '>');
+                       if (!next) {
+                               ++p; // unterminated tag: step over the '<' and rescan
+                               continue;
+                       }
+                       name.assign(p + 1, next - p - 1);
+                       std::string::size_type pos = name.find("c=\"");
+                       if (pos != std::string::npos) {
+                               pos += sizeof("c=\"") - 1;
+                               std::string::size_type end_pos = name.find("\"", pos);
+                               if (end_pos == std::string::npos)
+                                       end_pos = name.length(); // missing closing quote: take the rest of the tag
+
+                               std::string color(name, pos, end_pos - pos);
+                               if (pango_color_parse(NULL, color.c_str())) // only a color Pango accepts is written into the markup
+                                       res += "<span foreground=\"" + color + "\">";
+                               else
+                                       res += "<span>"; // keeps the later "</span>" from </c> balanced
+                       } else
+                               res += "<span foreground=\"blue\">"; // no c= attribute: blue is used
+                       p = next + 1;
+               } else if (*(p + 1) == 'r' && *(p + 2) == 'r' && *(p + 3) == 'e' && *(p + 4) == 'f' && (*(p + 5) == ' ' || *(p + 5) == '>')) { // <rref>: resource reference
+                       next = strchr(p, '>');
+                       if (!next) {
+                               ++p;
+                               continue;
+                       }
+                       name.assign(p + 1, next - p - 1);
+                       std::string type; // stays empty unless a type="..." attribute is present
+                       std::string::size_type pos = name.find("type=\"");
+                       if (pos != std::string::npos) {
+                               pos += sizeof("type=\"") - 1;
+                               std::string::size_type end_pos = name.find("\"", pos);
+                               if (end_pos == std::string::npos)
+                                       end_pos = name.length();
+                               type.assign(name, pos, end_pos - pos);
+                       }
+                       p = next + 1;
+                       next = strstr(p, "</rref>");
+                       if (!next) // no closing tag: the opening tag is dropped, the following text is handled as usual
+                               continue;
+                       std::string chunk(p, next - p); // element content, stored below as the resource key
+                       p = next + sizeof("</rref>") - 1;
+                       if (type.empty()) { // no explicit type: derive one from the file name suffix
+                               if (g_str_has_suffix(chunk.c_str(), ".jpg") || g_str_has_suffix(chunk.c_str(), ".png")) {
+                                       type = "image";
+                               } else if (g_str_has_suffix(chunk.c_str(), ".wav") || g_str_has_suffix(chunk.c_str(), ".mp3") || g_str_has_suffix(chunk.c_str(), ".ogg")) {
+                                       type = "sound";
+                               } else if (g_str_has_suffix(chunk.c_str(), ".avi") || g_str_has_suffix(chunk.c_str(), ".mpeg")) {
+                                       type = "video";
+                               } else {
+                                       type = "attach";
+                               }
+                       }
+                       ParseResultItem item;
+                       item.type = ParseResultItemType_link; // flush the markup gathered so far ahead of the resource
+                       item.link = new ParseResultLinkItem;
+                       item.link->pango = res;
+                       item.link->links_list = links_list;
+                       result.item_list.push_back(item);
+                       res.clear(); // start a fresh markup segment
+                       cur_pos = 0;
+                       links_list.clear();
+                       item.type = ParseResultItemType_res; // the same local is reused for the resource item
+                       item.res = new ParseResultResItem;
+                       item.res->type = type;
+                       item.res->key = chunk;
+                       result.item_list.push_back(item);
+               } else if ((*(p + 1) == 'k' || *(p + 1) == 'i') && *(p + 2) == 'r' && *(p + 3) == 'e' && *(p + 4) == 'f' && (*(p + 5) == ' ' || *(p + 5) == '>')) { // <kref> or <iref>: clickable link
+                       bool is_k_or_i = (*(p + 1) == 'k'); // true for <kref>, false for <iref>
+                       next = strchr(p, '>');
+                       if (!next) {
+                               ++p;
+                               continue;
+                       }
+                       name.assign(p + 1, next - p - 1);
+                       std::string key; // optional explicit target: k="..." on <kref>, href="..." on <iref>
+                       std::string::size_type pos;
+                       if (is_k_or_i)
+                               pos = name.find("k=\"");
+                       else
+                               pos = name.find("href=\"");
+                       if (pos != std::string::npos) {
+                               if (is_k_or_i)
+                                       pos += sizeof("k=\"") - 1;
+                               else
+                                       pos += sizeof("href=\"") - 1;
+                               std::string::size_type end_pos = name.find("\"", pos);
+                               if (end_pos == std::string::npos)
+                                       end_pos = name.length();
+                               key.assign(name, pos, end_pos - pos);
+                       }
+
+                       p = next + 1;
+                       if (is_k_or_i)
+                               next = strstr(p, "</kref>");
+                       else
+                               next = strstr(p, "</iref>");
+                       if (!next) // no closing tag: the opening tag is dropped and no link is recorded
+                               continue;
+
+                       res += "<span foreground=\"blue\" underline=\"single\">";
+                       std::string::size_type link_len = next - p;
+                       std::string chunk(p, link_len); // link text as displayed
+                       size_t xml_len = xml_strlen(chunk);
+                       std::string xml_enc; // holds the entity-decoded link target
+                       if (key.empty())
+                               xml_decode(chunk.c_str(), xml_enc); // no explicit target: the link text itself is the target
+                       else
+                               xml_decode(key.c_str(), xml_enc);
+                       std::string link;
+                       if (is_k_or_i)
+                               link = "query://"; // only <kref> targets get this prefix; <iref> targets are used as given
+                       link += xml_enc;
+                       links_list.push_back(LinkDesc(cur_pos, xml_len, link)); // arguments: start offset, visible length, target
+                       res += chunk;
+                       cur_pos += xml_len;
+                       res += "</span>";
+                       if (is_k_or_i)
+                               p = next + sizeof("</kref>") - 1;
+                       else
+                               p = next + sizeof("</iref>") - 1;
+               } else { // any other tag
+                       next = strchr(p+1, '>');
+                       if (!next) {
+                               p++;
+                               res += "&lt;"; // a '<' with no '>' after it is emitted as a literal character
+                               cur_pos++;
+                               continue;
+                       }
+                       p = next + 1; // unknown tag: dropped from the output
+               }
+cycle_end:
+               ;
+       }
+       res += p; // whatever follows the last handled '<' is appended unchanged
+       ParseResultItem item;
+       item.type = ParseResultItemType_link; // final flush; pushed even when `res` is empty
+       item.link = new ParseResultLinkItem;
+       item.link->pango = res;
+       item.link->links_list = links_list;
+       result.item_list.push_back(item);
+}
+
+static bool parse(const char *p, unsigned int *parsed_size, ParseResult &result, const char *oword)
+{
+       if (*p != 'x')
+               return false;
+       p++;
+       size_t len = strlen(p);
+       if (len) {
+               xdxf2result(p, result);
+       }
+       *parsed_size = 1 + len + 1;
+       return true;
+}
+
+static void configure() // registered as obj->configure_func in stardict_plugin_init()
+{ // intentionally empty: the callback does nothing
+}
+
+DLLIMPORT bool stardict_plugin_init(StarDictPlugInObject *obj)
+{
+       if (strcmp(obj->version_str, PLUGIN_SYSTEM_VERSION)!=0) {
+               g_print("Error: XDXF data parsing plugin version doesn't match!\n");
+               return true;
+       }
+       obj->type = StarDictPlugInType_PARSEDATA;
+       obj->info_xml = g_strdup_printf("<plugin_info><name>%s</name><version>1.0</version><short_desc>%s</short_desc><long_desc>%s</long_desc><author>Hu Zheng &lt;huzheng_001@163.com&gt;</author><website>http://stardict.sourceforge.net</website></plugin_info>", _("XDXF data parsing"), _("XDXF data parsing engine."), _("Parse the XDXF data."));
+       obj->configure_func = configure;
+       return false;
+}
+
+DLLIMPORT void stardict_plugin_exit(void) // exit entry point of the plugin
+{ // intentionally empty: no cleanup is performed here
+}
+
+// Parse-data specific entry point: installs parse() as the callback in *obj
+// and prints a (translated) notice.  Always returns false.
+DLLIMPORT bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj)
+{
+       obj->parse_func = &parse;
+
+       g_print(_("XDXF data parsing plug-in loaded.\n"));
+
+       return false;
+}
+
+#ifdef _WIN32
+BOOL APIENTRY DllMain (HINSTANCE hInst     /* Library instance handle. */ ,
+                       DWORD reason        /* Reason this function is being called. */ ,
+                       LPVOID reserved     /* Not used. */ )
+{
+    switch (reason)
+    {
+      case DLL_PROCESS_ATTACH:
+        break;
+
+      case DLL_PROCESS_DETACH:
+        break;
+
+      case DLL_THREAD_ATTACH:
+        break;
+
+      case DLL_THREAD_DETACH:
+        break;
+    }
+
+    /* Returns TRUE on success, FALSE on failure */
+    return TRUE;
+}
+#endif
diff --git a/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.dev b/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.dev
new file mode 100644 (file)
index 0000000..c54bcfe
--- /dev/null
@@ -0,0 +1,69 @@
+[Project]\r
+FileName=stardict_xdxf_parsedata.dev\r
+Name=stardict_xdxf_parsedata\r
+UnitCount=2\r
+Type=3\r
+Ver=1\r
+ObjFiles=\r
+Includes=\r
+Libs=\r
+PrivateResource=\r
+ResourceIncludes=\r
+MakeIncludes=\r
+Compiler=-DBUILDING_DLL=1 -I"<INCLUDE>\cairo" -I"<INCLUDE>\gtk-2.0" -I"<INCLUDE>\gtkdeps-2.0" -I"<LIB>\gtk-2.0\include" -I"<INCLUDE>\atk-1.0" -I"<INCLUDE>\pango-1.0" -I"<INCLUDE>\glib-2.0" -I"<LIB>\glib-2.0" -I"<LIB>\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_\r
+CppCompiler=-DBUILDING_DLL=1 -I"<INCLUDE>\cairo" -I"<INCLUDE>\gtk-2.0" -I"<INCLUDE>\gtkdeps-2.0" -I"<LIB>\gtk-2.0\include" -I"<INCLUDE>\atk-1.0" -I"<INCLUDE>\pango-1.0" -I"<INCLUDE>\glib-2.0" -I"<LIB>\glib-2.0" -I"<LIB>\glib-2.0\include" -DENABLE_NLS -mno-cygwin -mwindows -mms-bitfields -Wall_@@_\r
+Linker=--no-export-all-symbols --add-stdcall-alias -lcairo -lgtk-win32-2.0 -lgdk-win32-2.0 -latk-1.0 -lgdk_pixbuf-2.0 -lm -lpangowin32-1.0 -lpango-1.0 -lgobject-2.0 -lgmodule-2.0 -lglib-2.0 -lgthread-2.0 -lintl -lwinmm -mno-cygwin -mwindows -mms-bitfields_@@_\r
+IsCpp=1\r
+Icon=\r
+ExeOutput=\r
+ObjectOutput=\r
+OverrideOutput=0\r
+OverrideOutputName=stardict_xdxf_parsedata.dll\r
+HostApplication=\r
+Folders=\r
+CommandLine=\r
+UseCustomMakefile=0\r
+CustomMakefile=\r
+IncludeVersionInfo=0\r
+SupportXPThemes=0\r
+CompilerSet=0\r
+CompilerSettings=0000000000000000000100\r
+\r
+[Unit1]\r
+FileName=stardict_xdxf_parsedata.cpp\r
+CompileCpp=1\r
+Folder=stardict_xdxf_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[Unit2]\r
+FileName=stardict_xdxf_parsedata.h\r
+CompileCpp=1\r
+Folder=stardict_xdxf_parsedata\r
+Compile=1\r
+Link=1\r
+Priority=1000\r
+OverrideBuildCmd=0\r
+BuildCmd=\r
+\r
+[VersionInfo]\r
+Major=0\r
+Minor=1\r
+Release=1\r
+Build=1\r
+LanguageID=1033\r
+CharsetID=1252\r
+CompanyName=\r
+FileVersion=\r
+FileDescription=Developed using the Dev-C++ IDE\r
+InternalName=\r
+LegalCopyright=\r
+LegalTrademarks=\r
+OriginalFilename=\r
+ProductName=\r
+ProductVersion=\r
+AutoIncBuildNr=0\r
+\r
diff --git a/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.h b/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.h
new file mode 100644 (file)
index 0000000..6f837e6
--- /dev/null
@@ -0,0 +1,23 @@
+#ifndef _STARDICT_XDXF_PARSEDATA_PLUGIN_H_
+#define _STARDICT_XDXF_PARSEDATA_PLUGIN_H_
+
+#ifdef _WIN32
+#if BUILDING_DLL
+# define DLLIMPORT __declspec (dllexport)
+#else /* Not BUILDING_DLL */
+# define DLLIMPORT __declspec (dllimport)
+#endif /* Not BUILDING_DLL */
+#else
+# define DLLIMPORT
+#endif
+
+#include "../../src/lib/plugin.h"
+#include "../../src/lib/parsedata_plugin.h"
+
+extern "C" { // C linkage: the three symbols below keep plain, unmangled names
+       DLLIMPORT extern bool stardict_plugin_init(StarDictPlugInObject *obj); // fills in *obj; returns true on a plugin-system version mismatch, false on success
+       DLLIMPORT extern void stardict_plugin_exit(void); // exit entry point; the implementation is empty
+       DLLIMPORT extern bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj); // installs the XDXF parse callback in *obj; returns false
+}
+
+#endif
diff --git a/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.v b/stardict-plugins/stardict-xdxf-parsedata-plugin/stardict_xdxf_parsedata.v
new file mode 100644 (file)
index 0000000..683abb3
--- /dev/null
@@ -0,0 +1,10 @@
+{ /* anonymous version node: lists which symbols the plugin exposes */
+       global: /* symbols that stay visible to the host application */
+               extern "C" { /* names are matched as unmangled C symbols */
+                       stardict_plugin_init;
+                       stardict_plugin_exit;
+                       stardict_parsedata_plugin_init;
+               };
+       local: /* every symbol not listed above is hidden */
+               *;
+};