Added CSV parsing and export of Symbian-format Event logs that have had their tables...
[qwerkisync] / CSV.cpp
diff --git a/CSV.cpp b/CSV.cpp
new file mode 100644 (file)
index 0000000..0fe13ea
--- /dev/null
+++ b/CSV.cpp
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2011, Jamie Thompson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "CSV.h"
+
+#include <QDebug>
+
+#include <QFile>
+#include <QHash>
+#include <QString>
+#include <QTextStream>
+
+// Ordering functor for (character, occurrence count) pairs. It reports true
+// when the left-hand pair has the larger count, so sorting with it arranges
+// the pairs from most frequent to least frequent.
+class SortByValueDesc
+{
+public:
+       inline bool operator()(const QPair<QChar, uint> &lhs, const QPair<QChar, uint> &rhs) const
+       {
+               return lhs.second > rhs.second;
+       }
+};
+
+// Creates an unconfigured reader: flagged as not valid, with no file or
+// stream attached and both the line and record numbers at zero.
+CSV::CSV()
+       : m_IsValid(false)
+       , m_File(NULL)
+       , m_Stream(NULL)
+       , m_LineNumber(0)
+       , m_RecordNumber(0)
+{
+}
+
+// Creates a reader whose layout is supplied by the caller instead of being
+// detected from a file. The delimiter, the number of columns per record and
+// the heading indices are stored, after which the reader is flagged as valid.
+// No file or stream is attached by this constructor.
+CSV::CSV(QChar delimiter, int numColumnsPerRecord, const ColumnIndicesHash &headingIndices)
+       : m_IsValid(false)
+       , m_File(NULL)
+       , m_Stream(NULL)
+       , m_LineNumber(0)
+       , m_RecordNumber(0)
+{
+       // Store the caller-supplied layout...
+       Delimiter(delimiter);
+       NumColumnsPerRecord(numColumnsPerRecord);
+       UpdateHeadings(headingIndices);
+
+       // ...and only then declare the configuration usable
+       IsValid(true);
+}
+
+// Destructor. Performs no cleanup of its own.
+// NOTE(review): Open(QFile &) allocates a QTextStream with new and nothing in
+// this file deletes it - confirm whether ownership is handled elsewhere
+// (e.g. by the Stream() setter in the header), otherwise the stream leaks.
+CSV::~CSV()
+{
+}
+
+void CSV::Open(QFile &file)
+{
+       // Ready the file...
+       LineNumber(0);
+       RecordNumber(0);
+       File(&file);
+       File()->seek(0);
+
+       // Read the first line
+       Stream(new QTextStream(&file));
+
+       // Set up the properties...
+       if(!IsValid())
+       {
+               QString firstLineContent(Stream()->readLine());
+               DetermineDelimiter(firstLineContent);
+               GetHeadings(firstLineContent);
+       }
+       // We accept we've already done the hard work, so advance to the first
+       // actual record (i.e. the 2nd row)
+       else
+               ReadRecord();
+}
+
+// Attaches the reader to 'file' using a caller-supplied layout rather than
+// detecting one from the heading row.
+//
+// The delimiter, the number of columns per record and the heading indices are
+// stored and the reader is flagged as valid; the file is then opened through
+// Open(QFile &), which leaves the reader on the first data record.
+void CSV::Open(QFile &file, QChar delimiter, int numColumnsPerRecord, const ColumnIndicesHash &headingIndices)
+{
+       // Set up the properties...
+       Delimiter(delimiter);
+       NumColumnsPerRecord(numColumnsPerRecord);
+       UpdateHeadings(headingIndices);
+       IsValid(true);
+
+       // With the reader now valid, the single-argument overload resets the
+       // line and record numbers, rewinds the file, creates the stream that
+       // ReadRecord() reads from, and then skips the heading row. Calling
+       // ReadRecord() directly here would dereference a stream that was never
+       // created (or one still bound to a previously opened file).
+       Open(file);
+}
+
+// Detaches the reader from its file: the reader is flagged as not valid and
+// its file pointer is set to NULL.
+// NOTE(review): the stream is not touched here, so Stream() may still refer
+// to the file that was just detached - confirm this is intended.
+void CSV::Close()
+{
+       IsValid(false);
+       File(NULL);
+}
+
+// Reports whether there is nothing further to read from the stream.
+// A reader that has no stream (both constructors leave it NULL until a file
+// is opened) is reported as being at the end rather than being dereferenced.
+bool CSV::AtEnd() const
+{
+       return !Stream() || Stream()->atEnd();
+}
+
+QHash<QString, QString> CSV::ReadRecord()
+{
+       // If we have something more to read...
+       if(LineValues().count() < NumColumnsPerRecord() && !Stream()->atEnd())
+       {
+               // ...read a line's worth but make sure we have enough columns (i.e. handle newlines in values)
+               while(LineValues().count() < NumColumnsPerRecord())
+               {
+                       QStringList nextValues(QString(Stream()->readLine()).split(Delimiter()));
+                       if(LineValues().count() > 0)
+                       {
+                               // Merge the first value of the next line with the last of the previous...
+                               LineValues().last().append('\n');
+                               nextValues.removeAt(0);
+                       }
+                       LineValues().append(nextValues);
+                       ++LineNumber();
+               }
+       }
+
+       // The extract enough values to complete a record
+       QHash<QString, QString> recordValues;
+       for(int i(NumColumnsPerRecord() - 1); i >= 0 && LineValues().count() >= 0; --i)
+       {
+               recordValues.insert(HeadingNames().value(i), LineValues().value(i));
+               LineValues().removeAt(i);
+       }
+       return recordValues;
+}
+
+void CSV::GetHeadings(const QString &firstLineContent)
+{
+       QStringList headingsRaw(QString(firstLineContent).split(Delimiter(), QString::KeepEmptyParts, Qt::CaseSensitive));
+
+       // We have this many fields per record
+       NumColumnsPerRecord(headingsRaw.count());
+
+       // Grab each column heading, and tidy it up.
+       ColumnIndicesHash indices;
+       indices.reserve(headingsRaw.count());
+       for(QStringList::size_type i(0); i < headingsRaw.count(); ++i)
+       {
+               QString heading(ExtractString(headingsRaw.value(i)));
+               qDebug() << headingsRaw.value(i) << " : " << heading;
+
+               indices[heading] = i;
+       }
+
+       UpdateHeadings(indices);
+}
+
+// Checks 'requiredHeadings' against the headings currently stored and returns
+// those that are absent, in their original order. Each required heading is
+// lower-cased before it is looked up. An empty result means every required
+// heading is present.
+const QStringList CSV::HasRequiredHeadings(const QStringList &requiredHeadings)
+{
+       QStringList missingRequiredHeadings;
+
+       // Collect each required heading that has no stored index
+       for(QStringList::const_iterator required(requiredHeadings.constBegin()); required != requiredHeadings.constEnd(); ++required)
+       {
+               if(!HeadingIndices().contains(required->toLower()))
+                       missingRequiredHeadings.append(*required);
+       }
+
+       return missingRequiredHeadings;
+}
+
+void CSV::DetermineDelimiter(const QString &firstLineContent)
+{
+       // Count the non-alphanumeric characters used
+       QHash<QChar, uint> counts;
+       foreach(const QChar c, firstLineContent)
+               ++counts[c];
+
+       QList<QPair<QChar, uint> > orderedCounts;
+       orderedCounts.reserve(counts.size());
+       foreach(const QChar c, counts.keys())
+               if(!QChar(c).isLetterOrNumber())
+                       orderedCounts.append(QPair<QChar, uint>(c, counts.value(c)));
+
+       qSort(orderedCounts.begin(), orderedCounts.end(), SortByValueDesc());
+
+       // Work around Q_FOREACH macro limitation when dealing with
+       // multi-typed templates (comma issue)
+       typedef QPair<QChar, uint> bodge;
+       foreach(bodge count, orderedCounts)
+               qDebug() << count.first << " = " << count.second;
+
+       // No-one would be mad enough to use quotation marks or apostrophes
+       // as their delimiter,but just in case, check the second most
+       // frequent character is present the right number of times for
+       // the quotation marks to be present on every column heading (two
+       // per heading, less one as they're seperators)
+       if((orderedCounts.value(0).first == '"' || orderedCounts.value(0).first == '\'')
+               && ((orderedCounts.value(0).second / 2) - 1 == orderedCounts.value(1).second ))
+       {
+               // We're good.
+               Delimiter(orderedCounts.value(1).first);
+       }
+       else
+               Delimiter(orderedCounts.value(0).first);
+}
+
+// Returns 'originalString' with surrounding whitespace trimmed and at most
+// one leading and one trailing quote character (" or ') removed.
+//
+// The quotes are stripped explicitly rather than with the pattern
+// ^["']?(.*)?["']?$ because the greedy (.*) in that pattern also consumes the
+// closing quote, so it was never removed from the captured text.
+const QString CSV::ExtractString(const QString &originalString)
+{
+       const QLatin1Char doubleQuote('"');
+       const QLatin1Char singleQuote('\'');
+
+       QString content(originalString.trimmed());
+
+       // startsWith()/endsWith() are false for an empty string, so a value
+       // consisting of a single quote character is handled without extra checks
+       if(content.startsWith(doubleQuote) || content.startsWith(singleQuote))
+               content.remove(0, 1);
+       if(content.endsWith(doubleQuote) || content.endsWith(singleQuote))
+               content.chop(1);
+
+       return content;
+}
+
+void CSV::UpdateHeadings(const ColumnIndicesHash &headingIndices)
+{
+       HeadingIndices().clear();
+       HeadingIndices().reserve(headingIndices.count());
+       foreach(QString columnName, headingIndices.keys())
+               HeadingIndices().insert(columnName.toLower(), headingIndices.value(columnName));
+
+       // ..and prepare the bidirectional hash (toLower not needed as above
+       // value reused)
+       HeadingNames().clear();
+       HeadingNames().reserve(headingIndices.count());
+       foreach(QString columnName, HeadingIndices().keys())
+               HeadingNames().insert(HeadingIndices().value(columnName), columnName);
+}