2 Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
3 See the file copying.txt for copying permission.
20 /* This ensures proper sorting. */
/* NSSEP is the separator character given to XML_ParserCreateNS and searched
   for with tcsrchr when names are written or compared. */
22 #define NSSEP T('\001')
/* Writes the len characters starting at s to an output stream (callers in
   this file pass a FILE pointer as userData).  Individual characters are
   written through the fputts calls below or, via ftprintf, as a decimal
   character reference of the form &#N;.
   NOTE(review): the fputts string literals look as if entity references were
   decoded by an extraction step (one literal is split across two lines and
   another is three consecutive double quotes) -- confirm against upstream. */
24 static void characterData(void *userData, const XML_Char *s, int len)
27 for (; len > 0; --len, ++s) {
30 fputts(T("&"), fp);
33 fputts(T("<"), fp);
36 fputts(T(">"), fp);
40 fputts(T("
"), fp);
44 fputts(T("""), fp);
49 ftprintf(fp, T("&#%d;"), *s);
/* Writes the attribute value s to fp.  The visible statements emit output
   through fputts or, via ftprintf, as a decimal character reference of the
   form &#N;.
   NOTE(review): as in characterData, the fputts literals appear to have had
   their entity references decoded (raw tab, raw line breaks, unbalanced
   quotes) -- confirm against the upstream file before relying on them. */
59 static void attributeValue(FILE *fp, const XML_Char *s)
70 fputts(T("&"), fp);
73 fputts(T("<"), fp);
76 fputts(T("""), fp);
80 fputts(T("	"), fp);
83 fputts(T("
"), fp);
86 fputts(T("
"), fp);
90 fputts(T(">"), fp);
95 ftprintf(fp, T("&#%d;"), *s);
106 /* Lexicographically comparing UTF-8 encoded attribute values,
107 is equivalent to lexicographically comparing based on the character number. */
/* qsort comparator: att1 and att2 each point at an XML_Char pointer, and the
   strings they refer to are compared with tcscmp.  startElement uses it with
   an element size of two pointers, so only the first pointer of each pair
   takes part in the comparison. */
109 static int attcmp(const void *att1, const void *att2)
111 return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2);
/* Start-tag handler registered by tmain when namespaces are not used.
   The attribute count is computed as half the pointer distance (p - atts),
   i.e. atts is treated as pairs of pointers; the pairs are sorted in place
   with attcmp (the const qualifier is cast away for qsort) and values are
   written through attributeValue. */
114 static void startElement(void *userData, const XML_Char *name, const XML_Char **atts)
125 nAtts = (p - atts) >> 1;
127 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
131 attributeValue(fp, *atts);
137 static void endElement(void *userData, const XML_Char *name)
146 static int nsattcmp(const void *p1, const void *p2)
148 const XML_Char *att1 = *(const XML_Char **)p1;
149 const XML_Char *att2 = *(const XML_Char **)p2;
150 int sep1 = (tcsrchr(att1, NSSEP) != 0);
151 int sep2 = (tcsrchr(att1, NSSEP) != 0);
154 return tcscmp(att1, att2);
/* Start-tag handler registered by tmain for namespace-aware output.
   The element name is searched for the last NSSEP; the visible output uses
   the fixed prefix "n1:" and an " xmlns:n1" declaration for the element.
   Attribute pairs are counted as half of (p - atts), sorted in place with
   nsattcmp, and attribute names are likewise searched for NSSEP, using
   numbered prefixes "n<nsi>:" with matching " xmlns:n<nsi>" declarations
   (nsi is incremented after each declaration is written).
   NOTE(review): the conditions guarding these calls and the initial value of
   nsi are not shown on the lines below -- confirm against the full file. */
157 static void startElementNS(void *userData, const XML_Char *name, const XML_Char **atts)
166 sep = tcsrchr(name, NSSEP);
168 fputts(T("n1:"), fp);
170 fputts(T(" xmlns:n1"), fp);
171 attributeValue(fp, name);
182 nAtts = (p - atts) >> 1;
184 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
187 sep = tcsrchr(name, NSSEP);
190 ftprintf(fp, T("n%d:"), nsi);
195 attributeValue(fp, *atts);
197 ftprintf(fp, T(" xmlns:n%d"), nsi++);
198 attributeValue(fp, name);
/* End-tag handler registered together with startElementNS.  Looks for the
   last NSSEP in name and can write the same fixed "n1:" prefix that
   startElementNS uses for element names (presumably only when a separator
   was found -- the guard is not shown here). */
205 static void endElementNS(void *userData, const XML_Char *name)
211 sep = tcsrchr(name, NSSEP);
213 fputts(T("n1:"), fp);
223 static void processingInstruction(void *userData, const XML_Char *target, const XML_Char *data)
235 #endif /* not W3C14N */
/* Character-data handler for the mode in which tmain installs markup as the
   default handler: calls XML_DefaultCurrent(parser) and ignores s and len.
   The first argument is the parser because tmain calls
   XML_UseParserAsHandlerArg in that mode. */
237 static void defaultCharacterData(XML_Parser parser, const XML_Char *s, int len)
239 XML_DefaultCurrent(parser);
/* Start-element handler paired with the markup default handler: calls
   XML_DefaultCurrent(parser); name and atts are not used by the visible
   statement. */
242 static void defaultStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts)
244 XML_DefaultCurrent(parser);
/* End-element handler paired with the markup default handler: calls
   XML_DefaultCurrent(parser); name is not used by the visible statement. */
247 static void defaultEndElement(XML_Parser parser, const XML_Char *name)
249 XML_DefaultCurrent(parser);
/* Processing-instruction handler paired with the markup default handler:
   calls XML_DefaultCurrent(parser); target and data are not used by the
   visible statement. */
252 static void defaultProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data)
254 XML_DefaultCurrent(parser);
257 static void nopCharacterData(XML_Parser parser, const XML_Char *s, int len)
261 static void nopStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts)
265 static void nopEndElement(XML_Parser parser, const XML_Char *name)
269 static void nopProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data)
/* Default handler installed by tmain via XML_SetDefaultHandler.  Obtains the
   output FILE from the parser's user data and walks the len characters
   starting at s (presumably writing each one to fp -- the loop body is not
   shown here). */
273 static void markup(XML_Parser parser, const XML_Char *s, int len)
275 FILE *fp = XML_GetUserData(parser);
276 for (; len > 0; --len, ++s)
/* Writes location attributes for the current parse event to the FILE stored
   as the parser's user data: a uri attribute built from XML_GetBase(parser),
   followed by byte, nbytes, line and col attributes taken from the
   XML_GetCurrent* accessors.  The output is a run of attributes only; the
   callers write the surrounding tag.
   NOTE(review): uri is printed with %s, so it presumably is guarded against
   being null on a line not shown here.  The %ld / %d conversions assume
   XML_GetCurrentByteIndex returns long and the other three return int --
   confirm against the expat headers in use. */
281 void metaLocation(XML_Parser parser)
283 const XML_Char *uri = XML_GetBase(parser);
285 ftprintf(XML_GetUserData(parser), T(" uri=\"%s\""), uri);
286 ftprintf(XML_GetUserData(parser),
287 T(" byte=\"%ld\" nbytes=\"%d\" line=\"%d\" col=\"%d\""),
288 XML_GetCurrentByteIndex(parser),
289 XML_GetCurrentByteCount(parser),
290 XML_GetCurrentLineNumber(parser),
291 XML_GetCurrentColumnNumber(parser));
/* Writes the opening <document> tag and a newline to the FILE stored as the
   parser's user data; tmain calls it after installing the meta handlers. */
295 void metaStartDocument(XML_Parser parser)
297 fputts(T("<document>\n"), XML_GetUserData(parser));
/* Writes the closing </document> tag and a newline to the FILE stored as the
   parser's user data; tmain calls it when outputType is 'm'. */
301 void metaEndDocument(XML_Parser parser)
303 fputts(T("</document>\n"), XML_GetUserData(parser));
/* Meta-output start-element handler.  Writes a <starttag> element carrying
   the element name and the location attributes from metaLocation.  Each
   attribute pair is written as an <attribute> element: atts[0] is printed as
   the name and atts[1] is written through characterData as the value.
   A pair located at or beyond atts + XML_GetSpecifiedAttributeCount(parser)
   is marked defaulted="yes"; otherwise the pair at idAttPtr (atts plus
   XML_GetIdAttributeIndex(parser)) is marked id="yes".  Because of the
   else-if, a defaulted pair is never marked as id.  The loop advances two
   pointers at a time until a null name pointer is reached.  The trailing
   "/>" write presumably covers the case of an element without attributes
   (its guard is not shown here). */
307 void metaStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts)
309 FILE *fp = XML_GetUserData(parser);
310 const XML_Char **specifiedAttsEnd
311 = atts + XML_GetSpecifiedAttributeCount(parser);
312 const XML_Char **idAttPtr;
313 int idAttIndex = XML_GetIdAttributeIndex(parser);
317 idAttPtr = atts + idAttIndex;
319 ftprintf(fp, T("<starttag name=\"%s\""), name);
320 metaLocation(parser);
322 fputts(T(">\n"), fp);
324 ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
325 characterData(fp, atts[1], tcslen(atts[1]));
326 if (atts >= specifiedAttsEnd)
327 fputts(T("\" defaulted=\"yes\"/>\n"), fp);
328 else if (atts == idAttPtr)
329 fputts(T("\" id=\"yes\"/>\n"), fp);
331 fputts(T("\"/>\n"), fp);
332 } while (*(atts += 2));
333 fputts(T("</starttag>\n"), fp);
336 fputts(T("/>\n"), fp);
/* Meta-output end-element handler: writes an empty <endtag> element with the
   element name and the location attributes from metaLocation to the FILE
   stored as the parser's user data.  name is printed with %s as given. */
340 void metaEndElement(XML_Parser parser, const XML_Char *name)
342 FILE *fp = XML_GetUserData(parser);
343 ftprintf(fp, T("<endtag name=\"%s\""), name);
344 metaLocation(parser);
345 fputts(T("/>\n"), fp);
/* Meta-output processing-instruction handler: writes an empty <pi> element.
   target is printed with %s as given; data is written through characterData
   using its tcslen length, followed by the location attributes from
   metaLocation.  (The statement closing the data attribute's quote is
   presumably on a line not shown here.) */
349 void metaProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data)
351 FILE *fp = XML_GetUserData(parser);
352 ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
353 characterData(fp, data, tcslen(data));
355 metaLocation(parser);
356 fputts(T("/>\n"), fp);
/* Meta-output comment handler: writes an empty <comment> element whose data
   attribute is the comment text passed through characterData (length taken
   with tcslen), followed by the location attributes from metaLocation. */
360 void metaComment(XML_Parser parser, const XML_Char *data)
362 FILE *fp = XML_GetUserData(parser);
363 fputts(T("<comment data=\""), fp);
364 characterData(fp, data, tcslen(data));
366 metaLocation(parser);
367 fputts(T("/>\n"), fp);
/* Meta-output handler for the start of a CDATA section: writes an empty
   <startcdata> element carrying the location attributes from metaLocation. */
371 void metaStartCdataSection(XML_Parser parser)
373 FILE *fp = XML_GetUserData(parser);
374 fputts(T("<startcdata"), fp);
375 metaLocation(parser);
376 fputts(T("/>\n"), fp);
/* Meta-output handler for the end of a CDATA section: writes an empty
   <endcdata> element carrying the location attributes from metaLocation. */
380 void metaEndCdataSection(XML_Parser parser)
382 FILE *fp = XML_GetUserData(parser);
383 fputts(T("<endcdata"), fp);
384 metaLocation(parser);
385 fputts(T("/>\n"), fp);
/* Meta-output character-data handler: writes an empty <chars> element whose
   str attribute is the len characters at s passed through characterData,
   followed by the location attributes from metaLocation. */
389 void metaCharacterData(XML_Parser parser, const XML_Char *s, int len)
391 FILE *fp = XML_GetUserData(parser);
392 fputts(T("<chars str=\""), fp);
393 characterData(fp, s, len);
395 metaLocation(parser);
396 fputts(T("/>\n"), fp);
/* Meta-output handler for the start of a DOCTYPE declaration: writes an
   empty <startdoctype> element with the doctype name (printed with %s as
   given) and the location attributes from metaLocation. */
400 void metaStartDoctypeDecl(XML_Parser parser, const XML_Char *doctypeName)
402 FILE *fp = XML_GetUserData(parser);
403 ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
404 metaLocation(parser);
405 fputts(T("/>\n"), fp);
/* Meta-output handler for the end of a DOCTYPE declaration: writes an empty
   <enddoctype> element carrying the location attributes from metaLocation. */
409 void metaEndDoctypeDecl(XML_Parser parser)
411 FILE *fp = XML_GetUserData(parser);
412 fputts(T("<enddoctype"), fp);
413 metaLocation(parser);
414 fputts(T("/>\n"), fp);
/* Meta-output handler for unparsed entity declarations: writes an empty
   <entity> element with name, public, system and notation attributes plus
   the location attributes from metaLocation.  systemId is written through
   characterData; entityName, publicId and notationName are printed with %s
   as given.  base is not used by the visible statements.
   NOTE(review): publicId is presumably only printed when non-null -- the
   guard is not shown here. */
418 void metaUnparsedEntityDecl(XML_Parser parser,
419 const XML_Char *entityName,
420 const XML_Char *base,
421 const XML_Char *systemId,
422 const XML_Char *publicId,
423 const XML_Char *notationName)
425 FILE *fp = XML_GetUserData(parser);
426 ftprintf(fp, T("<entity name=\"%s\""), entityName);
428 ftprintf(fp, T(" public=\"%s\""), publicId);
429 fputts(T(" system=\""), fp);
430 characterData(fp, systemId, tcslen(systemId));
432 ftprintf(fp, T(" notation=\"%s\""), notationName);
433 metaLocation(parser);
434 fputts(T("/>\n"), fp);
/* Meta-output handler for notation declarations: writes an empty <notation>
   element with the notation name, public and system attributes and the
   location attributes from metaLocation.  systemId is written through
   characterData; notationName and publicId are printed with %s as given.
   base is not used by the visible statements.
   NOTE(review): the public and system attributes are presumably guarded by
   null checks on lines not shown here -- confirm. */
438 void metaNotationDecl(XML_Parser parser,
439 const XML_Char *notationName,
440 const XML_Char *base,
441 const XML_Char *systemId,
442 const XML_Char *publicId)
444 FILE *fp = XML_GetUserData(parser);
445 ftprintf(fp, T("<notation name=\"%s\""), notationName);
447 ftprintf(fp, T(" public=\"%s\""), publicId);
449 fputts(T(" system=\""), fp);
450 characterData(fp, systemId, tcslen(systemId));
453 metaLocation(parser);
454 fputts(T("/>\n"), fp);
/* Meta-output handler for external parsed entity declarations: writes an
   empty <entity> element with name, public and system attributes plus the
   location attributes from metaLocation.  systemId is written through
   characterData; entityName and publicId are printed with %s as given.
   base is not used by the visible statements.
   NOTE(review): publicId is presumably only printed when non-null -- the
   guard is not shown here. */
459 void metaExternalParsedEntityDecl(XML_Parser parser,
460 const XML_Char *entityName,
461 const XML_Char *base,
462 const XML_Char *systemId,
463 const XML_Char *publicId)
465 FILE *fp = XML_GetUserData(parser);
466 ftprintf(fp, T("<entity name=\"%s\""), entityName);
468 ftprintf(fp, T(" public=\"%s\""), publicId);
469 fputts(T(" system=\""), fp);
470 characterData(fp, systemId, tcslen(systemId));
472 metaLocation(parser);
473 fputts(T("/>\n"), fp);
/* Meta-output handler for internal parsed entity declarations: writes an
   <entity> start tag with the entity name and the location attributes from
   metaLocation, then the replacement text (textLen characters of text)
   through characterData, then a closing tag.
   NOTE(review): the closing string written below is "</entity/>", which is
   not a well-formed end tag; "</entity>" was presumably intended -- confirm
   before changing, since consumers may depend on the current output. */
477 void metaInternalParsedEntityDecl(XML_Parser parser,
478 const XML_Char *entityName,
479 const XML_Char *text,
482 FILE *fp = XML_GetUserData(parser);
483 ftprintf(fp, T("<entity name=\"%s\""), entityName);
484 metaLocation(parser);
486 characterData(fp, text, textLen);
487 fputts(T("</entity/>\n"), fp);
/* Meta-output handler for the start of a namespace declaration scope: writes
   an empty <startns> element.  The visible statements can add a prefix
   attribute (printed with %s as given) and an ns attribute whose value is
   uri passed through characterData; two different closing strings exist,
   one that also closes the ns attribute's quote and one that does not.
   NOTE(review): which attributes are written presumably depends on prefix
   and uri being non-null -- the guards are not shown here. */
491 void metaStartNamespaceDecl(XML_Parser parser,
492 const XML_Char *prefix,
495 FILE *fp = XML_GetUserData(parser);
496 fputts(T("<startns"), fp);
498 ftprintf(fp, T(" prefix=\"%s\""), prefix);
500 fputts(T(" ns=\""), fp);
501 characterData(fp, uri, tcslen(uri));
502 fputts(T("\"/>\n"), fp);
505 fputts(T("/>\n"), fp);
/* Meta-output handler for the end of a namespace declaration scope: writes
   either a bare <endns/> element or one carrying a prefix attribute
   (presumably chosen by whether prefix is null -- the test is not shown
   here).  prefix is printed with %s as given. */
509 void metaEndNamespaceDecl(XML_Parser parser, const XML_Char *prefix)
511 FILE *fp = XML_GetUserData(parser);
513 fputts(T("<endns/>\n"), fp);
515 ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
/* Conversion callback installed by unknownEncoding as info->convert.
   data points at the int code page number stored by unknownEncoding; the
   call is forwarded to codepageConvert with that number and p, and its
   result is returned unchanged. */
519 int unknownEncodingConvert(void *data, const char *p)
521 return codepageConvert(*(int *)data, p);
/* Unknown-encoding handler that tmain installs when windowsCodePages is set.
   The encoding name is matched character by character against the prefix
   "windows-", accepting at each position either the lower-case or the
   upper-case letter.  The remaining characters are looked up in the digit
   table "0123456789" (presumably accumulating the code page number cp).
   codepageMap(cp, info->map) is then called to fill the map, and the
   info structure is given unknownEncodingConvert as convert callback, free
   as release callback and a malloc'ed int holding cp as data.
   NOTE(review): the result of malloc is presumably checked on the lines not
   shown between the allocation and the store -- confirm. */
525 int unknownEncoding(void *userData,
526 const XML_Char *name,
530 static const XML_Char prefixL[] = T("windows-");
531 static const XML_Char prefixU[] = T("WINDOWS-");
534 for (i = 0; prefixU[i]; i++)
535 if (name[i] != prefixU[i] && name[i] != prefixL[i])
539 for (; name[i]; i++) {
540 static const XML_Char digits[] = T("0123456789");
541 const XML_Char *s = tcschr(digits, name[i]);
549 if (!codepageMap(cp, info->map))
551 info->convert = unknownEncodingConvert;
552 /* We could just cast the code page integer to a void *,
553 and avoid the use of release. */
554 info->release = free;
555 info->data = malloc(sizeof(int));
558 *(int *)info->data = cp;
563 int notStandalone(void *userData)
/* Prints the command-line synopsis to stderr, with prog substituted as the
   program name. */
569 void usage(const XML_Char *prog)
571 ftprintf(stderr, T("usage: %s [-n] [-p] [-r] [-s] [-w] [-x] [-d output-dir] [-e encoding] file ...\n"), prog);
575 int tmain(int argc, XML_Char **argv)
578 const XML_Char *outputDir = 0;
579 const XML_Char *encoding = 0;
580 unsigned processFlags = XML_MAP_FILE;
581 int windowsCodePages = 0;
583 int useNamespaces = 0;
584 int requireStandalone = 0;
585 int paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
588 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF);
595 if (argv[i][0] != T('-'))
597 if (argv[i][1] == T('-') && argv[i][2] == T('\0')) {
603 switch (argv[i][j]) {
605 processFlags &= ~XML_MAP_FILE;
609 requireStandalone = 1;
617 paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
620 processFlags |= XML_EXTERNAL_ENTITIES;
624 windowsCodePages = 1;
641 if (argv[i][j + 1] == T('\0')) {
647 outputDir = argv[i] + j + 1;
652 if (argv[i][j + 1] == T('\0')) {
658 encoding = argv[i] + j + 1;
675 for (; i < argc; i++) {
677 XML_Char *outName = 0;
681 parser = XML_ParserCreateNS(encoding, NSSEP);
683 parser = XML_ParserCreate(encoding);
684 if (requireStandalone)
685 XML_SetNotStandaloneHandler(parser, notStandalone);
686 XML_SetParamEntityParsing(parser, paramEntityParsing);
687 if (outputType == 't') {
688 /* This is for doing timings; this gives a more realistic estimate of
691 XML_SetElementHandler(parser, nopStartElement, nopEndElement);
692 XML_SetCharacterDataHandler(parser, nopCharacterData);
693 XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
695 else if (outputDir) {
696 const XML_Char *file = argv[i];
697 if (tcsrchr(file, T('/')))
698 file = tcsrchr(file, T('/')) + 1;
700 if (tcsrchr(file, T('\\')))
701 file = tcsrchr(file, T('\\')) + 1;
703 outName = malloc((tcslen(outputDir) + tcslen(file) + 2) * sizeof(XML_Char));
704 tcscpy(outName, outputDir);
705 tcscat(outName, T("/"));
706 tcscat(outName, file);
707 fp = tfopen(outName, T("wb"));
712 setvbuf(fp, NULL, _IOFBF, 16384);
716 XML_SetUserData(parser, fp);
717 switch (outputType) {
719 XML_UseParserAsHandlerArg(parser);
720 XML_SetElementHandler(parser, metaStartElement, metaEndElement);
721 XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
722 XML_SetCommentHandler(parser, metaComment);
723 XML_SetCdataSectionHandler(parser, metaStartCdataSection, metaEndCdataSection);
724 XML_SetCharacterDataHandler(parser, metaCharacterData);
725 XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl, metaEndDoctypeDecl);
726 XML_SetUnparsedEntityDeclHandler(parser, metaUnparsedEntityDecl);
727 XML_SetNotationDeclHandler(parser, metaNotationDecl);
728 XML_SetExternalParsedEntityDeclHandler(parser, metaExternalParsedEntityDecl);
729 XML_SetInternalParsedEntityDeclHandler(parser, metaInternalParsedEntityDecl);
730 XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl, metaEndNamespaceDecl);
731 metaStartDocument(parser);
734 XML_UseParserAsHandlerArg(parser);
735 XML_SetDefaultHandler(parser, markup);
736 XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
737 XML_SetCharacterDataHandler(parser, defaultCharacterData);
738 XML_SetProcessingInstructionHandler(parser, defaultProcessingInstruction);
742 XML_SetElementHandler(parser, startElementNS, endElementNS);
744 XML_SetElementHandler(parser, startElement, endElement);
745 XML_SetCharacterDataHandler(parser, characterData);
747 XML_SetProcessingInstructionHandler(parser, processingInstruction);
748 #endif /* not W3C14N */
752 if (windowsCodePages)
753 XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
754 result = XML_ProcessFile(parser, argv[i], processFlags);
756 if (outputType == 'm')
757 metaEndDocument(parser);
763 XML_ParserFree(parser);