29#include <QtCore/private/qglobal_p.h>
30#include <qxmlstream.h>
33#include <qstringconverter.h>
37#ifndef QXMLSTREAMPARSER_P_H
38#define QXMLSTREAMPARSER_P_H
42#if QT_CONFIG(xmlstreamreader)
44bool QXmlStreamReaderPrivate::parse()
48 using namespace Qt::StringLiterals;
51 case QXmlStreamReader::StartElement:
54 qualifiedName.clear();
56 publicNamespaceDeclarations.clear();
59 setType(QXmlStreamReader::EndElement);
60 Tag tag = tagStack_pop();
61 namespaceUri = tag.namespaceDeclaration.namespaceUri;
62 prefix = tag.namespaceDeclaration.prefix;
64 qualifiedName = tag.qualifiedName;
65 isEmptyElement =
false;
70 case QXmlStreamReader::EndElement:
73 qualifiedName.clear();
77 case QXmlStreamReader::DTD:
78 publicNotationDeclarations.clear();
79 publicEntityDeclarations.clear();
84 case QXmlStreamReader::Comment:
85 case QXmlStreamReader::Characters:
91 case QXmlStreamReader::EntityReference:
96 case QXmlStreamReader::ProcessingInstruction:
97 processingInstructionTarget.clear();
98 processingInstructionData.clear();
101 case QXmlStreamReader::NoToken:
102 case QXmlStreamReader::Invalid:
104 case QXmlStreamReader::StartDocument:
106 documentVersion.clear();
107 documentEncoding.clear();
108 if (decoder.isValid() && decoder.hasError()) {
109 raiseWellFormedError(QXmlStream::tr(
"Encountered incorrectly encoded content."));
119 setType(QXmlStreamReader::NoToken);
125 if (resumeReduction) {
126 act = state_stack[tos-1];
129 goto ResumeReduction;
132 act = state_stack[tos];
135 if (token == -1 && - TERMINAL_COUNT != action_index[act]) {
138 token_char = cu == ~0U ? cu : ushort(cu);
139 if ((cu != ~0U) && (cu & 0xff0000)) {
141 }
else switch (token_char) {
149 if ((token_char = filterCarriageReturn())) {
151 lastLineStart = characterOffset + readBufferPos;
160 if (!tagsDone && !inParseEntity) {
161 int a = t_action(act, token);
163 raiseError(QXmlStreamReader::PrematureEndOfDocumentError);
171 lastLineStart = characterOffset + readBufferPos;
241 token = QUESTIONMARK;
267 act = t_action (act, token);
268 if (act == ACCEPT_STATE) {
271 state_stack[tos++] = 0;
272 state_stack[tos] = 0;
274 }
else if (act > 0) {
275 if (++tos >= stack_size-1)
278 Value &val = sym_stack[tos];
280 val.pos = textBuffer.size();
284 textBuffer += QChar(token_char);
286 state_stack[tos] = act;
290 }
else if (act < 0) {
293#if defined (QLALR_DEBUG)
294 int ridx = rule_index[r];
295 printf (
"%3d) %s ::=", r + 1, spell[rule_info[ridx]]);
297 for (
int i = ridx; i < ridx + rhs[r]; ++i) {
298 int symbol = rule_info[i];
299 if (
const char *name = spell[symbol])
300 printf (
" %s", name);
302 printf (
" #%d", symbol);
308 act = state_stack[tos++];
313 setType(QXmlStreamReader::EndDocument);
317 if (type != QXmlStreamReader::Invalid) {
318 if (hasSeenTag || inParseEntity) {
319 setType(QXmlStreamReader::EndDocument);
321 raiseError(QXmlStreamReader::NotWellFormedError, QXmlStream::tr(
"Start tag expected."));
324 state_stack[tos++] = 0;
325 state_stack[tos] = 0;
332 auto reference = entityReferenceStack.pop();
333 auto it = reference.hash->find(reference.name);
334 Q_ASSERT(it != reference.hash->end());
335 it->isCurrentlyReferenced =
false;
336 if (entityReferenceStack.isEmpty())
342 if (!scanString(spell[VERSION], VERSION,
false) && atEnd) {
349 setType(QXmlStreamReader::StartDocument);
350 documentVersion = symString(6);
355 hasExternalDtdSubset =
true;
356 dtdSystemId = symString(2);
360 checkPublicLiteral(symString(2));
361 dtdPublicId = symString(2);
362 dtdSystemId = symString(4);
363 hasExternalDtdSubset =
true;
367 if (!scanPublicOrSystem() && atEnd) {
371 dtdName = symString(3);
376 dtdName = symString(3);
381 setType(QXmlStreamReader::DTD);
394 if (!scanString(spell[EMPTY], EMPTY,
false)
395 && !scanString(spell[ANY], ANY,
false)
403 if (!scanString(spell[PCDATA], PCDATA,
false) && atEnd) {
410 lastAttributeIsCData =
true;
414 if (!scanAfterDefaultDecl() && atEnd) {
422 lastAttributeValue.clear();
423 lastAttributeIsCData =
false;
424 if (!scanAttType() && atEnd) {
431 DtdAttribute &dtdAttribute = dtdAttributes.push();
432 dtdAttribute.tagName.clear();
433 dtdAttribute.isCDATA = lastAttributeIsCData;
434 dtdAttribute.attributePrefix = addToStringStorage(symPrefix(1));
435 dtdAttribute.attributeName = addToStringStorage(symString(1));
436 dtdAttribute.attributeQualifiedName = addToStringStorage(symName(1));
437 dtdAttribute.isNamespaceAttribute = (dtdAttribute.attributePrefix ==
"xmlns"_L1
438 || (dtdAttribute.attributePrefix.isEmpty()
439 && dtdAttribute.attributeName ==
"xmlns"_L1));
440 if (lastAttributeValue.isNull()) {
441 dtdAttribute.defaultValue.clear();
443 if (dtdAttribute.isCDATA)
444 dtdAttribute.defaultValue = addToStringStorage(lastAttributeValue);
446 dtdAttribute.defaultValue = addToStringStorage(lastAttributeValue.toString().simplified());
452 if (referenceToUnparsedEntityDetected && !standalone)
454 qsizetype n = dtdAttributes.size();
455 XmlStringRef tagName = addToStringStorage(symName(3));
457 DtdAttribute &dtdAttribute = dtdAttributes[n];
458 if (!dtdAttribute.tagName.isNull())
460 dtdAttribute.tagName = tagName;
461 for (qsizetype i = 0; i < n; ++i) {
462 if ((dtdAttributes[i].tagName.isNull() || dtdAttributes[i].tagName == tagName)
463 && dtdAttributes[i].attributeQualifiedName == dtdAttribute.attributeQualifiedName) {
464 dtdAttribute.attributeQualifiedName.clear();
472 if (!scanPublicOrSystem() && atEnd) {
476 EntityDeclaration &entityDeclaration = entityDeclarations.push();
477 entityDeclaration.clear();
478 entityDeclaration.name = symString(3);
482 if (!scanPublicOrSystem() && atEnd) {
486 EntityDeclaration &entityDeclaration = entityDeclarations.push();
487 entityDeclaration.clear();
488 entityDeclaration.name = symString(5);
489 entityDeclaration.parameter =
true;
493 if (!scanNData() && atEnd) {
497 EntityDeclaration &entityDeclaration = entityDeclarations.top();
498 entityDeclaration.systemId = symString(3);
499 entityDeclaration.external =
true;
503 if (!scanNData() && atEnd) {
507 EntityDeclaration &entityDeclaration = entityDeclarations.top();
508 checkPublicLiteral((entityDeclaration.publicId = symString(3)));
509 entityDeclaration.systemId = symString(5);
510 entityDeclaration.external =
true;
514 EntityDeclaration &entityDeclaration = entityDeclarations.top();
515 entityDeclaration.notationName = symString(3);
516 if (entityDeclaration.parameter)
517 raiseWellFormedError(QXmlStream::tr(
"NDATA in parameter entity declaration."));
523 if (referenceToUnparsedEntityDetected && !standalone) {
524 entityDeclarations.pop();
527 EntityDeclaration &entityDeclaration = entityDeclarations.top();
528 if (!entityDeclaration.external)
529 entityDeclaration.value = symString(2);
530 auto &hash = entityDeclaration.parameter ? parameterEntityHash : entityHash;
531 if (!hash.contains(entityDeclaration.name)) {
532 Entity entity(entityDeclaration.name.toString(),
533 entityDeclaration.value.toString());
534 entity.unparsed = (!entityDeclaration.notationName.isNull());
535 entity.external = entityDeclaration.external;
536 hash.insert(qToStringViewIgnoringNull(entity.name), entity);
541 setType(QXmlStreamReader::ProcessingInstruction);
542 const qsizetype pos = sym(4).pos + sym(4).len;
543 processingInstructionTarget = symString(3);
544 if (scanUntil(
"?>")) {
545 processingInstructionData = XmlStringRef(&textBuffer, pos, textBuffer.size() - pos - 2);
546 if (!processingInstructionTarget.view().compare(
"xml"_L1, Qt::CaseInsensitive)) {
547 raiseWellFormedError(QXmlStream::tr(
"XML declaration not at start of document."));
549 else if (!QXmlUtils::isNCName(processingInstructionTarget))
550 raiseWellFormedError(QXmlStream::tr(
"%1 is an invalid processing instruction name.")
551 .arg(processingInstructionTarget));
552 }
else if (type != QXmlStreamReader::Invalid){
559 setType(QXmlStreamReader::ProcessingInstruction);
560 processingInstructionTarget = symString(3);
561 if (!processingInstructionTarget.view().compare(
"xml"_L1, Qt::CaseInsensitive))
562 raiseWellFormedError(QXmlStream::tr(
"Invalid processing instruction name."));
566 if (!scanAfterLangleBang() && atEnd) {
573 if (!scanUntil(
"--")) {
580 setType(QXmlStreamReader::Comment);
581 const qsizetype pos = sym(1).pos + 4;
582 text = XmlStringRef(&textBuffer, pos, textBuffer.size() - pos - 3);
586 setType(QXmlStreamReader::Characters);
588 isWhitespace =
false;
589 const qsizetype pos = sym(2).pos;
590 if (scanUntil(
"]]>", -1)) {
591 text = XmlStringRef(&textBuffer, pos, textBuffer.size() - pos - 3);
599 if (!scanPublicOrSystem() && atEnd) {
603 NotationDeclaration ¬ationDeclaration = notationDeclarations.push();
604 notationDeclaration.name = symString(3);
608 NotationDeclaration ¬ationDeclaration = notationDeclarations.top();
609 notationDeclaration.systemId = symString(3);
610 notationDeclaration.publicId.clear();
614 NotationDeclaration ¬ationDeclaration = notationDeclarations.top();
615 notationDeclaration.systemId.clear();
616 checkPublicLiteral((notationDeclaration.publicId = symString(3)));
620 NotationDeclaration ¬ationDeclaration = notationDeclarations.top();
621 checkPublicLiteral((notationDeclaration.publicId = symString(3)));
622 notationDeclaration.systemId = symString(5);
626 isWhitespace =
false;
630 sym(1).len += fastScanContentCharList();
631 if (atEnd && !inParseEntity) {
638 if (!textBuffer.isEmpty()) {
639 setType(QXmlStreamReader::Characters);
658 sym(1).len += sym(2).len;
662 if (normalizeLiterals)
663 textBuffer.data()[textBuffer.size()-1] = u' ';
667 sym(1).len += fastScanLiteralContent();
675 if (!QXmlUtils::isPublicID(symString(1))) {
676 raiseWellFormedError(QXmlStream::tr(
"%1 is an invalid PUBLIC identifier.").arg(symString(1)));
696 sym(1).len += sym(2).len;
707 lastAttributeValue = symString(1);
714 sym(1).len += sym(2).len;
718 const XmlStringRef prfx = symPrefix(1);
719 if (prfx.isEmpty() && symString(1) ==
"xmlns"_L1 && namespaceProcessing) {
720 NamespaceDeclaration &namespaceDeclaration = namespaceDeclarations.push();
721 namespaceDeclaration.prefix.clear();
723 const XmlStringRef ns(symString(5));
724 if (ns.view() ==
"http://www.w3.org/2000/xmlns/"_L1 ||
725 ns.view() ==
"http://www.w3.org/XML/1998/namespace"_L1)
726 raiseWellFormedError(QXmlStream::tr(
"Illegal namespace declaration."));
728 namespaceDeclaration.namespaceUri = addToStringStorage(ns);
730 Attribute &attribute = attributeStack.push();
731 attribute.key = sym(1);
732 attribute.value = sym(5);
734 XmlStringRef attributeQualifiedName = symName(1);
735 bool normalize =
false;
736 for (
const DtdAttribute &dtdAttribute : std::as_const(dtdAttributes)) {
737 if (!dtdAttribute.isCDATA
738 && dtdAttribute.tagName == qualifiedName
739 && dtdAttribute.attributeQualifiedName == attributeQualifiedName
747 const qsizetype pos = textBuffer.size();
749 bool wasSpace =
true;
750 for (qsizetype i = 0; i < attribute.value.len; ++i) {
751 QChar c = textBuffer.at(attribute.value.pos + i);
752 if (c.unicode() ==
' ') {
759 textBuffer += textBuffer.at(attribute.value.pos + i);
763 while (n && textBuffer.at(pos + n - 1).unicode() ==
' ')
765 attribute.value.pos = pos;
766 attribute.value.len = n;
768 if (prfx ==
"xmlns"_L1 && namespaceProcessing) {
769 NamespaceDeclaration &namespaceDeclaration = namespaceDeclarations.push();
770 XmlStringRef namespacePrefix = symString(attribute.key);
771 XmlStringRef namespaceUri = symString(attribute.value);
772 attributeStack.pop();
773 if (((namespacePrefix ==
"xml"_L1)
774 ^ (namespaceUri ==
"http://www.w3.org/XML/1998/namespace"_L1))
775 || namespaceUri ==
"http://www.w3.org/2000/xmlns/"_L1
776 || namespaceUri.isEmpty()
777 || namespacePrefix ==
"xmlns"_L1)
778 raiseWellFormedError(QXmlStream::tr(
"Illegal namespace declaration."));
780 namespaceDeclaration.prefix = addToStringStorage(namespacePrefix);
781 namespaceDeclaration.namespaceUri = addToStringStorage(namespaceUri);
787 normalizeLiterals =
true;
788 Tag &tag = tagStack_push();
789 prefix = tag.namespaceDeclaration.prefix = addToStringStorage(symPrefix(2));
790 name = tag.name = addToStringStorage(symString(2));
791 qualifiedName = tag.qualifiedName = addToStringStorage(symName(2));
792 if ((!prefix.isEmpty() && !QXmlUtils::isNCName(prefix)) || !QXmlUtils::isNCName(name))
793 raiseWellFormedError(QXmlStream::tr(
"Invalid XML name."));
797 isEmptyElement =
true;
801 setType(QXmlStreamReader::StartElement);
803 if (tagStack.size() == 1 && hasSeenTag && !inParseEntity)
804 raiseWellFormedError(QXmlStream::tr(
"Extra content at end of document."));
809 setType(QXmlStreamReader::EndElement);
810 Tag tag = tagStack_pop();
812 namespaceUri = tag.namespaceDeclaration.namespaceUri;
813 prefix = tag.namespaceDeclaration.prefix;
815 qualifiedName = tag.qualifiedName;
816 if (qualifiedName != symName(3))
817 raiseWellFormedError(QXmlStream::tr(
"Opening and ending tag mismatch."));
821 if (entitiesMustBeDeclared()) {
822 raiseWellFormedError(QXmlStream::tr(
"Entity '%1' not declared.").arg(unresolvedEntity));
825 setType(QXmlStreamReader::EntityReference);
826 name = &unresolvedEntity;
830 sym(1).len += sym(2).len + 1;
831 QStringView reference = symView(2);
832 if (
const auto it = entityHash.find(reference); it != entityHash.end()) {
833 Entity &entity = *it;
834 if (entity.unparsed) {
835 raiseWellFormedError(QXmlStream::tr(
"Reference to unparsed entity '%1'.").arg(reference));
837 if (!entity.hasBeenParsed) {
838 parseEntity(entity.value);
839 entity.hasBeenParsed =
true;
842 putStringLiteral(entity.value);
843 else if (referenceEntity(&entityHash, entity))
844 putReplacement(entity.value);
845 textBuffer.chop(2 + sym(2).len);
851 if (entityResolver) {
852 QString replacementText = resolveUndeclaredEntity(reference.toString());
853 if (!replacementText.isNull()) {
854 putReplacement(replacementText);
855 textBuffer.chop(2 + sym(2).len);
861 injectToken(UNRESOLVED_ENTITY);
862 unresolvedEntity = symString(2).toString();
863 textBuffer.chop(2 + sym(2).len);
869 sym(1).len += sym(2).len + 1;
870 QStringView reference = symView(2);
871 if (
const auto it = parameterEntityHash.find(reference); it != parameterEntityHash.end()) {
872 referenceToParameterEntityDetected =
true;
873 Entity &entity = *it;
874 if (entity.unparsed || entity.external) {
875 referenceToUnparsedEntityDetected =
true;
877 if (referenceEntity(¶meterEntityHash, entity))
878 putString(entity.value);
879 textBuffer.chop(2 + sym(2).len);
882 }
else if (entitiesMustBeDeclared()) {
883 raiseWellFormedError(QXmlStream::tr(
"Entity '%1' not declared.").arg(symString(2)));
888 sym(1).len += sym(2).len + 1;
892 sym(1).len += sym(2).len + 1;
893 QStringView reference = symView(2);
894 if (
const auto it = entityHash.find(reference); it != entityHash.end()) {
895 Entity &entity = *it;
896 if (entity.unparsed || entity.value.isNull()) {
897 raiseWellFormedError(QXmlStream::tr(
"Reference to external entity '%1' in attribute value.").arg(reference));
900 if (!entity.hasBeenParsed) {
901 parseEntity(entity.value);
902 entity.hasBeenParsed =
true;
905 putStringLiteral(entity.value);
906 else if (referenceEntity(&entityHash, entity))
907 putReplacementInAttributeValue(entity.value);
908 textBuffer.chop(2 + sym(2).len);
913 if (entityResolver) {
914 QString replacementText = resolveUndeclaredEntity(reference.toString());
915 if (!replacementText.isNull()) {
916 putReplacement(replacementText);
917 textBuffer.chop(2 + sym(2).len);
922 if (entitiesMustBeDeclared()) {
923 raiseWellFormedError(QXmlStream::tr(
"Entity '%1' not declared.").arg(reference));
928 if (
char32_t s = resolveCharRef(3)) {
929 putStringLiteral(QChar::fromUcs4(s));
930 textBuffer.chop(3 + sym(3).len);
933 raiseWellFormedError(QXmlStream::tr(
"Invalid character reference."));
939 sym(1).len += sym(2).len;
943 sym(1).len += fastScanSpace();
952 if (
auto res = fastScanName(&val))
964 if (
auto res = fastScanName())
980 sym(1).len += fastScanNMTOKEN();
991 act = state_stack[tos] = nt_action (act, lhs[r] - TERMINAL_COUNT);
992 if (type != QXmlStreamReader::NoToken)