Skip to content

Commit

Permalink
Fix xml parser truncating large texts
Browse files Browse the repository at this point in the history
  • Loading branch information
prakanth97 committed Nov 28, 2023
1 parent ae34fc4 commit 8d3ff90
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 44 deletions.
16 changes: 16 additions & 0 deletions ballerina/tests/fromXml_test.bal
Original file line number Diff line number Diff line change
Expand Up @@ -1459,6 +1459,22 @@ function testXmlWithAttributesAgainstOpenRecord3() returns error? {
]);
}

// Checks that XML comments placed inside an element's text do not split the value:
// the text on both sides of each comment is expected to come back as one string.
@test:Config{}
function testCommentMiddleInContent() returns error? {
    // Template lines are left unindented so the XML payload stays exactly as written.
    string commentedXml = string `<Data>
<A>John<!-- firstname --> Doe<!-- lastname --></A>
</Data>`;

    // Case 1: open record, field "A" is discovered from the data.
    record {} openRec = check fromXmlStringWithType(commentedXml);
    test:assertEquals(openRec.length(), 1);
    test:assertEquals(openRec.get("A"), "John Doe");

    // Case 2: closed record with an explicitly typed string field "A".
    record {|
        string A;
    |} closedRec = check fromXmlStringWithType(commentedXml);
    test:assertEquals(closedRec.length(), 1);
    test:assertEquals(closedRec.A, "John Doe");
}

// Negative cases
type DataN1 record {|
int A;
Expand Down
120 changes: 76 additions & 44 deletions native/src/main/java/io/ballerina/stdlib/data/xml/XmlParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,16 @@ public Object parse(XmlParserData xmlParserData) {
try {
parseRootElement(xmlStreamReader, xmlParserData);

boolean readNext = false;
int next;
while (xmlStreamReader.hasNext()) {
int next = xmlStreamReader.next();
if (readNext) {
readNext = false;
next = xmlStreamReader.getEventType();
} else {
next = xmlStreamReader.next();
}

switch (next) {
case START_ELEMENT:
readElement(xmlStreamReader, xmlParserData);
Expand All @@ -136,8 +144,11 @@ public Object parse(XmlParserData xmlParserData) {
endElement(xmlStreamReader, xmlParserData);
break;
case CDATA:
readText(xmlStreamReader, true, xmlParserData);
break;
case CHARACTERS:
readText(xmlStreamReader, xmlParserData);
readText(xmlStreamReader, false, xmlParserData);
readNext = true;
break;
case END_DOCUMENT:
return buildDocument(xmlParserData);
Expand All @@ -163,8 +174,15 @@ public Object parse(XmlParserData xmlParserData) {

public void parseRecordRest(String startElementName, XmlParserData xmlParserData) {
try {
boolean readNext = false;
int next;
while (xmlStreamReader.hasNext()) {
int next = xmlStreamReader.next();
if (readNext) {
readNext = false;
next = xmlStreamReader.getEventType();
} else {
next = xmlStreamReader.next();
}

// Terminate the record rest field parsing if the end element is reached.
if (next == END_ELEMENT) {
Expand All @@ -186,8 +204,11 @@ public void parseRecordRest(String startElementName, XmlParserData xmlParserData
endElement(xmlStreamReader, xmlParserData);
break;
case CDATA:
readText(xmlStreamReader, true, xmlParserData);
break;
case CHARACTERS:
readText(xmlStreamReader, xmlParserData);
readText(xmlStreamReader, false, xmlParserData);
readNext = true;
break;
case END_DOCUMENT:
buildDocument(xmlParserData);
Expand All @@ -208,37 +229,36 @@ public void parseRecordRest(String startElementName, XmlParserData xmlParserData
}
}

private void parseRootElement(XMLStreamReader xmlStreamReader, XmlParserData xmlParserData) {
try {
if (xmlStreamReader.hasNext()) {
int next = xmlStreamReader.next();
if (next == COMMENT || next == PROCESSING_INSTRUCTION) {
parseRootElement(xmlStreamReader, xmlParserData);
return;
} else if (next != START_ELEMENT) {
throw DiagnosticLog.error(DiagnosticErrorCode.XML_ROOT_MISSING);
}
private void parseRootElement(XMLStreamReader xmlStreamReader,
XmlParserData xmlParserData) throws XMLStreamException {
if (xmlStreamReader.hasNext()) {
int next = xmlStreamReader.next();
if (next == COMMENT || next == PROCESSING_INSTRUCTION) {
parseRootElement(xmlStreamReader, xmlParserData);
return;
} else if (next != START_ELEMENT) {
throw DiagnosticLog.error(DiagnosticErrorCode.XML_ROOT_MISSING);
}
}

RecordType rootRecord = xmlParserData.rootRecord;
initRootObject(rootRecord, xmlParserData);
RecordType rootRecord = xmlParserData.rootRecord;
initRootObject(rootRecord, xmlParserData);

QualifiedName elementQName = getElementName(xmlStreamReader);
xmlParserData.rootElement =
DataUtils.validateAndGetXmlNameFromRecordAnnotation(rootRecord, rootRecord.getName(), elementQName);
DataUtils.validateTypeNamespace(elementQName.getPrefix(), elementQName.getNamespaceURI(), rootRecord);
QualifiedName elementQName = getElementName(xmlStreamReader);
xmlParserData.rootElement =
DataUtils.validateAndGetXmlNameFromRecordAnnotation(rootRecord, rootRecord.getName(), elementQName);
DataUtils.validateTypeNamespace(elementQName.getPrefix(), elementQName.getNamespaceURI(), rootRecord);

// Keep track of fields and attributes
updateExpectedTypeStacks(rootRecord, xmlParserData);
handleAttributes(xmlStreamReader, xmlParserData);
} catch (XMLStreamException e) {
handleXMLStreamException(e);
}
// Keep track of fields and attributes
updateExpectedTypeStacks(rootRecord, xmlParserData);
handleAttributes(xmlStreamReader, xmlParserData);
}

private void readText(XMLStreamReader xmlStreamReader, XmlParserData xmlParserData) {
private void readText(XMLStreamReader xmlStreamReader,
boolean isCData,
XmlParserData xmlParserData) throws XMLStreamException {
Field currentField = xmlParserData.currentField;
String text = xmlStreamReader.getText();
String text = isCData ? xmlStreamReader.getText() : handleTruncatedCharacters(xmlStreamReader);
if (text.strip().isBlank()) {
return;
}
Expand All @@ -257,16 +277,6 @@ private void readText(XMLStreamReader xmlStreamReader, XmlParserData xmlParserDa
BString bFieldName = StringUtils.fromString(fieldName);
Type fieldType = TypeUtils.getReferredType(currentField.getFieldType());
if (currentNode.containsKey(bFieldName)) {
// Handle - <name>James <!-- FirstName --> Clark</name>
if (!xmlParserData.siblings.get(
xmlParserData.modifiedNamesHierarchy.peek().getOrDefault(fieldName,
new QualifiedName(QualifiedName.NS_ANNOT_NOT_DEFINED, fieldName, "")))
&& DataUtils.isStringValueAssignable(fieldType.getTag())) {
currentNode.put(bFieldName,
StringUtils.fromString(currentNode.get(bFieldName) + xmlStreamReader.getText()));
return;
}

if (!DataUtils.isArrayValueAssignable(fieldType.getTag())) {
throw DiagnosticLog.error(DiagnosticErrorCode.FOUND_ARRAY_FOR_NON_ARRAY_TYPE, fieldType, fieldName);
}
Expand All @@ -276,7 +286,7 @@ private void readText(XMLStreamReader xmlStreamReader, XmlParserData xmlParserDa
return;
}

((BArray) currentNode.get(bFieldName)).append(convertStringToExpType(bText, fieldType));
((BArray) currentNode.get(bFieldName)).append(convertStringToRestExpType(bText, fieldType));
return;
}

Expand All @@ -291,6 +301,17 @@ private void readText(XMLStreamReader xmlStreamReader, XmlParserData xmlParserDa
currentNode.put(bFieldName, convertStringToRestExpType(bText, fieldType));
}

/**
 * Joins the text of consecutive CHARACTERS events into a single string.
 *
 * <p>A stream reader may deliver one logical text node as several CHARACTERS events, and
 * comments or processing instructions embedded in the text also split it. Every CHARACTERS
 * chunk is appended, and any run of COMMENT / PROCESSING_INSTRUCTION events between chunks
 * is skipped, so {@code John<!-- a --><!-- b --> Doe} yields {@code "John Doe"}.
 *
 * <p>On return the reader is positioned on the first event that is neither text nor a
 * skipped comment / processing instruction; the caller must handle that current event
 * before calling {@code next()} again.
 *
 * @param xmlStreamReader reader currently positioned on a CHARACTERS event
 * @return the concatenated text; empty if the reader is not on a CHARACTERS event
 * @throws XMLStreamException if the underlying reader fails while advancing
 */
private String handleTruncatedCharacters(XMLStreamReader xmlStreamReader) throws XMLStreamException {
    StringBuilder textBuilder = new StringBuilder();
    while (xmlStreamReader.getEventType() == CHARACTERS) {
        textBuilder.append(xmlStreamReader.getText());
        int next = xmlStreamReader.next();
        // Skip ALL adjacent comments / processing instructions, not just the first one;
        // otherwise the text that follows them is reported as a separate value.
        while (next == COMMENT || next == PROCESSING_INSTRUCTION) {
            next = xmlStreamReader.next();
        }
    }
    return textBuilder.toString();
}

@SuppressWarnings("unchecked")
private void handleContentFieldInRecordType(RecordType recordType, BString text, XmlParserData xmlParserData) {
popStacks(xmlParserData);
Expand Down Expand Up @@ -507,6 +528,7 @@ private Object parseRestField(XmlParserData xmlParserData) {

BString currentFieldName = null;
try {
boolean readNext = false;
while (!xmlParserData.restFieldsPoints.isEmpty()) {
switch (next) {
case START_ELEMENT:
Expand All @@ -516,8 +538,11 @@ private Object parseRestField(XmlParserData xmlParserData) {
endElementRest(xmlStreamReader, xmlParserData);
break;
case CDATA:
readTextRest(xmlStreamReader, currentFieldName, true, xmlParserData);
break;
case CHARACTERS:
readTextRest(xmlStreamReader, currentFieldName, xmlParserData);
readTextRest(xmlStreamReader, currentFieldName, false, xmlParserData);
readNext = true;
break;
case PROCESSING_INSTRUCTION:
case COMMENT:
Expand All @@ -527,7 +552,12 @@ private Object parseRestField(XmlParserData xmlParserData) {
}

if (xmlStreamReader.hasNext() && !xmlParserData.restFieldsPoints.isEmpty()) {
next = xmlStreamReader.next();
if (readNext) {
readNext = false;
next = xmlStreamReader.getEventType();
} else {
next = xmlStreamReader.next();
}
} else {
break;
}
Expand Down Expand Up @@ -630,15 +660,17 @@ private void endElementRest(XMLStreamReader xmlStreamReader, XmlParserData xmlPa
}

@SuppressWarnings("unchecked")
private void readTextRest(XMLStreamReader xmlStreamReader, BString currentFieldName, XmlParserData xmlParserData) {
String text = xmlStreamReader.getText();
private void readTextRest(XMLStreamReader xmlStreamReader,
BString currentFieldName,
boolean isCData,
XmlParserData xmlParserData) throws XMLStreamException {
String text = isCData ? xmlStreamReader.getText() : handleTruncatedCharacters(xmlStreamReader);
if (text.strip().isBlank()) {
return;
}

BString bText = StringUtils.fromString(text);
Type restType = TypeUtils.getReferredType(xmlParserData.restTypes.peek());
// TODO: <name>James <!-- FirstName --> Clark</name>
Object currentElement = currentNode.get(currentFieldName);
BMap<BString, Object> parent = (BMap<BString, Object>) xmlParserData.nodesStack.peek();
Object result = convertStringToRestExpType(bText, restType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ private void convertText(String text, XmlAnalyzerData analyzerData) {
((BArray) value).append(convertedValue);
} else {
BArray array = DataUtils.createNewAnydataList(fieldType);
array.append(value);
array.append(convertedValue);
mapValue.put(fieldName, array);
}
Expand Down

0 comments on commit 8d3ff90

Please sign in to comment.