Skip to content

Commit

Permalink
Fix cases where comments inside xml content
Browse files Browse the repository at this point in the history
  • Loading branch information
prakanth97 committed Dec 19, 2023
1 parent 6f303e7 commit 3bac626
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 6 deletions.
50 changes: 49 additions & 1 deletion ballerina/tests/fromXml_test.bal
Original file line number Diff line number Diff line change
Expand Up @@ -1589,7 +1589,7 @@ function testXmlWithAttributesAgainstOpenRecord3() returns error? {
}

@test:Config
function testCommentMiddleInContent() returns error? {
function testCommentMiddleInContent1() returns error? {
string xmlStr = string `<Data>
<A>John<!-- firstname --> Doe<!-- lastname --></A>
</Data>`;
Expand All @@ -1604,6 +1604,22 @@ function testCommentMiddleInContent() returns error? {
test:assertEquals(rec2.A, "John Doe");
}

@test:Config
function testCommentMiddleInContent2() returns error? {
xml xmlVal = xml `<Data>
<A>John<!-- firstname --> Doe<!-- lastname --></A>
</Data>`;
record {} rec = check fromXmlWithType(xmlVal);
test:assertEquals(rec.length(), 1);
test:assertEquals(rec.get("A"), "John Doe");

record {|
string A;
|} rec2 = check fromXmlWithType(xmlVal);
test:assertEquals(rec2.length(), 1);
test:assertEquals(rec2.A, "John Doe");
}

// Negative cases
type DataN1 record {|
int A;
Expand Down Expand Up @@ -1871,3 +1887,35 @@ function testXmlToRecordNegative12() {
RecAtt6|error rec = fromXmlWithType(xmlVal);
test:assertEquals((<error>rec).message(), "required attribute 'data' not present in XML");
}

@test:Config {
groups: ["fromXmlString"]
}
function testCommentMiddleInContentNegative1() {
string xmlStr = string `<Data><A>1<!-- cmt -->2</A></Data>`;
record {|
int A;
|}|error rec1 = fromXmlStringWithType(xmlStr);
test:assertEquals((<error>rec1).message(), "invalid type expected 'int' but found 'string'");

record {|
int...;
|}|error rec2 = fromXmlStringWithType(xmlStr);
test:assertEquals((<error>rec2).message(), "invalid type expected 'int' but found 'string'");
}

@test:Config {
groups: ["fromXml"]
}
function testCommentMiddleInContentNegative2() {
xml xmlVal = xml `<Data><A>1<!-- cmt -->2</A></Data>`;
record {|
int A;
|}|error rec1 = fromXmlWithType(xmlVal);
test:assertEquals((<error>rec1).message(), "invalid type expected 'int' but found 'string'");

record {|
int...;
|}|error rec2 = fromXmlWithType(xmlVal);
test:assertEquals((<error>rec2).message(), "invalid type expected 'int' but found 'string'");
}
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,10 @@ public static boolean isArrayValueAssignable(int typeTag) {
return typeTag == TypeTags.ARRAY_TAG || typeTag == TypeTags.ANYDATA_TAG || typeTag == TypeTags.JSON_TAG;
}

public static boolean isStringValueAssignable(int typeTag) {
return typeTag == TypeTags.STRING_TAG || typeTag == TypeTags.ANYDATA_TAG || typeTag == TypeTags.JSON_TAG;
}

public static ArrayType getValidArrayType(Type type) {
return switch (type.getTag()) {
case TypeTags.ARRAY_TAG -> (ArrayType) type;
Expand Down Expand Up @@ -707,7 +711,7 @@ private static String addAttributeToRecord(BString prefix, BString uri, String k
}

/**
* Holds data required for the parsing and traversing.
* Holds data required for the traversing.
*
* @since 0.1.0
*/
Expand Down
51 changes: 47 additions & 4 deletions native/src/main/java/io/ballerina/stdlib/data/xml/XmlParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,15 @@ private void readText(XMLStreamReader xmlStreamReader,
boolean isCData,
XmlParserData xmlParserData) throws XMLStreamException {
Field currentField = xmlParserData.currentField;
String text = isCData ? xmlStreamReader.getText() : handleTruncatedCharacters(xmlStreamReader);
TextValue textValue = new TextValue();
String text;
if (isCData) {
text = xmlStreamReader.getText();
} else {
handleTruncatedCharacters(xmlStreamReader, textValue);
text = textValue.text;
}

if (text.strip().isBlank()) {
return;
}
Expand All @@ -250,6 +258,11 @@ private void readText(XMLStreamReader xmlStreamReader,
String fieldName = currentField.getFieldName();
BString bFieldName = StringUtils.fromString(fieldName);
Type fieldType = TypeUtils.getReferredType(currentField.getFieldType());

if (textValue.isCommentInTheMiddle && !DataUtils.isStringValueAssignable(fieldType.getTag())) {
throw DiagnosticLog.error(DiagnosticErrorCode.INVALID_TYPE, fieldType, PredefinedTypes.TYPE_STRING);
}

if (currentNode.containsKey(bFieldName)) {
if (!DataUtils.isArrayValueAssignable(fieldType.getTag())) {
throw DiagnosticLog.error(DiagnosticErrorCode.FOUND_ARRAY_FOR_NON_ARRAY_TYPE, fieldType, fieldName);
Expand All @@ -274,15 +287,17 @@ private void readText(XMLStreamReader xmlStreamReader,
}
}

private String handleTruncatedCharacters(XMLStreamReader xmlStreamReader) throws XMLStreamException {
private void handleTruncatedCharacters(XMLStreamReader xmlStreamReader, TextValue textValue)
throws XMLStreamException {
StringBuilder textBuilder = new StringBuilder();
while (xmlStreamReader.getEventType() == CHARACTERS) {
textBuilder.append(xmlStreamReader.getText());
if (xmlStreamReader.next() == COMMENT) {
textValue.isCommentInTheMiddle = true;
xmlStreamReader.next();
}
}
return textBuilder.toString();
textValue.text = textBuilder.toString();
}

@SuppressWarnings("unchecked")
Expand Down Expand Up @@ -623,13 +638,26 @@ private void readTextRest(XMLStreamReader xmlStreamReader,
BString currentFieldName,
boolean isCData,
XmlParserData xmlParserData) throws XMLStreamException {
String text = isCData ? xmlStreamReader.getText() : handleTruncatedCharacters(xmlStreamReader);
TextValue textValue = new TextValue();
String text;
if (isCData) {
text = xmlStreamReader.getText();
} else {
handleTruncatedCharacters(xmlStreamReader, textValue);
text = textValue.text;
}

if (text.strip().isBlank()) {
return;
}

BString bText = StringUtils.fromString(text);
Type restType = TypeUtils.getReferredType(xmlParserData.restTypes.peek());

if (textValue.isCommentInTheMiddle && !DataUtils.isStringValueAssignable(restType.getTag())) {
throw DiagnosticLog.error(DiagnosticErrorCode.INVALID_TYPE, restType, PredefinedTypes.TYPE_STRING);
}

Object currentElement = currentNode.get(currentFieldName);
BMap<BString, Object> parent = (BMap<BString, Object>) xmlParserData.nodesStack.peek();
Object result = convertStringToRestExpType(bText, restType);
Expand Down Expand Up @@ -788,6 +816,21 @@ private QualifiedName getElementName(XMLStreamReader xmlStreamReader) {
return new QualifiedName(qName.getNamespaceURI(), qName.getLocalPart(), qName.getPrefix());
}

/**
* Represents the content of an XML element.
*
* @since 0.1.0
*/
static class TextValue {
String text;
boolean isCommentInTheMiddle = false;
}

/**
* Holds data required for the parsing.
*
* @since 0.1.0
*/
public static class XmlParserData {
private final Stack<Object> nodesStack = new Stack<>();
private final Stack<Map<QualifiedName, Field>> fieldHierarchy = new Stack<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package io.ballerina.stdlib.data.xml;

import io.ballerina.runtime.api.PredefinedTypes;
import io.ballerina.runtime.api.TypeTags;
import io.ballerina.runtime.api.creators.TypeCreator;
import io.ballerina.runtime.api.creators.ValueCreator;
Expand Down Expand Up @@ -397,6 +398,10 @@ private void convertSequence(BXmlSequence xmlSequence, Type type, XmlAnalyzerDat
}

private Object convertHeterogeneousSequence(List<BXml> sequence, Type type, XmlAnalyzerData analyzerData) {
if (isAllChildrenText(sequence)) {
return handleCommentInMiddleOfText(sequence, type, analyzerData);
}

for (BXml bXml: sequence) {
if (!isCommentOrPi(bXml)) {
traverseXml(bXml, type, analyzerData);
Expand All @@ -405,6 +410,28 @@ private Object convertHeterogeneousSequence(List<BXml> sequence, Type type, XmlA
return currentNode;
}

private boolean isAllChildrenText(List<BXml> sequence) {
for (BXml bXml: sequence) {
if (bXml.getNodeType() != XmlNodeType.TEXT) {
return false;
}
}
return true;
}

private Object handleCommentInMiddleOfText(List<BXml> sequence, Type type, XmlAnalyzerData analyzerData) {
if (!DataUtils.isStringValueAssignable(type.getTag())) {
throw DiagnosticLog.error(DiagnosticErrorCode.INVALID_TYPE, type, PredefinedTypes.TYPE_STRING);
}

StringBuilder textBuilder = new StringBuilder();
for (BXml bXml: sequence) {
textBuilder.append(bXml.toString());
}
convertText(textBuilder.toString(), analyzerData);
return currentNode;
}

private static boolean isCommentOrPi(BXml bxml) {
return bxml.getNodeType() == XmlNodeType.COMMENT || bxml.getNodeType() == XmlNodeType.PI;
}
Expand Down

0 comments on commit 3bac626

Please sign in to comment.