Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tests for different utils (string, map, regex) #20

Merged
merged 3 commits into from
Dec 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 85 additions & 85 deletions modules/utils/src/main/scala/es/weso/utils/StrUtils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,22 @@ package es.weso.utils
object StrUtils {

/**
* Converter takes an index i into a string and checks how many characters it can replace
* If it can't replace, it returns None (no conversion)
* If it replaces, it returns the characters that are replaced and the next index in the string
* Rationale: Some conversions may require some lookahead, in which case, the index will be i + characters read
*/
* Converter takes an index i into a string and checks how many characters it can replace
* If it can't replace, it returns None (no conversion)
* If it replaces, it returns the characters that are replaced and the next index in the string
* Rationale: Some conversions may require some lookahead, in which case, the index will be i + characters read
*/
type Converter = (String, Int) => Option[CharConversion]
type CharConversion = (Array[Char],Int)

/**
* Unescape unicode numbers
* Given a string like: "p\u0031", return "p1"
* The code implements the Turtle rules: https://www.w3.org/TR/turtle/#sec-escapes
*
* @param str input string
* @return unescaped output string
*/
* Unescape unicode numbers
* Given a string like: "p\u0031", return "p1"
* The code implements the Turtle rules: https://www.w3.org/TR/turtle/#sec-escapes
*
* @param str input string
* @return unescaped output string
*/

def unescapeStringLiteral(str: String): String = cnvLoop(str,
List(
Expand Down Expand Up @@ -51,73 +51,73 @@ object StrUtils {
Some((Array(c), i))

/**
 * Converter that leaves an escaped backslash escaped.
 * When the character at `i` is a backslash and the next character is also a
 * backslash, both characters are emitted unchanged together with index `i + 1`.
 * Returns None when `i` is not a backslash, when the following character is not a
 * backslash, or when the backslash is the last character of the string.
 */
private def unescapeBackSlash: Converter = (str,i) =>
  // The look-ahead has to be checked against the current index: a guard on the total
  // length (`str.length > 1`) still lets str(i + 1) overrun on input such as "ab\\".
  if (str(i) == '\\' && i + 1 < str.length) {
    str(i + 1) match {
      case '\\' => Some((Array('\\','\\'), i + 1))
      case _ => None
    }
  } else None

/**
 * Converter for the single-letter string escapes.
 * When the character at `i` is a backslash followed by one of t, b, n, r, f, a double
 * quote, a single quote or a backslash, the pair is replaced by the character it
 * denotes and `i + 1` is passed on to cnvChar as the index.
 * Returns None when `i` is not a backslash, when the following character is not one of
 * the listed letters, or when the backslash is the last character of the string.
 */
private def unescapeStringEscapeSequence: Converter = (str,i) =>
  // Guard the look-ahead with the current index; `str.length > 1` does not stop
  // str(i + 1) from overrunning when a longer string ends in a backslash.
  if (str(i) == '\\' && i + 1 < str.length) {
    str(i + 1) match {
      case 't' => cnvChar('\u0009', i + 1)
      case 'b' => cnvChar('\u0008', i + 1)
      case 'n' => cnvChar('\u000A', i + 1)
      case 'r' => cnvChar('\u000D', i + 1)
      case 'f' => cnvChar('\u000C', i + 1)
      case '\"' => cnvChar('\u0022', i + 1)
      case '\'' => cnvChar('\'', i + 1)
      case '\\' => cnvChar('\\', i + 1)
      case _ => None
    }
  } else None

/**
 * Converter for numeric escapes: a backslash followed by a lower-case u and 4 hex
 * digits, or by an upper-case U and 8 hex digits.
 * The hex digits are parsed with getHex and turned into characters with
 * Character.toChars; the index returned is that of the last hex digit
 * (`i + 5` or `i + 9`).
 * Returns None when `i` is not a backslash, when the following character is neither
 * u nor U, or when the backslash is the last character of the string.
 */
private def unescapeNumericSequence: Converter = (str,i) =>
  // Guard the look-ahead with the current index; `str.length > 1` does not stop
  // str(i + 1) from overrunning when a longer string ends in a backslash.
  // NOTE(review): a sequence with too few characters after u/U still makes getHex
  // index past the end of the string - decide whether that should stay an error.
  if (str(i) == '\\' && i + 1 < str.length) {
    str(i + 1) match {
      case 'u' => {
        val hexValue = getHex(str,i + 2, 4)
        Some((Character.toChars(hexValue),i + 5))
      }
      case 'U' => {
        val hexValue = getHex(str,i + 2, 8)
        Some((Character.toChars(hexValue),i + 9))
      }
      case _ => None
    }
  } else None

/**
 * Reads `num` consecutive characters of `str`, starting at `index`, and parses them
 * as a base-16 integer.
 *
 * @param str   string containing the hex digits
 * @param index position of the first hex digit
 * @param num   number of characters to read
 * @return the integer value of the characters read
 */
private def getHex(str: String, index: Int, num: Int): Int = {
  val digits = (index until index + num).map(pos => str.charAt(pos)).mkString
  Integer.parseInt(digits, 16)
}

/**
 * Converter that keeps escaped pattern metacharacters escaped.
 * When the character at `i` is a backslash followed by one of `^`, `$`, `[` or `]`,
 * the backslash and that character are emitted unchanged together with index `i + 1`.
 * Returns None when `i` is not a backslash, when the following character is not one of
 * those four, or when the backslash is the last character of the string.
 */
private def unescapeReservedPatternChar: Converter = (str,i) =>
  // Guard the look-ahead with the current index; `str.length > 1` does not stop
  // str(i + 1) from overrunning when a longer string ends in a backslash.
  if (str(i) == '\\' && i + 1 < str.length) {
    str(i + 1) match {
      case c if "^$[]".contains(c) => Some((Array('\\',c),i + 1))
      case _ => None
    }
  } else None

/**
 * Converter that removes the backslash in front of a reserved character.
 * When the character at `i` is a backslash followed by one of
 * `~.-!$&'()*+,;=/?#@%_`, that character and `i + 1` are passed on to cnvChar.
 * Returns None when `i` is not a backslash, when the following character is not in
 * that list, or when the backslash is the last character of the string.
 */
private def unescapeReservedChar: Converter = (str,i) =>
  // Guard the look-ahead with the current index; `str.length > 1` does not stop
  // str(i + 1) from overrunning when a longer string ends in a backslash.
  if (str(i) == '\\' && i + 1 < str.length) {
    str(i + 1) match {
      case c if "~.-!$&'()*+,;=/?#@%_".contains(c) => cnvChar(c,i + 1)
      case _ => None
    }
  } else None

/**
 * Escape a string
 * Example: "Hi\n\t" -> "Hi\\n\\t"
 * The work is delegated to cnvLoop with cnvCtrl as the only converter, so the
 * characters rewritten are the ones cnvCtrl handles: tab, backspace, newline,
 * carriage return, form feed, single quote and double quote.
 * @param str string to escape
 * @return the result of running cnvLoop over str with the cnvCtrl converter
 */
def escapeStringLiteral(str: String): String = cnvLoop(str, List(cnvCtrl))

// Runs cnvLoop with an empty converter list. The escapePattern tests in this change
// expect inputs such as "\tpepe" and "pepe1" to come back unchanged.
def escapePattern(str: String): String = cnvLoop(str,List())
Expand All @@ -141,42 +141,42 @@ object StrUtils {
builder.mkString
}

/* def escape(str: String): String = {
var i = 0
val length = str.length
val builder = new StringBuilder(length)
while (i < str.length) {
val (nextChars,newIndex) = cnvCtrl(str,i).getOrElse(noConverter(str,i))
i = newIndex + 1
builder.appendAll(nextChars)
}
builder.mkString
} */
/* def escape(str: String): String = {
var i = 0
val length = str.length
val builder = new StringBuilder(length)
while (i < str.length) {
val (nextChars,newIndex) = cnvCtrl(str,i).getOrElse(noConverter(str,i))
i = newIndex + 1
builder.appendAll(nextChars)
}
builder.mkString
} */

// Builds the conversion result for an escaped character: a backslash followed by `c`,
// paired with the index `i` it was produced for.
private def escapeChar(c: Char, i: Int) = {
  val escaped = Array('\\', c)
  Some((escaped, i))
}

/**
 * Converter that escapes control characters and quotes.
 * Tab, backspace, newline, carriage return, form feed, single quote and double quote
 * are replaced by a backslash followed by t, b, n, r, f, ' and " respectively; the
 * index returned is `i` itself. Any other character yields None.
 */
private def cnvCtrl: Converter = (str,i) => {
  // Letter that follows the backslash in the escaped form, if the character needs one.
  val letter: Option[Char] = str(i) match {
    case '\t' => Some('t')
    case '\b' => Some('b')
    case '\n' => Some('n')
    case '\r' => Some('r')
    case '\f' => Some('f')
    case '\'' => Some('\'')
    case '\"' => Some('\"')
    case _ => None
  }
  letter.map(l => (Array('\\', l), i))
}

// Identity conversion: the character at `i` is passed through as-is, paired with `i`.
private def noConverter(str: String, i: Int): CharConversion = {
  val unchanged = Array(str.charAt(i))
  (unchanged, i)
}

/**
 * escapeDot: Escapes strings to be represented as labels in Dot
 * It follows dot conventions: https://graphviz.gitlab.io/_pages/doc/info/lang.html
 * Extra characters are escaped using their Unicode representation
 * The per-character rules live in dotConverter, which is handed to cnvLoop as the
 * only converter.
 * @param str string to escape
 * @return the result of running cnvLoop over str with dotConverter
 */
def escapeDot(str: String): String = cnvLoop(str, List(dotConverter))

private def dotConverter: Converter = (str,i) => str(i) match {
Expand Down
28 changes: 28 additions & 0 deletions modules/utils/src/test/scala/es/weso/utils/MapUtilsTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,33 @@ class MapUtilsTest extends FunSpec with Matchers {
}
}

describe("MapUtils combineMaps") {
  // Three maps are combined; the expected result adds up the values of the keys
  // shared by the first two maps and carries over the key that only the third has.
  it("Should combine map") {
    val first: Map[String,Int] = Map("a" -> 1, "b" -> 2, "c" -> 3)
    val second: Map[String,Int] = Map("a" -> 4, "b" -> 5, "c" -> 6)
    val third: Map[String,Int] = Map("d" -> 7)
    val expected: Map[String,Int] = Map("a" -> 5, "b" -> 7, "c" -> 9, "d" -> 7)

    val combined: Map[String, Int] = combineMaps(List(first, second, third))

    combined should be(expected)
  }
}

}
26 changes: 22 additions & 4 deletions modules/utils/src/test/scala/es/weso/utils/RegexUtilsTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,35 @@ import org.scalatest._

class RegexUtilsTest extends FunSpec with Matchers {
describe("Regex") {
shouldMatch("\\d{2}", None, "34")
// shouldMatch("""^\\/\t\\n\\r$""", None, "/\t\n\r")
shouldMatch("\\d{2}", None, "34") // Match
shouldNotMatch("[a-z]", None, "A") // Do not match
ErrorWhileMatch("\\[a-z]", None, "A") // Error while processing regex
}


/**
 * Registers a test that expects `str` to match `regex` with the given `flags`.
 * The test passes only when RegEx(regex, flags).matches(str) is Right(true);
 * a non-match and an evaluation error fail with separate messages so the cause
 * is visible in the report.
 *
 * @param regex regular expression source
 * @param flags optional regex flags; shown as an empty string when absent
 * @param str   string tested against the regex
 */
def shouldMatch(regex: String, flags: Option[String], str: String): Unit = {
  val flagText = flags.getOrElse("")
  it(s"should match /$regex/$flagText with $str") {
    // Right(true), Right(false) and Left cover every outcome handled here, so no
    // catch-all case is needed (it would be unreachable after these three).
    RegEx(regex, flags).matches(str) match {
      case Right(true) => info(s"$str matches /$regex/$flagText")
      case Right(false) => fail(s"$str doesn't match /$regex/$flagText")
      case Left(msg) => fail(s"Error $msg trying to match $str with /$regex/$flagText")
    }
  }
}

/**
 * Registers a test that expects `str` NOT to match `regex` with the given `flags`.
 * The test passes only when RegEx(regex, flags).matches(str) is Right(false);
 * every other outcome fails the test.
 *
 * @param regex regular expression source
 * @param flags optional regex flags; shown as an empty string when absent
 * @param str   string tested against the regex
 */
def shouldNotMatch(regex: String, flags: Option[String], str: String): Unit = {
  val pattern = s"/$regex/${flags.getOrElse("")}"
  it(s"should not match $pattern with $str") {
    val outcome = RegEx(regex, flags).matches(str)
    if (outcome == Right(false)) info(s"$str doesn't match $pattern")
    else fail(s"Execution of regex on $str was expected not to match: $pattern")
  }
}

/**
 * Registers a test that expects evaluating `regex` against `str` to end in an error.
 * The test passes only when RegEx(regex, flags).matches(str) is a Left; any Right
 * result fails the test.
 *
 * @param regex regular expression source
 * @param flags optional regex flags; shown as an empty string when absent
 * @param str   string tested against the regex
 */
def ErrorWhileMatch(regex: String, flags: Option[String], str: String): Unit = {
  val pattern = s"/$regex/${flags.getOrElse("")}"
  it(s"should fail when trying to match $pattern with $str") {
    RegEx(regex, flags).matches(str).fold(
      msg => info(s"Error $msg trying to match $str with $pattern"),
      _ => fail(s"Execution of regex on $str was expected to fail: $pattern")
    )
  }
}
Expand Down
16 changes: 16 additions & 0 deletions modules/utils/src/test/scala/es/weso/utils/StrUtilsTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@ import StrUtils._
class StrUtilsTest extends FunSpec with Matchers {

describe("StrUtils unescape") {
shouldConvert("unescapeStringLiteral", unescapeStringLiteral, "\\", "\\")
shouldConvert("unescapeStringLiteral", unescapeStringLiteral, "\\tpepe", "\tpepe")
shouldConvert("unescapeStringLiteral", unescapeStringLiteral, "\\bpepe", "\bpepe")
shouldConvert("unescapeStringLiteral", unescapeStringLiteral, "\\npepe", "\npepe")
shouldConvert("unescapeStringLiteral", unescapeStringLiteral, "\\rpepe", "\rpepe")
shouldConvert("unescapeStringLiteral", unescapeStringLiteral, "\\fpepe", "\fpepe")
shouldConvert("unescapeStringLiteral", unescapeStringLiteral, "\\'pepe", "\'pepe")
shouldConvert("unescapeStringLiteral", unescapeStringLiteral,"pepe\\u0031", "pepe1")
shouldConvert("unescapeStringLiteral", unescapeStringLiteral,"\\u0031pepe\\u0031", "1pepe1")
shouldConvert("unescapeStringLiteral", unescapeStringLiteral,"\\u0032\\u00ac\\u0031", "2¬1")
Expand All @@ -28,6 +34,16 @@ class StrUtilsTest extends FunSpec with Matchers {
shouldConvert("unescapePattern", unescapePattern, "\\\\u0061", "\\\\u0061")
}

// unescapeIRI: a leading backslash followed by 't' is expected to come back as a single
// tab character, with the rest of the IRI text left untouched.
describe("StrUtils unescapeIRI") {
shouldConvert("unescapeIRI", unescapeIRI, "\\thttp://www.w3.org/1999/02/22-rdf-syntax-ns",
"\thttp://www.w3.org/1999/02/22-rdf-syntax-ns")
}

// unescapeCode: the escaped quote, carriage return, newline and tab sequences in the input
// are expected to become the characters they denote; the unescaped quote stays as it is.
describe("StrUtils unescapeCode") {
shouldConvert("unescapeCode", unescapeCode, "\\\"Hello\\r\\n\\tW\"orld\\n",
"\"Hello\r\n\tW\"orld\n")
}

describe("StrUtils escapePattern") {
shouldConvert("escapePattern", escapePattern, "\tpepe", "\tpepe")
shouldConvert("escapePattern", escapePattern, "pepe1", "pepe1")
Expand Down