diff --git a/core/src/main/scala/uri.scala b/core/src/main/scala/uri.scala index db4ead4d..438698b6 100644 --- a/core/src/main/scala/uri.scala +++ b/core/src/main/scala/uri.scala @@ -50,30 +50,33 @@ object UriEncode { def pchar = unreserved ++ ( ':' :: '@' :: '&' :: '=' :: '+' :: '$' :: ',' :: Nil ) - val segmentValid = (';' +: pchar).toSet + val segmentValid: Set[Char] = (';' +: pchar).toSet - private val validMarkers = (0 to segmentValid.max.toInt).map(i => segmentValid(i.toChar)).toArray - private def isValidChar(ch: Char) = (ch < validMarkers.length) && validMarkers(ch.toInt) + // There are likely more optimal ways of doing this calculation, however + // it seems unlikely that long path segments are often on the hot path + // of a request in such a way that they can't be cached. If that proves + // not to be true, then we can revisit. + private def isValidChar(b: Byte) = { + segmentValid.contains(b.toChar) + } def path(pathSegment: String, encoding: String = "UTF-8") = { - if (pathSegment.forall(isValidChar)) { + val pathBytes = pathSegment.getBytes(encoding) + + if (pathBytes.forall(isValidChar)) { pathSegment - } - else { + } else { val sb = new StringBuilder(pathSegment.length << 1) - pathSegment foreach { ch => - if (isValidChar(ch)) { - sb.append(ch) - } - else { - ch.toString.getBytes(encoding) foreach { b => - val hi = (b >>> 4) & 0xf - val lo = b & 0xf - sb.append('%') - .append((if (hi > 9) hi + '7' else hi + '0').toChar) - .append((if (lo > 9) lo + '7' else lo + '0').toChar) - } + pathBytes.foreach { b => + if (isValidChar(b)) { + sb.append(b.toChar) + } else { + val hi = (b >>> 4) & 0xf + val lo = b & 0xf + sb.append('%') + .append((if (hi > 9) hi + '7' else hi + '0').toChar) + .append((if (lo > 9) lo + '7' else lo + '0').toChar) } } diff --git a/core/src/test/scala/uri.scala b/core/src/test/scala/uri.scala index efcb40da..5e419e62 100644 --- a/core/src/test/scala/uri.scala +++ b/core/src/test/scala/uri.scala @@ -1,21 +1,26 @@ package dispatch.spec 
import org.scalacheck._ -import org.scalacheck.Prop.BooleanOperators +import org.scalacheck.Prop._ object UriSpecification extends Properties("Uri") { /** java.net.URLDecoder should *NOT* be used for testing URI segment decoding * because it implements completely different functionality: query parameter decoding */ - property("encode-decode") = Prop.forAll { (path: String) => + property("Encodes and decodes basic strings") = Prop.forAll { (path: String) => !path.contains(":") ==> { new java.net.URI(dispatch.UriEncode.path(path)).getPath == path } // else Prop.throws(classOf[java.net.URISyntaxException]) } /** if there is nothing to escape, encoder must return original reference */ - property("noop") = Prop.forAll(Gen.choose(0,100)) { (n: Int) => + property("Does nothing if there's nothing to encode") = Prop.forAll(Gen.choose(0,100)) { (n: Int) => val path = "A" * n dispatch.UriEncode.path(path) eq path } + + property("Encodes emoji correctly") = forAll(Gen.const("unused")) { (sample: String) => + val path = "roma🇮🇹" + new java.net.URI(dispatch.UriEncode.path(path)).getPath == (path) + } }