Skip to content

Commit

Permalink
Merge pull request #228 from dispatch/1.1/encode-emoji
Browse files Browse the repository at this point in the history
[1.1.x] Correctly url encode emoji in path segments
  • Loading branch information
farmdawgnation authored Dec 27, 2019
2 parents 3cbdbb3 + fa3458e commit 03eecf3
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 21 deletions.
39 changes: 21 additions & 18 deletions core/src/main/scala/uri.scala
Original file line number Diff line number Diff line change
Expand Up @@ -50,30 +50,33 @@ object UriEncode {
// `unreserved` extended with the extra characters ':', '@', '&', '=', '+', '$' and ','.
def pchar = unreserved ++ List(':', '@', '&', '=', '+', '$', ',')
val segmentValid = (';' +: pchar).toSet
val segmentValid: Set[Char] = (';' +: pchar).toSet

private val validMarkers = (0 to segmentValid.max.toInt).map(i => segmentValid(i.toChar)).toArray
private def isValidChar(ch: Char) = (ch < validMarkers.length) && validMarkers(ch.toInt)
// There are likely more optimal ways of doing this calculation, however
// it seems unlikely that long path segments are often on the hot path
// of a request in such a way that they can't be cached. If that proves
// not to be true, then we can revisit.
// True when the byte, widened to a Char, is a member of `segmentValid`.
private def isValidChar(b: Byte) = segmentValid(b.toChar)

def path(pathSegment: String, encoding: String = "UTF-8") = {
if (pathSegment.forall(isValidChar)) {
val pathBytes = pathSegment.getBytes(encoding)

if (pathBytes.forall(isValidChar)) {
pathSegment
}
else {
} else {
val sb = new StringBuilder(pathSegment.length << 1)

pathSegment foreach { ch =>
if (isValidChar(ch)) {
sb.append(ch)
}
else {
ch.toString.getBytes(encoding) foreach { b =>
val hi = (b >>> 4) & 0xf
val lo = b & 0xf
sb.append('%')
.append((if (hi > 9) hi + '7' else hi + '0').toChar)
.append((if (lo > 9) lo + '7' else lo + '0').toChar)
}
pathBytes.foreach { b =>
if (isValidChar(b)) {
sb.append(b.toChar)
} else {
val hi = (b >>> 4) & 0xf
val lo = b & 0xf
sb.append('%')
.append((if (hi > 9) hi + '7' else hi + '0').toChar)
.append((if (lo > 9) lo + '7' else lo + '0').toChar)
}
}

Expand Down
11 changes: 8 additions & 3 deletions core/src/test/scala/uri.scala
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
package dispatch.spec

import org.scalacheck._
import org.scalacheck.Prop.BooleanOperators
import org.scalacheck.Prop._

object UriSpecification extends Properties("Uri") {
/** java.net.URLDecoder should *NOT* be used for testing URI segment decoding
* because it implements completely different functionality: query parameter decoding
*/
property("encode-decode") = Prop.forAll { (path: String) =>
property("Encodes and decodes basic strings") = Prop.forAll { (path: String) =>
!path.contains(":") ==> {
new java.net.URI(dispatch.UriEncode.path(path)).getPath == path
} // else Prop.throws(classOf[java.net.URISyntaxException])
}

/** if there is nothing to escape, encoder must return original reference */
property("noop") = Prop.forAll(Gen.choose(0,100)) { (n: Int) =>
property("Does nothing if there's nothing to encode") = Prop.forAll(Gen.choose(0,100)) { (n: Int) =>
val path = "A" * n
dispatch.UriEncode.path(path) eq path
}

property("Encodes emoji correctly") = forAll(Gen.const("unused")) { (sample: String) =>
val path = "roma🇮🇹"
new java.net.URI(dispatch.UriEncode.path(path)).getPath == (path)
}
}

0 comments on commit 03eecf3

Please sign in to comment.