Skip to content
This repository has been archived by the owner on Apr 4, 2024. It is now read-only.

Port PerCharacterEscaper #21

Closed
nedtwigg opened this issue Feb 29, 2024 · 1 comment
Closed

Port PerCharacterEscaper #21

nedtwigg opened this issue Feb 29, 2024 · 1 comment

Comments

@nedtwigg
Copy link
Member

nedtwigg commented Feb 29, 2024

The goal is to implement this interface

/**
* If your escape policy is `'123`, it means this: <br>
*
* ```
* abc->abc
* 123->'1'2'3
* I won't->I won''t
* ```
*/

Using this code

/**
 * Escapes and unescapes strings one Unicode code point at a time, using a single escape code
 * point followed by a marker code point.
 *
 * If your escape policy is "'123", it means this:
 * ```
 * abc->abc
 * 123->'1'2'3
 * I won't->I won''t
 * ```
 *
 * Instances are created through [selfEscape] or [specifiedEscape].
 */
actual class PerCharacterEscaper
private constructor(
    /** Code point that introduces an escape sequence in escaped text. */
    private val escapeCodePoint: Int,
    /** Code points which must be escaped; `escapedCodePoints[i]` is written as `escapedByCodePoints[i]`. */
    private val escapedCodePoints: IntArray,
    /** Marker code points emitted after the escape code point, parallel to [escapedCodePoints]. */
    private val escapedByCodePoints: IntArray
) {
    /** Returns the char offset of the first code point in [input] that must be escaped, or -1 if none. */
    private fun firstOffsetNeedingEscape(input: String): Int {
        var offset = 0
        while (offset < input.length) {
            val codepoint = input.codePointAt(offset)
            if (escapedCodePoints.indexOf(codepoint) != -1) {
                return offset
            }
            offset += Character.charCount(codepoint)
        }
        return -1
    }

    /**
     * Escapes [input] according to this escaper's policy.
     *
     * If nothing needs escaping, the very same [input] instance is returned.
     */
    actual fun escape(input: String): String {
        val noEscapes = firstOffsetNeedingEscape(input)
        if (noEscapes == -1) {
            return input
        }
        val length = input.length
        val needsEscapes = length - noEscapes
        // The untouched prefix is copied verbatim; the rest is sized with some slack for escapes.
        val builder = StringBuilder(noEscapes + 4 + needsEscapes * 5 / 4)
        builder.append(input, 0, noEscapes)
        var offset = noEscapes
        while (offset < length) {
            val codepoint = input.codePointAt(offset)
            offset += Character.charCount(codepoint)
            val idx = escapedCodePoints.indexOf(codepoint)
            if (idx == -1) {
                builder.appendCodePoint(codepoint)
            } else {
                builder.appendCodePoint(escapeCodePoint)
                builder.appendCodePoint(escapedByCodePoints[idx])
            }
        }
        return builder.toString()
    }

    /** Returns the char offset of the first escape code point in [input], or -1 if there is none. */
    private fun firstOffsetNeedingUnescape(input: String): Int {
        var offset = 0
        while (offset < input.length) {
            val codepoint = input.codePointAt(offset)
            if (codepoint == escapeCodePoint) {
                return offset
            }
            offset += Character.charCount(codepoint)
        }
        return -1
    }

    /**
     * Reverses [escape].
     *
     * If [input] contains no escape code point, the very same [input] instance is returned. An
     * escape code point followed by a code point that is not a known marker yields that code
     * point unchanged.
     *
     * @throws IllegalArgumentException if [input] ends with a dangling escape code point.
     */
    actual fun unescape(input: String): String {
        val noEscapes = firstOffsetNeedingUnescape(input)
        if (noEscapes == -1) {
            return input
        }
        val length = input.length
        val needsEscapes = length - noEscapes
        val builder = StringBuilder(noEscapes + 4 + needsEscapes * 5 / 4)
        builder.append(input, 0, noEscapes)
        var offset = noEscapes
        while (offset < length) {
            val codepoint = input.codePointAt(offset)
            offset += Character.charCount(codepoint)
            if (codepoint != escapeCodePoint) {
                // not part of an escape sequence, append it raw
                builder.appendCodePoint(codepoint)
                continue
            }
            if (offset >= length) {
                throw IllegalArgumentException(
                    "Escape character '" +
                        String(intArrayOf(escapeCodePoint), 0, 1) +
                        "' can't be the last character in a string.")
            }
            val marker = input.codePointAt(offset)
            // Advance by the width of the marker that was actually read from the input, not by
            // the width of the code point it maps to: the two may differ in UTF-16 length.
            offset += Character.charCount(marker)
            val idx = escapedByCodePoints.indexOf(marker)
            builder.appendCodePoint(if (idx == -1) marker else escapedCodePoints[idx])
        }
        return builder.toString()
    }

    actual companion object {
        /**
         * Creates an escaper in which every code point of [escapePolicy] is escaped by itself.
         * The first code point in the string will be used as the escape character, and all code
         * points in the string will be escaped.
         *
         * @throws IllegalArgumentException if [escapePolicy] is empty.
         */
        actual fun selfEscape(escapePolicy: String): PerCharacterEscaper {
            val escapedCodePoints = escapePolicy.codePoints().toArray()
            require(escapedCodePoints.isNotEmpty()) {
                "Escape policy must not be empty, its first character is the escape character."
            }
            val escapeCodePoint = escapedCodePoints[0]
            return PerCharacterEscaper(escapeCodePoint, escapedCodePoints, escapedCodePoints)
        }

        /**
         * Creates an escaper from pairs of code points: in each pair the first code point is the
         * one to escape and the second is the marker written after the escape character. The
         * first code point of [escapePolicy] is the escape character.
         *
         * @throws IllegalArgumentException if [escapePolicy] is empty or has an odd number of
         *   code points.
         */
        actual fun specifiedEscape(escapePolicy: String): PerCharacterEscaper {
            val codePoints = escapePolicy.codePoints().toArray()
            require(codePoints.isNotEmpty()) {
                "Escape policy must not be empty, its first character is the escape character."
            }
            require(codePoints.size % 2 == 0) {
                "Escape policy must contain an even number of characters (original/marker pairs)."
            }
            val escapeCodePoint = codePoints[0]
            val escapedCodePoints = IntArray(codePoints.size / 2) { codePoints[2 * it] }
            val escapedByCodePoints = IntArray(codePoints.size / 2) { codePoints[2 * it + 1] }
            return PerCharacterEscaper(escapeCodePoint, escapedCodePoints, escapedByCodePoints)
        }
    }
}

@Trickybrain
Copy link
Collaborator

Test Part of PerCharacterEscaper

/** Behavioral tests for `PerCharacterEscaper` created via `selfEscape` and `specifiedEscape`. */
class PerCharacterEscaperTest {
    /** Self-escaping policy: unchanged input is returned as the same instance, otherwise escaped. */
    @Test
    fun performanceOptimizationSelf() {
        val escaper = selfEscape("`123")
        // if nothing gets changed, it should return the exact same value
        val abc = "abc"
        assertSame(abc, escaper.escape(abc))
        assertSame(abc, escaper.unescape(abc))
        // otherwise it should have the normal behavior
        assertEquals("`1", escaper.escape("1"))
        assertEquals("``", escaper.escape("`"))
        assertEquals("abc`1`2`3``def", escaper.escape("abc123`def"))
        // in both directions
        assertEquals("1", escaper.unescape("`1"))
        assertEquals("`", escaper.unescape("``"))
        assertEquals("abc123`def", escaper.unescape("abc`1`2`3``def"))
    }

    /** Specified policy: each escaped character is written as the escape character plus its marker. */
    @Test
    fun performanceOptimizationSpecific() {
        val escaper = specifiedEscape("`a1b2c3d")
        // if nothing gets changed, it should return the exact same value
        val abc = "abc"
        assertSame(abc, escaper.escape(abc))
        assertSame(abc, escaper.unescape(abc))
        // otherwise it should have the normal behavior
        assertEquals("`b", escaper.escape("1"))
        assertEquals("`a", escaper.escape("`"))
        assertEquals("abc`b`c`d`adef", escaper.escape("abc123`def"))
        // in both directions
        assertEquals("1", escaper.unescape("`b"))
        assertEquals("`", escaper.unescape("`a"))
        // the exact inverse of the escape assertion above
        assertEquals("abc123`def", escaper.unescape("abc`b`c`d`adef"))
        // markers that are not part of the policy are passed through unchanged
        assertEquals("abc123`def", escaper.unescape("abc`1`2`3``def"))
    }

    /** A trailing escape character is rejected; an unknown marker is passed through. */
    @Test
    fun cornerCasesSelf() {
        val escaper = selfEscape("`123")
        // cornercase - escape character without follow-on will throw an error
        val exception = assertFails { escaper.unescape("`") }
        assertEquals("Escape character '`' can't be the last character in a string.", exception.message)
        // escape character followed by non-escape character is fine
        assertEquals("a", escaper.unescape("`a"))
    }

    /** Same corner cases as [cornerCasesSelf], for a specified policy. */
    @Test
    fun cornerCasesSpecific() {
        val escaper = specifiedEscape("`a1b2c3d")
        // cornercase - escape character without follow-on will throw an error
        val exception = assertFails { escaper.unescape("`") }
        assertEquals("Escape character '`' can't be the last character in a string.", exception.message)
        // escape character followed by non-escape character is fine
        assertEquals("e", escaper.unescape("`e"))
    }

    /** Escaping and then unescaping yields the original string. */
    @Test
    fun roundtrip() {
        val escaper = selfEscape("`<>")
        val roundtrip = { str: String -> assertEquals(str, escaper.unescape(escaper.escape(str))) }
        roundtrip("")
        roundtrip("<local>~`/")
    }
}

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants