Skip to content

Commit

Permalink
Add bare keys check and support for c_document_end in yaml stream
Browse files Browse the repository at this point in the history
  • Loading branch information
stephenamar-db committed Dec 10, 2024
1 parent ce37810 commit 6e2c64e
Show file tree
Hide file tree
Showing 5 changed files with 579 additions and 539 deletions.
49 changes: 29 additions & 20 deletions sjsonnet/src/sjsonnet/BaseCharRenderer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ package sjsonnet
// with some private definitions made accessible to subclasses

import ujson._

import scala.annotation.switch
import upickle.core.{ArrVisitor, ObjVisitor}
import upickle.core.{ArrVisitor, ObjVisitor, Visitor}
class BaseCharRenderer[T <: upickle.core.CharOps.Output]
(out: T,
indent: Int = -1,
escapeUnicode: Boolean = false) extends JsVisitor[T, T]{
protected[this] val elemBuilder = new upickle.core.CharBuilder
protected[this] val unicodeCharBuilder = new upickle.core.CharBuilder()
def flushCharBuilder() = {
def flushCharBuilder(): Unit = {
elemBuilder.writeOutToIfLongerThan(out, if (depth == 0) 0 else 1000)
}

Expand All @@ -21,25 +21,29 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]

protected[this] var commaBuffered = false

def flushBuffer() = {
def flushBuffer(): Unit = {
if (commaBuffered) {
commaBuffered = false
elemBuilder.append(',')
renderIndent()
}
}
def visitArray(length: Int, index: Int) = new ArrVisitor[T, T] {

def visitArray(length: Int, index: Int): ArrVisitor[T, T] = new ArrVisitor[T, T] {
flushBuffer()
elemBuilder.append('[')

depth += 1
renderIndent()
def subVisitor = BaseCharRenderer.this

def subVisitor: Visitor[T, T] = BaseCharRenderer.this

def visitValue(v: T, index: Int): Unit = {
flushBuffer()
commaBuffered = true
}
def visitEnd(index: Int) = {

def visitEnd(index: Int): T = {
commaBuffered = false
depth -= 1
renderIndent()
Expand All @@ -49,21 +53,26 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
}
}

def visitObject(length: Int, index: Int) = new ObjVisitor[T, T] {
def visitObject(length: Int, index: Int): ObjVisitor[T, T] = new ObjVisitor[T, T] {
flushBuffer()
elemBuilder.append('{')
depth += 1
renderIndent()
def subVisitor = BaseCharRenderer.this
def visitKey(index: Int) = BaseCharRenderer.this

def subVisitor: Visitor[T, T] = BaseCharRenderer.this

def visitKey(index: Int): Visitor[T, T] = BaseCharRenderer.this

def visitKeyValue(s: Any): Unit = {
elemBuilder.append(':')
if (indent != -1) elemBuilder.append(' ')
}

def visitValue(v: T, index: Int): Unit = {
commaBuffered = true
}
def visitEnd(index: Int) = {

def visitEnd(index: Int): T = {
commaBuffered = false
depth -= 1
renderIndent()
Expand All @@ -73,7 +82,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
}
}

def visitNull(index: Int) = {
def visitNull(index: Int): T = {
flushBuffer()
elemBuilder.ensureLength(4)
elemBuilder.appendUnsafe('n')
Expand All @@ -84,7 +93,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
out
}

def visitFalse(index: Int) = {
def visitFalse(index: Int): T = {
flushBuffer()
elemBuilder.ensureLength(5)
elemBuilder.appendUnsafe('f')
Expand All @@ -96,7 +105,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
out
}

def visitTrue(index: Int) = {
def visitTrue(index: Int): T = {
flushBuffer()
elemBuilder.ensureLength(4)
elemBuilder.appendUnsafe('t')
Expand All @@ -107,7 +116,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
out
}

def visitFloat64StringParts(s: CharSequence, decIndex: Int, expIndex: Int, index: Int) = {
def visitFloat64StringParts(s: CharSequence, decIndex: Int, expIndex: Int, index: Int): T = {
flushBuffer()
elemBuilder.ensureLength(s.length())
var i = 0
Expand All @@ -120,7 +129,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
out
}

override def visitFloat64(d: Double, index: Int) = {
override def visitFloat64(d: Double, index: Int): T = {
d match{
case Double.PositiveInfinity => visitNonNullString("Infinity", -1)
case Double.NegativeInfinity => visitNonNullString("-Infinity", -1)
Expand All @@ -136,20 +145,20 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
}


def visitString(s: CharSequence, index: Int) = {
def visitString(s: CharSequence, index: Int): T = {

if (s eq null) visitNull(index)
else visitNonNullString(s, index)
}

def visitNonNullString(s: CharSequence, index: Int) = {
private def visitNonNullString(s: CharSequence, index: Int) = {
flushBuffer()
upickle.core.RenderUtils.escapeChar(unicodeCharBuilder, elemBuilder, s, escapeUnicode)
upickle.core.RenderUtils.escapeChar(null, elemBuilder, s, escapeUnicode)
flushCharBuilder()
out
}

final def renderIndent() = {
final def renderIndent(): Unit = {
if (indent == -1) ()
else {
var i = indent * depth
Expand Down
12 changes: 9 additions & 3 deletions sjsonnet/src/sjsonnet/Std.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1079,14 +1079,20 @@ class Std {
builtinWithDefaults("manifestYamlStream",
"v" -> null,
"indent_array_in_object" -> Val.False(dummyPos),
"quote_keys" -> Val.True(dummyPos)){ (args, pos, ev) =>
"c_document_end" -> Val.True(dummyPos),
"quote_keys" -> Val.True(dummyPos)){ (args, _, ev) =>
val v = args(0)
// Decode the `indent_array_in_object` argument (positional index 1) into a Boolean.
// Anything other than a boolean literal is rejected; the error names the class of the
// offending argument itself rather than the class of `v`, the value being manifested.
val indentArrayInObject = args(1) match {
  case Val.False(_) => false
  case Val.True(_) => true
  case other => Error.fail("indent_array_in_object has to be a boolean, got " + other.getClass)
}
val quoteKeys = args(2) match {
// Decode the `c_document_end` argument (positional index 2) into a Boolean.
// When true, the stream is terminated with the "..." document-end marker by the
// `mkString` call further down. The error names the class of the offending argument
// itself rather than the class of `v`, the value being manifested.
val cDocumentEnd = args(2) match {
  case Val.False(_) => false
  case Val.True(_) => true
  case other => Error.fail("c_document_end has to be a boolean, got " + other.getClass)
}
val quoteKeys = args(3) match {
case Val.False(_) => false
case Val.True(_) => true
case _ => Error.fail("quote_keys has to be a boolean, got " + v.getClass)
Expand All @@ -1099,7 +1105,7 @@ class Std {
new YamlRenderer(indentArrayInObject = indentArrayInObject, quoteKeys = quoteKeys)
)(ev).toString()
}
.mkString("---\n", "\n---\n", "\n...\n")
.mkString("---\n", "\n---\n", if (cDocumentEnd) "\n...\n" else "\n")
case _ => Error.fail("manifestYamlStream only takes arrays, got " + v.getClass)
}
},
Expand Down
81 changes: 52 additions & 29 deletions sjsonnet/src/sjsonnet/YamlRenderer.scala
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package sjsonnet

import java.io.{StringWriter, Writer}
import java.io.StringWriter
import java.util.regex.Pattern
import upickle.core.{ArrVisitor, ObjVisitor, SimpleVisitor, Visitor}

import upickle.core.{ArrVisitor, ObjVisitor}
import scala.util.Try



Expand All @@ -13,14 +14,27 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
var dashBuffered = false
var afterKey = false
private var topLevel = true
private val outBuffer = _out.getBuffer

/**
 * Visitor handed out by `visitKey` so that object keys are rendered separately from
 * ordinary string values. Only `visitString` is overridden here; every other value kind
 * falls through to whatever `SimpleVisitor` does by default, using `expectedMsg`.
 *
 * A key is written bare only when `quoteKeys` is off and `isSafeBareKey` accepts it;
 * in every other case it is written as an escaped, quoted string.
 */
private val yamlKeyVisitor = new SimpleVisitor[StringWriter, StringWriter]() {
  override def expectedMsg = "Expected a string key"

  override def visitString(s: CharSequence, index: Int): StringWriter = {
    YamlRenderer.this.flushBuffer()
    // Short-circuits exactly like the original: the safety check runs only if quoting is off.
    val emitBare = !quoteKeys && YamlRenderer.isSafeBareKey(s.toString)
    if (emitBare) {
      YamlRenderer.this.appendString(s.toString)
    } else {
      upickle.core.RenderUtils.escapeChar(null, YamlRenderer.this.elemBuilder, s, unicode = true)
    }
    YamlRenderer.this.flushCharBuilder()
    _out
  }
}

private val outBuffer = _out.getBuffer()

override def flushCharBuilder() = {
override def flushCharBuilder(): Unit = {
elemBuilder.writeOutToIfLongerThan(_out, if (depth <= 0 || topLevel) 0 else 1000)
}

private[this] def appendString(s: String) = {
private[this] def appendString(s: String): Unit = {
val len = s.length
var i = 0
elemBuilder.ensureLength(len)
Expand All @@ -30,15 +44,6 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
}
}

private[this] def removeQuoteKey(): Unit = {
// refer to quote_keys parameter in https://jsonnet.org/ref/stdlib.html, only unquote keys
if (!quoteKeys && !afterKey) {
val key = elemBuilder.makeString()
elemBuilder.reset()
elemBuilder.appendAll(key.toCharArray, 1, key.length - 2);
}
}

override def visitString(s: CharSequence, index: Int): StringWriter = {
flushBuffer()
val len = s.length()
Expand All @@ -57,21 +62,20 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
}
depth -= 1
} else {
upickle.core.RenderUtils.escapeChar(unicodeCharBuilder, elemBuilder, s, true)
removeQuoteKey()
upickle.core.RenderUtils.escapeChar(null, elemBuilder, s, unicode=true)
}
flushCharBuilder()
_out
}

override def visitFloat64(d: Double, index: Int) = {
override def visitFloat64(d: Double, index: Int): StringWriter = {
flushBuffer()
appendString(RenderUtils.renderDouble(d))
flushCharBuilder()
_out
}

override def flushBuffer() = {
override def flushBuffer(): Unit = {
if (newlineBuffered) {
// drop space between colon and newline
elemBuilder.writeOutToIfLongerThan(_out, 0)
Expand All @@ -91,7 +95,7 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
dashBuffered = false
}

override def visitArray(length: Int, index: Int) = new ArrVisitor[StringWriter, StringWriter] {
override def visitArray(length: Int, index: Int): ArrVisitor[StringWriter, StringWriter] = new ArrVisitor[StringWriter, StringWriter] {
var empty = true
flushBuffer()

Expand All @@ -101,19 +105,19 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
}
topLevel = false

val dedentInObject = afterKey && !indentArrayInObject
private val dedentInObject = afterKey && !indentArrayInObject
afterKey = false
if (dedentInObject) depth -= 1
dashBuffered = true

def subVisitor = YamlRenderer.this
def subVisitor: Visitor[StringWriter, StringWriter] = YamlRenderer.this
def visitValue(v: StringWriter, index: Int): Unit = {
empty = false
flushBuffer()
newlineBuffered = true
dashBuffered = true
}
def visitEnd(index: Int) = {
def visitEnd(index: Int): StringWriter = {
if (!dedentInObject) depth -= 1
if (empty) {
elemBuilder.ensureLength(2)
Expand All @@ -126,16 +130,19 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
_out
}
}
override def visitObject(length: Int, index: Int) = new ObjVisitor[StringWriter, StringWriter] {

override def visitObject(length: Int, index: Int): ObjVisitor[StringWriter, StringWriter] = new ObjVisitor[StringWriter, StringWriter] {
var empty = true
flushBuffer()
if (!topLevel) depth += 1
topLevel = false

if (afterKey) newlineBuffered = true

def subVisitor = YamlRenderer.this
def visitKey(index: Int) = YamlRenderer.this
def subVisitor: Visitor[StringWriter, StringWriter] = YamlRenderer.this

def visitKey(index: Int): Visitor[StringWriter, StringWriter] = yamlKeyVisitor

def visitKeyValue(s: Any): Unit = {
empty = false
flushBuffer()
Expand All @@ -146,11 +153,13 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
afterKey = true
newlineBuffered = false
}

def visitValue(v: StringWriter, index: Int): Unit = {
newlineBuffered = true
afterKey = false
}
def visitEnd(index: Int) = {

def visitEnd(index: Int): StringWriter = {
if (empty) {
elemBuilder.ensureLength(2)
elemBuilder.append('{')
Expand All @@ -165,9 +174,23 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
}
}
object YamlRenderer{
val newlinePattern = Pattern.compile("\n")
val newlinePattern: Pattern = Pattern.compile("\n")
// Constants consulted by isSafeBareKey to decide whether a key may be emitted unquoted.

// One or more characters drawn from ASCII letters, digits, '/', '.', '_' and '-'.
private val safeYamlKeyPattern = Pattern.compile("^[a-zA-Z0-9/._-]+$")
// Words that are never emitted bare. isSafeBareKey looks up the lower-cased key here,
// so all entries are lower case. "null" is listed twice, which is harmless in a Set.
private val yamlReserved = Set("true", "false", "null", "yes", "no", "on", "off", "y", "n", ".nan",
"+.inf", "-.inf", ".inf", "null", "-", "---", "''")
// Three (possibly empty) runs of digits separated by two '-', e.g. 2024-12-10.
// The pattern contains no letters, so CASE_INSENSITIVE has no effect on what it matches.
private val yamlTimestampPattern = Pattern.compile("^(?:[0-9]*-){2}[0-9]*$", Pattern.CASE_INSENSITIVE)
// Optional sign, "0b", then binary digits and/or underscores.
private val yamlBinaryPattern = Pattern.compile("^[-+]?0b[0-1_]+$")
// Optional sign, "0x", then hex digits and/or underscores. There are no ^/$ anchors here,
// but isSafeBareKey applies it via Matcher.matches(), which must match the whole input.
private val yamlHexPattern = Pattern.compile("[-+]?0x[0-9a-fA-F_]+")

/**
 * Decides whether the key `k` can be written to YAML without surrounding quotes.
 *
 * A key is safe only if all of the following hold:
 *  - its lower-cased form is not one of the `yamlReserved` words;
 *  - it consists solely of the characters accepted by `safeYamlKeyPattern`;
 *  - it does not match the timestamp, binary or hex patterns;
 *  - with underscores removed, its lower-cased form parses neither as a Long nor as a Double.
 *
 * @param k the object key to test
 * @return true when the key may be emitted bare, false when it must be quoted
 */
private def isSafeBareKey(k: String) = {
  val lowered = k.toLowerCase

  // Underscores are stripped before the numeric probes.
  def parsesAsNumber: Boolean = {
    val stripped = lowered.replace("_", "")
    Try(stripped.toLong).isSuccess || Try(stripped.toDouble).isSuccess
  }

  // Checks run in the same order as the original conjunction; each one stops the chain.
  if (yamlReserved.contains(lowered)) false
  else if (!safeYamlKeyPattern.matcher(k).matches()) false
  else if (yamlTimestampPattern.matcher(k).matches()) false
  else if (yamlBinaryPattern.matcher(k).matches()) false
  else if (yamlHexPattern.matcher(k).matches()) false
  else !parsesAsNumber
}

def writeIndentation(out: upickle.core.CharBuilder, n: Int) = {
def writeIndentation(out: upickle.core.CharBuilder, n: Int): Unit = {
out.ensureLength(n+1)
out.append('\n')
var i = n
Expand Down
Loading

0 comments on commit 6e2c64e

Please sign in to comment.