Skip to content

Commit

Permalink
Use pure-regex implementation to validate yaml keys
Browse files Browse the repository at this point in the history
  • Loading branch information
stephenamar-db committed Dec 12, 2024
1 parent 7741786 commit f64bc60
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 142 deletions.
1 change: 1 addition & 0 deletions sjsonnet/src/sjsonnet/Std.scala
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,7 @@ class Std {
q.removeFirst().force match {
case v: Val.Arr => v.asLazyArray.reverseIterator.foreach(q.push)
case s: Val.Str => out.write(s.value)
case _ =>
}
}
Val.Str(pos, out.toString)
Expand Down
15 changes: 10 additions & 5 deletions sjsonnet/src/sjsonnet/YamlRenderer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -178,16 +178,21 @@ object YamlRenderer{
private val safeYamlKeyPattern = Pattern.compile("^[a-zA-Z0-9/._-]+$")
private val yamlReserved = Set("true", "false", "null", "yes", "no", "on", "off", "y", "n", ".nan",
"+.inf", "-.inf", ".inf", "null", "-", "---", "''")
private val yamlTimestampPattern = Pattern.compile("^(?:[0-9]*-){2}[0-9]*$", Pattern.CASE_INSENSITIVE)
private val yamlTimestampPattern = Pattern.compile("^(?:[0-9]*-){2}[0-9]*$")
private val yamlBinaryPattern = Pattern.compile("^[-+]?0b[0-1_]+$")
private val yamlHexPattern = Pattern.compile("[-+]?0x[0-9a-fA-F_]+")
// Matches float-like keys that must be quoted: optional leading '-', a run of digits/underscores,
// an optional fractional part, and an optional signed exponent. Only a lower-case 'e' is accepted
// as the exponent marker, so callers should pass the lower-cased key.
// The integer part is a single `[0-9_]*` run: the earlier `([0-9_]*)*` form accepted exactly the
// same strings, but the nested quantifier can backtrack catastrophically on long non-matching
// digit keys in backtracking regex engines. The capture group is kept so group numbering is stable.
private val yamlFloatPattern = Pattern.compile("^-?([0-9_]*)(\\.[0-9_]*)?(e[-+][0-9_]+)?$")
private val yamlIntPattern = Pattern.compile("^[-+]?[0-9_]+$")

private def isSafeBareKey(k: String) = {
val l = k.toLowerCase
!yamlReserved.contains(l) && safeYamlKeyPattern.matcher(k).matches() &&
!yamlTimestampPattern.matcher(k).matches() && !yamlBinaryPattern.matcher(k).matches() &&
!yamlHexPattern.matcher(k).matches() && (Try(l.replace("_", "").toLong).isFailure
&& Try(l.replace("_", "").toDouble).isFailure)
!yamlReserved.contains(l) &&
safeYamlKeyPattern.matcher(k).matches() &&
!yamlTimestampPattern.matcher(l).matches() &&
!yamlBinaryPattern.matcher(k).matches() &&
!yamlHexPattern.matcher(k).matches() &&
!yamlFloatPattern.matcher(l).matches() &&
!yamlIntPattern.matcher(l).matches()
}

def writeIndentation(out: upickle.core.CharBuilder, n: Int): Unit = {
Expand Down
272 changes: 135 additions & 137 deletions sjsonnet/test/resources/test_suite/stdlib_native.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -1078,144 +1078,142 @@ std.assertEqual(
|||
) &&

// Scala native behaves differently from JVM regarding numerical separator like _.
// so for now, this test is disabled.
//std.assertEqual(
// std.manifestYamlDoc(bare_yaml_test, quote_keys=false) + '\n',
// |||
// "": "empty key"
// "+.inf": "positive infinity"
// "+685_230": "decimal"
// "-": "invalid bare key"
// "---": "triple dash key"
// "-.inf": "negative infinity"
// "-0.1_0_0": "negative float"
// -0B1010_0111_0100_1010_1110: "BARE_KEY"
// "-0b1010_0111_0100_1010_1110": "binary"
// "-0x_0A_74_AE": "negative hexadecimal"
// "-190:20:30": "negative sexagesimal"
// "-190:20:30.15": "negative sexagesimal"
// "-1_0": "negative integer"
// "-6.8523015e+5": "negative canonical"
// "-685.230_15E-03": "negative w/ negative exponential"
// "-685.230_15e+03": "negative exponential"
// "-685.230_15e-03": "negative w/ negative exponential"
// "-685_230.15": "negative fixed"
// ".NaN": "not a number"
// ".inf": "positive infinity"
// "02472256": "octal"
// 0X_0a_74_ae: "BARE_KEY"
// "0b1010_0111_0100_1010_1110": "binary"
// "0x_0A_74_AE": "hexadecimal"
// 1-234-567-8901: "BARE_KEY"
// "190:20:30": "sexagesimal"
// "190:20:30.15": "sexagesimal"
// 192.168.0.1: "BARE_KEY"
// "2001-12-14 21:59:43.10 -5": "space separated"
// "2001-12-14t21:59:43.10-05:00": "valid iso8601"
// "2001-12-15 2:59:43.10": "no time zone (Z)"
// "2001-12-15T02:59:43.1Z": "canonical"
// "2002-12-14": "date"
// "6.8523015e+5": "canonical"
// "6.8523015e-5": "canonical"
// "685.230_15e+03": "exponential"
// "685230": "canonical"
// "685_230.15": "fixed"
// "N": "boolean false"
// "NO": "boolean false"
// "NULL": "null word capital"
// "Null": "null word"
// "OFF": "boolean false"
// "On": "boolean true"
// "True": "boolean true"
// "Yes": "boolean true"
// __-0B1010_0111_0100_1010_1110: "BARE_KEY"
// __-0X_0a_74_ae: "BARE_KEY"
// b: "BARE_KEY"
// jsonnet.org/k8s-label-like: "BARE_KEY"
// just-letters-dashes: "BARE_KEY"
// just_letters_underscores: "BARE_KEY"
// "n": "boolean false"
// "null": "null word"
// "off": "boolean false"
// "on": "boolean true"
// "true": "boolean true"
// x: "BARE_KEY"
// "y": "boolean true"
// "yes": "boolean true"
// "~": "null key"
// |||
//) &&
std.assertEqual(
std.manifestYamlDoc(bare_yaml_test, quote_keys=false) + '\n',
|||
"": "empty key"
"+.inf": "positive infinity"
"+685_230": "decimal"
"-": "invalid bare key"
"---": "triple dash key"
"-.inf": "negative infinity"
"-0.1_0_0": "negative float"
-0B1010_0111_0100_1010_1110: "BARE_KEY"
"-0b1010_0111_0100_1010_1110": "binary"
"-0x_0A_74_AE": "negative hexadecimal"
"-190:20:30": "negative sexagesimal"
"-190:20:30.15": "negative sexagesimal"
"-1_0": "negative integer"
"-6.8523015e+5": "negative canonical"
"-685.230_15E-03": "negative w/ negative exponential"
"-685.230_15e+03": "negative exponential"
"-685.230_15e-03": "negative w/ negative exponential"
"-685_230.15": "negative fixed"
".NaN": "not a number"
".inf": "positive infinity"
"02472256": "octal"
0X_0a_74_ae: "BARE_KEY"
"0b1010_0111_0100_1010_1110": "binary"
"0x_0A_74_AE": "hexadecimal"
1-234-567-8901: "BARE_KEY"
"190:20:30": "sexagesimal"
"190:20:30.15": "sexagesimal"
192.168.0.1: "BARE_KEY"
"2001-12-14 21:59:43.10 -5": "space separated"
"2001-12-14t21:59:43.10-05:00": "valid iso8601"
"2001-12-15 2:59:43.10": "no time zone (Z)"
"2001-12-15T02:59:43.1Z": "canonical"
"2002-12-14": "date"
"6.8523015e+5": "canonical"
"6.8523015e-5": "canonical"
"685.230_15e+03": "exponential"
"685230": "canonical"
"685_230.15": "fixed"
"N": "boolean false"
"NO": "boolean false"
"NULL": "null word capital"
"Null": "null word"
"OFF": "boolean false"
"On": "boolean true"
"True": "boolean true"
"Yes": "boolean true"
__-0B1010_0111_0100_1010_1110: "BARE_KEY"
__-0X_0a_74_ae: "BARE_KEY"
b: "BARE_KEY"
jsonnet.org/k8s-label-like: "BARE_KEY"
just-letters-dashes: "BARE_KEY"
just_letters_underscores: "BARE_KEY"
"n": "boolean false"
"null": "null word"
"off": "boolean false"
"on": "boolean true"
"true": "boolean true"
x: "BARE_KEY"
"y": "boolean true"
"yes": "boolean true"
"~": "null key"
|||
) &&

//std.assertEqual(
// std.manifestYamlStream([bare_yaml_quoted, bare_yaml_unquoted], quote_keys=false),
// |||
// ---
// "": "empty key"
// "+.inf": "positive infinity"
// "+685_230": "decimal"
// "-": "invalid bare key"
// "---": "triple dash key"
// "-.inf": "negative infinity"
// "-0.1_0_0": "negative float"
// "-0b1010_0111_0100_1010_1110": "binary"
// "-0x_0A_74_AE": "negative hexadecimal"
// "-190:20:30": "negative sexagesimal"
// "-190:20:30.15": "negative sexagesimal"
// "-1_0": "negative integer"
// "-6.8523015e+5": "negative canonical"
// "-685.230_15E-03": "negative w/ negative exponential"
// "-685.230_15e+03": "negative exponential"
// "-685.230_15e-03": "negative w/ negative exponential"
// "-685_230.15": "negative fixed"
// ".NaN": "not a number"
// ".inf": "positive infinity"
// "02472256": "octal"
// "0b1010_0111_0100_1010_1110": "binary"
// "0x_0A_74_AE": "hexadecimal"
// "190:20:30": "sexagesimal"
// "190:20:30.15": "sexagesimal"
// "2001-12-14 21:59:43.10 -5": "space separated"
// "2001-12-14t21:59:43.10-05:00": "valid iso8601"
// "2001-12-15 2:59:43.10": "no time zone (Z)"
// "2001-12-15T02:59:43.1Z": "canonical"
// "2002-12-14": "date"
// "6.8523015e+5": "canonical"
// "6.8523015e-5": "canonical"
// "685.230_15e+03": "exponential"
// "685230": "canonical"
// "685_230.15": "fixed"
// "N": "boolean false"
// "NO": "boolean false"
// "NULL": "null word capital"
// "Null": "null word"
// "OFF": "boolean false"
// "On": "boolean true"
// "True": "boolean true"
// "Yes": "boolean true"
// "n": "boolean false"
// "null": "null word"
// "off": "boolean false"
// "on": "boolean true"
// "true": "boolean true"
// "y": "boolean true"
// "yes": "boolean true"
// "~": "null key"
// ---
// -0B1010_0111_0100_1010_1110: "BARE_KEY"
// 0X_0a_74_ae: "BARE_KEY"
// 1-234-567-8901: "BARE_KEY"
// 192.168.0.1: "BARE_KEY"
// __-0B1010_0111_0100_1010_1110: "BARE_KEY"
// __-0X_0a_74_ae: "BARE_KEY"
// b: "BARE_KEY"
// jsonnet.org/k8s-label-like: "BARE_KEY"
// just-letters-dashes: "BARE_KEY"
// just_letters_underscores: "BARE_KEY"
// x: "BARE_KEY"
// ...
// |||
//) &&
std.assertEqual(
std.manifestYamlStream([bare_yaml_quoted, bare_yaml_unquoted], quote_keys=false),
|||
---
"": "empty key"
"+.inf": "positive infinity"
"+685_230": "decimal"
"-": "invalid bare key"
"---": "triple dash key"
"-.inf": "negative infinity"
"-0.1_0_0": "negative float"
"-0b1010_0111_0100_1010_1110": "binary"
"-0x_0A_74_AE": "negative hexadecimal"
"-190:20:30": "negative sexagesimal"
"-190:20:30.15": "negative sexagesimal"
"-1_0": "negative integer"
"-6.8523015e+5": "negative canonical"
"-685.230_15E-03": "negative w/ negative exponential"
"-685.230_15e+03": "negative exponential"
"-685.230_15e-03": "negative w/ negative exponential"
"-685_230.15": "negative fixed"
".NaN": "not a number"
".inf": "positive infinity"
"02472256": "octal"
"0b1010_0111_0100_1010_1110": "binary"
"0x_0A_74_AE": "hexadecimal"
"190:20:30": "sexagesimal"
"190:20:30.15": "sexagesimal"
"2001-12-14 21:59:43.10 -5": "space separated"
"2001-12-14t21:59:43.10-05:00": "valid iso8601"
"2001-12-15 2:59:43.10": "no time zone (Z)"
"2001-12-15T02:59:43.1Z": "canonical"
"2002-12-14": "date"
"6.8523015e+5": "canonical"
"6.8523015e-5": "canonical"
"685.230_15e+03": "exponential"
"685230": "canonical"
"685_230.15": "fixed"
"N": "boolean false"
"NO": "boolean false"
"NULL": "null word capital"
"Null": "null word"
"OFF": "boolean false"
"On": "boolean true"
"True": "boolean true"
"Yes": "boolean true"
"n": "boolean false"
"null": "null word"
"off": "boolean false"
"on": "boolean true"
"true": "boolean true"
"y": "boolean true"
"yes": "boolean true"
"~": "null key"
---
-0B1010_0111_0100_1010_1110: "BARE_KEY"
0X_0a_74_ae: "BARE_KEY"
1-234-567-8901: "BARE_KEY"
192.168.0.1: "BARE_KEY"
__-0B1010_0111_0100_1010_1110: "BARE_KEY"
__-0X_0a_74_ae: "BARE_KEY"
b: "BARE_KEY"
jsonnet.org/k8s-label-like: "BARE_KEY"
just-letters-dashes: "BARE_KEY"
just_letters_underscores: "BARE_KEY"
x: "BARE_KEY"
...
|||
) &&

std.assertEqual(
std.manifestYamlStream([some_json, some_json, {}, [], 3, '"']),
Expand Down

0 comments on commit f64bc60

Please sign in to comment.