From d1803fc29930af0bf4d29e5c402afe8e70e0a8f5 Mon Sep 17 00:00:00 2001
From: scnace
Date: Mon, 12 Aug 2024 19:01:31 +0800
Subject: [PATCH 1/2] yaml: support multi-document YAML

Per the YAML 1.2.2 spec (https://yaml.org/spec/1.2.2), a line of three
dashes ("---") is valid and works as the separator between the documents
of a stream.

Signed-off-by: scnace
---
 yaml.go      |  72 +++++++++++++++-
 yaml_test.go | 230 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 300 insertions(+), 2 deletions(-)

diff --git a/yaml.go b/yaml.go
index fc10246..4b2d0b0 100644
--- a/yaml.go
+++ b/yaml.go
@@ -24,7 +24,8 @@ import (
 	"reflect"
 	"strconv"
 
-	"sigs.k8s.io/yaml/goyaml.v2"
+	yaml "sigs.k8s.io/yaml/goyaml.v2"
+	yamlv3 "sigs.k8s.io/yaml/goyaml.v3"
 )
 
 // Marshal marshals obj into JSON using stdlib json.Marshal, and then converts JSON to YAML using JSONToYAML (see that method for more reference)
@@ -125,6 +126,39 @@ func JSONToYAML(j []byte) ([]byte, error) {
 	return yamlBytes, nil
 }
 
+// MultiJSONToYAML converts a stream of one or more concatenated JSON values
+// to YAML. Every value is converted on its own and the resulting YAML is
+// appended to the output in input order; empty input yields empty output.
+//
+// Each value is handed to yamlv3 as raw text instead of going through
+// encoding/json's generic decoding, which would turn every number into a
+// float64 (the tests expect 2^53+1 to survive the conversion unchanged).
+func MultiJSONToYAML(j []byte) ([]byte, error) {
+	dec := json.NewDecoder(bytes.NewReader(j))
+	var yamlBytes bytes.Buffer
+	for {
+		// json.RawMessage holds exactly the bytes of the next value, so
+		// the decoder's read-ahead can never hand neighbouring values (or
+		// a repeat of an earlier one) to the YAML parser.
+		var raw json.RawMessage
+		if err := dec.Decode(&raw); err == io.EOF {
+			break
+		} else if err != nil {
+			return nil, err
+		}
+		var jsonObj interface{}
+		if err := yamlv3.Unmarshal(raw, &jsonObj); err != nil {
+			return nil, err
+		}
+		yb, err := yamlv3.Marshal(jsonObj)
+		if err != nil {
+			return nil, err
+		}
+		yamlBytes.Write(yb)
+	}
+	return yamlBytes.Bytes(), nil
+}
+
 // YAMLToJSON converts YAML to JSON. Since JSON is a subset of YAML,
 // passing JSON through this method should be a no-op.
 //
@@ -153,6 +187,42 @@ func YAMLToJSONStrict(y []byte) ([]byte, error) {
 	return yamlToJSONTarget(y, nil, yaml.UnmarshalStrict)
 }
 
+// MultiYAMLToJSON converts YAML to JSON with the go-yaml v3 streaming decoder, which, unlike
+// yamlv3.Unmarshal, also handles input made of several YAML documents.
+// Also see [go-yaml #232](https://github.com/go-yaml/yaml/issues/232) for details.
+func MultiYAMLToJSON(y []byte) ([]byte, error) {
+	dec := yamlv3.NewDecoder(bytes.NewReader(y))
+	var jsonObjs []interface{}
+	for {
+		var yamlObj interface{}
+		if err := dec.Decode(&yamlObj); err == io.EOF {
+			break
+		} else if err != nil {
+			return nil, err
+		}
+		// YAML objects are not completely compatible with JSON objects (e.g. you
+		// can have non-string keys in YAML). So, convert the YAML-compatible object
+		// to a JSON-compatible object, failing with an error if irrecoverable
+		// incompatibilties happen along the way.
+		jsonObj, err := convertToJSONableObject(yamlObj, nil)
+		if err != nil {
+			return nil, err
+		}
+		jsonObjs = append(jsonObjs, jsonObj)
+	}
+	// For compatibility with single YAML documents
+	// we should always trust single YAML as a single JSON object.
+	if len(jsonObjs) == 1 {
+		return json.Marshal(jsonObjs[0])
+	}
+	// Convert this object to JSON and return the data.
+	jsonBytes, err := json.Marshal(jsonObjs)
+	if err != nil {
+		return nil, err
+	}
+	return jsonBytes, nil
+}
+
 func yamlToJSONTarget(yamlBytes []byte, jsonTarget *reflect.Value, unmarshalFn func([]byte, interface{}) error) ([]byte, error) {
 	// Convert the YAML to an object.
 	var yamlObj interface{}
diff --git a/yaml_test.go b/yaml_test.go
index bcabc79..dfae5d5 100644
--- a/yaml_test.go
+++ b/yaml_test.go
@@ -42,6 +42,7 @@ type errorType int
 const (
 	noErrorsType    errorType = 0
 	fatalErrorsType errorType = 1 << iota
+	notMatchedType
 )
 
 type unmarshalTestCase struct {
@@ -122,9 +123,17 @@ var (
 	funcYAMLToJSONStrict testYAMLToJSONFunc = func(yamlBytes []byte) ([]byte, error) {
 		return YAMLToJSONStrict(yamlBytes)
 	}
+
+	funcMultiYAMLv3ToJSON testYAMLToJSONFunc = func(yamlBytes []byte) ([]byte, error) {
+		return MultiYAMLToJSON(yamlBytes)
+	}
 )
 
-func testYAMLToJSON(t *testing.T, f testYAMLToJSONFunc, tests map[string]yamlToJSONTestcase) {
+func testYAMLToJSON(
+	t *testing.T,
+	f testYAMLToJSONFunc,
+	tests map[string]yamlToJSONTestcase,
+) {
 	for testName, test := range tests {
 		t.Run(fmt.Sprintf("%s_YAMLToJSON", testName), func(t *testing.T) {
 			// Convert Yaml to Json
@@ -170,6 +179,56 @@ func testYAMLToJSON(t *testing.T, f testYAMLToJSONFunc, tests map[string]yamlToJ
 	}
 }
 
+func testMultiYAMLToJSON(
+	t *testing.T,
+	f testYAMLToJSONFunc,
+	tests map[string]yamlToJSONTestcase,
+) {
+	for testName, test := range tests {
+		t.Run(fmt.Sprintf("%s_MultiYAMLToJSON", testName), func(t *testing.T) {
+			// Convert Yaml to Json
+			jsonBytes, err := f([]byte(test.yaml))
+			if err != nil && test.err == noErrorsType {
+				t.Errorf("Failed to convert YAML to JSON, yamlv3: `%s`, err: %v", test.yaml, err)
+			}
+			if err == nil && test.err&fatalErrorsType != 0 {
+				t.Errorf("expected a fatal error, but no fatal error was returned, yaml: `%s`", test.yaml)
+			}
+
+			if test.err&fatalErrorsType != 0 {
+				// Don't check output if error is fatal
+				return
+			}
+
+			// Check it against the expected output.
+			if string(jsonBytes) != test.json && (test.err != notMatchedType) {
+				t.Errorf("Failed to convert YAML to JSON, yaml: `%s`, expected json `%s`, got `%s`", test.yaml, test.json, string(jsonBytes))
+			}
+		})
+
+		t.Run(fmt.Sprintf("%s_MultiJSONToYAML", testName), func(t *testing.T) {
+			// Convert JSON to YAML
+			yamlBytes, err := MultiJSONToYAML([]byte(test.json))
+			if err != nil {
+				t.Errorf("Failed to convert JSON to YAML, json: `%s`, err: %v", test.json, err)
+			}
+
+			// Set the string that we will compare the reversed output to.
+			correctYamlString := test.yaml
+
+			// If a special reverse string was specified, use that instead.
+			if test.yamlReverseOverwrite != nil {
+				correctYamlString = *test.yamlReverseOverwrite
+			}
+
+			// Check it against the expected output.
+			if string(yamlBytes) != correctYamlString {
+				t.Errorf("Failed to convert JSON to YAML, json: `%s`, expected yaml `%s`, got `%s`", test.json, correctYamlString, string(yamlBytes))
+			}
+		})
+	}
+}
+
 /* Start tests */
 
 type MarshalTest struct {
@@ -751,6 +810,136 @@ func TestYAMLToJSON(t *testing.T) {
 	})
 }
 
+func TestYAMLv3ToJSON(t *testing.T) {
+	v3Tests := map[string]yamlToJSONTestcase{
+		"string value": {
+			yaml: "t: a\n",
+			json: `{"t":"a"}`,
+		},
+		"null value": {
+			yaml: "t: null\n",
+			json: `{"t":null}`,
+		},
+		"boolean value": {
+			yaml:                 "t: True\n",
+			json:                 `{"t":true}`,
+			yamlReverseOverwrite: strPtr("t: true\n"),
+		},
+		"boolean value (no)": {
+			yaml: "t: \"no\"\n",
+			json: `{"t":"no"}`,
+		},
+		"integer value (2^53 + 1)": {
+			yaml:                 "t: 9007199254740993\n",
+			json:                 `{"t":9007199254740993}`,
+			yamlReverseOverwrite: strPtr("t: 9007199254740993\n"),
+		},
+		"integer value (1000000000000000000000000000000000000)": {
+			yaml:                 "t: 1000000000000000000000000000000000000\n",
+			json:                 `{"t":1e+36}`,
+			yamlReverseOverwrite: strPtr("t: 1e+36\n"),
+		},
+		"line-wrapped string value": {
+			yaml:                 "t: this is very long line with spaces and it must be longer than 80 so we will repeat\n that it must be longer that 80\n",
+			json:                 `{"t":"this is very long line with spaces and it must be longer than 80 so we will repeat that it must be longer that 80"}`,
+			yamlReverseOverwrite: strPtr("t: this is very long line with spaces and it must be longer than 80 so we will repeat that it must be longer that 80\n"),
+		},
+		"empty yaml value": {
+			yaml:                 "t: ",
+			json:                 `{"t":null}`,
+			yamlReverseOverwrite: strPtr("t: null\n"),
+		},
+		"boolean key": {
+			yaml:                 "True: a",
+			json:                 `{"true":"a"}`,
+			yamlReverseOverwrite: strPtr("\"true\": a\n"),
+		},
+		"boolean key (no)": {
+			yaml:                 "no: a",
+			json:                 `{"no":"a"}`,
+			yamlReverseOverwrite: strPtr("\"no\": a\n"),
+		},
+		"integer key": {
+			yaml:                 "1: a",
+			json:                 `{"1":"a"}`,
+			yamlReverseOverwrite: strPtr("\"1\": a\n"),
+		},
+		"float key": {
+			yaml:                 "1.2: a",
+			json:                 `{"1.2":"a"}`,
+			yamlReverseOverwrite: strPtr("\"1.2\": a\n"),
+		},
+		"large integer key": {
+			yaml:                 "1000000000000000000000000000000000000: a",
+			json:                 `{"1e+36":"a"}`,
+			yamlReverseOverwrite: strPtr("\"1e+36\": a\n"),
+		},
+		"large integer key (scientific notation)": {
+			yaml:                 "1e+36: a",
+			json:                 `{"1e+36":"a"}`,
+			yamlReverseOverwrite: strPtr("\"1e+36\": a\n"),
+		},
+		"string key (large integer as string)": {
+			yaml: "\"1e+36\": a\n",
+			json: `{"1e+36":"a"}`,
+		},
+		"string key (float as string)": {
+			yaml: "\"1.2\": a\n",
+			json: `{"1.2":"a"}`,
+		},
+		"array": {
+			yaml: "- t: a\n",
+			json: `[{"t":"a"}]`,
+		},
+		"nested struct array": {
+			yaml: "- t: a\n- t:\n b: 1\n c: 2\n",
+			json: `[{"t":"a"},{"t":{"b":1,"c":2}}]`,
+		},
+		"nested struct array (json notation)": {
+			yaml:                 `[{t: a}, {t: {b: 1, c: 2}}]`,
+			json:                 `[{"t":"a"},{"t":{"b":1,"c":2}}]`,
+			yamlReverseOverwrite: strPtr("- t: a\n- t:\n b: 1\n c: 2\n"),
+		},
+		"empty struct value": {
+			yaml:                 "- t: ",
+			json:                 `[{"t":null}]`,
+			yamlReverseOverwrite: strPtr("- t: null\n"),
+		},
+		"null struct value": {
+			yaml: "- t: null\n",
+			json: `[{"t":null}]`,
+		},
+		"binary data": {
+			yaml:                 "a: !!binary gIGC",
+			json:                 `{"a":"\ufffd\ufffd\ufffd"}`,
+			yamlReverseOverwrite: strPtr("a: \ufffd\ufffd\ufffd\n"),
+		},
+
+		// Cases that should produce errors.
+		"~ key": {
+			yaml:                 "~: a",
+			json:                 `{"null":"a"}`,
+			yamlReverseOverwrite: strPtr("\"null\": a\n"),
+			err:                  fatalErrorsType,
+		},
+		"null key": {
+			yaml:                 "null: a",
+			json:                 `{"null":"a"}`,
+			yamlReverseOverwrite: strPtr("\"null\": a\n"),
+			err:                  fatalErrorsType,
+		},
+		"multi-directives": {
+			yaml:                 "a: b\n---\nc: d\n",
+			json:                 `[{"a":"b"},{"c":"d"}]`,
+			yamlReverseOverwrite: strPtr("- a: b\n- c: d\n"),
+		},
+	}
+
+	t.Run("YAMLv3ToJSON", func(t *testing.T) {
+		testMultiYAMLToJSON(t, funcMultiYAMLv3ToJSON, v3Tests)
+	})
+}
+
 func TestYAMLToJSONStrictFails(t *testing.T) {
 	tests := map[string]yamlToJSONTestcase{
 		// expect YAMLtoJSON to pass on duplicate field names
@@ -773,6 +962,45 @@ func TestYAMLToJSONStrictFails(t *testing.T) {
 		}
 		testYAMLToJSON(t, funcYAMLToJSONStrict, failTests)
 	})
+
+	t.Run("YAMLv3ToJSON", func(t *testing.T) {
+		failTests := map[string]yamlToJSONTestcase{}
+		for name, test := range tests {
+			test.err = fatalErrorsType
+			failTests[name] = test
+		}
+		testMultiYAMLToJSON(t, funcMultiYAMLv3ToJSON, failTests)
+	})
+}
+
+func TestMultiYAMLToJSON(t *testing.T) {
+	tests := map[string]yamlToJSONTestcase{
+		"multi-directives": {
+			yaml:                 "---\n a: b\n---\n c: d\n",
+			json:                 `[{"a":"b"},{"c":"d"}]`,
+			yamlReverseOverwrite: strPtr("- a: b\n- c: d\n"),
+		},
+	}
+	t.Run("YAMLv3ToJSON", func(t *testing.T) {
+		testMultiYAMLToJSON(t, funcMultiYAMLv3ToJSON, tests)
+	})
+
+	t.Run("YAMLToJSON", func(t *testing.T) {
+		for name, test := range tests {
+			test.err = notMatchedType
+			tests[name] = test
+		}
+		testMultiYAMLToJSON(t, funcYAMLToJSON, tests)
+	})
+
+	t.Run("YAMLToJSONStrict", func(t *testing.T) {
+		failTests := map[string]yamlToJSONTestcase{}
+		for name, test := range tests {
+			test.err = notMatchedType
+			failTests[name] = test
+		}
+		testMultiYAMLToJSON(t, funcYAMLToJSONStrict, failTests)
+	})
 }
 
 func TestJSONObjectToYAMLObject(t *testing.T) {

From 
c5cf05958edcd4c8ef3cde3bc4f4db411307005f Mon Sep 17 00:00:00 2001
From: scnace
Date: Thu, 15 Aug 2024 16:16:25 +0800
Subject: [PATCH 2/2] yaml: ignore dashes for multi-document YAML

Merge all documents of a stream into one JSON object, so that input with
and without "---" document markers converts to the same JSON. Empty
documents are skipped, and a key that is repeated across documents is
rejected instead of silently overwriting the earlier value.

---
 yaml.go      | 58 ++++++++++++++++++++++++++++++++++++++++++++-------
 yaml_test.go | 17 +++++++++------
 2 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/yaml.go b/yaml.go
index 4b2d0b0..b3fa157 100644
--- a/yaml.go
+++ b/yaml.go
@@ -216,26 +216,70 @@ func MultiYAMLToJSON(y []byte) ([]byte, error) {
 		// YAML objects are not completely compatible with JSON objects (e.g. you
 		// can have non-string keys in YAML). So, convert the YAML-compatible object
 		// to a JSON-compatible object, failing with an error if irrecoverable
-		// incompatibilties happen along the way.
+		// incompatibilities happen along the way.
 		jsonObj, err := convertToJSONableObject(yamlObj, nil)
 		if err != nil {
 			return nil, err
 		}
 		jsonObjs = append(jsonObjs, jsonObj)
 	}
-	// For compatibility with single YAML documents
-	// we should always trust single YAML as a single JSON object.
-	if len(jsonObjs) == 1 {
-		return json.Marshal(jsonObjs[0])
+	// Collapse every document of the stream into one JSON value, so the same
+	// mappings produce the same JSON whether or not they are separated by
+	// "---" document markers. Keys must be unique across documents, exactly
+	// as they must be unique inside a single YAML mapping.
+	unaryJSON, err := mergeJSONObjects(jsonObjs...)
+	if err != nil {
+		return nil, err
 	}
-	// Convert this object to JSON and return the data.
-	jsonBytes, err := json.Marshal(jsonObjs)
+	jsonBytes, err := json.Marshal(unaryJSON)
 	if err != nil {
 		return nil, err
 	}
 	return jsonBytes, nil
 }
 
+// mergeJSONObjects folds the JSON-compatible values decoded from a
+// multi-document YAML stream into a single value.
+//
+// No documents yield nil and a lone document is returned unchanged, whatever
+// its type. With two or more documents every non-nil document must be a
+// JSON object; their members are combined into one object. Nil documents
+// (presumably empty YAML documents) are skipped, and nil is returned when
+// all of them are nil. A key that appears in more than one document is
+// reported as an error instead of silently keeping only the last value.
+func mergeJSONObjects(jsonObjs ...interface{}) (interface{}, error) {
+	switch len(jsonObjs) {
+	case 0:
+		return nil, nil
+	case 1:
+		return jsonObjs[0], nil
+	}
+	var merged map[string]interface{}
+	for i, jsonObj := range jsonObjs {
+		switch typedJSONObj := jsonObj.(type) {
+		case nil:
+			// Nil document: it contributes nothing to the merged object.
+		case map[string]interface{}:
+			if merged == nil {
+				merged = make(map[string]interface{}, len(typedJSONObj))
+			}
+			for k, v := range typedJSONObj {
+				if _, dup := merged[k]; dup {
+					return nil, fmt.Errorf("duplicate key %q in YAML document %d", k, i+1)
+				}
+				merged[k] = v
+			}
+		default:
+			return nil, fmt.Errorf("unsupported JSON object type: %T", jsonObj)
+		}
+	}
+	if merged == nil {
+		// Every document was nil; return an untyped nil, not a nil map.
+		return nil, nil
+	}
+	return merged, nil
+}
+
 func yamlToJSONTarget(yamlBytes []byte, jsonTarget *reflect.Value, unmarshalFn func([]byte, interface{}) error) ([]byte, error) {
 	// Convert the YAML to an object.
 	var yamlObj interface{}
diff --git a/yaml_test.go b/yaml_test.go
index dfae5d5..949f85d 100644
--- a/yaml_test.go
+++ b/yaml_test.go
@@ -930,8 +930,8 @@ func TestYAMLv3ToJSON(t *testing.T) {
 		},
 		"multi-directives": {
 			yaml:                 "a: b\n---\nc: d\n",
-			json:                 `[{"a":"b"},{"c":"d"}]`,
-			yamlReverseOverwrite: strPtr("- a: b\n- c: d\n"),
+			json:                 `{"a":"b","c":"d"}`,
+			yamlReverseOverwrite: strPtr("a: b\nc: d\n"),
 		},
 	}
 
@@ -975,10 +975,15 @@ func TestYAMLToJSONStrictFails(t *testing.T) {
 
 func TestMultiYAMLToJSON(t *testing.T) {
 	tests := map[string]yamlToJSONTestcase{
-		"multi-directives": {
-			yaml:                 "---\n a: b\n---\n c: d\n",
-			json:                 `[{"a":"b"},{"c":"d"}]`,
-			yamlReverseOverwrite: strPtr("- a: b\n- c: d\n"),
+		"multi-directives-with-dashes": {
+			yaml:                 "---\na: b\n---\nc: d\n",
+			json:                 `{"a":"b","c":"d"}`,
+			yamlReverseOverwrite: strPtr("a: b\nc: d\n"),
+		},
+		"multi-directives-without-dashes": {
+			yaml:                 "a: b\nc: d\n",
+			json:                 `{"a":"b","c":"d"}`,
+			yamlReverseOverwrite: strPtr("a: b\nc: d\n"),
 		},
 	}
 	t.Run("YAMLv3ToJSON", func(t *testing.T) {