diff --git a/src/parser.hpp b/src/parser.hpp index 29851e1..5160857 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -140,6 +140,28 @@ std::string parse_string(std::string_view& source, std::string& error) noexcept value |= take_hex() << 8; value |= take_hex() << 4; value |= take_hex(); + if (0xd800 <= value && value <= 0xdbff) { + // surrogate pair + if (take_one(source, error) != '\\') { + error = "expected backslash"; + return {}; + } + if (take_one(source, error) != 'u') { + error = "expected u"; + return {}; + } + int32_t value2 = 0; + value2 |= take_hex() << 12; + value2 |= take_hex() << 8; + value2 |= take_hex() << 4; + value2 |= take_hex(); + if (0xdc00 <= value2 && value2 <= 0xdfff) { + value = 0x10000 + ((value & 0x3ff) << 10) + (value2 & 0x3ff); + } else { + error = "invalid surrogate pair"; + return {}; + } + } if (!error.empty()) return {}; encode_utf8(str, value); } break; diff --git a/src/value.hpp b/src/value.hpp index 5311e38..7a45950 100644 --- a/src/value.hpp +++ b/src/value.hpp @@ -221,11 +221,12 @@ void dump_impl_string(const std::string& str, std::string& result) { case '"': result += "\\\""sv; break; case '\\': result += "\\\\"sv; break; default: { - // TODO: exceptionless dump to make alk happy - // in the meantime, this is better than creating - // an invalid json :+1: - if (c >= 0 && c < 0x20) - throw std::runtime_error("invalid string"); + if (c >= 0 && c < 0x20) { + std::array<char, 7> buffer; + snprintf(buffer.data(), buffer.size(), "\\u%04x", c); + result += buffer.data(); + break; + } result.push_back(c); break; } } diff --git a/test/test.cpp b/test/test.cpp index 4841ebb..176eaf2 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -260,12 +260,6 @@ TEST_CASE("Invalid json") { TEST_CASE("Invalid dump") { matjson::Value obj; using namespace std::string_literals; - // if this somehow happens (cough cough) - obj["Hello"] = "Wor\x00ld"s; - // then dump() should throw because it would create an invalid json - REQUIRE_THROWS(obj.dump()); - - 
obj.as_object().clear(); // no throw obj.dump(); @@ -311,4 +305,28 @@ TEST_CASE("Rvalue as_array() return") { // `auto& arr = get_json().as_array();` should fail to compile, however i can't test that auto const& arr = get_json().as_array(); REQUIRE(arr.size() == 4); +} + +TEST_CASE("Parsing unicode characters") { + auto obj = matjson::parse(R"( + { + "hello": "\u00D3l\u00E1!", + "cool": "😎", + "pair": "\uD83D\uDE00" + } + )"); + + REQUIRE(obj["hello"].as_string() == "Ólá!"); + REQUIRE(obj["cool"].as_string() == "😎"); + REQUIRE(obj["pair"].as_string() == "😀"); +} + +TEST_CASE("Special characters") { + auto obj = matjson::parse(R"( + { + "control": "\b\f\n\r\t\u0012 " + } + )"); + + REQUIRE(obj["control"].as_string() == "\b\f\n\r\t\x12 "); } \ No newline at end of file