From 9eda88291cac30a3aa6d69aed0ab0f147e0719db Mon Sep 17 00:00:00 2001 From: Luke Wagner Date: Mon, 10 Jun 2024 14:02:32 -0500 Subject: [PATCH] Tweak CABI string conversion to match what impls want to do --- design/mvp/CanonicalABI.md | 26 +++++++++++++------------ design/mvp/canonical-abi/definitions.py | 26 +++++++++++++------------ 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md index db0c3eac..c8e279ad 100644 --- a/design/mvp/CanonicalABI.md +++ b/design/mvp/CanonicalABI.md @@ -905,18 +905,20 @@ def store_string_to_utf8(cx, src, src_code_units, worst_case_size): assert(src_code_units <= MAX_STRING_BYTE_LENGTH) ptr = cx.opts.realloc(0, 0, 1, src_code_units) trap_if(ptr + src_code_units > len(cx.opts.memory)) - encoded = src.encode('utf-8') - assert(src_code_units <= len(encoded)) - cx.opts.memory[ptr : ptr+src_code_units] = encoded[0 : src_code_units] - if src_code_units < len(encoded): - trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH) - ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size) - trap_if(ptr + worst_case_size > len(cx.opts.memory)) - cx.opts.memory[ptr+src_code_units : ptr+len(encoded)] = encoded[src_code_units : ] - if worst_case_size > len(encoded): - ptr = cx.opts.realloc(ptr, worst_case_size, 1, len(encoded)) - trap_if(ptr + len(encoded) > len(cx.opts.memory)) - return (ptr, len(encoded)) + for i,code_point in enumerate(src): + if ord(code_point) < 2**7: + cx.opts.memory[ptr + i] = ord(code_point) + else: + trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH) + ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size) + trap_if(ptr + worst_case_size > len(cx.opts.memory)) + encoded = src.encode('utf-8') + cx.opts.memory[ptr+i : ptr+len(encoded)] = encoded[i : ] + if worst_case_size > len(encoded): + ptr = cx.opts.realloc(ptr, worst_case_size, 1, len(encoded)) + trap_if(ptr + len(encoded) > len(cx.opts.memory)) + return (ptr, len(encoded)) + return (ptr, src_code_units) ``` Converting from UTF-8 to UTF-16 performs an initial worst-case size allocation diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py index 4e78ac0b..95b3fb68 100644 --- a/design/mvp/canonical-abi/definitions.py +++ b/design/mvp/canonical-abi/definitions.py @@ -704,18 +704,20 @@ def store_string_to_utf8(cx, src, src_code_units, worst_case_size): assert(src_code_units <= MAX_STRING_BYTE_LENGTH) ptr = cx.opts.realloc(0, 0, 1, src_code_units) trap_if(ptr + src_code_units > len(cx.opts.memory)) - encoded = src.encode('utf-8') - assert(src_code_units <= len(encoded)) - cx.opts.memory[ptr : ptr+src_code_units] = encoded[0 : src_code_units] - if src_code_units < len(encoded): - trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH) - ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size) - trap_if(ptr + worst_case_size > len(cx.opts.memory)) - cx.opts.memory[ptr+src_code_units : ptr+len(encoded)] = encoded[src_code_units : ] - if worst_case_size > len(encoded): - ptr = cx.opts.realloc(ptr, worst_case_size, 1, len(encoded)) - trap_if(ptr + len(encoded) > len(cx.opts.memory)) - return (ptr, len(encoded)) + for i,code_point in enumerate(src): + if ord(code_point) < 2**7: + cx.opts.memory[ptr + i] = ord(code_point) + else: + trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH) + ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size) + trap_if(ptr + worst_case_size > len(cx.opts.memory)) + encoded = src.encode('utf-8') + cx.opts.memory[ptr+i : ptr+len(encoded)] = encoded[i : ] + if worst_case_size > len(encoded): + ptr = cx.opts.realloc(ptr, worst_case_size, 1, len(encoded)) + trap_if(ptr + len(encoded) > len(cx.opts.memory)) + return (ptr, len(encoded)) + return (ptr, src_code_units) def store_utf8_to_utf16(cx, src, src_code_units): worst_case_size = 2 * src_code_units