From 97dee892069e2f019d9f1320545563f7b4e23444 Mon Sep 17 00:00:00 2001 From: Joe Eli McIlvain Date: Thu, 8 Sep 2022 11:31:52 -0700 Subject: [PATCH] Add a compilation error for invalid escapes in string literals. This helps to prevent bugs wherein the user believes they have correctly written an escape code. Prior to this commit, invalid escape codes were treated as if they were literal characters, including the backslash character. See discussion in #348 --- spec/parser_spec.cr | 19 +++++++++++++++++++ src/savi/parser/builder/state.cr | 30 +++++++++++++++++++----------- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/spec/parser_spec.cr b/spec/parser_spec.cr index ea2a7a73..ce2a5504 100644 --- a/spec/parser_spec.cr +++ b/spec/parser_spec.cr @@ -185,6 +185,25 @@ describe Savi::Parser do end end + it "complains when a string literal has an unknown escape character" do + source = Savi::Source.new_example <<-SOURCE + :actor Main + :new + greeting = "Hello, World\\?" + SOURCE + + expected = <<-MSG + This is an invalid escape character: + from (example):3: + greeting = "Hello, World\\?" 
+ ^ + MSG + + expect_raises Savi::Error, expected do + Savi::Parser.parse(source) + end + end + it "handles nifty heredoc string literals" do source = Savi::Source.new_example <<-SOURCE :actor Main diff --git a/src/savi/parser/builder/state.cr b/src/savi/parser/builder/state.cr index c2c5a5e2..c6920b60 100644 --- a/src/savi/parser/builder/state.cr +++ b/src/savi/parser/builder/state.cr @@ -48,6 +48,18 @@ module Savi::Parser::Builder ) end + def pos_single_with_offset( + token : Pegmatite::Token, + offset : Int32, + ) : Source::Pos + kind, start, finish = token + + start = start + offset + finish = start + 1 + + pos({kind, start, finish}) + end + def slice(token : Pegmatite::Token) kind, start, finish = token slice(start...finish) @@ -58,11 +70,7 @@ module Savi::Parser::Builder end def slice_with_escapes(token : Pegmatite::Token) - kind, start, finish = token - slice_with_escapes(start...finish) - end - - def slice_with_escapes(range : Range) + range = token[1]...token[2] string = slice(range) reader = Char::Reader.new(string) @@ -91,7 +99,8 @@ module Savi::Parser::Builder elsif 'A' <= hex_char <= 'F' 10 + (hex_char - 'A') else - raise "invalid escape hex character: #{hex_char}" + Error.at pos_single_with_offset(token, reader.pos), + "This is an invalid escape hex character" end byte_value = 16 * byte_value + hex_value end @@ -108,7 +117,8 @@ module Savi::Parser::Builder elsif 'A' <= hex_char <= 'F' 10 + (hex_char - 'A') else - raise "invalid unicode escape hex character: #{hex_char}" + Error.at pos_single_with_offset(token, reader.pos), + "This is an invalid unicode escape hex character" end codepoint = 16 * codepoint + hex_value end @@ -126,10 +136,8 @@ module Savi::Parser::Builder reader.next_char end else - # Not a valid escape character - pass it on as a literal slash - # followed by that literal character, as if not an escape. 
- result << '\\' - result << reader.current_char + Error.at pos_single_with_offset(token, reader.pos), + "This is an invalid escape character" end else result << reader.current_char