Skip to content

Commit

Permalink
Remove quote hacks in text/scanner based lexer.
Browse files Browse the repository at this point in the history
This lexer predated participle.Unquote() and automatically removed
quotes where it really shouldn't have. It also supported single-quoted
strings via a very ugly hack. Neither behaviour is supported anymore.
  • Loading branch information
alecthomas committed Sep 18, 2020
1 parent 081e31b commit 4f53af9
Show file tree
Hide file tree
Showing 9 changed files with 112 additions and 92 deletions.
30 changes: 2 additions & 28 deletions lexer/text_scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@ package lexer

import (
"bytes"
"fmt"
"io"
"strconv"
"strings"
"text/scanner"
"unicode/utf8"
)

// TextScannerLexer is a lexer that uses the text/scanner module.
Expand Down Expand Up @@ -95,32 +92,9 @@ func (t *textScannerLexer) Next() (Token, error) {
if t.err != nil {
return Token{}, t.err
}
return textScannerTransform(Token{
return Token{
Type: typ,
Value: text,
Pos: pos,
})
}

func textScannerTransform(token Token) (Token, error) {
// Unquote strings.
switch token.Type {
case scanner.Char:
// FIXME(alec): This is pretty hacky...we convert a single quoted char into a double
// quoted string in order to support single quoted strings.
token.Value = fmt.Sprintf("\"%s\"", token.Value[1:len(token.Value)-1])
fallthrough
case scanner.String:
s, err := strconv.Unquote(token.Value)
if err != nil {
return Token{}, Errorf(token.Pos, "%s: %q", err.Error(), token.Value)
}
token.Value = s
if token.Type == scanner.Char && utf8.RuneCountInString(s) > 1 {
token.Type = scanner.String
}
case scanner.RawString:
token.Value = token.Value[1 : len(token.Value)-1]
}
return token, nil
}, nil
}
17 changes: 0 additions & 17 deletions lexer/text_scanner_go110_test.go

This file was deleted.

17 changes: 0 additions & 17 deletions lexer/text_scanner_go111_test.go

This file was deleted.

10 changes: 5 additions & 5 deletions lexer/text_scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,21 @@ func TestLexer(t *testing.T) {
}

func TestLexString(t *testing.T) {
lexer := LexString("", `"hello\nworld"`)
lexer := LexString("", "\"hello world\"")
token, err := lexer.Next()
require.NoError(t, err)
require.Equal(t, Token{Type: scanner.String, Value: "hello\nworld", Pos: Position{Line: 1, Column: 1}}, token)
require.Equal(t, token, Token{Type: scanner.String, Value: "\"hello world\"", Pos: Position{Line: 1, Column: 1}})
}

func TestLexSingleString(t *testing.T) {
lexer := LexString("", `'hello\nworld'`)
lexer := LexString("", "`hello world`")
token, err := lexer.Next()
require.NoError(t, err)
require.Equal(t, Token{Type: scanner.String, Value: "hello\nworld", Pos: Position{Line: 1, Column: 1}}, token)
require.Equal(t, Token{Type: scanner.RawString, Value: "`hello world`", Pos: Position{Line: 1, Column: 1}}, token)
lexer = LexString("", `'\U00008a9e'`)
token, err = lexer.Next()
require.NoError(t, err)
require.Equal(t, Token{Type: scanner.Char, Value: "\U00008a9e", Pos: Position{Line: 1, Column: 1}}, token)
require.Equal(t, Token{Type: scanner.Char, Value: `'\U00008a9e'`, Pos: Position{Line: 1, Column: 1}}, token)
}

func BenchmarkTextScannerLexer(b *testing.B) {
Expand Down
11 changes: 5 additions & 6 deletions lookahead_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,21 @@ func TestIssue3Example1(t *testing.T) {
}

g := &LAT1Module{}
p := mustTestParser(t, g, participle.UseLookahead(5))
p := mustTestParser(t, g, participle.UseLookahead(5), participle.Unquote())
err := p.ParseString("", `
source_filename = "foo.c"
target datalayout = "bar"
target triple = "baz"
`, g)
require.NoError(t, err)
require.Equal(t,
g,
&LAT1Module{
Decls: []*LAT1Decl{
{SourceFilename: "foo.c"},
{DataLayout: "bar"},
{TargetTriple: "baz"},
},
})
}, g)
}

type LAT2Config struct {
Expand All @@ -59,7 +58,7 @@ type LAT2Group struct {

func TestIssue3Example2(t *testing.T) {
g := &LAT2Config{}
p := mustTestParser(t, g, participle.UseLookahead(2))
p := mustTestParser(t, g, participle.UseLookahead(2), participle.Unquote())
err := p.ParseString("", `
key = "value"
block {
Expand All @@ -68,7 +67,6 @@ func TestIssue3Example2(t *testing.T) {
`, g)
require.NoError(t, err)
require.Equal(t,
g,
&LAT2Config{
Entries: []*LAT2Entry{
{Attribute: &LAT2Attribute{Key: "key", Value: "value"}},
Expand All @@ -82,6 +80,7 @@ func TestIssue3Example2(t *testing.T) {
},
},
},
g,
)
}

Expand Down Expand Up @@ -228,7 +227,7 @@ type issue28Value struct {
}

func TestIssue28(t *testing.T) {
p := mustTestParser(t, &issue28Term{}, participle.UseLookahead(5))
p := mustTestParser(t, &issue28Term{}, participle.UseLookahead(5), participle.Unquote())

actual := &issue28Term{}
err := p.ParseString("", `"key": "value"`, actual)
Expand Down
26 changes: 23 additions & 3 deletions map.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,36 @@ func Elide(types ...string) Option {

// Apply a Mapping to all tokens coming out of a Lexer.
type mappingLexerDef struct {
lexer.Definition
l lexer.Definition
mapper Mapper
}

var _ lexer.Definition = &mappingLexerDef{}

// Symbols returns the symbol table of the wrapped lexer definition unchanged.
func (m *mappingLexerDef) Symbols() map[string]rune {
	return m.l.Symbols()
}

// LexString asks the wrapped definition to lex the string s and, on success,
// wraps the resulting lexer in a mappingLexer carrying m.mapper. Any error
// from the wrapped definition is returned as-is with a nil lexer.
func (m *mappingLexerDef) LexString(filename string, s string) (lexer.Lexer, error) {
	inner, err := m.l.LexString(filename, s)
	if err != nil {
		return nil, err
	}
	wrapped := &mappingLexer{inner, m.mapper}
	return wrapped, nil
}

// LexBytes asks the wrapped definition to lex the byte slice b and, on
// success, wraps the resulting lexer in a mappingLexer carrying m.mapper. Any
// error from the wrapped definition is returned as-is with a nil lexer.
func (m *mappingLexerDef) LexBytes(filename string, b []byte) (lexer.Lexer, error) {
	inner, err := m.l.LexBytes(filename, b)
	if err != nil {
		return nil, err
	}
	wrapped := &mappingLexer{inner, m.mapper}
	return wrapped, nil
}

func (m *mappingLexerDef) LexReader(filename string, r io.Reader) (lexer.Lexer, error) {
lexer, err := m.Definition.LexReader("", r)
l, err := m.l.LexReader(filename, r)
if err != nil {
return nil, err
}
return &mappingLexer{lexer, m.mapper}, nil
return &mappingLexer{l, m.mapper}, nil
}

type mappingLexer struct {
Expand Down
4 changes: 0 additions & 4 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@ func Build(grammar interface{}, options ...Option) (parser *Parser, err error) {
useLookahead: 1,
}
for _, option := range options {
if option == nil {
return nil, fmt.Errorf("nil Option passed, signature has changed; " +
"if you intended to provide a custom Lexer, try participle.Build(grammar, participle.Lexer(lexer))")
}
if err = option(p); err != nil {
return nil, err
}
Expand Down
18 changes: 9 additions & 9 deletions parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ type EBNF struct {
}

func TestEBNFParser(t *testing.T) {
parser := mustTestParser(t, &EBNF{})
parser := mustTestParser(t, &EBNF{}, participle.Unquote())

expected := &EBNF{
Productions: []*Production{
Expand Down Expand Up @@ -508,9 +508,9 @@ func TestHello(t *testing.T) {
To string `@String`
}

parser := mustTestParser(t, &testHello{})
parser := mustTestParser(t, &testHello{}, participle.Unquote())

expected := &testHello{"hello", "Bobby Brown"}
expected := &testHello{"hello", `Bobby Brown`}
actual := &testHello{}
err := parser.ParseString("", `hello "Bobby Brown"`, actual)
require.NoError(t, err)
Expand Down Expand Up @@ -655,7 +655,7 @@ func TestLiteralTypeConstraint(t *testing.T) {
Literal string `@"123456":String`
}

parser := mustTestParser(t, &grammar{})
parser := mustTestParser(t, &grammar{}, participle.Unquote())

actual := &grammar{}
expected := &grammar{Literal: "123456"}
Expand All @@ -681,7 +681,7 @@ func TestStructCaptureInterface(t *testing.T) {
Capture *nestedCapture `@String`
}

parser, err := participle.Build(&grammar{})
parser, err := participle.Build(&grammar{}, participle.Unquote())
require.NoError(t, err)

actual := &grammar{}
Expand Down Expand Up @@ -711,7 +711,7 @@ func TestParseable(t *testing.T) {
Inner *parseableStruct `@@`
}

parser, err := participle.Build(&grammar{})
parser, err := participle.Build(&grammar{}, participle.Unquote())
require.NoError(t, err)

actual := &grammar{}
Expand Down Expand Up @@ -1239,16 +1239,16 @@ func TestNegationWithPattern(t *testing.T) {
EverythingMoreComplex *[]string `@!(';' String)* @';' @String`
}

p := mustTestParser(t, &grammar{})
p := mustTestParser(t, &grammar{}, participle.Unquote())
// j, err := json.MarshalIndent(p.root, "", " ")
// log.Print(j)
// log.Print(stringer(p.root))
ast := &grammar{}
err := p.ParseString("", `hello world ; 'some-str'`, ast)
err := p.ParseString("", `hello world ; "some-str"`, ast)
require.NoError(t, err)
require.Equal(t, &[]string{"hello", "world", ";", `some-str`}, ast.EverythingMoreComplex)

err = p.ParseString("", `hello ; world ; 'hey'`, ast)
err = p.ParseString("", `hello ; world ; "hey"`, ast)
require.NoError(t, err)
require.Equal(t, &[]string{"hello", ";", "world", ";", `hey`}, ast.EverythingMoreComplex)

Expand Down
71 changes: 68 additions & 3 deletions struct.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ package participle
import (
"fmt"
"reflect"
"strconv"
"strings"
"text/scanner"
"unicode/utf8"

"github.com/alecthomas/participle/lexer"
)
Expand All @@ -26,7 +30,7 @@ func lexStruct(s reflect.Type) (*structLexer, error) {
}
if len(slex.indexes) > 0 {
tag := fieldLexerTag(slex.Field().StructField)
slex.lexer, err = lexer.Upgrade(lexer.LexString(s.Name(), tag))
slex.lexer, err = lexer.Upgrade(newTagLexer(s.Name(), tag))
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -77,7 +81,7 @@ func (s *structLexer) Peek() (lexer.Token, error) {
}
ft := s.GetField(field).StructField
tag := fieldLexerTag(ft)
lex, err = lexer.Upgrade(lexer.LexString(ft.Name, tag))
lex, err = lexer.Upgrade(newTagLexer(ft.Name, tag))
if err != nil {
return token, err
}
Expand All @@ -99,7 +103,7 @@ func (s *structLexer) Next() (lexer.Token, error) {
s.field++
ft := s.Field().StructField
tag := fieldLexerTag(ft)
s.lexer, err = lexer.Upgrade(lexer.LexString(ft.Name, tag))
s.lexer, err = lexer.Upgrade(newTagLexer(ft.Name, tag))
if err != nil {
return token, err
}
Expand Down Expand Up @@ -140,3 +144,64 @@ func collectFieldIndexes(s reflect.Type) (out [][]int, err error) {
}
return
}

// tagLexer tokenises the text of a struct field tag with a
// text/scanner.Scanner.
type tagLexer struct {
	// filename is copied into the position of every token returned by Next.
	filename string
	// scanner is the underlying tokeniser reading the tag text.
	scanner *scanner.Scanner
	// err is set by the scanner's Error callback and returned from Next.
	err error
}

// newTagLexer returns a tagLexer that scans the text of tag and labels the
// tokens it produces with filename.
func newTagLexer(filename string, tag string) *tagLexer {
	tl := &tagLexer{
		filename: filename,
		scanner:  new(scanner.Scanner),
	}
	// Init resets the scanner's public fields, so the Error callback has to be
	// installed afterwards.
	tl.scanner.Init(strings.NewReader(tag))
	tl.scanner.Error = func(_ *scanner.Scanner, msg string) {
		// Messages ending in "char literal" are dropped on purpose so that
		// single quoted strings are accepted. Hacky.
		if strings.HasSuffix(msg, "char literal") {
			return
		}
		tl.err = fmt.Errorf("%s: %s", tl.scanner.Pos(), msg)
	}
	return tl
}

// Next scans one token from the tag and returns it after passing it through
// textScannerTransform. If the scanner's Error callback has recorded an
// error, that error is returned together with a zero Token instead.
func (t *tagLexer) Next() (lexer.Token, error) {
	kind := t.scanner.Scan()
	if t.err != nil {
		return lexer.Token{}, t.err
	}
	tok := lexer.Token{
		Type:  kind,
		Value: t.scanner.TokenText(),
		Pos:   lexer.Position(t.scanner.Position),
	}
	// The scanner is not told the filename, so stamp it on here.
	tok.Pos.Filename = t.filename
	return textScannerTransform(tok)
}

// textScannerTransform replaces the quoted text of string-like tokens with
// the value it denotes:
//
//   - scanner.String tokens are unquoted with strconv.Unquote.
//   - scanner.Char tokens are treated as single quoted strings: the literal is
//     rewritten as a double quoted one and unquoted. If the result holds more
//     than one rune the token type becomes scanner.String.
//   - scanner.RawString tokens have their surrounding backquotes removed.
//
// Every other token is returned unchanged. A literal that cannot be unquoted,
// or that is too short to carry both of its quotes, yields a zero Token and an
// error positioned at token.Pos.
func textScannerTransform(token lexer.Token) (lexer.Token, error) {
	switch token.Type {
	case scanner.Char, scanner.String:
		if token.Type == scanner.Char {
			// FIXME(alec): This is pretty hacky...we convert a single quoted char into a double
			// quoted string in order to support single quoted strings.
			if len(token.Value) < 2 {
				// Without both quotes the slice below would panic.
				return lexer.Token{}, Errorf(token.Pos, "%s: %q", strconv.ErrSyntax.Error(), token.Value)
			}
			token.Value = doubleQuoteCharLiteral(token.Value)
		}
		s, err := strconv.Unquote(token.Value)
		if err != nil {
			return lexer.Token{}, Errorf(token.Pos, "%s: %q", err.Error(), token.Value)
		}
		token.Value = s
		if token.Type == scanner.Char && utf8.RuneCountInString(s) > 1 {
			token.Type = scanner.String
		}
	case scanner.RawString:
		if len(token.Value) < 2 {
			// Without both backquotes the slice below would panic.
			return lexer.Token{}, Errorf(token.Pos, "%s: %q", strconv.ErrSyntax.Error(), token.Value)
		}
		token.Value = token.Value[1 : len(token.Value)-1]
	}
	return token, nil
}

// doubleQuoteCharLiteral rewrites a single quoted literal (at least two bytes
// long, quotes included) as the equivalent double quoted literal.
func doubleQuoteCharLiteral(lit string) string {
	body := lit[1 : len(lit)-1]
	out := make([]byte, 0, len(body)+2)
	out = append(out, '"')
	// Byte-wise iteration is safe: the bytes of interest are ASCII and never
	// occur inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(body); i++ {
		switch c := body[i]; {
		case c == '\\' && i+1 < len(body):
			// Copy escape sequences through as a pair, except \' which is
			// not a valid escape inside a double quoted literal.
			i++
			if body[i] != '\'' {
				out = append(out, '\\')
			}
			out = append(out, body[i])
		case c == '"':
			// A bare double quote would end the rewritten literal early.
			out = append(out, '\\', '"')
		default:
			out = append(out, c)
		}
	}
	out = append(out, '"')
	return string(out)
}

0 comments on commit 4f53af9

Please sign in to comment.