utf8 support
This commit is contained in:
parent
ece31b11e0
commit
a16026b364
|
@ -1,6 +1,7 @@
|
||||||
import strutils
|
import strutils
|
||||||
import tables
|
import tables
|
||||||
import strformat
|
import strformat
|
||||||
|
import unicode
|
||||||
|
|
||||||
type
|
type
|
||||||
Scanner* = ref object
|
Scanner* = ref object
|
||||||
|
@ -33,23 +34,28 @@ proc debugPrint*(token: Token) =
|
||||||
proc isAtEnd(scanner: Scanner): bool =
  ## Reports whether the read position `current` has moved past the last
  ## valid index of `source`, i.e. there is nothing left to scan.
  result = scanner.current >= scanner.source.len
|
||||||
|
|
||||||
proc advance(scanner: Scanner): Rune =
  ## Consumes one rune: decodes the UTF-8 sequence that starts at byte index
  ## `current` and moves `current` past it.
  ##
  ## No end-of-input check is made here; the visible callers test
  ## `isAtEnd()` (or a peeked rune) before advancing.
  var decoded: Rune
  # doInc = true makes fastRuneAt bump `current` by the bytes it consumed.
  fastRuneAt(scanner.source, scanner.current, decoded, true)
  return decoded
|
|
||||||
|
|
||||||
proc peek(scanner: Scanner): Rune =
  ## Returns the rune at the current position without consuming it.
  ## At end of input the NUL rune (code point 0) is returned instead.
  if scanner.isAtEnd():
    return Rune(0)
  # runeAt decodes at the given byte index and leaves the index untouched.
  result = scanner.source.runeAt(scanner.current)
|
||||||
|
|
||||||
proc peekNext(scanner: Scanner): Rune =
  ## Returns the rune that follows the one at the current position, without
  ## consuming anything. Returns the NUL rune (code point 0) when there is
  ## no such rune.
  ##
  ## `current` is a byte index, so the next rune does not necessarily start
  ## at `current + 1`: the rune at `current` may occupy several bytes.
  ## The lookahead therefore steps over the current rune with `fastRuneAt`,
  ## the same way `advance` does, using a local copy of the index.
  if scanner.current > scanner.source.high:
    return Rune(0)
  var
    next = scanner.current
    skipped: Rune
  # Moves `next` to the first byte after the current rune.
  fastRuneAt(scanner.source, next, skipped, true)
  if next > scanner.source.high:
    return Rune(0)
  result = scanner.source.runeAt(next)
|
||||||
|
|
||||||
proc match(scanner: Scanner, exp: char): bool =
|
template `==`(l: char, r: Rune): bool =
  ## Compares a `char` with a `Rune` by decoding the char's one-byte string
  ## form into a rune and comparing the two runes.
  runeAt($l, 0) == r
|
||||||
|
|
||||||
|
template `==`(l: Rune, r: char): bool =
  ## Compares a `Rune` with a `char` by decoding the char's one-byte string
  ## form into a rune and comparing the two runes.
  runeAt($r, 0) == l
|
||||||
|
|
||||||
|
proc match(scanner: Scanner, exp: char | Rune): bool =
|
||||||
if scanner.peek() == exp:
|
if scanner.peek() == exp:
|
||||||
discard scanner.advance()
|
discard scanner.advance()
|
||||||
true
|
true
|
||||||
|
@ -69,9 +75,18 @@ proc errorToken(scanner: Scanner, msg: string): Token =
|
||||||
result.text = msg
|
result.text = msg
|
||||||
result.line = scanner.line
|
result.line = scanner.line
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
proc toChar(r: Rune): char =
  ## Narrows a rune to a `char` so it can be used in `case` statements and
  ## `set[char]` membership tests. Use only for that kind of matching.
  ##
  ## Runes in the single-byte range (code points 0..127) map to the
  ## corresponding char. Every other rune maps to `char(255)`, a sentinel
  ## that is meant never to match any character being tested for.
  ##
  ## The conversion works on the code point directly, so no temporary
  ## string is built per call.
  const noMatch = char(255)
  let code = r.int32
  if code >= 0 and code <= 127:
    char(code)
  else:
    noMatch
|
||||||
|
|
||||||
proc skipWhitespace(scanner: Scanner) =
|
proc skipWhitespace(scanner: Scanner) =
|
||||||
while true:
|
while true:
|
||||||
let c = scanner.peek()
|
let c = scanner.peek().toChar()
|
||||||
case c:
|
case c:
|
||||||
of {' ', '\r', '\t'}:
|
of {' ', '\r', '\t'}:
|
||||||
discard scanner.advance()
|
discard scanner.advance()
|
||||||
|
@ -80,15 +95,29 @@ proc skipWhitespace(scanner: Scanner) =
|
||||||
discard scanner.advance()
|
discard scanner.advance()
|
||||||
of '/':
|
of '/':
|
||||||
if scanner.peekNext() == '/':
|
if scanner.peekNext() == '/':
|
||||||
while not scanner.isAtEnd() and scanner.peek != '\n' :
|
while not scanner.isAtEnd() and scanner.peek().toChar() != '\n' :
|
||||||
discard scanner.advance()
|
discard scanner.advance()
|
||||||
|
elif scanner.peekNext() == '*':
|
||||||
|
var depth = 1
|
||||||
|
while not scanner.isAtEnd():
|
||||||
|
discard scanner.advance()
|
||||||
|
if scanner.peek().toChar() == '/' and scanner.peekNext().toChar() == '*':
|
||||||
|
depth.inc
|
||||||
|
discard scanner.advance()
|
||||||
|
discard scanner.advance()
|
||||||
|
if scanner.peek().toChar() == '*' and scanner.peekNext().toChar() == '/':
|
||||||
|
depth.dec
|
||||||
|
discard scanner.advance()
|
||||||
|
discard scanner.advance()
|
||||||
|
if depth == 0:
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
return
|
return
|
||||||
|
|
||||||
proc scanString(scanner: Scanner): Token =
|
proc scanString(scanner: Scanner): Token =
|
||||||
while not scanner.isAtEnd() and scanner.peek() != '\"' :
|
while not scanner.isAtEnd() and scanner.peek().toChar() != '\"' :
|
||||||
if scanner.peek() == '\n':
|
if scanner.peek() == '\n':
|
||||||
scanner.line.inc
|
scanner.line.inc
|
||||||
discard scanner.advance()
|
discard scanner.advance()
|
||||||
|
@ -100,12 +129,12 @@ proc scanString(scanner: Scanner): Token =
|
||||||
scanner.makeToken(tkString)
|
scanner.makeToken(tkString)
|
||||||
|
|
||||||
proc scanNumber(scanner: Scanner): Token =
  ## Consumes the remainder of a numeric literal and returns it as a
  ## `tkNumber` token.
  ##
  ## A run of digits is consumed first. If it is followed by a '.' that is
  ## itself followed by a digit, the dot and the following run of digits are
  ## consumed as well.
  template skipDigits() =
    while toChar(scanner.peek()) in Digits:
      discard scanner.advance()

  skipDigits()

  let hasFraction =
    toChar(scanner.peek()) == '.' and toChar(scanner.peekNext()) in Digits
  if hasFraction:
    discard scanner.advance() # the '.'
    skipDigits()

  result = scanner.makeToken(tkNumber)
|
||||||
|
@ -127,14 +156,14 @@ const keywords = {
|
||||||
"while": tkWhile,
|
"while": tkWhile,
|
||||||
}.toTable
|
}.toTable
|
||||||
|
|
||||||
proc canStartIdent(chr: Rune): bool =
  ## True when `chr` may begin an identifier: an alphabetic rune according
  ## to `unicode.isAlpha`, or an underscore.
  result = isAlpha(chr) or toChar(chr) == '_'
|
||||||
|
|
||||||
proc canContIdent(chr: Rune): bool =
  ## True when `chr` may appear after the first rune of an identifier:
  ## anything `canStartIdent` accepts, or an ASCII digit.
  if canStartIdent(chr):
    result = true
  else:
    result = toChar(chr) in Digits
|
||||||
|
|
||||||
proc scanIdentifier(scanner: Scanner): Token =
|
proc scanIdentifier(scanner: Scanner): Token =
|
||||||
while scanner.peek.canContIdent:
|
while scanner.peek().canContIdent():
|
||||||
discard scanner.advance()
|
discard scanner.advance()
|
||||||
|
|
||||||
let text = scanner.source[scanner.start..scanner.current-1]
|
let text = scanner.source[scanner.start..scanner.current-1]
|
||||||
|
@ -144,8 +173,8 @@ proc scanIdentifier(scanner: Scanner): Token =
|
||||||
|
|
||||||
return scanner.makeToken(tkIdentifier)
|
return scanner.makeToken(tkIdentifier)
|
||||||
|
|
||||||
proc canContLabel(chr: Rune): bool =
  ## True when `chr` may be part of a label: an alphabetic rune according
  ## to `unicode.isAlpha`, or an underscore.
  if isAlpha(chr):
    result = true
  else:
    result = toChar(chr) == '_'
|
||||||
|
|
||||||
proc scanLabel(scanner: Scanner): Token =
|
proc scanLabel(scanner: Scanner): Token =
|
||||||
if not scanner.peek.canContLabel:
|
if not scanner.peek.canContLabel:
|
||||||
|
@ -164,7 +193,8 @@ proc scanToken*(scanner: Scanner): Token =
|
||||||
if scanner.isAtEnd():
|
if scanner.isAtEnd():
|
||||||
return scanner.makeToken(tkEof)
|
return scanner.makeToken(tkEof)
|
||||||
|
|
||||||
let c = scanner.advance()
|
let rune = scanner.advance()
|
||||||
|
let c = rune.toChar()
|
||||||
|
|
||||||
case c:
|
case c:
|
||||||
of '(': return scanner.makeToken(tkLeftParen)
|
of '(': return scanner.makeToken(tkLeftParen)
|
||||||
|
@ -207,7 +237,7 @@ proc scanToken*(scanner: Scanner): Token =
|
||||||
elif scanner.peek().canContIdent(): return scanner.scanIdentifier()
|
elif scanner.peek().canContIdent(): return scanner.scanIdentifier()
|
||||||
else: return scanner.makeToken(tkColon)
|
else: return scanner.makeToken(tkColon)
|
||||||
else:
|
else:
|
||||||
if c.canStartIdent():
|
if rune.canStartIdent():
|
||||||
# ':' can start ident, but is not handled here
|
# ':' can start ident, but is not handled here
|
||||||
return scanner.scanIdentifier()
|
return scanner.scanIdentifier()
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
|
||||||
|
// nice comments
|
||||||
|
/*
|
||||||
|
a multiline comment
|
||||||
|
/*
|
||||||
|
with nested multiline comments
|
||||||
|
/*/
|
||||||
|
this doesn't break it
|
||||||
|
*/
|
||||||
|
*/
|
||||||
|
*/
|
||||||
|
|
||||||
|
// some utf8 letters in idents
|
||||||
|
|
||||||
|
var áéíóú = 5;
|
||||||
|
print (áéíóú);
|
||||||
|
//expect:5.0
|
||||||
|
|
||||||
|
{ @å
|
||||||
|
print ("before");
|
||||||
|
{
|
||||||
|
:å = "result";
|
||||||
|
break @å;
|
||||||
|
// this convolution is needed because breaks detect code after them and raise an error
|
||||||
|
};
|
||||||
|
print ("after");
|
||||||
|
} :: print;
|
||||||
|
|
||||||
|
//expect:before
|
||||||
|
//expect:result
|
|
@ -1,5 +1,6 @@
|
||||||
// testing syntactic sugars
|
// testing syntactic sugars
|
||||||
|
|
||||||
|
|
||||||
// :: piping function call
|
// :: piping function call
|
||||||
|
|
||||||
var double = funct(num) :result = num * 2;
|
var double = funct(num) :result = num * 2;
|
||||||
|
|
Loading…
Reference in New Issue