utf8 support
This commit is contained in:
parent
ece31b11e0
commit
a16026b364
|
@ -1,6 +1,7 @@
|
|||
import strutils
|
||||
import tables
|
||||
import strformat
|
||||
import unicode
|
||||
|
||||
type
|
||||
Scanner* = ref object
|
||||
|
@ -33,23 +34,28 @@ proc debugPrint*(token: Token) =
|
|||
proc isAtEnd(scanner: Scanner): bool =
  ## True once `scanner.current` has moved past the last byte of
  ## `scanner.source`.
  result = scanner.current >= scanner.source.len
|
||||
|
||||
proc advance(scanner: Scanner): Rune =
  ## Decodes the rune that starts at `scanner.current`, moves
  ## `scanner.current` past it and returns the decoded rune.
  ##
  ## No end-of-input check is done here; callers are expected to consult
  ## `isAtEnd` (or `peek`) first.
  fastRuneAt(scanner.source, scanner.current, result, true)
|
||||
|
||||
proc peek(scanner: Scanner): Rune =
  ## Returns the rune under the cursor without consuming it.
  ## At the end of the input the NUL rune (U+0000) is returned instead.
  result = Rune(0)
  if not scanner.isAtEnd():
    # doInc = false: decode only, leave `scanner.current` untouched.
    fastRuneAt(scanner.source, scanner.current, result, false)
|
||||
|
||||
proc peekNext(scanner: Scanner): Rune =
  ## Returns the rune that follows the one under the cursor, without
  ## consuming anything. Returns the NUL rune (U+0000) when there is no
  ## such rune.
  ##
  ## `scanner.current` is a byte offset into UTF-8 text, so the next rune
  ## starts `runeLenAt(current)` bytes further on, not at `current + 1`.
  ## Decoding at `current + 1` would land on a continuation byte whenever
  ## the current rune is encoded with more than one byte.
  result = Rune(0)
  if scanner.current > scanner.source.high:
    return
  let next = scanner.current + scanner.source.runeLenAt(scanner.current)
  # A truncated multi-byte sequence at the end of the input can push
  # `next` past the last byte, hence the explicit bound check.
  if next <= scanner.source.high:
    fastRuneAt(scanner.source, next, result, false)
|
||||
|
||||
proc match(scanner: Scanner, exp: char): bool =
|
||||
template `==`(l: char, r: Rune): bool =
  ## Compares a `char` with a `Rune` by decoding the one-byte string made
  ## from the char and comparing the two runes.
  runeAt($l, 0) == r
|
||||
|
||||
template `==`(l: Rune, r: char): bool =
  ## Compares a `Rune` with a `char` by decoding the one-byte string made
  ## from the char and comparing the two runes.
  runeAt($r, 0) == l
|
||||
|
||||
proc match(scanner: Scanner, exp: char | Rune): bool =
|
||||
if scanner.peek() == exp:
|
||||
discard scanner.advance()
|
||||
true
|
||||
|
@ -69,9 +75,18 @@ proc errorToken(scanner: Scanner, msg: string): Token =
|
|||
result.text = msg
|
||||
result.line = scanner.line
|
||||
|
||||
|
||||
|
||||
proc toChar(r: Rune): char =
  ## Use only for matching runes in `case` statements and `char` sets.
  ##
  ## A rune whose UTF-8 encoding is a single byte is returned as that
  ## byte; any other rune is returned as `char(255)`.
  if r.size() == 1:
    # One-byte runes are in 0..127, so the code point is the byte itself.
    char(int32(r))
  else:
    char(255) # never match this
|
||||
|
||||
proc skipWhitespace(scanner: Scanner) =
|
||||
while true:
|
||||
let c = scanner.peek()
|
||||
let c = scanner.peek().toChar()
|
||||
case c:
|
||||
of {' ', '\r', '\t'}:
|
||||
discard scanner.advance()
|
||||
|
@ -80,15 +95,29 @@ proc skipWhitespace(scanner: Scanner) =
|
|||
discard scanner.advance()
|
||||
of '/':
|
||||
if scanner.peekNext() == '/':
|
||||
while not scanner.isAtEnd() and scanner.peek != '\n' :
|
||||
while not scanner.isAtEnd() and scanner.peek().toChar() != '\n' :
|
||||
discard scanner.advance()
|
||||
elif scanner.peekNext() == '*':
|
||||
var depth = 1
|
||||
while not scanner.isAtEnd():
|
||||
discard scanner.advance()
|
||||
if scanner.peek().toChar() == '/' and scanner.peekNext().toChar() == '*':
|
||||
depth.inc
|
||||
discard scanner.advance()
|
||||
discard scanner.advance()
|
||||
if scanner.peek().toChar() == '*' and scanner.peekNext().toChar() == '/':
|
||||
depth.dec
|
||||
discard scanner.advance()
|
||||
discard scanner.advance()
|
||||
if depth == 0:
|
||||
break
|
||||
else:
|
||||
return
|
||||
else:
|
||||
return
|
||||
|
||||
proc scanString(scanner: Scanner): Token =
|
||||
while not scanner.isAtEnd() and scanner.peek() != '\"' :
|
||||
while not scanner.isAtEnd() and scanner.peek().toChar() != '\"' :
|
||||
if scanner.peek() == '\n':
|
||||
scanner.line.inc
|
||||
discard scanner.advance()
|
||||
|
@ -100,12 +129,12 @@ proc scanString(scanner: Scanner): Token =
|
|||
scanner.makeToken(tkString)
|
||||
|
||||
proc scanNumber(scanner: Scanner): Token =
  ## Consumes a run of digits, optionally followed by a `.` and another
  ## run of digits, and returns the `tkNumber` token built by `makeToken`.
  template skipDigits() =
    while scanner.peek().toChar() in Digits:
      discard scanner.advance()

  skipDigits()

  # A '.' only belongs to the number when a digit follows it.
  if scanner.peek().toChar() == '.' and scanner.peekNext().toChar() in Digits:
    discard scanner.advance() # the '.'
    skipDigits()

  result = scanner.makeToken(tkNumber)
|
||||
|
@ -127,14 +156,14 @@ const keywords = {
|
|||
"while": tkWhile,
|
||||
}.toTable
|
||||
|
||||
proc canStartIdent(chr: Rune): bool =
  ## True when `chr` is an underscore or a letter according to
  ## `unicode.isAlpha`.
  if chr == Rune(ord('_')):
    return true
  chr.isAlpha()
|
||||
|
||||
proc canContIdent(chr: Rune): bool =
  ## True when `chr` may appear after the first character of an
  ## identifier: anything `canStartIdent` accepts, or a digit from
  ## `Digits`.
  if canStartIdent(chr):
    true
  else:
    chr.toChar() in Digits
|
||||
|
||||
proc scanIdentifier(scanner: Scanner): Token =
|
||||
while scanner.peek.canContIdent:
|
||||
while scanner.peek().canContIdent():
|
||||
discard scanner.advance()
|
||||
|
||||
let text = scanner.source[scanner.start..scanner.current-1]
|
||||
|
@ -144,8 +173,8 @@ proc scanIdentifier(scanner: Scanner): Token =
|
|||
|
||||
return scanner.makeToken(tkIdentifier)
|
||||
|
||||
proc canContLabel(chr: Rune): bool =
  ## True when `chr` is a letter according to `unicode.isAlpha` or an
  ## underscore.
  result = chr.isAlpha()
  if not result:
    result = chr == Rune(ord('_'))
|
||||
|
||||
proc scanLabel(scanner: Scanner): Token =
|
||||
if not scanner.peek.canContLabel:
|
||||
|
@ -164,7 +193,8 @@ proc scanToken*(scanner: Scanner): Token =
|
|||
if scanner.isAtEnd():
|
||||
return scanner.makeToken(tkEof)
|
||||
|
||||
let c = scanner.advance()
|
||||
let rune = scanner.advance()
|
||||
let c = rune.toChar()
|
||||
|
||||
case c:
|
||||
of '(': return scanner.makeToken(tkLeftParen)
|
||||
|
@ -207,7 +237,7 @@ proc scanToken*(scanner: Scanner): Token =
|
|||
elif scanner.peek().canContIdent(): return scanner.scanIdentifier()
|
||||
else: return scanner.makeToken(tkColon)
|
||||
else:
|
||||
if c.canStartIdent():
|
||||
if rune.canStartIdent():
|
||||
# ':' can start ident, but is not handled here
|
||||
return scanner.scanIdentifier()
|
||||
else:
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
|
||||
// nice comments
|
||||
/*
|
||||
a multiline comment
|
||||
/*
|
||||
with nested multiline comments
|
||||
/*/
|
||||
this doesn't break it
|
||||
*/
|
||||
*/
|
||||
*/
|
||||
|
||||
// some utf8 letters in idents
|
||||
|
||||
var áéíóú = 5;
|
||||
print (áéíóú);
|
||||
//expect:5.0
|
||||
|
||||
{ @å
|
||||
print ("before");
|
||||
{
|
||||
:å = "result";
|
||||
break @å;
|
||||
// this convolution is needed because a break reports an error when code follows it
|
||||
};
|
||||
print ("after");
|
||||
} :: print;
|
||||
|
||||
//expect:before
|
||||
//expect:result
|
|
@ -1,5 +1,6 @@
|
|||
// testing syntactic sugars
|
||||
|
||||
|
||||
// :: piping function call
|
||||
|
||||
var double = funct(num) :result = num * 2;
|
||||
|
|
Loading…
Reference in New Issue