Added true, false, return, if, else keywords, single-character tokens, and tests,

and changed Vec<char> to Vec<u8>
This commit is contained in:
Ishan Jain 2019-02-04 17:16:17 +05:30
parent 561c4c6089
commit f31e4bfee1
2 changed files with 212 additions and 42 deletions

View File

@ -1,15 +1,21 @@
use std::collections::HashMap;
use std::str;
lazy_static! {
    /// Keyword table: maps the source text of each reserved word to the
    /// token that represents it.
    static ref IDENTMAP: HashMap<&'static str, Token> = {
        let keywords = vec![
            ("fn", Token::Function),
            ("let", Token::Let),
            ("true", Token::True),
            ("false", Token::False),
            ("return", Token::Return),
            ("if", Token::If),
            ("else", Token::Else),
        ];
        keywords.into_iter().collect()
    };
}
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
Illegal,
EOF,
@ -23,6 +29,9 @@ pub enum Token {
Multiply,
Divide,
Subtract,
ExclamationMark,
LessThan,
GreaterThan,
// Delimiter
Comma,
@ -34,30 +43,39 @@ pub enum Token {
// Keywords
Function,
If,
Let,
True,
Else,
False,
Return,
Ident(String),
}
/// Tokenizer state: the buffered input, two cursors into it, and the
/// character currently under examination.
///
/// NOTE(review): `input` and `ch` are each declared twice below, once with a
/// `char`-based type and once with a `u8`-based type. This looks like the
/// before/after lines of the `Vec<char>` -> `Vec<u8>` change shown side by
/// side; only one line of each pair can exist in compilable code.
#[derive(Debug)]
pub struct Lexer {
// Source text being tokenized.
input: Vec<char>,
input: Vec<u8>,
// Used as a slice boundary by `read_identifier` and `read_number`; the code
// that advances it is not visible in this listing.
position: usize,
// Index that `read_char` loads from next; compared against `input.len()` to
// detect the end of the input.
read_position: usize,
// Character most recently loaded by `read_char`; `next` dispatches on it.
ch: char,
ch: u8,
}
impl Lexer {
/// Creates a lexer over `input` with both cursors set to zero.
///
/// NOTE(review): `input` and `ch` are each initialised twice below (a `char`
/// form and a `u8` form). This looks like the before/after lines of the
/// `Vec<char>` -> `Vec<u8>` change shown side by side; only one line of each
/// pair can exist in compilable code.
pub fn new(input: &str) -> Lexer {
Lexer {
input: input.chars().collect::<Vec<char>>(),
input: input.bytes().collect::<Vec<u8>>(),
position: 0,
read_position: 0,
// Placeholder only: `next` calls `read_char` before it inspects `ch`.
ch: '0',
ch: 0,
}
}
fn read_char(&mut self) {
if self.read_position >= self.input.len() {
self.ch = '0';
if self.read_position == self.input.len() {
self.ch = 0;
} else if self.read_position > self.input.len() {
// 3 = ETX
self.ch = 3;
} else {
self.ch = self.input[self.read_position];
}
@ -71,8 +89,26 @@ impl Lexer {
while is_letter(self.ch) {
self.read_char();
}
self.read_position -= 1;
String::from_utf8_lossy(&self.input[pos..self.position]).to_string()
}
self.input[pos..self.position].iter().collect::<String>()
/// Advances past any run of spaces, tabs, line feeds and carriage returns,
/// stopping with `self.ch` on the first byte that is none of those.
fn skip_whitespace(&mut self) {
    loop {
        match self.ch {
            b' ' | b'\t' | b'\n' | b'\r' => self.read_char(),
            _ => break,
        }
    }
}
// use i64 for all numbers for now.
/// Reads the run of ASCII digits that starts at the current position and
/// returns its decimal value.
///
/// A literal too large for `i64` saturates at `i64::MAX` instead of
/// panicking, so oversized numbers in the source text cannot crash the lexer.
/// An empty digit run yields `0`.
fn read_number(&mut self) -> i64 {
    let start = self.position;
    while self.ch.is_ascii_digit() {
        self.read_char();
    }
    // Step the read cursor back by one; presumably so that the next
    // `read_char` revisits the byte that terminated the number (the tail of
    // `read_char` is not visible in this listing) -- TODO confirm.
    self.read_position -= 1;
    // Every byte in the slice passed `is_ascii_digit`, so `digit - b'0'` is
    // always in 0..=9. Folding the bytes directly avoids the intermediate
    // string allocation and the fallible parse.
    self.input[start..self.position]
        .iter()
        .fold(0i64, |acc, &digit| {
            acc.saturating_mul(10)
                .saturating_add(i64::from(digit - b'0'))
        })
}
}
@ -81,25 +117,47 @@ impl Iterator for Lexer {
/// Produces the next token, or `None` once the input has been fully consumed.
///
/// Loads one character, skips whitespace, then dispatches on `self.ch`.
///
/// NOTE(review): two `match` blocks appear below. The first matches `char`
/// literals and the second matches byte literals; this looks like the
/// before/after versions of the dispatch shown side by side, and only the
/// second is consistent with `ch: u8`.
fn next(&mut self) -> Option<Self::Item> {
self.read_char();
self.skip_whitespace();
match self.ch {
'=' => Some(Token::Assign),
'+' => Some(Token::Plus),
'*' => Some(Token::Multiply),
'/' => Some(Token::Divide),
'-' => Some(Token::Subtract),
',' => Some(Token::Comma),
';' => Some(Token::Semicolon),
'(' => Some(Token::LParen),
')' => Some(Token::RParen),
'[' => Some(Token::LBrace),
']' => Some(Token::RBrace),
'0' => Some(Token::EOF),
_ => None,
}
let v = match self.ch {
b'=' => Some(Token::Assign),
b'+' => Some(Token::Plus),
b'*' => Some(Token::Multiply),
b'/' => Some(Token::Divide),
b'-' => Some(Token::Subtract),
b',' => Some(Token::Comma),
b';' => Some(Token::Semicolon),
b'(' => Some(Token::LParen),
b')' => Some(Token::RParen),
b'{' => Some(Token::LBrace),
b'}' => Some(Token::RBrace),
b'!' => Some(Token::ExclamationMark),
b'>' => Some(Token::GreaterThan),
b'<' => Some(Token::LessThan),
// `read_char` stores 0 in `ch` when the read cursor equals the input length.
0 => Some(Token::EOF),
//ETX-> End of text. It's the value of self.ch after all the text is parsed.
// `read_char` stores 3 once the read cursor has moved past the input length;
// returning `None` here ends the iteration.
3 => None,
// Letters and `_` start an identifier or keyword.
_ if is_letter(self.ch) => {
let ident = self.read_identifier();
Some(lookup_ident(&ident))
}
// An ASCII digit starts an integer literal.
_ if self.ch.is_ascii_digit() => {
let number = self.read_number();
Some(Token::Int(number))
}
// Any other byte is reported as an illegal token.
_ => Some(Token::Illegal),
};
v
}
}
/// Returns `true` when `c` is an ASCII letter or an underscore.
///
/// NOTE(review): two signatures and two bodies appear below, one taking
/// `char` and one taking `u8`. This looks like the before/after versions of
/// the function shown side by side; both accept `a`-`z`, `A`-`Z` and `_`.
fn is_letter(c: char) -> bool {
c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_'
fn is_letter(c: u8) -> bool {
c.is_ascii_alphabetic() || c == b'_'
}
/// Resolves identifier text to a token: a clone of the matching `IDENTMAP`
/// entry when the text is a keyword, otherwise `Token::Ident` holding a copy
/// of the text.
fn lookup_ident(ident: &str) -> Token {
    IDENTMAP
        .get(ident)
        .cloned()
        .unwrap_or_else(|| Token::Ident(ident.to_string()))
}

View File

@ -8,28 +8,140 @@ fn main() {}
#[cfg(test)]
mod tests {
use lexer::{Lexer, Token};
use std::collections::HashMap;
/// Table-driven lexer test: each source snippet inserted into `tests` is
/// tokenized and compared, token by token, against its expected sequence.
#[test]
fn new_token() {
let input = "=+()[],;";
let expected = vec![
Token::Assign,
Token::Plus,
Token::LParen,
Token::RParen,
Token::LBrace,
Token::RBrace,
Token::Comma,
Token::Semicolon,
Token::EOF,
];
let mut tests = HashMap::new();
let tokenized_output = Lexer::new(input).collect::<Vec<Token>>();
tests.insert(
"=+(){},;",
vec![
Token::Assign,
Token::Plus,
Token::LParen,
Token::RParen,
Token::LBrace,
Token::RBrace,
Token::Comma,
Token::Semicolon,
Token::EOF,
],
);
tests.insert(
"let five = 5;
let ten = 10;
assert_eq!(expected.len(), tokenized_output.len());
println!("{:?}", tokenized_output);
let add = fn(x, y) {
x + y;
};
for (exp, actual) in expected.into_iter().zip(tokenized_output) {
assert_eq!(actual, exp);
let result = add(five, ten);",
vec![
Token::Let,
Token::Ident("five".to_string()),
Token::Assign,
Token::Int(5),
Token::Semicolon,
Token::Let,
Token::Ident("ten".to_string()),
Token::Assign,
Token::Int(10),
Token::Semicolon,
Token::Let,
Token::Ident("add".to_string()),
Token::Assign,
Token::Function,
Token::LParen,
Token::Ident("x".to_string()),
Token::Comma,
Token::Ident("y".to_string()),
Token::RParen,
Token::LBrace,
Token::Ident("x".to_string()),
Token::Plus,
Token::Ident("y".to_string()),
Token::Semicolon,
Token::RBrace,
Token::Semicolon,
Token::Let,
Token::Ident("result".to_string()),
Token::Assign,
Token::Ident("add".to_string()),
Token::LParen,
Token::Ident("five".to_string()),
Token::Comma,
Token::Ident("ten".to_string()),
Token::RParen,
Token::Semicolon,
Token::EOF,
],
);
tests.insert(
"let result = add(five, ten);
!-/*5;
5 < 10 > 5;
if(5 < 10) {
return true;
} else {
return false;
}
",
vec![
Token::Let,
Token::Ident("result".to_string()),
Token::Assign,
Token::Ident("add".to_string()),
Token::LParen,
Token::Ident("five".to_string()),
Token::Comma,
Token::Ident("ten".to_string()),
Token::RParen,
Token::Semicolon,
Token::ExclamationMark,
Token::Subtract,
Token::Divide,
Token::Multiply,
Token::Int(5),
Token::Semicolon,
Token::Int(5),
Token::LessThan,
Token::Int(10),
Token::GreaterThan,
Token::Int(5),
Token::Semicolon,
Token::If,
Token::LParen,
Token::Int(5),
Token::LessThan,
Token::Int(10),
Token::RParen,
Token::LBrace,
Token::Return,
Token::True,
Token::Semicolon,
Token::RBrace,
Token::Else,
Token::LBrace,
Token::Return,
Token::False,
Token::Semicolon,
Token::RBrace,
Token::EOF,
],
);
for (k, v) in tests {
let tokenized_output = Lexer::new(k).collect::<Vec<Token>>();
// assert_eq!(v.len(), tokenized_output.len());
// NOTE: `zip` stops at the shorter of the two sequences, so while the
// length check above stays disabled a missing or extra trailing token
// goes unnoticed.
for (exp, actual) in v.into_iter().zip(tokenized_output) {
if actual != exp {
// `exp` is the expectation and `actual` is the lexer output; print
// each under its own label.
println!("Expect: {:?}, Actual: {:?}", exp, actual);
}
assert_eq!(actual, exp);
}
}
}
}