Added true, false, return, if, else keywords; single-character tokens; tests

and changed Vec<char> to Vec<u8>
Ishan Jain 2019-02-04 17:16:17 +05:30
parent 561c4c6089
commit f31e4bfee1
2 changed files with 212 additions and 42 deletions
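
The Vec<char> → Vec<u8> switch means the lexer now indexes raw UTF-8 bytes instead of decoded scalar values. A standalone sketch (not part of the diff) of what that changes:

fn main() {
    let s = "héllo";
    // Old representation: one element per Unicode scalar value.
    let chars = s.chars().collect::<Vec<char>>();
    // New representation: one element per UTF-8 byte ('é' encodes as 2 bytes).
    let bytes = s.bytes().collect::<Vec<u8>>();
    assert_eq!(chars.len(), 5);
    assert_eq!(bytes.len(), 6);
}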


@@ -1,15 +1,21 @@
 use std::collections::HashMap;
+use std::str;

 lazy_static! {
     static ref IDENTMAP: HashMap<&'static str, Token> = {
         let mut m = HashMap::new();
         m.insert("fn", Token::Function);
         m.insert("let", Token::Let);
+        m.insert("true", Token::True);
+        m.insert("false", Token::False);
+        m.insert("return", Token::Return);
+        m.insert("if", Token::If);
+        m.insert("else", Token::Else);
         m
     };
 }

-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Clone)]
 pub enum Token {
     Illegal,
     EOF,
@@ -23,6 +29,9 @@ pub enum Token {
     Multiply,
     Divide,
     Subtract,
+    ExclamationMark,
+    LessThan,
+    GreaterThan,

     // Delimiter
     Comma,
@@ -34,30 +43,39 @@ pub enum Token {
     // Keywords
     Function,
+    If,
     Let,
+    True,
+    Else,
+    False,
+    Return,
+    Ident(String),
 }

 #[derive(Debug)]
 pub struct Lexer {
-    input: Vec<char>,
+    input: Vec<u8>,
     position: usize,
     read_position: usize,
-    ch: char,
+    ch: u8,
 }

 impl Lexer {
     pub fn new(input: &str) -> Lexer {
         Lexer {
-            input: input.chars().collect::<Vec<char>>(),
+            input: input.bytes().collect::<Vec<u8>>(),
             position: 0,
             read_position: 0,
-            ch: '0',
+            ch: 0,
         }
     }

     fn read_char(&mut self) {
-        if self.read_position >= self.input.len() {
-            self.ch = '0';
+        if self.read_position == self.input.len() {
+            self.ch = 0;
+        } else if self.read_position > self.input.len() {
+            // 3 = ETX
+            self.ch = 3;
         } else {
             self.ch = self.input[self.read_position];
         }
@@ -71,8 +89,26 @@ impl Lexer {
         while is_letter(self.ch) {
             self.read_char();
         }
+        self.read_position -= 1;

-        self.input[pos..self.position].iter().collect::<String>()
+        String::from_utf8_lossy(&self.input[pos..self.position]).to_string()
+    }
+
+    fn skip_whitespace(&mut self) {
+        while self.ch == b' ' || self.ch == b'\t' || self.ch == b'\n' || self.ch == b'\r' {
+            self.read_char();
+        }
+    }
+
+    // use i64 for all numbers for now.
+    fn read_number(&mut self) -> i64 {
+        let pos = self.position;
+        while self.ch.is_ascii_digit() {
+            self.read_char();
+        }
+        self.read_position -= 1;
+        String::from_utf8_lossy(&self.input[pos..self.position])
+            .parse::<i64>()
+            .unwrap()
     }
 }
@@ -81,25 +117,47 @@ impl Iterator for Lexer {
     fn next(&mut self) -> Option<Self::Item> {
         self.read_char();
+        self.skip_whitespace();

-        match self.ch {
-            '=' => Some(Token::Assign),
-            '+' => Some(Token::Plus),
-            '*' => Some(Token::Multiply),
-            '/' => Some(Token::Divide),
-            '-' => Some(Token::Subtract),
-            ',' => Some(Token::Comma),
-            ';' => Some(Token::Semicolon),
-            '(' => Some(Token::LParen),
-            ')' => Some(Token::RParen),
-            '[' => Some(Token::LBrace),
-            ']' => Some(Token::RBrace),
-            '0' => Some(Token::EOF),
-            _ => None,
-        }
+        let v = match self.ch {
+            b'=' => Some(Token::Assign),
+            b'+' => Some(Token::Plus),
+            b'*' => Some(Token::Multiply),
+            b'/' => Some(Token::Divide),
+            b'-' => Some(Token::Subtract),
+            b',' => Some(Token::Comma),
+            b';' => Some(Token::Semicolon),
+            b'(' => Some(Token::LParen),
+            b')' => Some(Token::RParen),
+            b'{' => Some(Token::LBrace),
+            b'}' => Some(Token::RBrace),
+            b'!' => Some(Token::ExclamationMark),
+            b'>' => Some(Token::GreaterThan),
+            b'<' => Some(Token::LessThan),
+            0 => Some(Token::EOF),
+            // ETX -> End of text. It's the value of self.ch after all the text is parsed.
+            3 => None,
+            _ if is_letter(self.ch) => {
+                let ident = self.read_identifier();
+                Some(lookup_ident(&ident))
+            }
+            _ if self.ch.is_ascii_digit() => {
+                let number = self.read_number();
+                Some(Token::Int(number))
+            }
+            _ => Some(Token::Illegal),
+        };
+        v
     }
 }

-fn is_letter(c: char) -> bool {
-    c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_'
+fn is_letter(c: u8) -> bool {
+    c.is_ascii_alphabetic() || c == b'_'
+}
+
+fn lookup_ident(ident: &str) -> Token {
+    match IDENTMAP.get(ident) {
+        Some(v) => v.clone(),
+        None => Token::Ident(ident.to_string()),
+    }
 }
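
With the pieces above in place, a caller drives the lexer purely through the Iterator impl: one token per next() call, a single Token::EOF once ch hits the 0 sentinel, then None at the 3 (ETX) sentinel so collect() terminates. A minimal sketch, assuming the lexer module from this diff is in scope and that the Int(i64) variant already exists in the unchanged part of the enum:

use lexer::{Lexer, Token};

fn demo() {
    // Mirrors the second test case below, trimmed to one statement.
    let tokens = Lexer::new("let five = 5;").collect::<Vec<Token>>();
    assert_eq!(
        tokens,
        vec![
            Token::Let,
            Token::Ident("five".to_string()),
            Token::Assign,
            Token::Int(5),
            Token::Semicolon,
            Token::EOF,
        ]
    );
}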


@@ -8,10 +8,15 @@ fn main() {}
 #[cfg(test)]
 mod tests {
     use lexer::{Lexer, Token};
+    use std::collections::HashMap;

     #[test]
     fn new_token() {
-        let input = "=+()[],;";
-        let expected = vec![
+        let mut tests = HashMap::new();
+        tests.insert(
+            "=+(){},;",
+            vec![
                 Token::Assign,
                 Token::Plus,
                 Token::LParen,
@@ -21,15 +26,122 @@ mod tests {
                 Token::Comma,
                 Token::Semicolon,
                 Token::EOF,
-        ];
-
-        let tokenized_output = Lexer::new(input).collect::<Vec<Token>>();
-
-        assert_eq!(expected.len(), tokenized_output.len());
-        println!("{:?}", tokenized_output);
-
-        for (exp, actual) in expected.into_iter().zip(tokenized_output) {
+            ],
+        );
+        tests.insert(
+            "let five = 5;
+            let ten = 10;
+            let add = fn(x, y) {
+                x + y;
+            };
+            let result = add(five, ten);",
+            vec![
+                Token::Let,
+                Token::Ident("five".to_string()),
+                Token::Assign,
+                Token::Int(5),
+                Token::Semicolon,
+                Token::Let,
+                Token::Ident("ten".to_string()),
+                Token::Assign,
+                Token::Int(10),
+                Token::Semicolon,
+                Token::Let,
+                Token::Ident("add".to_string()),
+                Token::Assign,
+                Token::Function,
+                Token::LParen,
+                Token::Ident("x".to_string()),
+                Token::Comma,
+                Token::Ident("y".to_string()),
+                Token::RParen,
+                Token::LBrace,
+                Token::Ident("x".to_string()),
+                Token::Plus,
+                Token::Ident("y".to_string()),
+                Token::Semicolon,
+                Token::RBrace,
+                Token::Semicolon,
+                Token::Let,
+                Token::Ident("result".to_string()),
+                Token::Assign,
+                Token::Ident("add".to_string()),
+                Token::LParen,
+                Token::Ident("five".to_string()),
+                Token::Comma,
+                Token::Ident("ten".to_string()),
+                Token::RParen,
+                Token::Semicolon,
+                Token::EOF,
+            ],
+        );
+        tests.insert(
+            "let result = add(five, ten);
+            !-/*5;
+            5 < 10 > 5;
+            if(5 < 10) {
+                return true;
+            } else {
+                return false;
+            }
+            ",
+            vec![
+                Token::Let,
+                Token::Ident("result".to_string()),
+                Token::Assign,
+                Token::Ident("add".to_string()),
+                Token::LParen,
+                Token::Ident("five".to_string()),
+                Token::Comma,
+                Token::Ident("ten".to_string()),
+                Token::RParen,
+                Token::Semicolon,
+                Token::ExclamationMark,
+                Token::Subtract,
+                Token::Divide,
+                Token::Multiply,
+                Token::Int(5),
+                Token::Semicolon,
+                Token::Int(5),
+                Token::LessThan,
+                Token::Int(10),
+                Token::GreaterThan,
+                Token::Int(5),
+                Token::Semicolon,
+                Token::If,
+                Token::LParen,
+                Token::Int(5),
+                Token::LessThan,
+                Token::Int(10),
+                Token::RParen,
+                Token::LBrace,
+                Token::Return,
+                Token::True,
+                Token::Semicolon,
+                Token::RBrace,
+                Token::Else,
+                Token::LBrace,
+                Token::Return,
+                Token::False,
+                Token::Semicolon,
+                Token::RBrace,
+                Token::EOF,
+            ],
+        );
+
+        for (k, v) in tests {
+            let tokenized_output = Lexer::new(k).collect::<Vec<Token>>();
+            // assert_eq!(v.len(), tokenized_output.len());
+            for (exp, actual) in v.into_iter().zip(tokenized_output) {
+                if actual != exp {
+                    println!("Expect: {:?}, Actual: {:?}", actual, exp);
+                }
                 assert_eq!(actual, exp);
             }
+        }
     }
 }
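
The read_position -= 1 lines in read_identifier and read_number exist because those scan loops only stop after read_char has already consumed one byte past the lexeme, so the cursor must be rewound before the next token is read. A hypothetical, self-contained sketch of that overshoot-and-backtrack pattern (scan_while is an illustration, not part of this crate):

fn scan_while(input: &[u8], start: usize, pred: fn(u8) -> bool) -> (String, usize) {
    let mut end = start;
    // The loop can only stop after inspecting the first byte PAST the lexeme...
    while end < input.len() && pred(input[end]) {
        end += 1;
    }
    // ...so a caller whose cursor already moved one byte further must step it
    // back, which is what `self.read_position -= 1` does in the Lexer above.
    (String::from_utf8_lossy(&input[start..end]).to_string(), end)
}

fn main() {
    let (word, resume_at) = scan_while(b"let five = 5;", 0, |b| b.is_ascii_alphabetic());
    assert_eq!(word, "let");
    assert_eq!(resume_at, 3); // the cursor now sits on the space after "let"
}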