From 7409606f3d8f66e1959c9fcc4ce501c1d39f423c Mon Sep 17 00:00:00 2001 From: ishanjain28 Date: Sun, 14 Apr 2019 16:45:40 +0530 Subject: [PATCH] Committing and saving work done on the parser --- Cargo.lock | 2 + src/lexer/mod.rs | 170 ++++++++++++++++++++++++++++++++++-- src/main.rs | 153 +------------------------------- src/parser/ast/mod.rs | 10 +++ src/parser/mod.rs | 50 +++++++++++ src/parser/statement/mod.rs | 21 +++++ src/repl/mod.rs | 6 +- 7 files changed, 249 insertions(+), 163 deletions(-) create mode 100644 src/parser/ast/mod.rs create mode 100644 src/parser/mod.rs create mode 100644 src/parser/statement/mod.rs diff --git a/Cargo.lock b/Cargo.lock index b3f0ee0..41d5784 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,3 +1,5 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. [[package]] name = "interpreter" version = "0.1.0" diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 44a583d..2d83b7c 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -55,7 +55,7 @@ pub enum Token { Ident(String), } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Lexer<'a> { input: Peekable>, eof_sent: bool, @@ -85,7 +85,7 @@ impl<'a> Lexer<'a> { fn peek_is_letter(&mut self) -> bool { match self.input.peek() { - Some(v) => is_letter(v), + Some(v) => is_letter(*v), None => false, } } @@ -128,7 +128,7 @@ impl<'a> Iterator for Lexer<'a> { self.skip_whitespace(); let ch = self.read_char(); - let v = match ch { + match ch { Some('=') => { let is_e = match self.input.peek() { Some(v) if *v == '=' => true, @@ -165,11 +165,11 @@ impl<'a> Iterator for Lexer<'a> { } Some('>') => Some(Token::GreaterThan), Some('<') => Some(Token::LessThan), - Some(ch @ _) if is_letter(&ch) => { + Some(ch) if is_letter(ch) => { let ident = self.read_identifier(ch); Some(lookup_ident(&ident)) } - Some(ch @ _) if ch.is_ascii_digit() => { + Some(ch) if ch.is_ascii_digit() => { let number = self.read_number(ch); Some(Token::Int(number)) } @@ -179,13 +179,12 @@ impl<'a> Iterator for Lexer<'a> { } None => None, _ => Some(Token::Illegal), - }; - v + } } } -fn is_letter(c: &char) -> bool { - c.is_ascii_alphabetic() || *c == '_' +fn is_letter(c: char) -> bool { + c.is_ascii_alphabetic() || c == '_' } fn lookup_ident(ident: &str) -> Token { @@ -194,3 +193,156 @@ fn lookup_ident(ident: &str) -> Token { None => Token::Ident(ident.to_string()), } } + +#[cfg(test)] +mod tests { + use super::{Lexer, Token}; + use std::collections::HashMap; + + #[test] + fn new_token() { + let mut tests = HashMap::new(); + + tests.insert( + "=+(){},;", + vec![ + Token::Assign, + Token::Plus, + Token::LParen, + Token::RParen, + Token::LBrace, + Token::RBrace, + Token::Comma, + Token::Semicolon, + Token::EOF, + ], + ); + tests.insert( + "let five = 5; + let ten = 10; + + let add = fn(x, y) { + x + y; + }; + + let result = add(five, ten);", + vec![ + Token::Let, + Token::Ident("five".to_string()), + Token::Assign, + Token::Int(5), + Token::Semicolon, + Token::Let, + Token::Ident("ten".to_string()), + Token::Assign, + Token::Int(10), + Token::Semicolon, + Token::Let, + Token::Ident("add".to_string()), + Token::Assign, + Token::Function, + Token::LParen, + Token::Ident("x".to_string()), + Token::Comma, + Token::Ident("y".to_string()), + Token::RParen, + Token::LBrace, + Token::Ident("x".to_string()), + Token::Plus, + Token::Ident("y".to_string()), + Token::Semicolon, + Token::RBrace, + Token::Semicolon, + Token::Let, + Token::Ident("result".to_string()), + Token::Assign, + Token::Ident("add".to_string()), + Token::LParen, + Token::Ident("five".to_string()), + Token::Comma, + Token::Ident("ten".to_string()), + Token::RParen, + Token::Semicolon, + Token::EOF, + ], + ); + tests.insert( + "let result = add(five, ten); + !-/*5; + 5 < 10 > 5; + + if(5 < 10) { + return true; + } else { + return false; + } + + 10 == 10; + 9 != 10; + + ", + vec![ + Token::Let, + Token::Ident("result".to_string()), + Token::Assign, + Token::Ident("add".to_string()), + Token::LParen, + Token::Ident("five".to_string()), + Token::Comma, + Token::Ident("ten".to_string()), + Token::RParen, + Token::Semicolon, + Token::ExclamationMark, + Token::Subtract, + Token::Divide, + Token::Multiply, + Token::Int(5), + Token::Semicolon, + Token::Int(5), + Token::LessThan, + Token::Int(10), + Token::GreaterThan, + Token::Int(5), + Token::Semicolon, + Token::If, + Token::LParen, + Token::Int(5), + Token::LessThan, + Token::Int(10), + Token::RParen, + Token::LBrace, + Token::Return, + Token::True, + Token::Semicolon, + Token::RBrace, + Token::Else, + Token::LBrace, + Token::Return, + Token::False, + Token::Semicolon, + Token::RBrace, + Token::Int(10), + Token::Equals, + Token::Int(10), + Token::Semicolon, + Token::Int(9), + Token::NotEquals, + Token::Int(10), + Token::Semicolon, + Token::EOF, + ], + ); + + for (k, v) in tests { + let tokenized_output = Lexer::new(k).collect::>(); + assert_eq!(v.len(), tokenized_output.len()); + + for (exp, actual) in v.into_iter().zip(tokenized_output) { + if actual != exp { + println!("Expect: {:?}, Actual: {:?}", exp, actual); + } + assert_eq!(actual, exp); + } + } + } +} diff --git a/src/main.rs b/src/main.rs index b0c3820..33a6520 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,161 +2,10 @@ extern crate lazy_static; mod lexer; +mod parser; mod repl; fn main() { repl::init(); } -#[cfg(test)] -mod tests { - use lexer::{Lexer, Token}; - use std::collections::HashMap; - - #[test] - fn new_token() { - let mut tests = HashMap::new(); - - tests.insert( - "=+(){},;", - vec![ - Token::Assign, - Token::Plus, - Token::LParen, - Token::RParen, - Token::LBrace, - Token::RBrace, - Token::Comma, - Token::Semicolon, - Token::EOF, - ], - ); - tests.insert( - "let five = 5; - let ten = 10; - - let add = fn(x, y) { - x + y; - }; - - let result = add(five, ten);", - vec![ - Token::Let, - Token::Ident("five".to_string()), - Token::Assign, - Token::Int(5), - Token::Semicolon, - Token::Let, - Token::Ident("ten".to_string()), - Token::Assign, - Token::Int(10), - Token::Semicolon, - Token::Let, - Token::Ident("add".to_string()), - Token::Assign, - Token::Function, - Token::LParen, - Token::Ident("x".to_string()), - Token::Comma, - Token::Ident("y".to_string()), - Token::RParen, - Token::LBrace, - Token::Ident("x".to_string()), - Token::Plus, - Token::Ident("y".to_string()), - Token::Semicolon, - Token::RBrace, - Token::Semicolon, - Token::Let, - Token::Ident("result".to_string()), - Token::Assign, - Token::Ident("add".to_string()), - Token::LParen, - Token::Ident("five".to_string()), - Token::Comma, - Token::Ident("ten".to_string()), - Token::RParen, - Token::Semicolon, - Token::EOF, - ], - ); - tests.insert( - "let result = add(five, ten); - !-/*5; - 5 < 10 > 5; - - if(5 < 10) { - return true; - } else { - return false; - } - - 10 == 10; - 9 != 10; - - ", - vec![ - Token::Let, - Token::Ident("result".to_string()), - Token::Assign, - Token::Ident("add".to_string()), - Token::LParen, - Token::Ident("five".to_string()), - Token::Comma, - Token::Ident("ten".to_string()), - Token::RParen, - Token::Semicolon, - Token::ExclamationMark, - Token::Subtract, - Token::Divide, - Token::Multiply, - Token::Int(5), - Token::Semicolon, - Token::Int(5), - Token::LessThan, - Token::Int(10), - Token::GreaterThan, - Token::Int(5), - Token::Semicolon, - Token::If, - Token::LParen, - Token::Int(5), - Token::LessThan, - Token::Int(10), - Token::RParen, - Token::LBrace, - Token::Return, - Token::True, - Token::Semicolon, - Token::RBrace, - Token::Else, - Token::LBrace, - Token::Return, - Token::False, - Token::Semicolon, - Token::RBrace, - Token::Int(10), - Token::Equals, - Token::Int(10), - Token::Semicolon, - Token::Int(9), - Token::NotEquals, - Token::Int(10), - Token::Semicolon, - Token::EOF, - ], - ); - - for (k, v) in tests { - let tokenized_output = Lexer::new(k).collect::>(); - assert_eq!(v.len(), tokenized_output.len()); - - for (exp, actual) in v.into_iter().zip(tokenized_output) { - if actual != exp { - println!("Expect: {:?}, Actual: {:?}", exp, actual); - } - assert_eq!(actual, exp); - } - } - } -} diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs new file mode 100644 index 0000000..7eef0fe --- /dev/null +++ b/src/parser/ast/mod.rs @@ -0,0 +1,10 @@ +use crate::lexer::Lexer; + +#[derive(Debug)] +pub struct LetStatement { + name: String, +} + +impl LetStatement { + pub fn parse(lexer: &mut Lexer) -> Self {} +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..5223ed5 --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,50 @@ +mod ast; +mod statement; + +use self::statement::Statement; +use crate::lexer::{Lexer, Token}; + +pub struct Parser<'a> { + lexer: Lexer<'a>, +} + +impl<'a> Parser<'a> { + pub fn new(lexer: Lexer<'a>) -> Parser<'a> { + return Parser { lexer }; + } +} + +impl<'a> Iterator for Parser<'a> { + type Item = Statement; + + fn next(&mut self) -> Option { + match self.lexer.next() { + Some(Token::Let) => Statement::Let.parse(&mut self.lexer), + _ => None, + None => None, + } + } +} + +#[cfg(test)] +mod tests { + use crate::lexer::Lexer; + use crate::parser::Parser; + + #[test] + fn let_statements() { + let ip = " + let yr = 5; + let qq = 10; + let foobar = 8388383; + "; + + let lexer = Lexer::new(ip); + + let stmts = Parser::new(lexer); + + for stmt in stmts { + println!("{:?}", stmt); + } + } +} diff --git a/src/parser/statement/mod.rs b/src/parser/statement/mod.rs new file mode 100644 index 0000000..8e0adfc --- /dev/null +++ b/src/parser/statement/mod.rs @@ -0,0 +1,21 @@ +use super::ast; +use crate::lexer::Lexer; + +#[derive(Debug)] +pub enum Statement { + Let, +} + +impl Statement { + pub fn token_literal(&self) -> String { + match self { + Let => "let".to_owned(), + } + } + + pub fn parse(&self, lexer: &mut Lexer) -> ast::Statement { + match self { + Let => Statement::Let(ast::LetStatement::parse(lexer)), + } + } +} diff --git a/src/repl/mod.rs b/src/repl/mod.rs index ae1b975..0e75a14 100644 --- a/src/repl/mod.rs +++ b/src/repl/mod.rs @@ -1,4 +1,4 @@ -use crate::lexer::Lexer; +use crate::{lexer::Lexer, parser::Parser}; use std::io::{self, BufRead, Write}; const PROMPT: &'static str = ">> "; @@ -20,8 +20,10 @@ fn start(mut ip: R, mut out: W) { ip.read_line(&mut s).unwrap(); let tokens = Lexer::new(&s); - for token in tokens { + for token in tokens.clone() { println!("{:?}", token); } + + let parser = Parser::new(tokens); } }