From e6e1be2e000fe53de276776dbae872bb01a396ec Mon Sep 17 00:00:00 2001 From: ishanjain28 Date: Wed, 25 Dec 2019 23:07:39 +0530 Subject: [PATCH] Working on parser. It can parse return statements and log errors now --- src/lexer/mod.rs | 2 +- src/parser/ast/mod.rs | 22 +++++++- src/parser/mod.rs | 114 ++++++++++++++++++++++++++++++++---------- src/repl.rs | 2 +- 4 files changed, 109 insertions(+), 31 deletions(-) diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 21910e1..a03a16c 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -18,7 +18,7 @@ lazy_static! { }; } -#[derive(Debug, PartialEq, Eq, Clone, Hash)] +#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] pub enum TokenType { Illegal, EOF, diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index bf76dc9..5e4b314 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -3,7 +3,7 @@ use crate::{ parser::Parser, }; -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct Program { pub statements: Vec, } @@ -16,13 +16,15 @@ pub enum Node { #[derive(Debug, PartialEq)] pub enum Statement { Let(LetStatement), + Return(ReturnStatement), } impl<'a> Statement { pub fn parse(parser: &'a mut Parser, token: Token) -> Option { match token.name { TokenType::Let => Some(Statement::Let(LetStatement::parse(parser)?)), - _ => todo!(), + TokenType::Return => Some(Statement::Return(ReturnStatement::parse(parser)?)), + _ => None, } } } @@ -57,12 +59,28 @@ impl LetStatement { // TODO: Right now, We are just skipping over all the expressions // That'll come later + // Also, Right now, It hangs forever in case there is no semicolon at the end while parser.lexer.next() != Some(Token::new(TokenType::Semicolon)) {} Some(stmt) } } +#[derive(Debug, PartialEq)] +pub struct ReturnStatement { + return_value: Option, +} + +impl ReturnStatement { + fn parse(parser: &mut Parser) -> Option { + let stmt = ReturnStatement { + return_value: Some(Expression), + }; + while parser.lexer.next() != Some(Token::new(TokenType::Semicolon)) {} + return Some(stmt); + } +} + // Identifier will be an expression // Identifier in a let statement like, let x = 5; where `x` is an identifier doesn't produce a value // but an identifier *can* produce value when used on rhs, e.g. let x = y; Here `y` is producing a value diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 318d8e0..fb8dd9b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4,32 +4,35 @@ use { lexer::{Lexer, Token, TokenType}, parser::ast::{Program, Statement}, }, - std::iter::Peekable, + std::{ + fmt::{Display, Error as FmtError, Formatter}, + iter::Peekable, + }, }; pub struct Parser<'a> { lexer: Peekable>, + pub errors: Vec, } impl<'a> Parser<'a> { pub fn new(lexer: Lexer<'a>) -> Self { Self { lexer: lexer.peekable(), + errors: vec![], } } - pub fn parse_program(mut self) -> Program { + pub fn parse_program(&mut self) -> Program { let mut program = Program { statements: vec![] }; - loop { - let token = self.lexer.next().unwrap(); + while let Some(token) = self.lexer.next() { if token.name == TokenType::EOF { break; } - - match Statement::parse(&mut self, token) { + match Statement::parse(self, token.clone()) { Some(v) => program.statements.push(v), - None => todo!(), // This will happen in case of a parsing error or something + None => {} // This will happen in case of a parsing error or something } } @@ -52,9 +55,32 @@ impl<'a> Parser<'a> { if self.peek_token_is(token) { self.lexer.next() } else { + let got_token = match self.lexer.peek() { + Some(v) => Some(v.name), + None => None, + }; + self.peek_error(token, got_token); None } } + + fn peek_error(&mut self, et: TokenType, gt: Option) { + let msg = match gt { + Some(v) => format!("expected next token to be {:?}, Got {:?} instead", et, v), + None => format!("expected next token to be {:?}, Got None instead", et), + }; + self.errors.push(ParserError { reason: msg }); + } +} + +pub struct ParserError { + reason: String, +} + +impl Display for ParserError { + fn fmt(&self, fmt: &mut Formatter) -> Result<(), FmtError> { + write!(fmt, "{}", self.reason) + } } #[cfg(test)] @@ -62,34 +88,68 @@ mod tests { use crate::{ lexer::{Lexer, TokenType}, parser::{ - ast::{Identifier, LetStatement, Statement}, + ast::{Identifier, LetStatement, Program, Statement}, Parser, }, }; #[test] fn let_statements() { - let lexer = Lexer::new("let x =5;let y=10; let foobar=538383;"); - let parser = Parser::new(lexer); + let mut lexer = Lexer::new("let x =5;let y=10; let foobar=538383;"); + let mut parser = Parser::new(lexer); + let program = parser.parse_program(); + check_parser_errors(&parser); + assert_eq!(program.statements.len(), 3); + assert_eq!( + program, + Program { + statements: vec![ + Statement::Let(LetStatement { + name: Identifier::new(TokenType::Let, "x"), + value: None + }), + Statement::Let(LetStatement { + name: Identifier::new(TokenType::Let, "y"), + value: None + }), + Statement::Let(LetStatement { + name: Identifier::new(TokenType::Let, "foobar"), + value: None + }) + ], + } + ); + + lexer = Lexer::new("let x = 5;let x 5;let = 10; let 83838383;"); + parser = Parser::new(lexer); + let program = parser.parse_program(); + check_parser_errors(&parser); + assert_eq!(parser.errors.len(), 3); + assert_eq!(program.statements.len(), 1); + } + + #[test] + fn return_statements() { + let lexer = Lexer::new("return 5; return 10; return add(10);"); + let mut parser = Parser::new(lexer); let program = parser.parse_program(); + check_parser_errors(&parser); assert_eq!(program.statements.len(), 3); + assert_eq!(parser.errors.len(), 0); + } - assert_eq!( - program.statements, - vec![ - Statement::Let(LetStatement { - name: Identifier::new(TokenType::Let, "x"), - value: None - }), - Statement::Let(LetStatement { - name: Identifier::new(TokenType::Let, "y"), - value: None - }), - Statement::Let(LetStatement { - name: Identifier::new(TokenType::Let, "foobar"), - value: None - }) - ] - ); + fn check_parser_errors(p: &Parser) { + if p.errors.is_empty() { + return; + } else { + let mut out = String::new(); + + out.push_str(&format!("parser has {} errors\n", p.errors.len())); + + for error in &p.errors { + out.push_str(&format!("parser error: {}\n", error)); + } + eprintln!("{}", out); + } } } diff --git a/src/repl.rs b/src/repl.rs index 2a211f9..1ef6970 100644 --- a/src/repl.rs +++ b/src/repl.rs @@ -24,7 +24,7 @@ fn start(mut ip: R, mut out: W) { println!("{:?}", token); } - let parser = Parser::new(tokens); + let mut parser = Parser::new(tokens); let stmts = parser.parse_program(); println!("parser={:?}", stmts);