diff --git a/rust-toolchain b/rust-toolchain new file mode 100644 index 0000000..ee41258 --- /dev/null +++ b/rust-toolchain @@ -0,0 +1 @@ +nightly-2020-01-08 diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index a03a16c..9a0030e 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -1,5 +1,6 @@ use std::{ collections::HashMap, + convert::Into, iter::Peekable, str::{self, Chars}, }; @@ -28,7 +29,7 @@ pub enum TokenType { // Ident is basically most things that are not covered // by other variants of this enum. Ident, - + Int, // Operators Assign, Plus, @@ -52,13 +53,44 @@ pub enum TokenType { // Keywords Function, If, + Else, Let, True, - Else, False, Return, } +impl Into<&'static str> for TokenType { + fn into(self) -> &'static str { + match self { + TokenType::Assign => "=", + TokenType::Plus => "+", + TokenType::Multiply => "*", + TokenType::Divide => "/", + TokenType::Subtract => "-", + TokenType::ExclamationMark => "!", + TokenType::LessThan => "<", + TokenType::GreaterThan => ">", + TokenType::Equals => "==", + TokenType::NotEquals => "!=", + TokenType::Comma => ",", + TokenType::Semicolon => ";", + TokenType::LParen => "(", + TokenType::RParen => ")", + TokenType::LBrace => "{", + TokenType::RBrace => "}", + TokenType::Function => "fn", + TokenType::If => "if", + TokenType::Else => "else", + TokenType::Let => "let", + TokenType::True => "true", + TokenType::False => "false", + TokenType::Return => "return", + _ => unreachable!(), + } + } +} + #[derive(Debug, PartialEq, Eq, Clone, Hash)] pub struct Token { pub name: TokenType, @@ -81,6 +113,10 @@ impl Token { literal: Some(value.to_string()), } } + + pub fn to_string(&self) -> &'static str { + self.name.into() + } } #[derive(Debug, Clone)] @@ -200,7 +236,7 @@ impl<'a> Iterator for Lexer<'a> { } Some(ch) if ch.is_ascii_digit() => { let number = self.read_number(ch); - Some(Token::with_value(TokenType::Ident, &number)) + Some(Token::with_value(TokenType::Int, &number)) } None if !self.eof_sent => { 
self.eof_sent = true; @@ -261,12 +297,12 @@ mod tests { Token::new(TokenType::Let), Token::with_value(TokenType::Ident, "five"), Token::new(TokenType::Assign), - Token::with_value(TokenType::Ident, "5"), + Token::with_value(TokenType::Int, "5"), Token::new(TokenType::Semicolon), Token::new(TokenType::Let), Token::with_value(TokenType::Ident, "ten"), Token::new(TokenType::Assign), - Token::with_value(TokenType::Ident, "10"), + Token::with_value(TokenType::Int, "10"), Token::new(TokenType::Semicolon), Token::new(TokenType::Let), Token::with_value(TokenType::Ident, "add"), @@ -303,16 +339,16 @@ mod tests { "let result = add(five, ten); !-/*5; 5 < 10 > 5; - + if(5 < 10) { return true; } else { return false; } - + 10 == 10; 9 != 10; - + " ) .collect::>(), @@ -331,19 +367,19 @@ mod tests { Token::new(TokenType::Subtract), Token::new(TokenType::Divide), Token::new(TokenType::Multiply), - Token::with_value(TokenType::Ident, "5"), + Token::with_value(TokenType::Int, "5"), Token::new(TokenType::Semicolon), - Token::with_value(TokenType::Ident, "5"), + Token::with_value(TokenType::Int, "5"), Token::new(TokenType::LessThan), - Token::with_value(TokenType::Ident, "10"), + Token::with_value(TokenType::Int, "10"), Token::new(TokenType::GreaterThan), - Token::with_value(TokenType::Ident, "5"), + Token::with_value(TokenType::Int, "5"), Token::new(TokenType::Semicolon), Token::new(TokenType::If), Token::new(TokenType::LParen), - Token::with_value(TokenType::Ident, "5"), + Token::with_value(TokenType::Int, "5"), Token::new(TokenType::LessThan), - Token::with_value(TokenType::Ident, "10"), + Token::with_value(TokenType::Int, "10"), Token::new(TokenType::RParen), Token::new(TokenType::LBrace), Token::new(TokenType::Return), @@ -356,13 +392,13 @@ mod tests { Token::new(TokenType::False), Token::new(TokenType::Semicolon), Token::new(TokenType::RBrace), - Token::with_value(TokenType::Ident, "10"), + Token::with_value(TokenType::Int, "10"), Token::new(TokenType::Equals), - 
Token::with_value(TokenType::Ident, "10"), + Token::with_value(TokenType::Int, "10"), Token::new(TokenType::Semicolon), - Token::with_value(TokenType::Ident, "9"), + Token::with_value(TokenType::Int, "9"), Token::new(TokenType::NotEquals), - Token::with_value(TokenType::Ident, "10"), + Token::with_value(TokenType::Int, "10"), Token::new(TokenType::Semicolon), Token::new(TokenType::EOF), ], diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 5e4b314..4997bf9 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -1,6 +1,10 @@ -use crate::{ - lexer::{Token, TokenType}, - parser::Parser, +// TODO: Maybe implement String method to pretty print all AST nodes +use { + crate::{ + lexer::{Token, TokenType}, + parser::Parser, + }, + std::convert::From, }; #[derive(Debug, PartialEq)] @@ -8,6 +12,18 @@ pub struct Program { pub statements: Vec, } +impl ToString for Program { + fn to_string(&self) -> String { + let mut out = String::new(); + + for statement in &self.statements { + out.push_str(&statement.to_string()); + out.push('\n'); + } + out + } +} + pub enum Node { Statement(Statement), Expression(Expression), @@ -17,6 +33,7 @@ pub enum Node { pub enum Statement { Let(LetStatement), Return(ReturnStatement), + ExpressionStatement(ExpressionStatement), } impl<'a> Statement { @@ -24,14 +41,22 @@ impl<'a> Statement { match token.name { TokenType::Let => Some(Statement::Let(LetStatement::parse(parser)?)), TokenType::Return => Some(Statement::Return(ReturnStatement::parse(parser)?)), - _ => None, + _ => Some(Statement::ExpressionStatement(ExpressionStatement::parse( + parser, token, + )?)), } } } -// TODO: Expressions are not going to be a struct so using this here just as a placeholder -#[derive(Debug, PartialEq)] -pub struct Expression; +impl ToString for Statement { + fn to_string(&self) -> String { + match self { + Statement::Let(v) => v.to_string(), + Statement::Return(v) => v.to_string(), + Statement::ExpressionStatement(v) => v.to_string(), + } + } 
+} #[derive(Debug, PartialEq)] pub struct LetStatement { @@ -49,11 +74,8 @@ impl LetStatement { value: None, }; - if let Some(v) = parser.expect_peek(TokenType::Ident) { - stmt.name.value = v.literal?; - } else { - return None; - } + let ident = parser.expect_peek(TokenType::Ident)?; + stmt.name.value = ident.literal?; parser.expect_peek(TokenType::Assign)?; @@ -64,6 +86,23 @@ impl LetStatement { Some(stmt) } + + const fn token_literal() -> &'static str { + "let" + } +} + +impl ToString for LetStatement { + fn to_string(&self) -> String { + let mut out = format!("{} {} = ", Self::token_literal(), self.name.value); + + if let Some(v) = &self.value { + let a: String = v.into(); + out.push_str(&a); + } + out.push(';'); + out + } } #[derive(Debug, PartialEq)] @@ -74,11 +113,105 @@ pub struct ReturnStatement { impl ReturnStatement { fn parse(parser: &mut Parser) -> Option { let stmt = ReturnStatement { - return_value: Some(Expression), + return_value: Some(Expression::None), }; while parser.lexer.next() != Some(Token::new(TokenType::Semicolon)) {} return Some(stmt); } + + // TODO: REMOVE THIS! 
+ const fn token_literal() -> &'static str { + "return" + } +} + +impl ToString for ReturnStatement { + fn to_string(&self) -> String { + let mut out = String::from(Self::token_literal()); + + if let Some(v) = &self.return_value { + out.push(' '); + let a: String = v.into(); + out.push_str(&a); + } + out.push(';'); + out + } +} + +#[derive(Debug, PartialEq)] +pub struct ExpressionStatement { + pub token: Token, + pub expression: Expression, +} + +impl ExpressionStatement { + fn parse(parser: &mut Parser, current_token: Token) -> Option { + // let expr = Expression::parse(parser, token.clone(), ExpressionPriority::Lowest)?; + let stmt = ExpressionStatement { + token: current_token.clone(), + expression: Expression::parse(parser, current_token, ExpressionPriority::Lowest)?, + }; + if parser.peek_token_is(TokenType::Semicolon) { + parser.lexer.next(); + } + Some(stmt) + } +} + +impl ToString for ExpressionStatement { + fn to_string(&self) -> String { + self.expression.to_string() + } +} + +#[derive(Debug, PartialEq)] +enum ExpressionPriority { + Lowest = 0, + Equals = 1, + LessGreater = 2, + Sum = 3, + Product = 4, + Prefix = 5, + Call = 6, +} + +// NOTE: Expression used to be a placeholder unit struct; it is now an enum (currently only identifiers, plus a temporary `None` variant) + +#[derive(Debug, PartialEq)] +pub enum Expression { + Identifier(Identifier), + // TODO: Temporary placeholder value. 
Should be removed once this section is done + None, +} + +impl Expression { + fn parse(parser: &mut Parser, token: Token, precedence: ExpressionPriority) -> Option { + match token.name { + TokenType::Ident => Self::parse_identifier(parser, token), + _ => None, + } + } + + fn parse_identifier(parser: &mut Parser, token: Token) -> Option { + Some(Self::Identifier(Identifier::new( + token.name, + &token.literal?, + ))) + } + + fn to_string(&self) -> String { + match self { + Expression::Identifier(v) => v.to_string(), + Expression::None => "None".into(), + } + } +} + +impl From<&Expression> for String { + fn from(expr: &Expression) -> String { + expr.to_string() + } } // Identifier will be an expression @@ -97,4 +230,45 @@ impl Identifier { value: v.to_string(), } } + + pub fn to_string(&self) -> String { + self.value.clone() + } +} + +#[cfg(test)] +mod tests { + use crate::{ + lexer::{Token, TokenType}, + parser::{ + ast::{Expression, Identifier, LetStatement, ReturnStatement, Statement}, + Program, + }, + }; + + #[test] + fn test_string() { + let program = Program { + statements: vec![ + Statement::Let(LetStatement { + name: Identifier::new(TokenType::Ident, "myVar"), + value: Some(Expression::Identifier(Identifier::new( + TokenType::Ident, + "anotherVar", + ))), + }), + Statement::Return(ReturnStatement { + return_value: Some(Expression::Identifier(Identifier::new( + TokenType::Int, + "5", + ))), + }), + Statement::Return(ReturnStatement { return_value: None }), + ], + }; + assert_eq!( + program.to_string(), + "let myVar = anotherVar;\nreturn 5;\nreturn;\n" + ); + } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fb8dd9b..102eade 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12,7 +12,7 @@ use { pub struct Parser<'a> { lexer: Peekable>, - pub errors: Vec, + errors: Vec, } impl<'a> Parser<'a> { @@ -30,7 +30,7 @@ impl<'a> Parser<'a> { if token.name == TokenType::EOF { break; } - match Statement::parse(self, token.clone()) { + match 
Statement::parse(self, token) { Some(v) => program.statements.push(v), None => {} // This will happen in case of a parsing error or something } @@ -86,12 +86,28 @@ impl Display for ParserError { #[cfg(test)] mod tests { use crate::{ - lexer::{Lexer, TokenType}, + lexer::{Lexer, Token, TokenType}, parser::{ - ast::{Identifier, LetStatement, Program, Statement}, + ast::{Expression, ExpressionStatement, Identifier, LetStatement, Program, Statement}, Parser, }, }; + + fn check_parser_errors(p: &Parser) { + if p.errors.is_empty() { + return; + } else { + let mut out = String::new(); + + out.push_str(&format!("parser has {} errors\n", p.errors.len())); + + for error in &p.errors { + out.push_str(&format!("parser error: {}\n", error)); + } + eprintln!("{}", out); + } + } + #[test] fn let_statements() { let mut lexer = Lexer::new("let x =5;let y=10; let foobar=538383;"); @@ -119,10 +135,11 @@ mod tests { } ); - lexer = Lexer::new("let x = 5;let x 5;let = 10; let 83838383;"); + lexer = Lexer::new("let x = 5;let x 5; let 83838383; let = 10;"); parser = Parser::new(lexer); let program = parser.parse_program(); check_parser_errors(&parser); + // println!("{:?}", program); assert_eq!(parser.errors.len(), 3); assert_eq!(program.statements.len(), 1); } @@ -137,19 +154,4 @@ mod tests { assert_eq!(program.statements.len(), 3); assert_eq!(parser.errors.len(), 0); } - - fn check_parser_errors(p: &Parser) { - if p.errors.is_empty() { - return; - } else { - let mut out = String::new(); - - out.push_str(&format!("parser has {} errors\n", p.errors.len())); - - for error in &p.errors { - out.push_str(&format!("parser error: {}\n", error)); - } - eprintln!("{}", out); - } - } } diff --git a/src/repl.rs b/src/repl.rs index 1ef6970..fe523ce 100644 --- a/src/repl.rs +++ b/src/repl.rs @@ -27,6 +27,5 @@ fn start(mut ip: R, mut out: W) { let mut parser = Parser::new(tokens); let stmts = parser.parse_program(); - println!("parser={:?}", stmts); } }