Working on parser

1. Implemented ToString on AST nodes to allow converting the AST back to
source code in tests and to help with ensuring correctness
2. Locked Rust toolchain to a specific version
Ishan Jain 2020-01-11 23:25:51 +05:30
parent e6e1be2e00
commit 2fd6c9ca6d
5 changed files with 264 additions and 52 deletions
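
A note on the approach taken below: the diff implements `ToString` directly on each AST node. An equivalent route in Rust is to implement `std::fmt::Display` instead, since the standard library's blanket `impl<T: fmt::Display> ToString for T` then supplies `to_string()` automatically and the nodes also become usable with `format!` and `println!`. A minimal sketch of that alternative, using a hypothetical stand-in node rather than the types from this commit:

    use std::fmt;

    // Hypothetical stand-in for an AST node; illustration only.
    struct Identifier {
        value: String,
    }

    impl fmt::Display for Identifier {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str(&self.value)
        }
    }

    fn main() {
        let node = Identifier {
            value: String::from("myVar"),
        };
        // to_string() comes for free from the blanket impl over Display.
        assert_eq!(node.to_string(), "myVar");
    }

Either way the round-trip test added below works the same; this is purely a design-choice note.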

rust-toolchain Normal file
View File

@@ -0,0 +1 @@
+nightly-2020-01-08

View File

@@ -1,5 +1,6 @@
 use std::{
     collections::HashMap,
+    convert::Into,
     iter::Peekable,
     str::{self, Chars},
 };
@@ -28,7 +29,7 @@ pub enum TokenType {
     // Ident is basically most things that are not covered
     // by other variants of this enum.
     Ident,
+    Int,
     // Operators
     Assign,
     Plus,
@@ -52,13 +53,44 @@ pub enum TokenType {
     // Keywords
     Function,
     If,
+    Else,
     Let,
     True,
-    Else,
     False,
     Return,
 }
+
+impl Into<&'static str> for TokenType {
+    fn into(self) -> &'static str {
+        match self {
+            TokenType::Assign => "=",
+            TokenType::Plus => "+",
+            TokenType::Multiply => "*",
+            TokenType::Divide => "/",
+            TokenType::Subtract => "-",
+            TokenType::ExclamationMark => "!",
+            TokenType::LessThan => "<=",
+            TokenType::GreaterThan => ">=",
+            TokenType::Equals => "==",
+            TokenType::NotEquals => "!=",
+            TokenType::Comma => ",",
+            TokenType::Semicolon => ";",
+            TokenType::LParen => "(",
+            TokenType::RParen => ")",
+            TokenType::LBrace => "{",
+            TokenType::RBrace => "}",
+            TokenType::Function => "fn",
+            TokenType::If => "if",
+            TokenType::Else => "else",
+            TokenType::Let => "let",
+            TokenType::True => "true",
+            TokenType::False => "false",
+            TokenType::Return => "return",
+            _ => unreachable!(),
+        }
+    }
+}
 
 #[derive(Debug, PartialEq, Eq, Clone, Hash)]
 pub struct Token {
     pub name: TokenType,
@@ -81,6 +113,10 @@ impl Token {
             literal: Some(value.to_string()),
         }
     }
+
+    pub fn to_string(&self) -> &'static str {
+        self.name.into()
+    }
 }
 
 #[derive(Debug, Clone)]
@@ -200,7 +236,7 @@ impl<'a> Iterator for Lexer<'a> {
             }
             Some(ch) if ch.is_ascii_digit() => {
                 let number = self.read_number(ch);
-                Some(Token::with_value(TokenType::Ident, &number))
+                Some(Token::with_value(TokenType::Int, &number))
             }
             None if !self.eof_sent => {
                 self.eof_sent = true;
@@ -261,12 +297,12 @@ mod tests {
             Token::new(TokenType::Let),
             Token::with_value(TokenType::Ident, "five"),
             Token::new(TokenType::Assign),
-            Token::with_value(TokenType::Ident, "5"),
+            Token::with_value(TokenType::Int, "5"),
             Token::new(TokenType::Semicolon),
             Token::new(TokenType::Let),
             Token::with_value(TokenType::Ident, "ten"),
             Token::new(TokenType::Assign),
-            Token::with_value(TokenType::Ident, "10"),
+            Token::with_value(TokenType::Int, "10"),
             Token::new(TokenType::Semicolon),
             Token::new(TokenType::Let),
             Token::with_value(TokenType::Ident, "add"),
@@ -331,19 +367,19 @@ mod tests {
             Token::new(TokenType::Subtract),
             Token::new(TokenType::Divide),
             Token::new(TokenType::Multiply),
-            Token::with_value(TokenType::Ident, "5"),
+            Token::with_value(TokenType::Int, "5"),
             Token::new(TokenType::Semicolon),
-            Token::with_value(TokenType::Ident, "5"),
+            Token::with_value(TokenType::Int, "5"),
             Token::new(TokenType::LessThan),
-            Token::with_value(TokenType::Ident, "10"),
+            Token::with_value(TokenType::Int, "10"),
             Token::new(TokenType::GreaterThan),
-            Token::with_value(TokenType::Ident, "5"),
+            Token::with_value(TokenType::Int, "5"),
             Token::new(TokenType::Semicolon),
             Token::new(TokenType::If),
             Token::new(TokenType::LParen),
-            Token::with_value(TokenType::Ident, "5"),
+            Token::with_value(TokenType::Int, "5"),
             Token::new(TokenType::LessThan),
-            Token::with_value(TokenType::Ident, "10"),
+            Token::with_value(TokenType::Int, "10"),
             Token::new(TokenType::RParen),
             Token::new(TokenType::LBrace),
             Token::new(TokenType::Return),
@@ -356,13 +392,13 @@ mod tests {
             Token::new(TokenType::False),
             Token::new(TokenType::Semicolon),
             Token::new(TokenType::RBrace),
-            Token::with_value(TokenType::Ident, "10"),
+            Token::with_value(TokenType::Int, "10"),
             Token::new(TokenType::Equals),
-            Token::with_value(TokenType::Ident, "10"),
+            Token::with_value(TokenType::Int, "10"),
             Token::new(TokenType::Semicolon),
-            Token::with_value(TokenType::Ident, "9"),
+            Token::with_value(TokenType::Int, "9"),
             Token::new(TokenType::NotEquals),
-            Token::with_value(TokenType::Ident, "10"),
+            Token::with_value(TokenType::Int, "10"),
             Token::new(TokenType::Semicolon),
             Token::new(TokenType::EOF),
         ],
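
The `impl Into<&'static str> for TokenType` added above works, but the usual pattern in Rust is to implement `From` and let the standard library's blanket impl provide `Into` for free. A minimal sketch of that shape, using a trimmed-down copy of the enum (illustration only, not this commit's code):

    // Trimmed-down copy of TokenType; the real enum has many more variants.
    #[derive(Clone, Copy)]
    enum TokenType {
        Assign,
        Plus,
        Let,
    }

    impl From<TokenType> for &'static str {
        fn from(t: TokenType) -> &'static str {
            match t {
                TokenType::Assign => "=",
                TokenType::Plus => "+",
                TokenType::Let => "let",
            }
        }
    }

    fn main() {
        // From gives the conversion directly...
        assert_eq!(<&'static str>::from(TokenType::Let), "let");
        // ...and Into comes along via the blanket impl.
        let s: &'static str = TokenType::Plus.into();
        assert_eq!(s, "+");
    }

Also worth double-checking in the mapping above: LessThan and GreaterThan render as "<=" and ">=", which may not match what the lexer actually produces for those tokens.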

View File

@@ -1,6 +1,10 @@
-use crate::{
-    lexer::{Token, TokenType},
-    parser::Parser,
+// TODO: Maybe implement String method to pretty print all AST nodes
+use {
+    crate::{
+        lexer::{Token, TokenType},
+        parser::Parser,
+    },
+    std::convert::From,
 };
 
 #[derive(Debug, PartialEq)]
@@ -8,6 +12,18 @@ pub struct Program {
     pub statements: Vec<Statement>,
 }
 
+impl ToString for Program {
+    fn to_string(&self) -> String {
+        let mut out = String::new();
+        for statement in &self.statements {
+            out.push_str(&statement.to_string());
+            out.push('\n');
+        }
+        out
+    }
+}
+
 pub enum Node {
     Statement(Statement),
     Expression(Expression),
@@ -17,6 +33,7 @@ pub enum Node {
 pub enum Statement {
     Let(LetStatement),
     Return(ReturnStatement),
+    ExpressionStatement(ExpressionStatement),
 }
 
 impl<'a> Statement {
@@ -24,14 +41,22 @@ impl<'a> Statement {
         match token.name {
             TokenType::Let => Some(Statement::Let(LetStatement::parse(parser)?)),
             TokenType::Return => Some(Statement::Return(ReturnStatement::parse(parser)?)),
-            _ => None,
+            _ => Some(Statement::ExpressionStatement(ExpressionStatement::parse(
+                parser, token,
+            )?)),
         }
     }
 }
 
-// TODO: Expressions are not going to be a struct so using this here just as a placeholder
-#[derive(Debug, PartialEq)]
-pub struct Expression;
+impl ToString for Statement {
+    fn to_string(&self) -> String {
+        match self {
+            Statement::Let(v) => v.to_string(),
+            Statement::Return(v) => v.to_string(),
+            Statement::ExpressionStatement(v) => v.to_string(),
+        }
+    }
+}
 
 #[derive(Debug, PartialEq)]
 pub struct LetStatement {
@@ -49,11 +74,8 @@ impl LetStatement {
             value: None,
         };
 
-        if let Some(v) = parser.expect_peek(TokenType::Ident) {
-            stmt.name.value = v.literal?;
-        } else {
-            return None;
-        }
+        let ident = parser.expect_peek(TokenType::Ident)?;
+        stmt.name.value = ident.literal?;
 
         parser.expect_peek(TokenType::Assign)?;
@@ -64,6 +86,23 @@ impl LetStatement {
         Some(stmt)
     }
+
+    const fn token_literal() -> &'static str {
+        "let"
+    }
+}
+
+impl ToString for LetStatement {
+    fn to_string(&self) -> String {
+        let mut out = format!("{} {} = ", Self::token_literal(), self.name.value);
+        if let Some(v) = &self.value {
+            let a: String = v.into();
+            out.push_str(&a);
+        }
+        out.push(';');
+        out
+    }
 }
 
 #[derive(Debug, PartialEq)]
@@ -74,11 +113,105 @@ pub struct ReturnStatement {
 impl ReturnStatement {
     fn parse(parser: &mut Parser) -> Option<Self> {
         let stmt = ReturnStatement {
-            return_value: Some(Expression),
+            return_value: Some(Expression::None),
         };
 
         while parser.lexer.next() != Some(Token::new(TokenType::Semicolon)) {}
 
         return Some(stmt);
     }
+
+    // TODO: REMOVE THIS!
+    const fn token_literal() -> &'static str {
+        "return"
+    }
+}
+
+impl ToString for ReturnStatement {
+    fn to_string(&self) -> String {
+        let mut out = String::from(Self::token_literal());
+        if let Some(v) = &self.return_value {
+            out.push(' ');
+            let a: String = v.into();
+            out.push_str(&a);
+        }
+        out.push(';');
+        out
+    }
+}
+
+#[derive(Debug, PartialEq)]
+pub struct ExpressionStatement {
+    pub token: Token,
+    pub expression: Expression,
+}
+
+impl ExpressionStatement {
+    fn parse(parser: &mut Parser, current_token: Token) -> Option<Self> {
+        // let expr = Expression::parse(parser, token.clone(), ExpressionPriority::Lowest)?;
+        let stmt = ExpressionStatement {
+            token: current_token.clone(),
+            expression: Expression::parse(parser, current_token, ExpressionPriority::Lowest)?,
+        };
+
+        if parser.peek_token_is(TokenType::Semicolon) {
+            parser.lexer.next();
+        }
+
+        Some(stmt)
+    }
+}
+
+impl ToString for ExpressionStatement {
+    fn to_string(&self) -> String {
+        self.expression.to_string()
+    }
+}
+
+#[derive(Debug, PartialEq)]
+enum ExpressionPriority {
+    Lowest = 0,
+    Equals = 1,
+    LessGreater = 2,
+    Sum = 3,
+    Product = 4,
+    Prefix = 5,
+    Call = 6,
+}
+
+// TODO: Expressions are not going to be a struct so using this here just as a placeholder
+#[derive(Debug, PartialEq)]
+pub enum Expression {
+    Identifier(Identifier),
+    // TODO: Temporary placeholder value. Should be removed once this section is done
+    None,
+}
+
+impl Expression {
+    fn parse(parser: &mut Parser, token: Token, precedence: ExpressionPriority) -> Option<Self> {
+        match token.name {
+            TokenType::Ident => Self::parse_identifier(parser, token),
+            _ => None,
+        }
+    }
+
+    fn parse_identifier(parser: &mut Parser, token: Token) -> Option<Self> {
+        Some(Self::Identifier(Identifier::new(
+            token.name,
+            &token.literal?,
+        )))
+    }
+
+    fn to_string(&self) -> String {
+        match self {
+            Expression::Identifier(v) => v.to_string(),
+            Expression::None => "None".into(),
+        }
+    }
+}
+
+impl From<&Expression> for String {
+    fn from(expr: &Expression) -> String {
+        expr.to_string()
+    }
 }
 
 // Identifier will be an expression
@@ -97,4 +230,45 @@ impl Identifier {
             value: v.to_string(),
         }
     }
+
+    pub fn to_string(&self) -> String {
+        self.value.clone()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{
+        lexer::{Token, TokenType},
+        parser::{
+            ast::{Expression, Identifier, LetStatement, ReturnStatement, Statement},
+            Program,
+        },
+    };
+
+    #[test]
+    fn test_string() {
+        let program = Program {
+            statements: vec![
+                Statement::Let(LetStatement {
+                    name: Identifier::new(TokenType::Ident, "myVar"),
+                    value: Some(Expression::Identifier(Identifier::new(
+                        TokenType::Ident,
+                        "anotherVar",
+                    ))),
+                }),
+                Statement::Return(ReturnStatement {
+                    return_value: Some(Expression::Identifier(Identifier::new(
+                        TokenType::Int,
+                        "5",
+                    ))),
+                }),
+                Statement::Return(ReturnStatement { return_value: None }),
+            ],
+        };
+
+        assert_eq!(
+            program.to_string(),
+            "let myVar = anotherVar;\nreturn 5;\nreturn;\n"
+        );
+    }
 }
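
The new `ExpressionPriority` enum reads like the precedence ladder for a Pratt-style expression parser: parsing keeps consuming infix operators while the next operator binds tighter than the current precedence. As committed it only derives `PartialEq`, so comparing two priorities will eventually need an ordering, either by deriving `PartialOrd`/`Ord` or by casting to the discriminant. A small sketch of the comparison with the extra derives added (an assumption, not part of this commit):

    // Same ladder as the diff, with ordering derives added for illustration.
    #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
    enum ExpressionPriority {
        Lowest = 0,
        Equals = 1,
        LessGreater = 2,
        Sum = 3,
        Product = 4,
        Prefix = 5,
        Call = 6,
    }

    fn main() {
        // '*' binds tighter than '+', so Product outranks Sum.
        assert!(ExpressionPriority::Product > ExpressionPriority::Sum);
        assert!(ExpressionPriority::Lowest < ExpressionPriority::Call);
    }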

View File

@@ -12,7 +12,7 @@ use {
 pub struct Parser<'a> {
     lexer: Peekable<Lexer<'a>>,
-    pub errors: Vec<ParserError>,
+    errors: Vec<ParserError>,
 }
 
 impl<'a> Parser<'a> {
@@ -30,7 +30,7 @@ impl<'a> Parser<'a> {
             if token.name == TokenType::EOF {
                 break;
             }
-            match Statement::parse(self, token.clone()) {
+            match Statement::parse(self, token) {
                 Some(v) => program.statements.push(v),
                 None => {} // This will happen in case of a parsing error or something
             }
@@ -86,12 +86,28 @@ impl Display for ParserError {
 #[cfg(test)]
 mod tests {
     use crate::{
-        lexer::{Lexer, TokenType},
+        lexer::{Lexer, Token, TokenType},
         parser::{
-            ast::{Identifier, LetStatement, Program, Statement},
+            ast::{Expression, ExpressionStatement, Identifier, LetStatement, Program, Statement},
             Parser,
         },
     };
 
+    fn check_parser_errors(p: &Parser) {
+        if p.errors.is_empty() {
+            return;
+        } else {
+            let mut out = String::new();
+            out.push_str(&format!("parser has {} errors\n", p.errors.len()));
+
+            for error in &p.errors {
+                out.push_str(&format!("parser error: {}\n", error));
+            }
+            eprintln!("{}", out);
+        }
+    }
+
     #[test]
     fn let_statements() {
         let mut lexer = Lexer::new("let x =5;let y=10; let foobar=538383;");
@@ -119,10 +135,11 @@ mod tests {
             }
         );
 
-        lexer = Lexer::new("let x = 5;let x 5;let = 10; let 83838383;");
+        lexer = Lexer::new("let x = 5;let x 5; let 83838383; let = 10;");
         parser = Parser::new(lexer);
         let program = parser.parse_program();
         check_parser_errors(&parser);
+        // println!("{:?}", program);
         assert_eq!(parser.errors.len(), 3);
         assert_eq!(program.statements.len(), 1);
     }
@@ -137,19 +154,4 @@ mod tests {
         assert_eq!(program.statements.len(), 3);
         assert_eq!(parser.errors.len(), 0);
     }
-
-    fn check_parser_errors(p: &Parser) {
-        if p.errors.is_empty() {
-            return;
-        } else {
-            let mut out = String::new();
-            out.push_str(&format!("parser has {} errors\n", p.errors.len()));
-
-            for error in &p.errors {
-                out.push_str(&format!("parser error: {}\n", error));
-            }
-            eprintln!("{}", out);
-        }
-    }
 }
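
One more note on the relocated `check_parser_errors` helper: it only prints the error report to stderr, so a test still passes unless an explicit `assert_eq!` on `parser.errors.len()` follows it. A common alternative is to panic from the helper whenever errors are present, so any test that calls it fails loudly. A sketch of that variant, with a hypothetical minimal `Parser` stand-in so it is self-contained (not this commit's code):

    // Hypothetical stand-in: just enough of a parser to show the helper.
    struct Parser {
        errors: Vec<String>,
    }

    // Fails the calling test whenever the parser accumulated any errors.
    fn check_parser_errors(p: &Parser) {
        if p.errors.is_empty() {
            return;
        }
        let mut out = format!("parser has {} errors\n", p.errors.len());
        for error in &p.errors {
            out.push_str(&format!("parser error: {}\n", error));
        }
        panic!("{}", out);
    }

    fn main() {
        let ok = Parser { errors: vec![] };
        check_parser_errors(&ok); // returns quietly

        // A parser with errors would panic here and fail the test:
        // check_parser_errors(&Parser { errors: vec!["expected `=`".to_string()] });
    }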

View File

@@ -27,6 +27,5 @@ fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
         let mut parser = Parser::new(tokens);
         let stmts = parser.parse_program();
 
-        println!("parser={:?}", stmts);
     }
 }