Working on parser

1. Implemented ToString on AST nodes, which allows converting the
AST back to source code in tests and helps ensure correctness
2. Locked Rust toolchain to a specific version
This commit is contained in:
Ishan Jain 2020-01-11 23:25:51 +05:30
parent e6e1be2e00
commit 2fd6c9ca6d
5 changed files with 264 additions and 52 deletions

1
rust-toolchain Normal file
View File

@ -0,0 +1 @@
nightly-2020-01-08

View File

@ -1,5 +1,6 @@
use std::{
collections::HashMap,
convert::Into,
iter::Peekable,
str::{self, Chars},
};
@ -28,7 +29,7 @@ pub enum TokenType {
// Ident is basically most things that are not covered
// by other variants of this enum.
Ident,
Int,
// Operators
Assign,
Plus,
@ -52,13 +53,44 @@ pub enum TokenType {
// Keywords
Function,
If,
Else,
Let,
True,
Else,
False,
Return,
}
/// Maps every token type with a fixed spelling (operators, delimiters and
/// keywords) to the source text it stands for.
///
/// # Panics
///
/// Panics through `unreachable!()` for any variant that is not listed in the
/// match below; those variants have no single fixed spelling to return.
impl Into<&'static str> for TokenType {
    fn into(self) -> &'static str {
        match self {
            // Operators
            TokenType::Assign => "=",
            TokenType::Plus => "+",
            TokenType::Multiply => "*",
            TokenType::Divide => "/",
            TokenType::Subtract => "-",
            TokenType::ExclamationMark => "!",
            // Strict comparisons: these variants stand for `<` and `>`,
            // not for `<=` / `>=`.
            TokenType::LessThan => "<",
            TokenType::GreaterThan => ">",
            TokenType::Equals => "==",
            TokenType::NotEquals => "!=",
            // Delimiters
            TokenType::Comma => ",",
            TokenType::Semicolon => ";",
            TokenType::LParen => "(",
            TokenType::RParen => ")",
            TokenType::LBrace => "{",
            TokenType::RBrace => "}",
            // Keywords
            TokenType::Function => "fn",
            TokenType::If => "if",
            TokenType::Else => "else",
            TokenType::Let => "let",
            TokenType::True => "true",
            TokenType::False => "false",
            TokenType::Return => "return",
            _ => unreachable!(),
        }
    }
}
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct Token {
pub name: TokenType,
@ -81,6 +113,10 @@ impl Token {
literal: Some(value.to_string()),
}
}
/// Returns the fixed source spelling of this token's type.
///
/// Delegates to the `Into<&'static str>` conversion of `TokenType`, so it
/// panics for token types that conversion does not cover.
pub fn to_string(&self) -> &'static str {
    Into::<&'static str>::into(self.name)
}
}
#[derive(Debug, Clone)]
@ -200,7 +236,7 @@ impl<'a> Iterator for Lexer<'a> {
}
Some(ch) if ch.is_ascii_digit() => {
let number = self.read_number(ch);
Some(Token::with_value(TokenType::Ident, &number))
Some(Token::with_value(TokenType::Int, &number))
}
None if !self.eof_sent => {
self.eof_sent = true;
@ -261,12 +297,12 @@ mod tests {
Token::new(TokenType::Let),
Token::with_value(TokenType::Ident, "five"),
Token::new(TokenType::Assign),
Token::with_value(TokenType::Ident, "5"),
Token::with_value(TokenType::Int, "5"),
Token::new(TokenType::Semicolon),
Token::new(TokenType::Let),
Token::with_value(TokenType::Ident, "ten"),
Token::new(TokenType::Assign),
Token::with_value(TokenType::Ident, "10"),
Token::with_value(TokenType::Int, "10"),
Token::new(TokenType::Semicolon),
Token::new(TokenType::Let),
Token::with_value(TokenType::Ident, "add"),
@ -331,19 +367,19 @@ mod tests {
Token::new(TokenType::Subtract),
Token::new(TokenType::Divide),
Token::new(TokenType::Multiply),
Token::with_value(TokenType::Ident, "5"),
Token::with_value(TokenType::Int, "5"),
Token::new(TokenType::Semicolon),
Token::with_value(TokenType::Ident, "5"),
Token::with_value(TokenType::Int, "5"),
Token::new(TokenType::LessThan),
Token::with_value(TokenType::Ident, "10"),
Token::with_value(TokenType::Int, "10"),
Token::new(TokenType::GreaterThan),
Token::with_value(TokenType::Ident, "5"),
Token::with_value(TokenType::Int, "5"),
Token::new(TokenType::Semicolon),
Token::new(TokenType::If),
Token::new(TokenType::LParen),
Token::with_value(TokenType::Ident, "5"),
Token::with_value(TokenType::Int, "5"),
Token::new(TokenType::LessThan),
Token::with_value(TokenType::Ident, "10"),
Token::with_value(TokenType::Int, "10"),
Token::new(TokenType::RParen),
Token::new(TokenType::LBrace),
Token::new(TokenType::Return),
@ -356,13 +392,13 @@ mod tests {
Token::new(TokenType::False),
Token::new(TokenType::Semicolon),
Token::new(TokenType::RBrace),
Token::with_value(TokenType::Ident, "10"),
Token::with_value(TokenType::Int, "10"),
Token::new(TokenType::Equals),
Token::with_value(TokenType::Ident, "10"),
Token::with_value(TokenType::Int, "10"),
Token::new(TokenType::Semicolon),
Token::with_value(TokenType::Ident, "9"),
Token::with_value(TokenType::Int, "9"),
Token::new(TokenType::NotEquals),
Token::with_value(TokenType::Ident, "10"),
Token::with_value(TokenType::Int, "10"),
Token::new(TokenType::Semicolon),
Token::new(TokenType::EOF),
],

View File

@ -1,6 +1,10 @@
use crate::{
lexer::{Token, TokenType},
parser::Parser,
// TODO: Maybe implement String method to pretty print all AST nodes
use {
crate::{
lexer::{Token, TokenType},
parser::Parser,
},
std::convert::From,
};
#[derive(Debug, PartialEq)]
@ -8,6 +12,18 @@ pub struct Program {
pub statements: Vec<Statement>,
}
/// Renders the program as source text, one statement per line.
///
/// Implemented as `Display` instead of a hand-written `ToString`: the
/// standard library's blanket `impl<T: Display> ToString for T` keeps
/// `program.to_string()` working and additionally allows `{}` formatting.
impl std::fmt::Display for Program {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for statement in &self.statements {
            // Only `to_string()` is required of a statement here, so this
            // impl does not care how the statement provides it.
            f.write_str(&statement.to_string())?;
            // Every statement, including the last one, ends with a newline.
            f.write_str("\n")?;
        }
        Ok(())
    }
}
pub enum Node {
Statement(Statement),
Expression(Expression),
@ -17,6 +33,7 @@ pub enum Node {
/// A single statement of a parsed program (`Program::statements` holds a
/// list of these).
pub enum Statement {
/// A `let` statement; produced when the statement starts with a `Let` token.
Let(LetStatement),
/// A `return` statement; produced when the statement starts with a `Return` token.
Return(ReturnStatement),
/// An expression used as a statement; produced for every other starting token.
ExpressionStatement(ExpressionStatement),
}
impl<'a> Statement {
@ -24,14 +41,22 @@ impl<'a> Statement {
match token.name {
TokenType::Let => Some(Statement::Let(LetStatement::parse(parser)?)),
TokenType::Return => Some(Statement::Return(ReturnStatement::parse(parser)?)),
_ => None,
_ => Some(Statement::ExpressionStatement(ExpressionStatement::parse(
parser, token,
)?)),
}
}
}
// TODO: Expressions are not going to be a struct so using this here just as a placeholder
#[derive(Debug, PartialEq)]
pub struct Expression;
/// Renders the statement as source text by delegating to the wrapped node.
///
/// Implemented as `Display` instead of a hand-written `ToString`; the
/// blanket `ToString` impl keeps `statement.to_string()` available.
impl std::fmt::Display for Statement {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only `to_string()` is required of the inner nodes.
        let rendered = match self {
            Statement::Let(v) => v.to_string(),
            Statement::Return(v) => v.to_string(),
            Statement::ExpressionStatement(v) => v.to_string(),
        };
        f.write_str(&rendered)
    }
}
#[derive(Debug, PartialEq)]
pub struct LetStatement {
@ -49,11 +74,8 @@ impl LetStatement {
value: None,
};
if let Some(v) = parser.expect_peek(TokenType::Ident) {
stmt.name.value = v.literal?;
} else {
return None;
}
let ident = parser.expect_peek(TokenType::Ident)?;
stmt.name.value = ident.literal?;
parser.expect_peek(TokenType::Assign)?;
@ -64,6 +86,23 @@ impl LetStatement {
Some(stmt)
}
/// Keyword that introduces a `let` statement; used when the statement is
/// rendered back to source text.
const fn token_literal() -> &'static str {
    const KEYWORD: &str = "let";
    KEYWORD
}
}
/// Renders the statement as `let <name> = <value>;`.
///
/// When no value is present the value part is left empty (`let <name> = ;`).
///
/// Implemented as `Display` instead of a hand-written `ToString`; the
/// blanket `ToString` impl keeps `to_string()` available.
impl std::fmt::Display for LetStatement {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{} {} = ", Self::token_literal(), self.name.value)?;
        if let Some(value) = &self.value {
            // Uses the `From<&Expression> for String` conversion.
            f.write_str(&String::from(value))?;
        }
        f.write_str(";")
    }
}
#[derive(Debug, PartialEq)]
@ -74,11 +113,105 @@ pub struct ReturnStatement {
impl ReturnStatement {
    /// Parses a `return` statement.
    ///
    /// The returned value is not parsed yet: a placeholder expression is
    /// stored and every token up to and including the next semicolon is
    /// discarded. If the input ends before a semicolon is found, the
    /// remaining tokens are consumed and the statement is still returned.
    fn parse(parser: &mut Parser) -> Option<Self> {
        let stmt = ReturnStatement {
            return_value: Some(Expression::None),
        };

        // Iterate instead of spinning on `next() != Some(semicolon)`: once
        // the token stream is exhausted `next()` keeps returning `None`,
        // which would otherwise loop forever on input such as `return 5`.
        let semicolon = Token::new(TokenType::Semicolon);
        for token in parser.lexer.by_ref() {
            if token == semicolon {
                break;
            }
        }

        Some(stmt)
    }

    // TODO: REMOVE THIS!
    /// Keyword that introduces a `return` statement.
    const fn token_literal() -> &'static str {
        "return"
    }
}
/// Renders the statement as `return <value>;`, or `return;` when there is
/// no return value.
///
/// Implemented as `Display` instead of a hand-written `ToString`; the
/// blanket `ToString` impl keeps `to_string()` available.
impl std::fmt::Display for ReturnStatement {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(Self::token_literal())?;
        if let Some(value) = &self.return_value {
            // Uses the `From<&Expression> for String` conversion.
            write!(f, " {}", String::from(value))?;
        }
        f.write_str(";")
    }
}
/// A statement that consists of a single expression.
#[derive(Debug, PartialEq)]
pub struct ExpressionStatement {
/// Copy of the token the statement was parsed from.
pub token: Token,
/// The expression parsed for this statement.
pub expression: Expression,
}
impl ExpressionStatement {
    /// Parses an expression statement that starts at `current_token`.
    ///
    /// Returns `None` when the expression itself cannot be parsed. A
    /// semicolon directly after the expression is consumed when present.
    fn parse(parser: &mut Parser, current_token: Token) -> Option<Self> {
        // Keep a copy of the starting token; the original is handed to the
        // expression parser.
        let token = current_token.clone();
        let expression = Expression::parse(parser, current_token, ExpressionPriority::Lowest)?;

        if parser.peek_token_is(TokenType::Semicolon) {
            parser.lexer.next();
        }

        Some(ExpressionStatement { token, expression })
    }
}
/// Renders the statement as the source text of its expression.
///
/// Implemented as `Display` instead of a hand-written `ToString`; the
/// blanket `ToString` impl keeps `to_string()` available.
impl std::fmt::Display for ExpressionStatement {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.expression.to_string())
    }
}
/// Precedence level handed to `Expression::parse`.
///
/// The discriminants increase from `Lowest` (0) to `Call` (6).
// NOTE(review): the variant names suggest which operator group each level is
// meant for (e.g. `Sum`, `Product`, `Prefix`), but nothing visible here reads
// the value yet — confirm once operator parsing is implemented.
#[derive(Debug, PartialEq)]
enum ExpressionPriority {
Lowest = 0,
Equals = 1,
LessGreater = 2,
Sum = 3,
Product = 4,
Prefix = 5,
Call = 6,
}
/// An expression node of the AST.
// TODO: Work in progress; identifiers are the only real expression kind so far.
#[derive(Debug, PartialEq)]
pub enum Expression {
/// A bare identifier.
Identifier(Identifier),
// TODO: Temporary placeholder value. Should be removed once this section is done
None,
}
impl Expression {
    /// Parses the expression that starts at `token`.
    ///
    /// Only identifier tokens are handled so far; every other token type
    /// yields `None`. `precedence` is accepted but not used yet.
    fn parse(parser: &mut Parser, token: Token, precedence: ExpressionPriority) -> Option<Self> {
        match token.name {
            TokenType::Ident => Self::parse_identifier(parser, token),
            _ => None,
        }
    }

    /// Builds an identifier expression from `token`.
    ///
    /// Returns `None` when the token carries no literal. `parser` is not
    /// used by this function.
    fn parse_identifier(parser: &mut Parser, token: Token) -> Option<Self> {
        Some(Self::Identifier(Identifier::new(
            token.name,
            &token.literal?,
        )))
    }
}

/// Renders the expression as source text.
///
/// Provided through `Display` instead of an inherent `to_string` method, so
/// the method no longer shadows `ToString::to_string`; existing
/// `expr.to_string()` calls resolve to the blanket `ToString` impl and
/// produce the same text.
impl std::fmt::Display for Expression {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Expression::Identifier(v) => f.write_str(&v.to_string()),
            Expression::None => f.write_str("None"),
        }
    }
}
/// Converts a borrowed expression into an owned `String` holding its
/// rendered text, so callers can write `String::from(&expr)` or
/// `(&expr).into()`.
impl From<&Expression> for String {
    fn from(expr: &Expression) -> Self {
        expr.to_string()
    }
}
// Identifier will be an expression
@ -97,4 +230,45 @@ impl Identifier {
value: v.to_string(),
}
}
/// Returns an owned copy of the identifier's name.
pub fn to_string(&self) -> String {
    String::from(self.value.as_str())
}
}
#[cfg(test)]
mod tests {
    use crate::{
        lexer::{Token, TokenType},
        parser::{
            ast::{Expression, Identifier, LetStatement, ReturnStatement, Statement},
            Program,
        },
    };

    /// A hand-built AST must render back to the expected source text.
    #[test]
    fn test_string() {
        // Shorthand for an identifier expression with the given token type and text.
        let ident_expr =
            |kind: TokenType, text: &str| Expression::Identifier(Identifier::new(kind, text));

        let let_stmt = Statement::Let(LetStatement {
            name: Identifier::new(TokenType::Ident, "myVar"),
            value: Some(ident_expr(TokenType::Ident, "anotherVar")),
        });
        let return_five = Statement::Return(ReturnStatement {
            return_value: Some(ident_expr(TokenType::Int, "5")),
        });
        let bare_return = Statement::Return(ReturnStatement { return_value: None });

        let program = Program {
            statements: vec![let_stmt, return_five, bare_return],
        };

        let rendered = program.to_string();
        assert_eq!(rendered, "let myVar = anotherVar;\nreturn 5;\nreturn;\n");
    }
}

View File

@ -12,7 +12,7 @@ use {
pub struct Parser<'a> {
lexer: Peekable<Lexer<'a>>,
pub errors: Vec<ParserError>,
errors: Vec<ParserError>,
}
impl<'a> Parser<'a> {
@ -30,7 +30,7 @@ impl<'a> Parser<'a> {
if token.name == TokenType::EOF {
break;
}
match Statement::parse(self, token.clone()) {
match Statement::parse(self, token) {
Some(v) => program.statements.push(v),
None => {} // This will happen in case of a parsing error or something
}
@ -86,12 +86,28 @@ impl Display for ParserError {
#[cfg(test)]
mod tests {
use crate::{
lexer::{Lexer, TokenType},
lexer::{Lexer, Token, TokenType},
parser::{
ast::{Identifier, LetStatement, Program, Statement},
ast::{Expression, ExpressionStatement, Identifier, LetStatement, Program, Statement},
Parser,
},
};
/// Prints every error the parser collected to stderr.
///
/// Does nothing when the parser has no errors. This only reports; it never
/// fails the calling test.
fn check_parser_errors(p: &Parser) {
    if p.errors.is_empty() {
        return;
    }

    let mut report = format!("parser has {} errors\n", p.errors.len());
    for error in &p.errors {
        report.push_str(&format!("parser error: {}\n", error));
    }
    eprintln!("{}", report);
}
#[test]
fn let_statements() {
let mut lexer = Lexer::new("let x =5;let y=10; let foobar=538383;");
@ -119,10 +135,11 @@ mod tests {
}
);
lexer = Lexer::new("let x = 5;let x 5;let = 10; let 83838383;");
lexer = Lexer::new("let x = 5;let x 5; let 83838383; let = 10;");
parser = Parser::new(lexer);
let program = parser.parse_program();
check_parser_errors(&parser);
// println!("{:?}", program);
assert_eq!(parser.errors.len(), 3);
assert_eq!(program.statements.len(), 1);
}
@ -137,19 +154,4 @@ mod tests {
assert_eq!(program.statements.len(), 3);
assert_eq!(parser.errors.len(), 0);
}
/// Writes a summary line followed by one line per collected parser error to
/// stderr; stays silent when the parser has no errors.
fn check_parser_errors(p: &Parser) {
    if !p.errors.is_empty() {
        let header = format!("parser has {} errors\n", p.errors.len());
        let out: String = std::iter::once(header)
            .chain(
                p.errors
                    .iter()
                    .map(|error| format!("parser error: {}\n", error)),
            )
            .collect();
        eprintln!("{}", out);
    }
}
}

View File

@ -27,6 +27,5 @@ fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
let mut parser = Parser::new(tokens);
let stmts = parser.parse_program();
println!("parser={:?}", stmts);
}
}