Working on parser
1. Implemented ToString on AST nodes, which allows converting the AST back to source code in tests and helps ensure correctness. 2. Locked the Rust toolchain to a specific version.
This commit is contained in:
parent
e6e1be2e00
commit
2fd6c9ca6d
1
rust-toolchain
Normal file
1
rust-toolchain
Normal file
|
@ -0,0 +1 @@
|
|||
nightly-2020-01-08
|
|
@ -1,5 +1,6 @@
|
|||
use std::{
|
||||
collections::HashMap,
|
||||
convert::Into,
|
||||
iter::Peekable,
|
||||
str::{self, Chars},
|
||||
};
|
||||
|
@ -28,7 +29,7 @@ pub enum TokenType {
|
|||
// Ident is basically most things that are not covered
|
||||
// by other variants of this enum.
|
||||
Ident,
|
||||
|
||||
Int,
|
||||
// Operators
|
||||
Assign,
|
||||
Plus,
|
||||
|
@ -52,13 +53,44 @@ pub enum TokenType {
|
|||
// Keywords
|
||||
Function,
|
||||
If,
|
||||
Else,
|
||||
Let,
|
||||
True,
|
||||
Else,
|
||||
False,
|
||||
Return,
|
||||
}
|
||||
|
||||
impl Into<&'static str> for TokenType {
|
||||
fn into(self) -> &'static str {
|
||||
match self {
|
||||
TokenType::Assign => "=",
|
||||
TokenType::Plus => "+",
|
||||
TokenType::Multiply => "*",
|
||||
TokenType::Divide => "/",
|
||||
TokenType::Subtract => "-",
|
||||
TokenType::ExclamationMark => "!",
|
||||
TokenType::LessThan => "<=",
|
||||
TokenType::GreaterThan => ">=",
|
||||
TokenType::Equals => "==",
|
||||
TokenType::NotEquals => "!=",
|
||||
TokenType::Comma => ",",
|
||||
TokenType::Semicolon => ";",
|
||||
TokenType::LParen => "(",
|
||||
TokenType::RParen => ")",
|
||||
TokenType::LBrace => "{",
|
||||
TokenType::RBrace => "}",
|
||||
TokenType::Function => "fn",
|
||||
TokenType::If => "if",
|
||||
TokenType::Else => "else",
|
||||
TokenType::Let => "let",
|
||||
TokenType::True => "true",
|
||||
TokenType::False => "false",
|
||||
TokenType::Return => "return",
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
|
||||
pub struct Token {
|
||||
pub name: TokenType,
|
||||
|
@ -81,6 +113,10 @@ impl Token {
|
|||
literal: Some(value.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_string(&self) -> &'static str {
|
||||
self.name.into()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
@ -200,7 +236,7 @@ impl<'a> Iterator for Lexer<'a> {
|
|||
}
|
||||
Some(ch) if ch.is_ascii_digit() => {
|
||||
let number = self.read_number(ch);
|
||||
Some(Token::with_value(TokenType::Ident, &number))
|
||||
Some(Token::with_value(TokenType::Int, &number))
|
||||
}
|
||||
None if !self.eof_sent => {
|
||||
self.eof_sent = true;
|
||||
|
@ -261,12 +297,12 @@ mod tests {
|
|||
Token::new(TokenType::Let),
|
||||
Token::with_value(TokenType::Ident, "five"),
|
||||
Token::new(TokenType::Assign),
|
||||
Token::with_value(TokenType::Ident, "5"),
|
||||
Token::with_value(TokenType::Int, "5"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::Let),
|
||||
Token::with_value(TokenType::Ident, "ten"),
|
||||
Token::new(TokenType::Assign),
|
||||
Token::with_value(TokenType::Ident, "10"),
|
||||
Token::with_value(TokenType::Int, "10"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::Let),
|
||||
Token::with_value(TokenType::Ident, "add"),
|
||||
|
@ -303,16 +339,16 @@ mod tests {
|
|||
"let result = add(five, ten);
|
||||
!-/*5;
|
||||
5 < 10 > 5;
|
||||
|
||||
|
||||
if(5 < 10) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
10 == 10;
|
||||
9 != 10;
|
||||
|
||||
|
||||
"
|
||||
)
|
||||
.collect::<Vec<Token>>(),
|
||||
|
@ -331,19 +367,19 @@ mod tests {
|
|||
Token::new(TokenType::Subtract),
|
||||
Token::new(TokenType::Divide),
|
||||
Token::new(TokenType::Multiply),
|
||||
Token::with_value(TokenType::Ident, "5"),
|
||||
Token::with_value(TokenType::Int, "5"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::with_value(TokenType::Ident, "5"),
|
||||
Token::with_value(TokenType::Int, "5"),
|
||||
Token::new(TokenType::LessThan),
|
||||
Token::with_value(TokenType::Ident, "10"),
|
||||
Token::with_value(TokenType::Int, "10"),
|
||||
Token::new(TokenType::GreaterThan),
|
||||
Token::with_value(TokenType::Ident, "5"),
|
||||
Token::with_value(TokenType::Int, "5"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::If),
|
||||
Token::new(TokenType::LParen),
|
||||
Token::with_value(TokenType::Ident, "5"),
|
||||
Token::with_value(TokenType::Int, "5"),
|
||||
Token::new(TokenType::LessThan),
|
||||
Token::with_value(TokenType::Ident, "10"),
|
||||
Token::with_value(TokenType::Int, "10"),
|
||||
Token::new(TokenType::RParen),
|
||||
Token::new(TokenType::LBrace),
|
||||
Token::new(TokenType::Return),
|
||||
|
@ -356,13 +392,13 @@ mod tests {
|
|||
Token::new(TokenType::False),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::RBrace),
|
||||
Token::with_value(TokenType::Ident, "10"),
|
||||
Token::with_value(TokenType::Int, "10"),
|
||||
Token::new(TokenType::Equals),
|
||||
Token::with_value(TokenType::Ident, "10"),
|
||||
Token::with_value(TokenType::Int, "10"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::with_value(TokenType::Ident, "9"),
|
||||
Token::with_value(TokenType::Int, "9"),
|
||||
Token::new(TokenType::NotEquals),
|
||||
Token::with_value(TokenType::Ident, "10"),
|
||||
Token::with_value(TokenType::Int, "10"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::EOF),
|
||||
],
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
use crate::{
|
||||
lexer::{Token, TokenType},
|
||||
parser::Parser,
|
||||
// TODO: Maybe implement String method to pretty print all AST nodes
|
||||
use {
|
||||
crate::{
|
||||
lexer::{Token, TokenType},
|
||||
parser::Parser,
|
||||
},
|
||||
std::convert::From,
|
||||
};
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
|
@ -8,6 +12,18 @@ pub struct Program {
|
|||
pub statements: Vec<Statement>,
|
||||
}
|
||||
|
||||
impl ToString for Program {
|
||||
fn to_string(&self) -> String {
|
||||
let mut out = String::new();
|
||||
|
||||
for statement in &self.statements {
|
||||
out.push_str(&statement.to_string());
|
||||
out.push('\n');
|
||||
}
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
pub enum Node {
|
||||
Statement(Statement),
|
||||
Expression(Expression),
|
||||
|
@ -17,6 +33,7 @@ pub enum Node {
|
|||
pub enum Statement {
|
||||
Let(LetStatement),
|
||||
Return(ReturnStatement),
|
||||
ExpressionStatement(ExpressionStatement),
|
||||
}
|
||||
|
||||
impl<'a> Statement {
|
||||
|
@ -24,14 +41,22 @@ impl<'a> Statement {
|
|||
match token.name {
|
||||
TokenType::Let => Some(Statement::Let(LetStatement::parse(parser)?)),
|
||||
TokenType::Return => Some(Statement::Return(ReturnStatement::parse(parser)?)),
|
||||
_ => None,
|
||||
_ => Some(Statement::ExpressionStatement(ExpressionStatement::parse(
|
||||
parser, token,
|
||||
)?)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Expressions are not going to be a struct so using this here just as a placeholder
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Expression;
|
||||
impl ToString for Statement {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
Statement::Let(v) => v.to_string(),
|
||||
Statement::Return(v) => v.to_string(),
|
||||
Statement::ExpressionStatement(v) => v.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct LetStatement {
|
||||
|
@ -49,11 +74,8 @@ impl LetStatement {
|
|||
value: None,
|
||||
};
|
||||
|
||||
if let Some(v) = parser.expect_peek(TokenType::Ident) {
|
||||
stmt.name.value = v.literal?;
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
let ident = parser.expect_peek(TokenType::Ident)?;
|
||||
stmt.name.value = ident.literal?;
|
||||
|
||||
parser.expect_peek(TokenType::Assign)?;
|
||||
|
||||
|
@ -64,6 +86,23 @@ impl LetStatement {
|
|||
|
||||
Some(stmt)
|
||||
}
|
||||
|
||||
/// Returns the literal `"let"`, which the statement's string rendering uses as
/// its leading keyword.
const fn token_literal() -> &'static str {
|
||||
"let"
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for LetStatement {
|
||||
fn to_string(&self) -> String {
|
||||
let mut out = format!("{} {} = ", Self::token_literal(), self.name.value);
|
||||
|
||||
if let Some(v) = &self.value {
|
||||
let a: String = v.into();
|
||||
out.push_str(&a);
|
||||
}
|
||||
out.push(';');
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
|
@ -74,11 +113,105 @@ pub struct ReturnStatement {
|
|||
impl ReturnStatement {
|
||||
fn parse(parser: &mut Parser) -> Option<Self> {
|
||||
let stmt = ReturnStatement {
|
||||
return_value: Some(Expression),
|
||||
return_value: Some(Expression::None),
|
||||
};
|
||||
while parser.lexer.next() != Some(Token::new(TokenType::Semicolon)) {}
|
||||
return Some(stmt);
|
||||
}
|
||||
|
||||
// TODO: REMOVE THIS!
|
||||
/// Returns the literal `"return"`, which the statement's string rendering uses
/// as its leading keyword.
const fn token_literal() -> &'static str {
|
||||
"return"
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for ReturnStatement {
|
||||
fn to_string(&self) -> String {
|
||||
let mut out = String::from(Self::token_literal());
|
||||
|
||||
if let Some(v) = &self.return_value {
|
||||
out.push(' ');
|
||||
let a: String = v.into();
|
||||
out.push_str(&a);
|
||||
}
|
||||
out.push(';');
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
/// A statement that consists solely of an expression.
#[derive(Debug, PartialEq)]
|
||||
pub struct ExpressionStatement {
|
||||
/// The token the statement was parsed from (a clone of the `current_token`
/// handed to `ExpressionStatement::parse`).
pub token: Token,
|
||||
/// The expression produced by `Expression::parse` for that token.
pub expression: Expression,
|
||||
}
|
||||
|
||||
impl ExpressionStatement {
|
||||
fn parse(parser: &mut Parser, current_token: Token) -> Option<Self> {
|
||||
// let expr = Expression::parse(parser, token.clone(), ExpressionPriority::Lowest)?;
|
||||
let stmt = ExpressionStatement {
|
||||
token: current_token.clone(),
|
||||
expression: Expression::parse(parser, current_token, ExpressionPriority::Lowest)?,
|
||||
};
|
||||
if parser.peek_token_is(TokenType::Semicolon) {
|
||||
parser.lexer.next();
|
||||
}
|
||||
Some(stmt)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for ExpressionStatement {
|
||||
fn to_string(&self) -> String {
|
||||
self.expression.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Priority levels passed to `Expression::parse` as its `precedence` argument.
///
/// Discriminants increase from `Lowest` (0) to `Call` (6).
/// NOTE(review): presumably a larger value means tighter binding; the visible
/// `Expression::parse` does not consult the value yet, so confirm once it does.
#[derive(Debug, PartialEq)]
|
||||
enum ExpressionPriority {
|
||||
Lowest = 0,
|
||||
Equals = 1,
|
||||
LessGreater = 2,
|
||||
Sum = 3,
|
||||
Product = 4,
|
||||
Prefix = 5,
|
||||
Call = 6,
|
||||
}
|
||||
|
||||
// TODO: Expressions are not going to be a struct so using this here just as a placeholder
|
||||
|
||||
/// An expression node of the AST.
#[derive(Debug, PartialEq)]
|
||||
pub enum Expression {
|
||||
/// An identifier expression; rendered as the identifier's own string form.
Identifier(Identifier),
|
||||
// TODO: Temporary placeholder value. Should be removed once this section is done
|
||||
/// Placeholder variant; rendered as the text `None`.
None,
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
fn parse(parser: &mut Parser, token: Token, precedence: ExpressionPriority) -> Option<Self> {
|
||||
match token.name {
|
||||
TokenType::Ident => Self::parse_identifier(parser, token),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_identifier(parser: &mut Parser, token: Token) -> Option<Self> {
|
||||
Some(Self::Identifier(Identifier::new(
|
||||
token.name,
|
||||
&token.literal?,
|
||||
)))
|
||||
}
|
||||
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
Expression::Identifier(v) => v.to_string(),
|
||||
Expression::None => "None".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&Expression> for String {
|
||||
fn from(expr: &Expression) -> String {
|
||||
expr.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
// Identifier will be an expression
|
||||
|
@ -97,4 +230,45 @@ impl Identifier {
|
|||
value: v.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_string(&self) -> String {
|
||||
self.value.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::{
        lexer::{Token, TokenType},
        parser::{
            ast::{Expression, Identifier, LetStatement, ReturnStatement, Statement},
            Program,
        },
    };

    /// A hand-built program must render back to the expected source text,
    /// one statement per line.
    #[test]
    fn test_string() {
        let let_with_value = Statement::Let(LetStatement {
            name: Identifier::new(TokenType::Ident, "myVar"),
            value: Some(Expression::Identifier(Identifier::new(
                TokenType::Ident,
                "anotherVar",
            ))),
        });
        let return_with_value = Statement::Return(ReturnStatement {
            return_value: Some(Expression::Identifier(Identifier::new(
                TokenType::Int,
                "5",
            ))),
        });
        let bare_return = Statement::Return(ReturnStatement { return_value: None });

        let program = Program {
            statements: vec![let_with_value, return_with_value, bare_return],
        };

        let expected = "let myVar = anotherVar;\nreturn 5;\nreturn;\n";
        assert_eq!(program.to_string(), expected);
    }
}
|
||||
|
|
|
@ -12,7 +12,7 @@ use {
|
|||
|
||||
pub struct Parser<'a> {
|
||||
lexer: Peekable<Lexer<'a>>,
|
||||
pub errors: Vec<ParserError>,
|
||||
errors: Vec<ParserError>,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
|
@ -30,7 +30,7 @@ impl<'a> Parser<'a> {
|
|||
if token.name == TokenType::EOF {
|
||||
break;
|
||||
}
|
||||
match Statement::parse(self, token.clone()) {
|
||||
match Statement::parse(self, token) {
|
||||
Some(v) => program.statements.push(v),
|
||||
None => {} // This will happen in case of a parsing error or something
|
||||
}
|
||||
|
@ -86,12 +86,28 @@ impl Display for ParserError {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{
|
||||
lexer::{Lexer, TokenType},
|
||||
lexer::{Lexer, Token, TokenType},
|
||||
parser::{
|
||||
ast::{Identifier, LetStatement, Program, Statement},
|
||||
ast::{Expression, ExpressionStatement, Identifier, LetStatement, Program, Statement},
|
||||
Parser,
|
||||
},
|
||||
};
|
||||
|
||||
fn check_parser_errors(p: &Parser) {
|
||||
if p.errors.is_empty() {
|
||||
return;
|
||||
} else {
|
||||
let mut out = String::new();
|
||||
|
||||
out.push_str(&format!("parser has {} errors\n", p.errors.len()));
|
||||
|
||||
for error in &p.errors {
|
||||
out.push_str(&format!("parser error: {}\n", error));
|
||||
}
|
||||
eprintln!("{}", out);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn let_statements() {
|
||||
let mut lexer = Lexer::new("let x =5;let y=10; let foobar=538383;");
|
||||
|
@ -119,10 +135,11 @@ mod tests {
|
|||
}
|
||||
);
|
||||
|
||||
lexer = Lexer::new("let x = 5;let x 5;let = 10; let 83838383;");
|
||||
lexer = Lexer::new("let x = 5;let x 5; let 83838383; let = 10;");
|
||||
parser = Parser::new(lexer);
|
||||
let program = parser.parse_program();
|
||||
check_parser_errors(&parser);
|
||||
// println!("{:?}", program);
|
||||
assert_eq!(parser.errors.len(), 3);
|
||||
assert_eq!(program.statements.len(), 1);
|
||||
}
|
||||
|
@ -137,19 +154,4 @@ mod tests {
|
|||
assert_eq!(program.statements.len(), 3);
|
||||
assert_eq!(parser.errors.len(), 0);
|
||||
}
|
||||
|
||||
fn check_parser_errors(p: &Parser) {
|
||||
if p.errors.is_empty() {
|
||||
return;
|
||||
} else {
|
||||
let mut out = String::new();
|
||||
|
||||
out.push_str(&format!("parser has {} errors\n", p.errors.len()));
|
||||
|
||||
for error in &p.errors {
|
||||
out.push_str(&format!("parser error: {}\n", error));
|
||||
}
|
||||
eprintln!("{}", out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,6 +27,5 @@ fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
|
|||
let mut parser = Parser::new(tokens);
|
||||
|
||||
let stmts = parser.parse_program();
|
||||
println!("parser={:?}", stmts);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user