2019-09-06 19:46:52 +00:00
|
|
|
pub mod ast;
|
2019-12-25 12:25:15 +00:00
|
|
|
use {
|
|
|
|
crate::{
|
|
|
|
lexer::{Lexer, Token, TokenType},
|
2020-01-11 18:33:18 +00:00
|
|
|
parser::ast::{Expression, Program, Statement},
|
2019-12-25 12:25:15 +00:00
|
|
|
},
|
2019-12-25 17:37:39 +00:00
|
|
|
std::{
|
2020-01-11 18:33:18 +00:00
|
|
|
collections::HashMap,
|
2019-12-25 17:37:39 +00:00
|
|
|
fmt::{Display, Error as FmtError, Formatter},
|
|
|
|
iter::Peekable,
|
|
|
|
},
|
2019-12-25 12:25:15 +00:00
|
|
|
};
|
2019-09-06 19:46:52 +00:00
|
|
|
|
2020-01-11 18:33:18 +00:00
|
|
|
/// Signature of a parse function that `Parser::register_prefix` stores per [`TokenType`].
///
/// The function receives the parser itself plus a token and yields the parsed
/// [`Expression`], or `None` when it produces no expression.
// NOTE(review): the token argument is presumably the one that selected this function — confirm
// against the call site once prefix dispatch is implemented.
type PrefixParseFn = fn(&mut Parser, token: Token) -> Option<Expression>;
|
|
|
|
/// Signature of a parse function that `Parser::register_infix` stores per [`TokenType`].
///
/// The function takes an already-built [`Expression`] and yields a new one, or `None` when
/// it produces no expression.
// NOTE(review): the argument is presumably the left-hand operand of the infix operator — confirm.
type InfixParseFn = fn(Expression) -> Option<Expression>;
|
|
|
|
|
2019-09-07 15:00:31 +00:00
|
|
|
pub struct Parser<'a> {
|
2019-09-07 14:02:44 +00:00
|
|
|
lexer: Peekable<Lexer<'a>>,
|
2020-01-11 17:55:51 +00:00
|
|
|
errors: Vec<ParserError>,
|
2020-01-11 18:33:18 +00:00
|
|
|
prefix_parse_fns: HashMap<TokenType, PrefixParseFn>,
|
|
|
|
infix_parse_fns: HashMap<TokenType, InfixParseFn>,
|
2019-09-07 14:02:44 +00:00
|
|
|
}
|
2019-04-14 11:15:40 +00:00
|
|
|
|
2019-09-07 14:02:44 +00:00
|
|
|
impl<'a> Parser<'a> {
|
2019-12-25 12:25:15 +00:00
|
|
|
pub fn new(lexer: Lexer<'a>) -> Self {
|
2020-01-11 18:33:18 +00:00
|
|
|
let mut parser = Parser {
|
2019-09-07 14:02:44 +00:00
|
|
|
lexer: lexer.peekable(),
|
2019-12-25 17:37:39 +00:00
|
|
|
errors: vec![],
|
2020-01-11 18:33:18 +00:00
|
|
|
prefix_parse_fns: HashMap::new(),
|
|
|
|
infix_parse_fns: HashMap::new(),
|
|
|
|
};
|
|
|
|
|
|
|
|
parser.register_prefix(TokenType::Ident, Expression::parse_identifier);
|
|
|
|
parser.register_prefix(TokenType::Int, Expression::parse_integer_literal);
|
|
|
|
parser
|
2019-09-07 14:02:44 +00:00
|
|
|
}
|
|
|
|
|
2019-12-25 17:37:39 +00:00
|
|
|
pub fn parse_program(&mut self) -> Program {
|
2019-12-25 12:25:15 +00:00
|
|
|
let mut program = Program { statements: vec![] };
|
2019-09-12 19:38:09 +00:00
|
|
|
|
2019-12-25 17:37:39 +00:00
|
|
|
while let Some(token) = self.lexer.next() {
|
2019-12-25 12:25:15 +00:00
|
|
|
if token.name == TokenType::EOF {
|
|
|
|
break;
|
|
|
|
}
|
2020-01-11 17:55:51 +00:00
|
|
|
match Statement::parse(self, token) {
|
2019-12-25 12:25:15 +00:00
|
|
|
Some(v) => program.statements.push(v),
|
2019-12-25 17:37:39 +00:00
|
|
|
None => {} // This will happen in case of a parsing error or something
|
2019-09-07 14:02:44 +00:00
|
|
|
}
|
2019-08-30 19:58:20 +00:00
|
|
|
}
|
2019-09-07 14:02:44 +00:00
|
|
|
|
2019-12-25 12:25:15 +00:00
|
|
|
program
|
2019-09-07 14:02:44 +00:00
|
|
|
}
|
2019-09-12 17:19:57 +00:00
|
|
|
|
2019-12-25 12:25:15 +00:00
|
|
|
fn peek_token_is(&mut self, token: TokenType) -> bool {
|
|
|
|
match self.lexer.peek() {
|
|
|
|
Some(v) => v.name == token,
|
|
|
|
None => false,
|
|
|
|
}
|
2019-09-06 19:46:52 +00:00
|
|
|
}
|
2019-04-14 11:15:40 +00:00
|
|
|
|
2019-12-25 12:25:15 +00:00
|
|
|
// TODO: Remove this. We most likely don't need it anywhere
|
|
|
|
// fn current_token_is(&self, token: TokenType) -> bool {
|
|
|
|
// false
|
|
|
|
// }
|
2019-04-14 11:15:40 +00:00
|
|
|
|
2019-12-25 12:25:15 +00:00
|
|
|
fn expect_peek(&mut self, token: TokenType) -> Option<Token> {
|
|
|
|
if self.peek_token_is(token) {
|
|
|
|
self.lexer.next()
|
|
|
|
} else {
|
2019-12-25 17:37:39 +00:00
|
|
|
let got_token = match self.lexer.peek() {
|
|
|
|
Some(v) => Some(v.name),
|
|
|
|
None => None,
|
|
|
|
};
|
|
|
|
self.peek_error(token, got_token);
|
2019-12-25 12:25:15 +00:00
|
|
|
None
|
2019-09-07 14:02:44 +00:00
|
|
|
}
|
2019-04-14 11:15:40 +00:00
|
|
|
}
|
2019-12-25 17:37:39 +00:00
|
|
|
|
|
|
|
fn peek_error(&mut self, et: TokenType, gt: Option<TokenType>) {
|
|
|
|
let msg = match gt {
|
|
|
|
Some(v) => format!("expected next token to be {:?}, Got {:?} instead", et, v),
|
|
|
|
None => format!("expected next token to be {:?}, Got None instead", et),
|
|
|
|
};
|
|
|
|
self.errors.push(ParserError { reason: msg });
|
|
|
|
}
|
2020-01-11 18:33:18 +00:00
|
|
|
|
|
|
|
fn register_prefix(&mut self, token: TokenType, f: PrefixParseFn) {
|
|
|
|
self.prefix_parse_fns.insert(token, f);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn register_infix(&mut self, token: TokenType, f: InfixParseFn) {
|
|
|
|
self.infix_parse_fns.insert(token, f);
|
|
|
|
}
|
2019-12-25 17:37:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// A single problem recorded by the parser while processing its input.
///
/// The type implements [`Display`] (the bare message) and [`std::error::Error`], so it can be
/// printed, compared in assertions, and converted into `Box<dyn std::error::Error>` with `?`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserError {
    /// Human-readable description of what went wrong.
    reason: String,
}

impl Display for ParserError {
    /// Writes the stored message verbatim, with no prefix or decoration.
    fn fmt(&self, fmt: &mut Formatter) -> Result<(), FmtError> {
        fmt.write_str(&self.reason)
    }
}

// No source error to chain and nothing beyond the message, so the default methods suffice.
impl std::error::Error for ParserError {}
|
2019-09-12 17:19:57 +00:00
|
|
|
|
2019-12-25 12:25:15 +00:00
|
|
|
#[cfg(test)]
mod tests {
    use crate::{
        lexer::{Lexer, Token, TokenType},
        parser::{
            ast::{
                Expression, ExpressionStatement, Identifier, IntegerLiteral, LetStatement, Program,
                Statement,
            },
            Parser,
        },
    };

    /// Dumps every error recorded by `p` to stderr; silent when there are none.
    ///
    /// This only reports. It never fails the calling test, so tests that deliberately feed
    /// malformed input can call it too.
    fn check_parser_errors(p: &Parser) {
        if p.errors.is_empty() {
            return;
        }

        let details: String = p
            .errors
            .iter()
            .map(|error| format!("parser error: {}\n", error))
            .collect();
        eprintln!("parser has {} errors\n{}", p.errors.len(), details);
    }

    /// Well-formed `let` statements parse into one statement each; malformed ones are
    /// reported as one error each.
    #[test]
    fn let_statements() {
        // Expected AST node for `let <name> = ...;` as the parser currently produces it.
        let let_binding = |name: &'static str| {
            Statement::Let(LetStatement {
                name: Identifier::new(TokenType::Let, name),
                value: None,
            })
        };

        // Valid input: three bindings, with irregular spacing around `=`.
        let mut parser = Parser::new(Lexer::new("let x =5;let y=10; let foobar=538383;"));
        let program = parser.parse_program();
        check_parser_errors(&parser);

        assert_eq!(program.statements.len(), 3);
        assert_eq!(
            program,
            Program {
                statements: vec![let_binding("x"), let_binding("y"), let_binding("foobar")],
            }
        );

        // Invalid input: each of the three statements is missing a required token.
        let mut failing_parser = Parser::new(Lexer::new("let x 5; let 10; let 83838383;"));
        let _program = failing_parser.parse_program();
        check_parser_errors(&failing_parser);

        assert_eq!(failing_parser.errors.len(), 3);
    }

    /// Three `return` statements parse into three statements without any error.
    #[test]
    fn return_statements() {
        let mut parser = Parser::new(Lexer::new("return 5; return 10; return add(10);"));
        let program = parser.parse_program();

        check_parser_errors(&parser);
        assert_eq!(program.statements.len(), 3);
        assert_eq!(parser.errors.len(), 0);
    }

    /// A bare identifier followed by `;` parses into a single expression statement.
    #[test]
    fn identifier_expression() {
        let mut parser = Parser::new(Lexer::new("foobar;"));
        let program = parser.parse_program();

        check_parser_errors(&parser);
        assert_eq!(program.statements.len(), 1);

        let expected = vec![Statement::ExpressionStatement(ExpressionStatement {
            token: Token::with_value(TokenType::Ident, "foobar"),
            expression: Expression::Identifier(Identifier::new(TokenType::Ident, "foobar")),
        })];
        assert_eq!(program.statements, expected);
    }

    /// A bare integer literal followed by `;` parses into a single expression statement.
    #[test]
    fn integer_literal_expression() {
        let mut parser = Parser::new(Lexer::new("5;"));
        let program = parser.parse_program();
        check_parser_errors(&parser);

        let expected = vec![Statement::ExpressionStatement(ExpressionStatement {
            token: Token::with_value(TokenType::Int, "5"),
            expression: Expression::IntegerLiteral(IntegerLiteral::new(TokenType::Int, 5)),
        })];
        assert_eq!(program.statements, expected);
    }
}
|