Refactor lexer tokens and add identifier expression parser
1. Refactor Lexer::Token into Lexer::Token + Lexer::TokenType.
2. Add From/TryFrom conversions on Lexer::Literal.
3. Add an ExpressionStatement parser.
4. Add From<String> for ParseError.
5. Add prefix_parse_fn and infix_parse_fn tables.
6. Add parse_expression.
7. Add tests.
8. Fix lint issues reported by Clippy.
This commit is contained in:
parent 77ecd6e3dd
commit 2863ad8aa4
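As context for changes 1 and 2: a token now pairs a TokenType with an optional Literal payload, and the From/TryFrom impls move values in and out of that payload. A minimal sketch of the intended usage, assuming the Token, TokenType, and Literal types introduced in src/lexer/mod.rs below (the assertions are illustrative, not part of the commit):

    use std::convert::TryFrom;

    fn demo() {
        // Keyword/operator tokens carry no payload.
        let semi = Token::new(TokenType::Semicolon);
        assert_eq!(semi.value, None);

        // Identifiers and integers store their payload as a Literal, built via From.
        let ident = Token::with_value(TokenType::Ident, "five".into()); // Literal::String
        let count = Token::with_value(TokenType::Int, 5.into()); // Literal::Int

        // TryFrom recovers the typed value and errors on a kind mismatch.
        assert_eq!(String::try_from(ident.value.unwrap()), Ok("five".to_owned()));
        assert_eq!(i64::try_from(count.value.unwrap()), Ok(5));
    }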
src/lexer/mod.rs (320 lines changed)
@@ -1,28 +1,29 @@
 use std::collections::HashMap;
+use std::convert::TryFrom;
 use std::iter::Peekable;
 use std::str::{self, Chars};

 lazy_static! {
     static ref IDENTMAP: HashMap<&'static str, Token> = {
         let mut m = HashMap::new();
-        m.insert("fn", Token::Function);
-        m.insert("let", Token::Let);
-        m.insert("true", Token::True);
-        m.insert("false", Token::False);
-        m.insert("return", Token::Return);
-        m.insert("if", Token::If);
-        m.insert("else", Token::Else);
+        m.insert("fn", Token::new(TokenType::Function));
+        m.insert("let", Token::new(TokenType::Let));
+        m.insert("true", Token::new(TokenType::True));
+        m.insert("false", Token::new(TokenType::False));
+        m.insert("return", Token::new(TokenType::Return));
+        m.insert("if", Token::new(TokenType::If));
+        m.insert("else", Token::new(TokenType::Else));
         m
     };
 }

-#[derive(Debug, PartialEq, Clone)]
-pub enum Token {
+#[derive(Debug, PartialEq, Eq, Clone, Hash)]
+pub enum TokenType {
     Illegal,
     EOF,

     // Identifiers
-    Int(i64),
+    Int,

     // Operators
     Assign,
@@ -52,7 +53,72 @@ pub enum Token {
     Else,
     False,
     Return,
-    Ident(String),
+    Ident,
 }
+
+#[derive(Debug, PartialEq, Eq, Clone, Hash)]
+pub struct Token {
+    pub name: TokenType,
+    pub value: Option<Literal>,
+}
+
+impl Token {
+    #[inline]
+    pub fn new(name: TokenType) -> Self {
+        Token { name, value: None }
+    }
+
+    #[inline]
+    pub fn with_value(name: TokenType, value: Literal) -> Self {
+        Token {
+            name,
+            value: Some(value),
+        }
+    }
+}
+
+#[derive(Debug, Hash, PartialEq, Eq, Clone)]
+pub enum Literal {
+    String(String),
+    Int(i64),
+}
+
+impl From<String> for Literal {
+    fn from(s: String) -> Literal {
+        Literal::String(s)
+    }
+}
+
+impl From<&str> for Literal {
+    fn from(s: &str) -> Literal {
+        Literal::String(s.to_owned())
+    }
+}
+
+impl From<i64> for Literal {
+    fn from(i: i64) -> Literal {
+        Literal::Int(i)
+    }
+}
+
+impl TryFrom<Literal> for String {
+    type Error = &'static str;
+    fn try_from(l: Literal) -> Result<String, Self::Error> {
+        match l {
+            Literal::String(v) => Ok(v),
+            Literal::Int(_) => Err("can not convert Int to String"),
+        }
+    }
+}
+
+impl TryFrom<Literal> for i64 {
+    type Error = &'static str;
+    fn try_from(l: Literal) -> Result<i64, Self::Error> {
+        match l {
+            Literal::Int(v) => Ok(v),
+            Literal::String(_) => Err("can not convert String to Int"),
+        }
+    }
+}
+
 #[derive(Debug, Clone)]
@@ -137,21 +203,21 @@ impl<'a> Iterator for Lexer<'a> {
                };
                if is_e {
                    self.read_char();
-                    Some(Token::Equals)
+                    Some(Token::new(TokenType::Equals))
                } else {
-                    Some(Token::Assign)
+                    Some(Token::new(TokenType::Assign))
                }
            }
-            Some('+') => Some(Token::Plus),
-            Some('*') => Some(Token::Multiply),
-            Some('/') => Some(Token::Divide),
-            Some('-') => Some(Token::Subtract),
-            Some(',') => Some(Token::Comma),
-            Some(';') => Some(Token::Semicolon),
-            Some('(') => Some(Token::LParen),
-            Some(')') => Some(Token::RParen),
-            Some('{') => Some(Token::LBrace),
-            Some('}') => Some(Token::RBrace),
+            Some('+') => Some(Token::new(TokenType::Plus)),
+            Some('*') => Some(Token::new(TokenType::Multiply)),
+            Some('/') => Some(Token::new(TokenType::Divide)),
+            Some('-') => Some(Token::new(TokenType::Subtract)),
+            Some(',') => Some(Token::new(TokenType::Comma)),
+            Some(';') => Some(Token::new(TokenType::Semicolon)),
+            Some('(') => Some(Token::new(TokenType::LParen)),
+            Some(')') => Some(Token::new(TokenType::RParen)),
+            Some('{') => Some(Token::new(TokenType::LBrace)),
+            Some('}') => Some(Token::new(TokenType::RBrace)),
            Some('!') => {
                let is_ne = match self.input.peek() {
                    Some(v) if *v == '=' => true,
@@ -159,27 +225,27 @@ impl<'a> Iterator for Lexer<'a> {
                };
                if is_ne {
                    self.read_char();
-                    Some(Token::NotEquals)
+                    Some(Token::new(TokenType::NotEquals))
                } else {
-                    Some(Token::ExclamationMark)
+                    Some(Token::new(TokenType::ExclamationMark))
                }
            }
-            Some('>') => Some(Token::GreaterThan),
-            Some('<') => Some(Token::LessThan),
+            Some('>') => Some(Token::new(TokenType::GreaterThan)),
+            Some('<') => Some(Token::new(TokenType::LessThan)),
            Some(ch) if is_letter(ch) => {
                let ident = self.read_identifier(ch);
                Some(lookup_ident(&ident))
            }
            Some(ch) if ch.is_ascii_digit() => {
                let number = self.read_number(ch);
-                Some(Token::Int(number))
+                Some(Token::with_value(TokenType::Int, (number as i64).into()))
            }
            None if !self.eof_sent => {
                self.eof_sent = true;
-                Some(Token::EOF)
+                Some(Token::new(TokenType::EOF))
            }
            None => None,
-            _ => Some(Token::Illegal),
+            _ => Some(Token::new(TokenType::Illegal)),
        }
    }
}
@@ -191,31 +257,31 @@ fn is_letter(c: char) -> bool {
 fn lookup_ident(ident: &str) -> Token {
     match IDENTMAP.get(ident) {
         Some(v) => v.clone(),
-        None => Token::Ident(ident.to_string()),
+        None => Token::with_value(TokenType::Ident, ident.into()),
     }
 }

 #[cfg(test)]
 mod tests {
-    use super::{Lexer, Token};
+    use super::{Lexer, Literal, Token, TokenType};
     use std::collections::HashMap;

     #[test]
-    fn new_token() {
+    fn new() {
        let mut tests = HashMap::new();

        tests.insert(
            "=+(){},;",
            vec![
-                Token::Assign,
-                Token::Plus,
-                Token::LParen,
-                Token::RParen,
-                Token::LBrace,
-                Token::RBrace,
-                Token::Comma,
-                Token::Semicolon,
-                Token::EOF,
+                Token::new(TokenType::Assign),
+                Token::new(TokenType::Plus),
+                Token::new(TokenType::LParen),
+                Token::new(TokenType::RParen),
+                Token::new(TokenType::LBrace),
+                Token::new(TokenType::RBrace),
+                Token::new(TokenType::Comma),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::EOF),
            ],
        );
        tests.insert(
@@ -228,43 +294,43 @@ mod tests {

 let result = add(five, ten);",
            vec![
-                Token::Let,
-                Token::Ident("five".to_string()),
-                Token::Assign,
-                Token::Int(5),
-                Token::Semicolon,
-                Token::Let,
-                Token::Ident("ten".to_string()),
-                Token::Assign,
-                Token::Int(10),
-                Token::Semicolon,
-                Token::Let,
-                Token::Ident("add".to_string()),
-                Token::Assign,
-                Token::Function,
-                Token::LParen,
-                Token::Ident("x".to_string()),
-                Token::Comma,
-                Token::Ident("y".to_string()),
-                Token::RParen,
-                Token::LBrace,
-                Token::Ident("x".to_string()),
-                Token::Plus,
-                Token::Ident("y".to_string()),
-                Token::Semicolon,
-                Token::RBrace,
-                Token::Semicolon,
-                Token::Let,
-                Token::Ident("result".to_string()),
-                Token::Assign,
-                Token::Ident("add".to_string()),
-                Token::LParen,
-                Token::Ident("five".to_string()),
-                Token::Comma,
-                Token::Ident("ten".to_string()),
-                Token::RParen,
-                Token::Semicolon,
-                Token::EOF,
+                Token::new(TokenType::Let),
+                Token::with_value(TokenType::Ident, "five".into()),
+                Token::new(TokenType::Assign),
+                Token::with_value(TokenType::Int, 5.into()),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::Let),
+                Token::with_value(TokenType::Ident, "ten".into()),
+                Token::new(TokenType::Assign),
+                Token::with_value(TokenType::Int, 10.into()),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::Let),
+                Token::with_value(TokenType::Ident, "add".into()),
+                Token::new(TokenType::Assign),
+                Token::new(TokenType::Function),
+                Token::new(TokenType::LParen),
+                Token::with_value(TokenType::Ident, "x".into()),
+                Token::new(TokenType::Comma),
+                Token::with_value(TokenType::Ident, "y".into()),
+                Token::new(TokenType::RParen),
+                Token::new(TokenType::LBrace),
+                Token::with_value(TokenType::Ident, "x".into()),
+                Token::new(TokenType::Plus),
+                Token::with_value(TokenType::Ident, "y".into()),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::RBrace),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::Let),
+                Token::with_value(TokenType::Ident, "result".into()),
+                Token::new(TokenType::Assign),
+                Token::with_value(TokenType::Ident, "add".into()),
+                Token::new(TokenType::LParen),
+                Token::with_value(TokenType::Ident, "five".into()),
+                Token::new(TokenType::Comma),
+                Token::with_value(TokenType::Ident, "ten".into()),
+                Token::new(TokenType::RParen),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::EOF),
            ],
        );
        tests.insert(
@@ -283,54 +349,54 @@ mod tests {

 ",
            vec![
-                Token::Let,
-                Token::Ident("result".to_string()),
-                Token::Assign,
-                Token::Ident("add".to_string()),
-                Token::LParen,
-                Token::Ident("five".to_string()),
-                Token::Comma,
-                Token::Ident("ten".to_string()),
-                Token::RParen,
-                Token::Semicolon,
-                Token::ExclamationMark,
-                Token::Subtract,
-                Token::Divide,
-                Token::Multiply,
-                Token::Int(5),
-                Token::Semicolon,
-                Token::Int(5),
-                Token::LessThan,
-                Token::Int(10),
-                Token::GreaterThan,
-                Token::Int(5),
-                Token::Semicolon,
-                Token::If,
-                Token::LParen,
-                Token::Int(5),
-                Token::LessThan,
-                Token::Int(10),
-                Token::RParen,
-                Token::LBrace,
-                Token::Return,
-                Token::True,
-                Token::Semicolon,
-                Token::RBrace,
-                Token::Else,
-                Token::LBrace,
-                Token::Return,
-                Token::False,
-                Token::Semicolon,
-                Token::RBrace,
-                Token::Int(10),
-                Token::Equals,
-                Token::Int(10),
-                Token::Semicolon,
-                Token::Int(9),
-                Token::NotEquals,
-                Token::Int(10),
-                Token::Semicolon,
-                Token::EOF,
+                Token::new(TokenType::Let),
+                Token::with_value(TokenType::Ident, "result".into()),
+                Token::new(TokenType::Assign),
+                Token::with_value(TokenType::Ident, "add".into()),
+                Token::new(TokenType::LParen),
+                Token::with_value(TokenType::Ident, "five".into()),
+                Token::new(TokenType::Comma),
+                Token::with_value(TokenType::Ident, "ten".into()),
+                Token::new(TokenType::RParen),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::ExclamationMark),
+                Token::new(TokenType::Subtract),
+                Token::new(TokenType::Divide),
+                Token::new(TokenType::Multiply),
+                Token::with_value(TokenType::Int, 5.into()),
+                Token::new(TokenType::Semicolon),
+                Token::with_value(TokenType::Int, 5.into()),
+                Token::new(TokenType::LessThan),
+                Token::with_value(TokenType::Int, 10.into()),
+                Token::new(TokenType::GreaterThan),
+                Token::with_value(TokenType::Int, 5.into()),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::If),
+                Token::new(TokenType::LParen),
+                Token::with_value(TokenType::Int, 5.into()),
+                Token::new(TokenType::LessThan),
+                Token::with_value(TokenType::Int, 10.into()),
+                Token::new(TokenType::RParen),
+                Token::new(TokenType::LBrace),
+                Token::new(TokenType::Return),
+                Token::new(TokenType::True),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::RBrace),
+                Token::new(TokenType::Else),
+                Token::new(TokenType::LBrace),
+                Token::new(TokenType::Return),
+                Token::new(TokenType::False),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::RBrace),
+                Token::with_value(TokenType::Int, 10.into()),
+                Token::new(TokenType::Equals),
+                Token::with_value(TokenType::Int, 10.into()),
+                Token::new(TokenType::Semicolon),
+                Token::with_value(TokenType::Int, 9.into()),
+                Token::new(TokenType::NotEquals),
+                Token::with_value(TokenType::Int, 10.into()),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::EOF),
            ],
        );
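With the Token/TokenType split in place, the lexer's Iterator impl yields structured tokens whose payloads ride along in value instead of in enum variants. A rough usage sketch, assuming Lexer, Token, and TokenType are in scope (it mirrors the second test case above):

    // Collect the token stream for a small input, as the tests do.
    let tokens: Vec<Token> = Lexer::new("let five = 5;").collect();
    assert_eq!(
        tokens,
        vec![
            Token::new(TokenType::Let),
            Token::with_value(TokenType::Ident, "five".into()),
            Token::new(TokenType::Assign),
            Token::with_value(TokenType::Int, 5.into()),
            Token::new(TokenType::Semicolon),
            Token::new(TokenType::EOF),
        ]
    );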

src/parser/ast.rs
@@ -1,95 +1,114 @@
 use crate::{
-    lexer::Token,
-    parser::{ParseError, Parser},
+    lexer::{Literal, Token, TokenType},
+    parser::{ExpressionPriority, ParseError, Parser},
 };
+use std::convert::TryFrom;

 #[derive(Debug, PartialEq)]
 pub enum Statement {
     Let(Let),

     Return(Return),
-    //ExpressionStatement(ExpressionStatement),
+    ExpressionStatement(ExpressionStatement),
 }

+#[derive(Debug, PartialEq)]
+pub enum Expression {
+    Ident(Identifier),
+}
+
 #[derive(Debug, PartialEq)]
 pub struct Let {
     name: Identifier,
-    // value: dyn Expression,
+    value: Option<Expression>,
 }

 impl Let {
     #[allow(dead_code)]
-    pub fn new(identifier: Identifier) -> Let {
-        Let { name: identifier }
+    pub fn new(name: Identifier, value: Option<Expression>) -> Let {
+        Let { name, value }
     }

     pub fn parse(parser: &mut Parser) -> Result<Let, ParseError> {
-        let name;
-        //TODO: Add expression parser
-        match parser.lexer.next() {
-            Some(v) => match v {
-                Token::Ident(q) => name = Identifier { name: q },
-                n @ _ => {
-                    return Err(ParseError::new(&format!("expected IDENT, Found {:?}", n)));
-                }
-            },
-            None => {
-                return Err(ParseError::new(
-                    "expected IDENT after let, Could not find it",
-                ))
-            }
-        };
+        if !parser.expect_peek(Token::new(TokenType::Ident)) {
+            return Err(ParseError::new("expected ident, Couldn't find it"));
+        }
+
+        let literal = String::try_from(parser.current_token.clone().unwrap().value.unwrap())?;
+        let name = Identifier::new(Token::new(TokenType::Let), &literal);

-        if !parser.expect_peek(Token::Assign) {
+        if !parser.expect_peek(Token::new(TokenType::Assign)) {
             return Err(ParseError::new("expected =, Could not find it"));
         }

         // TODO: Replace this with code to parse expressions correctly
-        while !parser.current_token_is(Token::Semicolon) {
+        while !parser.current_token_is(Token::new(TokenType::Semicolon)) {
             parser.current_token = parser.lexer.next();
         }

-        Ok(Let { name })
+        Ok(Let::new(name, None))
     }
 }

-#[derive(Debug, PartialEq)]
-pub struct Expr;
-
 #[derive(Debug, PartialEq)]
 pub struct Return {
-    return_value: Expr,
+    return_value: Expression,
 }

 impl Return {
     pub fn new() -> Return {
-        Return { return_value: Expr }
+        Return {
+            return_value: Expression::Ident(Identifier::new(
+                Token::new(TokenType::Return),
+                "return",
+            )), //TODO FIX THIS
+        }
     }

     pub fn parse(parser: &mut Parser) -> Result<Return, ParseError> {
-        while !parser.current_token_is(Token::Semicolon) {
+        while !parser.current_token_is(Token::new(TokenType::Semicolon)) {
             parser.current_token = parser.lexer.next();
         }

-        Ok(Return { return_value: Expr })
+        Ok(Return::new())
     }
 }

 #[derive(Debug, PartialEq)]
 pub struct Identifier {
-    name: String,
+    name: Token,
+    value: Literal,
 }

 impl Identifier {
     #[allow(dead_code)]
-    pub fn new(name: &str) -> Identifier {
+    pub fn new(token: Token, name: &str) -> Identifier {
         Identifier {
-            name: name.to_owned(),
+            name: token,
+            value: name.into(),
         }
     }
 }
+
+#[derive(Debug, PartialEq)]
+pub struct ExpressionStatement {
+    token: Option<Token>, // The first token in Expression
+    expression: Expression,
+}
+
+impl ExpressionStatement {
+    #[allow(dead_code)]
+    pub fn new(token: Option<Token>, expression: Expression) -> Self {
+        ExpressionStatement { token, expression }
+    }
+
+    pub fn parse(parser: &mut Parser) -> Result<Self, ParseError> {
+        let ct = parser.current_token.clone();
+
+        let expr = parser.parse_expression(ExpressionPriority::Lowest)?;
+
+        let s = Token::new(TokenType::Semicolon);
+        if parser.peek_token_is(&s) {
+            parser.current_token = parser.lexer.next();
+        }
+
+        Ok(ExpressionStatement::new(ct, expr))
+    }
+}
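ExpressionStatement is the third statement form: it records the expression's leading token plus the parsed Expression, and its parse consumes an optional trailing semicolon. For an input like foobar; the value built here should look roughly like this sketch (using the types defined above; it mirrors the identifier_expression test further down):

    let stmt = Statement::ExpressionStatement(ExpressionStatement::new(
        Some(Token::with_value(TokenType::Ident, "foobar".into())),
        Expression::Ident(Identifier::new(
            Token::with_value(TokenType::Ident, "foobar".into()),
            "foobar",
        )),
    ));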

src/parser/mod.rs
@@ -3,42 +3,93 @@ mod program;

 pub use self::program::Program;

-use self::ast::{Let, Return, Statement};
-use crate::lexer::{Lexer, Token};
-use std::iter::Peekable;
+use self::ast::{Expression, ExpressionStatement, Identifier, Let, Return, Statement};
+use crate::lexer::{Lexer, Token, TokenType};
+use std::{collections::HashMap, convert::TryFrom, iter::Peekable};
+
+type PrefixParseFn = fn(&mut Parser) -> Result<Expression, ParseError>;
+type InfixParseFn = fn(Expression) -> Result<Expression, ParseError>;
+
+#[derive(Debug, Eq, PartialEq, PartialOrd, Ord)]
+enum ExpressionPriority {
+    Lowest,
+    Equals,
+    LessGreater,
+    Sum,
+    Product,
+    Prefix,
+    Call,
+}

 pub struct Parser<'a> {
     lexer: Peekable<Lexer<'a>>,
     current_token: Option<Token>,
+    prefix_parse_fns: HashMap<TokenType, PrefixParseFn>,
+    infix_parse_fns: HashMap<TokenType, InfixParseFn>,
 }

 impl<'a> Parser<'a> {
     fn new(lexer: Lexer<'a>) -> Parser {
-        Parser {
+        let prefix_parse_fns = HashMap::new();
+
+        let mut parser = Parser {
             lexer: lexer.peekable(),
             current_token: None,
-        }
+            infix_parse_fns: HashMap::new(),
+            prefix_parse_fns,
+        };
+        parser.register_prefix_fn(TokenType::Ident, Parser::parse_identifier);
+        parser
     }
-    fn parse_statement(&mut self, token: Token) -> Result<Statement, ParseError> {
+
+    fn parse_statement(&mut self, token: TokenType) -> Result<Statement, ParseError> {
         match token {
-            Token::Let => match Let::parse(self) {
+            TokenType::Let => match Let::parse(self) {
                 Ok(v) => Ok(Statement::Let(v)),
                 Err(e) => Err(e), //TODO: Return appropriate error
             },
-            Token::Return => match Return::parse(self) {
+            TokenType::Return => match Return::parse(self) {
                 Ok(v) => Ok(Statement::Return(v)),
                 Err(e) => Err(e),
             },
-            n @ _ => {
-                println!("{:?}", n);
-                unimplemented!();
-            }
+            _ => match ExpressionStatement::parse(self) {
+                Ok(v) => Ok(Statement::ExpressionStatement(v)),
+                Err(e) => Err(e),
+            },
         }
     }

+    fn parse_expression(&mut self, priority: ExpressionPriority) -> Result<Expression, ParseError> {
+        let current_token = if let Some(token) = &self.current_token {
+            token
+        } else {
+            return Err(ParseError::new(
+                "parser.current_token is None. This *should* not have happened.",
+            ));
+        };
+        let prefix = match self.prefix_parse_fns.get(&current_token.name) {
+            Some(v) => v,
+            None => {
+                return Err(ParseError::new(&format!(
+                    "no prefix parse function with token {:?} found in parser",
+                    current_token
+                )))
+            }
+        };
+        prefix(self)
+    }
+
+    fn parse_identifier(parser: &mut Parser) -> Result<Expression, ParseError> {
+        let ct = parser.current_token.clone().unwrap();
+        Ok(Expression::Ident(Identifier::new(
+            ct.clone(), // TODO: Correction needed, Can be a source of subtle error in some cases
+            &String::try_from(ct.value.unwrap())?,
+        )))
+    }
+
     fn expect_peek(&mut self, token: Token) -> bool {
         match self.lexer.peek() {
-            Some(v) if v == &token => {
+            Some(v) if v.name == token.name => {
                 self.current_token = self.lexer.next();
                 true
             }
@@ -49,6 +100,18 @@ impl<'a> Parser<'a> {
     fn current_token_is(&self, token: Token) -> bool {
         self.current_token == Some(token)
     }

+    fn peek_token_is(&mut self, token: &Token) -> bool {
+        self.lexer.peek() == Some(token)
+    }
+
+    fn register_infix_fn(&mut self, token: TokenType, f: InfixParseFn) {
+        self.infix_parse_fns.insert(token, f);
+    }
+
+    fn register_prefix_fn(&mut self, token: TokenType, f: PrefixParseFn) {
+        self.prefix_parse_fns.insert(token, f);
+    }
 }

 #[derive(Debug)]
@@ -75,3 +138,15 @@ impl ParseError {
         }
     }
 }
+
+impl From<String> for ParseError {
+    fn from(desc: String) -> ParseError {
+        ParseError { desc }
+    }
+}
+
+impl From<&str> for ParseError {
+    fn from(s: &str) -> ParseError {
+        ParseError { desc: s.to_owned() }
+    }
+}
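The prefix/infix tables are the scaffolding of a Pratt parser: each TokenType maps to a plain fn pointer, and parse_expression looks up the handler for the current token and calls it. A self-contained sketch of that dispatch pattern, with simplified stand-in types (not the crate's own):

    use std::collections::HashMap;

    // Stand-in for TokenType, just enough for the lookup key.
    #[derive(PartialEq, Eq, Hash)]
    enum Kind { Ident, Int }

    // Stand-in for PrefixParseFn: a plain fn pointer, no closures needed.
    type PrefixFn = fn(&str) -> String;

    fn parse_ident(s: &str) -> String { format!("Ident({})", s) }
    fn parse_int(s: &str) -> String { format!("Int({})", s) }

    fn main() {
        // Registration, mirroring register_prefix_fn above.
        let mut prefix_fns: HashMap<Kind, PrefixFn> = HashMap::new();
        prefix_fns.insert(Kind::Ident, parse_ident);
        prefix_fns.insert(Kind::Int, parse_int);

        // Dispatch, mirroring the lookup inside parse_expression.
        let handler = prefix_fns[&Kind::Ident];
        assert_eq!(handler("foobar"), "Ident(foobar)");
    }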

src/parser/program.rs
@@ -1,6 +1,7 @@
-use crate::lexer::{Lexer, Token};
+use crate::lexer::{Lexer, Token, TokenType};
 use crate::parser::{ast::Statement, Parser};

 #[derive(Debug, PartialEq)]
 pub struct Program {
     statements: Vec<Statement>,
 }
@@ -9,32 +10,29 @@ impl Program {
     pub fn parse(lexer: Lexer) -> Program {
         let mut statements = vec![];
         let mut parser = Parser::new(lexer);
-        loop {
-            if let Some(token) = parser.lexer.next() {
-                parser.current_token = Some(token.clone());
-                if parser.current_token_is(Token::EOF) {
-                    break;
-                }
-
-                match parser.parse_statement(token) {
-                    Ok(v) => statements.push(v),
-                    Err(e) => {
-                        println!("{:?}", e);
-                        continue;
-                    }
-                };
-            } else {
-                break;
-            }
+        while let Some(token) = parser.lexer.next() {
+            parser.current_token = Some(token.clone());
+            if parser.current_token_is(Token::new(TokenType::EOF)) {
+                break;
+            }
+
+            match parser.parse_statement(token.name) {
+                Ok(v) => statements.push(v),
+                Err(e) => {
+                    println!("{:?}", e);
+                    continue;
+                }
+            };
         }

         Program { statements }
     }
 }

 #[cfg(test)]
 mod tests {
-    use crate::lexer::Lexer;
-    use crate::parser::ast::{Identifier, Let, Statement};
+    use crate::lexer::{Lexer, Token, TokenType};
+    use crate::parser::ast::{Expression, ExpressionStatement, Identifier, Let, Statement};
     use crate::parser::Program;

     #[test]
@@ -45,19 +43,39 @@ mod tests {
 let foobar = 8388383;
 ";

-        let expected_out = vec![
-            Statement::Let(Let::new(Identifier::new("yr"))),
-            Statement::Let(Let::new(Identifier::new("qq"))),
-            Statement::Let(Let::new(Identifier::new("foobar"))),
-        ];
+        let expected_out = Program {
+            statements: vec![
+                Statement::Let(Let::new(
+                    Identifier::new(Token::new(TokenType::Let), "yr"),
+                    None
+                    // Some(Expression::Ident(Identifier::new(
+                    //     Token::new(TokenType::Let),
+                    //     "5",
+                    // ))),
+                )),
+                Statement::Let(Let::new(
+                    Identifier::new(Token::new(TokenType::Let), "qq"),
+                    None
+                    // Some(Expression::Ident(Identifier::new(
+                    //     Token::new(TokenType::Let),
+                    //     "10",
+                    // ))),
+                )),
+                Statement::Let(Let::new(
+                    Identifier::new(Token::new(TokenType::Let), "foobar"),
+                    None
+                    // Some(Expression::Ident(Identifier::new(
+                    //     Token::new(TokenType::Let),
+                    //     "8388383",
+                    // ))),
+                )),
+            ],
+        };
         let lexer = Lexer::new(ip);
         let as_tree = Program::parse(lexer);

         assert_eq!(as_tree.statements.len(), 3);
-
-        for (out, expected_out) in as_tree.statements.into_iter().zip(expected_out.into_iter()) {
-            assert_eq!(out, expected_out);
-        }
+        assert_eq!(as_tree, expected_out);
     }

     #[test]
@@ -72,4 +90,26 @@ mod tests {
         let as_tree = Program::parse(lexer);
         assert_eq!(as_tree.statements.len(), 3);
     }
+
+    #[test]
+    fn identifier_expression() {
+        let ip = "
+foobar;
+";
+        let lexer = Lexer::new(ip);
+        let as_tree = Program::parse(lexer);
+        let expected_out = Program {
+            statements: vec![Statement::ExpressionStatement(ExpressionStatement::new(
+                Some(Token::with_value(TokenType::Ident, "foobar".into())),
+                Expression::Ident(Identifier::new(
+                    Token::with_value(TokenType::Ident, "foobar".into()),
+                    "foobar",
+                )),
+            ))],
+        };
+
+        println!("{:?}", as_tree);
+        assert_eq!(as_tree.statements.len(), 1);
+        assert_eq!(as_tree, expected_out);
+    }
 }
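Wired together, Program::parse now routes any statement that does not start with let or return through ExpressionStatement::parse. A quick usage sketch, assuming Lexer and Program are in scope:

    let lexer = Lexer::new("foobar;");
    let program = Program::parse(lexer);
    // Expect one Statement::ExpressionStatement wrapping Expression::Ident("foobar"),
    // as asserted by the identifier_expression test above.
    println!("{:?}", program);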

src/repl/mod.rs
@@ -1,7 +1,7 @@
 use crate::{lexer::Lexer, parser::Program};
 use std::io::{self, BufRead, Write};

-const PROMPT: &'static str = ">> ";
+const PROMPT: &str = ">> ";

 pub fn init() {
     let stdin = io::stdin();
@@ -14,7 +14,7 @@ pub fn init() {
 fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
     loop {
-        out.write(PROMPT.as_bytes()).unwrap();
+        out.write_all(PROMPT.as_bytes()).unwrap();
         out.flush().unwrap();
         let mut s = String::new();
         ip.read_line(&mut s).unwrap();
@@ -24,6 +24,7 @@ fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
             println!("{:?}", token);
         }

-        let _parser = Program::parse(tokens);
+        let parser = Program::parse(tokens);
+        println!("parser={:?}", parser);
     }
 }