Refactor Token and add an identifier expression parser
1. Split Lexer::Token into Lexer::Token + Lexer::TokenType.
2. Add From/TryFrom conversions for Lexer::Literal.
3. Add an ExpressionStatement parser.
4. Add From<String> for ParseError.
5. Add the prefix_parse_fn and infix_parse_fn tables.
6. Add parse_expression.
7. Add tests.
8. Fix lint issues reported by Clippy.
parent 77ecd6e3dd, commit 2863ad8aa4
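For orientation before the diff: the heart of the change is that the old data-carrying Token enum becomes a plain TokenType tag plus a Token struct that carries an optional Literal payload. A minimal, self-contained sketch of the new shape (the types are trimmed-down copies of the ones in the diff below):

// Sketch of the new token shape; only three TokenType variants are shown.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
enum TokenType { Ident, Int, Semicolon }

#[derive(Debug, Hash, PartialEq, Eq, Clone)]
enum Literal { String(String), Int(i64) }

#[derive(Debug, PartialEq, Eq, Clone, Hash)]
struct Token { name: TokenType, value: Option<Literal> }

fn main() {
    // Old API: Token::Ident("five".to_string()), Token::Int(5), Token::Semicolon.
    // New API: the kind is a hashable tag; any payload rides in `value`.
    let ident = Token { name: TokenType::Ident, value: Some(Literal::String("five".into())) };
    let int = Token { name: TokenType::Int, value: Some(Literal::Int(5)) };
    let semi = Token { name: TokenType::Semicolon, value: None };
    println!("{:?} {:?} {:?}", ident, int, semi);
}

Making the token kind a field-less Hash + Eq tag is what lets it key the prefix/infix parse-function tables added in the parser below.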
src/lexer/mod.rs
@@ -1,28 +1,29 @@
 use std::collections::HashMap;
+use std::convert::TryFrom;
 use std::iter::Peekable;
 use std::str::{self, Chars};
 
 lazy_static! {
     static ref IDENTMAP: HashMap<&'static str, Token> = {
         let mut m = HashMap::new();
-        m.insert("fn", Token::Function);
-        m.insert("let", Token::Let);
-        m.insert("true", Token::True);
-        m.insert("false", Token::False);
-        m.insert("return", Token::Return);
-        m.insert("if", Token::If);
-        m.insert("else", Token::Else);
+        m.insert("fn", Token::new(TokenType::Function));
+        m.insert("let", Token::new(TokenType::Let));
+        m.insert("true", Token::new(TokenType::True));
+        m.insert("false", Token::new(TokenType::False));
+        m.insert("return", Token::new(TokenType::Return));
+        m.insert("if", Token::new(TokenType::If));
+        m.insert("else", Token::new(TokenType::Else));
         m
     };
 }
 
-#[derive(Debug, PartialEq, Clone)]
-pub enum Token {
+#[derive(Debug, PartialEq, Eq, Clone, Hash)]
+pub enum TokenType {
     Illegal,
     EOF,
 
     // Identifiers
-    Int(i64),
+    Int,
 
     // Operators
     Assign,

@@ -52,7 +53,72 @@ pub enum Token {
     Else,
     False,
     Return,
-    Ident(String),
+    Ident,
+}
+
+#[derive(Debug, PartialEq, Eq, Clone, Hash)]
+pub struct Token {
+    pub name: TokenType,
+    pub value: Option<Literal>,
+}
+
+impl Token {
+    #[inline]
+    pub fn new(name: TokenType) -> Self {
+        Token { name, value: None }
+    }
+
+    #[inline]
+    pub fn with_value(name: TokenType, value: Literal) -> Self {
+        Token {
+            name,
+            value: Some(value),
+        }
+    }
+}
+
+#[derive(Debug, Hash, PartialEq, Eq, Clone)]
+pub enum Literal {
+    String(String),
+    Int(i64),
+}
+
+impl From<String> for Literal {
+    fn from(s: String) -> Literal {
+        Literal::String(s)
+    }
+}
+
+impl From<&str> for Literal {
+    fn from(s: &str) -> Literal {
+        Literal::String(s.to_owned())
+    }
+}
+
+impl From<i64> for Literal {
+    fn from(i: i64) -> Literal {
+        Literal::Int(i)
+    }
+}
+
+impl TryFrom<Literal> for String {
+    type Error = &'static str;
+    fn try_from(l: Literal) -> Result<String, Self::Error> {
+        match l {
+            Literal::String(v) => Ok(v),
+            Literal::Int(_) => Err("can not convert Int to String"),
+        }
+    }
+}
+
+impl TryFrom<Literal> for i64 {
+    type Error = &'static str;
+    fn try_from(l: Literal) -> Result<i64, Self::Error> {
+        match l {
+            Literal::Int(v) => Ok(v),
+            Literal::String(_) => Err("can not convert String to Int"),
+        }
+    }
 }
 
 #[derive(Debug, Clone)]

@@ -137,21 +203,21 @@ impl<'a> Iterator for Lexer<'a> {
                 };
                 if is_e {
                     self.read_char();
-                    Some(Token::Equals)
+                    Some(Token::new(TokenType::Equals))
                 } else {
-                    Some(Token::Assign)
+                    Some(Token::new(TokenType::Assign))
                 }
             }
-            Some('+') => Some(Token::Plus),
-            Some('*') => Some(Token::Multiply),
-            Some('/') => Some(Token::Divide),
-            Some('-') => Some(Token::Subtract),
-            Some(',') => Some(Token::Comma),
-            Some(';') => Some(Token::Semicolon),
-            Some('(') => Some(Token::LParen),
-            Some(')') => Some(Token::RParen),
-            Some('{') => Some(Token::LBrace),
-            Some('}') => Some(Token::RBrace),
+            Some('+') => Some(Token::new(TokenType::Plus)),
+            Some('*') => Some(Token::new(TokenType::Multiply)),
+            Some('/') => Some(Token::new(TokenType::Divide)),
+            Some('-') => Some(Token::new(TokenType::Subtract)),
+            Some(',') => Some(Token::new(TokenType::Comma)),
+            Some(';') => Some(Token::new(TokenType::Semicolon)),
+            Some('(') => Some(Token::new(TokenType::LParen)),
+            Some(')') => Some(Token::new(TokenType::RParen)),
+            Some('{') => Some(Token::new(TokenType::LBrace)),
+            Some('}') => Some(Token::new(TokenType::RBrace)),
             Some('!') => {
                 let is_ne = match self.input.peek() {
                     Some(v) if *v == '=' => true,

@@ -159,27 +225,27 @@ impl<'a> Iterator for Lexer<'a> {
                 };
                 if is_ne {
                     self.read_char();
-                    Some(Token::NotEquals)
+                    Some(Token::new(TokenType::NotEquals))
                 } else {
-                    Some(Token::ExclamationMark)
+                    Some(Token::new(TokenType::ExclamationMark))
                 }
             }
-            Some('>') => Some(Token::GreaterThan),
-            Some('<') => Some(Token::LessThan),
+            Some('>') => Some(Token::new(TokenType::GreaterThan)),
+            Some('<') => Some(Token::new(TokenType::LessThan)),
             Some(ch) if is_letter(ch) => {
                 let ident = self.read_identifier(ch);
                 Some(lookup_ident(&ident))
             }
             Some(ch) if ch.is_ascii_digit() => {
                 let number = self.read_number(ch);
-                Some(Token::Int(number))
+                Some(Token::with_value(TokenType::Int, (number as i64).into()))
             }
             None if !self.eof_sent => {
                 self.eof_sent = true;
-                Some(Token::EOF)
+                Some(Token::new(TokenType::EOF))
             }
             None => None,
-            _ => Some(Token::Illegal),
+            _ => Some(Token::new(TokenType::Illegal)),
         }
     }
 }

@@ -191,31 +257,31 @@ fn is_letter(c: char) -> bool {
 fn lookup_ident(ident: &str) -> Token {
     match IDENTMAP.get(ident) {
         Some(v) => v.clone(),
-        None => Token::Ident(ident.to_string()),
+        None => Token::with_value(TokenType::Ident, ident.into()),
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use super::{Lexer, Token};
+    use super::{Lexer, Literal, Token, TokenType};
     use std::collections::HashMap;
 
     #[test]
-    fn new_token() {
+    fn new() {
         let mut tests = HashMap::new();
 
         tests.insert(
             "=+(){},;",
             vec![
-                Token::Assign,
-                Token::Plus,
-                Token::LParen,
-                Token::RParen,
-                Token::LBrace,
-                Token::RBrace,
-                Token::Comma,
-                Token::Semicolon,
-                Token::EOF,
+                Token::new(TokenType::Assign),
+                Token::new(TokenType::Plus),
+                Token::new(TokenType::LParen),
+                Token::new(TokenType::RParen),
+                Token::new(TokenType::LBrace),
+                Token::new(TokenType::RBrace),
+                Token::new(TokenType::Comma),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::EOF),
             ],
         );
         tests.insert(

@@ -228,43 +294,43 @@ mod tests {
 
             let result = add(five, ten);",
             vec![
-                Token::Let,
-                Token::Ident("five".to_string()),
-                Token::Assign,
-                Token::Int(5),
-                Token::Semicolon,
-                Token::Let,
-                Token::Ident("ten".to_string()),
-                Token::Assign,
-                Token::Int(10),
-                Token::Semicolon,
-                Token::Let,
-                Token::Ident("add".to_string()),
-                Token::Assign,
-                Token::Function,
-                Token::LParen,
-                Token::Ident("x".to_string()),
-                Token::Comma,
-                Token::Ident("y".to_string()),
-                Token::RParen,
-                Token::LBrace,
-                Token::Ident("x".to_string()),
-                Token::Plus,
-                Token::Ident("y".to_string()),
-                Token::Semicolon,
-                Token::RBrace,
-                Token::Semicolon,
-                Token::Let,
-                Token::Ident("result".to_string()),
-                Token::Assign,
-                Token::Ident("add".to_string()),
-                Token::LParen,
-                Token::Ident("five".to_string()),
-                Token::Comma,
-                Token::Ident("ten".to_string()),
-                Token::RParen,
-                Token::Semicolon,
-                Token::EOF,
+                Token::new(TokenType::Let),
+                Token::with_value(TokenType::Ident, "five".into()),
+                Token::new(TokenType::Assign),
+                Token::with_value(TokenType::Int, 5.into()),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::Let),
+                Token::with_value(TokenType::Ident, "ten".into()),
+                Token::new(TokenType::Assign),
+                Token::with_value(TokenType::Int, 10.into()),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::Let),
+                Token::with_value(TokenType::Ident, "add".into()),
+                Token::new(TokenType::Assign),
+                Token::new(TokenType::Function),
+                Token::new(TokenType::LParen),
+                Token::with_value(TokenType::Ident, "x".into()),
+                Token::new(TokenType::Comma),
+                Token::with_value(TokenType::Ident, "y".into()),
+                Token::new(TokenType::RParen),
+                Token::new(TokenType::LBrace),
+                Token::with_value(TokenType::Ident, "x".into()),
+                Token::new(TokenType::Plus),
+                Token::with_value(TokenType::Ident, "y".into()),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::RBrace),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::Let),
+                Token::with_value(TokenType::Ident, "result".into()),
+                Token::new(TokenType::Assign),
+                Token::with_value(TokenType::Ident, "add".into()),
+                Token::new(TokenType::LParen),
+                Token::with_value(TokenType::Ident, "five".into()),
+                Token::new(TokenType::Comma),
+                Token::with_value(TokenType::Ident, "ten".into()),
+                Token::new(TokenType::RParen),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::EOF),
             ],
         );
         tests.insert(

@@ -283,54 +349,54 @@ mod tests {
 
             ",
             vec![
-                Token::Let,
-                Token::Ident("result".to_string()),
-                Token::Assign,
-                Token::Ident("add".to_string()),
-                Token::LParen,
-                Token::Ident("five".to_string()),
-                Token::Comma,
-                Token::Ident("ten".to_string()),
-                Token::RParen,
-                Token::Semicolon,
-                Token::ExclamationMark,
-                Token::Subtract,
-                Token::Divide,
-                Token::Multiply,
-                Token::Int(5),
-                Token::Semicolon,
-                Token::Int(5),
-                Token::LessThan,
-                Token::Int(10),
-                Token::GreaterThan,
-                Token::Int(5),
-                Token::Semicolon,
-                Token::If,
-                Token::LParen,
-                Token::Int(5),
-                Token::LessThan,
-                Token::Int(10),
-                Token::RParen,
-                Token::LBrace,
-                Token::Return,
-                Token::True,
-                Token::Semicolon,
-                Token::RBrace,
-                Token::Else,
-                Token::LBrace,
-                Token::Return,
-                Token::False,
-                Token::Semicolon,
-                Token::RBrace,
-                Token::Int(10),
-                Token::Equals,
-                Token::Int(10),
-                Token::Semicolon,
-                Token::Int(9),
-                Token::NotEquals,
-                Token::Int(10),
-                Token::Semicolon,
-                Token::EOF,
+                Token::new(TokenType::Let),
+                Token::with_value(TokenType::Ident, "result".into()),
+                Token::new(TokenType::Assign),
+                Token::with_value(TokenType::Ident, "add".into()),
+                Token::new(TokenType::LParen),
+                Token::with_value(TokenType::Ident, "five".into()),
+                Token::new(TokenType::Comma),
+                Token::with_value(TokenType::Ident, "ten".into()),
+                Token::new(TokenType::RParen),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::ExclamationMark),
+                Token::new(TokenType::Subtract),
+                Token::new(TokenType::Divide),
+                Token::new(TokenType::Multiply),
+                Token::with_value(TokenType::Int, 5.into()),
+                Token::new(TokenType::Semicolon),
+                Token::with_value(TokenType::Int, 5.into()),
+                Token::new(TokenType::LessThan),
+                Token::with_value(TokenType::Int, 10.into()),
+                Token::new(TokenType::GreaterThan),
+                Token::with_value(TokenType::Int, 5.into()),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::If),
+                Token::new(TokenType::LParen),
+                Token::with_value(TokenType::Int, 5.into()),
+                Token::new(TokenType::LessThan),
+                Token::with_value(TokenType::Int, 10.into()),
+                Token::new(TokenType::RParen),
+                Token::new(TokenType::LBrace),
+                Token::new(TokenType::Return),
+                Token::new(TokenType::True),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::RBrace),
+                Token::new(TokenType::Else),
+                Token::new(TokenType::LBrace),
+                Token::new(TokenType::Return),
+                Token::new(TokenType::False),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::RBrace),
+                Token::with_value(TokenType::Int, 10.into()),
+                Token::new(TokenType::Equals),
+                Token::with_value(TokenType::Int, 10.into()),
+                Token::new(TokenType::Semicolon),
+                Token::with_value(TokenType::Int, 9.into()),
+                Token::new(TokenType::NotEquals),
+                Token::with_value(TokenType::Int, 10.into()),
+                Token::new(TokenType::Semicolon),
+                Token::new(TokenType::EOF),
             ],
         );
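The From/TryFrom impls on Literal above exist so call sites can build and unwrap literals without matching by hand; the 5.into() and "five".into() calls in the tests rely on them. A small usage sketch, assuming the Literal type and impls from src/lexer/mod.rs above are in scope:

use std::convert::TryFrom;

// Round-trip a couple of literals; note TryFrom's error type is &'static str.
fn literal_roundtrip() -> Result<(), &'static str> {
    let s: Literal = "five".into(); // From<&str>
    let n: Literal = 5.into();      // From<i64>
    assert_eq!(String::try_from(s)?, "five");
    assert_eq!(i64::try_from(n)?, 5);
    // A mismatched conversion surfaces as an Err, not a panic:
    assert!(String::try_from(Literal::Int(5)).is_err());
    Ok(())
}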
@@ -1,95 +1,114 @@
 use crate::{
-    lexer::Token,
-    parser::{ParseError, Parser},
+    lexer::{Literal, Token, TokenType},
+    parser::{ExpressionPriority, ParseError, Parser},
 };
+use std::convert::TryFrom;
 
 #[derive(Debug, PartialEq)]
 pub enum Statement {
     Let(Let),
 
     Return(Return),
-    //ExpressionStatement(ExpressionStatement),
+    ExpressionStatement(ExpressionStatement),
 }
 
 #[derive(Debug, PartialEq)]
 pub enum Expression {
     Ident(Identifier),
 }
 
 #[derive(Debug, PartialEq)]
 pub struct Let {
     name: Identifier,
-    // value: dyn Expression,
+    value: Option<Expression>,
 }
 
 impl Let {
-    #[allow(dead_code)]
-    pub fn new(identifier: Identifier) -> Let {
-        Let { name: identifier }
+    pub fn new(name: Identifier, value: Option<Expression>) -> Let {
+        Let { name, value }
     }
 
     pub fn parse(parser: &mut Parser) -> Result<Let, ParseError> {
-        let name;
-        //TODO: Add expression parser
-        match parser.lexer.next() {
-            Some(v) => match v {
-                Token::Ident(q) => name = Identifier { name: q },
-                n @ _ => {
-                    return Err(ParseError::new(&format!("expected IDENT, Found {:?}", n)));
-                }
-            },
-            None => {
-                return Err(ParseError::new(
-                    "expected IDENT after let, Could not find it",
-                ))
-            }
-        };
+        if !parser.expect_peek(Token::new(TokenType::Ident)) {
+            return Err(ParseError::new("expected ident, Couldn't find it"));
+        }
+
+        let literal = String::try_from(parser.current_token.clone().unwrap().value.unwrap())?;
+        let name = Identifier::new(Token::new(TokenType::Let), &literal);
 
-        if !parser.expect_peek(Token::Assign) {
+        if !parser.expect_peek(Token::new(TokenType::Assign)) {
             return Err(ParseError::new("expected =, Could not find it"));
         }
 
         // TODO: Replace this with code to parse expressions correctly
-        while !parser.current_token_is(Token::Semicolon) {
+        while !parser.current_token_is(Token::new(TokenType::Semicolon)) {
             parser.current_token = parser.lexer.next();
         }
 
-        Ok(Let { name })
+        Ok(Let::new(name, None))
     }
 }
 
-#[derive(Debug, PartialEq)]
-pub struct Expr;
-
 #[derive(Debug, PartialEq)]
 pub struct Return {
-    return_value: Expr,
+    return_value: Expression,
 }
 
 impl Return {
     pub fn new() -> Return {
-        Return { return_value: Expr }
+        Return {
+            return_value: Expression::Ident(Identifier::new(
+                Token::new(TokenType::Return),
+                "return",
+            )), //TODO FIX THIS
+        }
     }
 
     pub fn parse(parser: &mut Parser) -> Result<Return, ParseError> {
-        while !parser.current_token_is(Token::Semicolon) {
+        while !parser.current_token_is(Token::new(TokenType::Semicolon)) {
             parser.current_token = parser.lexer.next();
         }
 
-        Ok(Return { return_value: Expr })
+        Ok(Return::new())
     }
 }
 
 #[derive(Debug, PartialEq)]
 pub struct Identifier {
-    name: String,
+    name: Token,
+    value: Literal,
 }
 
 impl Identifier {
-    #[allow(dead_code)]
-    pub fn new(name: &str) -> Identifier {
+    pub fn new(token: Token, name: &str) -> Identifier {
         Identifier {
-            name: name.to_owned(),
+            name: token,
+            value: name.into(),
         }
     }
 }
+
+#[derive(Debug, PartialEq)]
+pub struct ExpressionStatement {
+    token: Option<Token>, // The first token in Expression
+    expression: Expression,
+}
+
+impl ExpressionStatement {
+    #[allow(dead_code)]
+    pub fn new(token: Option<Token>, expression: Expression) -> Self {
+        ExpressionStatement { token, expression }
+    }
+
+    pub fn parse(parser: &mut Parser) -> Result<Self, ParseError> {
+        let ct = parser.current_token.clone();
+
+        let expr = parser.parse_expression(ExpressionPriority::Lowest)?;
+
+        let s = Token::new(TokenType::Semicolon);
+        if parser.peek_token_is(&s) {
+            parser.current_token = parser.lexer.next();
+        }
+
+        Ok(ExpressionStatement::new(ct, expr))
+    }
+}
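With ExpressionStatement added above, a bare expression such as foobar; becomes a statement in its own right. A sketch of the value the parser is expected to build for that input, assuming the AST types from the diff above are in scope (it mirrors the identifier_expression test further down):

// An expression statement records the expression's first token
// alongside the parsed expression itself.
fn example_identifier_statement() -> Statement {
    Statement::ExpressionStatement(ExpressionStatement::new(
        Some(Token::with_value(TokenType::Ident, "foobar".into())),
        Expression::Ident(Identifier::new(
            Token::with_value(TokenType::Ident, "foobar".into()),
            "foobar",
        )),
    ))
}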
@@ -3,42 +3,93 @@ mod program;
 
 pub use self::program::Program;
 
-use self::ast::{Let, Return, Statement};
-use crate::lexer::{Lexer, Token};
-use std::iter::Peekable;
+use self::ast::{Expression, ExpressionStatement, Identifier, Let, Return, Statement};
+use crate::lexer::{Lexer, Token, TokenType};
+use std::{collections::HashMap, convert::TryFrom, iter::Peekable};
+
+type PrefixParseFn = fn(&mut Parser) -> Result<Expression, ParseError>;
+type InfixParseFn = fn(Expression) -> Result<Expression, ParseError>;
+
+#[derive(Debug, Eq, PartialEq, PartialOrd, Ord)]
+enum ExpressionPriority {
+    Lowest,
+    Equals,
+    LessGreater,
+    Sum,
+    Product,
+    Prefix,
+    Call,
+}
 
 pub struct Parser<'a> {
     lexer: Peekable<Lexer<'a>>,
     current_token: Option<Token>,
+    prefix_parse_fns: HashMap<TokenType, PrefixParseFn>,
+    infix_parse_fns: HashMap<TokenType, InfixParseFn>,
 }
 
 impl<'a> Parser<'a> {
     fn new(lexer: Lexer<'a>) -> Parser {
-        Parser {
+        let prefix_parse_fns = HashMap::new();
+
+        let mut parser = Parser {
             lexer: lexer.peekable(),
             current_token: None,
-        }
+            infix_parse_fns: HashMap::new(),
+            prefix_parse_fns,
+        };
+        parser.register_prefix_fn(TokenType::Ident, Parser::parse_identifier);
+        parser
     }
-    fn parse_statement(&mut self, token: Token) -> Result<Statement, ParseError> {
+
+    fn parse_statement(&mut self, token: TokenType) -> Result<Statement, ParseError> {
         match token {
-            Token::Let => match Let::parse(self) {
+            TokenType::Let => match Let::parse(self) {
                 Ok(v) => Ok(Statement::Let(v)),
                 Err(e) => Err(e), //TODO: Return appropriate error
             },
-            Token::Return => match Return::parse(self) {
+            TokenType::Return => match Return::parse(self) {
                 Ok(v) => Ok(Statement::Return(v)),
                 Err(e) => Err(e),
             },
-            n @ _ => {
-                println!("{:?}", n);
-                unimplemented!();
-            }
+            _ => match ExpressionStatement::parse(self) {
+                Ok(v) => Ok(Statement::ExpressionStatement(v)),
+                Err(e) => Err(e),
+            },
         }
     }
 
+    fn parse_expression(&mut self, priority: ExpressionPriority) -> Result<Expression, ParseError> {
+        let current_token = if let Some(token) = &self.current_token {
+            token
+        } else {
+            return Err(ParseError::new(
+                "parser.current_token is None. This *should* not have happened.",
+            ));
+        };
+        let prefix = match self.prefix_parse_fns.get(&current_token.name) {
+            Some(v) => v,
+            None => {
+                return Err(ParseError::new(&format!(
+                    "no prefix parse function with token {:?} found in parser",
+                    current_token
+                )))
+            }
+        };
+        prefix(self)
+    }
+
+    fn parse_identifier(parser: &mut Parser) -> Result<Expression, ParseError> {
+        let ct = parser.current_token.clone().unwrap();
+        Ok(Expression::Ident(Identifier::new(
+            ct.clone(), // TODO: Correction needed, Can be a source of subtle error in some cases
+            &String::try_from(ct.value.unwrap())?,
+        )))
+    }
+
     fn expect_peek(&mut self, token: Token) -> bool {
         match self.lexer.peek() {
-            Some(v) if v == &token => {
+            Some(v) if v.name == token.name => {
                 self.current_token = self.lexer.next();
                 true
             }

@@ -49,6 +100,18 @@ impl<'a> Parser<'a> {
     fn current_token_is(&self, token: Token) -> bool {
         self.current_token == Some(token)
     }
 
+    fn peek_token_is(&mut self, token: &Token) -> bool {
+        self.lexer.peek() == Some(token)
+    }
+
+    fn register_infix_fn(&mut self, token: TokenType, f: InfixParseFn) {
+        self.infix_parse_fns.insert(token, f);
+    }
+
+    fn register_prefix_fn(&mut self, token: TokenType, f: PrefixParseFn) {
+        self.prefix_parse_fns.insert(token, f);
+    }
 }
 
 #[derive(Debug)]

@@ -75,3 +138,15 @@ impl ParseError {
         }
     }
 }
+
+impl From<String> for ParseError {
+    fn from(desc: String) -> ParseError {
+        ParseError { desc }
+    }
+}
+
+impl From<&str> for ParseError {
+    fn from(s: &str) -> ParseError {
+        ParseError { desc: s.to_owned() }
+    }
+}
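The two lookup tables above turn parse_expression into a Pratt-style dispatcher: the TokenType of the current token selects a prefix parse function, and register_infix_fn will do the same for infix operators once those are implemented. A stripped-down, self-contained sketch of the dispatch pattern (simplified signatures, not the parser's actual types):

use std::collections::HashMap;

#[derive(Debug, PartialEq, Eq, Hash)]
enum TokenType { Ident }

// Simplified stand-in for fn(&mut Parser) -> Result<Expression, ParseError>.
type PrefixParseFn = fn(&str) -> String;

fn parse_identifier(name: &str) -> String {
    format!("Ident({})", name)
}

fn main() {
    let mut prefix_parse_fns: HashMap<TokenType, PrefixParseFn> = HashMap::new();
    prefix_parse_fns.insert(TokenType::Ident, parse_identifier);

    // Dispatch on the current token's type; a missing entry is a parse error.
    match prefix_parse_fns.get(&TokenType::Ident) {
        Some(f) => println!("{}", f("foobar")),
        None => println!("no prefix parse function registered for this token"),
    }
}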
@@ -1,6 +1,7 @@
-use crate::lexer::{Lexer, Token};
+use crate::lexer::{Lexer, Token, TokenType};
 use crate::parser::{ast::Statement, Parser};
 
+#[derive(Debug, PartialEq)]
 pub struct Program {
     statements: Vec<Statement>,
 }

@@ -9,32 +10,29 @@ impl Program {
     pub fn parse(lexer: Lexer) -> Program {
         let mut statements = vec![];
         let mut parser = Parser::new(lexer);
-        loop {
-            if let Some(token) = parser.lexer.next() {
-                parser.current_token = Some(token.clone());
-                if parser.current_token_is(Token::EOF) {
-                    break;
-                }
-
-                match parser.parse_statement(token) {
-                    Ok(v) => statements.push(v),
-                    Err(e) => {
-                        println!("{:?}", e);
-                        continue;
-                    }
-                };
-            } else {
+        while let Some(token) = parser.lexer.next() {
+            parser.current_token = Some(token.clone());
+            if parser.current_token_is(Token::new(TokenType::EOF)) {
                 break;
             }
+
+            match parser.parse_statement(token.name) {
+                Ok(v) => statements.push(v),
+                Err(e) => {
+                    println!("{:?}", e);
+                    continue;
+                }
+            };
         }
 
         Program { statements }
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use crate::lexer::Lexer;
-    use crate::parser::ast::{Identifier, Let, Statement};
+    use crate::lexer::{Lexer, Token, TokenType};
+    use crate::parser::ast::{Expression, ExpressionStatement, Identifier, Let, Statement};
     use crate::parser::Program;
 
     #[test]

@@ -45,19 +43,39 @@ mod tests {
         let foobar = 8388383;
         ";
 
-        let expected_out = vec![
-            Statement::Let(Let::new(Identifier::new("yr"))),
-            Statement::Let(Let::new(Identifier::new("qq"))),
-            Statement::Let(Let::new(Identifier::new("foobar"))),
-        ];
+        let expected_out = Program {
+            statements: vec![
+                Statement::Let(Let::new(
+                    Identifier::new(Token::new(TokenType::Let), "yr"),
+                    None,
+                    // Some(Expression::Ident(Identifier::new(
+                    //     Token::new(TokenType::Let),
+                    //     "5",
+                    // ))),
+                )),
+                Statement::Let(Let::new(
+                    Identifier::new(Token::new(TokenType::Let), "qq"),
+                    None,
+                    // Some(Expression::Ident(Identifier::new(
+                    //     Token::new(TokenType::Let),
+                    //     "10",
+                    // ))),
+                )),
+                Statement::Let(Let::new(
+                    Identifier::new(Token::new(TokenType::Let), "foobar"),
+                    None,
+                    // Some(Expression::Ident(Identifier::new(
+                    //     Token::new(TokenType::Let),
+                    //     "8388383",
+                    // ))),
+                )),
+            ],
+        };
         let lexer = Lexer::new(ip);
         let as_tree = Program::parse(lexer);
 
         assert_eq!(as_tree.statements.len(), 3);
-        for (out, expected_out) in as_tree.statements.into_iter().zip(expected_out.into_iter()) {
-            assert_eq!(out, expected_out);
-        }
+        assert_eq!(as_tree, expected_out);
     }

@@ -72,4 +90,26 @@ mod tests {
         let as_tree = Program::parse(lexer);
         assert_eq!(as_tree.statements.len(), 3);
     }
+
+    #[test]
+    fn identifier_expression() {
+        let ip = "
+        foobar;
+        ";
+        let lexer = Lexer::new(ip);
+        let as_tree = Program::parse(lexer);
+        let expected_out = Program {
+            statements: vec![Statement::ExpressionStatement(ExpressionStatement::new(
+                Some(Token::with_value(TokenType::Ident, "foobar".into())),
+                Expression::Ident(Identifier::new(
+                    Token::with_value(TokenType::Ident, "foobar".into()),
+                    "foobar",
+                )),
+            ))],
+        };
+
+        println!("{:?}", as_tree);
+        assert_eq!(as_tree.statements.len(), 1);
+        assert_eq!(as_tree, expected_out);
+    }
 }
@@ -1,7 +1,7 @@
 use crate::{lexer::Lexer, parser::Program};
 use std::io::{self, BufRead, Write};
 
-const PROMPT: &'static str = ">> ";
+const PROMPT: &str = ">> ";
 
 pub fn init() {
     let stdin = io::stdin();

@@ -14,7 +14,7 @@ pub fn init() {
 fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
     loop {
-        out.write(PROMPT.as_bytes()).unwrap();
+        out.write_all(PROMPT.as_bytes()).unwrap();
         out.flush().unwrap();
         let mut s = String::new();
         ip.read_line(&mut s).unwrap();

@@ -24,6 +24,7 @@ fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
             println!("{:?}", token);
         }
 
-        let _parser = Program::parse(tokens);
+        let parser = Program::parse(tokens);
+        println!("parser={:?}", parser);
     }
 }