Fixed Parser, Can parse Let statements now
This commit is contained in:
parent
631c2d8b1a
commit
5ac3b5e29a
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
|||
target/
|
||||
.vscode/settings.json
|
||||
.vscode/launch.json
|
||||
.idea/*
|
||||
|
|
228
src/lexer/mod.rs
228
src/lexer/mod.rs
|
@ -1,7 +1,8 @@
|
|||
use std::collections::HashMap;
|
||||
use std::convert::TryFrom;
|
||||
use std::iter::Peekable;
|
||||
use std::str::{self, Chars};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
iter::Peekable,
|
||||
str::{self, Chars},
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
static ref IDENTMAP: HashMap<&'static str, Token> = {
|
||||
|
@ -22,8 +23,11 @@ pub enum TokenType {
|
|||
Illegal,
|
||||
EOF,
|
||||
|
||||
// Identifiers
|
||||
Int,
|
||||
// Identifiers and Literals
|
||||
|
||||
// Ident is basically most things that are not covered
|
||||
// by other variants of this enum.
|
||||
Ident,
|
||||
|
||||
// Operators
|
||||
Assign,
|
||||
|
@ -53,70 +57,28 @@ pub enum TokenType {
|
|||
Else,
|
||||
False,
|
||||
Return,
|
||||
Ident,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
|
||||
pub struct Token {
|
||||
pub name: TokenType,
|
||||
pub value: Option<Literal>,
|
||||
pub literal: Option<String>,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
#[inline]
|
||||
pub fn new(name: TokenType) -> Self {
|
||||
Token { name, value: None }
|
||||
Token {
|
||||
name,
|
||||
literal: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn with_value(name: TokenType, value: Literal) -> Self {
|
||||
pub fn with_value(name: TokenType, value: &str) -> Self {
|
||||
Token {
|
||||
name,
|
||||
value: Some(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Hash, PartialEq, Eq, Clone)]
|
||||
pub enum Literal {
|
||||
String(String),
|
||||
Int(i64),
|
||||
}
|
||||
|
||||
impl From<String> for Literal {
|
||||
fn from(s: String) -> Literal {
|
||||
Literal::String(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&str> for Literal {
|
||||
fn from(s: &str) -> Literal {
|
||||
Literal::String(s.to_owned())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<i64> for Literal {
|
||||
fn from(i: i64) -> Literal {
|
||||
Literal::Int(i)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Literal> for String {
|
||||
type Error = &'static str;
|
||||
fn try_from(l: Literal) -> Result<String, Self::Error> {
|
||||
match l {
|
||||
Literal::String(v) => Ok(v),
|
||||
Literal::Int(_) => Err("can not convert Int to String"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Literal> for i64 {
|
||||
type Error = &'static str;
|
||||
fn try_from(l: Literal) -> Result<i64, Self::Error> {
|
||||
match l {
|
||||
Literal::Int(v) => Ok(v),
|
||||
Literal::String(_) => Err("can not convert String to Int"),
|
||||
literal: Some(value.to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -128,7 +90,7 @@ pub struct Lexer<'a> {
|
|||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn new(input: &'a str) -> Lexer<'a> {
|
||||
pub fn new(input: &'a str) -> Lexer {
|
||||
let input = input.chars().peekable();
|
||||
Lexer {
|
||||
input,
|
||||
|
@ -136,17 +98,20 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// This consumes one char from input
|
||||
#[inline]
|
||||
fn read_char(&mut self) -> Option<char> {
|
||||
self.input.next()
|
||||
}
|
||||
|
||||
// This reads an Identifier from input
|
||||
fn read_identifier(&mut self, first: char) -> String {
|
||||
let mut ident = Vec::new();
|
||||
let mut ident = String::new();
|
||||
ident.push(first);
|
||||
while self.peek_is_letter() {
|
||||
ident.push(self.read_char().unwrap());
|
||||
}
|
||||
ident.into_iter().collect::<String>()
|
||||
ident
|
||||
}
|
||||
|
||||
fn peek_is_letter(&mut self) -> bool {
|
||||
|
@ -165,26 +130,23 @@ impl<'a> Lexer<'a> {
|
|||
|
||||
fn skip_whitespace(&mut self) {
|
||||
while let Some(&v) = self.input.peek() {
|
||||
if v == ' ' || v == '\t' || v == '\n' || v == '\r' {
|
||||
self.read_char();
|
||||
} else {
|
||||
break;
|
||||
match v {
|
||||
' ' | '\t' | '\n' | '\r' => {
|
||||
self.read_char();
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// use i64 for all numbers for now.
|
||||
fn read_number(&mut self, first: char) -> i64 {
|
||||
fn read_number(&mut self, first: char) -> String {
|
||||
let mut number = Vec::new();
|
||||
number.push(first);
|
||||
while self.peek_is_ascii_digit() {
|
||||
number.push(self.read_char().unwrap());
|
||||
}
|
||||
number
|
||||
.into_iter()
|
||||
.collect::<String>()
|
||||
.parse::<i64>()
|
||||
.unwrap()
|
||||
number.into_iter().collect()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -238,7 +200,7 @@ impl<'a> Iterator for Lexer<'a> {
|
|||
}
|
||||
Some(ch) if ch.is_ascii_digit() => {
|
||||
let number = self.read_number(ch);
|
||||
Some(Token::with_value(TokenType::Int, (number as i64).into()))
|
||||
Some(Token::with_value(TokenType::Ident, &number))
|
||||
}
|
||||
None if !self.eof_sent => {
|
||||
self.eof_sent = true;
|
||||
|
@ -250,28 +212,26 @@ impl<'a> Iterator for Lexer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_letter(c: char) -> bool {
|
||||
c.is_ascii_alphabetic() || c == '_'
|
||||
}
|
||||
|
||||
fn lookup_ident(ident: &str) -> Token {
|
||||
match IDENTMAP.get(ident) {
|
||||
match IDENTMAP.get(&ident) {
|
||||
Some(v) => v.clone(),
|
||||
None => Token::with_value(TokenType::Ident, ident.into()),
|
||||
None => Token::with_value(TokenType::Ident, ident),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{Lexer, Token, TokenType};
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[test]
|
||||
fn new() {
|
||||
let mut tests = HashMap::new();
|
||||
|
||||
tests.insert(
|
||||
"=+(){},;",
|
||||
assert_eq!(
|
||||
Lexer::new("=+(){},;").collect::<Vec<Token>>(),
|
||||
vec![
|
||||
Token::new(TokenType::Assign),
|
||||
Token::new(TokenType::Plus),
|
||||
|
@ -284,98 +244,106 @@ mod tests {
|
|||
Token::new(TokenType::EOF),
|
||||
],
|
||||
);
|
||||
tests.insert(
|
||||
"let five = 5;
|
||||
let ten = 10;
|
||||
|
||||
let add = fn(x, y) {
|
||||
x + y;
|
||||
};
|
||||
assert_eq!(
|
||||
Lexer::new(
|
||||
"let five = 5;
|
||||
let ten = 10;
|
||||
|
||||
let result = add(five, ten);",
|
||||
let add = fn(x, y) {
|
||||
x + y;
|
||||
};
|
||||
|
||||
let result = add(five, ten);"
|
||||
)
|
||||
.collect::<Vec<Token>>(),
|
||||
vec![
|
||||
Token::new(TokenType::Let),
|
||||
Token::with_value(TokenType::Ident, "five".into()),
|
||||
Token::with_value(TokenType::Ident, "five"),
|
||||
Token::new(TokenType::Assign),
|
||||
Token::with_value(TokenType::Int, 5.into()),
|
||||
Token::with_value(TokenType::Ident, "5"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::Let),
|
||||
Token::with_value(TokenType::Ident, "ten".into()),
|
||||
Token::with_value(TokenType::Ident, "ten"),
|
||||
Token::new(TokenType::Assign),
|
||||
Token::with_value(TokenType::Int, 10.into()),
|
||||
Token::with_value(TokenType::Ident, "10"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::Let),
|
||||
Token::with_value(TokenType::Ident, "add".into()),
|
||||
Token::with_value(TokenType::Ident, "add"),
|
||||
Token::new(TokenType::Assign),
|
||||
Token::new(TokenType::Function),
|
||||
Token::new(TokenType::LParen),
|
||||
Token::with_value(TokenType::Ident, "x".into()),
|
||||
Token::with_value(TokenType::Ident, "x"),
|
||||
Token::new(TokenType::Comma),
|
||||
Token::with_value(TokenType::Ident, "y".into()),
|
||||
Token::with_value(TokenType::Ident, "y"),
|
||||
Token::new(TokenType::RParen),
|
||||
Token::new(TokenType::LBrace),
|
||||
Token::with_value(TokenType::Ident, "x".into()),
|
||||
Token::with_value(TokenType::Ident, "x"),
|
||||
Token::new(TokenType::Plus),
|
||||
Token::with_value(TokenType::Ident, "y".into()),
|
||||
Token::with_value(TokenType::Ident, "y"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::RBrace),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::Let),
|
||||
Token::with_value(TokenType::Ident, "result".into()),
|
||||
Token::with_value(TokenType::Ident, "result"),
|
||||
Token::new(TokenType::Assign),
|
||||
Token::with_value(TokenType::Ident, "add".into()),
|
||||
Token::with_value(TokenType::Ident, "add"),
|
||||
Token::new(TokenType::LParen),
|
||||
Token::with_value(TokenType::Ident, "five".into()),
|
||||
Token::with_value(TokenType::Ident, "five"),
|
||||
Token::new(TokenType::Comma),
|
||||
Token::with_value(TokenType::Ident, "ten".into()),
|
||||
Token::with_value(TokenType::Ident, "ten"),
|
||||
Token::new(TokenType::RParen),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::EOF),
|
||||
],
|
||||
);
|
||||
tests.insert(
|
||||
"let result = add(five, ten);
|
||||
!-/*5;
|
||||
5 < 10 > 5;
|
||||
|
||||
if(5 < 10) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
10 == 10;
|
||||
9 != 10;
|
||||
|
||||
",
|
||||
assert_eq!(
|
||||
Lexer::new(
|
||||
"let result = add(five, ten);
|
||||
!-/*5;
|
||||
5 < 10 > 5;
|
||||
|
||||
if(5 < 10) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
10 == 10;
|
||||
9 != 10;
|
||||
|
||||
"
|
||||
)
|
||||
.collect::<Vec<Token>>(),
|
||||
vec![
|
||||
Token::new(TokenType::Let),
|
||||
Token::with_value(TokenType::Ident, "result".into()),
|
||||
Token::with_value(TokenType::Ident, "result"),
|
||||
Token::new(TokenType::Assign),
|
||||
Token::with_value(TokenType::Ident, "add".into()),
|
||||
Token::with_value(TokenType::Ident, "add"),
|
||||
Token::new(TokenType::LParen),
|
||||
Token::with_value(TokenType::Ident, "five".into()),
|
||||
Token::with_value(TokenType::Ident, "five"),
|
||||
Token::new(TokenType::Comma),
|
||||
Token::with_value(TokenType::Ident, "ten".into()),
|
||||
Token::with_value(TokenType::Ident, "ten"),
|
||||
Token::new(TokenType::RParen),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::ExclamationMark),
|
||||
Token::new(TokenType::Subtract),
|
||||
Token::new(TokenType::Divide),
|
||||
Token::new(TokenType::Multiply),
|
||||
Token::with_value(TokenType::Int, 5.into()),
|
||||
Token::with_value(TokenType::Ident, "5"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::with_value(TokenType::Int, 5.into()),
|
||||
Token::with_value(TokenType::Ident, "5"),
|
||||
Token::new(TokenType::LessThan),
|
||||
Token::with_value(TokenType::Int, 10.into()),
|
||||
Token::with_value(TokenType::Ident, "10"),
|
||||
Token::new(TokenType::GreaterThan),
|
||||
Token::with_value(TokenType::Int, 5.into()),
|
||||
Token::with_value(TokenType::Ident, "5"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::If),
|
||||
Token::new(TokenType::LParen),
|
||||
Token::with_value(TokenType::Int, 5.into()),
|
||||
Token::with_value(TokenType::Ident, "5"),
|
||||
Token::new(TokenType::LessThan),
|
||||
Token::with_value(TokenType::Int, 10.into()),
|
||||
Token::with_value(TokenType::Ident, "10"),
|
||||
Token::new(TokenType::RParen),
|
||||
Token::new(TokenType::LBrace),
|
||||
Token::new(TokenType::Return),
|
||||
|
@ -388,28 +356,16 @@ mod tests {
|
|||
Token::new(TokenType::False),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::RBrace),
|
||||
Token::with_value(TokenType::Int, 10.into()),
|
||||
Token::with_value(TokenType::Ident, "10"),
|
||||
Token::new(TokenType::Equals),
|
||||
Token::with_value(TokenType::Int, 10.into()),
|
||||
Token::with_value(TokenType::Ident, "10"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::with_value(TokenType::Int, 9.into()),
|
||||
Token::with_value(TokenType::Ident, "9"),
|
||||
Token::new(TokenType::NotEquals),
|
||||
Token::with_value(TokenType::Int, 10.into()),
|
||||
Token::with_value(TokenType::Ident, "10"),
|
||||
Token::new(TokenType::Semicolon),
|
||||
Token::new(TokenType::EOF),
|
||||
],
|
||||
);
|
||||
|
||||
for (k, v) in tests {
|
||||
let tokenized_output = Lexer::new(k).collect::<Vec<Token>>();
|
||||
assert_eq!(v.len(), tokenized_output.len());
|
||||
|
||||
for (exp, actual) in v.into_iter().zip(tokenized_output) {
|
||||
if actual != exp {
|
||||
println!("Expect: {:?}, Actual: {:?}", exp, actual);
|
||||
}
|
||||
assert_eq!(actual, exp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,4 +7,4 @@ mod repl;
|
|||
|
||||
fn main() {
|
||||
repl::init();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,115 +1,82 @@
|
|||
use crate::{
|
||||
lexer::{Literal, Token, TokenType},
|
||||
parser::{ExpressionPriority, ParseError, Parser},
|
||||
lexer::{Token, TokenType},
|
||||
parser::Parser,
|
||||
};
|
||||
use std::convert::TryFrom;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Program {
|
||||
pub statements: Vec<Statement>,
|
||||
}
|
||||
|
||||
pub enum Node {
|
||||
Statement(Statement),
|
||||
Expression(Expression),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Statement {
|
||||
Let(Let),
|
||||
Return(Return),
|
||||
ExpressionStatement(ExpressionStatement),
|
||||
Let(LetStatement),
|
||||
}
|
||||
|
||||
impl<'a> Statement {
|
||||
pub fn parse(parser: &'a mut Parser, token: Token) -> Option<Self> {
|
||||
match token.name {
|
||||
TokenType::Let => Some(Statement::Let(LetStatement::parse(parser)?)),
|
||||
_ => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Expressions will not end up being a struct; this unit struct is only a placeholder for now.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Expression;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Expression {
|
||||
Ident(Identifier),
|
||||
pub struct LetStatement {
|
||||
// `name` stores the identifier of the binding
|
||||
pub name: Identifier,
|
||||
// `value` stores the expression that will produce the bound value
|
||||
pub value: Option<Expression>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Let {
|
||||
name: Identifier,
|
||||
value: Option<Expression>,
|
||||
}
|
||||
impl LetStatement {
|
||||
// TODO: Implement code to parse let statement
|
||||
pub fn parse(parser: &mut Parser) -> Option<Self> {
|
||||
let mut stmt = LetStatement {
|
||||
name: Identifier::new(TokenType::Let, "placeholder_value"),
|
||||
value: None,
|
||||
};
|
||||
|
||||
impl Let {
|
||||
pub fn new(name: Identifier, value: Option<Expression>) -> Let {
|
||||
Let { name, value }
|
||||
}
|
||||
|
||||
pub fn parse(parser: &mut Parser) -> Result<Let, ParseError> {
|
||||
if !parser.expect_peek(Token::new(TokenType::Ident)) {
|
||||
return Err(ParseError::new("expected ident, Couldn't find it"));
|
||||
if let Some(v) = parser.expect_peek(TokenType::Ident) {
|
||||
stmt.name.value = v.literal?;
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
|
||||
let literal = String::try_from(parser.current_token.clone().unwrap().value.unwrap())?;
|
||||
let name = Identifier::new(Token::new(TokenType::Let), literal.into());
|
||||
parser.expect_peek(TokenType::Assign)?;
|
||||
|
||||
if !parser.expect_peek(Token::new(TokenType::Assign)) {
|
||||
return Err(ParseError::new("expected =, Could not find it"));
|
||||
}
|
||||
// TODO: Right now, we are just skipping over all the expressions.
|
||||
// Expression parsing will come later.
|
||||
while parser.lexer.next() != Some(Token::new(TokenType::Semicolon)) {}
|
||||
|
||||
// TODO: Replace this with code to parse expressions correctly
|
||||
while !parser.current_token_is(Token::new(TokenType::Semicolon)) {
|
||||
parser.current_token = parser.lexer.next();
|
||||
}
|
||||
|
||||
Ok(Let::new(name, None))
|
||||
Some(stmt)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Return {
|
||||
return_value: Expression,
|
||||
}
|
||||
|
||||
impl Return {
|
||||
pub fn new() -> Return {
|
||||
Return {
|
||||
return_value: Expression::Ident(Identifier::new(
|
||||
Token::new(TokenType::Return),
|
||||
"return".into(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(parser: &mut Parser) -> Result<Return, ParseError> {
|
||||
while !parser.current_token_is(Token::new(TokenType::Semicolon)) {
|
||||
parser.current_token = parser.lexer.next();
|
||||
}
|
||||
|
||||
Ok(Return::new())
|
||||
}
|
||||
}
|
||||
|
||||
// Identifier is used to represent variable names and other user created identifiers.
|
||||
// `Literal` can be an int as well. So, an Identifier can be an Integer Literal.
|
||||
// The wording sounds a little confusing, maybe?
|
||||
// TODO: possible @refactor
|
||||
// Identifier will be an expression
|
||||
// An identifier on the left-hand side of a let statement (the `x` in `let x = 5;`) doesn't produce a value,
|
||||
// but an identifier *can* produce a value when used on the right-hand side, e.g. in `let x = y;` the `y` produces a value.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Identifier {
|
||||
name: Token,
|
||||
value: Literal,
|
||||
pub token: TokenType,
|
||||
pub value: String,
|
||||
}
|
||||
|
||||
impl Identifier {
|
||||
pub fn new(name: Token, value: Literal) -> Identifier {
|
||||
Identifier { name, value }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct ExpressionStatement {
|
||||
token: Option<Token>, // The first token in Expression
|
||||
expression: Expression,
|
||||
}
|
||||
|
||||
impl ExpressionStatement {
|
||||
#[allow(dead_code)]
|
||||
pub fn new(token: Option<Token>, expression: Expression) -> Self {
|
||||
ExpressionStatement { token, expression }
|
||||
}
|
||||
|
||||
pub fn parse(parser: &mut Parser) -> Result<Self, ParseError> {
|
||||
let ct = parser.current_token.clone();
|
||||
|
||||
let expr = parser.parse_expression(ExpressionPriority::Lowest)?;
|
||||
|
||||
let s = Token::new(TokenType::Semicolon);
|
||||
if parser.peek_token_is(&s) {
|
||||
parser.current_token = parser.lexer.next();
|
||||
pub fn new(token: TokenType, v: &str) -> Self {
|
||||
Identifier {
|
||||
token: token,
|
||||
value: v.to_string(),
|
||||
}
|
||||
|
||||
Ok(ExpressionStatement::new(ct, expr))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,165 +1,95 @@
|
|||
pub mod ast;
|
||||
mod program;
|
||||
|
||||
pub use self::program::Program;
|
||||
|
||||
use self::ast::{Expression, ExpressionStatement, Identifier, Let, Return, Statement};
|
||||
use crate::lexer::{Lexer, Token, TokenType};
|
||||
use std::{collections::HashMap, convert::TryFrom, iter::Peekable};
|
||||
|
||||
type PrefixParseFn = fn(&mut Parser) -> Result<Expression, ParseError>;
|
||||
type InfixParseFn = fn(Expression) -> Result<Expression, ParseError>;
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord)]
|
||||
enum ExpressionPriority {
|
||||
Lowest,
|
||||
Equals,
|
||||
LessGreater,
|
||||
Sum,
|
||||
Product,
|
||||
Prefix,
|
||||
Call,
|
||||
}
|
||||
use {
|
||||
crate::{
|
||||
lexer::{Lexer, Token, TokenType},
|
||||
parser::ast::{Program, Statement},
|
||||
},
|
||||
std::iter::Peekable,
|
||||
};
|
||||
|
||||
pub struct Parser<'a> {
|
||||
lexer: Peekable<Lexer<'a>>,
|
||||
current_token: Option<Token>,
|
||||
prefix_parse_fns: HashMap<TokenType, PrefixParseFn>,
|
||||
infix_parse_fns: HashMap<TokenType, InfixParseFn>,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
fn new(lexer: Lexer<'a>) -> Parser {
|
||||
let prefix_parse_fns = HashMap::new();
|
||||
|
||||
let mut parser = Parser {
|
||||
pub fn new(lexer: Lexer<'a>) -> Self {
|
||||
Self {
|
||||
lexer: lexer.peekable(),
|
||||
current_token: None,
|
||||
infix_parse_fns: HashMap::new(),
|
||||
prefix_parse_fns,
|
||||
};
|
||||
parser.register_prefix_fn(TokenType::Ident, Parser::parse_identifier);
|
||||
parser.register_prefix_fn(TokenType::Int, Parser::parse_integer_literal);
|
||||
parser
|
||||
}
|
||||
|
||||
fn parse_statement(&mut self, token: TokenType) -> Result<Statement, ParseError> {
|
||||
match token {
|
||||
TokenType::Let => match Let::parse(self) {
|
||||
Ok(v) => Ok(Statement::Let(v)),
|
||||
Err(e) => Err(e), //TODO: Return appropriate error
|
||||
},
|
||||
TokenType::Return => match Return::parse(self) {
|
||||
Ok(v) => Ok(Statement::Return(v)),
|
||||
Err(e) => Err(e),
|
||||
},
|
||||
_ => match ExpressionStatement::parse(self) {
|
||||
Ok(v) => Ok(Statement::ExpressionStatement(v)),
|
||||
Err(e) => Err(e),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_expression(
|
||||
&mut self,
|
||||
_priority: ExpressionPriority,
|
||||
) -> Result<Expression, ParseError> {
|
||||
let current_token = if let Some(token) = &self.current_token {
|
||||
token
|
||||
} else {
|
||||
return Err(ParseError::new(
|
||||
"parser.current_token is None. This *should* not have happened.",
|
||||
));
|
||||
};
|
||||
let prefix = match self.prefix_parse_fns.get(¤t_token.name) {
|
||||
Some(v) => v,
|
||||
None => {
|
||||
return Err(ParseError::new(&format!(
|
||||
"no prefix parse function with token {:?} found in parser",
|
||||
current_token
|
||||
)))
|
||||
pub fn parse_program(mut self) -> Program {
|
||||
let mut program = Program { statements: vec![] };
|
||||
|
||||
loop {
|
||||
let token = self.lexer.next().unwrap();
|
||||
if token.name == TokenType::EOF {
|
||||
break;
|
||||
}
|
||||
};
|
||||
prefix(self)
|
||||
|
||||
match Statement::parse(&mut self, token) {
|
||||
Some(v) => program.statements.push(v),
|
||||
None => todo!(), // This will happen in case of a parsing error or something
|
||||
}
|
||||
}
|
||||
|
||||
program
|
||||
}
|
||||
|
||||
fn parse_identifier(parser: &mut Parser) -> Result<Expression, ParseError> {
|
||||
let ct = parser.current_token.clone().unwrap();
|
||||
Ok(Expression::Ident(Identifier::new(
|
||||
ct.clone(), // TODO: Correction needed, Can be a source of subtle error in some cases
|
||||
String::try_from(ct.value.unwrap())?.into(),
|
||||
)))
|
||||
}
|
||||
|
||||
fn parse_integer_literal(parser: &mut Parser) -> Result<Expression, ParseError> {
|
||||
let v = parser.current_token.clone().unwrap();
|
||||
|
||||
Ok(Expression::Ident(Identifier::new(
|
||||
v.clone(),
|
||||
v.value.unwrap(),
|
||||
)))
|
||||
}
|
||||
|
||||
fn expect_peek(&mut self, token: Token) -> bool {
|
||||
fn peek_token_is(&mut self, token: TokenType) -> bool {
|
||||
match self.lexer.peek() {
|
||||
Some(v) if v.name == token.name => {
|
||||
self.current_token = self.lexer.next();
|
||||
true
|
||||
}
|
||||
Some(_) | None => false,
|
||||
Some(v) => v.name == token,
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn current_token_is(&self, token: Token) -> bool {
|
||||
self.current_token == Some(token)
|
||||
}
|
||||
// TODO: Remove this. We most likely don't need it anywhere
|
||||
// fn current_token_is(&self, token: TokenType) -> bool {
|
||||
// false
|
||||
// }
|
||||
|
||||
fn peek_token_is(&mut self, token: &Token) -> bool {
|
||||
self.lexer.peek() == Some(token)
|
||||
}
|
||||
|
||||
fn register_infix_fn(&mut self, token: TokenType, f: InfixParseFn) {
|
||||
self.infix_parse_fns.insert(token, f);
|
||||
}
|
||||
|
||||
fn register_prefix_fn(&mut self, token: TokenType, f: PrefixParseFn) {
|
||||
self.prefix_parse_fns.insert(token, f);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ParseError {
|
||||
desc: String,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ParseError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "ParseError: {}", self)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for ParseError {
|
||||
fn description(&self) -> &str {
|
||||
&self.desc
|
||||
}
|
||||
}
|
||||
|
||||
impl ParseError {
|
||||
fn new(desc: &str) -> ParseError {
|
||||
ParseError {
|
||||
desc: desc.to_owned(),
|
||||
fn expect_peek(&mut self, token: TokenType) -> Option<Token> {
|
||||
if self.peek_token_is(token) {
|
||||
self.lexer.next()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for ParseError {
|
||||
fn from(desc: String) -> ParseError {
|
||||
ParseError { desc }
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{
|
||||
lexer::{Lexer, TokenType},
|
||||
parser::{
|
||||
ast::{Identifier, LetStatement, Statement},
|
||||
Parser,
|
||||
},
|
||||
};
|
||||
#[test]
|
||||
fn let_statements() {
|
||||
let lexer = Lexer::new("let x =5;let y=10; let foobar=538383;");
|
||||
let parser = Parser::new(lexer);
|
||||
let program = parser.parse_program();
|
||||
|
||||
impl From<&str> for ParseError {
|
||||
fn from(s: &str) -> ParseError {
|
||||
ParseError { desc: s.to_owned() }
|
||||
assert_eq!(program.statements.len(), 3);
|
||||
|
||||
assert_eq!(
|
||||
program.statements,
|
||||
vec![
|
||||
Statement::Let(LetStatement {
|
||||
name: Identifier::new(TokenType::Let, "x"),
|
||||
value: None
|
||||
}),
|
||||
Statement::Let(LetStatement {
|
||||
name: Identifier::new(TokenType::Let, "y"),
|
||||
value: None
|
||||
}),
|
||||
Statement::Let(LetStatement {
|
||||
name: Identifier::new(TokenType::Let, "foobar"),
|
||||
value: None
|
||||
})
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,135 +0,0 @@
|
|||
use crate::lexer::{Lexer, Token, TokenType};
|
||||
use crate::parser::{ast::Statement, Parser};
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Program {
|
||||
statements: Vec<Statement>,
|
||||
}
|
||||
|
||||
impl Program {
|
||||
pub fn parse(lexer: Lexer) -> Program {
|
||||
let mut statements = vec![];
|
||||
let mut parser = Parser::new(lexer);
|
||||
while let Some(token) = parser.lexer.next() {
|
||||
parser.current_token = Some(token.clone());
|
||||
if parser.current_token_is(Token::new(TokenType::EOF)) {
|
||||
break;
|
||||
}
|
||||
|
||||
match parser.parse_statement(token.name) {
|
||||
Ok(v) => statements.push(v),
|
||||
Err(e) => {
|
||||
println!("{:?}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
Program { statements }
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::lexer::{Lexer, Token, TokenType};
|
||||
use crate::parser::ast::{Expression, ExpressionStatement, Identifier, Let, Statement};
|
||||
use crate::parser::Program;
|
||||
|
||||
#[test]
|
||||
fn let_statements() {
|
||||
let ip = "
|
||||
let yr = 5;
|
||||
let qq = 10;
|
||||
let foobar = 8388383;
|
||||
";
|
||||
|
||||
let expected_out = Program {
|
||||
statements: vec![
|
||||
Statement::Let(Let::new(
|
||||
Identifier::new(Token::new(TokenType::Let), "yr".into()),
|
||||
None
|
||||
// Some(Expression::Ident(Identifier::new(
|
||||
// Token::new(TokenType::Let),
|
||||
// "5",
|
||||
// ))),
|
||||
)),
|
||||
Statement::Let(Let::new(
|
||||
Identifier::new(Token::new(TokenType::Let), "qq".into()),
|
||||
None
|
||||
// Some(Expression::Ident(Identifier::new(
|
||||
// Token::new(TokenType::Let),
|
||||
// "10",
|
||||
// ))),
|
||||
)),
|
||||
Statement::Let(Let::new(
|
||||
Identifier::new(Token::new(TokenType::Let), "foobar".into()),
|
||||
None
|
||||
// Some(Expression::Ident(Identifier::new(
|
||||
// Token::new(TokenType::Let),
|
||||
// "8388383",
|
||||
// ))),
|
||||
)),
|
||||
],
|
||||
};
|
||||
let lexer = Lexer::new(ip);
|
||||
let as_tree = Program::parse(lexer);
|
||||
|
||||
assert_eq!(as_tree.statements.len(), 3);
|
||||
assert_eq!(as_tree, expected_out);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn return_statements() {
|
||||
let ip = "
|
||||
return 5;
|
||||
return 10;
|
||||
return 80932;
|
||||
";
|
||||
|
||||
let lexer = Lexer::new(ip);
|
||||
let as_tree = Program::parse(lexer);
|
||||
assert_eq!(as_tree.statements.len(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn identifier_expression() {
|
||||
let ip = "
|
||||
foobar;
|
||||
";
|
||||
let lexer = Lexer::new(ip);
|
||||
let as_tree = Program::parse(lexer);
|
||||
let expected_out = Program {
|
||||
statements: vec![Statement::ExpressionStatement(ExpressionStatement::new(
|
||||
Some(Token::with_value(TokenType::Ident, "foobar".into())),
|
||||
Expression::Ident(Identifier::new(
|
||||
Token::with_value(TokenType::Ident, "foobar".into()),
|
||||
"foobar".into(),
|
||||
)),
|
||||
))],
|
||||
};
|
||||
|
||||
println!("{:?}", as_tree);
|
||||
assert_eq!(as_tree.statements.len(), 1);
|
||||
assert_eq!(as_tree, expected_out);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn integer_literal_expression() {
|
||||
let ip = "5;";
|
||||
|
||||
let lexer = Lexer::new(ip);
|
||||
let as_tree = Program::parse(lexer);
|
||||
let expected_out = Program {
|
||||
statements: vec![Statement::ExpressionStatement(ExpressionStatement::new(
|
||||
Some(Token::with_value(TokenType::Int, 5.into())),
|
||||
Expression::Ident(Identifier::new(
|
||||
Token::with_value(TokenType::Int, 5.into()),
|
||||
5.into(),
|
||||
)),
|
||||
))],
|
||||
};
|
||||
|
||||
assert_eq!(as_tree.statements.len(), 1);
|
||||
assert_eq!(as_tree, expected_out);
|
||||
}
|
||||
}
|
12
src/repl.rs
12
src/repl.rs
|
@ -1,7 +1,7 @@
|
|||
use crate::{lexer::Lexer, parser::Program};
|
||||
use crate::{lexer::Lexer, parser::Parser};
|
||||
use std::io::{self, BufRead, Write};
|
||||
|
||||
const PROMPT: &str = ">> ";
|
||||
const PROMPT: &[u8] = b">> ";
|
||||
|
||||
pub fn init() {
|
||||
let stdin = io::stdin();
|
||||
|
@ -14,7 +14,7 @@ pub fn init() {
|
|||
|
||||
fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
|
||||
loop {
|
||||
out.write_all(PROMPT.as_bytes()).unwrap();
|
||||
out.write_all(PROMPT).unwrap();
|
||||
out.flush().unwrap();
|
||||
let mut s = String::new();
|
||||
ip.read_line(&mut s).unwrap();
|
||||
|
@ -24,7 +24,9 @@ fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
|
|||
println!("{:?}", token);
|
||||
}
|
||||
|
||||
let parser = Program::parse(tokens);
|
||||
println!("parser={:?}", parser);
|
||||
let parser = Parser::new(tokens);
|
||||
|
||||
let stmts = parser.parse_program();
|
||||
println!("parser={:?}", stmts);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user