Fixed the parser; it can now parse `let` statements

This commit is contained in:
Ishan Jain 2019-12-25 17:55:15 +05:30
parent 631c2d8b1a
commit 5ac3b5e29a
7 changed files with 226 additions and 505 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
target/ target/
.vscode/settings.json .vscode/settings.json
.vscode/launch.json .vscode/launch.json
.idea/*

View File

@ -1,7 +1,8 @@
use std::collections::HashMap; use std::{
use std::convert::TryFrom; collections::HashMap,
use std::iter::Peekable; iter::Peekable,
use std::str::{self, Chars}; str::{self, Chars},
};
lazy_static! { lazy_static! {
static ref IDENTMAP: HashMap<&'static str, Token> = { static ref IDENTMAP: HashMap<&'static str, Token> = {
@ -22,8 +23,11 @@ pub enum TokenType {
Illegal, Illegal,
EOF, EOF,
// Identifiers // Identifiers and Literals
Int,
// Ident is basically most things that are not covered
// by other variants of this enum.
Ident,
// Operators // Operators
Assign, Assign,
@ -53,70 +57,28 @@ pub enum TokenType {
Else, Else,
False, False,
Return, Return,
Ident,
} }
#[derive(Debug, PartialEq, Eq, Clone, Hash)] #[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct Token { pub struct Token {
pub name: TokenType, pub name: TokenType,
pub value: Option<Literal>, pub literal: Option<String>,
} }
impl Token { impl Token {
#[inline] #[inline]
pub fn new(name: TokenType) -> Self { pub fn new(name: TokenType) -> Self {
Token { name, value: None } Token {
name,
literal: None,
}
} }
#[inline] #[inline]
pub fn with_value(name: TokenType, value: Literal) -> Self { pub fn with_value(name: TokenType, value: &str) -> Self {
Token { Token {
name, name,
value: Some(value), literal: Some(value.to_string()),
}
}
}
/// Value carried by a token: either a piece of text or a signed 64-bit integer.
#[derive(Debug, Hash, PartialEq, Eq, Clone)]
pub enum Literal {
    /// Textual payload.
    String(String),
    /// Integer payload.
    Int(i64),
}

impl From<String> for Literal {
    /// Wraps an owned string as a `Literal::String` without copying it.
    fn from(text: String) -> Self {
        Literal::String(text)
    }
}

impl From<&str> for Literal {
    /// Copies the borrowed text into a new `Literal::String`.
    fn from(text: &str) -> Self {
        Literal::from(String::from(text))
    }
}

impl From<i64> for Literal {
    /// Wraps an integer as a `Literal::Int`.
    fn from(number: i64) -> Self {
        Literal::Int(number)
    }
}

impl TryFrom<Literal> for String {
    type Error = &'static str;

    /// Extracts the text of a `Literal::String`; any other variant is an error.
    fn try_from(literal: Literal) -> Result<Self, Self::Error> {
        if let Literal::String(text) = literal {
            Ok(text)
        } else {
            Err("can not convert Int to String")
        }
    }
}
impl TryFrom<Literal> for i64 {
type Error = &'static str;
fn try_from(l: Literal) -> Result<i64, Self::Error> {
match l {
Literal::Int(v) => Ok(v),
Literal::String(_) => Err("can not convert String to Int"),
} }
} }
} }
@ -128,7 +90,7 @@ pub struct Lexer<'a> {
} }
impl<'a> Lexer<'a> { impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Lexer<'a> { pub fn new(input: &'a str) -> Lexer {
let input = input.chars().peekable(); let input = input.chars().peekable();
Lexer { Lexer {
input, input,
@ -136,17 +98,20 @@ impl<'a> Lexer<'a> {
} }
} }
// This consumes one char from input
#[inline]
fn read_char(&mut self) -> Option<char> { fn read_char(&mut self) -> Option<char> {
self.input.next() self.input.next()
} }
// This reads an Identifier from input
fn read_identifier(&mut self, first: char) -> String { fn read_identifier(&mut self, first: char) -> String {
let mut ident = Vec::new(); let mut ident = String::new();
ident.push(first); ident.push(first);
while self.peek_is_letter() { while self.peek_is_letter() {
ident.push(self.read_char().unwrap()); ident.push(self.read_char().unwrap());
} }
ident.into_iter().collect::<String>() ident
} }
fn peek_is_letter(&mut self) -> bool { fn peek_is_letter(&mut self) -> bool {
@ -165,26 +130,23 @@ impl<'a> Lexer<'a> {
fn skip_whitespace(&mut self) { fn skip_whitespace(&mut self) {
while let Some(&v) = self.input.peek() { while let Some(&v) = self.input.peek() {
if v == ' ' || v == '\t' || v == '\n' || v == '\r' { match v {
' ' | '\t' | '\n' | '\r' => {
self.read_char(); self.read_char();
} else { }
break; _ => break,
} }
} }
} }
// use i64 for all numbers for now. // use i64 for all numbers for now.
fn read_number(&mut self, first: char) -> i64 { fn read_number(&mut self, first: char) -> String {
let mut number = Vec::new(); let mut number = Vec::new();
number.push(first); number.push(first);
while self.peek_is_ascii_digit() { while self.peek_is_ascii_digit() {
number.push(self.read_char().unwrap()); number.push(self.read_char().unwrap());
} }
number number.into_iter().collect()
.into_iter()
.collect::<String>()
.parse::<i64>()
.unwrap()
} }
} }
@ -238,7 +200,7 @@ impl<'a> Iterator for Lexer<'a> {
} }
Some(ch) if ch.is_ascii_digit() => { Some(ch) if ch.is_ascii_digit() => {
let number = self.read_number(ch); let number = self.read_number(ch);
Some(Token::with_value(TokenType::Int, (number as i64).into())) Some(Token::with_value(TokenType::Ident, &number))
} }
None if !self.eof_sent => { None if !self.eof_sent => {
self.eof_sent = true; self.eof_sent = true;
@ -250,28 +212,26 @@ impl<'a> Iterator for Lexer<'a> {
} }
} }
#[inline]
fn is_letter(c: char) -> bool { fn is_letter(c: char) -> bool {
c.is_ascii_alphabetic() || c == '_' c.is_ascii_alphabetic() || c == '_'
} }
fn lookup_ident(ident: &str) -> Token { fn lookup_ident(ident: &str) -> Token {
match IDENTMAP.get(ident) { match IDENTMAP.get(&ident) {
Some(v) => v.clone(), Some(v) => v.clone(),
None => Token::with_value(TokenType::Ident, ident.into()), None => Token::with_value(TokenType::Ident, ident),
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::{Lexer, Token, TokenType}; use super::{Lexer, Token, TokenType};
use std::collections::HashMap;
#[test] #[test]
fn new() { fn new() {
let mut tests = HashMap::new(); assert_eq!(
Lexer::new("=+(){},;").collect::<Vec<Token>>(),
tests.insert(
"=+(){},;",
vec![ vec![
Token::new(TokenType::Assign), Token::new(TokenType::Assign),
Token::new(TokenType::Plus), Token::new(TokenType::Plus),
@ -284,7 +244,9 @@ mod tests {
Token::new(TokenType::EOF), Token::new(TokenType::EOF),
], ],
); );
tests.insert(
assert_eq!(
Lexer::new(
"let five = 5; "let five = 5;
let ten = 10; let ten = 10;
@ -292,48 +254,52 @@ mod tests {
x + y; x + y;
}; };
let result = add(five, ten);", let result = add(five, ten);"
)
.collect::<Vec<Token>>(),
vec![ vec![
Token::new(TokenType::Let), Token::new(TokenType::Let),
Token::with_value(TokenType::Ident, "five".into()), Token::with_value(TokenType::Ident, "five"),
Token::new(TokenType::Assign), Token::new(TokenType::Assign),
Token::with_value(TokenType::Int, 5.into()), Token::with_value(TokenType::Ident, "5"),
Token::new(TokenType::Semicolon), Token::new(TokenType::Semicolon),
Token::new(TokenType::Let), Token::new(TokenType::Let),
Token::with_value(TokenType::Ident, "ten".into()), Token::with_value(TokenType::Ident, "ten"),
Token::new(TokenType::Assign), Token::new(TokenType::Assign),
Token::with_value(TokenType::Int, 10.into()), Token::with_value(TokenType::Ident, "10"),
Token::new(TokenType::Semicolon), Token::new(TokenType::Semicolon),
Token::new(TokenType::Let), Token::new(TokenType::Let),
Token::with_value(TokenType::Ident, "add".into()), Token::with_value(TokenType::Ident, "add"),
Token::new(TokenType::Assign), Token::new(TokenType::Assign),
Token::new(TokenType::Function), Token::new(TokenType::Function),
Token::new(TokenType::LParen), Token::new(TokenType::LParen),
Token::with_value(TokenType::Ident, "x".into()), Token::with_value(TokenType::Ident, "x"),
Token::new(TokenType::Comma), Token::new(TokenType::Comma),
Token::with_value(TokenType::Ident, "y".into()), Token::with_value(TokenType::Ident, "y"),
Token::new(TokenType::RParen), Token::new(TokenType::RParen),
Token::new(TokenType::LBrace), Token::new(TokenType::LBrace),
Token::with_value(TokenType::Ident, "x".into()), Token::with_value(TokenType::Ident, "x"),
Token::new(TokenType::Plus), Token::new(TokenType::Plus),
Token::with_value(TokenType::Ident, "y".into()), Token::with_value(TokenType::Ident, "y"),
Token::new(TokenType::Semicolon), Token::new(TokenType::Semicolon),
Token::new(TokenType::RBrace), Token::new(TokenType::RBrace),
Token::new(TokenType::Semicolon), Token::new(TokenType::Semicolon),
Token::new(TokenType::Let), Token::new(TokenType::Let),
Token::with_value(TokenType::Ident, "result".into()), Token::with_value(TokenType::Ident, "result"),
Token::new(TokenType::Assign), Token::new(TokenType::Assign),
Token::with_value(TokenType::Ident, "add".into()), Token::with_value(TokenType::Ident, "add"),
Token::new(TokenType::LParen), Token::new(TokenType::LParen),
Token::with_value(TokenType::Ident, "five".into()), Token::with_value(TokenType::Ident, "five"),
Token::new(TokenType::Comma), Token::new(TokenType::Comma),
Token::with_value(TokenType::Ident, "ten".into()), Token::with_value(TokenType::Ident, "ten"),
Token::new(TokenType::RParen), Token::new(TokenType::RParen),
Token::new(TokenType::Semicolon), Token::new(TokenType::Semicolon),
Token::new(TokenType::EOF), Token::new(TokenType::EOF),
], ],
); );
tests.insert(
assert_eq!(
Lexer::new(
"let result = add(five, ten); "let result = add(five, ten);
!-/*5; !-/*5;
5 < 10 > 5; 5 < 10 > 5;
@ -347,35 +313,37 @@ mod tests {
10 == 10; 10 == 10;
9 != 10; 9 != 10;
", "
)
.collect::<Vec<Token>>(),
vec![ vec![
Token::new(TokenType::Let), Token::new(TokenType::Let),
Token::with_value(TokenType::Ident, "result".into()), Token::with_value(TokenType::Ident, "result"),
Token::new(TokenType::Assign), Token::new(TokenType::Assign),
Token::with_value(TokenType::Ident, "add".into()), Token::with_value(TokenType::Ident, "add"),
Token::new(TokenType::LParen), Token::new(TokenType::LParen),
Token::with_value(TokenType::Ident, "five".into()), Token::with_value(TokenType::Ident, "five"),
Token::new(TokenType::Comma), Token::new(TokenType::Comma),
Token::with_value(TokenType::Ident, "ten".into()), Token::with_value(TokenType::Ident, "ten"),
Token::new(TokenType::RParen), Token::new(TokenType::RParen),
Token::new(TokenType::Semicolon), Token::new(TokenType::Semicolon),
Token::new(TokenType::ExclamationMark), Token::new(TokenType::ExclamationMark),
Token::new(TokenType::Subtract), Token::new(TokenType::Subtract),
Token::new(TokenType::Divide), Token::new(TokenType::Divide),
Token::new(TokenType::Multiply), Token::new(TokenType::Multiply),
Token::with_value(TokenType::Int, 5.into()), Token::with_value(TokenType::Ident, "5"),
Token::new(TokenType::Semicolon), Token::new(TokenType::Semicolon),
Token::with_value(TokenType::Int, 5.into()), Token::with_value(TokenType::Ident, "5"),
Token::new(TokenType::LessThan), Token::new(TokenType::LessThan),
Token::with_value(TokenType::Int, 10.into()), Token::with_value(TokenType::Ident, "10"),
Token::new(TokenType::GreaterThan), Token::new(TokenType::GreaterThan),
Token::with_value(TokenType::Int, 5.into()), Token::with_value(TokenType::Ident, "5"),
Token::new(TokenType::Semicolon), Token::new(TokenType::Semicolon),
Token::new(TokenType::If), Token::new(TokenType::If),
Token::new(TokenType::LParen), Token::new(TokenType::LParen),
Token::with_value(TokenType::Int, 5.into()), Token::with_value(TokenType::Ident, "5"),
Token::new(TokenType::LessThan), Token::new(TokenType::LessThan),
Token::with_value(TokenType::Int, 10.into()), Token::with_value(TokenType::Ident, "10"),
Token::new(TokenType::RParen), Token::new(TokenType::RParen),
Token::new(TokenType::LBrace), Token::new(TokenType::LBrace),
Token::new(TokenType::Return), Token::new(TokenType::Return),
@ -388,28 +356,16 @@ mod tests {
Token::new(TokenType::False), Token::new(TokenType::False),
Token::new(TokenType::Semicolon), Token::new(TokenType::Semicolon),
Token::new(TokenType::RBrace), Token::new(TokenType::RBrace),
Token::with_value(TokenType::Int, 10.into()), Token::with_value(TokenType::Ident, "10"),
Token::new(TokenType::Equals), Token::new(TokenType::Equals),
Token::with_value(TokenType::Int, 10.into()), Token::with_value(TokenType::Ident, "10"),
Token::new(TokenType::Semicolon), Token::new(TokenType::Semicolon),
Token::with_value(TokenType::Int, 9.into()), Token::with_value(TokenType::Ident, "9"),
Token::new(TokenType::NotEquals), Token::new(TokenType::NotEquals),
Token::with_value(TokenType::Int, 10.into()), Token::with_value(TokenType::Ident, "10"),
Token::new(TokenType::Semicolon), Token::new(TokenType::Semicolon),
Token::new(TokenType::EOF), Token::new(TokenType::EOF),
], ],
); );
for (k, v) in tests {
let tokenized_output = Lexer::new(k).collect::<Vec<Token>>();
assert_eq!(v.len(), tokenized_output.len());
for (exp, actual) in v.into_iter().zip(tokenized_output) {
if actual != exp {
println!("Expect: {:?}, Actual: {:?}", exp, actual);
}
assert_eq!(actual, exp);
}
}
} }
} }

View File

@ -1,115 +1,82 @@
use crate::{ use crate::{
lexer::{Literal, Token, TokenType}, lexer::{Token, TokenType},
parser::{ExpressionPriority, ParseError, Parser}, parser::Parser,
}; };
use std::convert::TryFrom;
/// A parsed program: the list of statements collected by the parser.
#[derive(Debug)]
pub struct Program {
/// Parsed statements, in the order they were pushed by the parser.
pub statements: Vec<Statement>,
}
/// A syntax-tree node: wraps either a `Statement` or an `Expression`.
pub enum Node {
/// A statement node.
Statement(Statement),
/// An expression node.
Expression(Expression),
}
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Statement { pub enum Statement {
Let(Let), Let(LetStatement),
Return(Return),
ExpressionStatement(ExpressionStatement),
} }
impl<'a> Statement {
    /// Parses one statement whose first token, `token`, has already been
    /// taken from the parser.
    ///
    /// Returns `None` when the underlying statement parser gives up.
    ///
    /// # Panics
    ///
    /// Hits `todo!()` for every token kind other than `TokenType::Let`.
    pub fn parse(parser: &'a mut Parser, token: Token) -> Option<Self> {
        if let TokenType::Let = token.name {
            return LetStatement::parse(parser).map(Statement::Let);
        }
        // Only `let` statements are implemented so far.
        todo!()
    }
}
// TODO: Expressions are not going to be a struct so using this here just as a placeholder
#[derive(Debug, PartialEq)]
pub struct Expression;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Expression { pub struct LetStatement {
Ident(Identifier), // name field is to store the identifier of the binding
pub name: Identifier,
// value is to store the expression that'll produce value
pub value: Option<Expression>,
} }
#[derive(Debug, PartialEq)] impl LetStatement {
pub struct Let { // TODO: Implement code to parse let statement
name: Identifier, pub fn parse(parser: &mut Parser) -> Option<Self> {
value: Option<Expression>, let mut stmt = LetStatement {
} name: Identifier::new(TokenType::Let, "placeholder_value"),
value: None,
};
impl Let { if let Some(v) = parser.expect_peek(TokenType::Ident) {
pub fn new(name: Identifier, value: Option<Expression>) -> Let { stmt.name.value = v.literal?;
Let { name, value } } else {
return None;
} }
pub fn parse(parser: &mut Parser) -> Result<Let, ParseError> { parser.expect_peek(TokenType::Assign)?;
if !parser.expect_peek(Token::new(TokenType::Ident)) {
return Err(ParseError::new("expected ident, Couldn't find it"));
}
let literal = String::try_from(parser.current_token.clone().unwrap().value.unwrap())?; // TODO: Right now, We are just skipping over all the expressions
let name = Identifier::new(Token::new(TokenType::Let), literal.into()); // That'll come later
while parser.lexer.next() != Some(Token::new(TokenType::Semicolon)) {}
if !parser.expect_peek(Token::new(TokenType::Assign)) { Some(stmt)
return Err(ParseError::new("expected =, Could not find it"));
}
// TODO: Replace this with code to parse expressions correctly
while !parser.current_token_is(Token::new(TokenType::Semicolon)) {
parser.current_token = parser.lexer.next();
}
Ok(Let::new(name, None))
} }
} }
#[derive(Debug, PartialEq)] // Identifier will be an expression
pub struct Return { // Identifier in a let statement like, let x = 5; where `x` is an identifier doesn't produce a value
return_value: Expression, // but an identifier *can* produce value when used on rhs, e.g. let x = y; Here `y` is producing a value
}
impl Return {
pub fn new() -> Return {
Return {
return_value: Expression::Ident(Identifier::new(
Token::new(TokenType::Return),
"return".into(),
)),
}
}
pub fn parse(parser: &mut Parser) -> Result<Return, ParseError> {
while !parser.current_token_is(Token::new(TokenType::Semicolon)) {
parser.current_token = parser.lexer.next();
}
Ok(Return::new())
}
}
// Identifier is used to represent variable names and other user created identifiers.
// `Literal` can be an int as well. So, Identifier can be a Integer Literal
// The wording sounds a little confusing, maybe?
// TODO: possible @refactor
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct Identifier { pub struct Identifier {
name: Token, pub token: TokenType,
value: Literal, pub value: String,
} }
impl Identifier { impl Identifier {
pub fn new(name: Token, value: Literal) -> Identifier { pub fn new(token: TokenType, v: &str) -> Self {
Identifier { name, value } Identifier {
} token: token,
} value: v.to_string(),
}
#[derive(Debug, PartialEq)]
pub struct ExpressionStatement {
token: Option<Token>, // The first token in Expression
expression: Expression,
}
impl ExpressionStatement {
#[allow(dead_code)]
pub fn new(token: Option<Token>, expression: Expression) -> Self {
ExpressionStatement { token, expression }
}
pub fn parse(parser: &mut Parser) -> Result<Self, ParseError> {
let ct = parser.current_token.clone();
let expr = parser.parse_expression(ExpressionPriority::Lowest)?;
let s = Token::new(TokenType::Semicolon);
if parser.peek_token_is(&s) {
parser.current_token = parser.lexer.next();
}
Ok(ExpressionStatement::new(ct, expr))
} }
} }

View File

@ -1,165 +1,95 @@
pub mod ast; pub mod ast;
mod program; use {
crate::{
pub use self::program::Program; lexer::{Lexer, Token, TokenType},
parser::ast::{Program, Statement},
use self::ast::{Expression, ExpressionStatement, Identifier, Let, Return, Statement}; },
use crate::lexer::{Lexer, Token, TokenType}; std::iter::Peekable,
use std::{collections::HashMap, convert::TryFrom, iter::Peekable}; };
type PrefixParseFn = fn(&mut Parser) -> Result<Expression, ParseError>;
type InfixParseFn = fn(Expression) -> Result<Expression, ParseError>;
// Priority levels passed to the parser's expression-parsing routine.
//
// `PartialOrd` and `Ord` are derived, so variants compare in declaration
// order: `Lowest` is the smallest value and `Call` is the largest.
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord)]
enum ExpressionPriority {
Lowest,
Equals,
LessGreater,
Sum,
Product,
Prefix,
Call,
}
pub struct Parser<'a> { pub struct Parser<'a> {
lexer: Peekable<Lexer<'a>>, lexer: Peekable<Lexer<'a>>,
current_token: Option<Token>,
prefix_parse_fns: HashMap<TokenType, PrefixParseFn>,
infix_parse_fns: HashMap<TokenType, InfixParseFn>,
} }
impl<'a> Parser<'a> { impl<'a> Parser<'a> {
fn new(lexer: Lexer<'a>) -> Parser { pub fn new(lexer: Lexer<'a>) -> Self {
let prefix_parse_fns = HashMap::new(); Self {
let mut parser = Parser {
lexer: lexer.peekable(), lexer: lexer.peekable(),
current_token: None,
infix_parse_fns: HashMap::new(),
prefix_parse_fns,
};
parser.register_prefix_fn(TokenType::Ident, Parser::parse_identifier);
parser.register_prefix_fn(TokenType::Int, Parser::parse_integer_literal);
parser
}
fn parse_statement(&mut self, token: TokenType) -> Result<Statement, ParseError> {
match token {
TokenType::Let => match Let::parse(self) {
Ok(v) => Ok(Statement::Let(v)),
Err(e) => Err(e), //TODO: Return appropriate error
},
TokenType::Return => match Return::parse(self) {
Ok(v) => Ok(Statement::Return(v)),
Err(e) => Err(e),
},
_ => match ExpressionStatement::parse(self) {
Ok(v) => Ok(Statement::ExpressionStatement(v)),
Err(e) => Err(e),
},
} }
} }
fn parse_expression( pub fn parse_program(mut self) -> Program {
&mut self, let mut program = Program { statements: vec![] };
_priority: ExpressionPriority,
) -> Result<Expression, ParseError> { loop {
let current_token = if let Some(token) = &self.current_token { let token = self.lexer.next().unwrap();
token if token.name == TokenType::EOF {
} else { break;
return Err(ParseError::new(
"parser.current_token is None. This *should* not have happened.",
));
};
let prefix = match self.prefix_parse_fns.get(&current_token.name) {
Some(v) => v,
None => {
return Err(ParseError::new(&format!(
"no prefix parse function with token {:?} found in parser",
current_token
)))
}
};
prefix(self)
} }
fn parse_identifier(parser: &mut Parser) -> Result<Expression, ParseError> { match Statement::parse(&mut self, token) {
let ct = parser.current_token.clone().unwrap(); Some(v) => program.statements.push(v),
Ok(Expression::Ident(Identifier::new( None => todo!(), // This will happen in case of a parsing error or something
ct.clone(), // TODO: Correction needed, Can be a source of subtle error in some cases }
String::try_from(ct.value.unwrap())?.into(),
)))
} }
fn parse_integer_literal(parser: &mut Parser) -> Result<Expression, ParseError> { program
let v = parser.current_token.clone().unwrap();
Ok(Expression::Ident(Identifier::new(
v.clone(),
v.value.unwrap(),
)))
} }
fn expect_peek(&mut self, token: Token) -> bool { fn peek_token_is(&mut self, token: TokenType) -> bool {
match self.lexer.peek() { match self.lexer.peek() {
Some(v) if v.name == token.name => { Some(v) => v.name == token,
self.current_token = self.lexer.next(); None => false,
true
}
Some(_) | None => false,
} }
} }
fn current_token_is(&self, token: Token) -> bool { // TODO: Remove this. We most likely don't need it anywhere
self.current_token == Some(token) // fn current_token_is(&self, token: TokenType) -> bool {
} // false
// }
fn peek_token_is(&mut self, token: &Token) -> bool { fn expect_peek(&mut self, token: TokenType) -> Option<Token> {
self.lexer.peek() == Some(token) if self.peek_token_is(token) {
} self.lexer.next()
} else {
fn register_infix_fn(&mut self, token: TokenType, f: InfixParseFn) { None
self.infix_parse_fns.insert(token, f);
}
fn register_prefix_fn(&mut self, token: TokenType, f: PrefixParseFn) {
self.prefix_parse_fns.insert(token, f);
}
}
/// Error reported by the parser, carrying a human-readable description.
#[derive(Debug)]
pub struct ParseError {
    /// What went wrong, in plain text.
    desc: String,
}

impl std::fmt::Display for ParseError {
    /// Writes the error as `ParseError: <description>`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Format the description, not `self`: formatting `self` with `{}`
        // would call this method again and recurse until the stack overflows.
        write!(f, "ParseError: {}", self.desc)
    }
}

impl std::error::Error for ParseError {
    /// Returns the stored description.
    fn description(&self) -> &str {
        &self.desc
    }
}
impl ParseError {
fn new(desc: &str) -> ParseError {
ParseError {
desc: desc.to_owned(),
} }
} }
} }
impl From<String> for ParseError { #[cfg(test)]
fn from(desc: String) -> ParseError { mod tests {
ParseError { desc } use crate::{
} lexer::{Lexer, TokenType},
} parser::{
ast::{Identifier, LetStatement, Statement},
Parser,
},
};
#[test]
fn let_statements() {
let lexer = Lexer::new("let x =5;let y=10; let foobar=538383;");
let parser = Parser::new(lexer);
let program = parser.parse_program();
impl From<&str> for ParseError { assert_eq!(program.statements.len(), 3);
fn from(s: &str) -> ParseError {
ParseError { desc: s.to_owned() } assert_eq!(
program.statements,
vec![
Statement::Let(LetStatement {
name: Identifier::new(TokenType::Let, "x"),
value: None
}),
Statement::Let(LetStatement {
name: Identifier::new(TokenType::Let, "y"),
value: None
}),
Statement::Let(LetStatement {
name: Identifier::new(TokenType::Let, "foobar"),
value: None
})
]
);
} }
} }

View File

@ -1,135 +0,0 @@
use crate::lexer::{Lexer, Token, TokenType};
use crate::parser::{ast::Statement, Parser};
/// The statements produced by parsing one source text, in source order.
#[derive(Debug, PartialEq)]
pub struct Program {
    statements: Vec<Statement>,
}

impl Program {
    /// Runs a parser over `lexer` and collects every statement it produces.
    ///
    /// Parsing stops at the EOF token or when the lexer yields nothing more.
    /// A statement that fails to parse is printed with `println!` and
    /// skipped; it does not abort the run.
    pub fn parse(lexer: Lexer) -> Program {
        let mut parser = Parser::new(lexer);
        let mut statements = Vec::new();

        loop {
            let token = match parser.lexer.next() {
                Some(token) => token,
                None => break,
            };

            // The statement parsers read the current token from the parser.
            parser.current_token = Some(token.clone());
            if parser.current_token_is(Token::new(TokenType::EOF)) {
                break;
            }

            match parser.parse_statement(token.name) {
                Ok(statement) => statements.push(statement),
                Err(error) => println!("{:?}", error),
            }
        }

        Program { statements }
    }
}
// Unit tests that run `Program::parse` over small source snippets.
#[cfg(test)]
mod tests {
use crate::lexer::{Lexer, Token, TokenType};
use crate::parser::ast::{Expression, ExpressionStatement, Identifier, Let, Statement};
use crate::parser::Program;
// Three `let` statements must parse into three `Statement::Let` nodes.
// The expected values are `None`; the commented-out lines below show the
// values that were left out of the expectation.
#[test]
fn let_statements() {
let ip = "
let yr = 5;
let qq = 10;
let foobar = 8388383;
";
let expected_out = Program {
statements: vec![
Statement::Let(Let::new(
Identifier::new(Token::new(TokenType::Let), "yr".into()),
None
// Some(Expression::Ident(Identifier::new(
// Token::new(TokenType::Let),
// "5",
// ))),
)),
Statement::Let(Let::new(
Identifier::new(Token::new(TokenType::Let), "qq".into()),
None
// Some(Expression::Ident(Identifier::new(
// Token::new(TokenType::Let),
// "10",
// ))),
)),
Statement::Let(Let::new(
Identifier::new(Token::new(TokenType::Let), "foobar".into()),
None
// Some(Expression::Ident(Identifier::new(
// Token::new(TokenType::Let),
// "8388383",
// ))),
)),
],
};
let lexer = Lexer::new(ip);
let as_tree = Program::parse(lexer);
assert_eq!(as_tree.statements.len(), 3);
assert_eq!(as_tree, expected_out);
}
// Three `return` statements: only the number of parsed statements is checked.
#[test]
fn return_statements() {
let ip = "
return 5;
return 10;
return 80932;
";
let lexer = Lexer::new(ip);
let as_tree = Program::parse(lexer);
assert_eq!(as_tree.statements.len(), 3);
}
// A lone identifier followed by `;` must produce exactly one
// `ExpressionStatement` wrapping that identifier.
#[test]
fn identifier_expression() {
let ip = "
foobar;
";
let lexer = Lexer::new(ip);
let as_tree = Program::parse(lexer);
let expected_out = Program {
statements: vec![Statement::ExpressionStatement(ExpressionStatement::new(
Some(Token::with_value(TokenType::Ident, "foobar".into())),
Expression::Ident(Identifier::new(
Token::with_value(TokenType::Ident, "foobar".into()),
"foobar".into(),
)),
))],
};
// Printed for debugging; visible only when the test harness shows output.
println!("{:?}", as_tree);
assert_eq!(as_tree.statements.len(), 1);
assert_eq!(as_tree, expected_out);
}
// An integer literal statement; the expectation wraps the integer in
// `Expression::Ident`.
#[test]
fn integer_literal_expression() {
let ip = "5;";
let lexer = Lexer::new(ip);
let as_tree = Program::parse(lexer);
let expected_out = Program {
statements: vec![Statement::ExpressionStatement(ExpressionStatement::new(
Some(Token::with_value(TokenType::Int, 5.into())),
Expression::Ident(Identifier::new(
Token::with_value(TokenType::Int, 5.into()),
5.into(),
)),
))],
};
assert_eq!(as_tree.statements.len(), 1);
assert_eq!(as_tree, expected_out);
}
}

View File

@ -1,7 +1,7 @@
use crate::{lexer::Lexer, parser::Program}; use crate::{lexer::Lexer, parser::Parser};
use std::io::{self, BufRead, Write}; use std::io::{self, BufRead, Write};
const PROMPT: &str = ">> "; const PROMPT: &[u8] = b">> ";
pub fn init() { pub fn init() {
let stdin = io::stdin(); let stdin = io::stdin();
@ -14,7 +14,7 @@ pub fn init() {
fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) { fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
loop { loop {
out.write_all(PROMPT.as_bytes()).unwrap(); out.write_all(PROMPT).unwrap();
out.flush().unwrap(); out.flush().unwrap();
let mut s = String::new(); let mut s = String::new();
ip.read_line(&mut s).unwrap(); ip.read_line(&mut s).unwrap();
@ -24,7 +24,9 @@ fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
println!("{:?}", token); println!("{:?}", token);
} }
let parser = Program::parse(tokens); let parser = Parser::new(tokens);
println!("parser={:?}", parser);
let stmts = parser.parse_program();
println!("parser={:?}", stmts);
} }
} }