Fixed Parser, Can parse Let statements now
This commit is contained in:
parent
631c2d8b1a
commit
5ac3b5e29a
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
||||||
target/
|
target/
|
||||||
.vscode/settings.json
|
.vscode/settings.json
|
||||||
.vscode/launch.json
|
.vscode/launch.json
|
||||||
|
.idea/*
|
||||||
|
|
192
src/lexer/mod.rs
192
src/lexer/mod.rs
|
@ -1,7 +1,8 @@
|
||||||
use std::collections::HashMap;
|
use std::{
|
||||||
use std::convert::TryFrom;
|
collections::HashMap,
|
||||||
use std::iter::Peekable;
|
iter::Peekable,
|
||||||
use std::str::{self, Chars};
|
str::{self, Chars},
|
||||||
|
};
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref IDENTMAP: HashMap<&'static str, Token> = {
|
static ref IDENTMAP: HashMap<&'static str, Token> = {
|
||||||
|
@ -22,8 +23,11 @@ pub enum TokenType {
|
||||||
Illegal,
|
Illegal,
|
||||||
EOF,
|
EOF,
|
||||||
|
|
||||||
// Identifiers
|
// Identifiers and Literals
|
||||||
Int,
|
|
||||||
|
// Ident is basically most things that are not covered
|
||||||
|
// by other variants of this enum.
|
||||||
|
Ident,
|
||||||
|
|
||||||
// Operators
|
// Operators
|
||||||
Assign,
|
Assign,
|
||||||
|
@ -53,70 +57,28 @@ pub enum TokenType {
|
||||||
Else,
|
Else,
|
||||||
False,
|
False,
|
||||||
Return,
|
Return,
|
||||||
Ident,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
|
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
|
||||||
pub struct Token {
|
pub struct Token {
|
||||||
pub name: TokenType,
|
pub name: TokenType,
|
||||||
pub value: Option<Literal>,
|
pub literal: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Token {
|
impl Token {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new(name: TokenType) -> Self {
|
pub fn new(name: TokenType) -> Self {
|
||||||
Token { name, value: None }
|
Token {
|
||||||
|
name,
|
||||||
|
literal: None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn with_value(name: TokenType, value: Literal) -> Self {
|
pub fn with_value(name: TokenType, value: &str) -> Self {
|
||||||
Token {
|
Token {
|
||||||
name,
|
name,
|
||||||
value: Some(value),
|
literal: Some(value.to_string()),
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Hash, PartialEq, Eq, Clone)]
|
|
||||||
pub enum Literal {
|
|
||||||
String(String),
|
|
||||||
Int(i64),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<String> for Literal {
|
|
||||||
fn from(s: String) -> Literal {
|
|
||||||
Literal::String(s)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<&str> for Literal {
|
|
||||||
fn from(s: &str) -> Literal {
|
|
||||||
Literal::String(s.to_owned())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<i64> for Literal {
|
|
||||||
fn from(i: i64) -> Literal {
|
|
||||||
Literal::Int(i)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TryFrom<Literal> for String {
|
|
||||||
type Error = &'static str;
|
|
||||||
fn try_from(l: Literal) -> Result<String, Self::Error> {
|
|
||||||
match l {
|
|
||||||
Literal::String(v) => Ok(v),
|
|
||||||
Literal::Int(_) => Err("can not convert Int to String"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TryFrom<Literal> for i64 {
|
|
||||||
type Error = &'static str;
|
|
||||||
fn try_from(l: Literal) -> Result<i64, Self::Error> {
|
|
||||||
match l {
|
|
||||||
Literal::Int(v) => Ok(v),
|
|
||||||
Literal::String(_) => Err("can not convert String to Int"),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -128,7 +90,7 @@ pub struct Lexer<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Lexer<'a> {
|
impl<'a> Lexer<'a> {
|
||||||
pub fn new(input: &'a str) -> Lexer<'a> {
|
pub fn new(input: &'a str) -> Lexer {
|
||||||
let input = input.chars().peekable();
|
let input = input.chars().peekable();
|
||||||
Lexer {
|
Lexer {
|
||||||
input,
|
input,
|
||||||
|
@ -136,17 +98,20 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This consumes one char from input
|
||||||
|
#[inline]
|
||||||
fn read_char(&mut self) -> Option<char> {
|
fn read_char(&mut self) -> Option<char> {
|
||||||
self.input.next()
|
self.input.next()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This reads an Identifier from input
|
||||||
fn read_identifier(&mut self, first: char) -> String {
|
fn read_identifier(&mut self, first: char) -> String {
|
||||||
let mut ident = Vec::new();
|
let mut ident = String::new();
|
||||||
ident.push(first);
|
ident.push(first);
|
||||||
while self.peek_is_letter() {
|
while self.peek_is_letter() {
|
||||||
ident.push(self.read_char().unwrap());
|
ident.push(self.read_char().unwrap());
|
||||||
}
|
}
|
||||||
ident.into_iter().collect::<String>()
|
ident
|
||||||
}
|
}
|
||||||
|
|
||||||
fn peek_is_letter(&mut self) -> bool {
|
fn peek_is_letter(&mut self) -> bool {
|
||||||
|
@ -165,26 +130,23 @@ impl<'a> Lexer<'a> {
|
||||||
|
|
||||||
fn skip_whitespace(&mut self) {
|
fn skip_whitespace(&mut self) {
|
||||||
while let Some(&v) = self.input.peek() {
|
while let Some(&v) = self.input.peek() {
|
||||||
if v == ' ' || v == '\t' || v == '\n' || v == '\r' {
|
match v {
|
||||||
|
' ' | '\t' | '\n' | '\r' => {
|
||||||
self.read_char();
|
self.read_char();
|
||||||
} else {
|
}
|
||||||
break;
|
_ => break,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// use i64 for all numbers for now.
|
// use i64 for all numbers for now.
|
||||||
fn read_number(&mut self, first: char) -> i64 {
|
fn read_number(&mut self, first: char) -> String {
|
||||||
let mut number = Vec::new();
|
let mut number = Vec::new();
|
||||||
number.push(first);
|
number.push(first);
|
||||||
while self.peek_is_ascii_digit() {
|
while self.peek_is_ascii_digit() {
|
||||||
number.push(self.read_char().unwrap());
|
number.push(self.read_char().unwrap());
|
||||||
}
|
}
|
||||||
number
|
number.into_iter().collect()
|
||||||
.into_iter()
|
|
||||||
.collect::<String>()
|
|
||||||
.parse::<i64>()
|
|
||||||
.unwrap()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -238,7 +200,7 @@ impl<'a> Iterator for Lexer<'a> {
|
||||||
}
|
}
|
||||||
Some(ch) if ch.is_ascii_digit() => {
|
Some(ch) if ch.is_ascii_digit() => {
|
||||||
let number = self.read_number(ch);
|
let number = self.read_number(ch);
|
||||||
Some(Token::with_value(TokenType::Int, (number as i64).into()))
|
Some(Token::with_value(TokenType::Ident, &number))
|
||||||
}
|
}
|
||||||
None if !self.eof_sent => {
|
None if !self.eof_sent => {
|
||||||
self.eof_sent = true;
|
self.eof_sent = true;
|
||||||
|
@ -250,28 +212,26 @@ impl<'a> Iterator for Lexer<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn is_letter(c: char) -> bool {
|
fn is_letter(c: char) -> bool {
|
||||||
c.is_ascii_alphabetic() || c == '_'
|
c.is_ascii_alphabetic() || c == '_'
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lookup_ident(ident: &str) -> Token {
|
fn lookup_ident(ident: &str) -> Token {
|
||||||
match IDENTMAP.get(ident) {
|
match IDENTMAP.get(&ident) {
|
||||||
Some(v) => v.clone(),
|
Some(v) => v.clone(),
|
||||||
None => Token::with_value(TokenType::Ident, ident.into()),
|
None => Token::with_value(TokenType::Ident, ident),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{Lexer, Token, TokenType};
|
use super::{Lexer, Token, TokenType};
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn new() {
|
fn new() {
|
||||||
let mut tests = HashMap::new();
|
assert_eq!(
|
||||||
|
Lexer::new("=+(){},;").collect::<Vec<Token>>(),
|
||||||
tests.insert(
|
|
||||||
"=+(){},;",
|
|
||||||
vec![
|
vec![
|
||||||
Token::new(TokenType::Assign),
|
Token::new(TokenType::Assign),
|
||||||
Token::new(TokenType::Plus),
|
Token::new(TokenType::Plus),
|
||||||
|
@ -284,7 +244,9 @@ mod tests {
|
||||||
Token::new(TokenType::EOF),
|
Token::new(TokenType::EOF),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
tests.insert(
|
|
||||||
|
assert_eq!(
|
||||||
|
Lexer::new(
|
||||||
"let five = 5;
|
"let five = 5;
|
||||||
let ten = 10;
|
let ten = 10;
|
||||||
|
|
||||||
|
@ -292,48 +254,52 @@ mod tests {
|
||||||
x + y;
|
x + y;
|
||||||
};
|
};
|
||||||
|
|
||||||
let result = add(five, ten);",
|
let result = add(five, ten);"
|
||||||
|
)
|
||||||
|
.collect::<Vec<Token>>(),
|
||||||
vec![
|
vec![
|
||||||
Token::new(TokenType::Let),
|
Token::new(TokenType::Let),
|
||||||
Token::with_value(TokenType::Ident, "five".into()),
|
Token::with_value(TokenType::Ident, "five"),
|
||||||
Token::new(TokenType::Assign),
|
Token::new(TokenType::Assign),
|
||||||
Token::with_value(TokenType::Int, 5.into()),
|
Token::with_value(TokenType::Ident, "5"),
|
||||||
Token::new(TokenType::Semicolon),
|
Token::new(TokenType::Semicolon),
|
||||||
Token::new(TokenType::Let),
|
Token::new(TokenType::Let),
|
||||||
Token::with_value(TokenType::Ident, "ten".into()),
|
Token::with_value(TokenType::Ident, "ten"),
|
||||||
Token::new(TokenType::Assign),
|
Token::new(TokenType::Assign),
|
||||||
Token::with_value(TokenType::Int, 10.into()),
|
Token::with_value(TokenType::Ident, "10"),
|
||||||
Token::new(TokenType::Semicolon),
|
Token::new(TokenType::Semicolon),
|
||||||
Token::new(TokenType::Let),
|
Token::new(TokenType::Let),
|
||||||
Token::with_value(TokenType::Ident, "add".into()),
|
Token::with_value(TokenType::Ident, "add"),
|
||||||
Token::new(TokenType::Assign),
|
Token::new(TokenType::Assign),
|
||||||
Token::new(TokenType::Function),
|
Token::new(TokenType::Function),
|
||||||
Token::new(TokenType::LParen),
|
Token::new(TokenType::LParen),
|
||||||
Token::with_value(TokenType::Ident, "x".into()),
|
Token::with_value(TokenType::Ident, "x"),
|
||||||
Token::new(TokenType::Comma),
|
Token::new(TokenType::Comma),
|
||||||
Token::with_value(TokenType::Ident, "y".into()),
|
Token::with_value(TokenType::Ident, "y"),
|
||||||
Token::new(TokenType::RParen),
|
Token::new(TokenType::RParen),
|
||||||
Token::new(TokenType::LBrace),
|
Token::new(TokenType::LBrace),
|
||||||
Token::with_value(TokenType::Ident, "x".into()),
|
Token::with_value(TokenType::Ident, "x"),
|
||||||
Token::new(TokenType::Plus),
|
Token::new(TokenType::Plus),
|
||||||
Token::with_value(TokenType::Ident, "y".into()),
|
Token::with_value(TokenType::Ident, "y"),
|
||||||
Token::new(TokenType::Semicolon),
|
Token::new(TokenType::Semicolon),
|
||||||
Token::new(TokenType::RBrace),
|
Token::new(TokenType::RBrace),
|
||||||
Token::new(TokenType::Semicolon),
|
Token::new(TokenType::Semicolon),
|
||||||
Token::new(TokenType::Let),
|
Token::new(TokenType::Let),
|
||||||
Token::with_value(TokenType::Ident, "result".into()),
|
Token::with_value(TokenType::Ident, "result"),
|
||||||
Token::new(TokenType::Assign),
|
Token::new(TokenType::Assign),
|
||||||
Token::with_value(TokenType::Ident, "add".into()),
|
Token::with_value(TokenType::Ident, "add"),
|
||||||
Token::new(TokenType::LParen),
|
Token::new(TokenType::LParen),
|
||||||
Token::with_value(TokenType::Ident, "five".into()),
|
Token::with_value(TokenType::Ident, "five"),
|
||||||
Token::new(TokenType::Comma),
|
Token::new(TokenType::Comma),
|
||||||
Token::with_value(TokenType::Ident, "ten".into()),
|
Token::with_value(TokenType::Ident, "ten"),
|
||||||
Token::new(TokenType::RParen),
|
Token::new(TokenType::RParen),
|
||||||
Token::new(TokenType::Semicolon),
|
Token::new(TokenType::Semicolon),
|
||||||
Token::new(TokenType::EOF),
|
Token::new(TokenType::EOF),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
tests.insert(
|
|
||||||
|
assert_eq!(
|
||||||
|
Lexer::new(
|
||||||
"let result = add(five, ten);
|
"let result = add(five, ten);
|
||||||
!-/*5;
|
!-/*5;
|
||||||
5 < 10 > 5;
|
5 < 10 > 5;
|
||||||
|
@ -347,35 +313,37 @@ mod tests {
|
||||||
10 == 10;
|
10 == 10;
|
||||||
9 != 10;
|
9 != 10;
|
||||||
|
|
||||||
",
|
"
|
||||||
|
)
|
||||||
|
.collect::<Vec<Token>>(),
|
||||||
vec![
|
vec![
|
||||||
Token::new(TokenType::Let),
|
Token::new(TokenType::Let),
|
||||||
Token::with_value(TokenType::Ident, "result".into()),
|
Token::with_value(TokenType::Ident, "result"),
|
||||||
Token::new(TokenType::Assign),
|
Token::new(TokenType::Assign),
|
||||||
Token::with_value(TokenType::Ident, "add".into()),
|
Token::with_value(TokenType::Ident, "add"),
|
||||||
Token::new(TokenType::LParen),
|
Token::new(TokenType::LParen),
|
||||||
Token::with_value(TokenType::Ident, "five".into()),
|
Token::with_value(TokenType::Ident, "five"),
|
||||||
Token::new(TokenType::Comma),
|
Token::new(TokenType::Comma),
|
||||||
Token::with_value(TokenType::Ident, "ten".into()),
|
Token::with_value(TokenType::Ident, "ten"),
|
||||||
Token::new(TokenType::RParen),
|
Token::new(TokenType::RParen),
|
||||||
Token::new(TokenType::Semicolon),
|
Token::new(TokenType::Semicolon),
|
||||||
Token::new(TokenType::ExclamationMark),
|
Token::new(TokenType::ExclamationMark),
|
||||||
Token::new(TokenType::Subtract),
|
Token::new(TokenType::Subtract),
|
||||||
Token::new(TokenType::Divide),
|
Token::new(TokenType::Divide),
|
||||||
Token::new(TokenType::Multiply),
|
Token::new(TokenType::Multiply),
|
||||||
Token::with_value(TokenType::Int, 5.into()),
|
Token::with_value(TokenType::Ident, "5"),
|
||||||
Token::new(TokenType::Semicolon),
|
Token::new(TokenType::Semicolon),
|
||||||
Token::with_value(TokenType::Int, 5.into()),
|
Token::with_value(TokenType::Ident, "5"),
|
||||||
Token::new(TokenType::LessThan),
|
Token::new(TokenType::LessThan),
|
||||||
Token::with_value(TokenType::Int, 10.into()),
|
Token::with_value(TokenType::Ident, "10"),
|
||||||
Token::new(TokenType::GreaterThan),
|
Token::new(TokenType::GreaterThan),
|
||||||
Token::with_value(TokenType::Int, 5.into()),
|
Token::with_value(TokenType::Ident, "5"),
|
||||||
Token::new(TokenType::Semicolon),
|
Token::new(TokenType::Semicolon),
|
||||||
Token::new(TokenType::If),
|
Token::new(TokenType::If),
|
||||||
Token::new(TokenType::LParen),
|
Token::new(TokenType::LParen),
|
||||||
Token::with_value(TokenType::Int, 5.into()),
|
Token::with_value(TokenType::Ident, "5"),
|
||||||
Token::new(TokenType::LessThan),
|
Token::new(TokenType::LessThan),
|
||||||
Token::with_value(TokenType::Int, 10.into()),
|
Token::with_value(TokenType::Ident, "10"),
|
||||||
Token::new(TokenType::RParen),
|
Token::new(TokenType::RParen),
|
||||||
Token::new(TokenType::LBrace),
|
Token::new(TokenType::LBrace),
|
||||||
Token::new(TokenType::Return),
|
Token::new(TokenType::Return),
|
||||||
|
@ -388,28 +356,16 @@ mod tests {
|
||||||
Token::new(TokenType::False),
|
Token::new(TokenType::False),
|
||||||
Token::new(TokenType::Semicolon),
|
Token::new(TokenType::Semicolon),
|
||||||
Token::new(TokenType::RBrace),
|
Token::new(TokenType::RBrace),
|
||||||
Token::with_value(TokenType::Int, 10.into()),
|
Token::with_value(TokenType::Ident, "10"),
|
||||||
Token::new(TokenType::Equals),
|
Token::new(TokenType::Equals),
|
||||||
Token::with_value(TokenType::Int, 10.into()),
|
Token::with_value(TokenType::Ident, "10"),
|
||||||
Token::new(TokenType::Semicolon),
|
Token::new(TokenType::Semicolon),
|
||||||
Token::with_value(TokenType::Int, 9.into()),
|
Token::with_value(TokenType::Ident, "9"),
|
||||||
Token::new(TokenType::NotEquals),
|
Token::new(TokenType::NotEquals),
|
||||||
Token::with_value(TokenType::Int, 10.into()),
|
Token::with_value(TokenType::Ident, "10"),
|
||||||
Token::new(TokenType::Semicolon),
|
Token::new(TokenType::Semicolon),
|
||||||
Token::new(TokenType::EOF),
|
Token::new(TokenType::EOF),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
for (k, v) in tests {
|
|
||||||
let tokenized_output = Lexer::new(k).collect::<Vec<Token>>();
|
|
||||||
assert_eq!(v.len(), tokenized_output.len());
|
|
||||||
|
|
||||||
for (exp, actual) in v.into_iter().zip(tokenized_output) {
|
|
||||||
if actual != exp {
|
|
||||||
println!("Expect: {:?}, Actual: {:?}", exp, actual);
|
|
||||||
}
|
|
||||||
assert_eq!(actual, exp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,115 +1,82 @@
|
||||||
use crate::{
|
use crate::{
|
||||||
lexer::{Literal, Token, TokenType},
|
lexer::{Token, TokenType},
|
||||||
parser::{ExpressionPriority, ParseError, Parser},
|
parser::Parser,
|
||||||
};
|
};
|
||||||
use std::convert::TryFrom;
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Program {
|
||||||
|
pub statements: Vec<Statement>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum Node {
|
||||||
|
Statement(Statement),
|
||||||
|
Expression(Expression),
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum Statement {
|
pub enum Statement {
|
||||||
Let(Let),
|
Let(LetStatement),
|
||||||
Return(Return),
|
|
||||||
ExpressionStatement(ExpressionStatement),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a> Statement {
|
||||||
|
pub fn parse(parser: &'a mut Parser, token: Token) -> Option<Self> {
|
||||||
|
match token.name {
|
||||||
|
TokenType::Let => Some(Statement::Let(LetStatement::parse(parser)?)),
|
||||||
|
_ => todo!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Expressions are not going to be a struct so using this here just as a placeholder
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub struct Expression;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum Expression {
|
pub struct LetStatement {
|
||||||
Ident(Identifier),
|
// name field is to store the identifier of the binding
|
||||||
|
pub name: Identifier,
|
||||||
|
// value is to store the expression that'll produce value
|
||||||
|
pub value: Option<Expression>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
impl LetStatement {
|
||||||
pub struct Let {
|
// TODO: Implement code to parse let statement
|
||||||
name: Identifier,
|
pub fn parse(parser: &mut Parser) -> Option<Self> {
|
||||||
value: Option<Expression>,
|
let mut stmt = LetStatement {
|
||||||
|
name: Identifier::new(TokenType::Let, "placeholder_value"),
|
||||||
|
value: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(v) = parser.expect_peek(TokenType::Ident) {
|
||||||
|
stmt.name.value = v.literal?;
|
||||||
|
} else {
|
||||||
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Let {
|
parser.expect_peek(TokenType::Assign)?;
|
||||||
pub fn new(name: Identifier, value: Option<Expression>) -> Let {
|
|
||||||
Let { name, value }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse(parser: &mut Parser) -> Result<Let, ParseError> {
|
// TODO: Right now, We are just skipping over all the expressions
|
||||||
if !parser.expect_peek(Token::new(TokenType::Ident)) {
|
// That'll come later
|
||||||
return Err(ParseError::new("expected ident, Couldn't find it"));
|
while parser.lexer.next() != Some(Token::new(TokenType::Semicolon)) {}
|
||||||
}
|
|
||||||
|
|
||||||
let literal = String::try_from(parser.current_token.clone().unwrap().value.unwrap())?;
|
Some(stmt)
|
||||||
let name = Identifier::new(Token::new(TokenType::Let), literal.into());
|
|
||||||
|
|
||||||
if !parser.expect_peek(Token::new(TokenType::Assign)) {
|
|
||||||
return Err(ParseError::new("expected =, Could not find it"));
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Replace this with code to parse expressions correctly
|
|
||||||
while !parser.current_token_is(Token::new(TokenType::Semicolon)) {
|
|
||||||
parser.current_token = parser.lexer.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Let::new(name, None))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
// Identifier will be an expression
|
||||||
pub struct Return {
|
// Identifier in a let statement like, let x = 5; where `x` is an identifier doesn't produce a value
|
||||||
return_value: Expression,
|
// but an identifier *can* produce value when used on rhs, e.g. let x = y; Here `y` is producing a value
|
||||||
}
|
|
||||||
|
|
||||||
impl Return {
|
|
||||||
pub fn new() -> Return {
|
|
||||||
Return {
|
|
||||||
return_value: Expression::Ident(Identifier::new(
|
|
||||||
Token::new(TokenType::Return),
|
|
||||||
"return".into(),
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse(parser: &mut Parser) -> Result<Return, ParseError> {
|
|
||||||
while !parser.current_token_is(Token::new(TokenType::Semicolon)) {
|
|
||||||
parser.current_token = parser.lexer.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Return::new())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Identifier is used to represent variable names and other user created identifiers.
|
|
||||||
// `Literal` can be an int as well. So, Identifier can be a Integer Literal
|
|
||||||
// The wording sounds a little confusing, maybe?
|
|
||||||
// TODO: possible @refactor
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub struct Identifier {
|
pub struct Identifier {
|
||||||
name: Token,
|
pub token: TokenType,
|
||||||
value: Literal,
|
pub value: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Identifier {
|
impl Identifier {
|
||||||
pub fn new(name: Token, value: Literal) -> Identifier {
|
pub fn new(token: TokenType, v: &str) -> Self {
|
||||||
Identifier { name, value }
|
Identifier {
|
||||||
|
token: token,
|
||||||
|
value: v.to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub struct ExpressionStatement {
|
|
||||||
token: Option<Token>, // The first token in Expression
|
|
||||||
expression: Expression,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ExpressionStatement {
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub fn new(token: Option<Token>, expression: Expression) -> Self {
|
|
||||||
ExpressionStatement { token, expression }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse(parser: &mut Parser) -> Result<Self, ParseError> {
|
|
||||||
let ct = parser.current_token.clone();
|
|
||||||
|
|
||||||
let expr = parser.parse_expression(ExpressionPriority::Lowest)?;
|
|
||||||
|
|
||||||
let s = Token::new(TokenType::Semicolon);
|
|
||||||
if parser.peek_token_is(&s) {
|
|
||||||
parser.current_token = parser.lexer.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(ExpressionStatement::new(ct, expr))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,165 +1,95 @@
|
||||||
pub mod ast;
|
pub mod ast;
|
||||||
mod program;
|
use {
|
||||||
|
crate::{
|
||||||
pub use self::program::Program;
|
lexer::{Lexer, Token, TokenType},
|
||||||
|
parser::ast::{Program, Statement},
|
||||||
use self::ast::{Expression, ExpressionStatement, Identifier, Let, Return, Statement};
|
},
|
||||||
use crate::lexer::{Lexer, Token, TokenType};
|
std::iter::Peekable,
|
||||||
use std::{collections::HashMap, convert::TryFrom, iter::Peekable};
|
};
|
||||||
|
|
||||||
type PrefixParseFn = fn(&mut Parser) -> Result<Expression, ParseError>;
|
|
||||||
type InfixParseFn = fn(Expression) -> Result<Expression, ParseError>;
|
|
||||||
|
|
||||||
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord)]
|
|
||||||
enum ExpressionPriority {
|
|
||||||
Lowest,
|
|
||||||
Equals,
|
|
||||||
LessGreater,
|
|
||||||
Sum,
|
|
||||||
Product,
|
|
||||||
Prefix,
|
|
||||||
Call,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct Parser<'a> {
|
pub struct Parser<'a> {
|
||||||
lexer: Peekable<Lexer<'a>>,
|
lexer: Peekable<Lexer<'a>>,
|
||||||
current_token: Option<Token>,
|
|
||||||
prefix_parse_fns: HashMap<TokenType, PrefixParseFn>,
|
|
||||||
infix_parse_fns: HashMap<TokenType, InfixParseFn>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Parser<'a> {
|
impl<'a> Parser<'a> {
|
||||||
fn new(lexer: Lexer<'a>) -> Parser {
|
pub fn new(lexer: Lexer<'a>) -> Self {
|
||||||
let prefix_parse_fns = HashMap::new();
|
Self {
|
||||||
|
|
||||||
let mut parser = Parser {
|
|
||||||
lexer: lexer.peekable(),
|
lexer: lexer.peekable(),
|
||||||
current_token: None,
|
|
||||||
infix_parse_fns: HashMap::new(),
|
|
||||||
prefix_parse_fns,
|
|
||||||
};
|
|
||||||
parser.register_prefix_fn(TokenType::Ident, Parser::parse_identifier);
|
|
||||||
parser.register_prefix_fn(TokenType::Int, Parser::parse_integer_literal);
|
|
||||||
parser
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_statement(&mut self, token: TokenType) -> Result<Statement, ParseError> {
|
|
||||||
match token {
|
|
||||||
TokenType::Let => match Let::parse(self) {
|
|
||||||
Ok(v) => Ok(Statement::Let(v)),
|
|
||||||
Err(e) => Err(e), //TODO: Return appropriate error
|
|
||||||
},
|
|
||||||
TokenType::Return => match Return::parse(self) {
|
|
||||||
Ok(v) => Ok(Statement::Return(v)),
|
|
||||||
Err(e) => Err(e),
|
|
||||||
},
|
|
||||||
_ => match ExpressionStatement::parse(self) {
|
|
||||||
Ok(v) => Ok(Statement::ExpressionStatement(v)),
|
|
||||||
Err(e) => Err(e),
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_expression(
|
pub fn parse_program(mut self) -> Program {
|
||||||
&mut self,
|
let mut program = Program { statements: vec![] };
|
||||||
_priority: ExpressionPriority,
|
|
||||||
) -> Result<Expression, ParseError> {
|
loop {
|
||||||
let current_token = if let Some(token) = &self.current_token {
|
let token = self.lexer.next().unwrap();
|
||||||
token
|
if token.name == TokenType::EOF {
|
||||||
} else {
|
break;
|
||||||
return Err(ParseError::new(
|
|
||||||
"parser.current_token is None. This *should* not have happened.",
|
|
||||||
));
|
|
||||||
};
|
|
||||||
let prefix = match self.prefix_parse_fns.get(&current_token.name) {
|
|
||||||
Some(v) => v,
|
|
||||||
None => {
|
|
||||||
return Err(ParseError::new(&format!(
|
|
||||||
"no prefix parse function with token {:?} found in parser",
|
|
||||||
current_token
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
prefix(self)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_identifier(parser: &mut Parser) -> Result<Expression, ParseError> {
|
match Statement::parse(&mut self, token) {
|
||||||
let ct = parser.current_token.clone().unwrap();
|
Some(v) => program.statements.push(v),
|
||||||
Ok(Expression::Ident(Identifier::new(
|
None => todo!(), // This will happen in case of a parsing error or something
|
||||||
ct.clone(), // TODO: Correction needed, Can be a source of subtle error in some cases
|
}
|
||||||
String::try_from(ct.value.unwrap())?.into(),
|
|
||||||
)))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_integer_literal(parser: &mut Parser) -> Result<Expression, ParseError> {
|
program
|
||||||
let v = parser.current_token.clone().unwrap();
|
|
||||||
|
|
||||||
Ok(Expression::Ident(Identifier::new(
|
|
||||||
v.clone(),
|
|
||||||
v.value.unwrap(),
|
|
||||||
)))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn expect_peek(&mut self, token: Token) -> bool {
|
fn peek_token_is(&mut self, token: TokenType) -> bool {
|
||||||
match self.lexer.peek() {
|
match self.lexer.peek() {
|
||||||
Some(v) if v.name == token.name => {
|
Some(v) => v.name == token,
|
||||||
self.current_token = self.lexer.next();
|
None => false,
|
||||||
true
|
|
||||||
}
|
|
||||||
Some(_) | None => false,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn current_token_is(&self, token: Token) -> bool {
|
// TODO: Remove this. We most likely don't need it anywhere
|
||||||
self.current_token == Some(token)
|
// fn current_token_is(&self, token: TokenType) -> bool {
|
||||||
}
|
// false
|
||||||
|
// }
|
||||||
|
|
||||||
fn peek_token_is(&mut self, token: &Token) -> bool {
|
fn expect_peek(&mut self, token: TokenType) -> Option<Token> {
|
||||||
self.lexer.peek() == Some(token)
|
if self.peek_token_is(token) {
|
||||||
}
|
self.lexer.next()
|
||||||
|
} else {
|
||||||
fn register_infix_fn(&mut self, token: TokenType, f: InfixParseFn) {
|
None
|
||||||
self.infix_parse_fns.insert(token, f);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn register_prefix_fn(&mut self, token: TokenType, f: PrefixParseFn) {
|
|
||||||
self.prefix_parse_fns.insert(token, f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct ParseError {
|
|
||||||
desc: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for ParseError {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
||||||
write!(f, "ParseError: {}", self)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::error::Error for ParseError {
|
|
||||||
fn description(&self) -> &str {
|
|
||||||
&self.desc
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ParseError {
|
|
||||||
fn new(desc: &str) -> ParseError {
|
|
||||||
ParseError {
|
|
||||||
desc: desc.to_owned(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<String> for ParseError {
|
#[cfg(test)]
|
||||||
fn from(desc: String) -> ParseError {
|
mod tests {
|
||||||
ParseError { desc }
|
use crate::{
|
||||||
}
|
lexer::{Lexer, TokenType},
|
||||||
}
|
parser::{
|
||||||
|
ast::{Identifier, LetStatement, Statement},
|
||||||
|
Parser,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
#[test]
|
||||||
|
fn let_statements() {
|
||||||
|
let lexer = Lexer::new("let x =5;let y=10; let foobar=538383;");
|
||||||
|
let parser = Parser::new(lexer);
|
||||||
|
let program = parser.parse_program();
|
||||||
|
|
||||||
impl From<&str> for ParseError {
|
assert_eq!(program.statements.len(), 3);
|
||||||
fn from(s: &str) -> ParseError {
|
|
||||||
ParseError { desc: s.to_owned() }
|
assert_eq!(
|
||||||
|
program.statements,
|
||||||
|
vec![
|
||||||
|
Statement::Let(LetStatement {
|
||||||
|
name: Identifier::new(TokenType::Let, "x"),
|
||||||
|
value: None
|
||||||
|
}),
|
||||||
|
Statement::Let(LetStatement {
|
||||||
|
name: Identifier::new(TokenType::Let, "y"),
|
||||||
|
value: None
|
||||||
|
}),
|
||||||
|
Statement::Let(LetStatement {
|
||||||
|
name: Identifier::new(TokenType::Let, "foobar"),
|
||||||
|
value: None
|
||||||
|
})
|
||||||
|
]
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,135 +0,0 @@
|
||||||
use crate::lexer::{Lexer, Token, TokenType};
|
|
||||||
use crate::parser::{ast::Statement, Parser};
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub struct Program {
|
|
||||||
statements: Vec<Statement>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Program {
|
|
||||||
pub fn parse(lexer: Lexer) -> Program {
|
|
||||||
let mut statements = vec![];
|
|
||||||
let mut parser = Parser::new(lexer);
|
|
||||||
while let Some(token) = parser.lexer.next() {
|
|
||||||
parser.current_token = Some(token.clone());
|
|
||||||
if parser.current_token_is(Token::new(TokenType::EOF)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
match parser.parse_statement(token.name) {
|
|
||||||
Ok(v) => statements.push(v),
|
|
||||||
Err(e) => {
|
|
||||||
println!("{:?}", e);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
Program { statements }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use crate::lexer::{Lexer, Token, TokenType};
|
|
||||||
use crate::parser::ast::{Expression, ExpressionStatement, Identifier, Let, Statement};
|
|
||||||
use crate::parser::Program;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn let_statements() {
|
|
||||||
let ip = "
|
|
||||||
let yr = 5;
|
|
||||||
let qq = 10;
|
|
||||||
let foobar = 8388383;
|
|
||||||
";
|
|
||||||
|
|
||||||
let expected_out = Program {
|
|
||||||
statements: vec![
|
|
||||||
Statement::Let(Let::new(
|
|
||||||
Identifier::new(Token::new(TokenType::Let), "yr".into()),
|
|
||||||
None
|
|
||||||
// Some(Expression::Ident(Identifier::new(
|
|
||||||
// Token::new(TokenType::Let),
|
|
||||||
// "5",
|
|
||||||
// ))),
|
|
||||||
)),
|
|
||||||
Statement::Let(Let::new(
|
|
||||||
Identifier::new(Token::new(TokenType::Let), "qq".into()),
|
|
||||||
None
|
|
||||||
// Some(Expression::Ident(Identifier::new(
|
|
||||||
// Token::new(TokenType::Let),
|
|
||||||
// "10",
|
|
||||||
// ))),
|
|
||||||
)),
|
|
||||||
Statement::Let(Let::new(
|
|
||||||
Identifier::new(Token::new(TokenType::Let), "foobar".into()),
|
|
||||||
None
|
|
||||||
// Some(Expression::Ident(Identifier::new(
|
|
||||||
// Token::new(TokenType::Let),
|
|
||||||
// "8388383",
|
|
||||||
// ))),
|
|
||||||
)),
|
|
||||||
],
|
|
||||||
};
|
|
||||||
let lexer = Lexer::new(ip);
|
|
||||||
let as_tree = Program::parse(lexer);
|
|
||||||
|
|
||||||
assert_eq!(as_tree.statements.len(), 3);
|
|
||||||
assert_eq!(as_tree, expected_out);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn return_statements() {
|
|
||||||
let ip = "
|
|
||||||
return 5;
|
|
||||||
return 10;
|
|
||||||
return 80932;
|
|
||||||
";
|
|
||||||
|
|
||||||
let lexer = Lexer::new(ip);
|
|
||||||
let as_tree = Program::parse(lexer);
|
|
||||||
assert_eq!(as_tree.statements.len(), 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn identifier_expression() {
|
|
||||||
let ip = "
|
|
||||||
foobar;
|
|
||||||
";
|
|
||||||
let lexer = Lexer::new(ip);
|
|
||||||
let as_tree = Program::parse(lexer);
|
|
||||||
let expected_out = Program {
|
|
||||||
statements: vec![Statement::ExpressionStatement(ExpressionStatement::new(
|
|
||||||
Some(Token::with_value(TokenType::Ident, "foobar".into())),
|
|
||||||
Expression::Ident(Identifier::new(
|
|
||||||
Token::with_value(TokenType::Ident, "foobar".into()),
|
|
||||||
"foobar".into(),
|
|
||||||
)),
|
|
||||||
))],
|
|
||||||
};
|
|
||||||
|
|
||||||
println!("{:?}", as_tree);
|
|
||||||
assert_eq!(as_tree.statements.len(), 1);
|
|
||||||
assert_eq!(as_tree, expected_out);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn integer_literal_expression() {
|
|
||||||
let ip = "5;";
|
|
||||||
|
|
||||||
let lexer = Lexer::new(ip);
|
|
||||||
let as_tree = Program::parse(lexer);
|
|
||||||
let expected_out = Program {
|
|
||||||
statements: vec![Statement::ExpressionStatement(ExpressionStatement::new(
|
|
||||||
Some(Token::with_value(TokenType::Int, 5.into())),
|
|
||||||
Expression::Ident(Identifier::new(
|
|
||||||
Token::with_value(TokenType::Int, 5.into()),
|
|
||||||
5.into(),
|
|
||||||
)),
|
|
||||||
))],
|
|
||||||
};
|
|
||||||
|
|
||||||
assert_eq!(as_tree.statements.len(), 1);
|
|
||||||
assert_eq!(as_tree, expected_out);
|
|
||||||
}
|
|
||||||
}
|
|
12
src/repl.rs
12
src/repl.rs
|
@ -1,7 +1,7 @@
|
||||||
use crate::{lexer::Lexer, parser::Program};
|
use crate::{lexer::Lexer, parser::Parser};
|
||||||
use std::io::{self, BufRead, Write};
|
use std::io::{self, BufRead, Write};
|
||||||
|
|
||||||
const PROMPT: &str = ">> ";
|
const PROMPT: &[u8] = b">> ";
|
||||||
|
|
||||||
pub fn init() {
|
pub fn init() {
|
||||||
let stdin = io::stdin();
|
let stdin = io::stdin();
|
||||||
|
@ -14,7 +14,7 @@ pub fn init() {
|
||||||
|
|
||||||
fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
|
fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
|
||||||
loop {
|
loop {
|
||||||
out.write_all(PROMPT.as_bytes()).unwrap();
|
out.write_all(PROMPT).unwrap();
|
||||||
out.flush().unwrap();
|
out.flush().unwrap();
|
||||||
let mut s = String::new();
|
let mut s = String::new();
|
||||||
ip.read_line(&mut s).unwrap();
|
ip.read_line(&mut s).unwrap();
|
||||||
|
@ -24,7 +24,9 @@ fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
|
||||||
println!("{:?}", token);
|
println!("{:?}", token);
|
||||||
}
|
}
|
||||||
|
|
||||||
let parser = Program::parse(tokens);
|
let parser = Parser::new(tokens);
|
||||||
println!("parser={:?}", parser);
|
|
||||||
|
let stmts = parser.parse_program();
|
||||||
|
println!("parser={:?}", stmts);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user