From f51ead1641b67af7932a42bdfde6e24b788894d5 Mon Sep 17 00:00:00 2001 From: Ishan Jain Date: Mon, 27 May 2024 16:26:52 +0530 Subject: [PATCH] WIP: Hash evaluator --- src/evaluator/mod.rs | 115 +++++++++++++++++++++++++++++------ src/evaluator/tree_walker.rs | 44 +++++++++++--- src/lexer.rs | 14 ++++- src/parser/ast.rs | 92 ++++++++++++++++++++++------ src/parser/mod.rs | 68 +++++++++++++++++++++ 5 files changed, 285 insertions(+), 48 deletions(-) diff --git a/src/evaluator/mod.rs b/src/evaluator/mod.rs index 56bad4b..3ad277e 100644 --- a/src/evaluator/mod.rs +++ b/src/evaluator/mod.rs @@ -3,8 +3,9 @@ use { itertools::Itertools, std::{ cell::RefCell, - collections::HashMap, + collections::BTreeMap, fmt::{self, Display, Formatter, Result as FmtResult, Write}, + hash::{Hash, Hasher}, rc::Rc, }, }; @@ -15,16 +16,25 @@ pub trait Evaluator { fn eval(&self, node: Node, env: Rc>) -> Option; } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Environment { - store: HashMap, - outer: Option>>, + store: BTreeMap, + outer: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct OuterEnvironment(Rc>); + +impl Hash for OuterEnvironment { + fn hash(&self, state: &mut H) { + self.0.borrow().hash(state); + } } impl Environment { pub fn new() -> Self { Self { - store: HashMap::new(), + store: BTreeMap::new(), outer: None, } } @@ -33,7 +43,7 @@ impl Environment { Some(v) => Some(v.clone()), None => match &self.outer { Some(outer) => { - let outer = outer.borrow(); + let outer = outer.0.borrow(); outer.get(name) } None => None, @@ -44,15 +54,15 @@ impl Environment { self.store.insert(name, val); } - pub fn new_enclosed(env: Rc>) -> Self { + pub fn new_enclosed(env: OuterEnvironment) -> Self { Self { - store: HashMap::new(), + store: BTreeMap::new(), outer: Some(env), } } } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Hash, Ord, PartialOrd, Eq)] pub enum Object { Integer(i64), String(String), @@ -62,6 +72,7 @@ pub enum Object { Function(Function), Builtin(BuiltinFunction), Array(Array), + Hash(HashObject), Null, } @@ -90,6 +101,15 @@ impl Object { out } + Object::Hash(h) => { + let mut pairs = vec![]; + + for (k, v) in h.pairs.iter() { + pairs.push(format!("{}: {}", k.inspect(), v.inspect())); + } + + format!("{{ {} }}", pairs.join(", ")) + } } } } @@ -106,20 +126,21 @@ impl Display for Object { Object::Null => "NULL", Object::Builtin(_) => "BUILTIN", Object::Array(_) => "ARRAY", + Object::Hash(_) => "HASH", }) } } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] pub struct Function { parameters: Vec, body: BlockStatement, - env: Rc>, + env: OuterEnvironment, } type Builtin = fn(Vec) -> Object; -#[derive(Clone)] +#[derive(Clone, Hash, Ord, PartialOrd, PartialEq, Eq)] pub struct BuiltinFunction { func: Box, } @@ -132,17 +153,22 @@ impl fmt::Debug for BuiltinFunction { } } -impl PartialEq for BuiltinFunction { - fn eq(&self, _: &Self) -> bool { - false - } -} - -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)] pub struct Array { elements: Vec, } +#[derive(Debug, Clone, PartialEq, PartialOrd, Ord, Eq, Hash)] +pub struct HashObject { + pairs: BTreeMap, +} + +impl HashObject { + fn new(ip: impl Into>) -> Self { + Self { pairs: ip.into() } + } +} + #[cfg(test)] mod tests { use std::{assert_matches::assert_matches, cell::RefCell, rc::Rc}; @@ -153,7 +179,7 @@ mod tests { parser::{ast::Node, Parser}, }; - use super::Array; + use super::{Array, HashObject}; const TRUE: Object = Object::Boolean(true); const FALSE: Object = Object::Boolean(false); const NULL: Object = Object::Null; @@ -340,6 +366,10 @@ mod tests { "\"Hello\" - \"World\"", Some(Object::Error("unknown operator: STRING - STRING".into())), ), + ( + "{\"name\": \"Monkey\"}[fn(x) {x}];", + Some(Object::Error("unusable as hash key: FUNCTION".into())), + ), ]; run_test_cases(&test_cases); @@ -527,4 +557,49 @@ mod tests { run_test_cases(&test_cases); } + + #[test] + fn hash_literals() { + let test_cases = [( + "let two = \"two\"; + { + \"one\": 10 - 9, + two: 1 + 1, + \"thr\" + \"ee\": 6 / 2, + 4: 4, + true: 5, + false: 6 + } + ", + Some(Object::Hash(HashObject::new([ + (Object::String("one".to_string()), Object::Integer(1)), + (Object::String("two".to_string()), Object::Integer(2)), + (Object::String("three".to_string()), Object::Integer(3)), + (Object::Integer(4), Object::Integer(4)), + (Object::Boolean(true), Object::Integer(5)), + (Object::Boolean(false), Object::Integer(6)), + ]))), + )]; + + run_test_cases(&test_cases); + } + + #[test] + fn hash_index_expressions() { + let test_cases = [ + ("{\"foo\": 5}[\"foo\"]", Some(Object::Integer(5))), + ("{\"foo\": 5}[\"var\"]", Some(Object::Null)), + ( + "let key = \"foo\"; {\"foo\": 5}[key]", + Some(Object::Integer(5)), + ), + ("{}[\"foo\"]", Some(Object::Null)), + ("{5: 5}[5]", Some(Object::Integer(5))), + ("{true: 5}[true]", Some(Object::Integer(5))), + ("{false: 5}[false]", Some(Object::Integer(5))), + ("{true: 5}[false]", Some(Object::Null)), + ]; + + run_test_cases(&test_cases); + } } diff --git a/src/evaluator/tree_walker.rs b/src/evaluator/tree_walker.rs index 2825bdd..beab588 100644 --- a/src/evaluator/tree_walker.rs +++ b/src/evaluator/tree_walker.rs @@ -1,4 +1,4 @@ -use std::{cell::RefCell, rc::Rc}; +use std::{cell::RefCell, collections::BTreeMap, rc::Rc}; // TODO: This is all a mess. Almost certainly because right now, I don't know any better way to do this. // It's just constantly unwrapping enums from one place and rewrapping it to some other enum(or even the same enum) and returning it @@ -7,12 +7,12 @@ use crate::{ evaluator::{Array, Evaluator, Object}, lexer::TokenType, parser::ast::{ - BlockStatement, Expression, ExpressionStatement, Identifier, LetStatement, Node, Program, - Statement, + BlockStatement, Expression, ExpressionStatement, HashLiteral, Identifier, LetStatement, + Node, Program, Statement, }, }; -use super::{builtins::BUILTINS, Environment, Function}; +use super::{builtins::BUILTINS, Environment, Function, HashObject, OuterEnvironment}; pub struct TreeWalker; @@ -47,9 +47,8 @@ impl Evaluator for TreeWalker { Expression::Identifier(v) => self.eval_identifier(v, env), Expression::IntegerLiteral(il) => Some(Object::Integer(il.value)), Expression::StringLiteral(s) => Some(Object::String(s.value)), + Expression::HashLiteral(h) => self.eval_hash_literal(h, env), Expression::ArrayLiteral(v) => { - println!("{:?}", v); - let args = match self.eval_expression(v.elements, env) { Ok(v) => v, Err(e) => return Some(e), @@ -76,7 +75,7 @@ impl Evaluator for TreeWalker { Expression::FunctionExpression(fnl) => Some(Object::Function(Function { body: fnl.body, parameters: fnl.parameters, - env, + env: OuterEnvironment(env), })), Expression::CallExpression(v) => { let function = self.eval(Node::Expression(*v.function), env.clone())?; @@ -305,8 +304,11 @@ impl TreeWalker { } fn eval_index_expression(&self, left: Object, index: Object) -> Option { - match (&left, index) { - (Object::Array(a), Object::Integer(i)) => Some(Self::eval_array_index_expression(a, i)), + match (&left, &index) { + (Object::Array(a), Object::Integer(i)) => { + Some(Self::eval_array_index_expression(a, *i)) + } + (Object::Hash(h), _) => self.eval_hash_index_expression(h, index), _ => Some(Object::Error(format!( "index operator not supported: {}", @@ -315,6 +317,12 @@ impl TreeWalker { } } + fn eval_hash_index_expression(&self, left: &HashObject, index: Object) -> Option { + Some(Object::Error(format!( + "index operator not supported: {:?}", + left + ))) + } fn eval_array_index_expression(array: &Array, index: i64) -> Object { let max = array.elements.len() as i64; if index < 0 || index >= max { @@ -322,4 +330,22 @@ impl TreeWalker { } array.elements[index as usize].clone() } + + fn eval_hash_literal( + &self, + node: HashLiteral, + env: Rc>, + ) -> Option { + let mut out = BTreeMap::new(); + + for (k, v) in node.pairs.into_iter() { + let k = self.eval(Node::Expression(k), env.clone())?; + + let v = self.eval(Node::Expression(v), env.clone())?; + + out.insert(k, v); + } + + Some(Object::Hash(HashObject { pairs: out })) + } } diff --git a/src/lexer.rs b/src/lexer.rs index 66d675a..b20b55f 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -19,7 +19,7 @@ lazy_static! { }; } -#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] +#[derive(Debug, PartialEq, Eq, Copy, Clone, Ord, PartialOrd, Hash)] pub enum TokenType { Illegal, #[allow(clippy::upper_case_acronyms)] @@ -47,6 +47,7 @@ pub enum TokenType { // Delimiter Comma, Semicolon, + Colon, LParen, RParen, LBrace, @@ -79,6 +80,7 @@ impl Display for TokenType { TokenType::NotEquals => "!=", TokenType::Comma => ",", TokenType::Semicolon => ";", + TokenType::Colon => ":", TokenType::LParen => "(", TokenType::RParen => ")", TokenType::LBrace => "{", @@ -101,7 +103,7 @@ impl Display for TokenType { } } -#[derive(Debug, PartialEq, Eq, Clone, Hash)] +#[derive(Debug, PartialEq, Eq, Clone, Ord, PartialOrd, Hash)] pub struct Token { pub name: TokenType, pub literal: Option, @@ -243,6 +245,7 @@ impl<'a> Iterator for Lexer<'a> { Some('-') => Some(token!(TokenType::Minus)), Some(',') => Some(token!(TokenType::Comma)), Some(';') => Some(token!(TokenType::Semicolon)), + Some(':') => Some(token!(TokenType::Colon)), Some('(') => Some(token!(TokenType::LParen)), Some(')') => Some(token!(TokenType::RParen)), Some('[') => Some(token!(TokenType::LBracket)), @@ -387,6 +390,8 @@ mod tests { \"foo bar\" [1,2]; + {\"foo\": \"bar\"} + " ) .collect::>(), @@ -446,6 +451,11 @@ mod tests { token!(TokenType::Int, "2"), token!(TokenType::RBracket), token!(TokenType::Semicolon), + token!(TokenType::LBrace), + token!(TokenType::String, "foo"), + token!(TokenType::Colon), + token!(TokenType::String, "bar"), + token!(TokenType::RBrace), token!(TokenType::EOF), ], ); diff --git a/src/parser/ast.rs b/src/parser/ast.rs index 6f209a4..934cd4e 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -6,6 +6,7 @@ use { itertools::Itertools, std::{ cmp::PartialOrd, + collections::BTreeMap, convert::From, fmt::{Display, Formatter, Result as FmtResult, Write}, }, @@ -34,7 +35,7 @@ impl Display for Program { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub enum Statement { Let(LetStatement), Return(ReturnStatement), @@ -65,7 +66,7 @@ impl Display for Statement { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct LetStatement { // name field is to store the identifier of the binding pub name: Identifier, @@ -113,7 +114,7 @@ impl Display for LetStatement { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct ReturnStatement { pub value: Option, } @@ -145,7 +146,7 @@ impl Display for ReturnStatement { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct ExpressionStatement { token: Token, pub expression: Expression, @@ -185,12 +186,13 @@ pub enum ExpressionPriority { Index = 7, } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, Hash, PartialEq, Eq, Ord, PartialOrd, Clone)] pub enum Expression { Identifier(Identifier), IntegerLiteral(IntegerLiteral), StringLiteral(StringLiteral), ArrayLiteral(ArrayLiteral), + HashLiteral(HashLiteral), IndexExpression(IndexExpression), PrefixExpression(PrefixExpression), InfixExpression(InfixExpression), @@ -261,6 +263,7 @@ impl Display for Expression { Expression::IndexExpression(v) => { format!("({}[{}])", v.left, v.index) } + Expression::HashLiteral(v) => v.to_string(), }; f.write_str(&value) @@ -276,7 +279,7 @@ impl From<&Expression> for String { // Identifier will be an expression // Identifier in a let statement like, let x = 5; where `x` is an identifier doesn't produce a value // but an identifier *can* produce value when used on rhs, e.g. let x = y; Here `y` is producing a value -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct Identifier { token: TokenType, pub value: String, @@ -303,7 +306,7 @@ impl Display for Identifier { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct IntegerLiteral { pub value: i64, } @@ -326,7 +329,7 @@ impl IntegerLiteral { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct StringLiteral { pub value: String, } @@ -344,7 +347,7 @@ impl StringLiteral { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct ArrayLiteral { pub elements: Vec, } @@ -371,7 +374,7 @@ impl Display for ArrayLiteral { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct IndexExpression { pub left: Box, pub index: Box, @@ -399,7 +402,62 @@ impl IndexExpression { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] +pub struct HashLiteral { + pub pairs: BTreeMap, +} + +impl HashLiteral { + pub fn new(pairs: impl Into>) -> Self { + Self { + pairs: pairs.into(), + } + } + + pub fn parse(parser: &mut Parser, _token: Token) -> Option { + let mut map = BTreeMap::new(); + + while !parser.peek_token_is(TokenType::RBrace) { + let ctoken = parser.lexer.next()?; + + let key = Expression::parse(parser, ctoken, ExpressionPriority::Lowest)?; + + parser.expect_peek(TokenType::Colon)?; + let ctoken = parser.lexer.next()?; + + let value = Expression::parse(parser, ctoken, ExpressionPriority::Lowest)?; + + map.insert(key, value); + + if !parser.peek_token_is(TokenType::RBrace) + && parser.expect_peek(TokenType::Comma).is_none() + { + return None; + } + } + + parser.expect_peek(TokenType::RBrace)?; + + Some(Expression::HashLiteral(HashLiteral { pairs: map })) + } +} + +impl Display for HashLiteral { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + f.write_char('{')?; + + f.write_str( + &self + .pairs + .iter() + .map(|(k, v)| format!("{}:{}", k, v)) + .join(", "), + )?; + f.write_char('}') + } +} + +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct PrefixExpression { pub operator: TokenType, pub right: Box, @@ -429,7 +487,7 @@ impl Display for PrefixExpression { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct InfixExpression { pub left: Box, pub operator: TokenType, @@ -463,7 +521,7 @@ impl Display for InfixExpression { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct BooleanExpression { token: TokenType, pub value: bool, @@ -489,7 +547,7 @@ impl Display for BooleanExpression { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct IfExpression { pub condition: Box, pub consequence: BlockStatement, @@ -537,7 +595,7 @@ impl Display for IfExpression { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct BlockStatement { pub statements: Vec, } @@ -577,7 +635,7 @@ impl Display for BlockStatement { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct FunctionLiteral { token: Token, pub parameters: Vec, @@ -643,7 +701,7 @@ impl Display for FunctionLiteral { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Hash, Eq, Ord, PartialOrd, Clone)] pub struct CallExpression { pub function: Box, pub arguments: Vec, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3df6b26..290d76d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -60,6 +60,7 @@ impl<'a> Parser<'a> { parser.register_prefix(TokenType::If, IfExpression::parse); parser.register_prefix(TokenType::Function, FunctionLiteral::parse); parser.register_prefix(TokenType::LBracket, ArrayLiteral::parse); + parser.register_prefix(TokenType::LBrace, HashLiteral::parse); // Neat trick! // Call expressions looks like (). @@ -980,4 +981,71 @@ mod tests { check_test_cases(&test_cases); } + + #[test] + fn hash_literal() { + let test_cases = [ + ( + "{\"one\": 1, \"two\": 2, \"three\": 3}", + vec![Statement::ExpressionStatement(ExpressionStatement::new( + token!(TokenType::LBrace), + Expression::HashLiteral(HashLiteral::new([ + ( + Expression::StringLiteral(StringLiteral::new("one")), + Expression::IntegerLiteral(IntegerLiteral::new(1)), + ), + ( + Expression::StringLiteral(StringLiteral::new("two")), + Expression::IntegerLiteral(IntegerLiteral::new(2)), + ), + ( + Expression::StringLiteral(StringLiteral::new("three")), + Expression::IntegerLiteral(IntegerLiteral::new(3)), + ), + ])), + ))], + ), + ( + "{}", + vec![Statement::ExpressionStatement(ExpressionStatement::new( + token!(TokenType::LBrace), + Expression::HashLiteral(HashLiteral::new([])), + ))], + ), + ( + "{\"one\": 0 + 1, \"two\": 10-8, \"three\": 15/5}", + vec![Statement::ExpressionStatement(ExpressionStatement::new( + token!(TokenType::LBrace), + Expression::HashLiteral(HashLiteral::new([ + ( + Expression::StringLiteral(StringLiteral::new("one")), + Expression::InfixExpression(InfixExpression::new( + Expression::IntegerLiteral(IntegerLiteral::new(0)), + TokenType::Plus, + Expression::IntegerLiteral(IntegerLiteral::new(1)), + )), + ), + ( + Expression::StringLiteral(StringLiteral::new("two")), + Expression::InfixExpression(InfixExpression::new( + Expression::IntegerLiteral(IntegerLiteral::new(10)), + TokenType::Minus, + Expression::IntegerLiteral(IntegerLiteral::new(8)), + )), + ), + ( + Expression::StringLiteral(StringLiteral::new("three")), + Expression::InfixExpression(InfixExpression::new( + Expression::IntegerLiteral(IntegerLiteral::new(15)), + TokenType::Slash, + Expression::IntegerLiteral(IntegerLiteral::new(5)), + )), + ), + ])), + ))], + ), + ]; + + check_test_cases(&test_cases); + } }