From c84e73756f352314881c15e59c3469db4ea1b2a1 Mon Sep 17 00:00:00 2001
From: ishanjain28
Date: Mon, 14 Jan 2019 00:39:27 +0530
Subject: [PATCH] Initial commit. Working on lexer

---
Review note: src/lexer/mod.rs and src/main.rs were revised during review, so
the commit id above and the blob ids on the "index" lines below no longer
describe the file contents carried by this patch.

 .gitignore       |   1 +
 Cargo.lock       |  14 +++++
 Cargo.toml       |   7 +++
 src/lexer/mod.rs | 141 +++++++++++++++++++++++++++++++++++++++++++++++
 src/main.rs      |  32 +++++++++++
 5 files changed, 195 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.lock
 create mode 100644 Cargo.toml
 create mode 100644 src/lexer/mod.rs
 create mode 100644 src/main.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2f7896d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+target/
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..b3f0ee0
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,14 @@
+[[package]]
+name = "interpreter"
+version = "0.1.0"
+dependencies = [
+ "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[metadata]
+"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..733bfdb
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "interpreter"
+version = "0.1.0"
+authors = ["ishanjain28 "]
+
+[dependencies]
+lazy_static = "1.2.0"
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
new file mode 100644
index 0000000..9739c91
--- /dev/null
+++ b/src/lexer/mod.rs
@@ -0,0 +1,141 @@
+use std::collections::HashMap;
+
+lazy_static! {
+    /// Reserved words and the keyword token each one maps to.
+    static ref IDENTMAP: HashMap<&'static str, Token> = {
+        let mut m = HashMap::new();
+        m.insert("fn", Token::Function);
+        m.insert("let", Token::Let);
+        m
+    };
+}
+
+/// A lexical token produced by `Lexer`.
+#[derive(Debug, PartialEq)]
+pub enum Token {
+    /// A character the lexer does not recognise.
+    Illegal,
+    /// End of input; yielded exactly once, as the last token.
+    EOF,
+
+    // Identifiers
+    Int(i64),
+
+    // Operators
+    Assign,
+    Plus,
+    Multiply,
+    Divide,
+    Subtract,
+
+    // Delimiter
+    Comma,
+    Semicolon,
+    LParen,
+    RParen,
+    LBrace,
+    RBrace,
+
+    // Keywords
+    Function,
+    Let,
+}
+
+/// Character-at-a-time tokenizer over an in-memory source string.
+#[derive(Debug)]
+pub struct Lexer {
+    /// Source text split into characters.
+    input: Vec<char>,
+    /// Index of the character currently held in `ch`.
+    position: usize,
+    /// Index of the next character `read_char` will load.
+    read_position: usize,
+    /// Current character; `'\0'` once the input is exhausted.
+    ch: char,
+    /// True once `Token::EOF` has been yielded, so iteration can stop.
+    done: bool,
+}
+
+impl Lexer {
+    /// Creates a lexer positioned before the first character of `input`.
+    pub fn new(input: &str) -> Lexer {
+        Lexer {
+            input: input.chars().collect(),
+            position: 0,
+            read_position: 0,
+            ch: '\0',
+            done: false,
+        }
+    }
+
+    /// Loads the next character into `ch` and advances the cursors.
+    ///
+    /// Past the end of the input `ch` is set to `'\0'` and both cursors rest
+    /// at `input.len()`. The digit `'0'` cannot serve as the sentinel because
+    /// it is a legal source character.
+    fn read_char(&mut self) {
+        self.position = self.read_position;
+        match self.input.get(self.read_position) {
+            Some(&c) => {
+                self.ch = c;
+                self.read_position += 1;
+            }
+            None => self.ch = '\0',
+        }
+    }
+
+    /// Consumes the run of identifier characters starting at the current
+    /// position and returns it as a `String`.
+    fn read_identifier(&mut self) -> String {
+        let start = self.position;
+        while is_letter(self.ch) {
+            self.read_char();
+        }
+
+        self.input[start..self.position].iter().collect()
+    }
+}
+
+impl Iterator for Lexer {
+    type Item = Token;
+
+    /// Yields the next token, then `Token::EOF` once, then `None`.
+    ///
+    /// Characters that match no rule become `Token::Illegal` instead of
+    /// ending iteration, so one bad character cannot truncate the stream.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.done {
+            return None;
+        }
+
+        self.read_char();
+        // Test the cursor rather than `ch` so a literal NUL in the source is
+        // not mistaken for end of input.
+        if self.position >= self.input.len() {
+            self.done = true;
+            return Some(Token::EOF);
+        }
+
+        Some(match self.ch {
+            '=' => Token::Assign,
+            '+' => Token::Plus,
+            '*' => Token::Multiply,
+            '/' => Token::Divide,
+            '-' => Token::Subtract,
+            ',' => Token::Comma,
+            ';' => Token::Semicolon,
+            '(' => Token::LParen,
+            ')' => Token::RParen,
+            // Braces are `{`/`}`; `[`/`]` are brackets and have no token yet.
+            '{' => Token::LBrace,
+            '}' => Token::RBrace,
+            _ => Token::Illegal,
+        })
+    }
+}
+
+/// Returns true for ASCII letters and `_`, the characters allowed in an
+/// identifier.
+fn is_letter(c: char) -> bool {
+    c.is_ascii_alphabetic() || c == '_'
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..291eb48
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,32 @@
+#[macro_use]
+extern crate lazy_static;
+
+mod lexer;
+
+fn main() {}
+
+#[cfg(test)]
+mod tests {
+    use lexer::{Lexer, Token};
+
+    /// Every single-character token is recognised and `EOF` ends the stream.
+    #[test]
+    fn new_token() {
+        let input = "=+(){},;";
+        let expected = vec![
+            Token::Assign,
+            Token::Plus,
+            Token::LParen,
+            Token::RParen,
+            Token::LBrace,
+            Token::RBrace,
+            Token::Comma,
+            Token::Semicolon,
+            Token::EOF,
+        ];
+
+        let tokenized_output = Lexer::new(input).collect::<Vec<Token>>();
+
+        assert_eq!(tokenized_output, expected);
+    }
+}