1
0
Fork 0
monkey-interpreter/src/lexer/mod.rs

177 lines
4.1 KiB
Rust

use std::collections::HashMap;
use std::iter::Peekable;
use std::str::{self, Chars};
lazy_static! {
static ref IDENTMAP: HashMap<&'static str, Token> = {
let mut m = HashMap::new();
m.insert("fn", Token::Function);
m.insert("let", Token::Let);
m.insert("true", Token::True);
m.insert("false", Token::False);
m.insert("return", Token::Return);
m.insert("if", Token::If);
m.insert("else", Token::Else);
m
};
}
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
Illegal,
EOF,
// Identifiers
Int(i64),
// Operators
Assign,
Plus,
Multiply,
Divide,
Subtract,
ExclamationMark,
LessThan,
GreaterThan,
// Delimiter
Comma,
Semicolon,
LParen,
RParen,
LBrace,
RBrace,
// Keywords
Function,
If,
Let,
True,
Else,
False,
Return,
Ident(String),
}
#[derive(Debug)]
pub struct Lexer<'a> {
input: Peekable<Chars<'a>>,
eof_sent: bool,
}
impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Lexer<'a> {
let input = input.chars().peekable();
Lexer {
input,
eof_sent: false,
}
}
fn read_char(&mut self) -> Option<char> {
self.input.next()
}
fn read_identifier(&mut self, first: char) -> String {
let mut ident = Vec::new();
ident.push(first);
while self.peek_is_letter() {
ident.push(self.read_char().unwrap());
}
ident.into_iter().collect::<String>()
}
fn peek_is_letter(&mut self) -> bool {
match self.input.peek() {
Some(v) => is_letter(v),
None => false,
}
}
fn peek_is_ascii_digit(&mut self) -> bool {
match self.input.peek() {
Some(v) => v.is_ascii_digit(),
None => false,
}
}
fn skip_whitespace(&mut self) {
while let Some(&v) = self.input.peek() {
if v == ' ' || v == '\t' || v == '\n' || v == '\r' {
self.read_char();
} else {
break;
}
}
}
// use i64 for all numbers for now.
fn read_number(&mut self, first: char) -> i64 {
let mut number = Vec::new();
number.push(first);
while self.peek_is_ascii_digit() {
number.push(self.read_char().unwrap());
}
number
.into_iter()
.collect::<String>()
.parse::<i64>()
.unwrap()
}
}
impl<'a> Iterator for Lexer<'a> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
self.skip_whitespace();
let ch = self.read_char();
let v = match ch {
Some('=') => Some(Token::Assign),
Some('+') => Some(Token::Plus),
Some('*') => Some(Token::Multiply),
Some('/') => Some(Token::Divide),
Some('-') => Some(Token::Subtract),
Some(',') => Some(Token::Comma),
Some(';') => Some(Token::Semicolon),
Some('(') => Some(Token::LParen),
Some(')') => Some(Token::RParen),
Some('{') => Some(Token::LBrace),
Some('}') => Some(Token::RBrace),
Some('!') => Some(Token::ExclamationMark),
Some('>') => Some(Token::GreaterThan),
Some('<') => Some(Token::LessThan),
Some(ch @ _) if is_letter(&ch) => {
let ident = self.read_identifier(ch);
Some(lookup_ident(&ident))
}
Some(ch @ _) if ch.is_ascii_digit() => {
let number = self.read_number(ch);
Some(Token::Int(number))
}
None if !self.eof_sent => {
self.eof_sent = true;
Some(Token::EOF)
}
None => None,
_ => Some(Token::Illegal),
};
v
}
}
fn is_letter(c: &char) -> bool {
c.is_ascii_alphabetic() || c == &'_'
}
fn lookup_ident(ident: &str) -> Token {
match IDENTMAP.get(ident) {
Some(v) => v.clone(),
None => Token::Ident(ident.to_string()),
}
}