added scanner

This commit is contained in:
Ishan Jain 2024-06-08 08:16:25 +05:30
parent 750b3357ec
commit caee5ce154
Signed by: ishan
GPG Key ID: 0506DB2A1CC75C27
6 changed files with 430 additions and 3 deletions

16
Cargo.lock generated Normal file
View File

@ -0,0 +1,16 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "loxi"
version = "0.1.0"
dependencies = [
"lazy_static",
]

View File

@ -3,6 +3,5 @@ name = "loxi"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
lazy_static = "1.4.0"

21
src/loxi.rs Normal file
View File

@ -0,0 +1,21 @@
use crate::scanner::{Scanner, ScannerError};
use std::io::{Result as IoResult, Write};
pub fn run(program: &str) {
let tokens = Scanner::new(program);
for token in tokens {
println!("{:?}", token);
}
}
fn print_parser_errors<W: Write>(mut out: W, errors: &[ScannerError]) -> IoResult<()> {
for error in errors {
out.write_fmt(format_args!(
"\tline: {} | error: {}\n",
error.line, error.message
))
.unwrap();
}
out.flush()
}

View File

@ -1,3 +1,20 @@
#[macro_use]
mod scanner;
mod loxi;
mod repl;
use std::env;
fn main() { fn main() {
println!("Hello, world!"); let args: Vec<String> = env::args().collect();
match args.len() {
1 => repl::init(),
v if v > 1 => {
println!("Usage: loxi [script]")
}
_ => {
// TODO: Read the file
}
}
} }

24
src/repl.rs Normal file
View File

@ -0,0 +1,24 @@
use crate::loxi::run;
use std::io::{self, BufRead, Write};
const PROMPT: &[u8] = b">> ";
pub fn init() {
let stdin = io::stdin();
let read_handle = stdin.lock();
let stdout = io::stdout();
let write_handle = stdout.lock();
start(read_handle, write_handle);
}
fn start<R: BufRead, W: Write>(mut ip: R, mut out: W) {
loop {
out.write_all(PROMPT).unwrap();
out.flush().unwrap();
let mut s = String::new();
ip.read_line(&mut s).unwrap();
run(&s);
}
}

350
src/scanner/mod.rs Normal file
View File

@ -0,0 +1,350 @@
use lazy_static::lazy_static;
use std::{collections::HashMap, iter::Peekable, str::Chars};
lazy_static! {
static ref IDENTMAP: HashMap<&'static str, TokenType> = {
let mut m = HashMap::new();
m.insert("fun", TokenType::Fun);
m.insert("var", TokenType::Var);
m.insert("true", TokenType::True);
m.insert("false", TokenType::False);
m.insert("return", TokenType::Return);
m.insert("if", TokenType::If);
m.insert("else", TokenType::Else);
m.insert("for", TokenType::For);
m.insert("nil", TokenType::Nil);
m.insert("and", TokenType::And);
m.insert("class", TokenType::Class);
m.insert("or", TokenType::Or);
m.insert("print", TokenType::Print);
m.insert("return", TokenType::Return);
m.insert("super", TokenType::Super);
m.insert("this", TokenType::This);
m.insert("while", TokenType::While);
m
};
}
pub struct Scanner<'a> {
input: Peekable<Chars<'a>>,
eof_sent: bool,
line: u64,
// Errors
// Provide an interface to log errors in the scanning process
}
impl<'a> Scanner<'a> {
pub fn new(program: &'a str) -> Self {
Self {
input: program.chars().peekable(),
eof_sent: false,
line: 0,
}
}
fn skip_whitespace(&mut self) {
while let Some(c) = self.input.peek() {
match c {
'\n' => {
self.read_char();
self.line += 1;
}
' ' | '\t' | '\r' => {
self.read_char();
}
_ => break,
}
}
}
#[inline]
fn read_char(&mut self) -> Option<char> {
self.input.next()
}
fn read_number(&mut self, first: char) -> Result<String, String> {
let mut number = first.to_string();
let mut decimal_found = false;
while let Some(c) = self.input.next() {
match c {
v if v.is_ascii_digit() => number.push(c),
'.' if !decimal_found => {
number.push(c);
decimal_found = true;
if let Some(&next_char) = self.input.peek() {
if !next_char.is_ascii_digit() {
return Err("trailing dot when parsing number".to_string());
}
}
}
' ' | '\t' | '\r' | '\n' => return Ok(number),
v => {
return Err(format!(
"error in parsing number, unexpected character: {:?}",
v
))
}
}
}
Ok(number)
}
fn read_string(&mut self) -> Result<String, String> {
let mut out = String::new();
while let Some(c) = self.read_char() {
match c {
'"' => return Ok(out),
'\n' => return Err("unterminated string".to_string()),
'\\' => {
let next_char = self
.read_char()
.ok_or_else(|| "Unterminated escape sequence".to_string())?;
match next_char {
'n' => out.push('\n'),
'r' => out.push('\r'),
't' => out.push('\t'),
'"' => out.push('\"'),
'\\' => out.push('\\'),
_ => return Err("invalid escape sequence".to_string()),
}
}
_ => out.push(c),
}
}
Ok(out)
}
fn read_identifier(&mut self, first: char) -> String {
let mut ident = first.to_string();
while self.input.peek().map_or(false, |&c| is_letter(c)) {
ident.push(self.read_char().unwrap());
}
ident
}
}
impl<'a> Iterator for Scanner<'a> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
self.skip_whitespace();
let ch = self.read_char();
match ch {
Some('(') => Some(Token::new(TokenType::LeftParen)),
Some(')') => Some(Token::new(TokenType::RightParen)),
Some('{') => Some(Token::new(TokenType::LeftBrace)),
Some('}') => Some(Token::new(TokenType::RightBrace)),
Some(',') => Some(Token::new(TokenType::Comma)),
Some('.') => {
match self.input.peek() {
Some(v) if v.is_ascii_digit() => {
// TODO: Log trailing dot error
None
}
_ => Some(Token::new(TokenType::Dot)),
}
}
Some('-') => Some(Token::new(TokenType::Minus)),
Some('+') => Some(Token::new(TokenType::Plus)),
Some(';') => Some(Token::new(TokenType::Semicolon)),
Some('*') => Some(Token::new(TokenType::Star)),
Some('=') => {
if let Some(&next) = self.input.peek() {
if next == '=' {
self.read_char();
Some(Token::new(TokenType::EqualEqual))
} else {
Some(Token::new(TokenType::Equal))
}
} else {
Some(Token::new(TokenType::Equal))
}
}
Some('!') => {
if let Some(&next) = self.input.peek() {
if next == '=' {
self.read_char();
Some(Token::new(TokenType::BangEqual))
} else {
Some(Token::new(TokenType::Bang))
}
} else {
Some(Token::new(TokenType::Bang))
}
}
Some('<') => {
if let Some(&next) = self.input.peek() {
if next == '=' {
self.read_char();
Some(Token::new(TokenType::LessEqual))
} else {
Some(Token::new(TokenType::Less))
}
} else {
Some(Token::new(TokenType::Less))
}
}
Some('>') => {
if let Some(&next) = self.input.peek() {
if next == '=' {
self.read_char();
Some(Token::new(TokenType::GreaterEqual))
} else {
Some(Token::new(TokenType::Greater))
}
} else {
Some(Token::new(TokenType::Greater))
}
}
Some('/') => {
// TODO: All this needs to be cleaned
if let Some(&next) = self.input.peek() {
if next == '/' {
// Found a comment!
// Skip till the end of line
while let Some(next) = self.read_char() {
if next == '\n' {
break;
}
}
None
} else {
Some(Token::new(TokenType::Slash))
}
} else {
Some(Token::new(TokenType::Slash))
}
}
Some('"') => {
match self.read_string() {
Ok(s) => Some(Token::with_lexeme(TokenType::LString, s)),
Err(e) => {
// TODO: Log errors
return None;
}
}
}
Some(c) if c.is_ascii_digit() => {
match self.read_number(c) {
Ok(v) => Some(Token::with_lexeme(TokenType::Number, v)),
Err(e) => {
// TODO: Log error
None
}
}
}
Some(c) if is_letter(c) => {
let ident = self.read_identifier(c);
Some(lookup_ident(&ident))
}
Some('\n') => {
unreachable!()
}
None if !self.eof_sent => {
self.eof_sent = true;
Some(Token::new(TokenType::Eof))
}
None => None,
_ => Some(Token::new(TokenType::Illegal)),
}
}
}
fn lookup_ident(ident: &str) -> Token {
match IDENTMAP.get(&ident) {
Some(v) => Token::new(*v),
None => Token::with_lexeme(TokenType::Identifier, ident.to_string()),
}
}
#[inline]
fn is_letter(ch: char) -> bool {
ch.is_alphabetic() || ch == '_'
}
pub struct ScannerError {
pub line: u64,
pub message: String,
}
#[derive(Debug, Copy, Clone)]
pub enum TokenType {
LeftParen,
RightParen,
LeftBrace,
RightBrace,
Comma,
Dot,
Minus,
Plus,
Semicolon,
Slash,
Star,
Bang,
BangEqual,
Equal,
EqualEqual,
Greater,
GreaterEqual,
Less,
LessEqual,
Identifier,
LString,
Number,
And,
Class,
Else,
False,
Fun,
For,
If,
Nil,
Or,
Print,
Return,
Super,
This,
True,
Var,
While,
Eof,
Illegal,
}
#[derive(Debug)]
pub struct Token {
ttype: TokenType,
line: u64,
lexeme: String,
literal: Option<String>,
}
impl Token {
pub fn new(ttype: TokenType) -> Self {
Token {
ttype,
line: 0,
lexeme: "".to_string(),
literal: None,
}
}
pub fn with_lexeme(ttype: TokenType, l: String) -> Self {
Token {
ttype,
line: 0,
lexeme: l,
literal: None,
}
}
}