diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b0bef68 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb + +*.db diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..5c1b152 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,14 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "anyhow" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61604a8f862e1d5c3229fdd78f8b02c68dcf73a4c4b05fd636d12240aaa242c1" + +[[package]] +name = "sqlite-starter-rust" +version = "0.1.0" +dependencies = [ + "anyhow", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..72b40e6 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,22 @@ +# DON'T EDIT THIS! +# +# Codecrafters relies on this file being intact to run tests successfully. Any changes +# here will not reflect when CodeCrafters tests your code, and might even cause build +# failures. +# +# DON'T EDIT THIS! +[package] +name = "sqlite-starter-rust" +version = "0.1.0" +authors = ["Codecrafters "] +edition = "2018" + +# DON'T EDIT THIS! +# +# Codecrafters relies on this file being intact to run tests successfully. Any changes +# here will not reflect when CodeCrafters tests your code, and might even cause build +# failures. +# +# DON'T EDIT THIS! +[dependencies] +anyhow = "1.0.43" # for easy error handling diff --git a/README.md b/README.md index 8bcafd8..d141367 100644 --- a/README.md +++ b/README.md @@ -1 +1,85 @@ -# sqlite-starter-rust \ No newline at end of file +This is a starting point for Rust solutions to the +["Build Your Own SQLite" Challenge](https://codecrafters.io/challenges/sqlite). 
+ +In this challenge, you'll build a barebones SQLite implementation that supports +basic SQL queries like `SELECT`. Along the way we'll learn about +[SQLite's file format](https://www.sqlite.org/fileformat.html), how indexed data +is +[stored in B-trees](https://jvns.ca/blog/2014/10/02/how-does-sqlite-work-part-2-btrees/) +and more. + +**Note**: If you're viewing this repo on GitHub, head over to +[codecrafters.io](https://codecrafters.io) to sign up for early access. + +# Passing the first stage + +CodeCrafters runs tests when you do a `git push`. Make an empty commit and push +your solution to see the first stage fail. + +```sh +git commit --allow-empty -m "Running tests" +git push origin master +``` + +You should see a failure message that says it expected "number of tables: " +to be present. + +Go to `src/main.rs` and uncomment the `.dbinfo` command implementation. Commit +and push your changes to pass the first stage: + +```sh +git add . +git commit -m "pass the first stage" +git push origin master +``` + +Time to move on to the next stage! + +# Running Your Program Locally + +1. Ensure you have `cargo (1.43)` installed locally +1. Run `./your_sqlite3.sh` to run your program, which is implemented in + `src/main.rs`. This command compiles your Rust project, so it might be slow + the first time you run it. Subsequent runs will be fast. +1. Commit your changes and run `git push origin master` to submit your solution + to CodeCrafters. Test output will be streamed to your terminal. + +# Sample Databases + +To make it easy to test queries locally, we've added a sample database in the +root of this repository: `sample.db`. + +This contains two tables: `apples` & `oranges`. You can use this to test your +implementation for the first 6 stages. 
+ +You can explore this database by running queries against it like this: + +```sh +$ sqlite3 sample.db "select id, name from apples" +1|Granny Smith +2|Fuji +3|Honeycrisp +4|Golden Delicious +``` + +There are two other databases that you can use: + +1. `superheroes.db`: + - This is a small version of the test database used in the table-scan stage. + - It contains one table: `superheroes`. + - It is ~1MB in size. +1. `companies.db`: + - This is a small version of the test database used in the index-scan stage. + - It contains one table: `companies`, and one index: `idx_companies_country` + - It is ~7MB in size. + +These aren't included in the repository because they're large in size. You can +download them by running this script: + +```sh +./download_sample_databases.sh +``` + +If the script doesn't work for some reason, you can download the databases +directly from +[codecrafters-io/sample-sqlite-databases](https://github.com/codecrafters-io/sample-sqlite-databases). diff --git a/codecrafters.yml b/codecrafters.yml new file mode 100644 index 0000000..895914e --- /dev/null +++ b/codecrafters.yml @@ -0,0 +1,11 @@ +# Set this to true if you want debug logs. +# +# These can be VERY verbose, so we suggest turning them off +# unless you really need them. +debug: false + +# Use this to change the Rust version used to run your code +# on Codecrafters. 
+# +# Available versions: rust-1.43 +language_pack: rust-1.43 diff --git a/download_sample_databases.sh b/download_sample_databases.sh new file mode 100755 index 0000000..03e0573 --- /dev/null +++ b/download_sample_databases.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +echo "Downloading superheroes.db: ~1MB (used in stage 7)" +curl -Lo superheroes.db https://raw.githubusercontent.com/codecrafters-io/sample-sqlite-databases/master/superheroes.db + +echo "Downloading companies.db: ~7MB (used in stage 8)" +curl -Lo companies.db https://raw.githubusercontent.com/codecrafters-io/sample-sqlite-databases/master/companies.db + +echo "Sample databases downloaded." diff --git a/sample.db b/sample.db new file mode 100644 index 0000000..687673e Binary files /dev/null and b/sample.db differ diff --git a/src/header.rs b/src/header.rs new file mode 100644 index 0000000..9e9af62 --- /dev/null +++ b/src/header.rs @@ -0,0 +1,44 @@ +use anyhow::{bail, Result}; +use std::convert::TryInto; + +#[derive(Debug)] +pub enum BTreePage { + InteriorIndex = 2, + InteriorTable = 5, + LeafIndex = 10, + LeafTable = 13, +} + +#[derive(Debug)] +pub struct PageHeader { + pub page_type: BTreePage, + pub first_free_block_start: u16, + pub number_of_cells: u16, + pub start_of_content_area: u16, + pub fragmented_free_bytes: u8, +} + +impl PageHeader { + /// Parses a page header stream into a page header + pub fn parse(stream: &[u8]) -> Result { + let page_type = match stream[0] { + 2 => BTreePage::InteriorIndex, + 5 => BTreePage::InteriorTable, + 10 => BTreePage::LeafIndex, + 13 => BTreePage::LeafTable, + x => bail!("Invalid page value encountered: {}", x), + }; + let first_free_block_start = u16::from_be_bytes(stream[1..3].try_into()?); + let number_of_cells = u16::from_be_bytes(stream[3..5].try_into()?); + let start_of_content_area = u16::from_be_bytes(stream[5..7].try_into()?); + let fragmented_free_bytes = stream[7]; + let header = PageHeader { + page_type, + first_free_block_start, + number_of_cells, + 
start_of_content_area, + fragmented_free_bytes, + }; + Ok(header) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..7691f9c --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,4 @@ +pub mod header; +pub mod record; +pub mod schema; +pub mod varint; diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..c91f252 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,59 @@ +use anyhow::{bail, Result}; +use sqlite_starter_rust::{ + header::PageHeader, record::parse_record, schema::Schema, varint::parse_varint, +}; +use std::convert::TryInto; +use std::fs::File; +use std::io::prelude::*; + +fn main() -> Result<()> { + // Parse arguments + let args = std::env::args().collect::>(); + match args.len() { + 0 | 1 => bail!("Missing and "), + 2 => bail!("Missing "), + _ => {} + } + + // Read database file into database + let mut file = File::open(&args[1])?; + let mut database = Vec::new(); + file.read_to_end(&mut database)?; + + // Parse command and act accordingly + let command = &args[2]; + match command.as_str() { + ".dbinfo" => { + // Parse page header from database + let page_header = PageHeader::parse(&database[100..108])?; + + // Obtain all cell pointers + let cell_pointers = database[108..] 
+ .chunks_exact(2) + .take(page_header.number_of_cells.into()) + .map(|bytes| u16::from_be_bytes(bytes.try_into().unwrap())) + .collect::>(); + + // Obtain all records from column 5 + #[allow(unused_variables)] + let schemas = cell_pointers + .into_iter() + .map(|cell_pointer| { + let stream = &database[cell_pointer as usize..]; + let (_, offset) = parse_varint(stream); + let (_rowid, read_bytes) = parse_varint(&stream[offset..]); + parse_record(&stream[offset + read_bytes..], 5) + .map(|record| Schema::parse(record).expect("Invalid record")) + }) + .collect::>>()?; + + println!("Your code goes here!"); + + // Uncomment this block to pass the first stage + // println!("number of tables: {}", schemas.len()); + } + _ => bail!("Missing or invalid command passed: {}", command), + } + + Ok(()) +} diff --git a/src/record.rs b/src/record.rs new file mode 100644 index 0000000..8731907 --- /dev/null +++ b/src/record.rs @@ -0,0 +1,42 @@ +use crate::varint::parse_varint; +use anyhow::{bail, Result}; + +/// Reads SQLite's "Record Format" as mentioned here: +/// [record_format](https://www.sqlite.org/fileformat.html#record_format) +pub fn parse_record(stream: &[u8], column_count: usize) -> Result>> { + // Parse number of bytes in header, and use bytes_read as offset + let (_, mut offset) = parse_varint(stream); + + // Read each varint into serial types and modify the offset + let mut serial_types = vec![]; + for _ in 0..column_count { + let (varint, read_bytes) = parse_varint(&stream[offset..]); + offset += read_bytes; + serial_types.push(varint); + } + + // Parse each serial type as column into record and modify the offset + let mut record = vec![]; + for serial_type in serial_types { + let column = parse_column_value(&stream[offset..], serial_type as u8)?; + offset += column.len(); + record.push(column); + } + + Ok(record) +} + +fn parse_column_value(stream: &[u8], serial_type: u8) -> Result> { + let column_value = match serial_type { + // 8 bit twos-complement integer + 1 => 
const IS_FIRST_BIT_ZERO_MASK: u8 = 0b1000_0000;
const LAST_SEVEN_BITS_MASK: u8 = 0b0111_1111;
/// A varint occupies at most nine bytes; the ninth never has a continuation bit.
const MAX_VARINT_BYTES: usize = 9;

/// Parses SQLite's "varint" (short for variable-length integer) as mentioned here:
/// [varint](https://www.sqlite.org/fileformat2.html#varint)
///
/// Each of the first eight bytes contributes its low seven bits, and a clear
/// high bit marks the final byte. If all eight have the high bit set, a ninth
/// byte follows and contributes all eight of its bits.
///
/// Returns (varint, bytes_read)
///
/// Bytes after the end of the varint are ignored. If `stream` ends before the
/// varint terminates, the bytes that are present are decoded and `bytes_read`
/// is the number actually consumed; an empty `stream` yields `(0, 0)`.
pub fn parse_varint(stream: &[u8]) -> (usize, usize) {
    let usable_bytes = read_usable_bytes(stream);
    let bytes_read = usable_bytes.len();
    let varint = usable_bytes
        .iter()
        .enumerate()
        .fold(0usize, |value, (index, &byte)| {
            if index == MAX_VARINT_BYTES - 1 {
                // Ninth byte: all eight bits are payload.
                (value << 8) | usize::from(byte)
            } else {
                (value << 7) | usize::from(byte & LAST_SEVEN_BITS_MASK)
            }
        });
    (varint, bytes_read)
}

/// Returns the prefix of `stream` that makes up one varint.
///
/// The prefix ends at the first of the leading eight bytes whose high bit is
/// clear. Without such a byte it extends through the ninth byte, or to the
/// end of `stream` if that comes first.
fn read_usable_bytes(stream: &[u8]) -> &[u8] {
    let window = &stream[..stream.len().min(MAX_VARINT_BYTES)];
    let length = window
        .iter()
        // Only the first eight bytes can signal termination via their high bit.
        .take(MAX_VARINT_BYTES - 1)
        .position(|&byte| starts_with_zero(byte))
        .map_or(window.len(), |index| index + 1);
    &window[..length]
}

/// Returns `true` when the high (continuation) bit of `byte` is clear.
fn starts_with_zero(byte: u8) -> bool {
    (byte & IS_FIRST_BIT_ZERO_MASK) == 0
}