Syncing with codecrafters-io/languages

Created by https://github.com/codecrafters-io/languages
master
Paul Kuruvilla 1 year ago
parent e133463cf9
commit bb0abd30c5
  1. 12
      .gitignore
  2. 14
      Cargo.lock
  3. 22
      Cargo.toml
  4. 86
      README.md
  5. 11
      codecrafters.yml
  6. 9
      download_sample_databases.sh
  7. BIN
      sample.db
  8. 44
      src/header.rs
  9. 4
      src/lib.rs
  10. 59
      src/main.rs
  11. 42
      src/record.rs
  12. 29
      src/schema.rs
  13. 46
      src/varint.rs
  14. 12
      your_sqlite3.sh

12
.gitignore vendored

@ -0,0 +1,12 @@
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
*.db

14
Cargo.lock generated

@ -0,0 +1,14 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "anyhow"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61604a8f862e1d5c3229fdd78f8b02c68dcf73a4c4b05fd636d12240aaa242c1"
[[package]]
name = "sqlite-starter-rust"
version = "0.1.0"
dependencies = [
"anyhow",
]

@ -0,0 +1,22 @@
# DON'T EDIT THIS!
#
# Codecrafters relies on this file being intact to run tests successfully. Any changes
# here will not reflect when CodeCrafters tests your code, and might even cause build
# failures.
#
# DON'T EDIT THIS!
[package]
name = "sqlite-starter-rust"
version = "0.1.0"
authors = ["Codecrafters <hello@codecrafters.io>"]
edition = "2018"
# DON'T EDIT THIS!
#
# Codecrafters relies on this file being intact to run tests successfully. Any changes
# here will not reflect when CodeCrafters tests your code, and might even cause build
# failures.
#
# DON'T EDIT THIS!
[dependencies]
anyhow = "1.0.43" # for easy error handling

@ -1 +1,85 @@
# sqlite-starter-rust
This is a starting point for Rust solutions to the
["Build Your Own SQLite" Challenge](https://codecrafters.io/challenges/sqlite).
In this challenge, you'll build a barebones SQLite implementation that supports
basic SQL queries like `SELECT`. Along the way we'll learn about
[SQLite's file format](https://www.sqlite.org/fileformat.html), how indexed data
is
[stored in B-trees](https://jvns.ca/blog/2014/10/02/how-does-sqlite-work-part-2-btrees/)
and more.
**Note**: If you're viewing this repo on GitHub, head over to
[codecrafters.io](https://codecrafters.io) to sign up for early access.
# Passing the first stage
CodeCrafters runs tests when you do a `git push`. Make an empty commit and push
your solution to see the first stage fail.
```sh
git commit --allow-empty -m "Running tests"
git push origin master
```
You should see a failure message that says it expected "number of tables: <n>"
to be present.
Go to `src/main.rs` and uncomment the `.dbinfo` command implementation. Commit
and push your changes to pass the first stage:
```sh
git add .
git commit -m "pass the first stage"
git push origin master
```
Time to move on to the next stage!
# Running Your Program Locally
1. Ensure you have `cargo (1.43)` installed locally
1. Run `./your_sqlite3.sh` to run your program, which is implemented in
`src/main.rs`. This command compiles your Rust project, so it might be slow
the first time you run it. Subsequent runs will be fast.
1. Commit your changes and run `git push origin master` to submit your solution
to CodeCrafters. Test output will be streamed to your terminal.
# Sample Databases
To make it easy to test queries locally, we've added a sample database in the
root of this repository: `sample.db`.
This contains two tables: `apples` & `oranges`. You can use this to test your
implementation for the first 6 stages.
You can explore this database by running queries against it like this:
```sh
$ sqlite3 sample.db "select id, name from apples"
1|Granny Smith
2|Fuji
3|Honeycrisp
4|Golden Delicious
```
There are two other databases that you can use:
1. `superheroes.db`:
- This is a small version of the test database used in the table-scan stage.
- It contains one table: `superheroes`.
- It is ~1MB in size.
1. `companies.db`:
- This is a small version of the test database used in the index-scan stage.
- It contains one table: `companies`, and one index: `idx_companies_country`
- It is ~7MB in size.
These aren't included in the repository because they're large in size. You can
download them by running this script:
```sh
./download_sample_databases.sh
```
If the script doesn't work for some reason, you can download the databases
directly from
[codecrafters-io/sample-sqlite-databases](https://github.com/codecrafters-io/sample-sqlite-databases).

@ -0,0 +1,11 @@
# Set this to true if you want debug logs.
#
# These can be VERY verbose, so we suggest turning them off
# unless you really need them.
debug: false
# Use this to change the Rust version used to run your code
# on Codecrafters.
#
# Available versions: rust-1.43
language_pack: rust-1.43

@ -0,0 +1,9 @@
#!/bin/sh
#
# Downloads the optional sample databases (superheroes.db and companies.db)
# into the current directory.

# Stop at the first failed command so a broken download is never followed by
# the "Sample databases downloaded." message.
set -e

echo "Downloading superheroes.db: ~1MB (used in stage 7)"
# -f makes curl exit non-zero on an HTTP error instead of saving the server's
# error page as the database file; -L follows redirects; -o names the output.
curl -fLo superheroes.db https://raw.githubusercontent.com/codecrafters-io/sample-sqlite-databases/master/superheroes.db
echo "Downloading companies.db: ~7MB (used in stage 8)"
curl -fLo companies.db https://raw.githubusercontent.com/codecrafters-io/sample-sqlite-databases/master/companies.db
echo "Sample databases downloaded."

Binary file not shown.

@ -0,0 +1,44 @@
use anyhow::{bail, Result};
use std::convert::TryInto;
/// The kind of b-tree page, as identified by the first byte of a page header.
///
/// Each variant's discriminant is the byte value that `PageHeader::parse`
/// maps to that variant.
#[derive(Debug)]
pub enum BTreePage {
/// Page-type byte `2`.
InteriorIndex = 2,
/// Page-type byte `5`.
InteriorTable = 5,
/// Page-type byte `10`.
LeafIndex = 10,
/// Page-type byte `13`.
LeafTable = 13,
}
/// The fields that `PageHeader::parse` decodes from the first eight bytes of
/// a b-tree page header.
#[derive(Debug)]
pub struct PageHeader {
/// Page kind, decoded from byte 0.
pub page_type: BTreePage,
/// Big-endian `u16` read from bytes 1..3.
pub first_free_block_start: u16,
/// Big-endian `u16` read from bytes 3..5.
pub number_of_cells: u16,
/// Big-endian `u16` read from bytes 5..7.
pub start_of_content_area: u16,
/// Single byte read from byte 7.
pub fragmented_free_bytes: u8,
}
impl PageHeader {
    /// Parses the leading bytes of a b-tree page into a [`PageHeader`].
    ///
    /// `stream` must start at the first byte of the page header and contain at
    /// least eight bytes; any bytes after the eighth are ignored.
    ///
    /// # Errors
    ///
    /// Returns an error when `stream` is shorter than eight bytes, or when the
    /// first byte is not one of the page-type values `2`, `5`, `10` or `13`.
    pub fn parse(stream: &[u8]) -> Result<Self> {
        /// Number of bytes consumed by the fields decoded below.
        const HEADER_LEN: usize = 8;

        // Validate the length up front so truncated input is reported as an
        // error instead of panicking on the indexing below.
        if stream.len() < HEADER_LEN {
            bail!(
                "Page header requires {} bytes but only {} are available",
                HEADER_LEN,
                stream.len()
            );
        }

        let page_type = match stream[0] {
            2 => BTreePage::InteriorIndex,
            5 => BTreePage::InteriorTable,
            10 => BTreePage::LeafIndex,
            13 => BTreePage::LeafTable,
            x => bail!("Invalid page value encountered: {}", x),
        };

        // All multi-byte header fields are stored big-endian.
        let first_free_block_start = u16::from_be_bytes(stream[1..3].try_into()?);
        let number_of_cells = u16::from_be_bytes(stream[3..5].try_into()?);
        let start_of_content_area = u16::from_be_bytes(stream[5..7].try_into()?);
        let fragmented_free_bytes = stream[7];

        Ok(PageHeader {
            page_type,
            first_free_block_start,
            number_of_cells,
            start_of_content_area,
            fragmented_free_bytes,
        })
    }
}

@ -0,0 +1,4 @@
pub mod header;
pub mod record;
pub mod schema;
pub mod varint;

@ -0,0 +1,59 @@
use anyhow::{bail, Result};
use sqlite_starter_rust::{
header::PageHeader, record::parse_record, schema::Schema, varint::parse_varint,
};
use std::convert::TryInto;
use std::fs::File;
use std::io::prelude::*;
fn main() -> Result<()> {
// Parse arguments
let args = std::env::args().collect::<Vec<_>>();
match args.len() {
0 | 1 => bail!("Missing <database path> and <command>"),
2 => bail!("Missing <command>"),
_ => {}
}
// Read database file into database
let mut file = File::open(&args[1])?;
let mut database = Vec::new();
file.read_to_end(&mut database)?;
// Parse command and act accordingly
let command = &args[2];
match command.as_str() {
".dbinfo" => {
// Parse page header from database
let page_header = PageHeader::parse(&database[100..108])?;
// Obtain all cell pointers
let cell_pointers = database[108..]
.chunks_exact(2)
.take(page_header.number_of_cells.into())
.map(|bytes| u16::from_be_bytes(bytes.try_into().unwrap()))
.collect::<Vec<_>>();
// Obtain all records from column 5
#[allow(unused_variables)]
let schemas = cell_pointers
.into_iter()
.map(|cell_pointer| {
let stream = &database[cell_pointer as usize..];
let (_, offset) = parse_varint(stream);
let (_rowid, read_bytes) = parse_varint(&stream[offset..]);
parse_record(&stream[offset + read_bytes..], 5)
.map(|record| Schema::parse(record).expect("Invalid record"))
})
.collect::<Result<Vec<_>>>()?;
println!("Your code goes here!");
// Uncomment this block to pass the first stage
// println!("number of tables: {}", schemas.len());
}
_ => bail!("Missing or invalid command passed: {}", command),
}
Ok(())
}

@ -0,0 +1,42 @@
use crate::varint::parse_varint;
use anyhow::{bail, Result};
/// Reads SQLite's "Record Format" as mentioned here:
/// [record_format](https://www.sqlite.org/fileformat.html#record_format)
///
/// `stream` must start at the record's header-size varint. Exactly
/// `column_count` serial types are read from the header, followed by one
/// value per serial type. Each column is returned as its raw bytes.
///
/// # Errors
///
/// Returns an error when a serial type is not supported by
/// `parse_column_value`, or when `stream` ends before the header or a column
/// value is complete.
pub fn parse_record(stream: &[u8], column_count: usize) -> Result<Vec<Vec<u8>>> {
    // Parse number of bytes in header, and use bytes_read as offset
    let (_, mut offset) = parse_varint(stream);

    // Read each varint into serial types and modify the offset
    let mut serial_types = Vec::with_capacity(column_count);
    for _ in 0..column_count {
        let (varint, read_bytes) = parse_varint(remaining(stream, offset)?);
        offset += read_bytes;
        serial_types.push(varint);
    }

    // Parse each serial type as column into record and modify the offset
    let mut record = Vec::with_capacity(column_count);
    for serial_type in serial_types {
        // The serial type is passed at full width: text longer than 121 bytes
        // has a serial type above 255, which a `u8` cast would truncate.
        let column = parse_column_value(remaining(stream, offset)?, serial_type)?;
        offset += column.len();
        record.push(column);
    }

    Ok(record)
}

/// Returns the part of `stream` starting at `offset`, or an error when
/// `offset` lies past the end of the stream.
fn remaining(stream: &[u8], offset: usize) -> Result<&[u8]> {
    match stream.get(offset..) {
        Some(rest) => Ok(rest),
        None => bail!(
            "Record is truncated: offset {} exceeds stream length {}",
            offset,
            stream.len()
        ),
    }
}

/// Extracts the raw bytes of one column value from the start of `stream`.
///
/// Supported serial types:
/// * `1`: a single byte (8 bit twos-complement integer)
/// * odd `n >= 13`: text of `(n - 13) / 2` bytes
///
/// Any other serial type, or a stream shorter than the value, is an error.
fn parse_column_value(stream: &[u8], serial_type: usize) -> Result<Vec<u8>> {
    let n_bytes = match serial_type {
        // 8 bit twos-complement integer
        1 => 1,
        // Text encoding
        n if n >= 13 && n % 2 == 1 => (n - 13) / 2,
        _ => bail!("Invalid serial_type: {}", serial_type),
    };

    match stream.get(..n_bytes) {
        Some(bytes) => Ok(bytes.to_vec()),
        None => bail!(
            "Record is truncated: column needs {} bytes but only {} remain",
            n_bytes,
            stream.len()
        ),
    }
}

@ -0,0 +1,29 @@
/// One parsed schema record: five columns, in the order they appear in the
/// record handed to [`Schema::parse`].
#[derive(Debug)]
pub struct Schema {
    kind: String,
    name: String,
    table_name: String,
    root_page: u8,
    sql: String,
}

impl Schema {
    /// Builds a `Schema` from the raw column bytes of a record.
    ///
    /// Columns are consumed in order: `kind`, `name`, `table_name`,
    /// `root_page`, `sql`. Text columns are decoded as UTF-8, with invalid
    /// sequences replaced by U+FFFD. `root_page` is the first byte of its
    /// column. Columns beyond the fifth are ignored.
    ///
    /// Returns `None` when the record has fewer than five columns or the
    /// `root_page` column is empty.
    pub fn parse(record: Vec<Vec<u8>>) -> Option<Self> {
        let decode = |raw: Vec<u8>| String::from_utf8_lossy(&raw).into_owned();

        let mut columns = record.into_iter();
        let raw_kind = columns.next()?;
        let raw_name = columns.next()?;
        let raw_table_name = columns.next()?;
        let root_page = columns.next()?.first().copied()?;
        let raw_sql = columns.next()?;

        Some(Self {
            kind: decode(raw_kind),
            name: decode(raw_name),
            table_name: decode(raw_table_name),
            root_page,
            sql: decode(raw_sql),
        })
    }
}

@ -0,0 +1,46 @@
const IS_FIRST_BIT_ZERO_MASK: u8 = 0b10000000;
const LAST_SEVEN_BITS_MASK: u8 = 0b01111111;
/// A varint is at most nine bytes long.
const MAX_VARINT_BYTES: usize = 9;

/// Parses SQLite's "varint" (short for variable-length integer) as mentioned here:
/// [varint](https://www.sqlite.org/fileformat2.html#varint)
///
/// Each of the first eight bytes contributes its low seven bits and uses its
/// high bit as a "more bytes follow" flag. A ninth byte, when present,
/// contributes all eight of its bits.
///
/// If `stream` ends before the varint is terminated, the bytes that are
/// available are decoded and `bytes_read` reports how many were consumed
/// (`(0, 0)` for an empty stream).
///
/// Returns (varint, bytes_read)
pub fn parse_varint(stream: &[u8]) -> (usize, usize) {
    let usable_bytes = read_usable_bytes(stream);
    let bytes_read = usable_bytes.len();
    let varint = usable_bytes
        .into_iter()
        .enumerate()
        .fold(0usize, |value, (i, usable_byte)| {
            // Only the ninth byte (index 8) carries eight payload bits.
            let usable_size = if i == MAX_VARINT_BYTES - 1 { 8 } else { 7 };
            (value << usable_size) | usize::from(usable_value(usable_size, usable_byte))
        });
    (varint, bytes_read)
}

/// Returns the payload bits of `byte`.
///
/// Usable size is either 8 or 7: with 8 the whole byte is payload, otherwise
/// the continuation bit is masked off.
fn usable_value(usable_size: u8, byte: u8) -> u8 {
    if usable_size == 8 {
        byte
    } else {
        byte & LAST_SEVEN_BITS_MASK
    }
}

/// Collects the bytes that make up the varint at the start of `stream`.
///
/// Stops after the first byte whose high bit is clear, after nine bytes, or
/// at the end of `stream`, whichever comes first.
fn read_usable_bytes(stream: &[u8]) -> Vec<u8> {
    let mut usable_bytes = Vec::with_capacity(MAX_VARINT_BYTES);
    for &byte in stream.iter().take(MAX_VARINT_BYTES) {
        usable_bytes.push(byte);
        if starts_with_zero(byte) {
            break;
        }
    }
    usable_bytes
}

/// True when the high (continuation) bit of `byte` is clear.
fn starts_with_zero(byte: u8) -> bool {
    (byte & IS_FIRST_BIT_ZERO_MASK) == 0
}

@ -0,0 +1,12 @@
#!/bin/sh
#
# DON'T EDIT THIS!
#
# CodeCrafters uses this file to test your code. Don't make any changes here!
#
# DON'T EDIT THIS!
#
# Replaces this shell (via exec) with a quiet, release-mode `cargo run` of the
# project whose Cargo.toml sits next to this script, using a build directory
# under /tmp and passing this script's arguments on the cargo command line.
exec cargo run \
--quiet \
--release \
--target-dir=/tmp/codecrafters-sqlite-target \
--manifest-path "$(dirname "$0")/Cargo.toml" "$@"
Loading…
Cancel
Save