diff --git a/Cargo.lock b/Cargo.lock index 2f48fb3..79bf332 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -48,11 +48,28 @@ dependencies = [ "byteorder", ] +[[package]] +name = "libc" +version = "0.2.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" + +[[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "onebrc" version = "0.1.0" dependencies = [ "fxhash", + "memmap", "rayon", ] @@ -75,3 +92,25 @@ dependencies = [ "crossbeam-deque", "crossbeam-utils", ] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index 5d2d6f0..c5e30c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,4 +5,5 @@ edition = "2021" [dependencies] fxhash = "0.2.1" +memmap = "0.7.0" rayon = "1.8.0" diff --git a/README.md b/README.md index 01ee63d..c2965cb 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,11 @@ # One billion row challenge -* Fastest single threaded runtime: 29.7s (4.5s to read the file into memory, 24s to process rows and ~150micros to generate the output) +These tests were run on a machine with r9 
5900X, 64 GB of memory. + + +* single-threaded, no mmap: 29.7s (4.5s to read the file into memory, 24s to process rows, and ~150µs to generate the output) + +* multi-threaded, no mmap: read = 4.682276171s processed = 2.057576429s output_gen = 188.113µs + +* multi-threaded + mmap: read = 5.75µs processed = 2.144708s output_gen = 276.73µs -* Fastest multi threaded runtime on a machine with 12 cores: read = 4.682276171s processed = 2.057576429s output_gen = 188.113µs diff --git a/src/main.rs b/src/main.rs index 676f712..75984d8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,16 +1,18 @@ use fxhash::FxHashMap; +use memmap::MmapOptions; use rayon::prelude::*; -use std::{fs::File, io::Read, time::Instant}; +use std::{fs::File, time::Instant}; fn main() { - let mut buf = Vec::new(); - let t1 = Instant::now(); - let mut file = File::open("./measurements.txt").expect("error in opening file"); + let file = File::open("./measurements.txt").expect("error in opening file"); - file.read_to_end(&mut buf) - .expect("error in reading file to memory"); + let buf = unsafe { + MmapOptions::new() + .map(&file) + .expect("error in mmaping file") + }; let t1_elapsed = t1.elapsed();