mmap optimization, updated readme

This commit is contained in:
Ishan Jain 2024-01-11 04:19:30 +05:30
parent 4fdef18833
commit 05a3283daa
Signed by: ishan
GPG Key ID: 0506DB2A1CC75C27
4 changed files with 56 additions and 8 deletions

39
Cargo.lock generated
View File

@ -48,11 +48,28 @@ dependencies = [
"byteorder", "byteorder",
] ]
[[package]]
name = "libc"
version = "0.2.152"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7"
[[package]]
name = "memmap"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
dependencies = [
"libc",
"winapi",
]
[[package]] [[package]]
name = "onebrc" name = "onebrc"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"fxhash", "fxhash",
"memmap",
"rayon", "rayon",
] ]
@ -75,3 +92,25 @@ dependencies = [
"crossbeam-deque", "crossbeam-deque",
"crossbeam-utils", "crossbeam-utils",
] ]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@ -5,4 +5,5 @@ edition = "2021"
[dependencies] [dependencies]
fxhash = "0.2.1" fxhash = "0.2.1"
memmap = "0.7.0"
rayon = "1.8.0" rayon = "1.8.0"

View File

@ -1,5 +1,11 @@
# One billion row challenge # One billion row challenge
* Fastest single threaded runtime: 29.7s (4.5s to read the file into memory, 24s to process rows and ~150micros to generate the output) These tests were run on a machine with a Ryzen 9 5900X and 64 GB of memory.
* single-threaded, no mmap: 29.7s (4.5s to read the file into memory, 24s to process rows, and ~150µs to generate the output)
* multi threaded, no mmap: read = 4.682276171s processed = 2.057576429s output_gen = 188.113µs
* multi threaded + mmap: read = 5.75µs processed = 2.144708s output_gen = 276.73µs
* Fastest multi threaded runtime on a machine with 12 cores: read = 4.682276171s processed = 2.057576429s output_gen = 188.113µs

View File

@ -1,16 +1,18 @@
use fxhash::FxHashMap; use fxhash::FxHashMap;
use memmap::MmapOptions;
use rayon::prelude::*; use rayon::prelude::*;
use std::{fs::File, io::Read, time::Instant}; use std::{fs::File, time::Instant};
fn main() { fn main() {
let mut buf = Vec::new();
let t1 = Instant::now(); let t1 = Instant::now();
let mut file = File::open("./measurements.txt").expect("error in opening file"); let file = File::open("./measurements.txt").expect("error in opening file");
file.read_to_end(&mut buf) let buf = unsafe {
.expect("error in reading file to memory"); MmapOptions::new()
.map(&file)
.expect("error in mmaping file")
};
let t1_elapsed = t1.elapsed(); let t1_elapsed = t1.elapsed();