From 1e234756ef51a2f6e5107b5805761309f824da34 Mon Sep 17 00:00:00 2001 From: Ishan Jain Date: Tue, 13 Apr 2021 01:01:41 +0530 Subject: [PATCH] Added AVX2 enabled version of vec3 type, Updated readme --- Cargo.lock | 17 ++++ Cargo.toml | 1 + README.md | 7 ++ src/aabb.rs | 24 ++--- src/main.rs | 6 +- src/types/mod.rs | 10 +- src/types/simd_vec3.rs | 214 +++++++++++++++++++++++++++++++++++++++++ src/types/vec3.rs | 10 +- 8 files changed, 265 insertions(+), 24 deletions(-) create mode 100644 src/types/simd_vec3.rs diff --git a/Cargo.lock b/Cargo.lock index 42b1426..9e47148 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -174,6 +174,12 @@ version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c" +[[package]] +name = "libm" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fc7aa29613bd6a620df431842069224d8bc9011086b1db4c0e0cd47fa03ec9a" + [[package]] name = "memoffset" version = "0.6.1" @@ -243,6 +249,16 @@ dependencies = [ "libc", ] +[[package]] +name = "packed_simd_2" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3278e0492f961fd4ae70909f56b2723a7e8d01a228427294e19cdfdebda89a17" +dependencies = [ + "cfg-if 0.1.10", + "libm", +] + [[package]] name = "png" version = "0.16.8" @@ -332,6 +348,7 @@ version = "0.1.0" dependencies = [ "image", "num-traits", + "packed_simd_2", "rand", "rayon", "sdl2", diff --git a/Cargo.toml b/Cargo.toml index 3a1c286..c3a3875 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ version = "0.1.0" [dependencies] num-traits = "*" rayon = "1.5.0" +packed_simd = { version = "0.3.4", package = "packed_simd_2" } [dependencies.image] default-features = false diff --git a/README.md b/README.md index 8743982..e7065fa 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,13 @@ This is my attempt at Ray Tracing the Next Week book by Peter Shirley. 
+ +On x86_64 targets, it will try to use AVX2. If you get build errors, try commenting out the `simd_vec3` import in `src/types/mod.rs`. +I tried changing the cfg attribute to `all(target_arch = "x86_64", target_feature = "avx2")`, but it keeps reporting that the `avx2` feature is disabled on my machine. + + +Without AVX2, the final scene takes ~33 seconds to render at a certain setting; with AVX2, the same scene takes ~23.5 seconds. + [_Ray Tracing: The Next Week_](https://raytracing.github.io/books/RayTracingTheNextWeek.html) # Renders diff --git a/src/aabb.rs b/src/aabb.rs index bbf7277..52b4b33 100644 --- a/src/aabb.rs +++ b/src/aabb.rs @@ -11,23 +11,17 @@ impl Aabb { Self { min, max } } - pub fn hit(&self, ray: &Ray, mut t_min: f64, mut t_max: f64) -> bool { - for i in 0..=2 { - let inverse_dir = 1.0 / ray.direction[i]; - let mut t0 = (self.min[i] - ray.origin[i]) * inverse_dir; - let mut t1 = (self.max[i] - ray.origin[i]) * inverse_dir; - if inverse_dir < 0.0 { - std::mem::swap(&mut t0, &mut t1); - } - t_min = if t0 > t_min { t0 } else { t_min }; - t_max = if t1 < t_max { t1 } else { t_max }; + pub fn hit(&self, ray: &Ray, t_min: f64, t_max: f64) -> bool { + let min = (self.min - ray.origin) / ray.direction; + let max = (self.max - ray.origin) / ray.direction; - if t_max <= t_min { - return false; - } - } + let mins = min.min(max); + let maxs = min.max(max); - true + let tmin = mins.max_element(t_min); + let tmax = maxs.min_element(t_max); + + tmax > tmin } pub fn surrounding_box(box0: Aabb, box1: Aabb) -> Self { diff --git a/src/main.rs b/src/main.rs index 48cedd7..26c8124 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,11 +22,11 @@ use std::time::Instant; pub trait Asf64: num_traits::AsPrimitive {} impl> Asf64 for T {} -const NUM_SAMPLES: u16 = 10000; +const NUM_SAMPLES: u16 = 500; const VERTICAL_PARTITION: usize = 12; const HORIZONTAL_PARTITION: usize = 12; -const WIDTH: usize = 1500; -const HEIGHT: usize = 1500; +const WIDTH: usize = 500; +const HEIGHT: usize = 
500; fn main() -> Result<(), String> { run(WIDTH, HEIGHT) diff --git a/src/types/mod.rs b/src/types/mod.rs index 5fca40d..9f3c95b 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -1,9 +1,17 @@ mod color; mod dimension; mod ray; -mod vec3; pub use color::Color; pub use dimension::{Dimension, X, Y, Z}; pub use ray::Ray; + +#[cfg(not(target_arch = "x86_64"))] +mod vec3; +#[cfg(not(target_arch = "x86_64"))] pub use vec3::Vec3; + +#[cfg(target_arch = "x86_64")] +mod simd_vec3; +#[cfg(target_arch = "x86_64")] +pub use simd_vec3::Vec3; diff --git a/src/types/simd_vec3.rs b/src/types/simd_vec3.rs new file mode 100644 index 0000000..385c8e0 --- /dev/null +++ b/src/types/simd_vec3.rs @@ -0,0 +1,214 @@ +use std::{ + fmt::{Display, Formatter, Result as FmtResult}, + ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, RangeInclusive, Sub, SubAssign}, +}; + +use rand::Rng; + +use crate::{Asf64, Dimension, X, Y, Z}; + +use packed_simd::{f64x4, shuffle}; + +#[derive(Default, Debug, Copy, Clone)] +pub struct Vec3(f64x4); + +impl Vec3 { + #[inline] + pub fn new(a: impl Asf64, b: impl Asf64, c: impl Asf64) -> Vec3 { + Self(f64x4::new(a.as_(), b.as_(), c.as_(), 0.0)) + } + + pub fn splat(xyz: impl Asf64) -> Self { + Self::new(xyz, xyz, xyz) + } + + pub fn random(rng: &mut R) -> Self { + Self(f64x4::from_slice_aligned(&rng.gen::<[f64; 4]>())) + } + + pub fn random_in_range(rng: &mut R, range: RangeInclusive) -> Self { + Vec3::new( + rng.gen_range(range.clone()), + rng.gen_range(range.clone()), + rng.gen_range(range), + ) + } + + #[inline] + pub fn x(&self) -> f64 { + self.get::() + } + #[inline] + pub fn y(&self) -> f64 { + self.get::() + } + #[inline] + pub fn z(&self) -> f64 { + self.get::() + } + + pub fn get(&self) -> f64 { + unsafe { self.0.extract_unchecked(D::INDEX) } + } + + pub fn set(self, value: f64) -> Self { + Self(unsafe { self.0.replace_unchecked(D::INDEX, value) }) + } + + #[inline] + pub fn length(&self) -> f64 { + self.sq_len().sqrt() + } + + #[inline] + 
pub fn sq_len(&self) -> f64 { + (self.0 * self.0).sum() + } + + #[inline] + pub fn dot(&self, v: &Vec3) -> f64 { + (self.0 * v.0).sum() + } + + #[inline] + pub fn cross(&self, v: &Vec3) -> Vec3 { + // https://web.archive.org/web/20210412192227/https://geometrian.com/programming/tutorials/cross-product/index.php + let tmp0: f64x4 = shuffle!(self.0, [1, 2, 0, 3]); + let tmp1: f64x4 = shuffle!(v.0, [2, 0, 1, 3]); + let tmp2: f64x4 = shuffle!(self.0, [2, 0, 1, 3]); + let tmp3: f64x4 = shuffle!(v.0, [1, 2, 0, 3]); + + Vec3(tmp0 * tmp1 - tmp2 * tmp3) + } + + #[inline] + pub fn unit_vector(self) -> Vec3 { + self / self.length() + } + + pub fn min(self, other: Self) -> Vec3 { + Self(self.0.min(other.0)) + } + + pub fn max(self, other: Self) -> Vec3 { + Self(self.0.max(other.0)) + } + + pub fn min_element(self, other: f64) -> f64 { + unsafe { self.0.replace_unchecked(3, other).min_element() } + } + + pub fn max_element(self, other: f64) -> f64 { + unsafe { self.0.replace_unchecked(3, other).max_element() } + } + + #[inline] + pub fn sqrt(self) -> Self { + Self(self.0.sqrt()) + } +} + +impl Add for Vec3 { + type Output = Vec3; + + fn add(self, o: Vec3) -> Vec3 { + Vec3(self.0 + o.0) + } +} + +impl AddAssign for Vec3 { + fn add_assign(&mut self, o: Vec3) { + self.0 += o.0 + } +} + +impl Sub for Vec3 { + type Output = Vec3; + + fn sub(self, o: Vec3) -> Vec3 { + Vec3(self.0 - o.0) + } +} + +impl SubAssign for Vec3 { + fn sub_assign(&mut self, o: Vec3) { + self.0 -= o.0; + } +} + +impl Neg for Vec3 { + type Output = Vec3; + + fn neg(self) -> Vec3 { + Vec3(-self.0) + } +} + +impl MulAssign for Vec3 { + fn mul_assign(&mut self, o: Vec3) { + self.0 *= o.0 + } +} + +impl MulAssign for Vec3 { + fn mul_assign(&mut self, o: f64) { + self.0 *= o + } +} + +impl Mul for Vec3 { + type Output = Vec3; + fn mul(self, o: f64) -> Vec3 { + Vec3(self.0 * o) + } +} + +impl Mul for Vec3 { + type Output = Vec3; + fn mul(self, o: Vec3) -> Vec3 { + Vec3(self.0 * o.0) + } +} + +impl Div for Vec3 { + 
type Output = Vec3; + + fn div(self, o: Vec3) -> Vec3 { + Vec3(self.0 / o.0) + } +} + +impl Div for Vec3 { + type Output = Vec3; + + fn div(self, o: f64) -> Vec3 { + let o = 1.0 / o; + + Vec3(self.0 * o) + } +} + +impl DivAssign for Vec3 { + fn div_assign(&mut self, o: f64) { + let o = 1.0 / o; + + self.0 *= o + } +} + +impl Display for Vec3 { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + f.write_fmt(format_args!( + "{} {} {}", + self.get::(), + self.get::(), + self.get::() + )) + } +} + +impl From<(A, B, C)> for Vec3 { + fn from((x, y, z): (A, B, C)) -> Self { + Self::new(x, y, z) + } +} diff --git a/src/types/vec3.rs b/src/types/vec3.rs index 566e411..9ba86fc 100644 --- a/src/types/vec3.rs +++ b/src/types/vec3.rs @@ -16,7 +16,7 @@ pub struct Vec3([f64; 3]); impl Vec3 { #[inline] pub fn new(a: impl Asf64, b: impl Asf64, c: impl Asf64) -> Vec3 { - Vec3([a.as_(), b.as_(), c.as_()]) + Self([a.as_(), b.as_(), c.as_()]) } pub fn splat(xyz: impl Asf64) -> Self { @@ -102,12 +102,12 @@ impl Vec3 { ]) } - pub fn min_element(self) -> f64 { - self.x().min(self.y()).min(self.z()) + pub fn min_element(self, other: f64) -> f64 { + self.x().min(self.y()).min(self.z()).min(other) } - pub fn max_element(self) -> f64 { - self.x().max(self.y()).max(self.z()) + pub fn max_element(self, other: f64) -> f64 { + self.x().max(self.y()).max(self.z()).max(other) } #[inline]