fearless_simd API Reference

Overview

fearless_simd provides safe, portable SIMD abstractions for Rust. The core pattern is to write generic functions bounded by `S: Simd` and to call them through the `dispatch!` macro, passing a `Level` obtained from runtime detection.

Quick Start

rust

use fearless_simd::{Level, dispatch, prelude::*};

/// Returns the sum of the squares of the first four values in `data`.
///
/// # Panics
///
/// Panics if `data` holds fewer than four elements, because the `..4`
/// slice is bounds-checked.
#[inline(always)] // required on every SIMD function
fn process<S: Simd>(simd: S, data: &[f32]) -> f32 {
    let head = &data[..4];
    let lanes = f32x4::from_slice(simd, head);
    let sq = lanes * lanes;
    // Horizontal add, left to right, over the four lanes.
    sq[0] + sq[1] + sq[2] + sq[3]
}

let level = Level::new();
let result = dispatch!(level, simd => process(simd, &data));

Core Types

Type	Description
`Level`	Runtime SIMD capability detection
`S: Simd`	Generic bound for SIMD token (Avx2, Sse4_2, Neon, etc.)
`S::f32s`, `S::i32s`	Native-width vectors (adapts to platform)
`f32x4<S>`, `i32x8<S>`	Fixed-width vectors

Vector Construction

All constructors take the simd token first:

rust

// Splat scalar to all lanes
let v = f32x4::splat(simd, 1.0);

// From slice (must be exact size)
let v = f32x4::from_slice(simd, &slice[..4]);

// From array via simd_into
let v: f32x4<S> = [1.0, 2.0, 3.0, 4.0].simd_into(simd);

// Element-wise function
let v = f32x4::from_fn(simd, |i| i as f32);

Element Access

rust

let val = v[0];             // Read element
v[1] = 2.0;                 // Write element (if mutable)
let slice = v.as_slice();   // Get &[Element] slice
v.store_slice(&mut out);    // Store to slice
out.copy_from_slice(v.as_slice());  // Alternative store

Operations

Arithmetic (float & int)

`+`, `-`, `*`, unary `-` (float and integer); `/` (float only)

Float-specific

rust

v.abs()                    // Absolute value
v.sqrt()                   // Square root
v.mul_add(a, b)            // Fused multiply-add: v*a + b
v.floor() / .ceil() / .round_ties_even() / .trunc()
v.min(other) / .max(other)

Integer-specific

rust

v.shl(n) / .shr(n)         // Shift by constant
v.shlv(amounts)            // Variable shift per lane
v & other / v | other      // Bitwise

Comparisons (return masks)

rust

let mask = a.simd_lt(b);   // Less than
let mask = a.simd_eq(b);   // Equal
// Also: simd_le, simd_gt, simd_ge

Conditional Selection

rust

let result = mask.select(if_true, if_false);
if mask.any_true() { /* at least one lane */ }
if mask.all_true() { /* every lane */ }

Native-Width Pattern

Use `S::f32s` for the platform-optimal vector width (128-bit on most targets, 256-bit on AVX2):

rust

/// Sums every element of `data` using native-width vectors.
///
/// Full chunks of `S::f32s::N` elements are accumulated lane-wise; any
/// leftover elements that do not fill a whole vector are added in scalar
/// code, so the result covers the entire slice regardless of its length.
#[inline(always)]
fn sum_simd<S: Simd>(simd: S, data: &[f32]) -> f32
where
    S::f32s: SimdFloat<S>,
{
    let n = S::f32s::N;  // Lane count (4 or 8)
    let mut acc = S::f32s::splat(simd, 0.0);

    let chunks = data.chunks_exact(n);
    // Grab the tail before the loop consumes the iterator; without this the
    // last `data.len() % n` elements would be silently dropped.
    let tail = chunks.remainder();

    for chunk in chunks {
        let v = S::f32s::from_slice(simd, chunk);
        acc = acc + v;
    }

    // Horizontal sum via as_slice(), then the scalar remainder.
    let vector_sum: f32 = acc.as_slice().iter().sum();
    let tail_sum: f32 = tail.iter().sum();
    vector_sum + tail_sum
}

Interleaving & Shuffling

rust

// Zip: interleave elements
// (the right-hand side must be a parenthesised tuple to destructure)
let (lo, hi) = (a.zip_low(b), a.zip_high(b));

// Unzip: deinterleave
let (evens, odds) = (a.unzip_low(b), a.unzip_high(b));

// Slide: concatenate and extract (like x86 alignr)
let shifted = a.slide::<2>(b);  // [a2,a3,b0,b1]

Type Conversions

rust

// Float <-> Int
let ints: i32x4<S> = floats.to_int_precise();  // Truncates toward zero; out-of-range saturates, NaN -> 0
let floats: f32x4<S> = ints.to_float();

// Bitcast (reinterpret bits)
let as_ints: u32x4<S> = floats.bitcast();

// Width change
let wide = narrow.combine(narrow2);  // Double width
let (a, b) = wide.split();           // Halve width

Dispatch Pattern

rust

let level = Level::new();  // Detect once, reuse

// Method 1: dispatch! macro
dispatch!(level, simd => my_function(simd, args));

// Method 2: match on level
match level {
    Level::Avx2(simd) => simd.vectorize(|| process(simd, data)),
    Level::Sse4_2(simd) => simd.vectorize(|| process(simd, data)),
    // ... other levels
}

Common Mistakes

Mistake	Fix
Missing `#[inline(always)]`	Add to all SIMD functions
`from_slice(slice)`	`from_slice(simd, slice)` - token first
Using `f32x4` when `S::f32s` fits	Prefer native width for performance
Forgetting remainder handling	Use `chunks_exact()` + handle `.remainder()`
Calling `Level::new()` in hot loop	Detect once outside loop

Feature Flags

• `std` (default): Runtime detection via `Level::new()`
• `libm`: No-std float operations
• `force_support_fallback`: Enable scalar fallback for testing