summaryrefslogtreecommitdiff
path: root/exercises/112_vectors.zig
diff options
context:
space:
mode:
authorChris Boesch <chrboesch@noreply.codeberg.org>2026-04-07 09:18:37 +0200
committerChris Boesch <chrboesch@noreply.codeberg.org>2026-04-07 09:18:37 +0200
commit7cb7a9948a9f5c9965fedd59f0cd816f28962fe4 (patch)
tree1953e653faba351ab103ec030eacbfc79b7dae6c /exercises/112_vectors.zig
parent1166f3cfb65d7edc9086c6ba6b8edd4754964b33 (diff)
parent3574fd3ae037f34510c6bb6657332b57447ac5b1 (diff)
Merge branch 'main' into fix-060
Diffstat (limited to 'exercises/112_vectors.zig')
-rw-r--r--exercises/112_vectors.zig147
1 files changed, 147 insertions, 0 deletions
diff --git a/exercises/112_vectors.zig b/exercises/112_vectors.zig
new file mode 100644
index 0000000..96892ca
--- /dev/null
+++ b/exercises/112_vectors.zig
@@ -0,0 +1,147 @@
+// So far in Ziglings, we've seen how for loops can be used to
+// repeat calculations across an array in several ways.
+//
+// For loops are generally great for this kind of task, but
+// sometimes they don't fully utilize the capabilities of the
+// CPU.
+//
+// Most modern CPUs can execute instructions in which SEVERAL
+// calculations are performed WITHIN registers at the SAME TIME.
+// These are known as "single instruction, multiple data" (SIMD)
+// instructions. SIMD instructions can make code significantly
+// more performant.
+//
+// To see why, imagine we have a program in which we take the
+// square root of four (changing) f32 floats.
+//
+// A simple compiler would take the program and produce machine code
+// which calculates each square root sequentially. Most registers on
+// modern CPUs have 64 bits, so we could imagine that each float moves
+// into a 64-bit register, and the following happens four times:
+//
+// 32 bits 32 bits
+// +-------------------+
+// register | 0 | x |
+// +-------------------+
+//
+// |
+// [SQRT instruction]
+// V
+//
+// +-------------------+
+// | 0 | sqrt(x) |
+// +-------------------+
+//
+// Notice that half of the register contains blank data to which
+// nothing happened. What a waste! What if we were able to use
+// that space instead? This is the idea at the core of SIMD.
+//
+// Most modern CPUs contain specialized registers with at least 128 bits
+// for performing SIMD instructions. On a machine with 128-bit SIMD
+// registers, a smart compiler would probably NOT issue four sqrt
+// instructions as above, but instead pack the floats into a single
+// 128-bit register, then execute a single "packed" sqrt
+// instruction to do ALL the square root calculations at once.
+//
+// For example:
+//
+//
+// 32 bits 32 bits 32 bits 32 bits
+// +---------------------------------------+
+// register | 4.0 | 9.0 | 25.0 | 49.0 |
+// +---------------------------------------+
+//
+// |
+// [SIMD SQRT instruction]
+// V
+//
+// +---------------------------------------+
+// register | 2.0 | 3.0 | 5.0 | 7.0 |
+// +---------------------------------------+
+//
+// Pretty cool, right?
+//
+// Code with SIMD instructions is usually more performant than code
+// without SIMD instructions. Zig cares a lot about performance,
+// so it has built-in support for SIMD! It has a data structure that
+// directly supports SIMD instructions:
+//
+// +-----------+
+// | Vectors |
+// +-----------+
+//
+// Operations performed on vectors in Zig will be done in parallel using
+// SIMD instructions, whenever possible.
+//
+// Defining vectors in Zig is straightforwards. No library import is needed.
+const v1 = @Vector(3, i32){ 1, 10, 100 };
+const v2 = @Vector(3, f32){ 2.0, 3.0, 5.0 };
+
+// Vectors support the same builtin operators as their underlying base types.
+const v3 = v1 + v1; // { 2, 20, 200};
+const v4 = v2 * v2; // { 4.0, 9.0, 25.0};
+
+// Intrinsics that apply to base types usually extend to vectors.
+const v5: @Vector(3, f32) = @floatFromInt(v3); // { 2.0, 20.0, 200.0}
+const v6 = v4 - v5; // { 2.0, -11.0, -175.0}
+const v7 = @abs(v6); // { 2.0, 11.0, 175.0}
+
+// We can make constant vectors, and reduce vectors.
+const v8: @Vector(4, u8) = @splat(2); // { 2, 2, 2, 2}
+const v8_sum = @reduce(.Add, v8); // 8
+const v8_min = @reduce(.Min, v8); // 2
+
+// Fixed-length arrays can be automatically assigned to vectors (and vice-versa).
+const single_digit_primes = [4]i8{ 2, 3, 5, 7 };
+const prime_vector: @Vector(4, i8) = single_digit_primes;
+
+// Now let's use vectors to simplify and optimize some code!
+//
+// Ewa is writing a program in which they frequently want to compare
+// two lists of four f32s. Ewa expects the lists to be similar, and
+// wants to determine the largest pairwise difference between the lists.
+//
+// Ewa wrote the following function to figure this out.
+
+fn calcMaxPairwiseDiffOld(list1: [4]f32, list2: [4]f32) f32 {
+ var max_diff: f32 = 0;
+ for (list1, list2) |n1, n2| {
+ const abs_diff = @abs(n1 - n2);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ return max_diff;
+}
+
+// Ewa heard about vectors in Zig, and started writing a new vector
+// version of the function, but has got stuck!
+//
+// Help Ewa finish the vector version! The examples above should help.
+
+const Vec4 = @Vector(4, f32);
+fn calcMaxPairwiseDiffNew(a: Vec4, b: Vec4) f32 {
+ const abs_diff_vec = ???;
+ const max_diff = @reduce(???, abs_diff_vec);
+ return max_diff;
+}
+
+// Quite the simplification! We could even write the function in one line
+// and it would still be readable.
+//
+// Since the entire function is now expressed in terms of vector operations,
+// the Zig compiler will easily be able to compile it down to machine code
+// which utilizes the all-powerful SIMD instructions and does a lot of the
+// computation in parallel.
+
+const std = @import("std");
+const print = std.debug.print;
+
+pub fn main() void {
+ const l1 = [4]f32{ 3.141, 2.718, 0.577, 1.000 };
+ const l2 = [4]f32{ 3.154, 2.707, 0.591, 0.993 };
+ const mpd_old = calcMaxPairwiseDiffOld(l1, l2);
+ const mpd_new = calcMaxPairwiseDiffNew(l1, l2);
+ print("Max difference (old fn): {d: >5.3}\n", .{mpd_old});
+ print("Max difference (new fn): {d: >5.3}\n", .{mpd_new});
+}