feat: the entire thing

main
Skye 1 year ago
commit b908b26dab
Signed by: me
GPG Key ID: 0104BC05F41B77B8

1
.envrc

@@ -0,0 +1 @@
use flake

2
.gitignore vendored

@@ -0,0 +1,2 @@
/target
/.direnv

1143
Cargo.lock generated

File diff suppressed because it is too large

13
Cargo.toml

@@ -0,0 +1,13 @@
[package]
name = "seedcracker"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.71"
bytemuck = "1.13.1"
md5 = "0.7.0"
tokio = { version = "1.28.2", features = ["full"] }
wgpu = "0.16.1"

59
flake.lock

@@ -0,0 +1,59 @@
{
  "nodes": {
    "flake-utils": {
      "inputs": {
        "systems": "systems"
      },
      "locked": {
        "lastModified": 1685518550,
        "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=",
        "owner": "numtide",
        "repo": "flake-utils",
        "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef",
        "type": "github"
      },
      "original": {
        "owner": "numtide",
        "repo": "flake-utils",
        "type": "github"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1686338508,
        "narHash": "sha256-F0bZVV5ChaduBQwAdee0o3zazmCufbXxn/teqsVRqXU=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "66c418d299cd454bd74644eaad905ec4ba2f81a1",
        "type": "github"
      },
      "original": {
        "id": "nixpkgs",
        "type": "indirect"
      }
    },
    "root": {
      "inputs": {
        "flake-utils": "flake-utils",
        "nixpkgs": "nixpkgs"
      }
    },
    "systems": {
      "locked": {
        "lastModified": 1681028828,
        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
        "owner": "nix-systems",
        "repo": "default",
        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
        "type": "github"
      },
      "original": {
        "owner": "nix-systems",
        "repo": "default",
        "type": "github"
      }
    }
  },
  "root": "root",
  "version": 7
}

24
flake.nix

@@ -0,0 +1,24 @@
{
  inputs = {
    nixpkgs.url = "nixpkgs";
    flake-utils.url = "github:numtide/flake-utils";
  };
  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};
        libraries = with pkgs; [
          libglvnd
        ];
      in
      {
        devShell = pkgs.mkShell {
          shellHook =
            ''
              export LD_LIBRARY_PATH=${pkgs.lib.makeLibraryPath libraries}:$LD_LIBRARY_PATH
            '';
        };
      });
}

224
src/main.rs

@@ -0,0 +1,224 @@
use std::borrow::Cow;
use wgpu::{Instance, util::DeviceExt};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let seq_id = "minecraft:entities/blaze";
    let batch_size = 512;
    let mut base_seed = 0u64;
    let mut seq_seed = md5::compute(seq_id).0;
    seq_seed.reverse();

    let instance = Instance::default();
    let adapter = instance
        .request_adapter(&wgpu::RequestAdapterOptions::default())
        .await
        .ok_or(anyhow::anyhow!("could not get adapter"))?;
    let (device, queue) = adapter
        .request_device(
            &wgpu::DeviceDescriptor {
                label: None,
                features: wgpu::Features::empty(),
                limits: wgpu::Limits::downlevel_defaults(),
            },
            None,
        )
        .await?;

    let cs_module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
        label: None,
        source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("main.wgsl"))),
    });
    // Staging buffer for reading results back on the CPU:
    // `BufferUsages::MAP_READ` allows it to be read outside the shader,
    // `BufferUsages::COPY_DST` allows it to be the destination of a copy.
    let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
        label: None,
        size: (batch_size * 64 * std::mem::size_of::<u32>()) as wgpu::BufferAddress,
        usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
        mapped_at_creation: false,
    });
    // Staging buffer for uploading the current base seed:
    // `BufferUsages::MAP_WRITE` allows the CPU to write into it,
    // `BufferUsages::COPY_SRC` allows it to be the source of a copy.
    let staging_buffer_2 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
        label: None,
        contents: bytemuck::cast_slice(&base_seed.to_ne_bytes()),
        usage: wgpu::BufferUsages::MAP_WRITE | wgpu::BufferUsages::COPY_SRC,
    });
    // Storage buffer holding the base seed, initialised from `base_seed`.
    // It is bound to the shader and is the destination of the per-batch seed copy.
    let storage_buffer_0 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
        label: Some("Base seed"),
        contents: bytemuck::cast_slice(&base_seed.to_ne_bytes()),
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
    });
    // Storage buffer the shader writes one result word per candidate seed into,
    // zero-initialised and copyable back into the read staging buffer.
    let storage_buffer_1 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
        label: Some("Result Buffer"),
        contents: &vec![0_u8; batch_size * 64 * std::mem::size_of::<u32>()],
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
    });
    // Storage buffer holding the MD5 hash of the sequence id; read-only in the shader.
    let storage_buffer_2 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
        label: Some("Sequence MD5"),
        contents: &seq_seed,
        usage: wgpu::BufferUsages::STORAGE,
    });
    // A compute pipeline wraps the shader module and its entry point.
    let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
        label: None,
        layout: None,
        module: &cs_module,
        entry_point: "main",
    });

    // A bind group defines how buffers are accessed by shaders.
    // It is to WebGPU what a descriptor set is to Vulkan.
    // `binding` here refers to the `@binding(n)` of a buffer in the WGSL
    // (`@group(0) @binding(0) var<storage, read> base_seed`).
    let bind_group_0_layout = compute_pipeline.get_bind_group_layout(0);
    let bind_group_0 = device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: None,
        layout: &bind_group_0_layout,
        entries: &[wgpu::BindGroupEntry {
            binding: 0,
            resource: storage_buffer_0.as_entire_binding(),
        }],
    });
    // Bind group for the results buffer (`@group(1) @binding(0)`).
    let bind_group_1_layout = compute_pipeline.get_bind_group_layout(1);
    let bind_group_1 = device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: None,
        layout: &bind_group_1_layout,
        entries: &[wgpu::BindGroupEntry {
            binding: 0,
            resource: storage_buffer_1.as_entire_binding(),
        }],
    });
    // Bind group for the sequence hash (`@group(2) @binding(0)`).
    let bind_group_2_layout = compute_pipeline.get_bind_group_layout(2);
    let bind_group_2 = device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: None,
        layout: &bind_group_2_layout,
        entries: &[wgpu::BindGroupEntry {
            binding: 0,
            resource: storage_buffer_2.as_entire_binding(),
        }],
    });
    loop {
        // println!("testing seeds {}", base_seeds[0]);

        // A command encoder executes one or many pipelines.
        // It is to WebGPU what a command buffer is to Vulkan.
        let mut encoder =
            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
        // Copy the freshly written base seed into the storage buffer the shader reads.
        encoder.copy_buffer_to_buffer(
            &staging_buffer_2,
            0,
            &storage_buffer_0,
            0,
            std::mem::size_of::<u64>() as wgpu::BufferAddress,
        );
        {
            let mut cpass =
                encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
            cpass.set_pipeline(&compute_pipeline);
            cpass.set_bind_group(0, &bind_group_0, &[]);
            cpass.set_bind_group(1, &bind_group_1, &[]);
            cpass.set_bind_group(2, &bind_group_2, &[]);
            // batch_size workgroups of 64 invocations each, one candidate seed per invocation.
            cpass.dispatch_workgroups(batch_size as u32, 1, 1);
        }
        // Add a copy operation to the command encoder:
        // copy the results from the storage buffer on the GPU into the mappable staging buffer.
        encoder.copy_buffer_to_buffer(
            &storage_buffer_1,
            0,
            &staging_buffer,
            0,
            (batch_size * 64 * std::mem::size_of::<u32>()) as wgpu::BufferAddress,
        );
        // Submit the command encoder for processing.
        queue.submit(Some(encoder.finish()));

        // Note that we're not calling `.await` here.
        let buffer_slice = staging_buffer.slice(..);
        // Set the buffer up for mapping, sending the result of the mapping back to us when it is finished.
        let (sender, receiver) = tokio::sync::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
        // Poll the device in a blocking manner so that our future resolves.
        // In an actual application, `device.poll(...)` should
        // be called in an event loop or on another thread.
        device.poll(wgpu::Maintain::Wait);
        // Wait until the buffer can be read from.
        receiver.await??;
        // Get the contents of the buffer.
        let data = buffer_slice.get_mapped_range();
        // The mapped contents are raw bytes; reinterpret them as 32-bit integers.
        let result: Vec<i32> = bytemuck::cast_slice(&data).to_vec();
        // All mapped views must be dropped before the buffer is unmapped,
        // which releases it for reuse on the next iteration.
        drop(data);
        staging_buffer.unmap();

        // A zero result means every sampled call matched; report the candidate seed and stop.
        for (i, n) in result.into_iter().enumerate() {
            if n == 0 {
                let seed = base_seed + i as u64;
                println!("{:?}", seed);
                std::process::exit(0);
            }
        }

        // Advance to the next batch of candidate seeds.
        base_seed += batch_size as u64 * 64;
        let buffer_slice = staging_buffer_2.slice(..);
        // Set the buffer up for mapping, sending the result of the mapping back to us when it is finished.
        let (sender, receiver) = tokio::sync::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Write, move |v| sender.send(v).unwrap());
        // Poll the device in a blocking manner so that our future resolves.
        // In an actual application, `device.poll(...)` should
        // be called in an event loop or on another thread.
        device.poll(wgpu::Maintain::Wait);
        // Wait until the buffer can be written to.
        receiver.await??;
        // Write the new base seed into the upload staging buffer for the next iteration.
        let mut data = buffer_slice.get_mapped_range_mut();
        data.copy_from_slice(bytemuck::cast_slice(&base_seed.to_ne_bytes()));
        // All mapped views must be dropped before the buffer is unmapped.
        drop(data);
        staging_buffer_2.unmap();
    }
}
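Note (not part of the commit): a minimal sketch of the seed packing the buffers above rely on. `base_seed.to_ne_bytes()` is uploaded as raw bytes and viewed in the shader as a `vec2<u32>`, so on a little-endian host the first word is the low half and the second the high half, which the shader then reassembles as `U64(base_seed.y, base_seed.x)`. The helper names below are illustrative, not part of the project.

// Illustrative only: how a u64 seed splits into the two u32 words the shader sees.
fn split_seed(seed: u64) -> (u32, u32) {
    // Low word first, matching `to_ne_bytes()` on a little-endian host.
    (seed as u32, (seed >> 32) as u32)
}

fn join_seed(lo: u32, hi: u32) -> u64 {
    ((hi as u64) << 32) | lo as u64
}

fn main() {
    let seed: u64 = 0x1122_3344_5566_7788;
    let (lo, hi) = split_seed(seed);
    // The shader reconstructs the same value as U64(base_seed.y, base_seed.x).
    assert_eq!(join_seed(lo, hi), seed);
    assert_eq!(lo, 0x5566_7788);
    assert_eq!(hi, 0x1122_3344);
    println!("lo = {lo:#010x}, hi = {hi:#010x}");
}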

229
src/main.wgsl

@@ -0,0 +1,229 @@
// A 64-bit unsigned integer emulated as two 32-bit words: hi is the upper half, lo the lower half.
struct U64 {
    hi: u32,
    lo: u32,
}

struct Xoroshiro128PlusPlusState {
    s0: U64,
    s1: U64,
}

// 64-bit rotate left by 17.
fn rotl17(x: U64) -> U64 {
    return U64(
        (x.hi << 17u) | (x.lo >> 15u),
        (x.hi >> 15u) | (x.lo << 17u)
    );
}

// 64-bit rotate left by 28.
fn rotl28(x: U64) -> U64 {
    return U64(
        (x.hi << 28u) | (x.lo >> 4u),
        (x.hi >> 4u) | (x.lo << 28u)
    );
}

// 64-bit rotate left by 49.
fn rotl49(x: U64) -> U64 {
    return U64(
        (x.lo << 17u) | (x.hi >> 15u),
        (x.lo >> 15u) | (x.hi << 17u)
    );
}

// 64-bit addition with a manual carry out of the low word.
fn add(lhs: U64, rhs: U64) -> U64 {
    if (((lhs.lo >> 1u) + (rhs.lo >> 1u) + ((lhs.lo & 1u) & (rhs.lo & 1u))) >> 31u) == 1u {
        return U64(
            lhs.hi + rhs.hi + 1u,
            lhs.lo + rhs.lo
        );
    } else {
        return U64(
            lhs.hi + rhs.hi,
            lhs.lo + rhs.lo
        );
    }
}

// One xoroshiro128++ step: output rotl(s0 + s1, 17) + s0, then update the state.
fn next(state: ptr<function, Xoroshiro128PlusPlusState>) -> U64 {
    let ret: U64 = add(rotl17(add((*state).s0, (*state).s1)), (*state).s0);
    (*state).s1.hi ^= (*state).s0.hi;
    (*state).s1.lo ^= (*state).s0.lo;
    (*state).s0 = rotl49((*state).s0);
    (*state).s0.hi ^= (*state).s1.hi ^ (((*state).s1.hi << 21u) | ((*state).s1.lo >> 11u));
    (*state).s0.lo ^= (*state).s1.lo ^ ((*state).s1.lo << 21u);
    (*state).s1 = rotl28((*state).s1);
    return ret;
}

fn inc(x: U64) -> U64 {
    return add(x, U64(0u, 1u));
}

fn inv(x: U64) -> U64 {
    return U64(~x.hi, ~x.lo);
}

// Two's-complement negation.
fn neg(x: U64) -> U64 {
    return inc(inv(x));
}

fn abs64(x: U64) -> U64 {
    if (x.hi >> 31u) == 1u {
        return neg(x);
    } else {
        return x;
    }
}

// Signed 64-bit multiplication (low 64 bits of the product).
fn smul(lhs: U64, rhs: U64) -> U64 {
    let sign = ((rhs.hi >> 31u) ^ (lhs.hi >> 31u)) == 1u;
    let lhs = abs64(lhs);
    let rhs = abs64(rhs);
    if sign {
        return neg(umul(lhs, rhs));
    } else {
        return umul(lhs, rhs);
    }
}

// 64-bit shift left by rhs (0 < rhs < 32); currently unused.
fn sftr(lhs: U64, rhs: u32) -> U64 {
    return U64(
        (lhs.hi << rhs) | (lhs.lo >> (32u - rhs)),
        lhs.lo << rhs
    );
}

// Upper 32 bits of a 32x32-bit unsigned multiplication.
fn umul32_hi(lhs: u32, rhs: u32) -> u32 {
    let lhs_hi = lhs >> 16u;
    let lhs_lo = lhs & 0xFFFFu;
    let rhs_hi = rhs >> 16u;
    let rhs_lo = rhs & 0xFFFFu;
    let hi_hi = lhs_hi * rhs_hi;
    let hi_lo = lhs_hi * rhs_lo;
    let lo_hi = lhs_lo * rhs_hi;
    let lo_lo = lhs_lo * rhs_lo;
    return hi_hi + (hi_lo >> 16u) + (lo_hi >> 16u) + (((lo_lo >> 16u) + (hi_lo & 0xFFFFu) + (lo_hi & 0xFFFFu)) >> 16u);
}

// Unsigned 64-bit multiplication (low 64 bits of the product).
fn umul(lhs: U64, rhs: U64) -> U64 {
    let hi1 = lhs.hi * rhs.lo;
    let hi2 = lhs.lo * rhs.hi;
    let lo = lhs.lo * rhs.lo;
    let hi = hi1 + hi2 + umul32_hi(lhs.lo, rhs.lo);
    return U64(hi, lo);
}

// 64-bit logical shift right by 30 (right shift, despite the name).
fn sftl30(x: U64) -> U64 {
    return U64(
        (x.hi >> 30u),
        (x.hi << 2u) | (x.lo >> 30u)
    );
}

// 64-bit logical shift right by 27.
fn sftl27(x: U64) -> U64 {
    return U64(
        (x.hi >> 27u),
        (x.hi << 5u) | (x.lo >> 27u)
    );
}

// 64-bit logical shift right by 31.
fn sftl31(x: U64) -> U64 {
    return U64(
        (x.hi >> 31u),
        (x.hi << 1u) | (x.lo >> 31u)
    );
}

fn xor(lhs: U64, rhs: U64) -> U64 {
    return U64(
        lhs.hi ^ rhs.hi,
        lhs.lo ^ rhs.lo
    );
}

// SplitMix64 finalizer (Stafford mix13).
fn mix_stafford_13(x: U64) -> U64 {
    let x1 = smul(xor(x, sftl30(x)), U64(0xbf58476du, 0x1ce4e5b9u));
    let x2 = smul(xor(x1, sftl27(x1)), U64(0x94d049bbu, 0x133111ebu));
    return xor(x2, sftl31(x2));
}

// Derive the xoroshiro state for this random sequence from a candidate world seed
// and the MD5 hash of the sequence id.
fn derive_from_world(seed: U64) -> Xoroshiro128PlusPlusState {
    let s0 = U64(seed.hi ^ 0x6a09e667u, seed.lo ^ 0xf3bcc909u);
    let s1 = add(s0, U64(0x9e3779b9u, 0x7f4a7c15u));
    return Xoroshiro128PlusPlusState(
        mix_stafford_13(xor(s0, U64(seq_hash.w, seq_hash.z))),
        mix_stafford_13(xor(s1, U64(seq_hash.y, seq_hash.x)))
    );
}

fn to_vec2(x: U64) -> vec2<u32> {
    return vec2(x.lo, x.hi);
}
// The packed base world seed for this batch: x = low 32 bits, y = high 32 bits.
@group(0)
@binding(0)
var<storage, read> base_seed: vec2<u32>;

// One result word per candidate seed: 0 = every sampled call matched, 1 = rejected.
@group(1)
@binding(0)
var<storage, read_write> results: array<u32>;

// MD5 hash of the sequence id, uploaded by the host.
@group(2)
@binding(0)
var<storage, read> seq_hash: vec4<u32>;

@compute
@workgroup_size(64)
fn main(
    @builtin(local_invocation_index) local_invocation_index: u32,
    @builtin(workgroup_id) workgroup_id: vec3<u32>,
    @builtin(num_workgroups) num_workgroups: vec3<u32>,
) {
    let workgroup_idx = workgroup_id.x;
    // Each invocation tests one candidate seed: the base seed plus its global index.
    let world_seed = add(U64(base_seed.y, base_seed.x), U64(0u, local_invocation_index + (workgroup_idx * 64u)));
    var state = derive_from_world(world_seed);
    results[local_invocation_index + (workgroup_idx * 64u)] = 0u;
    for (var i: u32 = 0u; i < 32u; i++) {
        let value = next_int_with_max(&state, 2u);
        if value != 1u {
            // Mark the candidate as rejected as soon as one draw disagrees.
            results[local_invocation_index + (workgroup_idx * 64u)] = 1u;
            break;
        }
    }
}

// True if the 64-bit value is less than the 32-bit value.
fn lt(lhs: U64, rhs: u32) -> bool {
    if lhs.hi == 0u {
        return lhs.lo < rhs;
    } else {
        return false;
    }
}

fn next_int(state: ptr<function, Xoroshiro128PlusPlusState>) -> i32 {
    return i32(next(state).lo);
}

// Bounded next-int: multiply the low 32 output bits by `max` and take the high word,
// rejecting draws whose low word falls below 2^32 mod max to keep the result unbiased.
fn next_int_with_max(state: ptr<function, Xoroshiro128PlusPlusState>, max: u32) -> u32 {
    var l: U64 = U64(0u, next(state).lo);
    var m: U64 = smul(l, U64(0u, max));
    var n: U64 = U64(0u, m.lo);
    if (lt(n, max)) {
        for (var j: u32 = (((~max) + 1u) % max); lt(n, j); n = U64(0u, m.lo)) {
            l = U64(0u, next(state).lo);
            m = smul(l, U64(0u, max));
        }
    }
    return m.hi;
}

// Uniform float in [0, 1) from the top 24 bits of the next output.
fn next_float(state: ptr<function, Xoroshiro128PlusPlusState>) -> f32 {
    return f32(next(state).hi >> 8u) * 5.9604645E-8;
}
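Note (not part of the commit): a CPU-side Rust sketch of the same generator on native u64, useful for sanity-checking the shader's two-word arithmetic. The rotation and shift constants (17, 49, 21, 28) and the mix13 constants match the WGSL above; the seeding helper mirrors `derive_from_world`, but how the four MD5 words of `seq_hash` pack into the two u64 halves is an assumption to verify against the shader, and all function names here are illustrative.

// Illustrative CPU reference for cross-checking the shader's U64 emulation.

/// SplitMix64 finalizer, Stafford "mix13" variant; same constants as mix_stafford_13 in the shader.
fn mix_stafford_13(mut z: u64) -> u64 {
    z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
    z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
    z ^ (z >> 31)
}

/// One xoroshiro128++ step; mirrors next() in the shader (rotations 17/49/28, shift 21).
fn next(s: &mut [u64; 2]) -> u64 {
    let s0 = s[0];
    let mut s1 = s[1];
    let result = s0.wrapping_add(s1).rotate_left(17).wrapping_add(s0);
    s1 ^= s0;
    s[0] = s0.rotate_left(49) ^ s1 ^ (s1 << 21);
    s[1] = s1.rotate_left(28);
    result
}

/// Mirrors derive_from_world: split and mix the world seed, folding in the sequence hash.
/// The packing of seq_hash.w/z/y/x into seq_lo/seq_hi is an assumption; check it against the shader.
fn derive_from_world(world_seed: u64, seq_lo: u64, seq_hi: u64) -> [u64; 2] {
    let s0 = world_seed ^ 0x6A09_E667_F3BC_C909;
    let s1 = s0.wrapping_add(0x9E37_79B9_7F4A_7C15);
    [mix_stafford_13(s0 ^ seq_lo), mix_stafford_13(s1 ^ seq_hi)]
}

/// Bounded next-int via multiply-high with rejection, as in next_int_with_max (max must be nonzero).
fn next_int_with_max(s: &mut [u64; 2], max: u32) -> u32 {
    // Rejection threshold: 2^32 mod max, computed as in the shader's ((~max) + 1) % max.
    let threshold = (max.wrapping_neg() % max) as u64;
    loop {
        let m = (next(s) & 0xFFFF_FFFF) * max as u64;
        if (m & 0xFFFF_FFFF) >= threshold {
            return (m >> 32) as u32;
        }
    }
}

fn main() {
    // Example: derive a state from world seed 0 with a zero sequence hash and draw a few values.
    let mut state = derive_from_world(0, 0, 0);
    for _ in 0..4 {
        println!("{}", next_int_with_max(&mut state, 2));
    }
}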