feat: the entire thing

2023-06-10 22:20:47 +09:00 · 2023-06-10 22:20:47 +09:00 · b908b26dab
commit b908b26dab
8 changed files with 1695 additions and 0 deletions
--- a/.envrc
+++ b/.envrc
@ -0,0 +1 @@
 use flake
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
 /target
 /.direnv
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -0,0 +1,13 @@
 [package]
 name = "seedcracker"
 version = "0.1.0"
 edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [dependencies]
 anyhow = "1.0.71"
 bytemuck = "1.13.1"
 md5 = "0.7.0"
 tokio = { version = "1.28.2", features = ["full"] }
 wgpu = "0.16.1"
--- a/flake.lock
+++ b/flake.lock
@ -0,0 +1,59 @@
 {
  "nodes": {
    "flake-utils": {
      "inputs": {
        "systems": "systems"
      },
      "locked": {
        "lastModified": 1685518550,
        "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=",
        "owner": "numtide",
        "repo": "flake-utils",
        "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef",
        "type": "github"
      },
      "original": {
        "owner": "numtide",
        "repo": "flake-utils",
        "type": "github"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1686338508,
        "narHash": "sha256-F0bZVV5ChaduBQwAdee0o3zazmCufbXxn/teqsVRqXU=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "66c418d299cd454bd74644eaad905ec4ba2f81a1",
        "type": "github"
      },
      "original": {
        "id": "nixpkgs",
        "type": "indirect"
      }
    },
    "root": {
      "inputs": {
        "flake-utils": "flake-utils",
        "nixpkgs": "nixpkgs"
      }
    },
    "systems": {
      "locked": {
        "lastModified": 1681028828,
        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
        "owner": "nix-systems",
        "repo": "default",
        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
        "type": "github"
      },
      "original": {
        "owner": "nix-systems",
        "repo": "default",
        "type": "github"
      }
    }
  },
  "root": "root",
  "version": 7
 }
--- a/flake.nix
+++ b/flake.nix
@ -0,0 +1,24 @@
 # Flake that provides a development shell for every system enumerated by
 # flake-utils' `eachDefaultSystem`.
 {
  inputs = {
    nixpkgs.url = "nixpkgs";
    flake-utils.url = "github:numtide/flake-utils";
  };
  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};
        # Shared libraries that the shell hook below exposes to the dynamic
        # loader (presumably needed by wgpu's GL/Vulkan loading -- confirm).
        libraries = with pkgs;[
          libglvnd
        ];
      in
      {
        devShell = pkgs.mkShell {
          # Prepend the library paths to LD_LIBRARY_PATH when entering the shell.
          shellHook =
            ''
              export LD_LIBRARY_PATH=${pkgs.lib.makeLibraryPath libraries}:$LD_LIBRARY_PATH
            '';
        };
      });
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -0,0 +1,224 @@
 use std::borrow::Cow;
 use wgpu::{Instance, util::DeviceExt};
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
    let seq_id = "minecraft:entities/blaze";
    let batch_size = 512;
    let mut base_seed = 0u64;
    let mut seq_seed = md5::compute(seq_id).0;
    seq_seed.reverse();
    let instance = Instance::default();
    let adapter = instance
        .request_adapter(&wgpu::RequestAdapterOptions::default())
        .await
        .ok_or(anyhow::anyhow!("could not get adapter"))?;
    let (device, queue) = adapter
        .request_device(
            &wgpu::DeviceDescriptor {
                label: None,
                features: wgpu::Features::empty(),
                limits: wgpu::Limits::downlevel_defaults(),
            },
            None,
        )
        .await?;
    let cs_module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
        label: None,
        source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("main.wgsl"))),
    });
    // Instantiates buffer without data.
    // `usage` of buffer specifies how it can be used:
    //   `BufferUsages::MAP_READ` allows it to be read (outside the shader).
    //   `BufferUsages::COPY_DST` allows it to be the destination of the copy.
    let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
        label: None,
        size: (batch_size * 64 * std::mem::size_of::<u32>()) as wgpu::BufferAddress,
        usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
        mapped_at_creation: false,
    });
    // Instantiates buffer without data.
    // `usage` of buffer specifies how it can be used:
    //   `BufferUsages::MAP_READ` allows it to be read (outside the shader).
    //   `BufferUsages::COPY_DST` allows it to be the destination of the copy.
    let staging_buffer_2 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
        label: None,
        contents: bytemuck::cast_slice(&base_seed.to_ne_bytes()),
        usage: wgpu::BufferUsages::MAP_WRITE | wgpu::BufferUsages::COPY_SRC,
    });
    // Instantiates buffer with data (`numbers`).
    // Usage allowing the buffer to be:
    //   A storage buffer (can be bound within a bind group and thus available to a shader).
    //   The destination of a copy.
    //   The source of a copy.
    let storage_buffer_0 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
        label: Some("Base seed"),
        contents: bytemuck::cast_slice(&base_seed.to_ne_bytes()),
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
    });
    // Instantiates buffer with data (`numbers`).
    // Usage allowing the buffer to be:
    //   A storage buffer (can be bound within a bind group and thus available to a shader).
    //   The destination of a copy.
    //   The source of a copy.
    let storage_buffer_1 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
        label: Some("Result Buffer"),
        contents: &vec![0_u8; batch_size * 64 * std::mem::size_of::<u32>()],
        usage: wgpu::BufferUsages::STORAGE
            | wgpu::BufferUsages::COPY_SRC,
    });
    // Instantiates buffer with data (`numbers`).
    // Usage allowing the buffer to be:
    //   A storage buffer (can be bound within a bind group and thus available to a shader).
    //   The destination of a copy.
    //   The source of a copy.
    let storage_buffer_2 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
        label: Some("Sequence MD5"),
        contents: &seq_seed,
        usage: wgpu::BufferUsages::STORAGE,
    });
    // A bind group defines how buffers are accessed by shaders.
    // It is to WebGPU what a descriptor set is to Vulkan.
    // `binding` here refers to the `binding` of a buffer in the shader (`layout(set = 0, binding = 0) buffer`).
    // A pipeline specifies the operation of a shader
    // Instantiates the pipeline.
    let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
        label: None,
        layout: None,
        module: &cs_module,
        entry_point: "main",
    });
    // Instantiates the bind group, once again specifying the binding of buffers.
    let bind_group_0_layout = compute_pipeline.get_bind_group_layout(0);
    let bind_group_0 = device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: None,
        layout: &bind_group_0_layout,
        entries: &[wgpu::BindGroupEntry {
            binding: 0,
            resource: storage_buffer_0.as_entire_binding(),
        }],
    });
    // Instantiates the bind group, once again specifying the binding of buffers.
    let bind_group_1_layout = compute_pipeline.get_bind_group_layout(1);
    let bind_group_1 = device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: None,
        layout: &bind_group_1_layout,
        entries: &[wgpu::BindGroupEntry {
            binding: 0,
            resource: storage_buffer_1.as_entire_binding(),
        }],
    });
    // Instantiates the bind group, once again specifying the binding of buffers.
    let bind_group_2_layout = compute_pipeline.get_bind_group_layout(2);
    let bind_group_2 = device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: None,
        layout: &bind_group_2_layout,
        entries: &[wgpu::BindGroupEntry {
            binding: 0,
            resource: storage_buffer_2.as_entire_binding(),
        }],
    });
    loop {
        // println!("testing seeds {}", base_seeds[0]);
        // A command encoder executes one or many pipelines.
        // It is to WebGPU what a command buffer is to Vulkan.
        let mut encoder =
            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
        encoder.copy_buffer_to_buffer(&staging_buffer_2, 0, &storage_buffer_0, 0, (std::mem::size_of::<u64>()) as wgpu::BufferAddress);
        {
            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
            cpass.set_pipeline(&compute_pipeline);
            cpass.set_bind_group(0, &bind_group_0, &[]);
            cpass.set_bind_group(1, &bind_group_1, &[]);
            cpass.set_bind_group(2, &bind_group_2, &[]);
            cpass.dispatch_workgroups(batch_size as u32, 1, 1); // Number of cells to run, the (x,y,z) size of item being processed
        }
        // Sets adds copy operation to command encoder.
        // Will copy data from storage buffer on GPU to staging buffer on CPU.
        encoder.copy_buffer_to_buffer(&storage_buffer_1, 0, &staging_buffer, 0, (batch_size * 64 * std::mem::size_of::<u32>()) as wgpu::BufferAddress);
        // Submits command encoder for processing
        queue.submit(Some(encoder.finish()));
        // Note that we're not calling `.await` here.
        let buffer_slice = staging_buffer.slice(..);
        // Sets the buffer up for mapping, sending over the result of the mapping back to us when it is finished.
        let (sender, receiver) = tokio::sync::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
        // Poll the device in a blocking manner so that our future resolves.
        // In an actual application, `device.poll(...)` should
        // be called in an event loop or on another thread.
        device.poll(wgpu::Maintain::Wait);
        // Awaits until `buffer_future` can be read from
        receiver.await??;
        // Gets contents of buffer
        let data = buffer_slice.get_mapped_range();
        // Since contents are got in bytes, this converts these bytes back to u32
        let result: Vec<i32> = bytemuck::cast_slice(&data).to_vec();
        // With the current interface, we have to make sure all mapped views are
        // dropped before we unmap the buffer.
        drop(data);
        staging_buffer.unmap(); // Unmaps buffer from memory
                                // If you are familiar with C++ these 2 lines can be thought of similarly to:
                                //   delete myPointer;
                                //   myPointer = NULL;
                                // It effectively frees the memory
        // Returns data from buffer
        for (i, n) in result.into_iter().enumerate() {
            if n == 0 {
                let seed = base_seed + i as u64;
                println!("{:?}", seed);
                std::process::exit(0);
            }
        }
        base_seed += batch_size as u64 * 64;
        let buffer_slice = staging_buffer_2.slice(..);
        // Sets the buffer up for mapping, sending over the result of the mapping back to us when it is finished.
        let (sender, receiver) = tokio::sync::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Write, move |v| sender.send(v).unwrap());
        // Poll the device in a blocking manner so that our future resolves.
        // In an actual application, `device.poll(...)` should
        // be called in an event loop or on another thread.
        device.poll(wgpu::Maintain::Wait);
        // Awaits until `buffer_future` can be read from
        receiver.await??;
        let mut data = buffer_slice.get_mapped_range_mut();
        data.copy_from_slice(bytemuck::cast_slice(&base_seed.to_ne_bytes()));
        // With the current interface, we have to make sure all mapped views are
        // dropped before we unmap the buffer.
        drop(data);
        staging_buffer_2.unmap(); // Unmaps buffer from memory
                                // If you are familiar with C++ these 2 lines can be thought of similarly to:
                                //   delete myPointer;
                                //   myPointer = NULL;
                                // It effectively frees the memory
    }
 }
--- a/src/main.wgsl
+++ b/src/main.wgsl
@ -0,0 +1,229 @@
 // Emulated 64-bit integer built from two 32-bit words.
 // The constructor order is U64(hi, lo).
 struct U64 {
    // Most significant 32 bits.
    hi: u32,
    // Least significant 32 bits.
    lo: u32,
 }
 // 128-bit generator state held as two emulated 64-bit words.
 // `next` reads and updates both words in place.
 struct Xoroshiro128PlusPlusState {
    s0: U64,
    s1: U64,
 }
 // Rotates the 64-bit value `x` left by 17 bits.
 fn rotl17(x: U64) -> U64 {
    // Each output word keeps its own bits shifted up by 17 and receives the
    // top 17 bits of the other word in its low end.
    let hi = (x.hi << 17u) | (x.lo >> 15u);
    let lo = (x.lo << 17u) | (x.hi >> 15u);
    return U64(hi, lo);
 }
 // Rotates the 64-bit value `x` left by 28 bits.
 fn rotl28(x: U64) -> U64 {
    // Each output word keeps its own bits shifted up by 28 and receives the
    // top 28 bits of the other word in its low end.
    let hi = (x.hi << 28u) | (x.lo >> 4u);
    let lo = (x.lo << 28u) | (x.hi >> 4u);
    return U64(hi, lo);
 }
 // Rotates the 64-bit value `x` left by 49 bits.
 fn rotl49(x: U64) -> U64 {
    // 49 = 32 + 17: swapping the two words rotates by 32, and the remaining
    // 17 bits are handled by the 17-bit rotate.
    let swapped = U64(x.lo, x.hi);
    return rotl17(swapped);
 }
 // Adds two emulated 64-bit values, wrapping on overflow of the high word.
 fn add(lhs: U64, rhs: U64) -> U64 {
    // floor((lhs.lo + rhs.lo) / 2) computed without overflowing 32 bits:
    // halve both operands and add back the bit lost when both are odd.
    let half_sum = (lhs.lo >> 1u) + (rhs.lo >> 1u) + (lhs.lo & rhs.lo & 1u);
    // Bit 31 of the halved sum is the carry out of the low-word addition.
    let carry = half_sum >> 31u;
    return U64(lhs.hi + rhs.hi + carry, lhs.lo + rhs.lo);
 }
 // Advances the generator and returns the next 64-bit output.
 //
 // The output is rotl17(s0 + s1) + s0, computed from the state as it was on
 // entry. The state is then updated in place:
 //   s1 ^= s0
 //   s0  = rotl49(s0) ^ s1 ^ (s1 << 21)
 //   s1  = rotl28(s1)
 // The statement order matters: s0 is rebuilt from the already-xored s1, and
 // s1 is rotated only after s0 has consumed it.
 fn next(state: ptr<function, Xoroshiro128PlusPlusState>) -> U64 {
    let ret: U64 = add(rotl17(add((*state).s0, (*state).s1)), (*state).s0);
    (*state).s1.hi ^= (*state).s0.hi;
    (*state).s1.lo ^= (*state).s0.lo;
    (*state).s0 = rotl49((*state).s0);
    // High word of (s1 << 21): own bits shifted up plus the top 21 bits of lo.
    (*state).s0.hi ^= (*state).s1.hi ^ (((*state).s1.hi << 21u) | ((*state).s1.lo >> 11u));
    (*state).s0.lo ^= (*state).s1.lo ^ ((*state).s1.lo << 21u);
    (*state).s1 = rotl28((*state).s1);
    return ret;
 }
 // Returns `x + 1` as an emulated 64-bit value.
 fn inc(x: U64) -> U64 {
    let one = U64(0u, 1u);
    return add(x, one);
 }
 // Returns the bitwise complement of `x`.
 fn inv(x: U64) -> U64 {
    let hi = ~x.hi;
    let lo = ~x.lo;
    return U64(hi, lo);
 }
 // Returns the two's-complement negation of `x`: complement, then add one.
 fn neg(x: U64) -> U64 {
    let flipped = inv(x);
    return inc(flipped);
 }
 // Returns the magnitude of `x` when it is read as a signed 64-bit value.
 fn abs64(x: U64) -> U64 {
    // The top bit of the high word is the sign bit.
    let is_negative = (x.hi & 0x80000000u) != 0u;
    if is_negative {
        return neg(x);
    }
    return x;
 }
 // Multiplies two emulated 64-bit values read as signed integers and returns
 // the low 64 bits of the product.
 //
 // The magnitudes are multiplied with `umul` and the result is negated when
 // exactly one operand has its sign bit set.
 fn smul(lhs: U64, rhs: U64) -> U64 {
    let negative = ((rhs.hi >> 31u) ^ (lhs.hi >> 31u)) == 1u;
    // The magnitudes get their own names: re-declaring `lhs`/`rhs` here would
    // collide with the formal parameters, which share this scope in WGSL.
    let magnitude = umul(abs64(lhs), abs64(rhs));
    if negative {
        return neg(magnitude);
    }
    return magnitude;
 }
 // Shifts the 64-bit value `lhs` LEFT by `rhs` bits.
 // NOTE: despite the name, bits move towards the high word.
 //
 // The shift amount is taken modulo 64, mirroring WGSL's own rule for 32-bit
 // shifts. Amounts of 0 and of 32 or more are handled explicitly because the
 // general formula would need a 32-bit shift by 32, which WGSL reduces to a
 // shift by 0.
 fn sftr(lhs: U64, rhs: u32) -> U64 {
    let amount = rhs & 63u;
    if amount == 0u {
        return lhs;
    }
    if amount >= 32u {
        // The whole low word moves into the high word; the low word empties.
        return U64(lhs.lo << (amount - 32u), 0u);
    }
    return U64(
        (lhs.hi << amount) | (lhs.lo >> (32u - amount)),
        lhs.lo << amount
    );
 }
 // Returns the upper 32 bits of the 64-bit product `lhs * rhs`.
 //
 // Both operands are split into 16-bit halves so that each partial product
 // fits in 32 bits.
 fn umul32_hi(lhs: u32, rhs: u32) -> u32 {
    let lhs_hi = lhs >> 16u;
    let lhs_lo = lhs & 0xFFFFu;
    let rhs_hi = rhs >> 16u;
    let rhs_lo = rhs & 0xFFFFu;
    let hi_hi = lhs_hi * rhs_hi;
    let hi_lo = lhs_hi * rhs_lo;
    let lo_hi = lhs_lo * rhs_hi;
    let lo_lo = lhs_lo * rhs_lo;
    // hi_hi contributes in full. The cross terms contribute their upper
    // halves, and the last term is the carry out of bits 16..31, formed from
    // the upper half of lo_lo plus the lower halves of the cross terms.
    return hi_hi + (hi_lo >> 16u) + (lo_hi >> 16u) + (((lo_lo >> 16u) + (hi_lo & 0xFFFFu) + (lo_hi & 0xFFFFu)) >> 16u);
 }
 // Multiplies two emulated 64-bit values and returns the low 64 bits of the
 // product.
 fn umul(lhs: U64, rhs: U64) -> U64 {
    // Upper half of lo * lo spills into the high word.
    let carry = umul32_hi(lhs.lo, rhs.lo);
    // Cross terms only affect the high word; hi * hi lies entirely above bit 63.
    let cross = lhs.hi * rhs.lo + lhs.lo * rhs.hi;
    return U64(cross + carry, lhs.lo * rhs.lo);
 }
 // Logical (zero-filling) shift of `x` towards the low word by 30 bits.
 fn sftl30(x: U64) -> U64 {
    let hi = x.hi >> 30u;
    // The low word receives the bottom 30 bits of the high word.
    let lo = (x.lo >> 30u) | (x.hi << 2u);
    return U64(hi, lo);
 }
 // Logical (zero-filling) shift of `x` towards the low word by 27 bits.
 fn sftl27(x: U64) -> U64 {
    let hi = x.hi >> 27u;
    // The low word receives the bottom 27 bits of the high word.
    let lo = (x.lo >> 27u) | (x.hi << 5u);
    return U64(hi, lo);
 }
 // Logical (zero-filling) shift of `x` towards the low word by 31 bits.
 fn sftl31(x: U64) -> U64 {
    let hi = x.hi >> 31u;
    // The low word receives the bottom 31 bits of the high word.
    let lo = (x.lo >> 31u) | (x.hi << 1u);
    return U64(hi, lo);
 }
 // Bitwise exclusive-or of two emulated 64-bit values, word by word.
 fn xor(lhs: U64, rhs: U64) -> U64 {
    let hi = lhs.hi ^ rhs.hi;
    let lo = lhs.lo ^ rhs.lo;
    return U64(hi, lo);
 }
 // Bit mixer: three xor-with-shifted-self steps (by 30, 27 and 31 bits)
 // interleaved with two multiplications by fixed 64-bit constants.
 fn mix_stafford_13(x: U64) -> U64 {
    let first_multiplier = U64(0xbf58476du, 0x1ce4e5b9u);
    let second_multiplier = U64(0x94d049bbu, 0x133111ebu);
    var z = xor(x, sftl30(x));
    z = smul(z, first_multiplier);
    z = xor(z, sftl27(z));
    z = smul(z, second_multiplier);
    return xor(z, sftl31(z));
 }
 // Builds the initial generator state for a world seed.
 //
 // The seed is xored with a fixed constant to form the first half, and a
 // second fixed constant is added to form the second half. Each half is then
 // xored with one 64-bit half of `seq_hash` and passed through the mixer.
 fn derive_from_world(seed: U64) -> Xoroshiro128PlusPlusState {
    let first_half = xor(seed, U64(0x6a09e667u, 0xf3bcc909u));
    let second_half = add(first_half, U64(0x9e3779b9u, 0x7f4a7c15u));
    // Words are paired (w, z) and (y, x), each as (hi, lo).
    let first_key = U64(seq_hash.w, seq_hash.z);
    let second_key = U64(seq_hash.y, seq_hash.x);
    return Xoroshiro128PlusPlusState(
        mix_stafford_13(xor(first_half, first_key)),
        mix_stafford_13(xor(second_half, second_key))
    );
 }
 // Packs an emulated 64-bit value into a vector with the low word first.
 fn to_vec2(x: U64) -> vec2<u32> {
    return vec2<u32>(x.lo, x.hi);
 }
// First seed of the current batch: `x` is the low word and `y` the high word
// (the entry point builds U64(base_seed.y, base_seed.x)).
@group(0)
@binding(0)
 var<storage, read> base_seed: vec2<u32>;
// One flag per invocation, indexed by local index + workgroup index * 64.
// The entry point stores 0 when all 32 draws returned 1, and 1 otherwise.
@group(1)
@binding(0)
 var<storage, read_write> results: array<u32>;
// 128-bit sequence hash, consumed by `derive_from_world` as the 64-bit pairs
// (w, z) and (y, x).
@group(2)
@binding(0)
 var<storage, read> seq_hash: vec4<u32>;
// Entry point: each invocation tests one world seed.
//
// The tested seed is `base_seed` plus the invocation's flat index. The
// invocation stores 0 in its `results` slot if 32 consecutive draws of
// `next_int_with_max(.., 2)` all return 1, and stores 1 as soon as a draw
// returns anything else.
@compute
@workgroup_size(64)
 fn main(
    @builtin(local_invocation_index) local_invocation_index: u32,
    @builtin(workgroup_id) workgroup_id: vec3<u32>,
    @builtin(num_workgroups) num_workgroups: vec3<u32>,
 ) {
    // Flat index across the dispatch; 64 is the workgroup size.
    let slot = local_invocation_index + (workgroup_id.x * 64u);
    let world_seed = add(U64(base_seed.y, base_seed.x), U64(0u, slot));
    var rng = derive_from_world(world_seed);
    var verdict = 0u;
    for (var draw: u32 = 0u; draw < 32u; draw++) {
        if next_int_with_max(&rng, 2u) != 1u {
            verdict = 1u;
            break;
        }
    }
    results[slot] = verdict;
 }
 // Unsigned comparison `lhs < rhs` between a 64-bit and a 32-bit value.
 fn lt(lhs: U64, rhs: u32) -> bool {
    // Any set bit in the high word makes lhs larger than every u32.
    return lhs.hi == 0u && lhs.lo < rhs;
 }
 // Draws the next output and returns its low 32 bits as a signed integer.
 fn next_int(state: ptr<function, Xoroshiro128PlusPlusState>) -> i32 {
    let drawn = next(state);
    return i32(drawn.lo);
 }
 // Draws an integer below `max` by multiply-and-shift with rejection.
 //
 // The low 32 bits of a draw are multiplied by `max`; the high word of that
 // 64-bit product is the result, which is therefore always below `max`.
 // When the low word of the product is below `max`, draws are repeated while
 // that low word is below (2^32 - max) % max.
 // NOTE(review): `max == 0` reaches the `% max` below -- callers presumably
 // never pass 0; confirm.
 fn next_int_with_max(state: ptr<function, Xoroshiro128PlusPlusState>, max: u32) -> u32 {
    // l: 32-bit draw widened to 64 bits; m: l * max; n: low word of m.
    var l: U64 = U64(0u, next(state).lo);
    var m: U64 = smul(l, U64(0u, max));
    var n: U64 = U64(0u, m.lo);
    if (lt(n, max)) {
        // (~max) + 1u is 2^32 - max in wrapping arithmetic. The loop body
        // redraws l and m; the continuing expression refreshes n from the new m.
        for(var j: u32 = (((~max) + 1u) % max); lt(n, j); n = U64(0u, m.lo)) {
            l = U64(0u, next(state).lo);
            m = smul(l, U64(0u, max));
        }
    }
    return m.hi;
 }
 // Draws the next output and maps its top 24 bits to a float in [0, 1).
 fn next_float(state: ptr<function, Xoroshiro128PlusPlusState>) -> f32 {
    // Keep the 24 most significant bits of the 64-bit output.
    let top_bits = next(state).hi >> 8u;
    // 5.9604645E-8 is 2^-24.
    return f32(top_bits) * 5.9604645E-8;
 }