feat: the entire thing

Skye 2023-06-10 22:20:47 +09:00
commit b908b26dab
Signed by: me
GPG key ID: 0104BC05F41B77B8
8 changed files with 1695 additions and 0 deletions

1  .envrc  Normal file

@@ -0,0 +1 @@
use flake

2  .gitignore  vendored  Normal file

@@ -0,0 +1,2 @@
/target
/.direnv

1143  Cargo.lock  generated  Normal file

File diff suppressed because it is too large.

13  Cargo.toml  Normal file

@@ -0,0 +1,13 @@
[package]
name = "seedcracker"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.71"
bytemuck = "1.13.1"
md5 = "0.7.0"
tokio = { version = "1.28.2", features = ["full"] }
wgpu = "0.16.1"

59  flake.lock  Normal file

@@ -0,0 +1,59 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1685518550,
"narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1686338508,
"narHash": "sha256-F0bZVV5ChaduBQwAdee0o3zazmCufbXxn/teqsVRqXU=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "66c418d299cd454bd74644eaad905ec4ba2f81a1",
"type": "github"
},
"original": {
"id": "nixpkgs",
"type": "indirect"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

24  flake.nix  Normal file

@@ -0,0 +1,24 @@
{
inputs = {
nixpkgs.url = "nixpkgs";
flake-utils.url = "github:numtide/flake-utils";
};
outputs = { self, nixpkgs, flake-utils }:
flake-utils.lib.eachDefaultSystem (system:
let
pkgs = nixpkgs.legacyPackages.${system};
libraries = with pkgs;[
libglvnd
];
in
{
devShell = pkgs.mkShell {
shellHook =
''
export LD_LIBRARY_PATH=${pkgs.lib.makeLibraryPath libraries}:$LD_LIBRARY_PATH
'';
};
});
}

224  src/main.rs  Normal file

@@ -0,0 +1,224 @@
use std::borrow::Cow;
use wgpu::{Instance, util::DeviceExt};
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let seq_id = "minecraft:entities/blaze";
let batch_size = 512;
let mut base_seed = 0u64;
let mut seq_seed = md5::compute(seq_id).0;
seq_seed.reverse();
let instance = Instance::default();
let adapter = instance
.request_adapter(&wgpu::RequestAdapterOptions::default())
.await
.ok_or(anyhow::anyhow!("could not get adapter"))?;
let (device, queue) = adapter
.request_device(
&wgpu::DeviceDescriptor {
label: None,
features: wgpu::Features::empty(),
limits: wgpu::Limits::downlevel_defaults(),
},
None,
)
.await?;
let cs_module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
label: None,
source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("main.wgsl"))),
});
// Instantiates buffer without data.
// `usage` of buffer specifies how it can be used:
// `BufferUsages::MAP_READ` allows it to be read (outside the shader).
// `BufferUsages::COPY_DST` allows it to be the destination of the copy.
let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: None,
size: (batch_size * 64 * std::mem::size_of::<u32>()) as wgpu::BufferAddress,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
// Instantiates a small staging buffer holding the current base seed.
// `usage` of buffer specifies how it can be used:
// `BufferUsages::MAP_WRITE` allows it to be written from the CPU.
// `BufferUsages::COPY_SRC` allows it to be the source of a copy into the seed storage buffer.
let staging_buffer_2 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: None,
contents: bytemuck::cast_slice(&base_seed.to_ne_bytes()),
usage: wgpu::BufferUsages::MAP_WRITE | wgpu::BufferUsages::COPY_SRC,
});
// Instantiates the buffer holding the base seed for the current batch.
// Usage allowing the buffer to be:
// A storage buffer (can be bound within a bind group and thus available to a shader).
// The destination of a copy, so the staging buffer above can update it each iteration.
let storage_buffer_0 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("Base seed"),
contents: bytemuck::cast_slice(&base_seed.to_ne_bytes()),
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
});
// Instantiates the buffer the shader writes one result word per candidate seed into.
// Usage allowing the buffer to be:
// A storage buffer (can be bound within a bind group and thus available to a shader).
// The source of a copy, so results can be copied back to the staging buffer for readback.
let storage_buffer_1 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("Result Buffer"),
contents: &vec![0_u8; batch_size * 64 * std::mem::size_of::<u32>()],
usage: wgpu::BufferUsages::STORAGE
| wgpu::BufferUsages::COPY_SRC,
});
// Instantiates the buffer holding the md5 hash of the random sequence id.
// Usage allowing the buffer to be:
// A storage buffer (can be bound within a bind group and thus available to a shader),
// read-only as far as this program is concerned.
let storage_buffer_2 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("Sequence MD5"),
contents: &seq_seed,
usage: wgpu::BufferUsages::STORAGE,
});
// A bind group defines how buffers are accessed by shaders.
// It is to WebGPU what a descriptor set is to Vulkan.
// `binding` here refers to the `@binding(n)` attribute of a buffer in the WGSL shader.
// A pipeline specifies the operation of a shader.
// Instantiates the pipeline; with `layout: None` the bind group layouts are inferred from the shader.
let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
label: None,
layout: None,
module: &cs_module,
entry_point: "main",
});
// Instantiates the bind group, once again specifying the binding of buffers.
let bind_group_0_layout = compute_pipeline.get_bind_group_layout(0);
let bind_group_0 = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: None,
layout: &bind_group_0_layout,
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: storage_buffer_0.as_entire_binding(),
}],
});
// Instantiates the bind group, once again specifying the binding of buffers.
let bind_group_1_layout = compute_pipeline.get_bind_group_layout(1);
let bind_group_1 = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: None,
layout: &bind_group_1_layout,
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: storage_buffer_1.as_entire_binding(),
}],
});
// Instantiates the bind group, once again specifying the binding of buffers.
let bind_group_2_layout = compute_pipeline.get_bind_group_layout(2);
let bind_group_2 = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: None,
layout: &bind_group_2_layout,
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: storage_buffer_2.as_entire_binding(),
}],
});
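// Each iteration tests `batch_size * 64` consecutive seeds starting at `base_seed`:
// upload the base seed, dispatch the shader, read the results back, then advance.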
loop {
// println!("testing seeds starting at {}", base_seed);
// A command encoder executes one or many pipelines.
// It is to WebGPU what a command buffer is to Vulkan.
let mut encoder =
device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
encoder.copy_buffer_to_buffer(&staging_buffer_2, 0, &storage_buffer_0, 0, (std::mem::size_of::<u64>()) as wgpu::BufferAddress);
{
let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
cpass.set_pipeline(&compute_pipeline);
cpass.set_bind_group(0, &bind_group_0, &[]);
cpass.set_bind_group(1, &bind_group_1, &[]);
cpass.set_bind_group(2, &bind_group_2, &[]);
cpass.dispatch_workgroups(batch_size as u32, 1, 1); // Number of cells to run, the (x,y,z) size of item being processed
}
// Adds a copy operation to the command encoder.
// Will copy data from storage buffer on GPU to staging buffer on CPU.
encoder.copy_buffer_to_buffer(&storage_buffer_1, 0, &staging_buffer, 0, (batch_size * 64 * std::mem::size_of::<u32>()) as wgpu::BufferAddress);
// Submits command encoder for processing
queue.submit(Some(encoder.finish()));
// Note that we're not calling `.await` here.
let buffer_slice = staging_buffer.slice(..);
// Sets the buffer up for mapping, sending over the result of the mapping back to us when it is finished.
let (sender, receiver) = tokio::sync::oneshot::channel();
buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
// Poll the device in a blocking manner so that our future resolves.
// In an actual application, `device.poll(...)` should
// be called in an event loop or on another thread.
device.poll(wgpu::Maintain::Wait);
// Awaits until `buffer_future` can be read from
receiver.await??;
// Gets contents of buffer
let data = buffer_slice.get_mapped_range();
// The mapped contents are raw bytes; reinterpret them as u32 result words.
let result: Vec<u32> = bytemuck::cast_slice(&data).to_vec();
// With the current interface, we have to make sure all mapped views are
// dropped before we unmap the buffer.
drop(data);
staging_buffer.unmap(); // Unmaps buffer from memory
// Dropping the mapped view and unmapping releases the staging buffer so it can
// be mapped again on the next iteration.
// Scan the results: a 0 means every draw for that seed matched, so the
// corresponding seed is a hit.
for (i, n) in result.into_iter().enumerate() {
if n == 0 {
let seed = base_seed + i as u64;
println!("{:?}", seed);
std::process::exit(0);
}
}
base_seed += batch_size as u64 * 64;
let buffer_slice = staging_buffer_2.slice(..);
// Sets the buffer up for mapping, sending over the result of the mapping back to us when it is finished.
let (sender, receiver) = tokio::sync::oneshot::channel();
buffer_slice.map_async(wgpu::MapMode::Write, move |v| sender.send(v).unwrap());
// Poll the device in a blocking manner so that our future resolves.
// In an actual application, `device.poll(...)` should
// be called in an event loop or on another thread.
device.poll(wgpu::Maintain::Wait);
// Awaits until `buffer_future` can be read from
receiver.await??;
let mut data = buffer_slice.get_mapped_range_mut();
data.copy_from_slice(bytemuck::cast_slice(&base_seed.to_ne_bytes()));
// With the current interface, we have to make sure all mapped views are
// dropped before we unmap the buffer.
drop(data);
staging_buffer_2.unmap(); // Unmaps buffer from memory
// Dropping the mapped view and unmapping makes the updated base seed available
// for the copy at the top of the next iteration.
}
}
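For reference, below is a minimal CPU-side sketch of the per-seed test the shader performs, written with native u64 arithmetic instead of the u32-pair emulation in src/main.wgsl. It is an illustration, not part of the commit: the helper names and the digest halves used in its `main` are hypothetical, and which half of the md5 digest feeds which state word is an assumption that follows the byte reversal above.

struct Xoroshiro128PlusPlus {
    s0: u64,
    s1: u64,
}

fn mix_stafford_13(mut x: u64) -> u64 {
    x = (x ^ (x >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
    x = (x ^ (x >> 27)).wrapping_mul(0x94d049bb133111eb);
    x ^ (x >> 31)
}

impl Xoroshiro128PlusPlus {
    // Mirrors `derive_from_world` in main.wgsl: split-mix the world seed, then
    // fold in the two 64-bit halves of the sequence-id hash.
    fn derive(world_seed: u64, seq_hi: u64, seq_lo: u64) -> Self {
        let s0 = world_seed ^ 0x6a09e667f3bcc909;
        let s1 = s0.wrapping_add(0x9e3779b97f4a7c15);
        Self {
            s0: mix_stafford_13(s0 ^ seq_hi),
            s1: mix_stafford_13(s1 ^ seq_lo),
        }
    }

    // Standard xoroshiro128++ step, as implemented by `next` in main.wgsl.
    fn next(&mut self) -> u64 {
        let ret = self
            .s0
            .wrapping_add(self.s1)
            .rotate_left(17)
            .wrapping_add(self.s0);
        self.s1 ^= self.s0;
        self.s0 = self.s0.rotate_left(49) ^ self.s1 ^ (self.s1 << 21);
        self.s1 = self.s1.rotate_left(28);
        ret
    }

    // Bounded draw via 32x32 -> 64-bit multiply with rejection of the biased
    // low range, mirroring `next_int_with_max` in main.wgsl.
    fn next_bounded(&mut self, bound: u32) -> u32 {
        let mut m = (self.next() as u32 as u64) * bound as u64;
        if (m as u32) < bound {
            let threshold = bound.wrapping_neg() % bound;
            while (m as u32) < threshold {
                m = (self.next() as u32 as u64) * bound as u64;
            }
        }
        (m >> 32) as u32
    }
}

// A seed is a hit when 32 consecutive nextInt(2) draws are all 1, which is the
// condition under which the shader leaves a 0 in `results`.
fn seed_matches(world_seed: u64, seq_hi: u64, seq_lo: u64) -> bool {
    let mut rng = Xoroshiro128PlusPlus::derive(world_seed, seq_hi, seq_lo);
    (0..32).all(|_| rng.next_bounded(2) == 1)
}

fn main() {
    // Hypothetical digest halves purely for illustration; the real ones come
    // from md5("minecraft:entities/blaze") as computed in src/main.rs.
    let (seq_hi, seq_lo) = (0x0011_2233_4455_6677u64, 0x8899_aabb_ccdd_eeffu64);
    let hits = (0u64..1_000_000).filter(|&s| seed_matches(s, seq_hi, seq_lo)).count();
    println!("hits in the first million seeds: {hits}");
}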

229  src/main.wgsl  Normal file

@@ -0,0 +1,229 @@
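// A 64-bit integer emulated as two u32 halves (hi, lo); WGSL has no native u64 type.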
struct U64 {
hi: u32,
lo: u32,
}
struct Xoroshiro128PlusPlusState {
s0: U64,
s1: U64,
}
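// 64-bit rotate-left helpers for the fixed distances xoroshiro128++ needs (17, 28, 49).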
fn rotl17(x: U64) -> U64 {
return U64(
(x.hi << 17u) | (x.lo >> 15u),
(x.hi >> 15u) | (x.lo << 17u)
);
}
fn rotl28(x: U64) -> U64 {
return U64(
(x.hi << 28u) | (x.lo >> 4u),
(x.hi >> 4u) | (x.lo << 28u)
);
}
fn rotl49(x: U64) -> U64 {
return U64(
(x.lo << 17u) | (x.hi >> 15u),
(x.lo >> 15u) | (x.hi << 17u)
);
}
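// 64-bit add: the carry out of the low halves is detected without overflowing a u32
// by summing the halved low words plus the AND of their lowest bits and checking bit 31.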
fn add(lhs: U64, rhs: U64) -> U64 {
if (((lhs.lo >> 1u) + (rhs.lo >> 1u) + ((lhs.lo & 1u) & (rhs.lo & 1u))) >> 31u) == 1u {
return U64(
lhs.hi + rhs.hi + 1u,
lhs.lo + rhs.lo
);
} else {
return U64(
lhs.hi + rhs.hi,
lhs.lo + rhs.lo
);
}
}
fn next(state: ptr<function, Xoroshiro128PlusPlusState>) -> U64 {
let ret: U64 = add(rotl17(add((*state).s0, (*state).s1)), (*state).s0);
(*state).s1.hi ^= (*state).s0.hi;
(*state).s1.lo ^= (*state).s0.lo;
(*state).s0 = rotl49((*state).s0);
(*state).s0.hi ^= (*state).s1.hi ^ (((*state).s1.hi << 21u) | ((*state).s1.lo >> 11u));
(*state).s0.lo ^= (*state).s1.lo ^ ((*state).s1.lo << 21u);
(*state).s1 = rotl28((*state).s1);
return ret;
}
fn inc(x: U64) -> U64 {
return add(x, U64(0u, 1u));
}
fn inv(x: U64) -> U64 {
return U64(~x.hi, ~x.lo);
}
fn neg(x: U64) -> U64 {
return inc(inv(x));
}
fn abs64(x: U64) -> U64 {
if (x.hi >> 31u) == 1u {
return neg(x);
} else {
return x;
}
}
fn smul(lhs: U64, rhs: U64) -> U64 {
let sign = ((rhs.hi >> 31u) ^ (lhs.hi >> 31u)) == 1u;
let lhs = abs64(lhs);
let rhs = abs64(rhs);
if sign {
return neg(umul(lhs, rhs));
} else {
return umul(lhs, rhs);
}
}
fn sftr(lhs: U64, rhs: u32) -> U64 {
return U64(
(lhs.hi << rhs) | (lhs.lo >> (32u - rhs)),
lhs.lo << rhs
);
}
fn umul32_hi(lhs: u32, rhs: u32) -> u32 {
let lhs_hi = lhs >> 16u;
let lhs_lo = lhs & 0xFFFFu;
let rhs_hi = rhs >> 16u;
let rhs_lo = rhs & 0xFFFFu;
let hi_hi = lhs_hi * rhs_hi;
let hi_lo = lhs_hi * rhs_lo;
let lo_hi = lhs_lo * rhs_hi;
let lo_lo = lhs_lo * rhs_lo;
return hi_hi + (hi_lo >> 16u) + (lo_hi >> 16u) + (((lo_lo >> 16u) + (hi_lo & 0xFFFFu) + (lo_hi & 0xFFFFu)) >> 16u);
}
fn umul(lhs: U64, rhs: U64) -> U64 {
let hi1 = lhs.hi * rhs.lo;
let hi2 = lhs.lo * rhs.hi;
let lo = lhs.lo * rhs.lo;
let hi = hi1 + hi2 + umul32_hi(lhs.lo, rhs.lo);
return U64(hi, lo);
}
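// Despite the "sftl" names, the next three helpers are logical right shifts by
// 30, 27 and 31, the shift distances mix_stafford_13 needs.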
fn sftl30(x: U64) -> U64 {
return U64(
(x.hi >> 30u),
(x.hi << 2u) | (x.lo >> 30u)
);
}
fn sftl27(x: U64) -> U64 {
return U64(
(x.hi >> 27u),
(x.hi << 5u) | (x.lo >> 27u)
);
}
fn sftl31(x: U64) -> U64 {
return U64(
(x.hi >> 31u),
(x.hi << 1u) | (x.lo >> 31u)
);
}
fn xor(lhs: U64, rhs: U64) -> U64 {
return U64(
lhs.hi ^ rhs.hi,
lhs.lo ^ rhs.lo
);
}
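// Stafford variant 13 of the SplitMix64 finalizer, used to initialize the RNG state.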
fn mix_stafford_13(x: U64) -> U64 {
let x1 = smul(xor(x, sftl30(x)), U64(0xbf58476du, 0x1ce4e5b9u));
let x2 = smul(xor(x1, sftl27(x1)), U64(0x94d049bbu, 0x133111ebu));
return xor(x2, sftl31(x2));
}
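// Derive the xoroshiro128++ state for one candidate world seed, folding in the
// md5-derived sequence hash bound below as `seq_hash`.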
fn derive_from_world(seed: U64) -> Xoroshiro128PlusPlusState {
let s0 = U64(seed.hi ^ 0x6a09e667u, seed.lo ^ 0xf3bcc909u);
let s1 = add(s0, U64(0x9e3779b9u, 0x7f4a7c15u));
return Xoroshiro128PlusPlusState(
mix_stafford_13(xor(s0, U64(seq_hash.w, seq_hash.z))),
mix_stafford_13(xor(s1, U64(seq_hash.y, seq_hash.x)))
);
}
fn to_vec2(x: U64) -> vec2<u32> {
return vec2(x.lo, x.hi);
}
@group(0)
@binding(0)
var<storage, read> base_seed: vec2<u32>;
@group(1)
@binding(0)
var<storage, read_write> results: array<u32>;
@group(2)
@binding(0)
var<storage, read> seq_hash: vec4<u32>;
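// One invocation per candidate seed: leave 0 in `results` when all 32 draws of
// nextInt(2) equal 1, otherwise write 1. The host treats a 0 as a hit.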
@compute
@workgroup_size(64)
fn main(
@builtin(local_invocation_index) local_invocation_index: u32,
@builtin(workgroup_id) workgroup_id: vec3<u32>,
@builtin(num_workgroups) num_workgroups: vec3<u32>,
) {
let workgroup_idx = workgroup_id.x;
let world_seed = add(U64(base_seed.y, base_seed.x), U64(0u, local_invocation_index + (workgroup_idx * 64u)));
var state = derive_from_world(world_seed);
results[local_invocation_index + (workgroup_idx * 64u)] = 0u;
for (var i: u32 = 0u; i < 32u; i++) {
let value = next_int_with_max(&state, 2u);
if value != 1u {
results[local_invocation_index + (workgroup_idx * 64u)] = 1u;
break;
}
}
}
fn lt(lhs: U64, rhs: u32) -> bool {
if lhs.hi == 0u {
return lhs.lo < rhs;
} else {
return false;
}
}
fn next_int(state: ptr<function, Xoroshiro128PlusPlusState>) -> i32 {
return i32(next(state).lo);
}
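// Bounded draw: multiply a 32-bit sample by `max` and keep the high word,
// rejecting samples that fall in the biased low range.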
fn next_int_with_max(state: ptr<function, Xoroshiro128PlusPlusState>, max: u32) -> u32 {
var l: U64 = U64(0u, next(state).lo);
var m: U64 = smul(l, U64(0u, max));
var n: U64 = U64(0u, m.lo);
if (lt(n, max)) {
for(var j: u32 = (((~max) + 1u) % max); lt(n, j); n = U64(0u, m.lo)) {
l = U64(0u, next(state).lo);
m = smul(l, U64(0u, max));
}
}
return m.hi;
}
fn next_float(state: ptr<function, Xoroshiro128PlusPlusState>) -> f32 {
return f32(next(state).hi >> 8u) * 5.9604645E-8;
}
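As a quick host-side cross-check (a sketch, not part of the commit), the carry trick used by `add` above can be validated against native u64 arithmetic: the carry out of the low 32-bit halves is recovered without overflowing a u32 by summing the halved low words plus the AND of their lowest bits and inspecting bit 31. `add_emulated` is a hypothetical helper name.

// Emulated 64-bit add on (hi, lo) u32 pairs, matching `add` in main.wgsl.
fn add_emulated(lhs: (u32, u32), rhs: (u32, u32)) -> (u32, u32) {
    let carry = ((lhs.1 >> 1) + (rhs.1 >> 1) + (lhs.1 & rhs.1 & 1)) >> 31;
    (
        lhs.0.wrapping_add(rhs.0).wrapping_add(carry),
        lhs.1.wrapping_add(rhs.1),
    )
}

fn main() {
    let split = |x: u64| ((x >> 32) as u32, x as u32);
    let join = |(hi, lo): (u32, u32)| ((hi as u64) << 32) | lo as u64;
    for &(a, b) in &[
        (0u64, 0u64),
        (u64::MAX, 1),
        (0x0000_0000_ffff_ffff, 0x0000_0000_0000_0001),
        (0xdead_beef_cafe_f00d, 0x1234_5678_9abc_def0),
    ] {
        // The emulated sum must agree with native wrapping 64-bit addition.
        assert_eq!(join(add_emulated(split(a), split(b))), a.wrapping_add(b));
    }
    println!("emulated 64-bit add agrees with native wrapping_add on the samples");
}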