feat: the entire thing
This commit is contained in:
commit
b908b26dab
8 changed files with 1695 additions and 0 deletions
1
.envrc
Normal file
1
.envrc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
use flake
|
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
/target
|
||||||
|
/.direnv
|
1143
Cargo.lock
generated
Normal file
1143
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
13
Cargo.toml
Normal file
13
Cargo.toml
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
[package]
|
||||||
|
name = "seedcracker"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.71"
|
||||||
|
bytemuck = "1.13.1"
|
||||||
|
md5 = "0.7.0"
|
||||||
|
tokio = { version = "1.28.2", features = ["full"] }
|
||||||
|
wgpu = "0.16.1"
|
59
flake.lock
Normal file
59
flake.lock
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
{
|
||||||
|
"nodes": {
|
||||||
|
"flake-utils": {
|
||||||
|
"inputs": {
|
||||||
|
"systems": "systems"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1685518550,
|
||||||
|
"narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nixpkgs": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1686338508,
|
||||||
|
"narHash": "sha256-F0bZVV5ChaduBQwAdee0o3zazmCufbXxn/teqsVRqXU=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "66c418d299cd454bd74644eaad905ec4ba2f81a1",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"id": "nixpkgs",
|
||||||
|
"type": "indirect"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": {
|
||||||
|
"inputs": {
|
||||||
|
"flake-utils": "flake-utils",
|
||||||
|
"nixpkgs": "nixpkgs"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systems": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": "root",
|
||||||
|
"version": 7
|
||||||
|
}
|
24
flake.nix
Normal file
24
flake.nix
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
{
  inputs = {
    nixpkgs.url = "nixpkgs";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};

        # Shared libraries that must be discoverable at run time.
        libraries = with pkgs; [
          libglvnd
        ];
      in
      {
        # `devShells.default` replaces the deprecated `devShell` output;
        # `nix develop` and direnv's `use flake` both pick it up.
        devShells.default = pkgs.mkShell {
          # Only append the previous LD_LIBRARY_PATH when it is non-empty, so
          # the search path never ends in an empty entry (which the dynamic
          # loader would treat as the current directory).
          shellHook = ''
            export LD_LIBRARY_PATH=${pkgs.lib.makeLibraryPath libraries}''${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
          '';
        };
      });
}
|
224
src/main.rs
Normal file
224
src/main.rs
Normal file
|
@ -0,0 +1,224 @@
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use wgpu::{Instance, util::DeviceExt};
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
let seq_id = "minecraft:entities/blaze";
|
||||||
|
let batch_size = 512;
|
||||||
|
let mut base_seed = 0u64;
|
||||||
|
|
||||||
|
let mut seq_seed = md5::compute(seq_id).0;
|
||||||
|
seq_seed.reverse();
|
||||||
|
|
||||||
|
let instance = Instance::default();
|
||||||
|
let adapter = instance
|
||||||
|
.request_adapter(&wgpu::RequestAdapterOptions::default())
|
||||||
|
.await
|
||||||
|
.ok_or(anyhow::anyhow!("could not get adapter"))?;
|
||||||
|
let (device, queue) = adapter
|
||||||
|
.request_device(
|
||||||
|
&wgpu::DeviceDescriptor {
|
||||||
|
label: None,
|
||||||
|
features: wgpu::Features::empty(),
|
||||||
|
limits: wgpu::Limits::downlevel_defaults(),
|
||||||
|
},
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
let cs_module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
|
||||||
|
label: None,
|
||||||
|
source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("main.wgsl"))),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Instantiates buffer without data.
|
||||||
|
// `usage` of buffer specifies how it can be used:
|
||||||
|
// `BufferUsages::MAP_READ` allows it to be read (outside the shader).
|
||||||
|
// `BufferUsages::COPY_DST` allows it to be the destination of the copy.
|
||||||
|
let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
|
||||||
|
label: None,
|
||||||
|
size: (batch_size * 64 * std::mem::size_of::<u32>()) as wgpu::BufferAddress,
|
||||||
|
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
|
||||||
|
mapped_at_creation: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Instantiates buffer without data.
|
||||||
|
// `usage` of buffer specifies how it can be used:
|
||||||
|
// `BufferUsages::MAP_READ` allows it to be read (outside the shader).
|
||||||
|
// `BufferUsages::COPY_DST` allows it to be the destination of the copy.
|
||||||
|
let staging_buffer_2 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
||||||
|
label: None,
|
||||||
|
contents: bytemuck::cast_slice(&base_seed.to_ne_bytes()),
|
||||||
|
usage: wgpu::BufferUsages::MAP_WRITE | wgpu::BufferUsages::COPY_SRC,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Instantiates buffer with data (`numbers`).
|
||||||
|
// Usage allowing the buffer to be:
|
||||||
|
// A storage buffer (can be bound within a bind group and thus available to a shader).
|
||||||
|
// The destination of a copy.
|
||||||
|
// The source of a copy.
|
||||||
|
let storage_buffer_0 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
||||||
|
label: Some("Base seed"),
|
||||||
|
contents: bytemuck::cast_slice(&base_seed.to_ne_bytes()),
|
||||||
|
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Instantiates buffer with data (`numbers`).
|
||||||
|
// Usage allowing the buffer to be:
|
||||||
|
// A storage buffer (can be bound within a bind group and thus available to a shader).
|
||||||
|
// The destination of a copy.
|
||||||
|
// The source of a copy.
|
||||||
|
let storage_buffer_1 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
||||||
|
label: Some("Result Buffer"),
|
||||||
|
contents: &vec![0_u8; batch_size * 64 * std::mem::size_of::<u32>()],
|
||||||
|
usage: wgpu::BufferUsages::STORAGE
|
||||||
|
| wgpu::BufferUsages::COPY_SRC,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Instantiates buffer with data (`numbers`).
|
||||||
|
// Usage allowing the buffer to be:
|
||||||
|
// A storage buffer (can be bound within a bind group and thus available to a shader).
|
||||||
|
// The destination of a copy.
|
||||||
|
// The source of a copy.
|
||||||
|
let storage_buffer_2 = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
||||||
|
label: Some("Sequence MD5"),
|
||||||
|
contents: &seq_seed,
|
||||||
|
usage: wgpu::BufferUsages::STORAGE,
|
||||||
|
});
|
||||||
|
|
||||||
|
// A bind group defines how buffers are accessed by shaders.
|
||||||
|
// It is to WebGPU what a descriptor set is to Vulkan.
|
||||||
|
// `binding` here refers to the `binding` of a buffer in the shader (`layout(set = 0, binding = 0) buffer`).
|
||||||
|
|
||||||
|
// A pipeline specifies the operation of a shader
|
||||||
|
|
||||||
|
// Instantiates the pipeline.
|
||||||
|
let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
|
||||||
|
label: None,
|
||||||
|
layout: None,
|
||||||
|
module: &cs_module,
|
||||||
|
entry_point: "main",
|
||||||
|
});
|
||||||
|
|
||||||
|
// Instantiates the bind group, once again specifying the binding of buffers.
|
||||||
|
let bind_group_0_layout = compute_pipeline.get_bind_group_layout(0);
|
||||||
|
let bind_group_0 = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||||
|
label: None,
|
||||||
|
layout: &bind_group_0_layout,
|
||||||
|
entries: &[wgpu::BindGroupEntry {
|
||||||
|
binding: 0,
|
||||||
|
resource: storage_buffer_0.as_entire_binding(),
|
||||||
|
}],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Instantiates the bind group, once again specifying the binding of buffers.
|
||||||
|
let bind_group_1_layout = compute_pipeline.get_bind_group_layout(1);
|
||||||
|
let bind_group_1 = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||||
|
label: None,
|
||||||
|
layout: &bind_group_1_layout,
|
||||||
|
entries: &[wgpu::BindGroupEntry {
|
||||||
|
binding: 0,
|
||||||
|
resource: storage_buffer_1.as_entire_binding(),
|
||||||
|
}],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Instantiates the bind group, once again specifying the binding of buffers.
|
||||||
|
let bind_group_2_layout = compute_pipeline.get_bind_group_layout(2);
|
||||||
|
let bind_group_2 = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||||
|
label: None,
|
||||||
|
layout: &bind_group_2_layout,
|
||||||
|
entries: &[wgpu::BindGroupEntry {
|
||||||
|
binding: 0,
|
||||||
|
resource: storage_buffer_2.as_entire_binding(),
|
||||||
|
}],
|
||||||
|
});
|
||||||
|
|
||||||
|
loop {
|
||||||
|
// println!("testing seeds {}", base_seeds[0]);
|
||||||
|
// A command encoder executes one or many pipelines.
|
||||||
|
// It is to WebGPU what a command buffer is to Vulkan.
|
||||||
|
let mut encoder =
|
||||||
|
device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
|
||||||
|
|
||||||
|
encoder.copy_buffer_to_buffer(&staging_buffer_2, 0, &storage_buffer_0, 0, (std::mem::size_of::<u64>()) as wgpu::BufferAddress);
|
||||||
|
|
||||||
|
{
|
||||||
|
let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
|
||||||
|
cpass.set_pipeline(&compute_pipeline);
|
||||||
|
cpass.set_bind_group(0, &bind_group_0, &[]);
|
||||||
|
cpass.set_bind_group(1, &bind_group_1, &[]);
|
||||||
|
cpass.set_bind_group(2, &bind_group_2, &[]);
|
||||||
|
cpass.dispatch_workgroups(batch_size as u32, 1, 1); // Number of cells to run, the (x,y,z) size of item being processed
|
||||||
|
}
|
||||||
|
// Sets adds copy operation to command encoder.
|
||||||
|
// Will copy data from storage buffer on GPU to staging buffer on CPU.
|
||||||
|
encoder.copy_buffer_to_buffer(&storage_buffer_1, 0, &staging_buffer, 0, (batch_size * 64 * std::mem::size_of::<u32>()) as wgpu::BufferAddress);
|
||||||
|
|
||||||
|
// Submits command encoder for processing
|
||||||
|
queue.submit(Some(encoder.finish()));
|
||||||
|
|
||||||
|
// Note that we're not calling `.await` here.
|
||||||
|
let buffer_slice = staging_buffer.slice(..);
|
||||||
|
// Sets the buffer up for mapping, sending over the result of the mapping back to us when it is finished.
|
||||||
|
let (sender, receiver) = tokio::sync::oneshot::channel();
|
||||||
|
buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
|
||||||
|
|
||||||
|
// Poll the device in a blocking manner so that our future resolves.
|
||||||
|
// In an actual application, `device.poll(...)` should
|
||||||
|
// be called in an event loop or on another thread.
|
||||||
|
device.poll(wgpu::Maintain::Wait);
|
||||||
|
|
||||||
|
// Awaits until `buffer_future` can be read from
|
||||||
|
receiver.await??;
|
||||||
|
// Gets contents of buffer
|
||||||
|
let data = buffer_slice.get_mapped_range();
|
||||||
|
// Since contents are got in bytes, this converts these bytes back to u32
|
||||||
|
let result: Vec<i32> = bytemuck::cast_slice(&data).to_vec();
|
||||||
|
|
||||||
|
// With the current interface, we have to make sure all mapped views are
|
||||||
|
// dropped before we unmap the buffer.
|
||||||
|
drop(data);
|
||||||
|
staging_buffer.unmap(); // Unmaps buffer from memory
|
||||||
|
// If you are familiar with C++ these 2 lines can be thought of similarly to:
|
||||||
|
// delete myPointer;
|
||||||
|
// myPointer = NULL;
|
||||||
|
// It effectively frees the memory
|
||||||
|
|
||||||
|
// Returns data from buffer
|
||||||
|
for (i, n) in result.into_iter().enumerate() {
|
||||||
|
if n == 0 {
|
||||||
|
let seed = base_seed + i as u64;
|
||||||
|
println!("{:?}", seed);
|
||||||
|
std::process::exit(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
base_seed += batch_size as u64 * 64;
|
||||||
|
|
||||||
|
let buffer_slice = staging_buffer_2.slice(..);
|
||||||
|
// Sets the buffer up for mapping, sending over the result of the mapping back to us when it is finished.
|
||||||
|
let (sender, receiver) = tokio::sync::oneshot::channel();
|
||||||
|
buffer_slice.map_async(wgpu::MapMode::Write, move |v| sender.send(v).unwrap());
|
||||||
|
|
||||||
|
// Poll the device in a blocking manner so that our future resolves.
|
||||||
|
// In an actual application, `device.poll(...)` should
|
||||||
|
// be called in an event loop or on another thread.
|
||||||
|
device.poll(wgpu::Maintain::Wait);
|
||||||
|
|
||||||
|
// Awaits until `buffer_future` can be read from
|
||||||
|
receiver.await??;
|
||||||
|
let mut data = buffer_slice.get_mapped_range_mut();
|
||||||
|
data.copy_from_slice(bytemuck::cast_slice(&base_seed.to_ne_bytes()));
|
||||||
|
|
||||||
|
// With the current interface, we have to make sure all mapped views are
|
||||||
|
// dropped before we unmap the buffer.
|
||||||
|
drop(data);
|
||||||
|
staging_buffer_2.unmap(); // Unmaps buffer from memory
|
||||||
|
// If you are familiar with C++ these 2 lines can be thought of similarly to:
|
||||||
|
// delete myPointer;
|
||||||
|
// myPointer = NULL;
|
||||||
|
// It effectively frees the memory
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
229
src/main.wgsl
Normal file
229
src/main.wgsl
Normal file
|
@ -0,0 +1,229 @@
|
||||||
|
// Emulated unsigned 64-bit integer built from two 32-bit words.
// The positional constructor order is `U64(hi, lo)`.
struct U64 {
    // Most significant 32 bits.
    hi: u32,
    // Least significant 32 bits.
    lo: u32,
}
|
||||||
|
|
||||||
|
// The two 64-bit state words advanced by `next`.
struct Xoroshiro128PlusPlusState {
    // First state word.
    s0: U64,
    // Second state word.
    s1: U64,
}
|
||||||
|
|
||||||
|
// Rotates the 64-bit value `x` left by 17 bits.
fn rotl17(x: U64) -> U64 {
    // Each half keeps its own bits moved up by 17 and takes the top 17 bits
    // of the other half in its low positions.
    let upper = (x.hi << 17u) | (x.lo >> 15u);
    let lower = (x.lo << 17u) | (x.hi >> 15u);
    return U64(upper, lower);
}
|
||||||
|
|
||||||
|
// Rotates the 64-bit value `x` left by 28 bits.
fn rotl28(x: U64) -> U64 {
    // Each half keeps its own bits moved up by 28 and takes the top 28 bits
    // of the other half in its low positions.
    let upper = (x.hi << 28u) | (x.lo >> 4u);
    let lower = (x.lo << 28u) | (x.hi >> 4u);
    return U64(upper, lower);
}
|
||||||
|
|
||||||
|
// Rotates the 64-bit value `x` left by 49 bits.
fn rotl49(x: U64) -> U64 {
    // 49 = 32 + 17: exchange the two halves, then rotate left by 17.
    let upper = (x.lo << 17u) | (x.hi >> 15u);
    let lower = (x.hi << 17u) | (x.lo >> 15u);
    return U64(upper, lower);
}
|
||||||
|
|
||||||
|
// Adds two 64-bit values, wrapping on overflow.
fn add(lhs: U64, rhs: U64) -> U64 {
    let low_sum = lhs.lo + rhs.lo;
    // The 32-bit addition wrapped exactly when the sum is smaller than an
    // operand; in that case one is carried into the high word.
    let carry = select(0u, 1u, low_sum < lhs.lo);
    return U64(lhs.hi + rhs.hi + carry, low_sum);
}
|
||||||
|
|
||||||
|
// Advances the generator and returns the next 64-bit output:
// `rotl17(s0 + s1) + s0`, computed from the state before it is updated.
fn next(state: ptr<function, Xoroshiro128PlusPlusState>) -> U64 {
    let old_s0 = (*state).s0;
    let old_s1 = (*state).s1;
    let output = add(rotl17(add(old_s0, old_s1)), old_s0);

    // t = s1 ^ s0
    let t = xor(old_s1, old_s0);
    // t << 21 across both words
    let t_shl21 = U64((t.hi << 21u) | (t.lo >> 11u), t.lo << 21u);

    // s0' = rotl49(s0) ^ t ^ (t << 21), s1' = rotl28(t)
    (*state).s0 = xor(xor(rotl49(old_s0), t), t_shl21);
    (*state).s1 = rotl28(t);

    return output;
}
|
||||||
|
|
||||||
|
// Returns `x + 1`, wrapping on overflow.
fn inc(x: U64) -> U64 {
    let one = U64(0u, 1u);
    return add(x, one);
}
|
||||||
|
|
||||||
|
// Returns the bitwise complement of `x`.
fn inv(x: U64) -> U64 {
    // XOR with all ones flips every bit of a word.
    let upper = x.hi ^ 0xFFFFFFFFu;
    let lower = x.lo ^ 0xFFFFFFFFu;
    return U64(upper, lower);
}
|
||||||
|
|
||||||
|
// Returns the two's-complement negation of `x` (complement, then add one).
fn neg(x: U64) -> U64 {
    let complement = inv(x);
    return inc(complement);
}
|
||||||
|
|
||||||
|
// Interprets `x` as a signed 64-bit value and returns its magnitude:
// values with the top bit set are negated, all others pass through.
fn abs64(x: U64) -> U64 {
    let is_negative = (x.hi & 0x80000000u) != 0u;
    if is_negative {
        return neg(x);
    }
    return x;
}
|
||||||
|
|
||||||
|
// Multiplies two 64-bit values interpreted as signed integers and returns
// the low 64 bits of the product.
//
// The low 64 bits of a two's-complement product do not depend on whether the
// operands are read as signed or unsigned, so this forwards to `umul`. The
// earlier take-magnitude / multiply / re-negate sequence produced the same
// bits for every input, but it branched on the sign and re-declared the
// `lhs` / `rhs` parameters with `let`, which stricter WGSL front ends reject
// as a redeclaration.
fn smul(lhs: U64, rhs: U64) -> U64 {
    return umul(lhs, rhs);
}
|
||||||
|
|
||||||
|
// Shifts the 64-bit value `lhs` LEFT by `rhs` bits, filling with zeros.
// NOTE(review): the name suggests a right shift, but the implementation has
// always moved bits towards the high word; that direction is kept.
//
// Shift amounts of 0 and of 32 or more are handled explicitly: WGSL reduces
// a runtime 32-bit shift amount modulo 32, so the general formula below is
// only valid for 1..31 (for `rhs == 0` it would OR the low word into the
// high word).
fn sftr(lhs: U64, rhs: u32) -> U64 {
    if rhs == 0u {
        return lhs;
    }
    if rhs >= 64u {
        return U64(0u, 0u);
    }
    if rhs >= 32u {
        // Only bits of the low word survive, and they all land in the high word.
        return U64(lhs.lo << (rhs - 32u), 0u);
    }
    return U64(
        (lhs.hi << rhs) | (lhs.lo >> (32u - rhs)),
        lhs.lo << rhs
    );
}
|
||||||
|
|
||||||
|
// Returns the upper 32 bits of the 64-bit product `lhs * rhs`, using
// 16-bit limbs so that no partial product overflows 32 bits.
fn umul32_hi(lhs: u32, rhs: u32) -> u32 {
    let a_hi = lhs >> 16u;
    let a_lo = lhs & 0xFFFFu;
    let b_hi = rhs >> 16u;
    let b_lo = rhs & 0xFFFFu;

    let p_hh = a_hi * b_hi;
    let p_hl = a_hi * b_lo;
    let p_lh = a_lo * b_hi;
    let p_ll = a_lo * b_lo;

    // Everything that contributes to bits 16..31 of the product; whatever
    // spills past 16 bits here is the carry into the upper word.
    let middle = (p_ll >> 16u) + (p_hl & 0xFFFFu) + (p_lh & 0xFFFFu);

    return p_hh + (p_hl >> 16u) + (p_lh >> 16u) + (middle >> 16u);
}
|
||||||
|
|
||||||
|
// Multiplies two 64-bit values and returns the low 64 bits of the product.
fn umul(lhs: U64, rhs: U64) -> U64 {
    // The cross terms only affect the high word; their own upper halves fall
    // outside the 64-bit result.
    let cross = lhs.hi * rhs.lo + lhs.lo * rhs.hi;
    // Carry from the low-word product into the high word.
    let carry = umul32_hi(lhs.lo, rhs.lo);
    return U64(cross + carry, lhs.lo * rhs.lo);
}
|
||||||
|
|
||||||
|
// Logical (zero-filling) shift of `x` by 30 bits towards the low end.
// Despite the `sftl` prefix, bits move from the high word into the low word.
fn sftl30(x: U64) -> U64 {
    let upper = x.hi >> 30u;
    let lower = (x.hi << 2u) | (x.lo >> 30u);
    return U64(upper, lower);
}
|
||||||
|
|
||||||
|
// Logical (zero-filling) shift of `x` by 27 bits towards the low end.
// Despite the `sftl` prefix, bits move from the high word into the low word.
fn sftl27(x: U64) -> U64 {
    let upper = x.hi >> 27u;
    let lower = (x.hi << 5u) | (x.lo >> 27u);
    return U64(upper, lower);
}
|
||||||
|
|
||||||
|
// Logical (zero-filling) shift of `x` by 31 bits towards the low end.
// Despite the `sftl` prefix, bits move from the high word into the low word.
fn sftl31(x: U64) -> U64 {
    let upper = x.hi >> 31u;
    let lower = (x.hi << 1u) | (x.lo >> 31u);
    return U64(upper, lower);
}
|
||||||
|
|
||||||
|
// Bitwise exclusive-or of two 64-bit values, applied word by word.
fn xor(lhs: U64, rhs: U64) -> U64 {
    let upper = lhs.hi ^ rhs.hi;
    let lower = lhs.lo ^ rhs.lo;
    return U64(upper, lower);
}
|
||||||
|
|
||||||
|
// 64-bit mixing function: three xor-shift steps (by 30, 27 and 31 bits)
// interleaved with multiplications by 0xbf58476d1ce4e5b9 and
// 0x94d049bb133111eb.
fn mix_stafford_13(x: U64) -> U64 {
    var z = xor(x, sftl30(x));
    z = smul(z, U64(0xbf58476du, 0x1ce4e5b9u));
    z = xor(z, sftl27(z));
    z = smul(z, U64(0x94d049bbu, 0x133111ebu));
    return xor(z, sftl31(z));
}
|
||||||
|
|
||||||
|
// Builds the initial generator state for the world seed `seed`.
//
// The seed is widened to two 64-bit words (seed ^ 0x6a09e667f3bcc909, and
// that value plus 0x9e3779b97f4a7c15). Each word is XOR-ed with one half of
// the 128-bit `seq_hash` and then passed through `mix_stafford_13`.
fn derive_from_world(seed: U64) -> Xoroshiro128PlusPlusState {
    let first = xor(seed, U64(0x6a09e667u, 0xf3bcc909u));
    let second = add(first, U64(0x9e3779b9u, 0x7f4a7c15u));

    // `seq_hash` components are consumed from `w` down to `x`.
    let hash_first = U64(seq_hash.w, seq_hash.z);
    let hash_second = U64(seq_hash.y, seq_hash.x);

    return Xoroshiro128PlusPlusState(
        mix_stafford_13(xor(first, hash_first)),
        mix_stafford_13(xor(second, hash_second))
    );
}
|
||||||
|
|
||||||
|
// Packs `x` into a two-component vector with the low word first
// (`.x` = low word, `.y` = high word).
fn to_vec2(x: U64) -> vec2<u32> {
    return vec2<u32>(x.lo, x.hi);
}
|
||||||
|
|
||||||
|
// First seed of the current batch: `.x` holds the low word, `.y` the high word.
@group(0) @binding(0)
var<storage, read> base_seed: vec2<u32>;

// One flag per tested seed, written by `main`: 0 = match, 1 = no match.
@group(1) @binding(0)
var<storage, read_write> results: array<u32>;

// 128-bit hash mixed into the generator state by `derive_from_world`.
@group(2) @binding(0)
var<storage, read> seq_hash: vec4<u32>;
|
||||||
|
|
||||||
|
// Compute entry point: each invocation tests one world seed.
//
// The tested seed is `base_seed` plus the invocation's global index
// (`workgroup_id.x * 64 + local_invocation_index`). The seed matches when
// 32 consecutive `next_int_with_max(&state, 2u)` draws all return 1; the
// invocation then stores 0 in its `results` slot, otherwise it stores 1.
@compute @workgroup_size(64)
fn main(
    @builtin(local_invocation_index) local_invocation_index: u32,
    @builtin(workgroup_id) workgroup_id: vec3<u32>,
    @builtin(num_workgroups) num_workgroups: vec3<u32>,
) {
    let slot = local_invocation_index + (workgroup_id.x * 64u);
    let world_seed = add(U64(base_seed.y, base_seed.x), U64(0u, slot));
    var state = derive_from_world(world_seed);

    // Assume a match until a draw proves otherwise.
    var flag = 0u;
    for (var i: u32 = 0u; i < 32u; i++) {
        if next_int_with_max(&state, 2u) != 1u {
            flag = 1u;
            break;
        }
    }
    results[slot] = flag;
}
|
||||||
|
|
||||||
|
// Returns true when the 64-bit value `lhs` is smaller than the 32-bit `rhs`.
fn lt(lhs: U64, rhs: u32) -> bool {
    // Any non-zero high word already makes `lhs` larger than every u32.
    return (lhs.hi == 0u) && (lhs.lo < rhs);
}
|
||||||
|
|
||||||
|
// Advances the generator and returns the low 32 bits of its output as an i32.
fn next_int(state: ptr<function, Xoroshiro128PlusPlusState>) -> i32 {
    let raw = next(state);
    return i32(raw.lo);
}
|
||||||
|
|
||||||
|
// Draws an integer in `[0, max)` by multiply-and-shift with rejection.
//
// The low 32 bits of a generator output are multiplied by `max`; the high
// word of the product is the result. When the low word of the product is
// below `max`, draws are repeated until that low word reaches the threshold
// `(2^32 - max) % max`. `max` must be non-zero.
fn next_int_with_max(state: ptr<function, Xoroshiro128PlusPlusState>, max: u32) -> u32 {
    let bound = U64(0u, max);
    var product: U64 = smul(U64(0u, next(state).lo), bound);

    if (product.lo < max) {
        // `~max + 1` is the wrapping negation of `max`, i.e. 2^32 - max.
        let threshold: u32 = ((~max) + 1u) % max;
        loop {
            if (product.lo >= threshold) {
                break;
            }
            product = smul(U64(0u, next(state).lo), bound);
        }
    }

    return product.hi;
}
|
||||||
|
|
||||||
|
// Advances the generator and converts the top 24 bits of its output into a
// float by scaling with 5.9604645E-8.
fn next_float(state: ptr<function, Xoroshiro128PlusPlusState>) -> f32 {
    let top_bits = next(state).hi >> 8u;
    return f32(top_bits) * 5.9604645E-8;
}
|
Loading…
Reference in a new issue