commit 24937326f778f9b4af4e0af0f77be87399ebe523 Author: Skye Date: Wed Sep 21 18:05:52 2022 +0900 proc macros are black magic diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3a718a4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +/Cargo.lock +/.direnv \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..cb55ef1 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "uwurandom-rs" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dev-dependencies] +rand_pcg = "0.3.1" + +[dependencies] +rand_core = "0.6.4" +uwurandom-proc-macros = { version = "0.1.0", path = "uwurandom-proc-macros" } + +[workspace] +members = ["uwurandom-proc-macros"] diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..1ed5f08 --- /dev/null +++ b/flake.lock @@ -0,0 +1,77 @@ +{ + "nodes": { + "naersk": { + "inputs": { + "nixpkgs": "nixpkgs" + }, + "locked": { + "lastModified": 1659610603, + "narHash": "sha256-LYgASYSPYo7O71WfeUOaEUzYfzuXm8c8eavJcel+pfI=", + "owner": "nix-community", + "repo": "naersk", + "rev": "c6a45e4277fa58abd524681466d3450f896dc094", + "type": "github" + }, + "original": { + "owner": "nix-community", + "ref": "master", + "repo": "naersk", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1661353537, + "narHash": "sha256-1E2IGPajOsrkR49mM5h55OtYnU0dGyre6gl60NXKITE=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "0e304ff0d9db453a4b230e9386418fd974d5804a", + "type": "github" + }, + "original": { + "id": "nixpkgs", + "type": "indirect" + } + }, + "nixpkgs_2": { + "locked": { + "lastModified": 1661353537, + "narHash": "sha256-1E2IGPajOsrkR49mM5h55OtYnU0dGyre6gl60NXKITE=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": 
"0e304ff0d9db453a4b230e9386418fd974d5804a", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "naersk": "naersk", + "nixpkgs": "nixpkgs_2", + "utils": "utils" + } + }, + "utils": { + "locked": { + "lastModified": 1659877975, + "narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..1935265 --- /dev/null +++ b/flake.nix @@ -0,0 +1,26 @@ +{ + inputs = { + naersk.url = "github:nix-community/naersk/master"; + nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; + utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, utils, naersk }: + utils.lib.eachDefaultSystem (system: + let + pkgs = import nixpkgs { inherit system; }; + naersk-lib = pkgs.callPackage naersk { }; + in + { + defaultPackage = naersk-lib.buildPackage ./.; + + defaultApp = utils.lib.mkApp { + drv = self.defaultPackage."${system}"; + }; + + devShell = with pkgs; mkShell { + buildInputs = [ cargo rustc rustfmt pre-commit rustPackages.clippy ]; + RUST_SRC_PATH = rustPlatform.rustLibSrc; + }; + }); +} \ No newline at end of file diff --git a/markov.js b/markov.js new file mode 100644 index 0000000..071c128 --- /dev/null +++ b/markov.js @@ -0,0 +1,274 @@ +const { table } = require('console'); +const util = require('util'); + +// An array maps indices to values. Inverting it means returning a map of values to indices. 
/**
 * Inverts an array: returns an object mapping each value to its index.
 * If a value occurs more than once, the last index wins.
 *
 * @param {Array<string>} arr values to index
 * @returns {Object<string, number>} value -> index
 */
const invertArray = arr => {
    const inverted = {};
    for (let i = 0; i < arr.length; i++) {
        inverted[arr[i]] = i;
    }
    return inverted;
};

/**
 * Greatest common divisor of two non-negative integers (Euclid's algorithm).
 * gcd(n, 0) and gcd(0, n) both return n.
 */
const gcd = (a, b) => {
    if (b > a) {
        const tmp = a;
        a = b;
        b = tmp;
    }
    while (true) {
        if (b === 0) return a;
        a %= b;
        if (a === 0) return b;
        b %= a;
    }
};

/**
 * Counts n-gram -> next-character transitions of one string into `table`.
 *
 * @param {string} str training text
 * @param {number} order n-gram length
 * @param {Object} table mutated: table[ngram][nextChar] is the number of times nextChar followed ngram
 * @param {Set<string>} chars mutated: every character seen as a successor
 * @param {Set<string>} ngrams mutated: every n-gram seen, including the final one of `str`
 */
const generateMarkovIntoTable = (str, order, table, chars, ngrams) => {
    const lastStart = str.length - order;
    for (let i = 0; i < lastStart; i++) {
        const currentNgram = str.slice(i, i + order);
        ngrams.add(currentNgram);
        const nextChar = str[i + order];
        chars.add(nextChar);

        if (!Object.prototype.hasOwnProperty.call(table, currentNgram)) {
            table[currentNgram] = {};
        }
        const tableEntry = table[currentNgram];
        if (Object.prototype.hasOwnProperty.call(tableEntry, nextChar)) {
            tableEntry[nextChar]++;
        } else {
            tableEntry[nextChar] = 1;
        }
    }

    // The final n-gram has no successor inside this string, but other states can
    // transition into it, so it still needs an index.
    if (lastStart > 0) {
        ngrams.add(str.slice(lastStart));
    }
};

/**
 * Generates a Markov chain probability table from a string or array of strings.
 *
 * @param {string|string[]} strings training corpus
 * @param {number} [order=2] n-gram length
 * @returns {{table: Object, markovArr: Array, order: number, ngrams: {values: string[], indices: Object}}}
 *   `table` is the string-keyed count table; `markovArr[i]` describes the n-gram
 *   `ngrams.values[i]` as `{choices, totalProbability}`, where each choice is
 *   `{nextNgram, nextChar, cumulativeProbability}`. An n-gram that was never followed
 *   by anything in the corpus gets `{choices: [], totalProbability: 0}`.
 */
const generateMarkov = (strings, order = 2) => {
    const table = {};
    const chars = new Set();
    const ngrams = new Set();
    const corpus = Array.isArray(strings) ? strings : [strings];
    for (const str of corpus) {
        generateMarkovIntoTable(str, order, table, chars, ngrams);
    }

    const ngramsArr = Array.from(ngrams).sort();
    const ngramIndices = invertArray(ngramsArr);

    // Convert string-based table (slow) to numeric indices (fast)
    const markovArr = ngramsArr.map(ngram => {
        // A corpus may end in an n-gram that never has a successor; it has no row
        // in `table`, so treat it as a state with no outgoing transitions.
        const probs = Object.prototype.hasOwnProperty.call(table, ngram) ? table[ngram] : {};
        const probsArr = Object.keys(probs).map(nextChar => ({
            nextNgram: ngramIndices[ngram.slice(1) + nextChar],
            nextChar,
            probability: probs[nextChar]
        }));

        // sort by probability in descending order to minimize linear search steps
        probsArr.sort((a, b) => b.probability - a.probability);

        let totalProbability = 0;
        for (const entry of probsArr) {
            totalProbability += entry.probability;
            entry.cumulativeProbability = totalProbability;
            delete entry.probability;
        }

        // Scale the weights down by their common divisor. If every cumulative sum
        // is divisible by g, so is every individual weight, so ratios are unchanged.
        const probGCD = probsArr.reduce((acc, entry) => gcd(acc, entry.cumulativeProbability), 0);
        if (probGCD > 1) {
            for (const entry of probsArr) {
                entry.cumulativeProbability /= probGCD;
            }
            totalProbability /= probGCD;
        }

        return {choices: probsArr, totalProbability};
    });

    return {
        table,
        markovArr,
        order,
        ngrams: {values: ngramsArr, indices: ngramIndices}
    };
};

/**
 * Picks a key of `table` at random, weighted by its (integer) value.
 * Returns undefined when `table` has no keys.
 */
const weightedRand = (table) => {
    const tableSum = Object.values(table).reduce((prev, cur) => prev + cur, 0);
    const random = Math.floor(Math.random() * tableSum);
    let sum = 0;
    for (const key of Object.keys(table)) {
        sum += table[key];
        if (random < sum) return key;
    }
};

/**
 * Generates text from the string-keyed count table.
 * Stops early when the current n-gram has no recorded successor.
 *
 * @param {{table: Object, order: number}} markov result of generateMarkov
 * @param {number} len target length of the generated string
 * @param {string} start seed text (at least `order` characters)
 */
const generateFromTable = ({table, order}, len, start) => {
    let generated = start;

    while (generated.length < len) {
        const prev = generated.slice(generated.length - order, generated.length);
        if (!Object.prototype.hasOwnProperty.call(table, prev)) return generated;
        const nextOptions = table[prev];

        generated += weightedRand(nextOptions);
    }

    return generated;
};

/**
 * Generates text from the index-based table.
 * Like generateFromTable, stops early instead of throwing or spinning forever
 * when the current state is unknown or has no outgoing transitions.
 *
 * @param {{markovArr: Array, ngrams: {indices: Object}}} markov result of generateMarkov
 * @param {number} len target length of the generated string
 * @param {string} start seed n-gram; must be exactly one n-gram long to be found
 */
const generateFromArray = ({markovArr: table, ngrams}, len, start) => {
    let generated = start;
    let ngram = ngrams.indices[start];

    while (generated.length < len) {
        const state = table[ngram];
        if (state === undefined || state.choices.length === 0) return generated;

        const {choices, totalProbability} = state;
        const random = Math.floor(Math.random() * totalProbability);
        for (let i = 0; i < choices.length; i++) {
            const {nextChar, nextNgram, cumulativeProbability} = choices[i];
            if (random < cumulativeProbability) {
                ngram = nextNgram;
                generated += nextChar;
                break;
            }
        }
    }

    return generated;
};

/**
 * Renders the index-based table as C source: one `uwu_markov_choice` array per
 * n-gram plus a `<name>_ngrams` array referencing them.
 *
 * @param {{markovArr: Array, ngrams: {values: string[]}}} markov result of generateMarkov
 * @param {string} name prefix for the generated C identifiers
 * @returns {string} C source text
 */
const markovArrToC = ({markovArr: table, ngrams}, name) => {
    const choicesDefs = [];
    const ngramDefs = [];

    for (let i = 0; i < table.length; i++) {
        const listName = `${name}_ngram${i}_choices`;
        const {choices, totalProbability} = table[i];
        const choiceDefs = choices.map(({nextChar, nextNgram, cumulativeProbability}) =>
` {.next_ngram = ${nextNgram}, .cumulative_probability = ${cumulativeProbability}, .next_char = '${nextChar}'}`
        );

        choicesDefs.push(
`static struct uwu_markov_choice ${listName}[] = {
${choiceDefs.join(',\n')}
};`
        );

        ngramDefs.push(` {.choices = ${listName}, .total_probability = ${totalProbability}}${i === table.length - 1 ? '' : ','} // ${ngrams.values[i]}`);
    }

    const code =
`${choicesDefs.join('\n')}

static uwu_markov_ngram ${name}_ngrams[] = {
${ngramDefs.join('\n')}
};`;

    return code;
};

/**
 * Attaches each state's n-gram text as a `name` property (mutating the table in
 * place) and returns the table.
 */
function magic({markovArr: table, ngrams}) {
    for (let index = 0; index < table.length; index++) {
        table[index].name = ngrams.values[index];
    }
    return table;
}

const catgirlNonsense = `mraowmraowmewmraowmrrppurrrrmraownyanyamraowwwwwmrwmraowmreowmewmrowmraowmewmraownya
mrrrowmeowmrowrmrowpurrrmrowmeowmraowmewmrowrmewnyaaaamrowrnyaamewmeowmrowrmeownya
mrowrmeow
meowmrowrmrowmeow
mrowrnyaaaaa
mrrowmeowmrowrmrrowmeowmrowrnyaaamrowmewmeowmewpurrrrrmeowmrowrmeownya`;

const keysmash = `alksdhfl;ag;kdhfgjkfhgadskfagdfkajfdhgbklkafghkahgsdfka;dfglkfjhgajdfghkgahjfgafgfkjdhg;lskgjhjkhajhdgfjhkafgl;ajdfglkajdflg;hdkjafhgkgaurgjrahdfgbahiurghrgh;arhnguahraufjalfgnhjhaujeghfgadjog;aldhhjlahuegjfdbhgajkfghafkjgahiurg`;

const scrunklyBase = "the little tienpsy! so adornale and cutest tootsit! awww like and double tap now so it can the to live the cute! happy kdb! awww the scrunkly! scrunkly the when! the boinky spunge! crinkly doo! shronkle scrimblo! aww when the.. the limtle tootsie,, tienpsy widdle scrimblo boinkus! boinky spunge! crinkly doo! scrunkly,,, the widdle. the cutest adornale tienpsy tootsit,,, whem the kity,, n flunf and it,, i,. yay! lookit aw!! lookit the little crungle boinko! aw icamt,,, the kimty and cat sooo mipy. little meowmeow tienpsy and smol sproingle scrunkly";

const punctuationRegex = /[,.!]+ ?/g;

const scrunks = [];

/**
 * Creates a deterministic 32-bit generator following the xoshiro128++ update
 * steps, seeded with four 32-bit words. Each call returns an unsigned 32-bit integer.
 *
 * @param {number[]} seed four 32-bit state words (copied, not mutated)
 * @returns {function(): number}
 */
const makeXoshiro128 = seed => {
    const s = seed.slice();

    // Rotate left on 32 bits. The right shift must be the zero-filling `>>>`:
    // the sign-propagating `>>` smears the top bit over the whole result whenever
    // bit 31 of x is set, which breaks the rotation.
    const rotl = (x, k) => (x << k) | (x >>> (32 - k));

    return () => {
        const result = (rotl(s[0] + s[3], 7) + s[0]) & 0xffffffff;

        const t = s[1] << 9;
        s[2] ^= s[0];
        s[3] ^= s[1];
        s[1] ^= s[2];
        s[0] ^= s[3];

        s[2] ^= t;

        s[3] = rotl(s[3], 11);

        return result >>> 0;
    };
};

// Fixed seed so the generated scrunkly corpus is reproducible.
const xoshiro128 = makeXoshiro128([1697843356, 1544689657, 3646425737, 2133148247]);

// Build 100 variants of the base text with randomized punctuation runs.
for (let i = 0; i < 100; i++) {
    scrunks.push(scrunklyBase.replace(punctuationRegex, match => {
        if (xoshiro128() % 3 === 0) {
            return ','.repeat((xoshiro128() % 3) + 2);
        }
        return ['!', '.'][xoshiro128() % 2].repeat((xoshiro128() % 4) + 1) + ' ';
    }));
}

const catgirlTable = generateMarkov(catgirlNonsense.split('\n'), 2);
const keysmashTable = generateMarkov(keysmash, 1);
const scrunklyTable = generateMarkov(scrunks, 2);

console.log(JSON.stringify(magic(catgirlTable)));

// console.log(generateFromArray(catgirlTable, 100, 'ny'));
//
// console.time('from table');
// for (let i = 0; i < 1000; i++) {
//     generateFromTable(catgirlTable, 1000, 'ny');
// }
// console.timeEnd('from table');
//
// console.time('from array');
// for (let i = 0; i < 1000; i++) {
//     generateFromArray(catgirlTable, 1000, 'ny');
// }
// console.timeEnd('from array');

// console.log(markovArrToC(catgirlTable, 'catnonsense'));
// console.log(markovArrToC(keysmashTable, 'keysmash'));
// console.log(markovArrToC(scrunklyTable, 'scrunkly'));

// Patch boundary from the flattened commit dump, kept verbatim:
// diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..1b1a06d --- /dev/null
+++ b/src/lib.rs @@ -0,0 +1,24 @@ +pub fn add(left: usize, right: usize) -> usize { + left + right +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn it_works() { + let mut rng = rand_pcg::Pcg32::new(0xcafef00dd15ea5e5, 0xa02bdbf7bb3c0a7); + let mut state_machine = StateMachine::Ny; + let mut result = String::from("ny"); + for _ in 0..100 { + let (new_state, generated) = state_machine.generate(&mut rng); + result.push(generated); + state_machine = new_state; + } + assert_eq!(&result, "nyaaaameowmrowrmrowmrrmeowmrowmeownyanyaaaaaaaaaaaaamraowrmeowwwmeowmraowmrowmrowmeowmeowrnyamreownyaa"); + } +} + +uwurandom_proc_macros::gen_fsm![{"choices":[{"nextNgram":0,"nextChar":"a","cumulativeProbability":2},{"nextNgram":1,"nextChar":"m","cumulativeProbability":3}],"totalProbability":3,"name":"aa"},{"choices":[{"nextNgram":7,"nextChar":"r","cumulativeProbability":3},{"nextNgram":6,"nextChar":"e","cumulativeProbability":4}],"totalProbability":4,"name":"am"},{"choices":[{"nextNgram":8,"nextChar":"y","cumulativeProbability":1}],"totalProbability":1,"name":"an"},{"choices":[{"nextNgram":9,"nextChar":"w","cumulativeProbability":1}],"totalProbability":1,"name":"ao"},{"choices":[{"nextNgram":9,"nextChar":"w","cumulativeProbability":1}],"totalProbability":1,"name":"eo"},{"choices":[{"nextNgram":21,"nextChar":"m","cumulativeProbability":6},{"nextNgram":22,"nextChar":"n","cumulativeProbability":7},{"nextNgram":23,"nextChar":"p","cumulativeProbability":8}],"totalProbability":8,"name":"ew"},{"choices":[{"nextNgram":4,"nextChar":"o","cumulativeProbability":3},{"nextNgram":5,"nextChar":"w","cumulativeProbability":5}],"totalProbability":5,"name":"me"},{"choices":[{"nextNgram":16,"nextChar":"o","cumulativeProbability":15},{"nextNgram":12,"nextChar":"a","cumulativeProbability":24},{"nextNgram":18,"nextChar":"r","cumulativeProbability":28},{"nextNgram":19,"nextChar":"w","cumulativeProbability":29},{"nextNgram":13,"nextChar":"e","cumulativeProbability":30}],"totalProbability":30,
"name":"mr"},{"choices":[{"nextNgram":26,"nextChar":"a","cumulativeProbability":1}],"totalProbability":1,"name":"ny"},{"choices":[{"nextNgram":21,"nextChar":"m","cumulativeProbability":22},{"nextNgram":24,"nextChar":"r","cumulativeProbability":32},{"nextNgram":22,"nextChar":"n","cumulativeProbability":36},{"nextNgram":25,"nextChar":"w","cumulativeProbability":37},{"nextNgram":23,"nextChar":"p","cumulativeProbability":38}],"totalProbability":38,"name":"ow"},{"choices":[{"nextNgram":11,"nextChar":"u","cumulativeProbability":1}],"totalProbability":1,"name":"pp"},{"choices":[{"nextNgram":20,"nextChar":"r","cumulativeProbability":1}],"totalProbability":1,"name":"pu"},{"choices":[{"nextNgram":3,"nextChar":"o","cumulativeProbability":1}],"totalProbability":1,"name":"ra"},{"choices":[{"nextNgram":4,"nextChar":"o","cumulativeProbability":1}],"totalProbability":1,"name":"re"},{"choices":[{"nextNgram":7,"nextChar":"r","cumulativeProbability":1},{"nextNgram":6,"nextChar":"e","cumulativeProbability":2}],"totalProbability":2,"name":"rm"},{"choices":[{"nextNgram":8,"nextChar":"y","cumulativeProbability":1}],"totalProbability":1,"name":"rn"},{"choices":[{"nextNgram":9,"nextChar":"w","cumulativeProbability":1}],"totalProbability":1,"name":"ro"},{"choices":[{"nextNgram":10,"nextChar":"p","cumulativeProbability":1}],"totalProbability":1,"name":"rp"},{"choices":[{"nextNgram":18,"nextChar":"r","cumulativeProbability":7},{"nextNgram":14,"nextChar":"m","cumulativeProbability":10},{"nextNgram":16,"nextChar":"o","cumulativeProbability":13},{"nextNgram":17,"nextChar":"p","cumulativeProbability":14}],"totalProbability":14,"name":"rr"},{"choices":[{"nextNgram":21,"nextChar":"m","cumulativeProbability":1}],"totalProbability":1,"name":"rw"},{"choices":[{"nextNgram":18,"nextChar":"r","cumulativeProbability":1}],"totalProbability":1,"name":"ur"},{"choices":[{"nextNgram":7,"nextChar":"r","cumulativeProbability":17},{"nextNgram":6,"nextChar":"e","cumulativeProbability":30}],"totalProbability":30,"na
me":"wm"},{"choices":[{"nextNgram":8,"nextChar":"y","cumulativeProbability":1}],"totalProbability":1,"name":"wn"},{"choices":[{"nextNgram":11,"nextChar":"u","cumulativeProbability":1}],"totalProbability":1,"name":"wp"},{"choices":[{"nextNgram":14,"nextChar":"m","cumulativeProbability":7},{"nextNgram":15,"nextChar":"n","cumulativeProbability":10}],"totalProbability":10,"name":"wr"},{"choices":[{"nextNgram":25,"nextChar":"w","cumulativeProbability":3},{"nextNgram":21,"nextChar":"m","cumulativeProbability":4}],"totalProbability":4,"name":"ww"},{"choices":[{"nextNgram":0,"nextChar":"a","cumulativeProbability":4},{"nextNgram":2,"nextChar":"n","cumulativeProbability":5},{"nextNgram":1,"nextChar":"m","cumulativeProbability":6}],"totalProbability":6,"name":"ya"}]; diff --git a/uwurandom-proc-macros/Cargo.toml b/uwurandom-proc-macros/Cargo.toml new file mode 100644 index 0000000..b109ab3 --- /dev/null +++ b/uwurandom-proc-macros/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "uwurandom-proc-macros" +version = "0.1.0" +edition = "2021" + +[lib] +proc-macro = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +quote = "1.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +syn = { version = "1.0", features = ["parsing"] } +convert_case = "0.6.0" \ No newline at end of file diff --git a/uwurandom-proc-macros/src/json.rs b/uwurandom-proc-macros/src/json.rs new file mode 100644 index 0000000..1b99d38 --- /dev/null +++ b/uwurandom-proc-macros/src/json.rs @@ -0,0 +1,17 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct MarkovArr { + pub choices: Vec, + pub total_probability: u32, + pub name: String, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct Choice { + pub next_ngram: usize, + pub next_char: char, + pub cumulative_probability: u32, +} diff --git 
a/uwurandom-proc-macros/src/lib.rs b/uwurandom-proc-macros/src/lib.rs new file mode 100644 index 0000000..ca6bac8 --- /dev/null +++ b/uwurandom-proc-macros/src/lib.rs @@ -0,0 +1,66 @@ +use convert_case::{Case, Casing}; +use proc_macro::{Span, TokenStream}; +use quote::quote; +use syn::{Ident, LitChar}; + +use crate::json::MarkovArr; + +mod json; + +#[proc_macro] +pub fn gen_fsm(item: TokenStream) -> TokenStream { + let input: Vec = serde_json::from_str(&format!("[{}]", item)).unwrap(); + let mut match_arms = quote!(); + let mut variants = quote!(); + for state in input.iter() { + let name = state.name.to_case(Case::Pascal); + let name = Ident::new(&name, Span::call_site().into()); + variants = quote!( + #variants + #name, + ); + let mut inner_match_arms = quote!(); + if state.total_probability == 1 { + let choice = &state.choices[0]; + let next_state = input[choice.next_ngram].name.to_case(Case::Pascal); + let next_state = Ident::new(&next_state, Span::call_site().into()); + let next_char = LitChar::new(choice.next_char, Span::call_site().into()); + match_arms = quote!( + #match_arms + Self::#name => (Self::#next_state, #next_char), + ); + continue; + } + for choice in &state.choices { + let next_state = input[choice.next_ngram].name.to_case(Case::Pascal); + let next_state = Ident::new(&next_state, Span::call_site().into()); + let cumulative_probability = choice.cumulative_probability - 1; + let next_char = LitChar::new(choice.next_char, Span::call_site().into()); + inner_match_arms = quote!( + #inner_match_arms + 0..=#cumulative_probability => (Self::#next_state, #next_char), + ) + } + let total_probability = state.total_probability; + match_arms = quote!( + #match_arms + Self::#name => match rng.next_u32() % #total_probability { + #inner_match_arms + _ => unreachable!(), + }, + ); + } + quote!( + #[derive(Debug, Clone, Copy)] + enum StateMachine { + #variants + } + impl StateMachine { + fn generate(self, mut rng: impl ::rand_core::RngCore) -> (Self, char) { + 
match self { + #match_arms + } + } + } + ).into() +}