proc macros are black magic

This commit is contained in:
Skye 2022-09-21 18:05:52 +09:00
commit 24937326f7
10 changed files with 520 additions and 0 deletions

1
.envrc Normal file
View file

@ -0,0 +1 @@
use flake

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
/target
/Cargo.lock
/.direnv

16
Cargo.toml Normal file
View file

@ -0,0 +1,16 @@
[package]
name = "uwurandom-rs"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dev-dependencies]
rand_pcg = "0.3.1"
[dependencies]
rand_core = "0.6.4"
uwurandom-proc-macros = { version = "0.1.0", path = "uwurandom-proc-macros" }
[workspace]
members = ["uwurandom-proc-macros"]

77
flake.lock Normal file
View file

@ -0,0 +1,77 @@
{
"nodes": {
"naersk": {
"inputs": {
"nixpkgs": "nixpkgs"
},
"locked": {
"lastModified": 1659610603,
"narHash": "sha256-LYgASYSPYo7O71WfeUOaEUzYfzuXm8c8eavJcel+pfI=",
"owner": "nix-community",
"repo": "naersk",
"rev": "c6a45e4277fa58abd524681466d3450f896dc094",
"type": "github"
},
"original": {
"owner": "nix-community",
"ref": "master",
"repo": "naersk",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1661353537,
"narHash": "sha256-1E2IGPajOsrkR49mM5h55OtYnU0dGyre6gl60NXKITE=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "0e304ff0d9db453a4b230e9386418fd974d5804a",
"type": "github"
},
"original": {
"id": "nixpkgs",
"type": "indirect"
}
},
"nixpkgs_2": {
"locked": {
"lastModified": 1661353537,
"narHash": "sha256-1E2IGPajOsrkR49mM5h55OtYnU0dGyre6gl60NXKITE=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "0e304ff0d9db453a4b230e9386418fd974d5804a",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixpkgs-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"naersk": "naersk",
"nixpkgs": "nixpkgs_2",
"utils": "utils"
}
},
"utils": {
"locked": {
"lastModified": 1659877975,
"narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

26
flake.nix Normal file
View file

@ -0,0 +1,26 @@
# Nix flake for this repository: builds the crate with naersk and provides a
# development shell with the Rust toolchain.
{
inputs = {
# naersk supplies the buildPackage function used for defaultPackage below.
naersk.url = "github:nix-community/naersk/master";
# Package set, following the nixpkgs-unstable branch.
nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
# flake-utils supplies eachDefaultSystem and mkApp.
utils.url = "github:numtide/flake-utils";
};
outputs = { self, nixpkgs, utils, naersk }:
# Produce the same outputs for every system in flake-utils' default list.
utils.lib.eachDefaultSystem (system:
let
pkgs = import nixpkgs { inherit system; };
naersk-lib = pkgs.callPackage naersk { };
in
{
# NOTE(review): defaultPackage / defaultApp / devShell are the older flake
# output names; newer Nix releases prefer packages.default, apps.default and
# devShells.default. Consider migrating once all consumers are updated.
defaultPackage = naersk-lib.buildPackage ./.;
# NOTE(review): the visible sources only define a library crate; confirm that
# there is a binary for this app to run.
defaultApp = utils.lib.mkApp {
drv = self.defaultPackage."${system}";
};
# Shell with cargo, rustc, rustfmt, pre-commit and clippy on the path.
devShell = with pkgs; mkShell {
buildInputs = [ cargo rustc rustfmt pre-commit rustPackages.clippy ];
# Points at the standard library sources; presumably for editor tooling
# such as rust-analyzer (confirm).
RUST_SRC_PATH = rustPlatform.rustLibSrc;
};
});
}

274
markov.js Normal file
View file

@ -0,0 +1,274 @@
const { table } = require('console');
const util = require('util');
// Build the reverse lookup of an array: the result maps every element (used as a
// property key) to the index at which it occurs. When an element appears more than
// once, the last index wins.
const invertArray = arr => {
    const indexByValue = {};
    for (const [position, value] of arr.entries()) {
        indexByValue[value] = position;
    }
    return indexByValue;
};
// Greatest common divisor via the Euclidean algorithm. The caller in this file
// passes occurrence counts (non-negative integers). gcd(n, 0) and gcd(0, n) are n.
const gcd = (a, b) => {
    // Keep the larger operand in `a` before taking remainders.
    if (b > a) {
        [a, b] = [b, a];
    }
    while (b !== 0) {
        const remainder = a % b;
        a = b;
        b = remainder;
    }
    return a;
};
// Scan `str` and accumulate its transitions into the shared collections:
//   table[ngram][nextChar] - how many times `nextChar` followed `ngram`
//   chars                  - every character seen as a successor
//   ngrams                 - every ngram seen, including the final one of the
//                            string (which has no successor within this string)
// `order` is the ngram length. Strings no longer than `order` contribute nothing.
const generateMarkovIntoTable = (str, order, table, chars, ngrams) => {
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const transitionCount = str.length - order;

    for (let start = 0; start < transitionCount; start++) {
        const ngram = str.slice(start, start + order);
        ngrams.add(ngram);
        const follower = str[start + order];
        chars.add(follower);

        if (!hasOwn(table, ngram)) {
            table[ngram] = {[follower]: 1};
        } else if (hasOwn(table[ngram], follower)) {
            table[ngram][follower]++;
        } else {
            table[ngram][follower] = 1;
        }
    }

    if (transitionCount > 0) {
        // make sure we get the *last* ngram too
        ngrams.add(str.slice(transitionCount));
    }
};
// Generate a Markov chain probability table from a string or array of strings.
//
// `order` is the ngram length. The result contains:
//   table     - raw counts: table[ngram][nextChar] = number of occurrences
//   markovArr - one entry per ngram, in the same order as ngrams.values:
//               {choices: [{nextNgram, nextChar, cumulativeProbability}], totalProbability}
//               where nextNgram is an index into markovArr and choices are sorted
//               from most to least likely
//   order     - the ngram length that was used
//   ngrams    - {values: sorted list of ngrams, indices: ngram -> index}
//
// An ngram that only ever occurs at the very end of the input has no recorded
// successors. It is emitted as a terminal state ({choices: [], totalProbability: 0})
// rather than crashing table construction; consumers must treat an empty `choices`
// list as "nothing can follow".
const generateMarkov = (strings, order = 2) => {
    const table = {};
    const chars = new Set();
    const ngrams = new Set();
    const inputs = Array.isArray(strings) ? strings : [strings];
    for (const str of inputs) {
        generateMarkovIntoTable(str, order, table, chars, ngrams);
    }

    const ngramsArr = Array.from(ngrams).sort();
    const ngramIndices = invertArray(ngramsArr);

    // Convert string-based table (slow) to numeric indices (fast)
    const markovArr = ngramsArr.map(ngram => {
        const counts = Object.prototype.hasOwnProperty.call(table, ngram) ? table[ngram] : {};
        const weighted = Object.keys(counts).map(nextChar => ({
            nextNgram: ngramIndices[ngram.slice(1) + nextChar],
            nextChar,
            probability: counts[nextChar]
        }));
        if (weighted.length === 0) {
            return {choices: [], totalProbability: 0};
        }

        // sort by probability in descending order to minimize linear search steps
        weighted.sort((a, b) => b.probability - a.probability);

        let runningTotal = 0;
        const choices = weighted.map(({nextNgram, nextChar, probability}) => {
            runningTotal += probability;
            return {nextNgram, nextChar, cumulativeProbability: runningTotal};
        });

        // Scale all weights down by their common divisor so the emitted numbers stay
        // small. gcd(0, n) is n, so folding from 0 yields the divisor of every entry.
        const divisor = choices.reduce(
            (acc, choice) => gcd(acc, choice.cumulativeProbability),
            0
        );
        let totalProbability = runningTotal;
        if (divisor > 1) {
            for (const choice of choices) {
                choice.cumulativeProbability /= divisor;
            }
            totalProbability /= divisor;
        }
        return {choices, totalProbability};
    });

    return {
        table,
        markovArr,
        order,
        ngrams: {values: ngramsArr, indices: ngramIndices}
    };
};
// Pick one key of `table` at random, where each key's value is its integer weight.
// Returns undefined when the table is empty.
const weightedRand = (table) => {
    const weightedKeys = Object.entries(table);
    let totalWeight = 0;
    for (const [, weight] of weightedKeys) {
        totalWeight += weight;
    }
    const pick = Math.floor(Math.random() * totalWeight);
    let runningWeight = 0;
    for (const [key, weight] of weightedKeys) {
        runningWeight += weight;
        if (pick < runningWeight) {
            return key;
        }
    }
};
// Extend `start` one character at a time, using the string-keyed count table, until
// the text is `len` characters long. Stops early and returns what it has when the
// last `order` characters have no entry in the table.
const generateFromTable = ({table, order}, len, start) => {
    let output = start;
    while (output.length < len) {
        const context = output.slice(output.length - order, output.length);
        const hasSuccessors = Object.prototype.hasOwnProperty.call(table, context);
        if (!hasSuccessors) {
            break;
        }
        output += weightedRand(table[context]);
    }
    return output;
};
// Extend `start` one character at a time, using the index-based table, until the text
// is `len` characters long.
//
// `start` must be exactly one ngram; it is looked up in `ngrams.indices` to find the
// initial state. Generation stops early, returning the text produced so far, when
//   - `start` is not a known ngram, or
//   - the current state has no choices (nothing can follow it), or
//   - no choice covers the drawn number (malformed table).
// Previously the first case threw a TypeError and the other two looped forever.
const generateFromArray = ({markovArr: table, ngrams}, len, start) => {
    let generated = start;
    let ngram = ngrams.indices[start];
    while (generated.length < len) {
        const state = table[ngram];
        if (state === undefined || state.choices.length === 0) {
            break;
        }
        const {choices, totalProbability} = state;
        const random = Math.floor(Math.random() * totalProbability);
        // Choices carry cumulative weights, so the first one above the draw is the pick.
        const picked = choices.find(choice => random < choice.cumulativeProbability);
        if (picked === undefined) {
            break;
        }
        ngram = picked.nextNgram;
        generated += picked.nextChar;
    }
    return generated;
};
// Render an index-based Markov table as C source text.
//
// For every ngram i this emits a `static struct uwu_markov_choice <name>_ngram<i>_choices[]`
// array holding its choices, followed by one `<name>_ngrams[]` array whose i-th entry
// points at that choices array and carries the ngram's total probability. Each ngram
// entry ends with a `//` comment naming the ngram it stands for.
// Returns the generated C code as a single string.
//
// NOTE(review): nextChar is interpolated into a C character literal without escaping;
// a `'` or `\` in the corpus would produce invalid C. Confirm the corpora avoid them.
const markovArrToC = ({markovArr: table, ngrams}, name) => {
// One C array definition per ngram.
const choicesDefs = [];
// One initializer line per ngram for the final ngram array.
const ngramDefs = [];
for (let i = 0; i < table.length; i++) {
const listName = `${name}_ngram${i}_choices`;
const {choices, totalProbability} = table[i];
const choiceDefs = [];
// This inner `i` shadows the outer loop index until the inner loop ends.
for (let i = 0; i < choices.length; i++) {
const {nextChar, nextNgram, cumulativeProbability} = choices[i];
choiceDefs.push(
` {.next_ngram = ${nextNgram}, .cumulative_probability = ${cumulativeProbability}, .next_char = '${nextChar}'}`
);
}
choicesDefs.push(
`static struct uwu_markov_choice ${listName}[] = {
${choiceDefs.join(',\n')}
};`
);
// The comma is omitted after the last entry; the trailing comment names the ngram.
ngramDefs.push(` {.choices = ${listName}, .total_probability = ${totalProbability}}${i === table.length - 1 ? '' : ','} // ${ngrams.values[i]}`);
}
const code =
`${choicesDefs.join('\n')}
static uwu_markov_ngram ${name}_ngrams[] = {
${ngramDefs.join('\n')}
};`;
return code;
}
// Attach each state's ngram text as a `name` property (mutating the entries in place)
// and return the state array, ready to be serialized for the Rust macro.
function magic({markovArr: table, ngrams}) {
    for (const [position, state] of table.entries()) {
        state.name = ngrams.values[position];
    }
    return table;
}
// Training corpus for the catgirl-noise chain. It is split on newlines before
// training, so each line is learned as a separate string.
const catgirlNonsense = `mraowmraowmewmraowmrrppurrrrmraownyanyamraowwwwwmrwmraowmreowmewmrowmraowmewmraownya
mrrrowmeowmrowrmrowpurrrmrowmeowmraowmewmrowrmewnyaaaamrowrnyaamewmeowmrowrmeownya
mrowrmeow
meowmrowrmrowmeow
mrowrnyaaaaa
mrrowmeowmrowrmrrowmeowmrowrnyaaamrowmewmeowmewpurrrrrmeowmrowrmeownya`;
// Training corpus for the keysmash chain (trained as one string with order 1).
const keysmash = `alksdhfl;ag;kdhfgjkfhgadskfagdfkajfdhgbklkafghkahgsdfka;dfglkfjhgajdfghkgahjfgafgfkjdhg;lskgjhjkhajhdgfjhkafgl;ajdfglkajdflg;hdkjafhgkgaurgjrahdfgbahiurghrgh;arhnguahraufjalfgnhjhaujeghfgadjog;aldhhjlahuegjfdbhgajkfghafkjgahiurg`;
// Base text for the scrunkly chain. Its punctuation runs are replaced with randomized
// ones to produce the actual training strings collected in `scrunks`.
const scrunklyBase = "the little tienpsy! so adornale and cutest tootsit! awww like and double tap now so it can the to live the cute! happy kdb! awww the scrunkly! scrunkly the when! the boinky spunge! crinkly doo! shronkle scrimblo! aww when the.. the limtle tootsie,, tienpsy widdle scrimblo boinkus! boinky spunge! crinkly doo! scrunkly,,, the widdle. the cutest adornale tienpsy tootsit,,, whem the kity,, n flunf and it,, i,. yay! lookit aw!! lookit the little crungle boinko! aw icamt,,, the kimty and cat sooo mipy. little meowmeow tienpsy and smol sproingle scrunkly"
// Matches a run of one or more `,`, `.` or `!` characters plus at most one following space.
const punctuationRegex = /[,.!]+ ?/g;
// Receives the randomized variants of scrunklyBase.
const scrunks = [];
// Deterministic 32-bit pseudo-random generator (the xoshiro128++ output and state
// update steps) with a fixed seed, so the randomized corpus is identical on every run.
// Each call advances the state and returns an unsigned 32-bit integer.
//
// Note: the rotate helper used to shift right with the sign-extending `>>`, which
// smeared ones over the rotated-in bits whenever the top bit was set. With the fix
// the produced sequence differs from what the earlier version generated.
const xoshiro128 = (() => {
    const s = [1697843356, 1544689657, 3646425737, 2133148247];
    // 32-bit rotate left; `>>>` keeps the bits that wrap around free of sign extension.
    const rotl = (x, k) => (x << k) | (x >>> (32 - k));
    return () => {
        // `>>> 0` reduces the sum modulo 2^32 and yields it as an unsigned value.
        const result = (rotl(s[0] + s[3], 7) + s[0]) >>> 0;
        const t = s[1] << 9;
        s[2] ^= s[0];
        s[3] ^= s[1];
        s[1] ^= s[2];
        s[0] ^= s[3];
        s[2] ^= t;
        s[3] = rotl(s[3], 11);
        return result;
    };
})();
// Build 100 variants of the base text. In each one, every punctuation run is replaced
// by a freshly drawn run: one time in three 2-4 commas with no space, otherwise 1-4
// exclamation marks or full stops followed by a space.
for (let variant = 0; variant < 100; variant++) {
    const randomPunctuation = () => {
        if (xoshiro128() % 3 === 0) {
            const commaCount = (xoshiro128() % 3) + 2;
            return ','.repeat(commaCount);
        }
        const mark = ['!', '.'][xoshiro128() % 2];
        const markCount = (xoshiro128() % 4) + 1;
        return mark.repeat(markCount) + ' ';
    };
    scrunks.push(scrunklyBase.replace(punctuationRegex, randomPunctuation));
}
// Train one chain per corpus: catgirl noises line by line with 2-character ngrams,
// the keysmash as a single string with 1-character ngrams, and the scrunkly variants
// with 2-character ngrams.
const catgirlTable = generateMarkov(catgirlNonsense.split('\n'), 2);
const keysmashTable = generateMarkov(keysmash, 1);
const scrunklyTable = generateMarkov(scrunks, 2);
// Print the named catgirl states as JSON on standard output.
const namedCatgirlStates = magic(catgirlTable);
console.log(JSON.stringify(namedCatgirlStates));
// console.log(generateFromArray(catgirlTable, 100, 'ny'));
//
// console.time('from table');
// for (let i = 0; i < 1000; i++) {
// generateFromTable(catgirlTable, 1000, 'ny');
// }
// console.timeEnd('from table');
//
// console.time('from array');
// for (let i = 0; i < 1000; i++) {
// generateFromArray(catgirlTable, 1000, 'ny');
// }
// console.timeEnd('from array');
// console.log(markovArrToC(catgirlTable, 'catnonsense'));
// console.log(markovArrToC(keysmashTable, 'keysmash'));
// console.log(markovArrToC(scrunklyTable, 'scrunkly'));

24
src/lib.rs Normal file
View file

@ -0,0 +1,24 @@
/// Returns the sum of `left` and `right`.
pub fn add(left: usize, right: usize) -> usize {
    right + left
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Text expected after 100 transitions with the fixed-seed generator used below,
    /// including the two-character `"ny"` seed.
    const EXPECTED: &str = "nyaaaameowmrowrmrowmrrmeowmrowmeownyanyaaaaaaaaaaaaamraowrmeowwwmeowmraowmrowmrowmeowmeowrnyamreownyaa";

    /// Drives the generated state machine from the `Ny` state for 100 steps with a
    /// deterministic PCG32 stream and checks the exact characters produced.
    #[test]
    fn it_works() {
        let mut rng = rand_pcg::Pcg32::new(0xcafef00dd15ea5e5, 0xa02bdbf7bb3c0a7);
        let mut current = StateMachine::Ny;
        let mut output = String::from("ny");
        for _ in 0..100 {
            let (next, emitted) = current.generate(&mut rng);
            output.push(emitted);
            current = next;
        }
        assert_eq!(output, EXPECTED);
    }
}
uwurandom_proc_macros::gen_fsm![{"choices":[{"nextNgram":0,"nextChar":"a","cumulativeProbability":2},{"nextNgram":1,"nextChar":"m","cumulativeProbability":3}],"totalProbability":3,"name":"aa"},{"choices":[{"nextNgram":7,"nextChar":"r","cumulativeProbability":3},{"nextNgram":6,"nextChar":"e","cumulativeProbability":4}],"totalProbability":4,"name":"am"},{"choices":[{"nextNgram":8,"nextChar":"y","cumulativeProbability":1}],"totalProbability":1,"name":"an"},{"choices":[{"nextNgram":9,"nextChar":"w","cumulativeProbability":1}],"totalProbability":1,"name":"ao"},{"choices":[{"nextNgram":9,"nextChar":"w","cumulativeProbability":1}],"totalProbability":1,"name":"eo"},{"choices":[{"nextNgram":21,"nextChar":"m","cumulativeProbability":6},{"nextNgram":22,"nextChar":"n","cumulativeProbability":7},{"nextNgram":23,"nextChar":"p","cumulativeProbability":8}],"totalProbability":8,"name":"ew"},{"choices":[{"nextNgram":4,"nextChar":"o","cumulativeProbability":3},{"nextNgram":5,"nextChar":"w","cumulativeProbability":5}],"totalProbability":5,"name":"me"},{"choices":[{"nextNgram":16,"nextChar":"o","cumulativeProbability":15},{"nextNgram":12,"nextChar":"a","cumulativeProbability":24},{"nextNgram":18,"nextChar":"r","cumulativeProbability":28},{"nextNgram":19,"nextChar":"w","cumulativeProbability":29},{"nextNgram":13,"nextChar":"e","cumulativeProbability":30}],"totalProbability":30,"name":"mr"},{"choices":[{"nextNgram":26,"nextChar":"a","cumulativeProbability":1}],"totalProbability":1,"name":"ny"},{"choices":[{"nextNgram":21,"nextChar":"m","cumulativeProbability":22},{"nextNgram":24,"nextChar":"r","cumulativeProbability":32},{"nextNgram":22,"nextChar":"n","cumulativeProbability":36},{"nextNgram":25,"nextChar":"w","cumulativeProbability":37},{"nextNgram":23,"nextChar":"p","cumulativeProbability":38}],"totalProbability":38,"name":"ow"},{"choices":[{"nextNgram":11,"nextChar":"u","cumulativeProbability":1}],"totalProbability":1,"name":"pp"},{"choices":[{"nextNgram":20,"nextChar":"r","cumulativePr
obability":1}],"totalProbability":1,"name":"pu"},{"choices":[{"nextNgram":3,"nextChar":"o","cumulativeProbability":1}],"totalProbability":1,"name":"ra"},{"choices":[{"nextNgram":4,"nextChar":"o","cumulativeProbability":1}],"totalProbability":1,"name":"re"},{"choices":[{"nextNgram":7,"nextChar":"r","cumulativeProbability":1},{"nextNgram":6,"nextChar":"e","cumulativeProbability":2}],"totalProbability":2,"name":"rm"},{"choices":[{"nextNgram":8,"nextChar":"y","cumulativeProbability":1}],"totalProbability":1,"name":"rn"},{"choices":[{"nextNgram":9,"nextChar":"w","cumulativeProbability":1}],"totalProbability":1,"name":"ro"},{"choices":[{"nextNgram":10,"nextChar":"p","cumulativeProbability":1}],"totalProbability":1,"name":"rp"},{"choices":[{"nextNgram":18,"nextChar":"r","cumulativeProbability":7},{"nextNgram":14,"nextChar":"m","cumulativeProbability":10},{"nextNgram":16,"nextChar":"o","cumulativeProbability":13},{"nextNgram":17,"nextChar":"p","cumulativeProbability":14}],"totalProbability":14,"name":"rr"},{"choices":[{"nextNgram":21,"nextChar":"m","cumulativeProbability":1}],"totalProbability":1,"name":"rw"},{"choices":[{"nextNgram":18,"nextChar":"r","cumulativeProbability":1}],"totalProbability":1,"name":"ur"},{"choices":[{"nextNgram":7,"nextChar":"r","cumulativeProbability":17},{"nextNgram":6,"nextChar":"e","cumulativeProbability":30}],"totalProbability":30,"name":"wm"},{"choices":[{"nextNgram":8,"nextChar":"y","cumulativeProbability":1}],"totalProbability":1,"name":"wn"},{"choices":[{"nextNgram":11,"nextChar":"u","cumulativeProbability":1}],"totalProbability":1,"name":"wp"},{"choices":[{"nextNgram":14,"nextChar":"m","cumulativeProbability":7},{"nextNgram":15,"nextChar":"n","cumulativeProbability":10}],"totalProbability":10,"name":"wr"},{"choices":[{"nextNgram":25,"nextChar":"w","cumulativeProbability":3},{"nextNgram":21,"nextChar":"m","cumulativeProbability":4}],"totalProbability":4,"name":"ww"},{"choices":[{"nextNgram":0,"nextChar":"a","cumulativeProbability":4},{"next
Ngram":2,"nextChar":"n","cumulativeProbability":5},{"nextNgram":1,"nextChar":"m","cumulativeProbability":6}],"totalProbability":6,"name":"ya"}];

View file

@ -0,0 +1,16 @@
[package]
name = "uwurandom-proc-macros"
version = "0.1.0"
edition = "2021"
[lib]
proc-macro = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
quote = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
syn = { version = "1.0", features = ["parsing"] }
convert_case = "0.6.0"

View file

@ -0,0 +1,17 @@
use serde::{Deserialize, Serialize};
/// One state of the Markov chain, as found in the JSON array handed to `gen_fsm!`.
///
/// JSON keys are camelCase (`totalProbability`), mapped by the `rename_all` attribute.
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct MarkovArr {
/// Possible transitions out of this state.
pub choices: Vec<Choice>,
/// Modulus applied to the random draw when choosing among `choices`.
pub total_probability: u32,
/// The state's n-gram text; the macro converts it to PascalCase for the enum variant.
pub name: String,
}
/// One possible transition out of a state.
///
/// JSON keys are camelCase (`nextNgram`, `nextChar`, `cumulativeProbability`).
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Choice {
/// Index, within the input array, of the state to move to.
pub next_ngram: usize,
/// Character emitted when this transition is taken.
pub next_char: char,
/// Exclusive upper bound of the random draws that select this choice; the macro
/// emits it as the inclusive range `0..=cumulative_probability - 1`.
pub cumulative_probability: u32,
}

View file

@ -0,0 +1,66 @@
use convert_case::{Case, Casing};
use proc_macro::{Span, TokenStream};
use quote::quote;
use syn::{Ident, LitChar};
use crate::json::MarkovArr;
mod json;
#[proc_macro]
pub fn gen_fsm(item: TokenStream) -> TokenStream {
let input: Vec<MarkovArr> = serde_json::from_str(&format!("[{}]", item)).unwrap();
let mut match_arms = quote!();
let mut variants = quote!();
for state in input.iter() {
let name = state.name.to_case(Case::Pascal);
let name = Ident::new(&name, Span::call_site().into());
variants = quote!(
#variants
#name,
);
let mut inner_match_arms = quote!();
if state.total_probability == 1 {
let choice = &state.choices[0];
let next_state = input[choice.next_ngram].name.to_case(Case::Pascal);
let next_state = Ident::new(&next_state, Span::call_site().into());
let next_char = LitChar::new(choice.next_char, Span::call_site().into());
match_arms = quote!(
#match_arms
Self::#name => (Self::#next_state, #next_char),
);
continue;
}
for choice in &state.choices {
let next_state = input[choice.next_ngram].name.to_case(Case::Pascal);
let next_state = Ident::new(&next_state, Span::call_site().into());
let cumulative_probability = choice.cumulative_probability - 1;
let next_char = LitChar::new(choice.next_char, Span::call_site().into());
inner_match_arms = quote!(
#inner_match_arms
0..=#cumulative_probability => (Self::#next_state, #next_char),
)
}
let total_probability = state.total_probability;
match_arms = quote!(
#match_arms
Self::#name => match rng.next_u32() % #total_probability {
#inner_match_arms
_ => unreachable!(),
},
);
}
quote!(
#[derive(Debug, Clone, Copy)]
enum StateMachine {
#variants
}
impl StateMachine {
fn generate(self, mut rng: impl ::rand_core::RngCore) -> (Self, char) {
match self {
#match_arms
}
}
}
).into()
}