parent
a6f5111c79
commit
e6300de142
@ -1,50 +1,295 @@
|
|||||||
/*
|
/*********************************************************************\
|
||||||
THIS IS A MESS BECAUSE EFFING MASTODON AND ITS EFFING HTML BIOS
|
|
||||||
INSTEAD OF JUST STORING EVERYTHING IN PLAIN EFFING TEXT ! ! ! !
|
To my lovely code maintainers,
|
||||||
BLANK LINES ALSO WON'T WORK BECAUSE RIGHT NOW MASTODON CONVERTS
|
|
||||||
THOSE INTO `<P>` ELEMENTS INSTEAD OF LEAVING IT AS `<BR><BR>` !
|
The syntax recognized by the Mastodon frontend for its bio metadata
|
||||||
TL:DR; THIS IS LARGELY A HACK. WITH BETTER BACKEND STUFF WE CAN
|
feature is a subset of that provided by the YAML 1.2 specification.
|
||||||
IMPROVE THIS BY BETTER PREDICTING HOW THE METADATA WILL BE SENT
|
In particular, Mastodon recognizes metadata which is provided as an
|
||||||
WHILE MAINTAINING BASIC PLAIN-TEXT PROCESSING. THE OTHER OPTION
|
implicit YAML map, where each key-value pair takes up only a single
|
||||||
IS TO TURN ALL BIOS INTO PLAIN-TEXT VIA A TREE-WALKER, AND THEN
|
line (no multi-line values are permitted). To simplify the level of
|
||||||
PROCESS THE YAML AND LINKS AND EVERYTHING OURSELVES. THIS WOULD
|
processing required, Mastodon metadata frontmatter has been limited
|
||||||
BE INCREDIBLY COMPLICATED, AND IT WOULD BE A MILLION TIMES LESS
|
to only allow those characters in the `c-printable` set, as defined
|
||||||
DIFFICULT IF MASTODON JUST GAVE US PLAIN-TEXT BIOS (WHICH QUITE
|
by the YAML 1.2 specification, instead of permitting those from the
|
||||||
FRANKLY MAKES THE MOST SENSE SINCE THAT'S WHAT USERS PROVIDE IN
|
`nb-json` characters inside double-quoted strings like YAML proper.
|
||||||
SETTINGS) TO BEGIN WITH AND LEFT ALL PROCESSING TO THE FRONTEND
|
¶ It is important to note that Mastodon only borrows the *syntax*
|
||||||
TO HANDLE ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !
|
of YAML, not its semantics. This is to say, Mastodon won't make any
|
||||||
ANYWAY I KNOW WHAT NEEDS TO BE DONE REGARDING BACKEND STUFF BUT
|
attempt to interpret the data it receives. `true` will not become a
|
||||||
I'M NOT SMART ENOUGH TO FIGURE OUT HOW TO ACTUALLY IMPLEMENT IT
|
boolean; `56` will not be interpreted as a number. Rather, each key
|
||||||
SO FEEL FREE TO @ ME IF YOU NEED MY IDEAS REGARDING THAT. UNTIL
|
and every value will be read as a string, and as a string they will
|
||||||
THEN WE'LL JUST HAVE TO MAKE DO WITH THIS MESSY AND UNFORTUNATE
|
remain. The order of the pairs is unchanged, and any duplicate keys
|
||||||
HACKING ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !
|
are preserved. However, YAML escape sequences will be replaced with
|
||||||
|
the proper interpretations according to the YAML 1.2 specification.
|
||||||
with love,
|
¶ The implementation provided below interprets `<br>` as `\n` and
|
||||||
@kibi@glitch.social <3
|
allows for an open <p> tag at the beginning of the bio. It replaces
|
||||||
*/
|
the escaped character entities `&apos;` and `&quot;` with single or
|
||||||
|
double quotes, respectively, prior to processing. However, no other
|
||||||
const NEW_LINE = /(?:^|\r?\n|<br\s*\/?>)/g;
|
escaped characters are replaced, not even those which might have an
|
||||||
const YAML_OPENER = /---/;
|
impact on the syntax otherwise. These minor allowances are provided
|
||||||
const YAML_CLOSER = /(?:---|\.\.\.)/;
|
because the Mastodon backend will insert these things automatically
|
||||||
const YAML_STRING = /(?:"(?:[^"\n]){1,32}"|'(?:[^'\n]){1,32}'|(?:[^'":\n]){1,32})/g;
|
into a bio before sending it through the API, so it is important we
|
||||||
const YAML_LINE = new RegExp('\\s*' + YAML_STRING.source + '\\s*:\\s*' + YAML_STRING.source + '\\s*', 'g');
|
account for them. Aside from this, the YAML frontmatter must be the
|
||||||
const BIO_REGEX = new RegExp(NEW_LINE.source + '*' + YAML_OPENER.source + NEW_LINE.source + '+(?:' + YAML_LINE.source + NEW_LINE.source + '+){0,4}' + YAML_CLOSER.source + NEW_LINE.source + '*');
|
very first thing in the bio, leading with three consecutive hyphen-
|
||||||
|
minuses (`---`), and ending with the same or, alternatively, instead
|
||||||
export function processBio(data) {
|
with three periods (`...`). No limits have been set with respect to
|
||||||
let props = { text: data, metadata: [] };
|
the number of characters permitted in the frontmatter, although one
|
||||||
let yaml = data.match(BIO_REGEX);
|
should note that only limited space is provided for them in the UI.
|
||||||
if (!yaml) return props;
|
¶ The regular expression used to check the existence of, and then
|
||||||
|
process, the YAML frontmatter has been split into a number of small
|
||||||
|
components in the code below, in the vain hope that it will be much
|
||||||
|
easier to read and to maintain. I leave it to the future readers of
|
||||||
|
this code to determine the extent of my successes in this endeavor.
|
||||||
|
|
||||||
|
Sending love + warmth eternal,
|
||||||
|
- kibigo [@kibi@glitch.social]
|
||||||
|
|
||||||
|
\*********************************************************************/
|
||||||
|
|
||||||
|
/* CONVENIENCE FUNCTIONS */
|
||||||
|
|
||||||
|
// Compiles a pattern string into a RegExp carrying the Unicode (`u`) flag.
const unirex = (pattern) => new RegExp(pattern, 'u');
|
||||||
|
// Returns the source of a RegExp wrapped in a non-capturing group, so that it
// can be safely concatenated with other pattern fragments.
const rexstr = (exp) => `(?:${exp.source})`;
|
||||||
|
|
||||||
|
/* CHARACTER CLASSES */
|
||||||
|
|
||||||
|
// Zero-width anchors for the very start and very end of the bio.
const DOCUMENT_START = /^/;
const DOCUMENT_END = /$/;
// `c-printable` in the YAML 1.2 spec.
const ALLOWED_CHAR =
  /[\t\n\r\x20-\x7e\x85\xa0-\ud7ff\ue000-\ufffd\u{10000}-\u{10FFFF}]/u;
// A single space or tab.
const WHITE_SPACE = /[ \t]/;
// Indentation must be only spaces.
const INDENTATION = / */;
// A textual line break (CRLF, LF or CR) or an HTML `<br>` tag.
const LINE_BREAK = /\r?\n|\r|<br\s*\/?>/;
// Characters which may follow a backslash in a single-character escape.
const ESCAPE_CHAR = /[0abt\tnvfre "\/\\N_LP]/;
const HEXADECIMAL_CHARS = /[0-9a-fA-F]/;
// Characters which may not begin an unquoted key or value (but see the
// `[?:-]` exceptions in FIRST_KEY_CHAR and FIRST_VALUE_CHAR).
const INDICATOR = /[-?:,[\]{}&#*!|>'"%@`]/;
// Flow-collection punctuation; disallowed in unquoted keys.
const FLOW_CHAR = /[,[\]{}]/;

/*  NEGATED CHARACTER CLASSES  */

// Each of these matches any one character *not* matched by the named class.
const NOT_WHITE_SPACE = unirex(`(?!${rexstr(WHITE_SPACE)})[^]`);
const NOT_LINE_BREAK = unirex(`(?!${rexstr(LINE_BREAK)})[^]`);
const NOT_INDICATOR = unirex(`(?!${rexstr(INDICATOR)})[^]`);
const NOT_FLOW_CHAR = unirex(`(?!${rexstr(FLOW_CHAR)})[^]`);

/*  BASIC CONSTRUCTS  */

// Builds a positive lookahead pattern fragment for the given RegExp.
const lookahead = (exp) => `(?=${rexstr(exp)})`;

const ANY_WHITE_SPACE = unirex(`${rexstr(WHITE_SPACE)}*`);
const ANY_ALLOWED_CHARS = unirex(`${rexstr(ALLOWED_CHAR)}*`);
// Optional trailing white space followed by one line break.
const NEW_LINE = unirex(`${rexstr(ANY_WHITE_SPACE)}${rexstr(LINE_BREAK)}`);
// One or more NEW_LINEs.
const SOME_NEW_LINES = unirex(`${rexstr(NEW_LINE)}+`);
// Start of the bio, optionally followed by an opening `<p>` tag.
const POSSIBLE_STARTS = unirex(
  `${rexstr(DOCUMENT_START)}${rexstr(/<p[^<>]*>/)}?`
);
// What may follow the closing frontmatter marker: line breaks, the end of
// the bio, or a closing `</p>` tag.
const POSSIBLE_ENDS = unirex(
  [SOME_NEW_LINES, DOCUMENT_END, /<\/p>/].map((exp) => rexstr(exp)).join('|')
);
// A backslash escape: a single-character escape, or `\x`, `\u`, `\U`
// followed by 2, 4 or 8 hexadecimal digits respectively.
const CHARACTER_ESCAPE = unirex(
  `${rexstr(/\\/)}(?:${[
    rexstr(ESCAPE_CHAR),
    `${rexstr(/x/)}${rexstr(HEXADECIMAL_CHARS)}{2}`,
    `${rexstr(/u/)}${rexstr(HEXADECIMAL_CHARS)}{4}`,
    `${rexstr(/U/)}${rexstr(HEXADECIMAL_CHARS)}{8}`,
  ].join('|')})`
);
// One unit of double-quoted content: any non-line-break character other
// than `"` and `\`, or else a backslash escape.
const ESCAPED_CHAR = unirex(
  `${rexstr(/(?!["\\])/)}${rexstr(NOT_LINE_BREAK)}|${rexstr(CHARACTER_ESCAPE)}`
);
const ANY_ESCAPED_CHARS = unirex(`${rexstr(ESCAPED_CHAR)}*`);
// One unit of single-quoted content: any non-apostrophe, or a doubled
// apostrophe; must not start at a line break.
const ESCAPED_APOS = unirex(
  `${lookahead(NOT_LINE_BREAK)}${rexstr(/[^']|''/)}`
);
const ANY_ESCAPED_APOS = unirex(`${rexstr(ESCAPED_APOS)}*`);
// First character of an unquoted key: a non-indicator, or one of `?:-`
// when followed by a character that is not a line break, white space, or
// flow character.
const FIRST_KEY_CHAR = unirex(
  [
    lookahead(NOT_LINE_BREAK) +
      lookahead(NOT_WHITE_SPACE) +
      rexstr(NOT_INDICATOR),
    rexstr(/[?:-]/) +
      lookahead(NOT_LINE_BREAK) +
      lookahead(NOT_WHITE_SPACE) +
      lookahead(NOT_FLOW_CHAR),
  ].join('|')
);
// As FIRST_KEY_CHAR, except that flow indicators are allowed in values.
const FIRST_VALUE_CHAR = unirex(
  [
    lookahead(NOT_LINE_BREAK) +
      lookahead(NOT_WHITE_SPACE) +
      rexstr(NOT_INDICATOR),
    rexstr(/[?:-]/) +
      lookahead(NOT_LINE_BREAK) +
      lookahead(NOT_WHITE_SPACE),
  ].join('|')
);
// Any subsequent character of an unquoted key: white space; a character
// other than `:` or `#` (optionally followed by `#`) which is not a line
// break, white space, or flow character; or a `:` not followed by white
// space.
const LATER_KEY_CHAR = unirex(
  [
    rexstr(WHITE_SPACE),
    lookahead(NOT_LINE_BREAK) +
      lookahead(NOT_WHITE_SPACE) +
      lookahead(NOT_FLOW_CHAR) +
      rexstr(/[^:#]#?/),
    rexstr(/:/) + lookahead(NOT_WHITE_SPACE),
  ].join('|')
);
// As LATER_KEY_CHAR, except that flow indicators are allowed in values.
const LATER_VALUE_CHAR = unirex(
  [
    rexstr(WHITE_SPACE),
    lookahead(NOT_LINE_BREAK) +
      lookahead(NOT_WHITE_SPACE) +
      rexstr(/[^:#]#?/),
    rexstr(/:/) + lookahead(NOT_WHITE_SPACE),
  ].join('|')
);

/*  YAML CONSTRUCTS  */

// Opening marker: optional white space, then `---`.
const YAML_START = unirex(`${rexstr(ANY_WHITE_SPACE)}${rexstr(/---/)}`);
// Closing marker: optional white space, then `---` or `...`.
const YAML_END = unirex(
  `${rexstr(ANY_WHITE_SPACE)}${rexstr(/(?:---|\.\.\.)/)}`
);
// Lookahead asserting that an opening marker is eventually followed, using
// only allowed characters, by a closing marker on its own line.
const YAML_LOOKAHEAD = unirex(
  `(?=${[
    YAML_START,
    ANY_ALLOWED_CHARS,
    NEW_LINE,
    YAML_END,
    POSSIBLE_ENDS,
  ].map((exp) => rexstr(exp)).join('')})`
);
const YAML_DOUBLE_QUOTE = unirex(
  `${rexstr(/"/)}${rexstr(ANY_ESCAPED_CHARS)}${rexstr(/"/)}`
);
const YAML_SINGLE_QUOTE = unirex(
  `${rexstr(/'/)}${rexstr(ANY_ESCAPED_APOS)}${rexstr(/'/)}`
);
const YAML_SIMPLE_KEY = unirex(
  `${rexstr(FIRST_KEY_CHAR)}${rexstr(LATER_KEY_CHAR)}*`
);
const YAML_SIMPLE_VALUE = unirex(
  `${rexstr(FIRST_VALUE_CHAR)}${rexstr(LATER_VALUE_CHAR)}*`
);
// A key or value is a double-quoted, single-quoted, or unquoted string.
const YAML_KEY = unirex(
  [YAML_DOUBLE_QUOTE, YAML_SINGLE_QUOTE, YAML_SIMPLE_KEY]
    .map((exp) => rexstr(exp))
    .join('|')
);
const YAML_VALUE = unirex(
  [YAML_DOUBLE_QUOTE, YAML_SINGLE_QUOTE, YAML_SIMPLE_VALUE]
    .map((exp) => rexstr(exp))
    .join('|')
);
// A colon which must be followed by at least one white space character.
const YAML_SEPARATOR = unirex(
  `${rexstr(ANY_WHITE_SPACE)}:${rexstr(WHITE_SPACE)}${rexstr(ANY_WHITE_SPACE)}`
);
// One `key: value` pair; the key and the value are the two capture groups.
const YAML_LINE = unirex(
  `(${rexstr(YAML_KEY)})${rexstr(YAML_SEPARATOR)}(${rexstr(YAML_VALUE)})`
);

/*  FRONTMATTER REGEX  */

// The whole frontmatter: opening marker, between zero and five `key: value`
// lines which all share the indentation of the first (capture group 1,
// re-used through the `\1` backreference), and the closing marker.
const YAML_FRONTMATTER = (() => {
  const pairLine = `${rexstr(YAML_LINE)}${rexstr(SOME_NEW_LINES)}`;
  return unirex(
    `${rexstr(POSSIBLE_STARTS)}${rexstr(YAML_LOOKAHEAD)}` +
    `${rexstr(YAML_START)}${rexstr(SOME_NEW_LINES)}` +
    `(?:(${rexstr(INDENTATION)})${pairLine}(?:\\1${pairLine}){0,4})?` +
    `${rexstr(YAML_END)}${rexstr(POSSIBLE_ENDS)}`
  );
})();

/*  SEARCHES  */

// Locates a single `key: value` line (preceded by a line break and any
// indentation) inside already-matched frontmatter.
const FIND_YAML_LINES = unirex(
  `${rexstr(NEW_LINE)}${rexstr(INDENTATION)}${rexstr(YAML_LINE)}`
);
|
||||||
|
|
||||||
|
/* STRING PROCESSING */
|
||||||
|
|
||||||
|
// Replacement text for every single-character escape permitted inside a
// double-quoted string (the character following the backslash is the key).
const YAML_SIMPLE_ESCAPES = new Map([
  ['0', '\x00'],
  ['a', '\x07'],
  ['b', '\x08'],
  ['t', '\x09'],
  ['\t', '\x09'],
  ['n', '\x0a'],
  ['v', '\x0b'],
  ['f', '\x0c'],
  ['r', '\x0d'],
  ['e', '\x1b'],
  [' ', '\x20'],
  ['"', '\x22'],
  ['/', '\x2f'],
  ['\\', '\x5c'],
  ['N', '\x85'],
  ['_', '\xa0'],
  ['L', '\u2028'],
  ['P', '\u2029'],
]);

// A backslash followed by `x`/`u`/`U` and 2/4/8 hexadecimal digits, or else
// by any single character. The hexadecimal class is spelled out here so the
// pattern does not depend on declaration order elsewhere in the file.
const YAML_ESCAPE_SEQUENCE =
  /\\(?:x([0-9a-fA-F]{2})|u([0-9a-fA-F]{4})|U([0-9a-fA-F]{8})|([^]))/gu;

// Decodes one escape sequence matched by YAML_ESCAPE_SEQUENCE. Sequences
// which cannot be decoded are returned unchanged.
function decodeYamlEscape(sequence, hex2, hex4, hex8, simple) {
  const hex = hex2 || hex4 || hex8;
  if (hex !== undefined) {
    const codePoint = parseInt(hex, 16);
    // `\U` can name values past the last Unicode code point, which
    // `String.fromCodePoint` rejects with a RangeError.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : sequence;
  }
  return YAML_SIMPLE_ESCAPES.has(simple)
    ? YAML_SIMPLE_ESCAPES.get(simple)
    : sequence;
}

/**
 * Converts the raw text of a matched key or value into the string it
 * denotes.
 *
 * - Double-quoted strings lose their quotes and have their backslash
 *   escapes decoded.
 * - Single-quoted strings lose their quotes and have each `''` collapsed
 *   into a single apostrophe.
 * - Anything else is returned unchanged.
 *
 * Escapes are decoded in ONE left-to-right pass. Decoding them one kind at a
 * time is incorrect: an escaped backslash followed by a letter (for example
 * `\\n`, meaning a backslash and an `n`) would have its second backslash
 * consumed as the start of a different escape.
 *
 * @param {string} str - The text of the key or value, quotes included.
 * @returns {string} The processed string.
 */
function processString(str) {
  switch (str.charAt(0)) {
  case '"':
    return str.slice(1, -1).replace(YAML_ESCAPE_SEQUENCE, decodeYamlEscape);
  case '\'':
    return str.slice(1, -1).replace(/''/g, '\'');
  default:
    return str;
  }
}
|
||||||
|
|
||||||
|
/* BIO PROCESSING */
|
||||||
|
|
||||||
|
/**
 * Separates YAML-style metadata frontmatter from the rest of a bio.
 *
 * The escaped quote entities are first turned back into plain quotes so that
 * quoted keys and values can be recognized. If the bio then begins with
 * frontmatter matching `YAML_FRONTMATTER`, that frontmatter is cut out of
 * the text and each of its `key: value` lines is collected, in order and
 * with duplicates preserved.
 *
 * @param {string} content - The bio, as HTML.
 * @returns {{text: string, metadata: Array<[string, string]>}} `text` is
 *   the bio with quotes unescaped and any frontmatter removed; `metadata`
 *   holds the `[key, value]` pairs found (empty if there was no
 *   frontmatter).
 */
export function processBio(content) {
  // NOTE(review): the apostrophe may arrive either as `&apos;` or as the
  // numeric `&#39;`; both are accepted here — confirm against the backend.
  const bio = content
    .replace(/&quot;/g, '"')
    .replace(/&(?:apos|#39);/g, '\'');
  const result = {
    text: bio,
    metadata: [],
  };

  const match = bio.match(YAML_FRONTMATTER);
  if (!match) return result;
  const yaml = match[0];

  // The match may begin with an opening `<p>` tag, which has to stay in the
  // text: cut from the opening marker through to the end of the match.
  const start = bio.search(YAML_START);
  const end = start + yaml.length - yaml.search(YAML_START);
  result.text = bio.slice(0, start) + bio.slice(end);

  // A fresh global regex per call, since `exec` keeps state in `lastIndex`.
  // The `u` flag is retained to agree with how the pattern was compiled.
  const query = new RegExp(FIND_YAML_LINES.source, 'gu');
  let line = query.exec(yaml);
  while (line !== null) {
    result.metadata.push([
      processString(line[1]),
      processString(line[2]),
    ]);
    line = query.exec(yaml);
  }
  return result;
}
|
||||||
|
Loading…
Reference in new issue