Updates to bio metadata script
This commit is contained in:
		
							parent
							
								
									f5e2469485
								
							
						
					
					
						commit
						21bafc6555
					
				
					 1 changed files with 68 additions and 157 deletions
				
			
		|  | @ -69,6 +69,10 @@ functions are: | ||||||
|   easier to read and to maintain. I leave it to the future readers of |   easier to read and to maintain. I leave it to the future readers of | ||||||
|   this code to determine the extent of my successes in this endeavor. |   this code to determine the extent of my successes in this endeavor. | ||||||
| 
 | 
 | ||||||
|  |   UPDATE 19 Oct 2017: We no longer allow character escapes inside our | ||||||
|  |   double-quoted strings for ease of processing. We now internally use | ||||||
|  |   the name "ƔAML" in our code to clarify that this is Not Quite YAML. | ||||||
|  | 
 | ||||||
|                                        Sending love + warmth eternal, |                                        Sending love + warmth eternal, | ||||||
|                                        - kibigo [@kibi@glitch.social] |                                        - kibigo [@kibi@glitch.social] | ||||||
| 
 | 
 | ||||||
|  | @ -96,10 +100,7 @@ const ALLOWED_CHAR      =  unirex( //  `c-printable` in the YAML 1.2 spec. | ||||||
|     compat_mode ? '[\t\n\r\x20-\x7e\x85\xa0-\ufffd]' : '[\t\n\r\x20-\x7e\x85\xa0-\ud7ff\ue000-\ufffd\u{10000}-\u{10FFFF}]' |     compat_mode ? '[\t\n\r\x20-\x7e\x85\xa0-\ufffd]' : '[\t\n\r\x20-\x7e\x85\xa0-\ud7ff\ue000-\ufffd\u{10000}-\u{10FFFF}]' | ||||||
|   ); |   ); | ||||||
| const WHITE_SPACE       = /[ \t]/; | const WHITE_SPACE       = /[ \t]/; | ||||||
| const INDENTATION       = / */;  //  Indentation must be only spaces.
 |  | ||||||
| const LINE_BREAK        = /\r?\n|\r|<br\s*\/?>/; | const LINE_BREAK        = /\r?\n|\r|<br\s*\/?>/; | ||||||
| const ESCAPE_CHAR       = /[0abt\tnvfre "\/\\N_LP]/; |  | ||||||
| const HEXADECIMAL_CHARS = /[0-9a-fA-F]/; |  | ||||||
| const INDICATOR         = /[-?:,[\]{}&#*!|>'"%@`]/; | const INDICATOR         = /[-?:,[\]{}&#*!|>'"%@`]/; | ||||||
| const FLOW_CHAR         = /[,[\]{}]/; | const FLOW_CHAR         = /[,[\]{}]/; | ||||||
| 
 | 
 | ||||||
|  | @ -121,7 +122,7 @@ const NEW_LINE          = unirex( | ||||||
|   rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK) |   rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK) | ||||||
| ); | ); | ||||||
| const SOME_NEW_LINES    = unirex( | const SOME_NEW_LINES    = unirex( | ||||||
|   '(?:' + rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK) + ')+' |   '(?:' + rexstr(NEW_LINE) + ')+' | ||||||
| ); | ); | ||||||
| const POSSIBLE_STARTS   = unirex( | const POSSIBLE_STARTS   = unirex( | ||||||
|   rexstr(DOCUMENT_START) + rexstr(/<p[^<>]*>/) + '?' |   rexstr(DOCUMENT_START) + rexstr(/<p[^<>]*>/) + '?' | ||||||
|  | @ -131,22 +132,13 @@ const POSSIBLE_ENDS     = unirex( | ||||||
|   rexstr(DOCUMENT_END) + '|' + |   rexstr(DOCUMENT_END) + '|' + | ||||||
|   rexstr(/<\/p>/) |   rexstr(/<\/p>/) | ||||||
| ); | ); | ||||||
| const CHARACTER_ESCAPE  = unirex( | const QUOTE_CHAR         = unirex( | ||||||
|   rexstr(/\\/) + |   '(?=' + rexstr(NOT_LINE_BREAK) + ')[^"]' | ||||||
|   '(?:' + |  | ||||||
|     rexstr(ESCAPE_CHAR) + '|' + |  | ||||||
|     rexstr(/x/) + rexstr(HEXADECIMAL_CHARS) + '{2}' + '|' + |  | ||||||
|     rexstr(/u/) + rexstr(HEXADECIMAL_CHARS) + '{4}' + '|' + |  | ||||||
|     rexstr(/U/) + rexstr(HEXADECIMAL_CHARS) + '{8}' + |  | ||||||
|   ')' |  | ||||||
| ); | ); | ||||||
| const ESCAPED_CHAR      = unirex( | const ANY_QUOTE_CHAR    = unirex( | ||||||
|   rexstr(/(?!["\\])/) + rexstr(NOT_LINE_BREAK) + '|' + |   rexstr(QUOTE_CHAR) + '*' | ||||||
|   rexstr(CHARACTER_ESCAPE) |  | ||||||
| ); |  | ||||||
| const ANY_ESCAPED_CHARS = unirex( |  | ||||||
|   rexstr(ESCAPED_CHAR) + '*' |  | ||||||
| ); | ); | ||||||
|  | 
 | ||||||
| const ESCAPED_APOS      = unirex( | const ESCAPED_APOS      = unirex( | ||||||
|   '(?=' + rexstr(NOT_LINE_BREAK) + ')' + rexstr(/[^']|''/) |   '(?=' + rexstr(NOT_LINE_BREAK) + ')' + rexstr(/[^']|''/) | ||||||
| ); | ); | ||||||
|  | @ -190,120 +182,76 @@ const LATER_VALUE_CHAR  = unirex( | ||||||
| 
 | 
 | ||||||
| /*  YAML CONSTRUCTS  */ | /*  YAML CONSTRUCTS  */ | ||||||
| 
 | 
 | ||||||
| const YAML_START        = unirex( | const ƔAML_START        = unirex( | ||||||
|   rexstr(ANY_WHITE_SPACE) + rexstr(/---/) |   rexstr(ANY_WHITE_SPACE) + '---' | ||||||
| ); | ); | ||||||
| const YAML_END          = unirex( | const ƔAML_END          = unirex( | ||||||
|   rexstr(ANY_WHITE_SPACE) + rexstr(/(?:---|\.\.\.)/) |   rexstr(ANY_WHITE_SPACE) + '(?:---|\.\.\.)' | ||||||
| ); | ); | ||||||
| const YAML_LOOKAHEAD    = unirex( | const ƔAML_LOOKAHEAD    = unirex( | ||||||
|   '(?=' + |   '(?=' + | ||||||
|     rexstr(YAML_START) + |     rexstr(ƔAML_START) + | ||||||
|     rexstr(ANY_ALLOWED_CHARS) + rexstr(NEW_LINE) + |     rexstr(ANY_ALLOWED_CHARS) + rexstr(NEW_LINE) + | ||||||
|     rexstr(YAML_END) + rexstr(POSSIBLE_ENDS) + |     rexstr(ƔAML_END) + rexstr(POSSIBLE_ENDS) + | ||||||
|   ')' |   ')' | ||||||
| ); | ); | ||||||
| const YAML_DOUBLE_QUOTE = unirex( | const ƔAML_DOUBLE_QUOTE = unirex( | ||||||
|   rexstr(/"/) + rexstr(ANY_ESCAPED_CHARS) + rexstr(/"/) |   '"' + rexstr(ANY_QUOTE_CHAR) + '"' | ||||||
| ); | ); | ||||||
| const YAML_SINGLE_QUOTE = unirex( | const ƔAML_SINGLE_QUOTE = unirex( | ||||||
|   rexstr(/'/) + rexstr(ANY_ESCAPED_APOS) + rexstr(/'/) |   '\'' + rexstr(ANY_ESCAPED_APOS) + '\'' | ||||||
| ); | ); | ||||||
| const YAML_SIMPLE_KEY   = unirex( | const ƔAML_SIMPLE_KEY   = unirex( | ||||||
|   rexstr(FIRST_KEY_CHAR) + rexstr(LATER_KEY_CHAR) + '*' |   rexstr(FIRST_KEY_CHAR) + rexstr(LATER_KEY_CHAR) + '*' | ||||||
| ); | ); | ||||||
| const YAML_SIMPLE_VALUE = unirex( | const ƔAML_SIMPLE_VALUE = unirex( | ||||||
|   rexstr(FIRST_VALUE_CHAR) + rexstr(LATER_VALUE_CHAR) + '*' |   rexstr(FIRST_VALUE_CHAR) + rexstr(LATER_VALUE_CHAR) + '*' | ||||||
| ); | ); | ||||||
| const YAML_KEY          = unirex( | const ƔAML_KEY          = unirex( | ||||||
|   rexstr(YAML_DOUBLE_QUOTE) + '|' + |   rexstr(ƔAML_DOUBLE_QUOTE) + '|' + | ||||||
|   rexstr(YAML_SINGLE_QUOTE) + '|' + |   rexstr(ƔAML_SINGLE_QUOTE) + '|' + | ||||||
|   rexstr(YAML_SIMPLE_KEY) |   rexstr(ƔAML_SIMPLE_KEY) | ||||||
| ); | ); | ||||||
| const YAML_VALUE        = unirex( | const ƔAML_VALUE        = unirex( | ||||||
|   rexstr(YAML_DOUBLE_QUOTE) + '|' + |   rexstr(ƔAML_DOUBLE_QUOTE) + '|' + | ||||||
|   rexstr(YAML_SINGLE_QUOTE) + '|' + |   rexstr(ƔAML_SINGLE_QUOTE) + '|' + | ||||||
|   rexstr(YAML_SIMPLE_VALUE) |   rexstr(ƔAML_SIMPLE_VALUE) | ||||||
| ); | ); | ||||||
| const YAML_SEPARATOR    = unirex( | const ƔAML_SEPARATOR    = unirex( | ||||||
|   rexstr(ANY_WHITE_SPACE) + |   rexstr(ANY_WHITE_SPACE) + | ||||||
|   ':' + rexstr(WHITE_SPACE) + |   ':' + rexstr(WHITE_SPACE) + | ||||||
|   rexstr(ANY_WHITE_SPACE) |   rexstr(ANY_WHITE_SPACE) | ||||||
| ); | ); | ||||||
| const YAML_LINE         = unirex( | const ƔAML_LINE         = unirex( | ||||||
|   '(' + rexstr(YAML_KEY) + ')' + |   '(' + rexstr(ƔAML_KEY) + ')' + | ||||||
|   rexstr(YAML_SEPARATOR) + |   rexstr(ƔAML_SEPARATOR) + | ||||||
|   '(' + rexstr(YAML_VALUE) + ')' |   '(' + rexstr(ƔAML_VALUE) + ')' | ||||||
| ); | ); | ||||||
| 
 | 
 | ||||||
| /*  FRONTMATTER REGEX  */ | /*  FRONTMATTER REGEX  */ | ||||||
| 
 | 
 | ||||||
| const YAML_FRONTMATTER  = unirex( | const ƔAML_FRONTMATTER  = unirex( | ||||||
|   rexstr(POSSIBLE_STARTS) + |   rexstr(POSSIBLE_STARTS) + | ||||||
|   rexstr(YAML_LOOKAHEAD) + |   rexstr(ƔAML_LOOKAHEAD) + | ||||||
|   rexstr(YAML_START) + rexstr(SOME_NEW_LINES) + |   rexstr(ƔAML_START) + rexstr(SOME_NEW_LINES) + | ||||||
|   '(?:' + |   '(?:' + | ||||||
|     '(' + rexstr(INDENTATION) + ')' + |     rexstr(ANY_WHITE_SPACE) + rexstr(ƔAML_LINE) + rexstr(SOME_NEW_LINES) + | ||||||
|     rexstr(YAML_LINE) + rexstr(SOME_NEW_LINES) + |   '){0,5}' + | ||||||
|     '(?:' + |   rexstr(ƔAML_END) + rexstr(POSSIBLE_ENDS) | ||||||
|       '\\1' + rexstr(YAML_LINE) + rexstr(SOME_NEW_LINES) + |  | ||||||
|     '){0,4}' + |  | ||||||
|   ')?' + |  | ||||||
|   rexstr(YAML_END) + rexstr(POSSIBLE_ENDS) |  | ||||||
| ); | ); | ||||||
| 
 | 
 | ||||||
| /*  SEARCHES  */ | /*  SEARCHES  */ | ||||||
| 
 | 
 | ||||||
| const FIND_YAML_LINES   = unirex( | const FIND_ƔAML_LINE    = unirex( | ||||||
|   rexstr(NEW_LINE) + rexstr(INDENTATION) + rexstr(YAML_LINE) |   rexstr(NEW_LINE) + rexstr(ANY_WHITE_SPACE) + rexstr(ƔAML_LINE) | ||||||
| ); | ); | ||||||
| 
 | 
 | ||||||
| /*  STRING PROCESSING  */ | /*  STRING PROCESSING  */ | ||||||
| 
 | 
 | ||||||
| function processString(str) { | function processString (str) { | ||||||
|   switch (str.charAt(0)) { |   switch (str.charAt(0)) { | ||||||
|   case '"': |   case '"': | ||||||
|     return str |     return str.substring(1, str.length - 1); | ||||||
|       .substring(1, str.length - 1) |  | ||||||
|       .replace(/\\0/g, '\x00') |  | ||||||
|       .replace(/\\a/g, '\x07') |  | ||||||
|       .replace(/\\b/g, '\x08') |  | ||||||
|       .replace(/\\t/g, '\x09') |  | ||||||
|       .replace(/\\\x09/g, '\x09') |  | ||||||
|       .replace(/\\n/g, '\x0a') |  | ||||||
|       .replace(/\\v/g, '\x0b') |  | ||||||
|       .replace(/\\f/g, '\x0c') |  | ||||||
|       .replace(/\\r/g, '\x0d') |  | ||||||
|       .replace(/\\e/g, '\x1b') |  | ||||||
|       .replace(/\\ /g, '\x20') |  | ||||||
|       .replace(/\\"/g, '\x22') |  | ||||||
|       .replace(/\\\//g, '\x2f') |  | ||||||
|       .replace(/\\\\/g, '\x5c') |  | ||||||
|       .replace(/\\N/g, '\x85') |  | ||||||
|       .replace(/\\_/g, '\xa0') |  | ||||||
|       .replace(/\\L/g, '\u2028') |  | ||||||
|       .replace(/\\P/g, '\u2029') |  | ||||||
|       .replace( |  | ||||||
|         new RegExp( |  | ||||||
|           unirex( |  | ||||||
|             rexstr(/\\x/) + '(' + rexstr(HEXADECIMAL_CHARS) + '{2})' |  | ||||||
|           ), 'gu' |  | ||||||
|         ), (_, n) => String.fromCodePoint('0x' + n) |  | ||||||
|       ) |  | ||||||
|       .replace( |  | ||||||
|         new RegExp( |  | ||||||
|           unirex( |  | ||||||
|             rexstr(/\\u/) + '(' + rexstr(HEXADECIMAL_CHARS) + '{4})' |  | ||||||
|           ), 'gu' |  | ||||||
|         ), (_, n) => String.fromCodePoint('0x' + n) |  | ||||||
|       ) |  | ||||||
|       .replace( |  | ||||||
|         new RegExp( |  | ||||||
|           unirex( |  | ||||||
|             rexstr(/\\U/) + '(' + rexstr(HEXADECIMAL_CHARS) + '{8})' |  | ||||||
|           ), 'gu' |  | ||||||
|         ), (_, n) => String.fromCodePoint('0x' + n) |  | ||||||
|       ); |  | ||||||
|   case '\'': |   case '\'': | ||||||
|     return str |     return str | ||||||
|       .substring(1, str.length - 1) |       .substring(1, str.length - 1) | ||||||
|  | @ -321,15 +269,18 @@ export function processBio(content) { | ||||||
|     text: content, |     text: content, | ||||||
|     metadata: [], |     metadata: [], | ||||||
|   }; |   }; | ||||||
|   let yaml = content.match(YAML_FRONTMATTER); |   let ɣaml = content.match(ƔAML_FRONTMATTER); | ||||||
|   if (!yaml) return result; |   if (!ɣaml) { | ||||||
|   else yaml = yaml[0]; |     return result; | ||||||
|   let start = content.search(YAML_START); |   } else { | ||||||
|   let end = start + yaml.length - yaml.search(YAML_START); |     ɣaml = ɣaml[0]; | ||||||
|   result.text = content.substr(0, start) + content.substr(end); |   } | ||||||
|  |   const start = content.search(ƔAML_START); | ||||||
|  |   const end = start + ɣaml.length - ɣaml.search(ƔAML_START); | ||||||
|  |   result.text = content.substr(end); | ||||||
|   let metadata = null; |   let metadata = null; | ||||||
|   let query = new RegExp(FIND_YAML_LINES, 'g'); |   let query = new RegExp(rexstr(FIND_ƔAML_LINE), 'g');  //  Some browsers don't allow flags unless both args are strings
 | ||||||
|   while ((metadata = query.exec(yaml))) { |   while ((metadata = query.exec(ɣaml))) { | ||||||
|     result.metadata.push([ |     result.metadata.push([ | ||||||
|       processString(metadata[1]), |       processString(metadata[1]), | ||||||
|       processString(metadata[2]), |       processString(metadata[2]), | ||||||
|  | @ -352,63 +303,23 @@ export function createBio(note, data) { | ||||||
|         let val = '' + data[i][1]; |         let val = '' + data[i][1]; | ||||||
| 
 | 
 | ||||||
|         //  Key processing
 |         //  Key processing
 | ||||||
|         if (key === (key.match(YAML_SIMPLE_KEY) || [])[0]) /*  do nothing  */; |         if (key === (key.match(ƔAML_SIMPLE_KEY) || [])[0]) /*  do nothing  */; | ||||||
|         else if (key.indexOf('\'') === -1 && key === (key.match(ANY_ESCAPED_APOS) || [])[0]) key = '\'' + key + '\''; |         else if (key === (key.match(ANY_QUOTE_CHAR) || [])[0]) key = '"' + key + '"'; | ||||||
|         else { |         else { | ||||||
|           key = key |           key = key | ||||||
|             .replace(/\x00/g, '\\0') |             .replace(/'/g, '\'\'') | ||||||
|             .replace(/\x07/g, '\\a') |             .replace(new RegExp(rexstr(NOT_ALLOWED_CHAR), compat_mode ? 'g' : 'gu'), '<27>'); | ||||||
|             .replace(/\x08/g, '\\b') |           key = '\'' + key + '\''; | ||||||
|             .replace(/\x0a/g, '\\n') |  | ||||||
|             .replace(/\x0b/g, '\\v') |  | ||||||
|             .replace(/\x0c/g, '\\f') |  | ||||||
|             .replace(/\x0d/g, '\\r') |  | ||||||
|             .replace(/\x1b/g, '\\e') |  | ||||||
|             .replace(/\x22/g, '\\"') |  | ||||||
|             .replace(/\x5c/g, '\\\\'); |  | ||||||
|           let badchars = key.match( |  | ||||||
|             new RegExp(rexstr(NOT_ALLOWED_CHAR), 'gu') |  | ||||||
|           ) || []; |  | ||||||
|           for (let j = 0; j < badchars.length; j++) { |  | ||||||
|             key = key.replace( |  | ||||||
|               badchars[i], |  | ||||||
|               '\\u' + badchars[i].codePointAt(0).toLocaleString('en', { |  | ||||||
|                 useGrouping: false, |  | ||||||
|                 minimumIntegerDigits: 4, |  | ||||||
|               }) |  | ||||||
|             ); |  | ||||||
|           } |  | ||||||
|           key = '"' + key + '"'; |  | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         //  Value processing
 |         //  Value processing
 | ||||||
|         if (val === (val.match(YAML_SIMPLE_VALUE) || [])[0]) /*  do nothing  */; |         if (val === (val.match(ƔAML_SIMPLE_VALUE) || [])[0]) /*  do nothing  */; | ||||||
|         else if (val.indexOf('\'') === -1 && val === (val.match(ANY_ESCAPED_APOS) || [])[0]) val = '\'' + val + '\''; |         else if (val === (val.match(ANY_QUOTE_CHAR) || [])[0]) val = '"' + val + '"'; | ||||||
|         else { |         else { | ||||||
|           val = val |           key = key | ||||||
|             .replace(/\x00/g, '\\0') |             .replace(/'/g, '\'\'') | ||||||
|             .replace(/\x07/g, '\\a') |             .replace(new RegExp(rexstr(NOT_ALLOWED_CHAR), compat_mode ? 'g' : 'gu'), '<27>'); | ||||||
|             .replace(/\x08/g, '\\b') |           key = '\'' + key + '\''; | ||||||
|             .replace(/\x0a/g, '\\n') |  | ||||||
|             .replace(/\x0b/g, '\\v') |  | ||||||
|             .replace(/\x0c/g, '\\f') |  | ||||||
|             .replace(/\x0d/g, '\\r') |  | ||||||
|             .replace(/\x1b/g, '\\e') |  | ||||||
|             .replace(/\x22/g, '\\"') |  | ||||||
|             .replace(/\x5c/g, '\\\\'); |  | ||||||
|           let badchars = val.match( |  | ||||||
|             new RegExp(rexstr(NOT_ALLOWED_CHAR), 'gu') |  | ||||||
|           ) || []; |  | ||||||
|           for (let j = 0; j < badchars.length; j++) { |  | ||||||
|             val = val.replace( |  | ||||||
|               badchars[i], |  | ||||||
|               '\\u' + badchars[i].codePointAt(0).toLocaleString('en', { |  | ||||||
|                 useGrouping: false, |  | ||||||
|                 minimumIntegerDigits: 4, |  | ||||||
|               }) |  | ||||||
|             ); |  | ||||||
|           } |  | ||||||
|           val = '"' + val + '"'; |  | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         frontmatter += key + ': ' + val + '\n'; |         frontmatter += key + ': ' + val + '\n'; | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue