Convert Converter to class
This commit is contained in:
		
							parent
							
								
									1dc965d9a8
								
							
						
					
					
						commit
						d75473b2e8
					
				| @ -3,7 +3,7 @@ import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './ | ||||
| 
 | ||||
| describe('text-to-unicode (legacy tests)', () => { | ||||
|   const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false); | ||||
|   const convertUnicodeToText = converters.decimalEntities.unescape; | ||||
|   const convertUnicodeToText = (escaped: string) => converters.decimalEntities.unescape(escaped); | ||||
| 
 | ||||
|   describe('convertTextToUnicode', () => { | ||||
|     it('a text string is converted to unicode representation', () => { | ||||
|  | ||||
| @ -11,70 +11,63 @@ function codePoints(text: string): number[] { | ||||
|   return [...text].map(char => char.codePointAt(0)); | ||||
| } | ||||
| 
 | ||||
| export interface Converter { | ||||
| interface ConverterConfig { | ||||
|   name: string | ||||
|   escape(text: string, skipAscii: boolean): string | ||||
|   unescape(text: string): string | ||||
| }; | ||||
|   escape: { | ||||
|     charValues?(text: string): number[] | ||||
|     mapper(charValue: number): string | ||||
|     /** @prop regular expression for default content to skip. Must have exactly 1 capture group. */ | ||||
|     asciiSkipper: RegExp | ||||
|   } | ||||
|   unescape: { | ||||
|     regex: RegExp | ||||
|     radix: number | ||||
|   } | ||||
| } | ||||
| class Converter { | ||||
|   constructor(public config: ConverterConfig) {} | ||||
| 
 | ||||
| interface EscapeConfig { | ||||
|   charValues?(text: string): number[] | ||||
|   mapper(charValue: number): string | ||||
|   /** @prop regular expression for default content to skip. Must have exactly 1 capture group. */ | ||||
|   asciiSkipper: RegExp | ||||
| }; | ||||
| 
 | ||||
| function escaper({ charValues: getCharValues, mapper, asciiSkipper: skipper }: EscapeConfig) { | ||||
|   return (text: string, skip: boolean): string => { | ||||
|     getCharValues ??= codePoints; | ||||
|   escape(text: string, skipAscii: boolean): string { | ||||
|     const { asciiSkipper, charValues, mapper } = this.config.escape; | ||||
|     const getCharValues = charValues ?? codePoints; | ||||
| 
 | ||||
|     return text | ||||
|       .split(skip ? skipper : SKIP_NOTHING_RE) | ||||
|       .flatMap((x, i) => { | ||||
|         if (i % 2) { | ||||
|           return x; | ||||
|         } | ||||
|         return getCharValues(x).map(mapper); | ||||
|       }) | ||||
|       .split(skipAscii ? asciiSkipper : SKIP_NOTHING_RE) | ||||
|       .flatMap((x, i) => i % 2 ? x : getCharValues(x).map(mapper)) | ||||
|       .join(''); | ||||
|   }; | ||||
| } | ||||
|   } | ||||
| 
 | ||||
| interface UnescapeConfig { | ||||
|   regex: RegExp | ||||
|   radix: number | ||||
| }; | ||||
|   unescape(escaped: string): string { | ||||
|     const { regex, radix } = this.config.unescape; | ||||
| 
 | ||||
| function unescaper({ regex, radix }: UnescapeConfig) { | ||||
|   return (escaped: string): string => { | ||||
|     return escaped.replace(regex, (match) => { | ||||
|       return String.fromCodePoint(Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix)); | ||||
|     }); | ||||
|   }; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| export type ConverterId = keyof typeof converters; | ||||
| const converters = { | ||||
|   fullUnicode: { | ||||
|   fullUnicode: new Converter({ | ||||
|     name: 'Full Unicode', | ||||
|     escape: escaper({ mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }), | ||||
|     unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }), | ||||
|   }, | ||||
|   utf16: { | ||||
|     escape: { mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }, | ||||
|     unescape: { regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }, | ||||
|   }), | ||||
|   utf16: new Converter({ | ||||
|     name: 'UTF-16 Code Units', | ||||
|     escape: escaper({ charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }), | ||||
|     unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }), | ||||
|   }, | ||||
|   hexEntities: { | ||||
|     escape: { charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }, | ||||
|     unescape: { regex: /\\u\p{AHex}{4}/gu, radix: 16 }, | ||||
|   }), | ||||
|   hexEntities: new Converter({ | ||||
|     name: 'HTML Entities (Hex)', | ||||
|     escape: escaper({ mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }), | ||||
|     unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }), | ||||
|   }, | ||||
|   decimalEntities: { | ||||
|     escape: { mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }, | ||||
|     unescape: { regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }, | ||||
|   }), | ||||
|   decimalEntities: new Converter({ | ||||
|     name: 'HTML Entities (Decimal)', | ||||
|     escape: escaper({ mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }), | ||||
|     unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }), | ||||
|   }, | ||||
|     escape: { mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }, | ||||
|     unescape: { regex: /&#\d+;/gu, radix: 10 }, | ||||
|   }), | ||||
| } satisfies Record<string, Converter>; | ||||
| 
 | ||||
| function convertCodePointToUnicode(codePoint: number): string { | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user