Convert Converter to class
This commit is contained in:
		
							parent
							
								
									1dc965d9a8
								
							
						
					
					
						commit
						d75473b2e8
					
				| @ -3,7 +3,7 @@ import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './ | |||||||
| 
 | 
 | ||||||
| describe('text-to-unicode (legacy tests)', () => { | describe('text-to-unicode (legacy tests)', () => { | ||||||
|   const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false); |   const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false); | ||||||
|   const convertUnicodeToText = converters.decimalEntities.unescape; |   const convertUnicodeToText = (escaped: string) => converters.decimalEntities.unescape(escaped); | ||||||
| 
 | 
 | ||||||
|   describe('convertTextToUnicode', () => { |   describe('convertTextToUnicode', () => { | ||||||
|     it('a text string is converted to unicode representation', () => { |     it('a text string is converted to unicode representation', () => { | ||||||
|  | |||||||
| @ -11,70 +11,63 @@ function codePoints(text: string): number[] { | |||||||
|   return [...text].map(char => char.codePointAt(0)); |   return [...text].map(char => char.codePointAt(0)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| export interface Converter { | interface ConverterConfig { | ||||||
|   name: string |   name: string | ||||||
|   escape(text: string, skipAscii: boolean): string |   escape: { | ||||||
|   unescape(text: string): string |     charValues?(text: string): number[] | ||||||
| }; |     mapper(charValue: number): string | ||||||
|  |     /** @prop regular expression for default content to skip. Must have exactly 1 capture group. */ | ||||||
|  |     asciiSkipper: RegExp | ||||||
|  |   } | ||||||
|  |   unescape: { | ||||||
|  |     regex: RegExp | ||||||
|  |     radix: number | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | class Converter { | ||||||
|  |   constructor(public config: ConverterConfig) {} | ||||||
| 
 | 
 | ||||||
| interface EscapeConfig { |   escape(text: string, skipAscii: boolean): string { | ||||||
|   charValues?(text: string): number[] |     const { asciiSkipper, charValues, mapper } = this.config.escape; | ||||||
|   mapper(charValue: number): string |     const getCharValues = charValues ?? codePoints; | ||||||
|   /** @prop regular expression for default content to skip. Must have exactly 1 capture group. */ |  | ||||||
|   asciiSkipper: RegExp |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| function escaper({ charValues: getCharValues, mapper, asciiSkipper: skipper }: EscapeConfig) { |  | ||||||
|   return (text: string, skip: boolean): string => { |  | ||||||
|     getCharValues ??= codePoints; |  | ||||||
| 
 | 
 | ||||||
|     return text |     return text | ||||||
|       .split(skip ? skipper : SKIP_NOTHING_RE) |       .split(skipAscii ? asciiSkipper : SKIP_NOTHING_RE) | ||||||
|       .flatMap((x, i) => { |       .flatMap((x, i) => i % 2 ? x : getCharValues(x).map(mapper)) | ||||||
|         if (i % 2) { |  | ||||||
|           return x; |  | ||||||
|         } |  | ||||||
|         return getCharValues(x).map(mapper); |  | ||||||
|       }) |  | ||||||
|       .join(''); |       .join(''); | ||||||
|   }; |   } | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| interface UnescapeConfig { |   unescape(escaped: string): string { | ||||||
|   regex: RegExp |     const { regex, radix } = this.config.unescape; | ||||||
|   radix: number |  | ||||||
| }; |  | ||||||
| 
 | 
 | ||||||
| function unescaper({ regex, radix }: UnescapeConfig) { |  | ||||||
|   return (escaped: string): string => { |  | ||||||
|     return escaped.replace(regex, (match) => { |     return escaped.replace(regex, (match) => { | ||||||
|       return String.fromCodePoint(Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix)); |       return String.fromCodePoint(Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix)); | ||||||
|     }); |     }); | ||||||
|   }; |   } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| export type ConverterId = keyof typeof converters; | export type ConverterId = keyof typeof converters; | ||||||
| const converters = { | const converters = { | ||||||
|   fullUnicode: { |   fullUnicode: new Converter({ | ||||||
|     name: 'Full Unicode', |     name: 'Full Unicode', | ||||||
|     escape: escaper({ mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }), |     escape: { mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }, | ||||||
|     unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }), |     unescape: { regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }, | ||||||
|   }, |   }), | ||||||
|   utf16: { |   utf16: new Converter({ | ||||||
|     name: 'UTF-16 Code Units', |     name: 'UTF-16 Code Units', | ||||||
|     escape: escaper({ charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }), |     escape: { charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }, | ||||||
|     unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }), |     unescape: { regex: /\\u\p{AHex}{4}/gu, radix: 16 }, | ||||||
|   }, |   }), | ||||||
|   hexEntities: { |   hexEntities: new Converter({ | ||||||
|     name: 'HTML Entities (Hex)', |     name: 'HTML Entities (Hex)', | ||||||
|     escape: escaper({ mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }), |     escape: { mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }, | ||||||
|     unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }), |     unescape: { regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }, | ||||||
|   }, |   }), | ||||||
|   decimalEntities: { |   decimalEntities: new Converter({ | ||||||
|     name: 'HTML Entities (Decimal)', |     name: 'HTML Entities (Decimal)', | ||||||
|     escape: escaper({ mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }), |     escape: { mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }, | ||||||
|     unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }), |     unescape: { regex: /&#\d+;/gu, radix: 10 }, | ||||||
|   }, |   }), | ||||||
| } satisfies Record<string, Converter>; | } satisfies Record<string, Converter>; | ||||||
| 
 | 
 | ||||||
| function convertCodePointToUnicode(codePoint: number): string { | function convertCodePointToUnicode(codePoint: number): string { | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user