fix(text-to-unicode): handle non-BMP + more conversion options
This commit is contained in:
		
							parent
							
								
									e876d03608
								
							
						
					
					
						commit
						b0ae8d7b3d
					
				| @ -1,7 +1,10 @@ | ||||
| import { describe, expect, it } from 'vitest'; | ||||
| import { convertTextToUnicode, convertUnicodeToText } from './text-to-unicode.service'; | ||||
| import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service'; | ||||
| 
 | ||||
| describe('text-to-unicode (legacy tests)', () => { | ||||
|   const convertTextToUnicode = converters.decimalEntities.escape; | ||||
|   const convertUnicodeToText = converters.decimalEntities.unescape; | ||||
| 
 | ||||
| describe('text-to-unicode', () => { | ||||
|   describe('convertTextToUnicode', () => { | ||||
|     it('a text string is converted to unicode representation', () => { | ||||
|       expect(convertTextToUnicode('A')).toBe('A'); | ||||
| @ -18,3 +21,71 @@ describe('text-to-unicode', () => { | ||||
|     }); | ||||
|   }); | ||||
| }); | ||||
| 
 | ||||
| describe('text-to-unicode', () => { | ||||
|   interface TestConfig { | ||||
|     text: string | ||||
|     results: Record<ConverterId, string> | ||||
|     skipPrintableAscii?: boolean | ||||
|   }; | ||||
|   const tests: TestConfig[] = [ | ||||
|     { | ||||
|       text: 'ABC', | ||||
|       results: { | ||||
|         fullUnicode: String.raw`\u0041\u0042\u0043`, | ||||
|         utf16: String.raw`\u0041\u0042\u0043`, | ||||
|         hexEntities: String.raw`ABC`, | ||||
|         decimalEntities: String.raw`ABC`, | ||||
|       }, | ||||
|     }, | ||||
|     { | ||||
|       text: 'ABC', | ||||
|       skipPrintableAscii: true, | ||||
|       results: { | ||||
|         fullUnicode: 'ABC', | ||||
|         utf16: 'ABC', | ||||
|         hexEntities: 'ABC', | ||||
|         decimalEntities: 'ABC', | ||||
|       }, | ||||
|     }, | ||||
|     { | ||||
|       text: '文字', | ||||
|       results: { | ||||
|         // eslint-disable-next-line unicorn/escape-case
 | ||||
|         fullUnicode: String.raw`\u6587\u5b57`, | ||||
|         // eslint-disable-next-line unicorn/escape-case
 | ||||
|         utf16: String.raw`\u6587\u5b57`, | ||||
|         hexEntities: String.raw`文字`, | ||||
|         decimalEntities: String.raw`文字`, | ||||
|       }, | ||||
|     }, | ||||
|     { | ||||
|       text: 'a 💩 b', | ||||
|       skipPrintableAscii: true, | ||||
|       results: { | ||||
|         // eslint-disable-next-line unicorn/escape-case
 | ||||
|         fullUnicode: String.raw`a \u{1f4a9} b`, | ||||
|         // eslint-disable-next-line unicorn/escape-case
 | ||||
|         utf16: String.raw`a \ud83d\udca9 b`, | ||||
|         hexEntities: String.raw`a 💩 b`, | ||||
|         decimalEntities: String.raw`a 💩 b`, | ||||
|       }, | ||||
|     }, | ||||
|   ]; | ||||
| 
 | ||||
|   for (const { text, skipPrintableAscii: skipAscii, results } of tests) { | ||||
|     describe(`${text} (skipAscii=${skipAscii})`, () => { | ||||
|       for (const [key, result] of Object.entries(results)) { | ||||
|         describe(key, () => { | ||||
|           const converter = converters[key as ConverterId]; | ||||
|           it('Escaping', () => { | ||||
|             expect(converter.escape(text, skipAscii ? SKIP_PRINTABLE_ASCII_RE : undefined)).toBe(result); | ||||
|           }); | ||||
|           it('Unescaping', () => { | ||||
|             expect(converter.unescape(result)).toBe(text); | ||||
|           }); | ||||
|         }); | ||||
|       } | ||||
|     }); | ||||
|   } | ||||
| }); | ||||
|  | ||||
| @ -1,9 +1,95 @@ | ||||
| function convertTextToUnicode(text: string): string { | ||||
|   return text.split('').map(value => `&#${value.charCodeAt(0)};`).join(''); | ||||
| // regex that never matches
 | ||||
| const SKIP_NOTHING_RE = /(\b\B)/; | ||||
| export const SKIP_PRINTABLE_ASCII_RE = /([ -~]+)/g; | ||||
| 
 | ||||
/**
 * Splits `text` into its UTF-16 code units.
 *
 * A character outside the BMP yields two values: its high surrogate followed
 * by its low surrogate.
 *
 * @param text string to decompose
 * @returns one number per UTF-16 code unit, in input order
 */
function _codeUnits(text: string): number[] {
  // charCodeAt is the code-unit accessor and is typed `number`; codePointAt
  // returns `number | undefined`, which does not satisfy `number[]`.
  return Array.from({ length: text.length }, (_unused, index) => text.charCodeAt(index));
}
| 
 | ||||
| function convertUnicodeToText(unicodeStr: string): string { | ||||
|   return unicodeStr.replace(/&#(\d+);/g, (match, dec) => String.fromCharCode(dec)); | ||||
/**
 * Splits `text` into Unicode code points.
 *
 * String iteration walks by code point, so a surrogate pair yields a single
 * value; an unpaired surrogate yields its own code unit value.
 *
 * @param text string to decompose
 * @returns one number per code point, in input order
 */
function _codePoints(text: string): number[] {
  const codePoints: number[] = [];
  for (const char of text) {
    const codePoint = char.codePointAt(0);
    // codePointAt is typed `number | undefined`; iteration never yields an
    // empty string, so this guard only exists to narrow the type.
    if (codePoint !== undefined) {
      codePoints.push(codePoint);
    }
  }
  return codePoints;
}
| 
 | ||||
| export { convertTextToUnicode, convertUnicodeToText }; | ||||
/**
 * A named pair of functions that escape text into a Unicode notation and
 * unescape that notation back into text.
 */
export interface Converter {
  /** Human-readable name of the conversion. */
  name: string
  /**
   * Escapes `text`.
   *
   * @param text text to escape
   * @param skip pattern for content that is left unescaped; optional, since
   *   callers pass `undefined` when nothing should be skipped
   */
  escape(text: string, skip?: RegExp): string
  /**
   * Reverses `escape`.
   *
   * @param text escaped text to decode
   */
  unescape(text: string): string
}
| 
 | ||||
/** Options accepted by `escaper`. */
interface EscapeConfig {
  /** Splits text into numeric character values. Defaults to `_codePoints`. */
  getCharValues?(text: string): number[]
  /** Renders one character value in its escaped form. */
  mapper(charValue: number): string
}

/**
 * Builds an escape function from a character splitter and a per-character
 * mapper.
 *
 * @param config how to split text into character values and how to render each one
 * @returns a function escaping a whole string, optionally leaving some content untouched
 */
function escaper({ getCharValues, mapper }: EscapeConfig) {
  /**
   * @param text text input to escape
   * @param skipper regular expression for content _not_ to escape. Must have exactly 1 capture group.
   */
  return (text: string, skipper?: RegExp): string => {
    // Resolve the defaults into fresh locals rather than reassigning the
    // parameter and the closed-over config value: the locals are never
    // undefined, so no narrowing is lost inside the callback below, and the
    // shared config is not mutated by the first call.
    const toCharValues = getCharValues ?? _codePoints;
    const segments = text.split(skipper ?? SKIP_NOTHING_RE);

    return segments
      .map((segment, index) =>
        // With exactly one capture group, split() interleaves the captured
        // (skipped) text at odd indices; even indices are the text to escape.
        index % 2 === 1 ? segment : toCharValues(segment).map(mapper).join(''),
      )
      .join('');
  };
}
| 
 | ||||
/** Options accepted by `unescaper`. */
interface UnescapeConfig {
  /** Pattern matching one complete escape sequence; every match is decoded. */
  regex: RegExp
  /** Base in which the digits of a matched escape sequence are written. */
  radix: number
}

/**
 * Builds an unescape function that replaces every match of `regex` with the
 * character whose code point is written inside the match.
 *
 * @param config pattern of an escape sequence and the radix of its digits
 * @returns a function decoding all escape sequences found in a string
 */
function unescaper({ regex, radix }: UnescapeConfig) {
  // Largest value String.fromCodePoint accepts; anything above throws RangeError.
  const maxCodePoint = 0x10FFFF;
  // Strips the escape syntax (everything that is not an ASCII hex digit),
  // leaving only the digits of the number.
  const nonHexDigits = /\P{AHex}/gu;

  return (escaped: string): string => {
    return escaped.replace(regex, (match) => {
      const codePoint = Number.parseInt(match.replace(nonHexDigits, ''), radix);
      // Leave sequences that do not denote a valid code point untouched
      // instead of letting a RangeError abort the whole conversion.
      if (Number.isNaN(codePoint) || codePoint > maxCodePoint) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    });
  };
}
| 
 | ||||
/** Key identifying one of the conversions defined in `converters`. */
export type ConverterId = keyof typeof converters;

// Escape-sequence patterns used for unescaping; each match is one complete sequence.
const FULL_UNICODE_ESCAPE_RE = /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu;
const UTF16_ESCAPE_RE = /\\u\p{AHex}{4}/gu;
const HEX_ENTITY_RE = /&#x\p{AHex}{1,6};/gu;
const DECIMAL_ENTITY_RE = /&#\d+;/gu;

/**
 * Available conversions, keyed by id. Each entry pairs an escape function
 * with the unescape function that reverses it.
 */
const converters = {
  // Escapes per code point: \uXXXX, or \u{...} for values needing more than 4 hex digits.
  fullUnicode: {
    name: 'Full Unicode',
    escape: escaper({ mapper: convertCodePointToUnicode }),
    unescape: unescaper({ regex: FULL_UNICODE_ESCAPE_RE, radix: 16 }),
  },
  // Escapes per UTF-16 code unit, so every value fits the \uXXXX form.
  utf16: {
    name: 'UTF-16 Code Units',
    escape: escaper({ getCharValues: _codeUnits, mapper: convertCodePointToUnicode }),
    unescape: unescaper({ regex: UTF16_ESCAPE_RE, radix: 16 }),
  },
  // Escapes per code point as &#x...; entities.
  hexEntities: {
    name: 'HTML Entities (Hex)',
    escape: escaper({ mapper: toHexEntities }),
    unescape: unescaper({ regex: HEX_ENTITY_RE, radix: 16 }),
  },
  // Escapes per code point as &#...; entities.
  decimalEntities: {
    name: 'HTML Entities (Decimal)',
    escape: escaper({ mapper: toDecimalEntities }),
    unescape: unescaper({ regex: DECIMAL_ENTITY_RE, radix: 10 }),
  },
} satisfies Record<string, Converter>;
| 
 | ||||
/**
 * Renders a character value as a backslash-u escape sequence.
 *
 * Values whose hexadecimal form fits in four digits use the zero-padded
 * `\uXXXX` form; larger values use the braced `\u{...}` form.
 *
 * @param codePoint character value to render
 * @returns the escape sequence, with lowercase hex digits
 */
function convertCodePointToUnicode(codePoint: number): string {
  const hexDigits = codePoint.toString(16);
  if (hexDigits.length <= 4) {
    return `\\u${hexDigits.padStart(4, '0')}`;
  }
  return `\\u{${hexDigits}}`;
}
| 
 | ||||
/**
 * Renders a character value as a hexadecimal HTML character reference.
 *
 * @param codePoint character value to render
 * @returns the reference, with lowercase hex digits and no padding
 */
function toHexEntities(codePoint: number): string {
  const hexDigits = codePoint.toString(16);
  return ['&#x', hexDigits, ';'].join('');
}
| 
 | ||||
/**
 * Renders a character value as a decimal HTML character reference.
 *
 * @param codePoint character value to render
 * @returns the reference, with the value written in base 10
 */
function toDecimalEntities(codePoint: number): string {
  const decimalDigits = String(codePoint);
  return '&#' + decimalDigits + ';';
}
| 
 | ||||
| export { converters }; | ||||
|  | ||||
| @ -1,34 +1,106 @@ | ||||
| <script setup lang="ts"> | ||||
| import { convertTextToUnicode, convertUnicodeToText } from './text-to-unicode.service'; | ||||
| import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service'; | ||||
| import { useCopy } from '@/composable/copy'; | ||||
| 
 | ||||
| const converterId = ref<ConverterId>('fullUnicode'); | ||||
| const skipAscii = ref(true); | ||||
| 
 | ||||
| const inputText = ref(''); | ||||
| const unicodeFromText = computed(() => inputText.value.trim() === '' ? '' : convertTextToUnicode(inputText.value)); | ||||
| const unicodeFromText = computed(() => | ||||
|   inputText.value.trim() === '' | ||||
|     ? '' | ||||
|     : converters[converterId.value].escape(inputText.value, skipAscii.value ? SKIP_PRINTABLE_ASCII_RE : undefined), | ||||
| ); | ||||
| const { copy: copyUnicode } = useCopy({ source: unicodeFromText }); | ||||
| 
 | ||||
| const inputUnicode = ref(''); | ||||
| const textFromUnicode = computed(() => inputUnicode.value.trim() === '' ? '' : convertUnicodeToText(inputUnicode.value)); | ||||
| const textFromUnicode = computed(() => | ||||
|   inputUnicode.value.trim() === '' ? '' : converters[converterId.value].unescape(inputUnicode.value), | ||||
| ); | ||||
| const { copy: copyText } = useCopy({ source: textFromUnicode }); | ||||
| </script> | ||||
| 
 | ||||
| <template> | ||||
|   <c-card title="Text to Unicode"> | ||||
|     <c-input-text v-model:value="inputText" multiline placeholder="e.g. 'Hello Avengers'" label="Enter text to convert to unicode" autosize autofocus raw-text test-id="text-to-unicode-input" /> | ||||
|     <c-input-text v-model:value="unicodeFromText" label="Unicode from your text" multiline raw-text readonly mt-2 placeholder="The unicode representation of your text will be here" test-id="text-to-unicode-output" /> | ||||
|   <div class="outer" flex flex-col gap-6> | ||||
|     <div class="controls"> | ||||
|       <c-select | ||||
|         v-model:value="converterId" | ||||
|         searchable | ||||
|         label="Conversion type:" | ||||
|         :options="Object.entries(converters).map(([key, val]) => ({ label: val.name, value: key }))" | ||||
|       /> | ||||
|     </div> | ||||
|     <c-card class="card" title="Text to Unicode"> | ||||
|       <c-input-text | ||||
|         v-model:value="inputText" | ||||
|         multiline | ||||
|         placeholder="e.g. 'Hello Avengers'" | ||||
|         label="Enter text to convert to Unicode" | ||||
|         autosize | ||||
|         autofocus | ||||
|         raw-text | ||||
|         test-id="text-to-unicode-input" | ||||
|       /> | ||||
|       <c-input-text | ||||
|         v-model:value="unicodeFromText" | ||||
|         label="Unicode from your text" | ||||
|         multiline | ||||
|         raw-text | ||||
|         readonly | ||||
|         mt-2 | ||||
|         placeholder="The unicode representation of your text will be here" | ||||
|         test-id="text-to-unicode-output" | ||||
|       /> | ||||
|       <div mt-2 flex justify-start> | ||||
|         <n-form-item label="Skip ASCII?" :show-feedback="false" label-placement="left"> | ||||
|           <n-switch v-model:value="skipAscii" /> | ||||
|         </n-form-item> | ||||
|       </div> | ||||
|       <div mt-2 flex justify-center> | ||||
|       <c-button :disabled="!unicodeFromText" @click="copyUnicode()"> | ||||
|         Copy unicode to clipboard | ||||
|       </c-button> | ||||
|         <c-button :disabled="!unicodeFromText" @click="copyUnicode()"> Copy unicode to clipboard </c-button> | ||||
|       </div> | ||||
|     </c-card> | ||||
| 
 | ||||
|   <c-card title="Unicode to Text"> | ||||
|     <c-input-text v-model:value="inputUnicode" multiline placeholder="Input Unicode" label="Enter unicode to convert to text" autosize raw-text test-id="unicode-to-text-input" /> | ||||
|     <c-input-text v-model:value="textFromUnicode" label="Text from your Unicode" multiline raw-text readonly mt-2 placeholder="The text representation of your unicode will be here" test-id="unicode-to-text-output" /> | ||||
|     <c-card class="card" title="Unicode to Text"> | ||||
|       <c-input-text | ||||
|         v-model:value="inputUnicode" | ||||
|         multiline | ||||
|         placeholder="Input Unicode" | ||||
|         label="Enter unicode to convert to text" | ||||
|         autosize | ||||
|         raw-text | ||||
|         test-id="unicode-to-text-input" | ||||
|       /> | ||||
|       <c-input-text | ||||
|         v-model:value="textFromUnicode" | ||||
|         label="Text from your Unicode" | ||||
|         multiline | ||||
|         raw-text | ||||
|         readonly | ||||
|         mt-2 | ||||
|         placeholder="The text representation of your unicode will be here" | ||||
|         test-id="unicode-to-text-output" | ||||
|       /> | ||||
|       <div mt-2 flex justify-center> | ||||
|       <c-button :disabled="!textFromUnicode" @click="copyText()"> | ||||
|         Copy text to clipboard | ||||
|       </c-button> | ||||
|         <c-button :disabled="!textFromUnicode" @click="copyText()"> Copy text to clipboard </c-button> | ||||
|       </div> | ||||
|     </c-card> | ||||
|   </div> | ||||
| </template> | ||||
| 
 | ||||
| <style lang="less" scoped> | ||||
| .outer { | ||||
|   flex: 0 1 1200px; | ||||
|   margin-inline: 50px; | ||||
|   display: flex; | ||||
|   flex-direction: row; | ||||
|   flex-wrap: wrap; | ||||
| } | ||||
| 
 | ||||
| .controls { | ||||
|   flex: 0 1 100%; | ||||
| } | ||||
| 
 | ||||
| .card { | ||||
|   flex: 1 0 max(40%, 500px); | ||||
| } | ||||
| </style> | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user