fix(text-to-unicode): handle non-BMP + more conversion options
This commit is contained in:
		
							parent
							
								
									e876d03608
								
							
						
					
					
						commit
						b0ae8d7b3d
					
				| @ -1,7 +1,10 @@ | |||||||
| import { describe, expect, it } from 'vitest'; | import { describe, expect, it } from 'vitest'; | ||||||
| import { convertTextToUnicode, convertUnicodeToText } from './text-to-unicode.service'; | import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service'; | ||||||
|  | 
 | ||||||
|  | describe('text-to-unicode (legacy tests)', () => { | ||||||
|  |   const convertTextToUnicode = converters.decimalEntities.escape; | ||||||
|  |   const convertUnicodeToText = converters.decimalEntities.unescape; | ||||||
| 
 | 
 | ||||||
| describe('text-to-unicode', () => { |  | ||||||
|   describe('convertTextToUnicode', () => { |   describe('convertTextToUnicode', () => { | ||||||
|     it('a text string is converted to unicode representation', () => { |     it('a text string is converted to unicode representation', () => { | ||||||
|       expect(convertTextToUnicode('A')).toBe('&#65;'); |       expect(convertTextToUnicode('A')).toBe('&#65;'); | ||||||
| @ -18,3 +21,71 @@ describe('text-to-unicode', () => { | |||||||
|     }); |     }); | ||||||
|   }); |   }); | ||||||
| }); | }); | ||||||
|  | 
 | ||||||
/**
 * Table-driven tests for every entry of `converters`.
 *
 * For each sample text and each converter, the suite checks that `escape`
 * produces the listed string and that `unescape` turns that string back into
 * the sample text.
 */
describe('text-to-unicode', () => {
  /** One sample text together with the escaped form expected from each converter. */
  interface TestConfig {
    text: string
    results: Record<ConverterId, string>
    /** When true, `escape` is called with `SKIP_PRINTABLE_ASCII_RE`; otherwise with `undefined`. */
    skipPrintableAscii?: boolean
  }
  const tests: TestConfig[] = [
    {
      text: 'ABC',
      results: {
        fullUnicode: String.raw`\u0041\u0042\u0043`,
        utf16: String.raw`\u0041\u0042\u0043`,
        // Numeric character references must be spelled out literally here;
        // writing the decoded characters would make the expectation equal to the input.
        hexEntities: String.raw`&#x41;&#x42;&#x43;`,
        decimalEntities: String.raw`&#65;&#66;&#67;`,
      },
    },
    {
      text: 'ABC',
      skipPrintableAscii: true,
      results: {
        fullUnicode: 'ABC',
        utf16: 'ABC',
        hexEntities: 'ABC',
        decimalEntities: 'ABC',
      },
    },
    {
      text: '文字',
      results: {
        // eslint-disable-next-line unicorn/escape-case
        fullUnicode: String.raw`\u6587\u5b57`,
        // eslint-disable-next-line unicorn/escape-case
        utf16: String.raw`\u6587\u5b57`,
        hexEntities: String.raw`&#x6587;&#x5b57;`,
        decimalEntities: String.raw`&#25991;&#23383;`,
      },
    },
    {
      text: 'a 💩 b',
      skipPrintableAscii: true,
      results: {
        // eslint-disable-next-line unicorn/escape-case
        fullUnicode: String.raw`a \u{1f4a9} b`,
        // eslint-disable-next-line unicorn/escape-case
        utf16: String.raw`a \ud83d\udca9 b`,
        hexEntities: String.raw`a &#x1f4a9; b`,
        decimalEntities: String.raw`a &#128169; b`,
      },
    },
  ];

  for (const { text, skipPrintableAscii: skipAscii, results } of tests) {
    describe(`${text} (skipAscii=${skipAscii})`, () => {
      for (const [key, result] of Object.entries(results)) {
        describe(key, () => {
          // Object.entries() widens keys to string; they are ConverterId by construction of `results`.
          const converter = converters[key as ConverterId];
          it('Escaping', () => {
            expect(converter.escape(text, skipAscii ? SKIP_PRINTABLE_ASCII_RE : undefined)).toBe(result);
          });
          it('Unescaping', () => {
            expect(converter.unescape(result)).toBe(text);
          });
        });
      }
    });
  }
});
|  | |||||||
| @ -1,9 +1,95 @@ | |||||||
| function convertTextToUnicode(text: string): string { | // regex that never matches
 | ||||||
|   return text.split('').map(value => `&#${value.charCodeAt(0)};`).join(''); | const SKIP_NOTHING_RE = /(\b\B)/; | ||||||
|  | export const SKIP_PRINTABLE_ASCII_RE = /([ -~]+)/g; | ||||||
|  | 
 | ||||||
|  | function _codeUnits(text: string): number[] { | ||||||
|  |   return text.split('').map(char => char.codePointAt(0)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| function convertUnicodeToText(unicodeStr: string): string { | function _codePoints(text: string): number[] { | ||||||
|   return unicodeStr.replace(/&#(\d+);/g, (match, dec) => String.fromCharCode(dec)); |   return [...text].map(char => char.codePointAt(0)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| export { convertTextToUnicode, convertUnicodeToText }; | export interface Converter { | ||||||
|  |   name: string | ||||||
|  |   escape(text: string, skip: RegExp): string | ||||||
|  |   unescape(text: string): string | ||||||
|  | }; | ||||||
|  | 
 | ||||||
/** Describes how one escape notation encodes text. */
interface EscapeConfig {
  /** Splits a piece of text into the numeric values to encode; `_codePoints` is used when omitted. */
  getCharValues?(text: string): number[]
  /** Renders a single numeric value in the target notation. */
  mapper(charValue: number): string
}

/**
 * Creates the escaping function for one notation.
 *
 * The returned function takes:
 * - `text`: the input to escape;
 * - `skipper`: a regular expression for content _not_ to escape. It must have
 *   exactly 1 capture group. When omitted, a pattern that never matches is
 *   used, so the whole input is escaped.
 */
function escaper({ getCharValues, mapper }: EscapeConfig) {
  return (text: string, skipper?: RegExp): string => {
    const keepPattern = skipper ?? SKIP_NOTHING_RE;
    const toValues = getCharValues ?? _codePoints;

    // Splitting on a pattern with one capture group yields an alternating list:
    // even positions hold the text between matches, odd positions hold the
    // captured (to-be-kept) text.
    const rendered: string[] = [];
    for (const [position, piece] of text.split(keepPattern).entries()) {
      const isKept = position % 2 === 1;
      rendered.push(isKept ? piece : toValues(piece).map(mapper).join(''));
    }
    return rendered.join('');
  };
}
|  | 
 | ||||||
/** Describes how one escape notation is recognised and decoded. */
interface UnescapeConfig {
  /** Pattern matching one complete escape sequence (pass a global pattern to decode every occurrence). */
  regex: RegExp
  /** Base in which the digits of a sequence are written, e.g. 10 or 16. */
  radix: number
}

/**
 * Creates the unescaping function for one notation.
 *
 * Every match of `regex` is stripped of all characters that are not ASCII hex
 * digits, the remainder is parsed in `radix`, and the match is replaced by the
 * character with that code point.
 *
 * A sequence whose value is not a valid Unicode code point (greater than
 * 0x10FFFF, or not a number at all) is left in the output unchanged.
 * `String.fromCodePoint` throws a `RangeError` for such values, which would
 * otherwise abort the conversion of the entire input.
 *
 * @returns a function mapping escaped text to plain text
 */
function unescaper({ regex, radix }: UnescapeConfig) {
  const maxCodePoint = 0x10FFFF;

  return (escaped: string): string => {
    return escaped.replace(regex, (match) => {
      // Remove the notation's punctuation (`\u{}`, `&#x;`, ...) so only digits remain.
      const codePoint = Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix);

      // Written as a negated range test so that NaN is rejected as well.
      if (!(codePoint >= 0 && codePoint <= maxCodePoint)) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    });
  };
}
|  | 
 | ||||||
/** Identifier of a conversion offered by this module: one of the keys of `converters`. */
export type ConverterId = keyof typeof converters;

// Patterns matching a single escape sequence of each notation.
const FULL_UNICODE_ESCAPE_RE = /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu;
const UTF16_ESCAPE_RE = /\\u\p{AHex}{4}/gu;
const HEX_ENTITY_RE = /&#x\p{AHex}{1,6};/gu;
const DECIMAL_ENTITY_RE = /&#\d+;/gu;

/**
 * Registry of the available conversions, keyed by `ConverterId`.
 *
 * Each entry carries a display name plus an `escape`/`unescape` pair built
 * from `escaper` and `unescaper`. `satisfies` checks every entry against
 * `Converter` while keeping the literal keys available for `ConverterId`.
 */
const converters = {
  // One escape per code point: `\uXXXX`, or `\u{X...}` when more than four hex digits are needed.
  fullUnicode: {
    name: 'Full Unicode',
    escape: escaper({ mapper: convertCodePointToUnicode }),
    unescape: unescaper({ regex: FULL_UNICODE_ESCAPE_RE, radix: 16 }),
  },
  // Same formatter, but fed with the values produced by `_codeUnits`.
  utf16: {
    name: 'UTF-16 Code Units',
    escape: escaper({ getCharValues: _codeUnits, mapper: convertCodePointToUnicode }),
    unescape: unescaper({ regex: UTF16_ESCAPE_RE, radix: 16 }),
  },
  hexEntities: {
    name: 'HTML Entities (Hex)',
    escape: escaper({ mapper: toHexEntities }),
    unescape: unescaper({ regex: HEX_ENTITY_RE, radix: 16 }),
  },
  decimalEntities: {
    name: 'HTML Entities (Decimal)',
    escape: escaper({ mapper: toDecimalEntities }),
    unescape: unescaper({ regex: DECIMAL_ENTITY_RE, radix: 10 }),
  },
} satisfies Record<string, Converter>;
|  | 
 | ||||||
/**
 * Formats a numeric value as a JavaScript-style Unicode escape.
 *
 * Values whose lowercase hex form has at most four digits become `\uXXXX`
 * (zero-padded to four digits); longer values use the braced form `\u{X...}`.
 *
 * @param codePoint numeric value to format
 * @returns the escape sequence as literal text (backslash included)
 */
function convertCodePointToUnicode(codePoint: number): string {
  const digits = codePoint.toString(16);
  if (digits.length <= 4) {
    return '\\u' + digits.padStart(4, '0');
  }
  return '\\u{' + digits + '}';
}
|  | 
 | ||||||
/**
 * Formats a numeric value as a hexadecimal HTML character reference,
 * i.e. `&#x`, the lowercase hex digits, then `;`.
 */
function toHexEntities(codePoint: number): string {
  const hexDigits = codePoint.toString(16);
  return ['&#x', hexDigits, ';'].join('');
}
|  | 
 | ||||||
/**
 * Formats a numeric value as a decimal HTML character reference,
 * i.e. `&#`, the decimal digits, then `;`.
 */
function toDecimalEntities(codePoint: number): string {
  const decimalDigits = String(codePoint);
  return '&#' + decimalDigits + ';';
}
|  | 
 | ||||||
|  | export { converters }; | ||||||
|  | |||||||
| @ -1,34 +1,106 @@ | |||||||
| <script setup lang="ts"> | <script setup lang="ts"> | ||||||
| import { convertTextToUnicode, convertUnicodeToText } from './text-to-unicode.service'; | import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service'; | ||||||
| import { useCopy } from '@/composable/copy'; | import { useCopy } from '@/composable/copy'; | ||||||
| 
 | 
 | ||||||
|  | const converterId = ref<ConverterId>('fullUnicode'); | ||||||
|  | const skipAscii = ref(true); | ||||||
|  | 
 | ||||||
| const inputText = ref(''); | const inputText = ref(''); | ||||||
| const unicodeFromText = computed(() => inputText.value.trim() === '' ? '' : convertTextToUnicode(inputText.value)); | const unicodeFromText = computed(() => | ||||||
|  |   inputText.value.trim() === '' | ||||||
|  |     ? '' | ||||||
|  |     : converters[converterId.value].escape(inputText.value, skipAscii.value ? SKIP_PRINTABLE_ASCII_RE : undefined), | ||||||
|  | ); | ||||||
| const { copy: copyUnicode } = useCopy({ source: unicodeFromText }); | const { copy: copyUnicode } = useCopy({ source: unicodeFromText }); | ||||||
| 
 | 
 | ||||||
| const inputUnicode = ref(''); | const inputUnicode = ref(''); | ||||||
| const textFromUnicode = computed(() => inputUnicode.value.trim() === '' ? '' : convertUnicodeToText(inputUnicode.value)); | const textFromUnicode = computed(() => | ||||||
|  |   inputUnicode.value.trim() === '' ? '' : converters[converterId.value].unescape(inputUnicode.value), | ||||||
|  | ); | ||||||
| const { copy: copyText } = useCopy({ source: textFromUnicode }); | const { copy: copyText } = useCopy({ source: textFromUnicode }); | ||||||
| </script> | </script> | ||||||
| 
 | 
 | ||||||
| <template> | <template> | ||||||
|   <c-card title="Text to Unicode"> |   <div class="outer" flex flex-col gap-6> | ||||||
|     <c-input-text v-model:value="inputText" multiline placeholder="e.g. 'Hello Avengers'" label="Enter text to convert to unicode" autosize autofocus raw-text test-id="text-to-unicode-input" /> |     <div class="controls"> | ||||||
|     <c-input-text v-model:value="unicodeFromText" label="Unicode from your text" multiline raw-text readonly mt-2 placeholder="The unicode representation of your text will be here" test-id="text-to-unicode-output" /> |       <c-select | ||||||
|  |         v-model:value="converterId" | ||||||
|  |         searchable | ||||||
|  |         label="Conversion type:" | ||||||
|  |         :options="Object.entries(converters).map(([key, val]) => ({ label: val.name, value: key }))" | ||||||
|  |       /> | ||||||
|  |     </div> | ||||||
|  |     <c-card class="card" title="Text to Unicode"> | ||||||
|  |       <c-input-text | ||||||
|  |         v-model:value="inputText" | ||||||
|  |         multiline | ||||||
|  |         placeholder="e.g. 'Hello Avengers'" | ||||||
|  |         label="Enter text to convert to Unicode" | ||||||
|  |         autosize | ||||||
|  |         autofocus | ||||||
|  |         raw-text | ||||||
|  |         test-id="text-to-unicode-input" | ||||||
|  |       /> | ||||||
|  |       <c-input-text | ||||||
|  |         v-model:value="unicodeFromText" | ||||||
|  |         label="Unicode from your text" | ||||||
|  |         multiline | ||||||
|  |         raw-text | ||||||
|  |         readonly | ||||||
|  |         mt-2 | ||||||
|  |         placeholder="The unicode representation of your text will be here" | ||||||
|  |         test-id="text-to-unicode-output" | ||||||
|  |       /> | ||||||
|  |       <div mt-2 flex justify-start> | ||||||
|  |         <n-form-item label="Skip ASCII?" :show-feedback="false" label-placement="left"> | ||||||
|  |           <n-switch v-model:value="skipAscii" /> | ||||||
|  |         </n-form-item> | ||||||
|  |       </div> | ||||||
|       <div mt-2 flex justify-center> |       <div mt-2 flex justify-center> | ||||||
|       <c-button :disabled="!unicodeFromText" @click="copyUnicode()"> |         <c-button :disabled="!unicodeFromText" @click="copyUnicode()"> Copy unicode to clipboard </c-button> | ||||||
|         Copy unicode to clipboard |  | ||||||
|       </c-button> |  | ||||||
|       </div> |       </div> | ||||||
|     </c-card> |     </c-card> | ||||||
| 
 |     <c-card class="card" title="Unicode to Text"> | ||||||
|   <c-card title="Unicode to Text"> |       <c-input-text | ||||||
|     <c-input-text v-model:value="inputUnicode" multiline placeholder="Input Unicode" label="Enter unicode to convert to text" autosize raw-text test-id="unicode-to-text-input" /> |         v-model:value="inputUnicode" | ||||||
|     <c-input-text v-model:value="textFromUnicode" label="Text from your Unicode" multiline raw-text readonly mt-2 placeholder="The text representation of your unicode will be here" test-id="unicode-to-text-output" /> |         multiline | ||||||
|  |         placeholder="Input Unicode" | ||||||
|  |         label="Enter unicode to convert to text" | ||||||
|  |         autosize | ||||||
|  |         raw-text | ||||||
|  |         test-id="unicode-to-text-input" | ||||||
|  |       /> | ||||||
|  |       <c-input-text | ||||||
|  |         v-model:value="textFromUnicode" | ||||||
|  |         label="Text from your Unicode" | ||||||
|  |         multiline | ||||||
|  |         raw-text | ||||||
|  |         readonly | ||||||
|  |         mt-2 | ||||||
|  |         placeholder="The text representation of your unicode will be here" | ||||||
|  |         test-id="unicode-to-text-output" | ||||||
|  |       /> | ||||||
|       <div mt-2 flex justify-center> |       <div mt-2 flex justify-center> | ||||||
|       <c-button :disabled="!textFromUnicode" @click="copyText()"> |         <c-button :disabled="!textFromUnicode" @click="copyText()"> Copy text to clipboard </c-button> | ||||||
|         Copy text to clipboard |  | ||||||
|       </c-button> |  | ||||||
|       </div> |       </div> | ||||||
|     </c-card> |     </c-card> | ||||||
|  |   </div> | ||||||
| </template> | </template> | ||||||
|  | 
 | ||||||
|  | <style lang="less" scoped> | ||||||
|  | .outer { | ||||||
|  |   flex: 0 1 1200px; | ||||||
|  |   margin-inline: 50px; | ||||||
|  |   display: flex; | ||||||
|  |   flex-direction: row; | ||||||
|  |   flex-wrap: wrap; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | .controls { | ||||||
|  |   flex: 0 1 100%; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | .card { | ||||||
|  |   flex: 1 0 max(40%, 500px); | ||||||
|  | } | ||||||
|  | </style> | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user