Always escape ASCII chars with special meaning
This commit is contained in:
		
							parent
							
								
									b0ae8d7b3d
								
							
						
					
					
						commit
						1dc965d9a8
					
				| @ -10,16 +10,18 @@ test.describe('Tool - Text to Unicode', () => { | ||||
|   }); | ||||
| 
 | ||||
|   test('Text to unicode conversion', async ({ page }) => { | ||||
|     await page.getByTestId('text-to-unicode-input').fill('it-tools'); | ||||
|     await page.getByTestId('text-to-unicode-input').fill('"it-tools" 文字'); | ||||
|     const unicode = await page.getByTestId('text-to-unicode-output').inputValue(); | ||||
| 
 | ||||
|     expect(unicode).toEqual('it-tools'); | ||||
|     // eslint-disable-next-line unicorn/escape-case
 | ||||
|     expect(unicode).toEqual(String.raw`\u0022it-tools\u0022 \u6587\u5b57`); | ||||
|   }); | ||||
| 
 | ||||
|   test('Unicode to text conversion', async ({ page }) => { | ||||
|     await page.getByTestId('unicode-to-text-input').fill('it-tools'); | ||||
|     // eslint-disable-next-line unicorn/escape-case
 | ||||
|     await page.getByTestId('unicode-to-text-input').fill(String.raw`\u0022it-tools\u0022 \u6587\u5b57`); | ||||
|     const text = await page.getByTestId('unicode-to-text-output').inputValue(); | ||||
| 
 | ||||
|     expect(text).toEqual('it-tools'); | ||||
|     expect(text).toEqual('"it-tools" 文字'); | ||||
|   }); | ||||
| }); | ||||
|  | ||||
| @ -1,8 +1,8 @@ | ||||
| import { describe, expect, it } from 'vitest'; | ||||
| import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service'; | ||||
| import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './text-to-unicode.service'; | ||||
| 
 | ||||
| describe('text-to-unicode (legacy tests)', () => { | ||||
|   const convertTextToUnicode = converters.decimalEntities.escape; | ||||
|   const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false); | ||||
|   const convertUnicodeToText = converters.decimalEntities.unescape; | ||||
| 
 | ||||
|   describe('convertTextToUnicode', () => { | ||||
| @ -22,6 +22,23 @@ describe('text-to-unicode (legacy tests)', () => { | ||||
|   }); | ||||
| }); | ||||
| 
 | ||||
| const ALL_PRINTABLE_ASCII = ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'; | ||||
| 
 | ||||
| describe('text-to-unicode regexes', () => { | ||||
|   // eslint-disable-next-line prefer-regex-literals
 | ||||
|   const skipAsciiJs = new RegExp(String.raw`([[ -~]--['"\\]]+)`, 'gv'); | ||||
|   // eslint-disable-next-line prefer-regex-literals
 | ||||
|   const skipAsciiHtml = new RegExp(String.raw`([[ -~]--[<>&'"]]+)`, 'gv'); | ||||
| 
 | ||||
|   it('regexes are equivalent to `v`-flag versions', () => { | ||||
|     // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicodeSets
 | ||||
|     // regexes in `text-to-unicode.service.ts` can be replaced with `v`-flag versions once unicodeSets reaches
 | ||||
|     // sufficient in-browser support
 | ||||
|     expect(ALL_PRINTABLE_ASCII.match(skipAsciiJs)).toStrictEqual(ALL_PRINTABLE_ASCII.match(SKIP_ASCII_JS)); | ||||
|     expect(ALL_PRINTABLE_ASCII.match(skipAsciiHtml)).toStrictEqual(ALL_PRINTABLE_ASCII.match(SKIP_ASCII_HTML)); | ||||
|   }); | ||||
| }); | ||||
| 
 | ||||
| describe('text-to-unicode', () => { | ||||
|   interface TestConfig { | ||||
|     text: string | ||||
| @ -48,6 +65,18 @@ describe('text-to-unicode', () => { | ||||
|         decimalEntities: 'ABC', | ||||
|       }, | ||||
|     }, | ||||
|     { | ||||
|       text: ALL_PRINTABLE_ASCII, | ||||
|       skipPrintableAscii: true, | ||||
|       results: { | ||||
|         // eslint-disable-next-line unicorn/escape-case
 | ||||
|         fullUnicode: String.raw` !\u0022#$%&\u0027()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005c]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`, | ||||
|         // eslint-disable-next-line unicorn/escape-case
 | ||||
|         utf16: String.raw` !\u0022#$%&\u0027()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005c]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`, | ||||
|         hexEntities: String.raw` !&#x22;#$%&#x26;&#x27;()*+,-./0123456789:;&#x3c;=&#x3e;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`, | ||||
|         decimalEntities: String.raw` !&#34;#$%&#38;&#39;()*+,-./0123456789:;&#60;=&#62;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`, | ||||
|       }, | ||||
|     }, | ||||
|     { | ||||
|       text: '文字', | ||||
|       results: { | ||||
| @ -79,7 +108,7 @@ describe('text-to-unicode', () => { | ||||
|         describe(key, () => { | ||||
|           const converter = converters[key as ConverterId]; | ||||
|           it('Escaping', () => { | ||||
|             expect(converter.escape(text, skipAscii ? SKIP_PRINTABLE_ASCII_RE : undefined)).toBe(result); | ||||
|             expect(converter.escape(text, skipAscii)).toBe(result); | ||||
|           }); | ||||
|           it('Unescaping', () => { | ||||
|             expect(converter.unescape(result)).toBe(text); | ||||
|  | ||||
| @ -1,37 +1,35 @@ | ||||
| // regex that never matches
 | ||||
| const SKIP_NOTHING_RE = /(\b\B)/; | ||||
| export const SKIP_PRINTABLE_ASCII_RE = /([ -~]+)/g; | ||||
| export const SKIP_ASCII_JS = /([ -!#-&(-\[\]-~]+)/g; | ||||
| export const SKIP_ASCII_HTML = /([ -!#-%(-;=?-~]+)/g; | ||||
| 
 | ||||
| function _codeUnits(text: string): number[] { | ||||
| function codeUnits(text: string): number[] { | ||||
|   return text.split('').map(char => char.codePointAt(0)); | ||||
| } | ||||
| 
 | ||||
| function _codePoints(text: string): number[] { | ||||
| function codePoints(text: string): number[] { | ||||
|   return [...text].map(char => char.codePointAt(0)); | ||||
| } | ||||
| 
 | ||||
| export interface Converter { | ||||
|   name: string | ||||
|   escape(text: string, skip: RegExp): string | ||||
|   escape(text: string, skipAscii: boolean): string | ||||
|   unescape(text: string): string | ||||
| }; | ||||
| 
 | ||||
| interface EscapeConfig { | ||||
|   getCharValues?(text: string): number[] | ||||
|   charValues?(text: string): number[] | ||||
|   mapper(charValue: number): string | ||||
|   /** @prop regular expression for default content to skip. Must have exactly 1 capture group. */ | ||||
|   asciiSkipper: RegExp | ||||
| }; | ||||
| 
 | ||||
| function escaper({ getCharValues, mapper }: EscapeConfig) { | ||||
|   /** | ||||
|    * @param text text input to escape | ||||
|    * @param skipper regular expression for content _not_ to escape. Must have exactly 1 capture group. | ||||
|    */ | ||||
|   return (text: string, skipper?: RegExp): string => { | ||||
|     skipper ??= SKIP_NOTHING_RE; | ||||
|     getCharValues ??= _codePoints; | ||||
| function escaper({ charValues: getCharValues, mapper, asciiSkipper: skipper }: EscapeConfig) { | ||||
|   return (text: string, skip: boolean): string => { | ||||
|     getCharValues ??= codePoints; | ||||
| 
 | ||||
|     return text | ||||
|       .split(skipper) | ||||
|       .split(skip ? skipper : SKIP_NOTHING_RE) | ||||
|       .flatMap((x, i) => { | ||||
|         if (i % 2) { | ||||
|           return x; | ||||
| @ -59,22 +57,22 @@ export type ConverterId = keyof typeof converters; | ||||
| const converters = { | ||||
|   fullUnicode: { | ||||
|     name: 'Full Unicode', | ||||
|     escape: escaper({ mapper: convertCodePointToUnicode }), | ||||
|     escape: escaper({ mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }), | ||||
|     unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }), | ||||
|   }, | ||||
|   utf16: { | ||||
|     name: 'UTF-16 Code Units', | ||||
|     escape: escaper({ getCharValues: _codeUnits, mapper: convertCodePointToUnicode }), | ||||
|     escape: escaper({ charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }), | ||||
|     unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }), | ||||
|   }, | ||||
|   hexEntities: { | ||||
|     name: 'HTML Entities (Hex)', | ||||
|     escape: escaper({ mapper: toHexEntities }), | ||||
|     escape: escaper({ mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }), | ||||
|     unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }), | ||||
|   }, | ||||
|   decimalEntities: { | ||||
|     name: 'HTML Entities (Decimal)', | ||||
|     escape: escaper({ mapper: toDecimalEntities }), | ||||
|     escape: escaper({ mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }), | ||||
|     unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }), | ||||
|   }, | ||||
| } satisfies Record<string, Converter>; | ||||
|  | ||||
| @ -1,5 +1,5 @@ | ||||
| <script setup lang="ts"> | ||||
| import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service'; | ||||
| import { type ConverterId, converters } from './text-to-unicode.service'; | ||||
| import { useCopy } from '@/composable/copy'; | ||||
| 
 | ||||
| const converterId = ref<ConverterId>('fullUnicode'); | ||||
| @ -9,7 +9,7 @@ const inputText = ref(''); | ||||
| const unicodeFromText = computed(() => | ||||
|   inputText.value.trim() === '' | ||||
|     ? '' | ||||
|     : converters[converterId.value].escape(inputText.value, skipAscii.value ? SKIP_PRINTABLE_ASCII_RE : undefined), | ||||
|     : converters[converterId.value].escape(inputText.value, skipAscii.value), | ||||
| ); | ||||
| const { copy: copyUnicode } = useCopy({ source: unicodeFromText }); | ||||
| 
 | ||||
| @ -52,7 +52,7 @@ const { copy: copyText } = useCopy({ source: textFromUnicode }); | ||||
|         test-id="text-to-unicode-output" | ||||
|       /> | ||||
|       <div mt-2 flex justify-start> | ||||
|         <n-form-item label="Skip ASCII?" :show-feedback="false" label-placement="left"> | ||||
|         <n-form-item label="Skip ASCII chars with no special meaning?" :show-feedback="false" label-placement="left"> | ||||
|           <n-switch v-model:value="skipAscii" /> | ||||
|         </n-form-item> | ||||
|       </div> | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user