Always escape ASCII chars with special meaning
This commit is contained in:
		
							parent
							
								
									b0ae8d7b3d
								
							
						
					
					
						commit
						1dc965d9a8
					
				| @ -10,16 +10,18 @@ test.describe('Tool - Text to Unicode', () => { | |||||||
|   }); |   }); | ||||||
| 
 | 
 | ||||||
|   test('Text to unicode conversion', async ({ page }) => { |   test('Text to unicode conversion', async ({ page }) => { | ||||||
|     await page.getByTestId('text-to-unicode-input').fill('it-tools'); |     await page.getByTestId('text-to-unicode-input').fill('"it-tools" 文字'); | ||||||
|     const unicode = await page.getByTestId('text-to-unicode-output').inputValue(); |     const unicode = await page.getByTestId('text-to-unicode-output').inputValue(); | ||||||
| 
 | 
 | ||||||
|     expect(unicode).toEqual('it-tools'); |     // eslint-disable-next-line unicorn/escape-case
 | ||||||
|  |     expect(unicode).toEqual(String.raw`\u0022it-tools\u0022 \u6587\u5b57`); | ||||||
|   }); |   }); | ||||||
| 
 | 
 | ||||||
|   test('Unicode to text conversion', async ({ page }) => { |   test('Unicode to text conversion', async ({ page }) => { | ||||||
|     await page.getByTestId('unicode-to-text-input').fill('it-tools'); |     // eslint-disable-next-line unicorn/escape-case
 | ||||||
|  |     await page.getByTestId('unicode-to-text-input').fill(String.raw`\u0022it-tools\u0022 \u6587\u5b57`); | ||||||
|     const text = await page.getByTestId('unicode-to-text-output').inputValue(); |     const text = await page.getByTestId('unicode-to-text-output').inputValue(); | ||||||
| 
 | 
 | ||||||
|     expect(text).toEqual('it-tools'); |     expect(text).toEqual('"it-tools" 文字'); | ||||||
|   }); |   }); | ||||||
| }); | }); | ||||||
|  | |||||||
| @ -1,8 +1,8 @@ | |||||||
| import { describe, expect, it } from 'vitest'; | import { describe, expect, it } from 'vitest'; | ||||||
| import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service'; | import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './text-to-unicode.service'; | ||||||
| 
 | 
 | ||||||
| describe('text-to-unicode (legacy tests)', () => { | describe('text-to-unicode (legacy tests)', () => { | ||||||
|   const convertTextToUnicode = converters.decimalEntities.escape; |   const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false); | ||||||
|   const convertUnicodeToText = converters.decimalEntities.unescape; |   const convertUnicodeToText = converters.decimalEntities.unescape; | ||||||
| 
 | 
 | ||||||
|   describe('convertTextToUnicode', () => { |   describe('convertTextToUnicode', () => { | ||||||
| @ -22,6 +22,23 @@ describe('text-to-unicode (legacy tests)', () => { | |||||||
|   }); |   }); | ||||||
| }); | }); | ||||||
| 
 | 
 | ||||||
|  | const ALL_PRINTABLE_ASCII = ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'; | ||||||
|  | 
 | ||||||
|  | describe('text-to-unicode regexes', () => { | ||||||
|  |   // eslint-disable-next-line prefer-regex-literals
 | ||||||
|  |   const skipAsciiJs = new RegExp(String.raw`([[ -~]--['"\\]]+)`, 'gv'); | ||||||
|  |   // eslint-disable-next-line prefer-regex-literals
 | ||||||
|  |   const skipAsciiHtml = new RegExp(String.raw`([[ -~]--[<>&'"]]+)`, 'gv'); | ||||||
|  | 
 | ||||||
|  |   it('regexes are equivalent to `v`-flag versions', () => { | ||||||
|  |     // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicodeSets
 | ||||||
|  |     // regexes in `text-to-unicode.service.ts` can be replaced with `v`-flag versions once unicodeSets reaches
 | ||||||
|  |     // sufficient in-browser support
 | ||||||
|  |     expect(ALL_PRINTABLE_ASCII.match(skipAsciiJs)).toStrictEqual(ALL_PRINTABLE_ASCII.match(SKIP_ASCII_JS)); | ||||||
|  |     expect(ALL_PRINTABLE_ASCII.match(skipAsciiHtml)).toStrictEqual(ALL_PRINTABLE_ASCII.match(SKIP_ASCII_HTML)); | ||||||
|  |   }); | ||||||
|  | }); | ||||||
|  | 
 | ||||||
| describe('text-to-unicode', () => { | describe('text-to-unicode', () => { | ||||||
|   interface TestConfig { |   interface TestConfig { | ||||||
|     text: string |     text: string | ||||||
| @ -48,6 +65,18 @@ describe('text-to-unicode', () => { | |||||||
|         decimalEntities: 'ABC', |         decimalEntities: 'ABC', | ||||||
|       }, |       }, | ||||||
|     }, |     }, | ||||||
|  |     { | ||||||
|  |       text: ALL_PRINTABLE_ASCII, | ||||||
|  |       skipPrintableAscii: true, | ||||||
|  |       results: { | ||||||
|  |         // eslint-disable-next-line unicorn/escape-case
 | ||||||
|  |         fullUnicode: String.raw` !\u0022#$%&\u0027()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005c]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`, | ||||||
|  |         // eslint-disable-next-line unicorn/escape-case
 | ||||||
|  |         utf16: String.raw` !\u0022#$%&\u0027()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005c]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`, | ||||||
|  |         hexEntities: String.raw` !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`, | ||||||
|  |         decimalEntities: String.raw` !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`, | ||||||
|  |       }, | ||||||
|  |     }, | ||||||
|     { |     { | ||||||
|       text: '文字', |       text: '文字', | ||||||
|       results: { |       results: { | ||||||
| @ -79,7 +108,7 @@ describe('text-to-unicode', () => { | |||||||
|         describe(key, () => { |         describe(key, () => { | ||||||
|           const converter = converters[key as ConverterId]; |           const converter = converters[key as ConverterId]; | ||||||
|           it('Escaping', () => { |           it('Escaping', () => { | ||||||
|             expect(converter.escape(text, skipAscii ? SKIP_PRINTABLE_ASCII_RE : undefined)).toBe(result); |             expect(converter.escape(text, skipAscii)).toBe(result); | ||||||
|           }); |           }); | ||||||
|           it('Unescaping', () => { |           it('Unescaping', () => { | ||||||
|             expect(converter.unescape(result)).toBe(text); |             expect(converter.unescape(result)).toBe(text); | ||||||
|  | |||||||
| @ -1,37 +1,35 @@ | |||||||
| // regex that never matches
 | // regex that never matches
 | ||||||
| const SKIP_NOTHING_RE = /(\b\B)/; | const SKIP_NOTHING_RE = /(\b\B)/; | ||||||
| export const SKIP_PRINTABLE_ASCII_RE = /([ -~]+)/g; | export const SKIP_ASCII_JS = /([ -!#-&(-\[\]-~]+)/g; | ||||||
|  | export const SKIP_ASCII_HTML = /([ -!#-%(-;=?-~]+)/g; | ||||||
| 
 | 
 | ||||||
| function _codeUnits(text: string): number[] { | function codeUnits(text: string): number[] { | ||||||
|   return text.split('').map(char => char.codePointAt(0)); |   return text.split('').map(char => char.codePointAt(0)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| function _codePoints(text: string): number[] { | function codePoints(text: string): number[] { | ||||||
|   return [...text].map(char => char.codePointAt(0)); |   return [...text].map(char => char.codePointAt(0)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| export interface Converter { | export interface Converter { | ||||||
|   name: string |   name: string | ||||||
|   escape(text: string, skip: RegExp): string |   escape(text: string, skipAscii: boolean): string | ||||||
|   unescape(text: string): string |   unescape(text: string): string | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| interface EscapeConfig { | interface EscapeConfig { | ||||||
|   getCharValues?(text: string): number[] |   charValues?(text: string): number[] | ||||||
|   mapper(charValue: number): string |   mapper(charValue: number): string | ||||||
|  |   /** @prop regular expression for default content to skip. Must have exactly 1 capture group. */ | ||||||
|  |   asciiSkipper: RegExp | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| function escaper({ getCharValues, mapper }: EscapeConfig) { | function escaper({ charValues: getCharValues, mapper, asciiSkipper: skipper }: EscapeConfig) { | ||||||
|   /** |   return (text: string, skip: boolean): string => { | ||||||
|    * @param text text input to escape |     getCharValues ??= codePoints; | ||||||
|    * @param skipper regular expression for content _not_ to escape. Must have exactly 1 capture group. |  | ||||||
|    */ |  | ||||||
|   return (text: string, skipper?: RegExp): string => { |  | ||||||
|     skipper ??= SKIP_NOTHING_RE; |  | ||||||
|     getCharValues ??= _codePoints; |  | ||||||
| 
 | 
 | ||||||
|     return text |     return text | ||||||
|       .split(skipper) |       .split(skip ? skipper : SKIP_NOTHING_RE) | ||||||
|       .flatMap((x, i) => { |       .flatMap((x, i) => { | ||||||
|         if (i % 2) { |         if (i % 2) { | ||||||
|           return x; |           return x; | ||||||
| @ -59,22 +57,22 @@ export type ConverterId = keyof typeof converters; | |||||||
| const converters = { | const converters = { | ||||||
|   fullUnicode: { |   fullUnicode: { | ||||||
|     name: 'Full Unicode', |     name: 'Full Unicode', | ||||||
|     escape: escaper({ mapper: convertCodePointToUnicode }), |     escape: escaper({ mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }), | ||||||
|     unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }), |     unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }), | ||||||
|   }, |   }, | ||||||
|   utf16: { |   utf16: { | ||||||
|     name: 'UTF-16 Code Units', |     name: 'UTF-16 Code Units', | ||||||
|     escape: escaper({ getCharValues: _codeUnits, mapper: convertCodePointToUnicode }), |     escape: escaper({ charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }), | ||||||
|     unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }), |     unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }), | ||||||
|   }, |   }, | ||||||
|   hexEntities: { |   hexEntities: { | ||||||
|     name: 'HTML Entities (Hex)', |     name: 'HTML Entities (Hex)', | ||||||
|     escape: escaper({ mapper: toHexEntities }), |     escape: escaper({ mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }), | ||||||
|     unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }), |     unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }), | ||||||
|   }, |   }, | ||||||
|   decimalEntities: { |   decimalEntities: { | ||||||
|     name: 'HTML Entities (Decimal)', |     name: 'HTML Entities (Decimal)', | ||||||
|     escape: escaper({ mapper: toDecimalEntities }), |     escape: escaper({ mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }), | ||||||
|     unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }), |     unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }), | ||||||
|   }, |   }, | ||||||
| } satisfies Record<string, Converter>; | } satisfies Record<string, Converter>; | ||||||
|  | |||||||
| @ -1,5 +1,5 @@ | |||||||
| <script setup lang="ts"> | <script setup lang="ts"> | ||||||
| import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service'; | import { type ConverterId, converters } from './text-to-unicode.service'; | ||||||
| import { useCopy } from '@/composable/copy'; | import { useCopy } from '@/composable/copy'; | ||||||
| 
 | 
 | ||||||
| const converterId = ref<ConverterId>('fullUnicode'); | const converterId = ref<ConverterId>('fullUnicode'); | ||||||
| @ -9,7 +9,7 @@ const inputText = ref(''); | |||||||
| const unicodeFromText = computed(() => | const unicodeFromText = computed(() => | ||||||
|   inputText.value.trim() === '' |   inputText.value.trim() === '' | ||||||
|     ? '' |     ? '' | ||||||
|     : converters[converterId.value].escape(inputText.value, skipAscii.value ? SKIP_PRINTABLE_ASCII_RE : undefined), |     : converters[converterId.value].escape(inputText.value, skipAscii.value), | ||||||
| ); | ); | ||||||
| const { copy: copyUnicode } = useCopy({ source: unicodeFromText }); | const { copy: copyUnicode } = useCopy({ source: unicodeFromText }); | ||||||
| 
 | 
 | ||||||
| @ -52,7 +52,7 @@ const { copy: copyText } = useCopy({ source: textFromUnicode }); | |||||||
|         test-id="text-to-unicode-output" |         test-id="text-to-unicode-output" | ||||||
|       /> |       /> | ||||||
|       <div mt-2 flex justify-start> |       <div mt-2 flex justify-start> | ||||||
|         <n-form-item label="Skip ASCII?" :show-feedback="false" label-placement="left"> |         <n-form-item label="Skip ASCII chars with no special meaning?" :show-feedback="false" label-placement="left"> | ||||||
|           <n-switch v-model:value="skipAscii" /> |           <n-switch v-model:value="skipAscii" /> | ||||||
|         </n-form-item> |         </n-form-item> | ||||||
|       </div> |       </div> | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user