feat(new tool): Text extractor from HTML
Fix https://github.com/CorentinTh/it-tools/issues/1035
This commit is contained in:
		
							parent
							
								
									b59942ad9f
								
							
						
					
					
						commit
						089853e05b
					
				
							
								
								
									
										10
									
								
								components.d.ts
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								components.d.ts
									
									
									
									
										vendored
									
									
								
							| @ -77,6 +77,7 @@ declare module '@vue/runtime-core' { | ||||
|     EmojiPicker: typeof import('./src/tools/emoji-picker/emoji-picker.vue')['default'] | ||||
|     Encryption: typeof import('./src/tools/encryption/encryption.vue')['default'] | ||||
|     EtaCalculator: typeof import('./src/tools/eta-calculator/eta-calculator.vue')['default'] | ||||
|     ExtractTextFromHtml: typeof import('./src/tools/extract-text-from-html/extract-text-from-html.vue')['default'] | ||||
|     FavoriteButton: typeof import('./src/components/FavoriteButton.vue')['default'] | ||||
|     FormatTransformer: typeof import('./src/components/FormatTransformer.vue')['default'] | ||||
|     GitMemo: typeof import('./src/tools/git-memo/git-memo.vue')['default'] | ||||
| @ -126,25 +127,26 @@ declare module '@vue/runtime-core' { | ||||
|     MenuLayout: typeof import('./src/components/MenuLayout.vue')['default'] | ||||
|     MetaTagGenerator: typeof import('./src/tools/meta-tag-generator/meta-tag-generator.vue')['default'] | ||||
|     MimeTypes: typeof import('./src/tools/mime-types/mime-types.vue')['default'] | ||||
|     NAlert: typeof import('naive-ui')['NAlert'] | ||||
|     NavbarButtons: typeof import('./src/components/NavbarButtons.vue')['default'] | ||||
|     NCode: typeof import('naive-ui')['NCode'] | ||||
|     NCollapseTransition: typeof import('naive-ui')['NCollapseTransition'] | ||||
|     NColorPicker: typeof import('naive-ui')['NColorPicker'] | ||||
|     NConfigProvider: typeof import('naive-ui')['NConfigProvider'] | ||||
|     NDivider: typeof import('naive-ui')['NDivider'] | ||||
|     NEllipsis: typeof import('naive-ui')['NEllipsis'] | ||||
|     NFormItem: typeof import('naive-ui')['NFormItem'] | ||||
|     NGi: typeof import('naive-ui')['NGi'] | ||||
|     NGrid: typeof import('naive-ui')['NGrid'] | ||||
|     NH1: typeof import('naive-ui')['NH1'] | ||||
|     NH3: typeof import('naive-ui')['NH3'] | ||||
|     NIcon: typeof import('naive-ui')['NIcon'] | ||||
|     NInputGroup: typeof import('naive-ui')['NInputGroup'] | ||||
|     NInputGroupLabel: typeof import('naive-ui')['NInputGroupLabel'] | ||||
|     NInputNumber: typeof import('naive-ui')['NInputNumber'] | ||||
|     NLabel: typeof import('naive-ui')['NLabel'] | ||||
|     NLayout: typeof import('naive-ui')['NLayout'] | ||||
|     NLayoutSider: typeof import('naive-ui')['NLayoutSider'] | ||||
|     NMenu: typeof import('naive-ui')['NMenu'] | ||||
|     NScrollbar: typeof import('naive-ui')['NScrollbar'] | ||||
|     NSpin: typeof import('naive-ui')['NSpin'] | ||||
|     NSwitch: typeof import('naive-ui')['NSwitch'] | ||||
|     NumeronymGenerator: typeof import('./src/tools/numeronym-generator/numeronym-generator.vue')['default'] | ||||
|     OtpCodeGeneratorAndValidator: typeof import('./src/tools/otp-code-generator-and-validator/otp-code-generator-and-validator.vue')['default'] | ||||
|     PasswordStrengthAnalyser: typeof import('./src/tools/password-strength-analyser/password-strength-analyser.vue')['default'] | ||||
|  | ||||
| @ -0,0 +1,17 @@ | ||||
| import { test, expect } from '@playwright/test'; | ||||
| 
 | ||||
// End-to-end checks for the "Extract text from HTML" tool page.
test.describe('Tool - Extract text from html', () => {
  // Every test starts from a freshly loaded tool page.
  test.beforeEach(async ({ page }) => {
    await page.goto('/extract-text-from-html');
  });

  test('Has correct title', async ({ page }) => {
    await expect(page).toHaveTitle('Extract text from HTML');
  });

  test('Extract text from HTML', async ({ page }) => {
    const inputField = page.getByTestId('input');
    await inputField.fill('<p>Paste your HTML in the input form on the left</p>');

    // Surrounding whitespace in the rendered output is not significant here.
    const renderedOutput = await page.getByTestId('area-content').innerText();
    const expectedText = 'Paste your HTML in the input form on the left'.trim();
    expect(renderedOutput.trim()).toEqual(expectedText);
  });
});
| @ -0,0 +1,36 @@ | ||||
| import { expect, describe, it } from 'vitest'; | ||||
| import { getTextFromHtml, validateHtml } from './extract-text-from-html.service'; | ||||
| 
 | ||||
// Unit tests for the HTML validation and text extraction helpers.
describe('extract-text-from-html service', () => {
  describe('validateHtml', () => {
    it('check if the value is valid html', () => {
      expect(validateHtml('<p>Paste your HTML in the input form on the left</p>')).toBeTruthy();
      expect(validateHtml('<div>Paste your HTML in the input form on the left</div>')).toBeTruthy();
      expect(validateHtml('<div><p>Paste your HTML in the input form on the left</p></div>')).toBeTruthy();
      expect(validateHtml('<body><div><p>Paste your HTML in the input form on the left</p></div></body>')).toBeTruthy();
      // An unclosed wrapper is accepted as long as one inner element is balanced.
      expect(validateHtml('<div><p>Paste your HTML in the input form on the left</p>')).toBeTruthy();
    });

    it('check if the value is an html invlid', () => {
      expect(validateHtml('<p>Paste your HTML in the input form on the left<p>')).toBeFalsy();
      expect(validateHtml('Paste your HTML in the input form on the left<p>')).toBeFalsy();
      expect(validateHtml('<p>Paste your HTML in the input form on the left')).toBeFalsy();
      expect(validateHtml('<p>Paste your HTML in the input form on the left<>')).toBeFalsy();
      expect(validateHtml('<>Paste your HTML in the input form on the left<>')).toBeFalsy();
      expect(validateHtml('<p>Paste your HTML in the input form on the left</a>')).toBeFalsy();
    });
  });

  describe('getTextFromHtml', () => {
    it('must be return a string', () => {
      // Assert on the runtime type; a bare `expect(...)` without a matcher checks nothing.
      expect(typeof getTextFromHtml('<p>Paste your HTML in the input form on the left</p>')).toBe('string');
    });

    it('must be return text from html', () => {
      expect(getTextFromHtml('<p>Paste your HTML in the input form on the left</p>')).toStrictEqual(
        'Paste your HTML in the input form on the left',
      );
    });
  });
});
| @ -0,0 +1,21 @@ | ||||
/**
 * Heuristically checks whether `value` contains at least one well-formed HTML element.
 *
 * An element counts as well-formed when it is either
 * - an opening tag followed, anywhere later in the input (including on another
 *   line), by a closing tag with the same name, or
 * - a self-closing tag such as `<br/>` or `<img src="/a.png" />`.
 *
 * This is a lightweight pattern check, not a full HTML parse: one balanced
 * element anywhere in the input is enough for the whole input to be accepted.
 *
 * @param value - Raw markup typed or pasted by the user.
 * @returns `true` when at least one element matches, `false` otherwise
 *   (including for the empty string).
 */
function validateHtml(value: string): boolean {
  // No DOMParser pre-check: parsing as `text/html` recovers from malformed markup
  // instead of throwing, so it could never reject anything, and it would make the
  // function fail wherever DOMParser does not exist.
  //
  // - `[\s\S]*?` lets element content span line breaks (`.` would stop at a newline).
  // - `[^>]*` keeps attribute scanning inside a single tag and tolerates `/` in
  //   attribute values such as URLs.
  // - The regex is intentionally not global, so `test` carries no `lastIndex` state.
  const elementPattern = /<([a-z][a-z0-9]*)\b[^>]*>([\s\S]*?)<\/\1>|<([a-z][a-z0-9]*)\b[^>]*\/>/i;

  return elementPattern.test(value);
}
| 
 | ||||
/**
 * Extracts the plain text contained in an HTML fragment.
 *
 * The markup is parsed into a `<template>` element rather than a regular
 * element: template content lives in an inert document, so user-supplied
 * markup is parsed without loading resources or running inline event handlers.
 *
 * @param value - Raw HTML markup.
 * @returns The concatenated text of the fragment with every run of whitespace
 *   collapsed to a single space. Leading and trailing spaces are not trimmed.
 */
function getTextFromHtml(value: string): string {
  const template = document.createElement('template');
  template.innerHTML = value;

  // The fragment is never rendered, so `textContent` is the text to read.
  const text = template.content.textContent ?? '';

  return text.replace(/\s+/g, ' ');
}
| 
 | ||||
| export { validateHtml, getTextFromHtml }; | ||||
							
								
								
									
										33
									
								
								src/tools/extract-text-from-html/extract-text-from-html.vue
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								src/tools/extract-text-from-html/extract-text-from-html.vue
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,33 @@ | ||||
| <script setup lang="ts"> | ||||
| import { getTextFromHtml, validateHtml } from './extract-text-from-html.service'; | ||||
| import { withDefaultOnError } from '@/utils/defaults'; | ||||
| import type { UseValidationRule } from '@/composable/validation'; | ||||
| 
 | ||||
/**
 * Converts the raw HTML typed by the user into its plain-text form.
 *
 * An empty input yields an empty output without calling the extractor; if the
 * extraction throws, `withDefaultOnError` supplies `''` as the fallback.
 */
function transformer(value: string) {
  const extractText = () => (value === '' ? '' : getTextFromHtml(value));

  return withDefaultOnError(extractText, '');
}
| 
 | ||||
// Validation applied to the input field: an empty input is always accepted,
// anything else must pass `validateHtml`.
const rules: UseValidationRule<string>[] = [
  {
    validator: (value: string) => (value === '' ? true : validateHtml(value)),
    message: 'Provided HTML is not valid.',
  },
];
| </script> | ||||
| 
 | ||||
| <template> | ||||
|   <format-transformer | ||||
|     input-label="Your raw HTML" | ||||
|     input-placeholder="Paste your raw HTML here..." | ||||
|     output-label="Text from your HTML" | ||||
|     :input-validation-rules="rules" | ||||
|     :transformer="transformer" | ||||
|   /> | ||||
| </template> | ||||
| 
 | ||||
| <style lang="less" scoped></style> | ||||
							
								
								
									
										13
									
								
								src/tools/extract-text-from-html/index.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								src/tools/extract-text-from-html/index.ts
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | ||||
| import { CursorText } from '@vicons/tabler'; | ||||
| import { defineTool } from '../tool'; | ||||
| 
 | ||||
/**
 * Registration entry for the "Extract text from HTML" tool: display name,
 * route, description, search keywords, lazily imported component, icon and
 * creation date.
 */
export const tool = defineTool({
  name: 'Extract text from HTML',
  path: '/extract-text-from-html',
  description:
    'Paste your HTML in the input form on the left and you will get text instantly. Occasionally, you may need to extract plain text from an HTML page where CSS properties (like user-select: none;) prevent text selection. The typical workaround involves using the DevTools (F12) to select "Copy → outer HTML". The proposed tool would simplify this process by extracting the "inner Text" directly from the copied HTML.',
  keywords: [
    'extract',
    'text',
    'from',
    'html',
  ],
  // The component is imported on demand through a dynamic import.
  component: () => import('./extract-text-from-html.vue'),
  icon: CursorText,
  createdAt: new Date('2024-05-10'),
});
| @ -1,6 +1,7 @@ | ||||
| import { tool as base64FileConverter } from './base64-file-converter'; | ||||
| import { tool as base64StringConverter } from './base64-string-converter'; | ||||
| import { tool as basicAuthGenerator } from './basic-auth-generator'; | ||||
| import { tool as extractTextFromHtml } from './extract-text-from-html'; | ||||
| 
 | ||||
| import { tool as asciiTextDrawer } from './ascii-text-drawer'; | ||||
| 
 | ||||
| @ -148,6 +149,7 @@ export const toolsByCategory: ToolCategory[] = [ | ||||
|       dockerRunToDockerComposeConverter, | ||||
|       xmlFormatter, | ||||
|       yamlViewer, | ||||
|       extractTextFromHtml, | ||||
|     ], | ||||
|   }, | ||||
|   { | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user