initial commit

This commit is contained in:
amorozov 2024-09-16 15:32:22 +03:00
commit 02d0a1162b
2 changed files with 110 additions and 0 deletions

8
go.mod Normal file
View File

@ -0,0 +1,8 @@
module git.tswf.io/incredible-go/incredible-go-core-text-encoding
go 1.20
require (
github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394
golang.org/x/text v0.18.0
)

View File

@ -0,0 +1,102 @@
package stringencoding
import (
"fmt"
"io/ioutil"
"strings"
"github.com/axgle/mahonia"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/encoding/unicode/utf32"
"golang.org/x/text/transform"
)
// DecodeString converts input, encoded in the character set named by
// encodingName, into a Go (UTF-8) string.
//
// encodingName is matched case-insensitively. Supported names are "utf-8",
// "utf-16", "utf-16le", "utf-16be", "utf-32", "utf-32le", "utf-32be",
// "windows-1252", "iso-8859-1" through "iso-8859-9", "koi8-r", "gb2312",
// "gbk", "shift_jis" and "euc-jp". For the UTF-16 and UTF-32 variants a
// leading byte order mark overrides the default byte order; without one,
// "utf-16" and "utf-32" are read as little-endian.
//
// It returns an error when encodingName is empty or not supported, when input
// is nil or empty, or when the underlying decoder fails.
func DecodeString(encodingName string, input []byte) (string, error) {
	if encodingName == "" {
		return "", fmt.Errorf("empty encoding string")
	}
	// len of a nil slice is 0, so this single check covers nil and empty input.
	if len(input) == 0 {
		return "", fmt.Errorf("empty or null input")
	}

	var decoder *encoding.Decoder
	// Pick the decoder that matches the requested encoding.
	switch strings.ToLower(encodingName) {
	case "utf-8":
		decoder = unicode.UTF8.NewDecoder()
	case "utf-16", "utf-16le":
		decoder = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()
	case "utf-16be":
		decoder = unicode.UTF16(unicode.BigEndian, unicode.UseBOM).NewDecoder()
	case "utf-32", "utf-32le":
		decoder = utf32.UTF32(utf32.LittleEndian, utf32.UseBOM).NewDecoder()
	case "utf-32be":
		decoder = utf32.UTF32(utf32.BigEndian, utf32.UseBOM).NewDecoder()
	case "windows-1252":
		decoder = charmap.Windows1252.NewDecoder()
	case "iso-8859-1":
		decoder = charmap.ISO8859_1.NewDecoder()
	case "iso-8859-2":
		decoder = charmap.ISO8859_2.NewDecoder()
	case "iso-8859-3":
		decoder = charmap.ISO8859_3.NewDecoder()
	case "iso-8859-4":
		decoder = charmap.ISO8859_4.NewDecoder()
	case "iso-8859-5":
		decoder = charmap.ISO8859_5.NewDecoder()
	case "iso-8859-6":
		decoder = charmap.ISO8859_6.NewDecoder()
	case "iso-8859-7":
		decoder = charmap.ISO8859_7.NewDecoder()
	case "iso-8859-8":
		decoder = charmap.ISO8859_8.NewDecoder()
	case "iso-8859-9":
		decoder = charmap.ISO8859_9.NewDecoder()
	case "koi8-r":
		decoder = charmap.KOI8R.NewDecoder()
	// The remaining encodings are delegated to the mahonia-backed helpers
	// instead of going through an x/text decoder.
	case "gb2312":
		return decodeGB2312(input)
	case "gbk":
		return decodeGBK(input)
	case "shift_jis":
		return decodeShiftJIS(input)
	case "euc-jp":
		return decodeEUCJP(input)
	default:
		return "", fmt.Errorf("unsupported encoding: %s", encodingName)
	}

	// Decode the input bytes by streaming them through the selected decoder.
	decodingReader := transform.NewReader(strings.NewReader(string(input)), decoder)
	decodedBytes, err := ioutil.ReadAll(decodingReader)
	if err != nil {
		return "", fmt.Errorf("decoding %s input: %w", encodingName, err)
	}
	return string(decodedBytes), nil
}
// Helpers that decode specific encodings using mahonia.
// decodeGB2312 decodes GB2312-encoded input into a string using mahonia.
// It returns an error if mahonia has no decoder registered for "gb2312".
func decodeGB2312(input []byte) (string, error) {
	decoder := mahonia.NewDecoder("gb2312")
	// NewDecoder returns nil for an unknown charset; converting through a nil
	// decoder would panic, so report it as an error instead.
	if decoder == nil {
		return "", fmt.Errorf("unsupported encoding: gb2312")
	}
	return decoder.ConvertString(string(input)), nil
}
// decodeGBK decodes GBK-encoded input into a string using mahonia.
// It returns an error if mahonia has no decoder registered for "gbk".
func decodeGBK(input []byte) (string, error) {
	decoder := mahonia.NewDecoder("gbk")
	// NewDecoder returns nil for an unknown charset; converting through a nil
	// decoder would panic, so report it as an error instead.
	if decoder == nil {
		return "", fmt.Errorf("unsupported encoding: gbk")
	}
	return decoder.ConvertString(string(input)), nil
}
// decodeShiftJIS decodes Shift_JIS-encoded input into a string using mahonia.
// It returns an error if mahonia has no decoder registered for "shift_jis".
func decodeShiftJIS(input []byte) (string, error) {
	decoder := mahonia.NewDecoder("shift_jis")
	// NewDecoder returns nil for an unknown charset; converting through a nil
	// decoder would panic, so report it as an error instead.
	if decoder == nil {
		return "", fmt.Errorf("unsupported encoding: shift_jis")
	}
	return decoder.ConvertString(string(input)), nil
}
// decodeEUCJP decodes EUC-JP-encoded input into a string using mahonia.
// It returns an error if mahonia has no decoder registered for "euc-jp".
func decodeEUCJP(input []byte) (string, error) {
	decoder := mahonia.NewDecoder("euc-jp")
	// NewDecoder returns nil for an unknown charset; converting through a nil
	// decoder would panic, so report it as an error instead.
	if decoder == nil {
		return "", fmt.Errorf("unsupported encoding: euc-jp")
	}
	return decoder.ConvertString(string(input)), nil
}