commit 02d0a1162b03ffa7511c46a31d6f26ba513f07f9 Author: amorozov Date: Mon Sep 16 15:32:22 2024 +0300 initial commit diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..6c3e415 --- /dev/null +++ b/go.mod @@ -0,0 +1,8 @@ +module git.tswf.io/incredible-go/incredible-go-core-text-encoding + +go 1.20 + +require ( + github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 + golang.org/x/text v0.18.0 +) diff --git a/pkg/stringencoding/standard_string_encoder.go b/pkg/stringencoding/standard_string_encoder.go new file mode 100644 index 0000000..ee459a6 --- /dev/null +++ b/pkg/stringencoding/standard_string_encoder.go @@ -0,0 +1,102 @@ +package stringencoding + +import ( + "fmt" + "io/ioutil" + "strings" + + "github.com/axgle/mahonia" + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/charmap" + "golang.org/x/text/encoding/unicode" + "golang.org/x/text/encoding/unicode/utf32" + "golang.org/x/text/transform" +) + +func DecodeString(encodingName string, input []byte) (string, error) { + if encodingName == "" { + return "", fmt.Errorf("empty encoding string") + } + + if input == nil || len(input) == 0 { + return "", fmt.Errorf("empty or null input") + } + + var decoder *encoding.Decoder + + // Определяем декодер в зависимости от кодировки + switch strings.ToLower(encodingName) { + case "utf-8": + decoder = unicode.UTF8.NewDecoder() + case "utf-16": + decoder = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder() + case "utf-16be": + decoder = unicode.UTF16(unicode.BigEndian, unicode.UseBOM).NewDecoder() + case "utf-32": + decoder = utf32.UTF32(utf32.LittleEndian, utf32.UseBOM).NewDecoder() + case "utf-32be": + decoder = utf32.UTF32(utf32.BigEndian, utf32.UseBOM).NewDecoder() + case "windows-1252": + decoder = charmap.Windows1252.NewDecoder() + case "iso-8859-1": + decoder = charmap.ISO8859_1.NewDecoder() + case "iso-8859-2": + decoder = charmap.ISO8859_2.NewDecoder() + case "iso-8859-3": + decoder = charmap.ISO8859_3.NewDecoder() + case "iso-8859-4": + decoder = charmap.ISO8859_4.NewDecoder() + case "iso-8859-5": + decoder = charmap.ISO8859_5.NewDecoder() + case "iso-8859-6": + decoder = charmap.ISO8859_6.NewDecoder() + case "iso-8859-7": + decoder = charmap.ISO8859_7.NewDecoder() + case "iso-8859-8": + decoder = charmap.ISO8859_8.NewDecoder() + case "iso-8859-9": + decoder = charmap.ISO8859_9.NewDecoder() + case "koi8-r": + decoder = charmap.KOI8R.NewDecoder() + case "gb2312": + return decodeGB2312(input) + case "gbk": + return decodeGBK(input) + case "shift_jis": + return decodeShiftJIS(input) + case "euc-jp": + return decodeEUCJP(input) + default: + return "", fmt.Errorf("unsupported encoding: %s", encodingName) + } + + // Декодируем входные байты + decodingReader := transform.NewReader(strings.NewReader(string(input)), decoder) + decodedBytes, err := ioutil.ReadAll(decodingReader) + if err != nil { + return "", err + } + + return string(decodedBytes), nil +} + +// Функции для декодирования специфических кодировок с использованием mahonia +func decodeGB2312(input []byte) (string, error) { + decoder := mahonia.NewDecoder("gb2312") + return decoder.ConvertString(string(input)), nil +} + +func decodeGBK(input []byte) (string, error) { + decoder := mahonia.NewDecoder("gbk") + return decoder.ConvertString(string(input)), nil +} + +func decodeShiftJIS(input []byte) (string, error) { + decoder := mahonia.NewDecoder("shift_jis") + return decoder.ConvertString(string(input)), nil +} + +func decodeEUCJP(input []byte) (string, error) { + decoder := mahonia.NewDecoder("euc-jp") + return decoder.ConvertString(string(input)), nil +}