incredible-go-core-text-enc.../pkg/stringencoding/standard_string_encoder.go

103 lines
2.9 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

package stringencoding
import (
	"bytes"
	"fmt"
	"io/ioutil"
	"strings"

	"github.com/axgle/mahonia"
	"golang.org/x/text/encoding"
	"golang.org/x/text/encoding/charmap"
	"golang.org/x/text/encoding/unicode"
	"golang.org/x/text/encoding/unicode/utf32"
	"golang.org/x/text/transform"
)
// DecodeString decodes input from the character encoding named by
// encodingName and returns the result as a Go string.
//
// encodingName is matched case-insensitively. Supported names are:
//
//   - "utf-8"
//   - "utf-16", "utf-16le" (little-endian unless a BOM overrides it), "utf-16be"
//   - "utf-32", "utf-32le" (little-endian unless a BOM overrides it), "utf-32be"
//   - "windows-1252", "iso-8859-1" through "iso-8859-9", "koi8-r"
//   - "gb2312", "gbk", "shift_jis", "euc-jp" (delegated to the mahonia-based
//     helpers in this file)
//
// An error is returned when encodingName is empty or not supported, when
// input is empty, or when reading through the decoder fails.
func DecodeString(encodingName string, input []byte) (string, error) {
	if encodingName == "" {
		return "", fmt.Errorf("empty encoding string")
	}
	// len of a nil slice is 0, so this also covers a nil input.
	if len(input) == 0 {
		return "", fmt.Errorf("empty or null input")
	}

	// Pick the decoder that matches the requested encoding.
	var decoder *encoding.Decoder
	switch strings.ToLower(encodingName) {
	case "utf-8":
		decoder = unicode.UTF8.NewDecoder()
	case "utf-16", "utf-16le":
		decoder = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()
	case "utf-16be":
		decoder = unicode.UTF16(unicode.BigEndian, unicode.UseBOM).NewDecoder()
	case "utf-32", "utf-32le":
		decoder = utf32.UTF32(utf32.LittleEndian, utf32.UseBOM).NewDecoder()
	case "utf-32be":
		decoder = utf32.UTF32(utf32.BigEndian, utf32.UseBOM).NewDecoder()
	case "windows-1252":
		decoder = charmap.Windows1252.NewDecoder()
	case "iso-8859-1":
		decoder = charmap.ISO8859_1.NewDecoder()
	case "iso-8859-2":
		decoder = charmap.ISO8859_2.NewDecoder()
	case "iso-8859-3":
		decoder = charmap.ISO8859_3.NewDecoder()
	case "iso-8859-4":
		decoder = charmap.ISO8859_4.NewDecoder()
	case "iso-8859-5":
		decoder = charmap.ISO8859_5.NewDecoder()
	case "iso-8859-6":
		decoder = charmap.ISO8859_6.NewDecoder()
	case "iso-8859-7":
		decoder = charmap.ISO8859_7.NewDecoder()
	case "iso-8859-8":
		decoder = charmap.ISO8859_8.NewDecoder()
	case "iso-8859-9":
		decoder = charmap.ISO8859_9.NewDecoder()
	case "koi8-r":
		decoder = charmap.KOI8R.NewDecoder()
	case "gb2312":
		return decodeGB2312(input)
	case "gbk":
		return decodeGBK(input)
	case "shift_jis":
		return decodeShiftJIS(input)
	case "euc-jp":
		return decodeEUCJP(input)
	default:
		return "", fmt.Errorf("unsupported encoding: %s", encodingName)
	}

	// Decode the input bytes. Reading straight from the byte slice avoids
	// copying it into a temporary string first.
	decodingReader := transform.NewReader(bytes.NewReader(input), decoder)
	decodedBytes, err := ioutil.ReadAll(decodingReader)
	if err != nil {
		return "", fmt.Errorf("decoding %s input: %w", encodingName, err)
	}
	return string(decodedBytes), nil
}
// Helper functions that decode specific encodings using mahonia.

// decodeGB2312 converts input from GB2312 to a Go string using a mahonia
// decoder.
//
// It returns an error when mahonia provides no decoder for "gb2312";
// otherwise the error result is nil.
func decodeGB2312(input []byte) (string, error) {
	const charset = "gb2312"
	decoder := mahonia.NewDecoder(charset)
	if decoder == nil {
		// NewDecoder yields nil for a charset it does not know; using a nil
		// decoder would fail at runtime instead of reporting an error.
		return "", fmt.Errorf("mahonia: no decoder available for %q", charset)
	}
	return decoder.ConvertString(string(input)), nil
}
// decodeGBK converts input from GBK to a Go string using a mahonia decoder.
//
// It returns an error when mahonia provides no decoder for "gbk"; otherwise
// the error result is nil.
func decodeGBK(input []byte) (string, error) {
	const charset = "gbk"
	decoder := mahonia.NewDecoder(charset)
	if decoder == nil {
		// NewDecoder yields nil for a charset it does not know; using a nil
		// decoder would fail at runtime instead of reporting an error.
		return "", fmt.Errorf("mahonia: no decoder available for %q", charset)
	}
	return decoder.ConvertString(string(input)), nil
}
// decodeShiftJIS converts input from Shift_JIS to a Go string using a mahonia
// decoder.
//
// It returns an error when mahonia provides no decoder for "shift_jis";
// otherwise the error result is nil.
func decodeShiftJIS(input []byte) (string, error) {
	const charset = "shift_jis"
	decoder := mahonia.NewDecoder(charset)
	if decoder == nil {
		// NewDecoder yields nil for a charset it does not know; using a nil
		// decoder would fail at runtime instead of reporting an error.
		return "", fmt.Errorf("mahonia: no decoder available for %q", charset)
	}
	return decoder.ConvertString(string(input)), nil
}
// decodeEUCJP converts input from EUC-JP to a Go string using a mahonia
// decoder.
//
// It returns an error when mahonia provides no decoder for "euc-jp";
// otherwise the error result is nil.
func decodeEUCJP(input []byte) (string, error) {
	const charset = "euc-jp"
	decoder := mahonia.NewDecoder(charset)
	if decoder == nil {
		// NewDecoder yields nil for a charset it does not know; using a nil
		// decoder would fail at runtime instead of reporting an error.
		return "", fmt.Errorf("mahonia: no decoder available for %q", charset)
	}
	return decoder.ConvertString(string(input)), nil
}