103 lines
2.9 KiB
Go
103 lines
2.9 KiB
Go
package stringencoding
|
||
|
||
import (
|
||
"fmt"
|
||
"io/ioutil"
|
||
"strings"
|
||
|
||
"github.com/axgle/mahonia"
|
||
"golang.org/x/text/encoding"
|
||
"golang.org/x/text/encoding/charmap"
|
||
"golang.org/x/text/encoding/unicode"
|
||
"golang.org/x/text/encoding/unicode/utf32"
|
||
"golang.org/x/text/transform"
|
||
)
|
||
|
||
func DecodeString(encodingName string, input []byte) (string, error) {
|
||
if encodingName == "" {
|
||
return "", fmt.Errorf("empty encoding string")
|
||
}
|
||
|
||
if input == nil || len(input) == 0 {
|
||
return "", fmt.Errorf("empty or null input")
|
||
}
|
||
|
||
var decoder *encoding.Decoder
|
||
|
||
// Определяем декодер в зависимости от кодировки
|
||
switch strings.ToLower(encodingName) {
|
||
case "utf-8":
|
||
decoder = unicode.UTF8.NewDecoder()
|
||
case "utf-16":
|
||
decoder = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()
|
||
case "utf-16be":
|
||
decoder = unicode.UTF16(unicode.BigEndian, unicode.UseBOM).NewDecoder()
|
||
case "utf-32":
|
||
decoder = utf32.UTF32(utf32.LittleEndian, utf32.UseBOM).NewDecoder()
|
||
case "utf-32be":
|
||
decoder = utf32.UTF32(utf32.BigEndian, utf32.UseBOM).NewDecoder()
|
||
case "windows-1252":
|
||
decoder = charmap.Windows1252.NewDecoder()
|
||
case "iso-8859-1":
|
||
decoder = charmap.ISO8859_1.NewDecoder()
|
||
case "iso-8859-2":
|
||
decoder = charmap.ISO8859_2.NewDecoder()
|
||
case "iso-8859-3":
|
||
decoder = charmap.ISO8859_3.NewDecoder()
|
||
case "iso-8859-4":
|
||
decoder = charmap.ISO8859_4.NewDecoder()
|
||
case "iso-8859-5":
|
||
decoder = charmap.ISO8859_5.NewDecoder()
|
||
case "iso-8859-6":
|
||
decoder = charmap.ISO8859_6.NewDecoder()
|
||
case "iso-8859-7":
|
||
decoder = charmap.ISO8859_7.NewDecoder()
|
||
case "iso-8859-8":
|
||
decoder = charmap.ISO8859_8.NewDecoder()
|
||
case "iso-8859-9":
|
||
decoder = charmap.ISO8859_9.NewDecoder()
|
||
case "koi8-r":
|
||
decoder = charmap.KOI8R.NewDecoder()
|
||
case "gb2312":
|
||
return decodeGB2312(input)
|
||
case "gbk":
|
||
return decodeGBK(input)
|
||
case "shift_jis":
|
||
return decodeShiftJIS(input)
|
||
case "euc-jp":
|
||
return decodeEUCJP(input)
|
||
default:
|
||
return "", fmt.Errorf("unsupported encoding: %s", encodingName)
|
||
}
|
||
|
||
// Декодируем входные байты
|
||
decodingReader := transform.NewReader(strings.NewReader(string(input)), decoder)
|
||
decodedBytes, err := ioutil.ReadAll(decodingReader)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
|
||
return string(decodedBytes), nil
|
||
}
|
||
|
||
// Helper functions that decode specific encodings using mahonia.
|
||
func decodeGB2312(input []byte) (string, error) {
|
||
decoder := mahonia.NewDecoder("gb2312")
|
||
return decoder.ConvertString(string(input)), nil
|
||
}
|
||
|
||
func decodeGBK(input []byte) (string, error) {
|
||
decoder := mahonia.NewDecoder("gbk")
|
||
return decoder.ConvertString(string(input)), nil
|
||
}
|
||
|
||
func decodeShiftJIS(input []byte) (string, error) {
|
||
decoder := mahonia.NewDecoder("shift_jis")
|
||
return decoder.ConvertString(string(input)), nil
|
||
}
|
||
|
||
func decodeEUCJP(input []byte) (string, error) {
|
||
decoder := mahonia.NewDecoder("euc-jp")
|
||
return decoder.ConvertString(string(input)), nil
|
||
}
|