initial commit
commit
02d0a1162b
|
@ -0,0 +1,8 @@
|
|||
module git.tswf.io/incredible-go/incredible-go-core-text-encoding
|
||||
|
||||
go 1.20
|
||||
|
||||
require (
|
||||
github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394
|
||||
golang.org/x/text v0.18.0
|
||||
)
|
|
@ -0,0 +1,102 @@
|
|||
package stringencoding
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
|
||||
"github.com/axgle/mahonia"
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/encoding/unicode/utf32"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
func DecodeString(encodingName string, input []byte) (string, error) {
|
||||
if encodingName == "" {
|
||||
return "", fmt.Errorf("empty encoding string")
|
||||
}
|
||||
|
||||
if input == nil || len(input) == 0 {
|
||||
return "", fmt.Errorf("empty or null input")
|
||||
}
|
||||
|
||||
var decoder *encoding.Decoder
|
||||
|
||||
// Определяем декодер в зависимости от кодировки
|
||||
switch strings.ToLower(encodingName) {
|
||||
case "utf-8":
|
||||
decoder = unicode.UTF8.NewDecoder()
|
||||
case "utf-16":
|
||||
decoder = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()
|
||||
case "utf-16be":
|
||||
decoder = unicode.UTF16(unicode.BigEndian, unicode.UseBOM).NewDecoder()
|
||||
case "utf-32":
|
||||
decoder = utf32.UTF32(utf32.LittleEndian, utf32.UseBOM).NewDecoder()
|
||||
case "utf-32be":
|
||||
decoder = utf32.UTF32(utf32.BigEndian, utf32.UseBOM).NewDecoder()
|
||||
case "windows-1252":
|
||||
decoder = charmap.Windows1252.NewDecoder()
|
||||
case "iso-8859-1":
|
||||
decoder = charmap.ISO8859_1.NewDecoder()
|
||||
case "iso-8859-2":
|
||||
decoder = charmap.ISO8859_2.NewDecoder()
|
||||
case "iso-8859-3":
|
||||
decoder = charmap.ISO8859_3.NewDecoder()
|
||||
case "iso-8859-4":
|
||||
decoder = charmap.ISO8859_4.NewDecoder()
|
||||
case "iso-8859-5":
|
||||
decoder = charmap.ISO8859_5.NewDecoder()
|
||||
case "iso-8859-6":
|
||||
decoder = charmap.ISO8859_6.NewDecoder()
|
||||
case "iso-8859-7":
|
||||
decoder = charmap.ISO8859_7.NewDecoder()
|
||||
case "iso-8859-8":
|
||||
decoder = charmap.ISO8859_8.NewDecoder()
|
||||
case "iso-8859-9":
|
||||
decoder = charmap.ISO8859_9.NewDecoder()
|
||||
case "koi8-r":
|
||||
decoder = charmap.KOI8R.NewDecoder()
|
||||
case "gb2312":
|
||||
return decodeGB2312(input)
|
||||
case "gbk":
|
||||
return decodeGBK(input)
|
||||
case "shift_jis":
|
||||
return decodeShiftJIS(input)
|
||||
case "euc-jp":
|
||||
return decodeEUCJP(input)
|
||||
default:
|
||||
return "", fmt.Errorf("unsupported encoding: %s", encodingName)
|
||||
}
|
||||
|
||||
// Декодируем входные байты
|
||||
decodingReader := transform.NewReader(strings.NewReader(string(input)), decoder)
|
||||
decodedBytes, err := ioutil.ReadAll(decodingReader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return string(decodedBytes), nil
|
||||
}
|
||||
|
||||
// Helper functions that decode specific encodings using mahonia.
|
||||
func decodeGB2312(input []byte) (string, error) {
|
||||
decoder := mahonia.NewDecoder("gb2312")
|
||||
return decoder.ConvertString(string(input)), nil
|
||||
}
|
||||
|
||||
func decodeGBK(input []byte) (string, error) {
|
||||
decoder := mahonia.NewDecoder("gbk")
|
||||
return decoder.ConvertString(string(input)), nil
|
||||
}
|
||||
|
||||
func decodeShiftJIS(input []byte) (string, error) {
|
||||
decoder := mahonia.NewDecoder("shift_jis")
|
||||
return decoder.ConvertString(string(input)), nil
|
||||
}
|
||||
|
||||
func decodeEUCJP(input []byte) (string, error) {
|
||||
decoder := mahonia.NewDecoder("euc-jp")
|
||||
return decoder.ConvertString(string(input)), nil
|
||||
}
|
Loading…
Reference in New Issue