// charset_reader.go (3.7 KB)

  1. package mail
  2. import (
  3. "fmt"
  4. "golang.org/x/text/encoding/charmap"
  5. "golang.org/x/text/encoding/japanese"
  6. "golang.org/x/text/encoding/korean"
  7. "golang.org/x/text/encoding/simplifiedchinese"
  8. "golang.org/x/text/encoding/traditionalchinese"
  9. "golang.org/x/text/transform"
  10. "io"
  11. "strings"
  12. )
  13. var charsetMap = map[string]transform.Transformer{
  14. "gb2312": simplifiedchinese.GBK.NewDecoder(),
  15. "gbk": simplifiedchinese.GBK.NewDecoder(),
  16. "ibm037": charmap.CodePage037.NewDecoder(),
  17. "ibm437": charmap.CodePage437.NewDecoder(),
  18. "ibm850": charmap.CodePage850.NewDecoder(),
  19. "ibm852": charmap.CodePage852.NewDecoder(),
  20. "ibm855": charmap.CodePage855.NewDecoder(),
  21. "ibm858": charmap.CodePage858.NewDecoder(),
  22. "ibm860": charmap.CodePage860.NewDecoder(),
  23. "ibm862": charmap.CodePage862.NewDecoder(),
  24. "ibm863": charmap.CodePage863.NewDecoder(),
  25. "ibm865": charmap.CodePage865.NewDecoder(),
  26. "ibm866": charmap.CodePage866.NewDecoder(),
  27. "ibm1047": charmap.CodePage1047.NewDecoder(),
  28. "ibm1140": charmap.CodePage1140.NewDecoder(),
  29. "iso-8859-1": charmap.ISO8859_1.NewDecoder(),
  30. "iso-8859-2": charmap.ISO8859_2.NewDecoder(),
  31. "iso-8859-3": charmap.ISO8859_3.NewDecoder(),
  32. "iso-8859-4": charmap.ISO8859_4.NewDecoder(),
  33. "iso-8859-5": charmap.ISO8859_5.NewDecoder(),
  34. "iso-8859-6": charmap.ISO8859_6.NewDecoder(),
  35. "iso-8859-7": charmap.ISO8859_7.NewDecoder(),
  36. "iso-8859-8": charmap.ISO8859_8.NewDecoder(),
  37. "iso-8859-9": charmap.ISO8859_9.NewDecoder(),
  38. "iso-8859-10": charmap.ISO8859_10.NewDecoder(),
  39. "iso-8859-13": charmap.ISO8859_13.NewDecoder(),
  40. "iso-8859-14": charmap.ISO8859_14.NewDecoder(),
  41. "iso-8859-15": charmap.ISO8859_15.NewDecoder(),
  42. "iso-8859-16": charmap.ISO8859_16.NewDecoder(),
  43. "koi8-r": charmap.KOI8R.NewDecoder(),
  44. "koi8-u": charmap.KOI8U.NewDecoder(),
  45. "macintosh": charmap.Macintosh.NewDecoder(),
  46. "x-mac-cyrillic": charmap.MacintoshCyrillic.NewDecoder(),
  47. "windows-874": charmap.Windows874.NewDecoder(),
  48. "windows-1250": charmap.Windows1250.NewDecoder(),
  49. "windows-1251": charmap.Windows1251.NewDecoder(),
  50. "windows-1252": charmap.Windows1252.NewDecoder(),
  51. "windows-1253": charmap.Windows1253.NewDecoder(),
  52. "windows-1254": charmap.Windows1254.NewDecoder(),
  53. "windows-1255": charmap.Windows1255.NewDecoder(),
  54. "windows-1257": charmap.Windows1257.NewDecoder(),
  55. "windows-1258": charmap.Windows1258.NewDecoder(),
  56. "x-user-defined": charmap.XUserDefined.NewDecoder(),
  57. "euc-jp": japanese.EUCJP.NewDecoder(),
  58. "iso-2022-jp": japanese.ISO2022JP.NewDecoder(),
  59. "shift_jis": japanese.ShiftJIS.NewDecoder(),
  60. "ks_c_5601-1987": korean.EUCKR.NewDecoder(),
  61. "euc-kr": korean.EUCKR.NewDecoder(),
  62. "big5": traditionalchinese.Big5.NewDecoder(),
  63. }
  64. // 定义一个自定义的 CharsetReader 函数,它能够处理 gb2312 和 gbk 字符集
  65. func myCharsetReader(charset string, input io.Reader) (io.Reader, error) {
  66. charset = strings.ToLower(charset)
  67. newDecoder, ok := charsetMap[charset]
  68. if ok {
  69. reader := transform.NewReader(input, newDecoder)
  70. return reader, nil
  71. }
  72. if charset == `utf-8` {
  73. return input, nil
  74. }
  75. switch strings.ToLower(charset) {
  76. case "gb2312", "gbk":
  77. reader := transform.NewReader(input, simplifiedchinese.GBK.NewDecoder())
  78. return reader, nil
  79. case "utf-8":
  80. return input, nil
  81. case "iso-8859-1":
  82. reader := transform.NewReader(input, charmap.ISO8859_1.NewDecoder())
  83. return reader, nil
  84. case "windows-1252":
  85. reader := transform.NewReader(input, charmap.Windows1252.NewDecoder())
  86. return reader, nil
  87. default:
  88. }
  89. return input, fmt.Errorf("unsupported charset: %s", charset)
  90. }