imap.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
  1. package mail
  2. import (
  3. "errors"
  4. "eta/eta_email_analysis/global"
  5. "eta/eta_email_analysis/utils"
  6. "fmt"
  7. "github.com/emersion/go-imap"
  8. "github.com/emersion/go-imap/client"
  9. "github.com/emersion/go-message"
  10. "github.com/emersion/go-message/mail"
  11. "github.com/h2non/filetype"
  12. "io"
  13. "log"
  14. "os"
  15. "path"
  16. "strings"
  17. "time"
  18. )
  19. type MailMessage struct {
  20. Date time.Time `description:"收件时间"`
  21. Uid uint32 `description:"该邮件在邮箱中的唯一id"`
  22. FromAddress string `description:"发件人邮箱"`
  23. From string `description:"发件人名称"`
  24. Title string `description:"邮件标题"`
  25. Content string `description:"邮件主体正文"`
  26. Resources map[string]string `description:"正文内嵌资源"`
  27. Attachment map[string]string `description:"附件资源"`
  28. }
  29. func ListenMail(mailAddress, folder, userName, password string, readBatchSize, fromEmailIndex int, mailMessageChan chan MailMessage, mailMessageDoneChan chan bool) (err error) { // 收件箱
  30. defer func() {
  31. // 处理结束
  32. mailMessageDoneChan <- true
  33. if err != nil {
  34. fmt.Println("err:", err.Error())
  35. }
  36. }()
  37. // 建立与 IMAP 服务器的连接
  38. c, err := client.DialTLS(mailAddress, nil)
  39. if err != nil {
  40. fmt.Printf("连接 IMAP 服务器失败: %+v \n", err)
  41. return
  42. }
  43. // 最后一定不要忘记退出登录
  44. defer func() {
  45. _ = c.Logout()
  46. }()
  47. // 登录
  48. if err = c.Login(userName, password); err != nil {
  49. fmt.Printf("邮箱[%s] 登录失败: %v \n", fmt.Sprintf("%s:%s", userName, mailAddress), err)
  50. return
  51. }
  52. // 列出当前邮箱中的文件夹
  53. mailboxes := make(chan *imap.MailboxInfo, 10)
  54. done := make(chan error, 1) // 记录错误的 chan
  55. go func() {
  56. done <- c.List("", "*", mailboxes)
  57. }()
  58. log.Println("-->当前邮箱的文件夹 Mailboxes:")
  59. var folderExists bool
  60. for m := range mailboxes {
  61. log.Println("* ", m.Name)
  62. if m.Name == folder {
  63. folderExists = true
  64. }
  65. }
  66. err = <-done
  67. if err != nil {
  68. global.LOG.Errorf("列出邮箱列表时,出现错误:%v \n", err)
  69. return
  70. }
  71. log.Println("-->列出邮箱列表完毕!")
  72. if !folderExists {
  73. err = errors.New(fmt.Sprintf("文件夹[%s] 不存在 \n", folder))
  74. return
  75. }
  76. message.CharsetReader = myCharsetReader
  77. // 选择指定的文件夹
  78. mbox, err := c.Select(folder, false)
  79. if err != nil {
  80. err = errors.New(fmt.Sprintf("选择邮件箱失败: %+v", err))
  81. return
  82. }
  83. //log.Printf("mbox %+v \n", mbox)
  84. log.Printf("当前文件夹[%s]中,总共有 %d 封邮件 \n", folder, mbox.Messages)
  85. if mbox.Messages == 0 {
  86. //log.Fatalf("当前文件夹[%s]中没有邮件", folder)
  87. return
  88. }
  89. // 创建一个序列集,用于批量读取邮件
  90. seqSet := new(imap.SeqSet)
  91. to := mbox.Messages // 此文件下的邮件总数
  92. //minIndex := uint32(5)
  93. //// 假设需要获取最后4封邮件时
  94. //if fromEmailIndex > 0 {
  95. // minIndex = uint32(fromEmailIndex)
  96. //} else {
  97. // var maxNum uint32
  98. // //该次监听获取的最大数量
  99. // maxNum = 20000
  100. // //获取开始的邮件编号
  101. // if to > maxNum {
  102. // minIndex = to - maxNum + 1
  103. // }
  104. //}
  105. //from = 310
  106. var isStopFor bool
  107. step := uint32(1)
  108. for i := to; i >= 1; {
  109. start := i - step + 1
  110. if start < 0 {
  111. start = 1
  112. }
  113. //fmt.Printf("当前剩余%d封邮件待处理\n", i-minIndex+1)
  114. seqSet.Clear()
  115. seqSet.AddRange(start, i) // 添加指定范围内的邮件编号
  116. // 获取整个消息正文
  117. // imap.FetchEnvelope:请求获取邮件的信封数据(例如发件人、收件人、主题等元数据)。
  118. // imap.FetchRFC822:请求获取完整的邮件内容,包括所有头部和正文。
  119. items := []imap.FetchItem{imap.FetchFlags, imap.FetchEnvelope, imap.FetchRFC822}
  120. // 获取邮件内容 Start
  121. messages := make(chan *imap.Message, readBatchSize) // 创建一个通道,用于接收邮件消息
  122. fetchDone := make(chan error, 1) // 创建一个通道,用于接收错误消息
  123. go func() {
  124. // Fetch方法用于从服务器获取邮件数据,这里请求了邮件的信封和完整内容
  125. fetchDone <- c.Fetch(seqSet, items, messages)
  126. }()
  127. err = <-fetchDone
  128. if err != nil {
  129. global.LOG.Errorf("获取邮件信息出现错误:%v \n", err)
  130. return
  131. }
  132. // 获取邮件内容 End
  133. //log.Println("开始读取邮件内容")
  134. for msg := range messages {
  135. // 如果需要终止,那么就不处理了
  136. if isStopFor {
  137. continue
  138. }
  139. emailMessage, isRead, tmpErr := readEveryMsg(msg)
  140. if tmpErr != nil {
  141. // 移除本地文件
  142. {
  143. for _, v := range emailMessage.Attachment {
  144. os.Remove(v)
  145. }
  146. for _, v := range emailMessage.Resources {
  147. os.Remove(v)
  148. }
  149. }
  150. global.FILE_LOG.Errorf("读取邮件内容时出现错误:%v \n", tmpErr)
  151. continue
  152. }
  153. // 如果没有取到,那么就过滤
  154. if !isRead {
  155. continue
  156. }
  157. // 判断当前邮件id是否小于等于已经监听到的最小id,如果是,那么就不处理了
  158. if emailMessage.Uid <= uint32(fromEmailIndex) {
  159. isStopFor = true
  160. continue
  161. }
  162. // 如果取到了,那么写入待处理chan
  163. // 写入邮件处理chan
  164. mailMessageChan <- emailMessage
  165. }
  166. if isStopFor {
  167. // 已经找到了最小的邮件id,那么就退出循环了
  168. }
  169. //time.Sleep(time.Second * 5) // 休眠10秒
  170. i = i - step
  171. }
  172. log.Println("读取了所有邮件,完毕!")
  173. return
  174. }
  175. // document link: https://github.com/emersion/go-imap/wiki/Fetching-messages
  176. func readEveryMsg(msg *imap.Message) (emailMessage MailMessage, ok bool, err error) {
  177. ok = true
  178. defer func() {
  179. if err != nil {
  180. ok = false
  181. global.FILE_LOG.Errorf("邮件读取失败;Err:%s", err.Error())
  182. }
  183. }()
  184. message.CharsetReader = myCharsetReader
  185. emailMessage.Resources = make(map[string]string) // 内嵌资源
  186. emailMessage.Attachment = make(map[string]string) // 附件
  187. emailMessage.Uid = msg.Uid
  188. htmlStr := ``
  189. textStr := ``
  190. //log.Printf("当前邮件的消息序列号 %+v \n", msg.SeqNum)
  191. //log.Println("-------------------------")
  192. // 获取邮件正文
  193. r := msg.GetBody(&imap.BodySectionName{})
  194. if r == nil {
  195. global.FILE_LOG.Info("服务器没有返回消息内容")
  196. }
  197. mr, err := mail.CreateReader(r)
  198. if err != nil {
  199. //log.Fatalf("邮件读取时出现错误: %v \n", err)
  200. err = errors.New(fmt.Sprintf("邮件读取时出现错误:%v \n", err))
  201. return
  202. }
  203. // 收件时间
  204. {
  205. date, err := mr.Header.Date()
  206. if err != nil {
  207. log.Println("收件时间 异常:", err.Error())
  208. }
  209. emailMessage.Date = date
  210. //log.Println("收件时间 Date:", date)
  211. }
  212. // 发件人
  213. {
  214. fromStr := mr.Header.Get("From")
  215. //fmt.Println(fromStr)
  216. // 处理无效地址的情况
  217. if !strings.Contains(fromStr, "@") {
  218. emailMessage.FromAddress = fromStr
  219. emailMessage.From = fromStr
  220. } else {
  221. from, tmpErr := mr.Header.AddressList("From")
  222. if tmpErr != nil {
  223. log.Println("发件人 异常:", err.Error())
  224. }
  225. if len(from) > 0 {
  226. emailMessage.FromAddress = from[0].Address
  227. emailMessage.From = from[0].Name
  228. //mailMessage.From = from[0].String()
  229. //log.Println("发件人 From:", from)
  230. }
  231. }
  232. }
  233. //if to, err := mr.Header.AddressList("To"); err == nil {
  234. // log.Println("收件人 To:", to)
  235. //}
  236. //log.Printf("抄送 Cc: %+v \n", msg.Envelope.Cc)
  237. // 邮件标题
  238. subject, err := mr.Header.Subject()
  239. if err != nil {
  240. log.Println("邮件主题 Subject ERR:", err)
  241. } else {
  242. //log.Println("邮件主题 Subject:", subject)
  243. }
  244. emailMessage.Title = subject
  245. // 过滤
  246. if isIgnore(emailMessage) {
  247. ok = false
  248. return
  249. }
  250. //fmt.Println("当前邮件Uid:", emailMessage.Uid)
  251. //ok = false
  252. //return
  253. for {
  254. p, tmpErr := mr.NextPart()
  255. if tmpErr == io.EOF {
  256. break
  257. } else if tmpErr != nil {
  258. global.FILE_LOG.Errorf("读取邮件内容时出现错误:%v \n", tmpErr)
  259. err = tmpErr
  260. return
  261. }
  262. bodyBytes, _ := io.ReadAll(p.Body)
  263. if err != nil {
  264. //log.Fatalf("读取邮件部分时出现错误:%v \n", err)
  265. err = errors.New(fmt.Sprintf("读取邮件部分时出现错误:%v \n", err))
  266. return
  267. }
  268. switch h := p.Header.(type) {
  269. case *mail.InlineHeader:
  270. // 这是消息的文本(可以是纯文本或 HTML)
  271. contentType := h.Get("Content-Type")
  272. //log.Println("消息内容content-type:", contentType)
  273. if strings.HasPrefix(contentType, "text/plain") {
  274. //log.Printf("得到正文 -> TEXT: %v \n", string(bodyBytes))
  275. textStr += string(bodyBytes)
  276. } else if strings.HasPrefix(contentType, "text/html") {
  277. //log.Printf("得到正文 -> HTML: %v \n", len(b))
  278. //log.Printf("得到正文 -> HTML: %v \n", string(bodyBytes))
  279. htmlStr += string(bodyBytes)
  280. }
  281. // 这是内嵌资源
  282. if cid := p.Header.Get("Content-ID"); cid != "" {
  283. // 确定文件后缀
  284. fileSuffix := determineFileSuffix(bodyBytes)
  285. fileName := fmt.Sprintf("%s%s.%s", global.CONFIG.Serve.StaticDir, cid[1:len(cid)-1], fileSuffix)
  286. err = utils.SaveToFile(bodyBytes, fileName)
  287. if err != nil {
  288. //log.Fatalf("保存文件时出现错误:%v \n", err)
  289. err = errors.New(fmt.Sprintf("保存文件时出现错误:%v \n", err))
  290. return
  291. }
  292. emailMessage.Resources[cid] = fileName
  293. }
  294. break
  295. case *mail.AttachmentHeader:
  296. // 这是一个附件
  297. filename, _ := h.Filename()
  298. //log.Printf("得到附件: %v,content-type:%s \n", filename, p.Header.Get("Content-Type"))
  299. saveName := fmt.Sprint(msg.SeqNum, utils.MD5(filename), time.Now().Format(utils.FormatDateTimeUnSpace), time.Now().Nanosecond(), path.Ext(filename))
  300. filePath := fmt.Sprintf("%s%s%s%s", global.CONFIG.Serve.StaticDir, `file`, string(os.PathSeparator), saveName)
  301. err = utils.SaveToFile(bodyBytes, filePath)
  302. if err != nil {
  303. //log.Fatalf("保存文件时出现错误:%v \n", err)
  304. err = errors.New(fmt.Sprintf("保存文件时出现错误:%v \n", err))
  305. return
  306. }
  307. // 这是附件资源
  308. if contentDisposition := p.Header.Get("Content-Disposition"); contentDisposition != "" {
  309. if strings.HasPrefix(contentDisposition, "attachment") {
  310. emailMessage.Attachment[filename] = filePath
  311. }
  312. } else if cid := p.Header.Get("Content-ID"); cid != "" {
  313. // 这是内嵌资源
  314. emailMessage.Resources[cid] = filePath
  315. }
  316. //else {
  317. // mailMessage.Attachment[filename] = filePath
  318. //}
  319. break
  320. default:
  321. global.FILE_LOG.Info("未知格式:", h)
  322. //log.Println(h)
  323. }
  324. }
  325. emailMessage.Content = htmlStr
  326. if emailMessage.Content == `` {
  327. emailMessage.Content = textStr
  328. }
  329. //log.Println("一封邮件读取完毕")
  330. //log.Printf("------------------------- \n\n")
  331. return
  332. }
  333. // 根据文件内容确定文件后缀
  334. func determineFileSuffix(content []byte) string {
  335. kind, err := filetype.Match(content)
  336. if err != nil {
  337. global.FILE_LOG.Error("无法确定文件类型:%v \n", err)
  338. return ".bin"
  339. }
  340. return kind.Extension
  341. }
  342. // isIgnore
  343. // @Description: 校验是否忽略的邮件
  344. // @author: Roc
  345. // @datetime 2024-09-30 16:09:34
  346. // @param emailMessage MailMessage
  347. // @return bool
  348. func isIgnore(emailMessage MailMessage) bool {
  349. // 发件人中包含待过滤的字符串,那么就过滤
  350. lowerFrom := strings.ToLower(emailMessage.From)
  351. for _, email := range global.CONFIG.Email.IgnoreEmail {
  352. if utils.ContainsWholeWord(lowerFrom, email) {
  353. global.FILE_LOG.Infof("发件人包含%s,过滤掉,发件人:%s;标题:%s;所属下标:%d", email, emailMessage.From, emailMessage.Title, emailMessage.Uid)
  354. return true
  355. }
  356. }
  357. // 邮件标题中包含待过滤的字符串(大小写敏感的标题),那么就过滤
  358. for _, email := range global.CONFIG.Email.IgnoreEmailCaseSensitive {
  359. if utils.ContainsWholeWord(emailMessage.From, email) {
  360. global.FILE_LOG.Infof("发件人包含%s,过滤掉,发件人:%s;标题:%s;所属下标:%d", email, emailMessage.From, emailMessage.Title, emailMessage.Uid)
  361. return true
  362. }
  363. }
  364. // 发件人地址中包含待过滤的字符串,那么就过滤
  365. lowerFromAddress := strings.ToLower(emailMessage.FromAddress)
  366. for _, emailAddress := range global.CONFIG.Email.IgnoreEmailAddress {
  367. if utils.ContainsWholeWord(lowerFromAddress, emailAddress) {
  368. global.FILE_LOG.Infof("发件人邮箱包含%s,过滤掉,发件人邮箱地址:%s;标题:%s;所属下标:%d", emailAddress, emailMessage.FromAddress, emailMessage.Title, emailMessage.Uid)
  369. return true
  370. }
  371. }
  372. // 邮件地址中包含待过滤的字符串(大小写敏感的标题),那么就过滤
  373. for _, emailAddress := range global.CONFIG.Email.IgnoreEmailAddressCaseSensitive {
  374. if utils.ContainsWholeWord(emailMessage.FromAddress, emailAddress) {
  375. global.FILE_LOG.Infof("发件人邮箱包含%s,过滤掉,发件人邮箱地址:%s;标题:%s;所属下标:%d", emailAddress, emailMessage.FromAddress, emailMessage.Title, emailMessage.Uid)
  376. return true
  377. }
  378. }
  379. // 邮件标题中包含待过滤的字符串,那么就过滤
  380. lowerTitle := strings.ToLower(emailMessage.Title)
  381. for _, title := range global.CONFIG.Email.IgnoreEmailTitle {
  382. if utils.ContainsWholeWord(lowerTitle, title) {
  383. global.FILE_LOG.Infof("邮件标题包含%s,过滤掉,标题:%s,所属下标:%d", title, emailMessage.Title, emailMessage.Uid)
  384. return true
  385. }
  386. }
  387. // 邮件标题中包含待过滤的字符串(大小写敏感的标题),那么就过滤
  388. for _, title := range global.CONFIG.Email.IgnoreEmailTitleCaseSensitive {
  389. if utils.ContainsWholeWord(emailMessage.Title, title) {
  390. global.FILE_LOG.Infof("邮件标题包含%s,过滤掉,标题:%s,所属下标:%d", title, emailMessage.Title, emailMessage.Uid)
  391. return true
  392. }
  393. }
  394. return false
  395. }