package services import ( "bytes" "eta/eta_api/cache" "eta/eta_api/models" "eta/eta_api/models/rag" "eta/eta_api/services/llm" "eta/eta_api/utils" "eta/eta_api/utils/llm/eta_llm/eta_llm_http" "fmt" html2 "golang.org/x/net/html" "html" "os" "path" "strconv" "strings" "time" ) // AddWechatPlatform // @Description: 添加新的公众号 // @param item func AddWechatPlatform(item *rag.WechatPlatform) { var err error defer func() { if err != nil { utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err) } }() if item.FakeId != `` { return } if item.ArticleLink == `` { return } articleLink := item.ArticleLink articleDetail, err := llm.SearchByWechatArticle(item.ArticleLink) if err != nil { return } if articleDetail.Appuin == `` { err = fmt.Errorf("文章内未匹配到公众号唯一标识") return } wechatPlatform := new(rag.WechatPlatform) // 查找是否存在这个公众号id的 wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin) if tmpErr != nil && !utils.IsErrNoRow(tmpErr) { err = tmpErr return } if tmpErr == nil { // 如果找到了,那么需要将当前的给移除掉 err = item.Del() if err != nil { return } // 并将查出来的微信公众号摘出来的数据重新赋值 item = wechatPlatformInfo } else if utils.IsErrNoRow(tmpErr) { // 如果没找到,那么就变更当前的信息 item.FakeId = articleDetail.Appuin item.Nickname = articleDetail.Nickname //item.Alias = req.Alias item.RoundHeadImg = articleDetail.RoundHeadImg //item.ServiceType = req.ServiceType item.Signature = articleDetail.ProfileSignature //item.Verified = verified item.ModifyTime = time.Now() err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime}) if err != nil { return } // 修改公众号头像 go replaceWechatPlatformPic(item) } // 把刚搜索的文章加入到文章库中 AddWechatArticle(item, articleLink, articleDetail, nil) BeachAddWechatArticle(item, 10) fmt.Println("公众号入库完成") return } // AddWechatArticle // @Description: 添加公众号文章入库 // @author: Roc // @datetime 2025-03-05 13:24:14 // @param item *rag.WechatPlatform // @param link string // @param articleDetail WechatArticleDataResp func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail llm.WechatArticleDataResp, articleMenu *llm.ArticleMenu) { var err error defer func() { if err != nil { utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err) } }() obj := new(rag.WechatArticle) _, err = obj.GetByLink(articleLink) if err == nil { // 文章已经入库了,不需要重复入库 return } // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回 if !utils.IsErrNoRow(err) { return } // 这个时候,说明数据库中没有这个文章,那么需要文章入库 err = nil var publishAt time.Time if articleDetail.CreateAt != `` { createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt) if tmpErr == nil { publishAt = time.Unix(int64(createAtInt), 1000) } } else if articleMenu != nil { publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000) } content := articleDetail.HtmlContent // 图片下载下来到本地,如果成功了,那么就用新的 tmpContent, err := ReplaceHtmlImg(content) if tmpContent != `` { content = tmpContent } obj = &rag.WechatArticle{ WechatArticleId: 0, WechatPlatformId: item.WechatPlatformId, FakeId: item.FakeId, Title: articleDetail.Title, Link: articleLink, CoverUrl: articleDetail.CoverUrl, Description: articleDetail.Desc, Content: html.EscapeString(content), TextContent: articleDetail.TextContent, Country: articleDetail.CountryName, Province: articleDetail.ProvinceName, City: articleDetail.CityName, //Abstract: "", //ArticleCreateTime: createAt, ModifyTime: time.Now(), CreateTime: time.Now(), } if !publishAt.IsZero() { obj.ArticleCreateTime = publishAt } if articleMenu != nil { obj.Title = articleMenu.Title //obj.Link = articleMenu.Link obj.CoverUrl = articleMenu.Cover obj.Description = articleMenu.Digest } err = obj.Create() // 修改文章封面图 go replaceWechatArticleCoverPic(obj) // 文章入库成功后,需要将相关信息入摘要库 go cache.AddWechatArticleLlmOpToCache(item.WechatPlatformId, ``) } // BeachAddWechatArticle // @Description: 批量添加公众号文章 // @param item // @param num // @return err func BeachAddWechatArticle(item *rag.WechatPlatform, num int) { var err error defer func() { //fmt.Println("公众号文章批量入库完成") if err != nil { utils.FileLog.Error("公众号文章批量入库失败,err:%v", err) fmt.Println("公众号文章批量入库失败,err:", err) } }() if item.FakeId == `` { return } wechatArticleObj := new(rag.WechatArticle) // 获取公众号的文章列表 articleListResp, err := llm.SearchByWechatArticleList(item.FakeId, num) if err != nil { return } for _, articleMenu := range articleListResp.List { // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了 _, err = wechatArticleObj.GetByLink(articleMenu.Link) if err == nil { // 文章已经入库了,不需要重复入库 continue } if !utils.IsErrNoRow(err) { return } err = nil articleDetail, tmpErr := llm.SearchByWechatArticle(articleMenu.Link) if tmpErr != nil { err = tmpErr return } // 把刚搜索的文章加入到指标库 AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu) time.Sleep(10 * time.Second) } return } // GenerateArticleAbstract // @Description: 文章摘要生成 // @author: Roc // @datetime 2025-03-10 16:17:53 // @param item *rag.WechatArticle func GenerateArticleAbstract(item *rag.WechatArticle) { var err error defer func() { if err != nil { utils.FileLog.Error("文章转临时文件失败,err:%v", err) fmt.Println("文章转临时文件失败,err:", err) } }() abstractObj := rag.WechatArticleAbstract{} _, err = abstractObj.GetByWechatArticleId(item.WechatArticleId) if err == nil { // 摘要已经生成,不需要重复生成 return } if !utils.IsErrNoRow(err) { return } // 生成临时文件 dateDir := time.Now().Format("20060102") uploadDir := utils.STATIC_DIR + "ai/" + dateDir err = os.MkdirAll(uploadDir, utils.DIR_MOD) if err != nil { err = fmt.Errorf("存储目录创建失败,Err:" + err.Error()) return } randStr := utils.GetRandStringNoSpecialChar(28) fileName := randStr + `.md` tmpFilePath := uploadDir + "/" + fileName err = utils.SaveToFile(item.TextContent, tmpFilePath) if err != nil { err = fmt.Errorf("生成临时文件失败,Err:" + err.Error()) return } defer func() { os.Remove(tmpFilePath) }() // 上传临时文件到LLM tmpFileResp, err := llm.UploadTempDocs(tmpFilePath) if err != nil { err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error()) return } if tmpFileResp.Data.Id == `` { err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败") return } tmpDocId := tmpFileResp.Data.Id //tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材 //tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材 //tmpDocId := `7634cc1086c04b3687682220a2cf1a48` // historyList := make([]eta_llm_http.HistoryContent, 0) questionObj := rag.Question{} questionList, err := questionObj.GetListByCondition(``, []interface{}{}, 0, 100) if err != nil { err = fmt.Errorf("获取问题列表失败,Err:" + err.Error()) return } addArticleChatRecordList := make([]*rag.WechatArticleChatRecord, 0) var abstract string //开始对话 for _, question := range questionList { originalAnswer, tmpAnswer, tmpErr := getAnswerByContent(tmpDocId, question.QuestionContent, historyList) if tmpErr != nil { err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error()) return } abstract = tmpAnswer historyList = append(historyList, eta_llm_http.HistoryContent{ Role: `user`, Content: question.QuestionContent, }, eta_llm_http.HistoryContent{ Role: `assistant`, Content: tmpAnswer, }) // 待入库的数据 addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{ WechatArticleChatRecordId: 0, WechatArticleId: item.WechatArticleId, ChatUserType: "user", Content: question.QuestionContent, SendTime: time.Now(), CreatedTime: time.Now(), UpdateTime: time.Now(), }, &rag.WechatArticleChatRecord{ WechatArticleChatRecordId: 0, WechatArticleId: item.WechatArticleId, ChatUserType: "assistant", Content: originalAnswer, SendTime: time.Now(), CreatedTime: time.Now(), UpdateTime: time.Now(), }) } // 添加问答记录 if len(addArticleChatRecordList) > 0 { recordObj := rag.WechatArticleChatRecord{} err = recordObj.CreateInBatches(addArticleChatRecordList) if err != nil { return } } if abstract != `` { abstractItem := &rag.WechatArticleAbstract{ WechatArticleAbstractId: 0, WechatArticleId: item.WechatArticleId, Content: abstract, Version: 0, VectorKey: "", ModifyTime: time.Now(), CreateTime: time.Now(), } err = abstractItem.Create() if err != nil { return } AbstractToKnowledge(item, abstractItem) } } func getAnswerByContent(docId, question string, historyList []eta_llm_http.HistoryContent) (originalAnswer, answer string, err error) { originalAnswer, result, err := llm.ChatByFile(docId, question, historyList) fmt.Println(result) if err != nil { err = fmt.Errorf("LLM对话失败,Err:" + err.Error()) return } // 提取 后面的内容 thinkEndIndex := strings.Index(result.Answer, "") if thinkEndIndex != -1 { answer = strings.TrimSpace(result.Answer[thinkEndIndex+len(""):]) } else { answer = result.Answer } answer = strings.TrimSpace(answer) return } // ArticleToKnowledge // @Description: 原文入向量库 // @author: Roc // @datetime 2025-03-10 16:13:16 // @param item *rag.WechatArticle func ArticleToKnowledge(item *rag.WechatArticle) { if item.TextContent == `` { return } var err error defer func() { if err != nil { utils.FileLog.Error("上传文章原文到知识库失败,err:%v", err) fmt.Println("上传文章原文到知识库失败,err:", err) } }() // 生成临时文件 //dateDir := time.Now().Format("20060102") //uploadDir := utils.STATIC_DIR + "ai/article/" + dateDir uploadDir := utils.STATIC_DIR + "ai/article" err = os.MkdirAll(uploadDir, utils.DIR_MOD) if err != nil { err = fmt.Errorf("存储目录创建失败,Err:" + err.Error()) return } fileName := utils.RemoveSpecialChars(item.Title) + `.md` tmpFilePath := uploadDir + "/" + fileName err = utils.SaveToFile(item.TextContent, tmpFilePath) if err != nil { err = fmt.Errorf("生成临时文件失败,Err:" + err.Error()) return } defer func() { os.Remove(tmpFilePath) }() knowledgeArticleName := models.BusinessConfMap[models.KnowledgeArticleName] // 上传临时文件到LLM uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName) if err != nil { err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error()) return } if len(uploadFileResp.FailedFiles) > 0 { for _, v := range uploadFileResp.FailedFiles { err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v) } } item.VectorKey = tmpFilePath item.ModifyTime = time.Now() err = item.Update([]string{"vector_key", "modify_time"}) } // AbstractToKnowledge // @Description: 摘要入向量库 // @author: Roc // @datetime 2025-03-10 16:14:59 // @param wechatArticleItem *rag.WechatArticle // @param item *rag.WechatArticleAbstract func AbstractToKnowledge(wechatArticleItem *rag.WechatArticle, item *rag.WechatArticleAbstract) { if item.Content == `` { return } // 已经生成了,那就不处理了 if item.VectorKey != `` { return } var err error defer func() { if err != nil { utils.FileLog.Error("摘要入向量库失败,err:%v", err) fmt.Println("摘要入向量库失败,err:", err) } }() // 生成临时文件 //dateDir := time.Now().Format("20060102") //uploadDir := utils.STATIC_DIR + "ai/article/" + dateDir uploadDir := utils.STATIC_DIR + "ai/abstract" err = os.MkdirAll(uploadDir, utils.DIR_MOD) if err != nil { err = fmt.Errorf("存储目录创建失败,Err:" + err.Error()) return } fileName := utils.RemoveSpecialChars(wechatArticleItem.Title) + `.md` tmpFilePath := uploadDir + "/" + fileName err = utils.SaveToFile(item.Content, tmpFilePath) if err != nil { err = fmt.Errorf("生成临时文件失败,Err:" + err.Error()) return } defer func() { os.Remove(tmpFilePath) }() knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName] // 上传临时文件到LLM uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName) if err != nil { err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error()) return } if len(uploadFileResp.FailedFiles) > 0 { for _, v := range uploadFileResp.FailedFiles { err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v) } } item.VectorKey = tmpFilePath item.ModifyTime = time.Now() err = item.Update([]string{"vector_key", "modify_time"}) } // replaceWechatPlatformPic // @Description: 替换公众号头像 // @author: Roc // @datetime 2025-03-11 09:38:24 // @param item *rag.WechatPlatform func replaceWechatPlatformPic(item *rag.WechatPlatform) { var err error defer func() { if err != nil { utils.FileLog.Error("替换公众号头像失败,err:%v", err) fmt.Println("替换公众号头像失败,err:", err) } }() if item.RoundHeadImg == `` { return } resourceUrl, err := downloadWxPicAndUploadToOss(item.RoundHeadImg, `head_img`) if err != nil { return } item.RoundHeadImg = resourceUrl err = item.Update([]string{"round_head_img"}) } // replaceWechatArticleCoverPic // @Description: 替换文章封面图 // @author: Roc // @datetime 2025-03-11 09:38:35 // @param item *rag.WechatArticle func replaceWechatArticleCoverPic(item *rag.WechatArticle) { var err error defer func() { if err != nil { utils.FileLog.Error("替换公众号头像失败,err:%v", err) fmt.Println("替换公众号头像失败,err:", err) } }() if item.CoverUrl == `` { return } resourceUrl, err := downloadWxPicAndUploadToOss(item.CoverUrl, `cover_url`) if err != nil { return } item.CoverUrl = resourceUrl err = item.Update([]string{"cover_url"}) } // replaceWechatArticlePic // @Description: 替换文章内容图 // @author: Roc // @datetime 2025-03-11 09:38:35 // @param item *rag.WechatArticle func ReplaceWechatArticlePic(item *rag.WechatArticle) { var err error defer func() { if err != nil { utils.FileLog.Error("替换公众号头像失败,err:%v", err) fmt.Println("替换公众号头像失败,err:", err) } }() if item.Content == `` { return } content, err := ReplaceHtmlImg(html.UnescapeString(item.Content)) if err != nil { return } item.Content = html.EscapeString(content) err = item.Update([]string{"content"}) return } // downloadWxPicAndUploadToOss // @Description: 下载微信图片并上传到OSS // @author: Roc // @datetime 2025-03-11 09:28:49 // @param wxPicUrl string // @return resourceUrl string // @return err error func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, err error) { localFilePath, err := utils.DownloadWxImage(wxPicUrl) if err != nil { return } defer func() { os.Remove(localFilePath) }() ossClient := NewOssClient() if ossClient == nil { err = fmt.Errorf(`初始化OSS服务失败`) return } ext := path.Ext(localFilePath) fileName := fmt.Sprintf(`%s%s%s`, time.Now().Format(utils.FormatShortDateTimeUnSpace), utils.GetRandStringNoSpecialChar(16), ext) //savePath := utils.UploadDir + `wx/wx_article/` + time.Now().Format("200601/20060102/") + fileName savePath := fmt.Sprintf(`%swx/%s/%s%s`, utils.UploadDir, source, time.Now().Format("200601/20060102/"), fileName) resourceUrl, err = ossClient.UploadFile(fileName, localFilePath, savePath) if err != nil { err = fmt.Errorf("文件上传失败,Err:" + err.Error()) return } return } // ReplaceHtmlImg // @Description: 将html中的图片替换成自己的 // @author: Roc // @datetime 2025-03-11 14:32:00 // @param htmlStr string // @return newHtml string // @return err error func ReplaceHtmlImg(htmlStr string) (newHtml string, err error) { doc, err := html2.Parse(strings.NewReader(htmlStr)) if err != nil { return } if err != nil { return } handleNode(doc) // 将处理后的HTML节点重新渲染为HTML字符串 var buf bytes.Buffer if err = html2.Render(&buf, doc); err != nil { fmt.Println(err) return } newHtml = buf.String() return } // handleNode // @Description: html节点处理 // @author: Roc // @datetime 2025-03-11 14:32:45 // @param n *html2.Node func handleNode(n *html2.Node) { if n.Type == html2.ElementNode { if n.Data == "img" { for k, attr := range n.Attr { // 新增代码:如果标签是img且存在data-src属性,则将data-src的值赋给src if n.Data == "img" && attr.Key == "src" { resourceUrl, tmpErr := downloadWxPicAndUploadToOss(attr.Val, `article`) if tmpErr != nil { continue } attr.Val = resourceUrl } n.Attr[k] = attr } } } for c := n.FirstChild; c != nil; c = c.NextSibling { handleNode(c) } }