package services

import (
	"bytes"
	"encoding/json"
	"eta/eta_api/cache"
	"eta/eta_api/models"
	"eta/eta_api/models/rag"
	"eta/eta_api/services/elastic"
	"eta/eta_api/services/llm"
	"eta/eta_api/services/llm/facade"
	"eta/eta_api/utils"
	"eta/eta_api/utils/llm/eta_llm/eta_llm_http"
	"fmt"
	html2 "golang.org/x/net/html"
	"html"
	"os"
	"path"
	"regexp"
	"strconv"
	"strings"
	"time"
)

// AddWechatPlatform
// @Description: Resolve a newly added official account from one of its article links.
// The article is fetched to obtain the account identifier (Appuin). When an account with
// that identifier already exists, the freshly created record is deleted and the existing
// one is used; otherwise the new record is completed with the profile data from the article.
// Afterwards the article itself and up to 10 recent articles of the account are stored.
// Errors are only written to the file log.
// @param item *rag.WechatPlatform record that carries ArticleLink and an empty FakeId
func AddWechatPlatform(item *rag.WechatPlatform) {
	var err error
	defer func() {
		if err != nil {
			utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
		}
	}()

	// Nothing to resolve when the account is already bound or no link was supplied.
	if item.FakeId != `` || item.ArticleLink == `` {
		return
	}

	articleLink := item.ArticleLink
	articleDetail, err := llm.SearchByWechatArticle(articleLink)
	if err != nil {
		return
	}
	if articleDetail.Appuin == `` {
		err = fmt.Errorf("文章内未匹配到公众号唯一标识")
		return
	}

	// Look for an account that already owns this identifier.
	existing, findErr := new(rag.WechatPlatform).GetByFakeID(articleDetail.Appuin)
	switch {
	case findErr == nil:
		// Duplicate: drop the record that was just created and continue with the stored one.
		if err = item.Del(); err != nil {
			return
		}
		item = existing

	case utils.IsErrNoRow(findErr):
		// First time this account is seen: fill in the profile fields.
		item.FakeId = articleDetail.Appuin
		item.Nickname = articleDetail.Nickname
		item.RoundHeadImg = articleDetail.RoundHeadImg
		item.Signature = articleDetail.ProfileSignature
		item.ModifyTime = time.Now()
		updateCols := []string{
			rag.WechatPlatformColumns.FakeID,
			rag.WechatPlatformColumns.Nickname,
			rag.WechatPlatformColumns.RoundHeadImg,
			rag.WechatPlatformColumns.Signature,
			rag.WechatPlatformColumns.ModifyTime,
		}
		if err = item.Update(updateCols); err != nil {
			return
		}
		// Re-host the avatar in the background.
		go replaceWechatPlatformPic(item)

	default:
		// Any other lookup failure aborts the whole operation.
		err = findErr
		return
	}

	// Store the article that was used for the lookup, then pull the most recent ones.
	AddWechatArticle(item, articleLink, articleDetail, nil)
	BeachAddWechatArticle(item, 10)

	fmt.Println("公众号入库完成")
}
添加公众号文章入库 // @author: Roc // @datetime 2025-03-05 13:24:14 // @param item *rag.WechatPlatform // @param link string // @param articleDetail WechatArticleDataResp func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail llm.WechatArticleDataResp, articleMenu *llm.ArticleMenu) { var err error defer func() { if err != nil { utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err) } }() obj := new(rag.WechatArticle) _, err = obj.GetByLink(articleLink) if err == nil { // 文章已经入库了,不需要重复入库 return } // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回 if !utils.IsErrNoRow(err) { return } // 这个时候,说明数据库中没有这个文章,那么需要文章入库 err = nil var publishAt time.Time if articleDetail.CreateAt != `` { createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt) if tmpErr == nil { publishAt = time.Unix(int64(createAtInt), 1000) } } else if articleMenu != nil { publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000) } content := articleDetail.HtmlContent // 图片下载下来到本地,如果成功了,那么就用新的 tmpContent, err := ReplaceHtmlImg(content) if tmpContent != `` { content = tmpContent } obj = &rag.WechatArticle{ WechatArticleId: 0, WechatPlatformId: item.WechatPlatformId, FakeId: item.FakeId, Title: articleDetail.Title, Link: articleLink, CoverUrl: articleDetail.CoverUrl, Description: articleDetail.Desc, Content: html.EscapeString(content), TextContent: articleDetail.TextContent, Country: articleDetail.CountryName, Province: articleDetail.ProvinceName, City: articleDetail.CityName, //Abstract: "", //ArticleCreateTime: createAt, ModifyTime: time.Now(), CreateTime: time.Now(), } if !publishAt.IsZero() { obj.ArticleCreateTime = publishAt } if articleMenu != nil { obj.Title = articleMenu.Title //obj.Link = articleMenu.Link obj.CoverUrl = articleMenu.Cover obj.Description = articleMenu.Digest } err = obj.Create() // 修改文章封面图 go replaceWechatArticleCoverPic(obj) // 文章入库成功后,需要将相关信息入摘要库 go cache.AddWechatArticleLlmOpToCache(obj.WechatArticleId, ``) } // BeachAddWechatArticle // @Description: 
批量添加公众号文章 // @param item // @param num // @return err func BeachAddWechatArticle(item *rag.WechatPlatform, num int) { var err error defer func() { //fmt.Println("公众号文章批量入库完成") if err != nil { utils.FileLog.Error("公众号文章批量入库失败,err:%v", err) fmt.Println("公众号文章批量入库失败,err:", err) } }() if item.FakeId == `` { return } wechatArticleObj := new(rag.WechatArticle) // 获取公众号的文章列表 articleListResp, err := llm.SearchByWechatArticleList(item.FakeId, num) if err != nil { return } for _, articleMenu := range articleListResp.List { // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了 _, err = wechatArticleObj.GetByLink(articleMenu.Link) if err == nil { // 文章已经入库了,不需要重复入库 continue } if !utils.IsErrNoRow(err) { return } err = nil articleDetail, tmpErr := llm.SearchByWechatArticle(articleMenu.Link) if tmpErr != nil { err = tmpErr return } // 把刚搜索的文章加入到指标库 AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu) //time.Sleep(10 * time.Second) // 随机休眠,至少大于10s sleepTimeInt := utils.GetRandInt(10, 20) if sleepTimeInt < 10 { sleepTimeInt = 10 } time.Sleep(time.Duration(sleepTimeInt) * time.Second) } return } // //// GenerateArticleAbstract //// @Description: 文章摘要生成 //// @author: Roc //// @datetime 2025-03-10 16:17:53 //// @param item *rag.WechatArticle //func GenerateArticleAbstract(item *rag.WechatArticle, forceGenerate bool) { // var err error // defer func() { // if err != nil { // utils.FileLog.Error("文章转临时文件失败,err:%v", err) // fmt.Println("文章转临时文件失败,err:", err) // } // }() // // // 内容为空,那就不需要生成摘要 // if item.TextContent == `` { // return // } // // abstractObj := rag.WechatArticleAbstract{} // tmpAbstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId) // // 如果找到了,同时不是强制生成,那么就直接处理到知识库中 // if err == nil && !forceGenerate { // // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中 // WechatArticleAbstractToKnowledge(item, tmpAbstractItem, false) // // return // } // if !utils.IsErrNoRow(err) { // return // } // // //开始对话 // abstract, addArticleChatRecordList, tmpErr := 
getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_ETA_REPORT) // if tmpErr != nil { // err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error()) // return // } // // // 添加问答记录 // if len(addArticleChatRecordList) > 0 { // recordObj := rag.WechatArticleChatRecord{} // err = recordObj.CreateInBatches(addArticleChatRecordList) // if err != nil { // return // } // } // // if abstract != `` { // if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 { // item.AbstractStatus = 2 // item.ModifyTime = time.Now() // err = item.Update([]string{"AbstractStatus", "ModifyTime"}) // return // } // item.AbstractStatus = 1 // item.ModifyTime = time.Now() // err = item.Update([]string{"AbstractStatus", "ModifyTime"}) // // abstractItem := &rag.WechatArticleAbstract{ // WechatArticleAbstractId: 0, // WechatArticleId: item.WechatArticleId, // Content: abstract, // Version: 0, // VectorKey: "", // ModifyTime: time.Now(), // CreateTime: time.Now(), // } // err = abstractItem.Create() // if err != nil { // return // } // // // 数据入ES库 // go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId) // // WechatArticleAbstractToKnowledge(item, abstractItem, false) // } //} // GenerateArticleAbstract // @Description: 文章摘要生成(默认提示词批量生成) // @author: Roc // @datetime 2025-03-10 16:17:53 // @param item *rag.WechatArticle func GenerateWechatArticleAbstract(item *rag.WechatArticle, forceGenerate bool) { var err error defer func() { if err != nil { utils.FileLog.Error("文章转临时文件失败,err:%v", err) fmt.Println("文章转临时文件失败,err:", err) } }() // 内容为空,那就不需要生成摘要 if item.TextContent == `` { return } questionObj := rag.Question{} questionList, err := questionObj.GetListByCondition(``, ` AND is_default = 1 `, []interface{}{}, 0, 100) if err != nil { err = fmt.Errorf("获取问题列表失败,Err:" + err.Error()) return } // 没问题就不生成了 if len(questionList) <= 0 { return } for _, question := range questionList { GenerateWechatArticleAbstractByQuestion(item, question, forceGenerate) } return } // 
GenerateArticleAbstractByQuestion // @Description: 文章摘要生成(根据提示词生成) // @author: Roc // @datetime 2025-03-10 16:17:53 // @param item *rag.WechatArticle func GenerateWechatArticleAbstractByQuestion(item *rag.WechatArticle, question *rag.Question, forceGenerate bool) { var err error defer func() { if err != nil { utils.FileLog.Error("摘要生成失败,err:%v", err) fmt.Println("摘要生成失败,err:", err) } }() // 内容为空,那就不需要生成摘要 if item.TextContent == `` { return } abstractObj := rag.WechatArticleAbstract{} abstractItem, err := abstractObj.GetByWechatArticleIdAndQuestionId(item.WechatArticleId, question.QuestionId) // 如果找到了,同时不是强制生成,那么就直接处理到知识库中 if err == nil && !forceGenerate { // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中 WechatArticleAbstractToKnowledge(item, abstractItem, false) return } //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent) //开始对话 abstract, industryTags, _, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_WECHAT, questionStr) if tmpErr != nil { err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error()) return } if abstract == `` { return } var tagIdJsonStr string // 标签ID { tagIdList := make([]int, 0) tagIdMap := make(map[int]bool) if abstractItem != nil && abstractItem.Tags != `` { tmpErr = json.Unmarshal([]byte(abstractItem.Tags), &tagIdList) if tmpErr != nil { utils.FileLog.Info(fmt.Sprintf("json.Unmarshal 失败,标签数据:%s,Err:%s", abstractItem.Tags, tmpErr.Error())) } else { for _, tagId := range tagIdList { tagIdMap[tagId] = true } } } for _, tagName := range industryTags { tagId, tmpErr := GetTagIdByName(tagName) if tmpErr != nil { utils.FileLog.Info(fmt.Sprintf("获取标签ID失败,标签名称:%s,Err:%s", tagName, tmpErr.Error())) } if _, ok := tagIdMap[tagId]; !ok { tagIdList = append(tagIdList, tagId) tagIdMap[tagId] = true } } //for _, tagName := range varietyTags { // tagId, tmpErr := GetTagIdByName(tagName) // if tmpErr != nil { // 
utils.FileLog.Info(fmt.Sprintf("获取标签ID失败,标签名称:%s,Err:%s", tagName, tmpErr.Error())) // } // if _, ok := tagIdMap[tagId]; !ok { // tagIdList = append(tagIdList, tagId) // tagIdMap[tagId] = true // } //} tagIdJsonByte, err := json.Marshal(tagIdList) if err != nil { utils.FileLog.Info(fmt.Sprintf("标签ID序列化失败,Err:%s", tmpErr.Error())) } else { tagIdJsonStr = string(tagIdJsonByte) } } if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 { item.AbstractStatus = 2 item.ModifyTime = time.Now() err = item.Update([]string{"AbstractStatus", "ModifyTime"}) return } item.AbstractStatus = 1 item.ModifyTime = time.Now() err = item.Update([]string{"AbstractStatus", "ModifyTime"}) if abstractItem == nil || abstractItem.WechatArticleAbstractId <= 0 { abstractItem = &rag.WechatArticleAbstract{ WechatArticleAbstractId: 0, WechatArticleId: item.WechatArticleId, Content: abstract, Version: 1, VectorKey: "", ModifyTime: time.Now(), CreateTime: time.Now(), QuestionId: question.QuestionId, Tags: tagIdJsonStr, QuestionContent: question.QuestionContent, } err = abstractItem.Create() } else { // 添加历史记录 rag.AddArticleAbstractHistoryByWechatArticleAbstract(abstractItem) abstractItem.Content = abstract abstractItem.Version++ abstractItem.ModifyTime = time.Now() abstractItem.Tags = "" abstractItem.QuestionContent = question.QuestionContent err = abstractItem.Update([]string{"content", "version", "modify_time", "tags", "question_content"}) } if err != nil { return } // 数据入ES库 go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId) WechatArticleAbstractToKnowledge(item, abstractItem, false) } // DelDoc // @Description: 删除摘要向量库 // @author: Roc // @datetime 2025-03-12 16:55:05 // @param wechatArticleAbstractList []*rag.WechatArticleAbstract // @return err error func DelDoc(wechatArticleAbstractList []*rag.WechatArticleAbstract) (err error) { defer func() { if err != nil { utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err) fmt.Println("删除摘要向量库文件失败,err:", err) } }() 
vectorKeyList := make([]string, 0) wechatArticleAbstractIdList := make([]int, 0) for _, v := range wechatArticleAbstractList { if v.VectorKey == `` { continue } vectorKeyList = append(vectorKeyList, v.VectorKey) wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.WechatArticleAbstractId) } // 没有就不删除 if len(vectorKeyList) <= 0 { return } _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList) if err != nil { err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error()) return } //fmt.Println(resp) obj := rag.WechatArticleAbstract{} err = obj.DelVectorKey(wechatArticleAbstractIdList) return } // DelLlmDoc // @Description: 删除摘要向量库 // @author: Roc // @datetime 2025-03-12 16:55:05 // @param wechatArticleAbstractList []*rag.WechatArticleAbstract // @return err error func DelLlmDoc(vectorKeyList []string, wechatArticleAbstractIdList []int) (err error) { defer func() { if err != nil { utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err) fmt.Println("删除摘要向量库文件失败,err:", err) } }() // 没有就不删除 if len(vectorKeyList) <= 0 { return } _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList) if err != nil { err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error()) return } //fmt.Println(resp) obj := rag.WechatArticleAbstract{} err = obj.DelVectorKey(wechatArticleAbstractIdList) return } func getAnswerByContent(articleId int, source int, questionStr string) (answer string, industryTags, varietyTags []string, err error) { //addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0) result, err := facade.AIGCBaseOnPromote(facade.AIGC{ Promote: questionStr, Source: source, ArticleId: articleId, LLMModel: `deepseek-r1:32b`, }) if err != nil { return } // JSON字符串转字节 //answerByte, err := json.Marshal(result) //if err != nil { // return //} //originalAnswer := string(answerByte) // 提取 后面的内容 thinkEndIndex := strings.Index(result.Answer, "") if thinkEndIndex != -1 { answer = 
strings.TrimSpace(result.Answer[thinkEndIndex+len(""):]) } else { answer = result.Answer } answer = strings.TrimSpace(answer) // 提取标签 industryTags, varietyTags = extractLabels(answer) //// 待入库的数据 //addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{ // WechatArticleChatRecordId: 0, // WechatArticleId: articleId, // ChatUserType: "user", // Content: questionStr, // SendTime: time.Now(), // CreatedTime: time.Now(), // UpdateTime: time.Now(), //}, &rag.WechatArticleChatRecord{ // WechatArticleChatRecordId: 0, // WechatArticleId: articleId, // ChatUserType: "assistant", // Content: originalAnswer, // SendTime: time.Now(), // CreatedTime: time.Now(), // UpdateTime: time.Now(), //}) return } func getAnswerByContentBak(wechatArticleId int, docId string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) { historyList := make([]eta_llm_http.HistoryContent, 0) addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0) questionObj := rag.Question{} questionList, err := questionObj.GetListByCondition(``, ``, []interface{}{}, 0, 100) if err != nil { err = fmt.Errorf("获取问题列表失败,Err:" + err.Error()) return } // 没问题就不生成了 if len(questionList) <= 0 { return } //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry questionStrList := []string{`你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`} for _, v := range questionList { questionStrList = append(questionStrList, v.QuestionContent) } questionStr := strings.Join(questionStrList, "\n") originalAnswer, result, err := llm.ChatByFile(docId, questionStr, historyList) fmt.Println(result) if err != nil { err = fmt.Errorf("LLM对话失败,Err:" + err.Error()) return } // 提取 后面的内容 thinkEndIndex := strings.Index(result.Answer, "") if thinkEndIndex != -1 { answer = strings.TrimSpace(result.Answer[thinkEndIndex+len(""):]) } else { answer = result.Answer } answer = strings.TrimSpace(answer) // 待入库的数据 addArticleChatRecordList = 
append(addArticleChatRecordList, &rag.WechatArticleChatRecord{ WechatArticleChatRecordId: 0, WechatArticleId: wechatArticleId, ChatUserType: "user", Content: questionStr, SendTime: time.Now(), CreatedTime: time.Now(), UpdateTime: time.Now(), }, &rag.WechatArticleChatRecord{ WechatArticleChatRecordId: 0, WechatArticleId: wechatArticleId, ChatUserType: "assistant", Content: originalAnswer, SendTime: time.Now(), CreatedTime: time.Now(), UpdateTime: time.Now(), }) return } // ArticleToKnowledge // @Description: 原文入向量库 // @author: Roc // @datetime 2025-03-10 16:13:16 // @param item *rag.WechatArticle func ArticleToKnowledge(item *rag.WechatArticle) { if item.TextContent == `` { return } var err error defer func() { if err != nil { utils.FileLog.Error("上传文章原文到知识库失败,err:%v", err) fmt.Println("上传文章原文到知识库失败,err:", err) } }() // 生成临时文件 //dateDir := time.Now().Format("20060102") //uploadDir := "./static/ai/article/" + dateDir uploadDir := "./static/ai/article" err = os.MkdirAll(uploadDir, utils.DIR_MOD) if err != nil { err = fmt.Errorf("存储目录创建失败,Err:" + err.Error()) return } //fileName := utils.RemoveSpecialChars(item.Title) + `.md` fileName := utils.MD5(item.Title) + `.md` tmpFilePath := uploadDir + "/" + fileName err = utils.SaveToFile(item.TextContent, tmpFilePath) if err != nil { err = fmt.Errorf("生成临时文件失败,Err:" + err.Error()) return } defer func() { os.Remove(tmpFilePath) }() knowledgeArticleName := models.BusinessConfMap[models.KnowledgeArticleName] // 上传临时文件到LLM uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName) if err != nil { err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error()) return } if len(uploadFileResp.FailedFiles) > 0 { for _, v := range uploadFileResp.FailedFiles { err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v) } } item.VectorKey = tmpFilePath item.ModifyTime = time.Now() err = item.Update([]string{"vector_key", "modify_time"}) } // WechatArticleAbstractToKnowledge // @Description: 摘要入向量库 // @author: Roc // @datetime 2025-03-10 
16:14:59 // @param wechatArticleItem *rag.WechatArticle // @param abstractItem *rag.WechatArticleAbstract func WechatArticleAbstractToKnowledge(wechatArticleItem *rag.WechatArticle, abstractItem *rag.WechatArticleAbstract, isReUpload bool) { if abstractItem.Content == `` { return } // 已经生成了,那就不处理了 if abstractItem.VectorKey != `` && !isReUpload { return } var err error defer func() { if err != nil { utils.FileLog.Error("摘要入向量库失败,err:%v", err) fmt.Println("摘要入向量库失败,err:", err) } // 数据入ES库 go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId) }() // 生成临时文件 //dateDir := time.Now().Format("20060102") //uploadDir := + "./static/ai/article/" + dateDir uploadDir := "./static/ai/abstract" err = os.MkdirAll(uploadDir, utils.DIR_MOD) if err != nil { err = fmt.Errorf("存储目录创建失败,Err:" + err.Error()) return } fileName := utils.MD5(fmt.Sprintf("%d_%d", utils.AI_ARTICLE_SOURCE_WECHAT, wechatArticleItem.WechatArticleId)) + `.md` tmpFilePath := uploadDir + "/" + fileName err = utils.SaveToFile(abstractItem.Content, tmpFilePath) if err != nil { err = fmt.Errorf("生成临时文件失败,Err:" + err.Error()) return } defer func() { os.Remove(tmpFilePath) }() knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName] // 上传临时文件到LLM uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName) if err != nil { err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error()) return } if len(uploadFileResp.FailedFiles) > 0 { for _, v := range uploadFileResp.FailedFiles { err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v) } } abstractItem.VectorKey = tmpFilePath abstractItem.ModifyTime = time.Now() err = abstractItem.Update([]string{"vector_key", "modify_time"}) } // replaceWechatPlatformPic // @Description: 替换公众号头像 // @author: Roc // @datetime 2025-03-11 09:38:24 // @param item *rag.WechatPlatform func replaceWechatPlatformPic(item *rag.WechatPlatform) { var err error defer func() { if err != nil { utils.FileLog.Error("替换公众号头像失败,err:%v", err) fmt.Println("替换公众号头像失败,err:", 
err) } }() if item.RoundHeadImg == `` { return } resourceUrl, err := downloadWxPicAndUploadToOss(item.RoundHeadImg, `head_img`) if err != nil { return } item.RoundHeadImg = resourceUrl err = item.Update([]string{"round_head_img"}) } // replaceWechatArticleCoverPic // @Description: 替换文章封面图 // @author: Roc // @datetime 2025-03-11 09:38:35 // @param item *rag.WechatArticle func replaceWechatArticleCoverPic(item *rag.WechatArticle) { var err error defer func() { if err != nil { utils.FileLog.Error("替换公众号头像失败,err:%v", err) fmt.Println("替换公众号头像失败,err:", err) } // 数据入ES库 AddOrEditEsWechatArticle(item.WechatArticleId) }() if item.CoverUrl == `` { return } resourceUrl, err := downloadWxPicAndUploadToOss(item.CoverUrl, `cover_url`) if err != nil { return } item.CoverUrl = resourceUrl err = item.Update([]string{"cover_url"}) } // replaceWechatArticlePic // @Description: 替换文章内容图 // @author: Roc // @datetime 2025-03-11 09:38:35 // @param item *rag.WechatArticle func ReplaceWechatArticlePic(item *rag.WechatArticle) { var err error defer func() { if err != nil { utils.FileLog.Error("替换公众号头像失败,err:%v", err) fmt.Println("替换公众号头像失败,err:", err) } }() if item.Content == `` { return } content, err := ReplaceHtmlImg(html.UnescapeString(item.Content)) if err != nil { return } item.Content = html.EscapeString(content) err = item.Update([]string{"content"}) return } // downloadWxPicAndUploadToOss // @Description: 下载微信图片并上传到OSS // @author: Roc // @datetime 2025-03-11 09:28:49 // @param wxPicUrl string // @return resourceUrl string // @return err error func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, err error) { localFilePath, err := utils.DownloadWxImage(wxPicUrl) if err != nil { return } defer func() { os.Remove(localFilePath) }() ossClient := NewOssClient() if ossClient == nil { err = fmt.Errorf(`初始化OSS服务失败`) return } ext := path.Ext(localFilePath) fileName := fmt.Sprintf(`%s%s%s`, time.Now().Format(utils.FormatShortDateTimeUnSpace), 
utils.GetRandStringNoSpecialChar(16), ext) //savePath := utils.UploadDir + `wx/wx_article/` + time.Now().Format("200601/20060102/") + fileName savePath := fmt.Sprintf(`%swx/%s/%s%s`, utils.UploadDir, source, time.Now().Format("200601/20060102/"), fileName) resourceUrl, err = ossClient.UploadFile(fileName, localFilePath, savePath) if err != nil { err = fmt.Errorf("文件上传失败,Err:" + err.Error()) return } return } // ReplaceHtmlImg // @Description: 将html中的图片替换成自己的 // @author: Roc // @datetime 2025-03-11 14:32:00 // @param htmlStr string // @return newHtml string // @return err error func ReplaceHtmlImg(htmlStr string) (newHtml string, err error) { doc, err := html2.Parse(strings.NewReader(htmlStr)) if err != nil { return } if err != nil { return } handleNode(doc) // 将处理后的HTML节点重新渲染为HTML字符串 var buf bytes.Buffer if err = html2.Render(&buf, doc); err != nil { fmt.Println(err) return } newHtml = buf.String() return } // handleNode // @Description: html节点处理 // @author: Roc // @datetime 2025-03-11 14:32:45 // @param n *html2.Node func handleNode(n *html2.Node) { if n.Type == html2.ElementNode { if n.Data == "img" { for k, attr := range n.Attr { // 新增代码:如果标签是img且存在data-src属性,则将data-src的值赋给src if n.Data == "img" && attr.Key == "src" { resourceUrl, tmpErr := downloadWxPicAndUploadToOss(attr.Val, `article`) if tmpErr != nil { continue } attr.Val = resourceUrl } n.Attr[k] = attr } } } for c := n.FirstChild; c != nil; c = c.NextSibling { handleNode(c) } } // AddOrEditEsWechatPlatformId // @Description: 批量处理某个公众号下的文章到ES // @author: Roc // @datetime 2025-03-13 11:01:28 // @param articleId int func AddOrEditEsWechatPlatformId(wechatPlatformId int) { if utils.EsWechatArticleName == `` { return } obj := rag.WechatArticle{} list, _ := obj.GetListByCondition(` wechat_article_id `, ` AND wechat_platform_id = ? 
`, []interface{}{wechatPlatformId}, 0, 1000000) for _, item := range list { AddOrEditEsWechatArticle(item.WechatArticleId) } } // AddOrEditEsWechatArticle // @Description: 新增/编辑微信文章入ES // @author: Roc // @datetime 2025-03-13 11:01:28 // @param articleId int func AddOrEditEsWechatArticle(articleId int) { if utils.EsWechatArticleName == `` { return } var err error defer func() { if err != nil { utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err) fmt.Println("添加公众号微信信息到ES失败,err:", err) } }() obj := rag.WechatArticle{} articleInfo, err := obj.GetById(articleId) if err != nil { err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error()) return } platformObj := rag.WechatPlatform{} platformInfo, err := platformObj.GetById(articleInfo.WechatPlatformId) if err != nil { err = fmt.Errorf("获取公众号平台信息失败,Err:" + err.Error()) return } esItem := elastic.WechatArticleAndPlatform{ WechatArticleId: articleInfo.WechatArticleId, WechatPlatformId: articleInfo.WechatPlatformId, FakeId: articleInfo.FakeId, Title: articleInfo.Title, Link: articleInfo.Link, CoverUrl: articleInfo.CoverUrl, Description: articleInfo.Description, //Content: articleInfo.Content, //TextContent: articleInfo.TextContent, //AbstractStatus: articleInfo.AbstractStatus, Country: articleInfo.Country, Province: articleInfo.Province, City: articleInfo.City, ArticleCreateTime: articleInfo.ArticleCreateTime, IsDeleted: articleInfo.IsDeleted, ModifyTime: articleInfo.ModifyTime, CreateTime: articleInfo.CreateTime, Nickname: platformInfo.Nickname, Alias: platformInfo.Alias, RoundHeadImg: platformInfo.RoundHeadImg, } err = elastic.WechatArticleEsAddOrEdit(strconv.Itoa(articleInfo.WechatArticleId), esItem) } // AddOrEditEsWechatArticleAbstract // @Description: 新增/编辑微信文章摘要入ES // @author: Roc // @datetime 2025-03-13 14:13:47 // @param articleAbstractId int func AddOrEditEsWechatArticleAbstract(articleAbstractId int) { if utils.EsWechatArticleAbstractName == `` { return } var err error defer func() { if err != nil { 
utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err) fmt.Println("添加公众号微信信息到ES失败,err:", err) } }() obj := rag.WechatArticleAbstract{} abstractInfo, err := obj.GetById(articleAbstractId) if err != nil { err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error()) return } articleObj := rag.WechatArticle{} articleInfo, err := articleObj.GetById(abstractInfo.WechatArticleId) if err != nil { err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error()) return } // 标签ID tagIdList := make([]int, 0) if abstractInfo.Tags != `` { err = json.Unmarshal([]byte(abstractInfo.Tags), &tagIdList) if err != nil { err = fmt.Errorf("报告标签ID转int失败,Err:" + err.Error()) utils.FileLog.Info(fmt.Sprintf("json.Unmarshal 报告标签ID转int失败,标签数据:%s,Err:%s", abstractInfo.Tags, err.Error())) } } esItem := elastic.WechatArticleAbstractItem{ WechatArticleAbstractId: abstractInfo.WechatArticleAbstractId, WechatArticleId: abstractInfo.WechatArticleId, WechatPlatformId: articleInfo.WechatPlatformId, Abstract: abstractInfo.Content, QuestionId: abstractInfo.QuestionId, Version: abstractInfo.Version, VectorKey: abstractInfo.VectorKey, ModifyTime: articleInfo.ModifyTime, CreateTime: articleInfo.CreateTime, Title: articleInfo.Title, Link: articleInfo.Link, TagIdList: tagIdList, } err = elastic.WechatArticleAbstractEsAddOrEdit(strconv.Itoa(articleAbstractId), esItem) } // DelEsWechatArticleAbstract // @Description: 删除ES中的微信文章摘要 // @author: Roc // @datetime 2025-03-13 14:13:47 // @param articleAbstractId int func DelEsWechatArticleAbstract(articleAbstractId int) { if utils.EsWechatArticleAbstractName == `` { return } var err error defer func() { if err != nil { utils.FileLog.Error("删除公众号微信信息到ES失败,err:%v", err) fmt.Println("删除公众号微信信息到ES失败,err:", err) } }() err = elastic.WechatArticleAbstractEsDel(strconv.Itoa(articleAbstractId)) } // AddOrEditEsRagQuestion // @Description: 新增/编辑知识库问题入ES // @author: Roc // @datetime 2025-03-28 11:25:50 // @param questionId int func AddOrEditEsRagQuestion(questionId int) { if utils.EsWechatArticleName == `` 
{ return } var err error defer func() { if err != nil { utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err) fmt.Println("添加公众号微信信息到ES失败,err:", err) } }() obj := rag.Question{} questionInfo, err := obj.GetByID(questionId) if err != nil { err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error()) return } esItem := elastic.RagQuestionItem{ QuestionId: questionInfo.QuestionId, QuestionTitle: questionInfo.QuestionTitle, QuestionContent: questionInfo.QuestionContent, Sort: questionInfo.Sort, SysUserId: questionInfo.SysUserId, SysUserRealName: questionInfo.SysUserRealName, ModifyTime: questionInfo.ModifyTime, CreateTime: questionInfo.CreateTime, } err = elastic.RagQuestionEsAddOrEdit(strconv.Itoa(questionInfo.QuestionId), esItem) } // DelEsRagQuestion // @Description: 删除ES中的知识库问题 // @author: Roc // @datetime 2025-03-28 11:26:40 // @param questionId int func DelEsRagQuestion(questionId int) { if utils.EsWechatArticleAbstractName == `` { return } var err error defer func() { if err != nil { utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err) fmt.Println("添加公众号微信信息到ES失败,err:", err) } }() err = elastic.RagQuestionEsDel(strconv.Itoa(questionId)) } // extractLabels // @Description: 提取摘要中的标签 // @author: Roc // @datetime 2025-04-18 17:16:05 // @param text string // @return industryTags []string // @return varietyTags []string func extractLabels(text string) (industryTags []string, varietyTags []string) { reIndustry := regexp.MustCompile(`行业标签((?:【[^】]*】)+)`) industryMatch := reIndustry.FindStringSubmatch(text) if len(industryMatch) > 1 { industryContent := industryMatch[1] reSplit := regexp.MustCompile(`【([^】]*)】`) industryTags = make([]string, 0) for _, m := range reSplit.FindAllStringSubmatch(industryContent, -1) { if len(m) > 1 { industryTags = append(industryTags, m[1]) } } } reVariety := regexp.MustCompile(`品种标签((?:【[^】]*】)+)`) varietyMatch := reVariety.FindStringSubmatch(text) if len(varietyMatch) > 1 { varietyContent := varietyMatch[1] reSplit := regexp.MustCompile(`【([^】]*)】`) 
varietyTags = make([]string, 0) for _, m := range reSplit.FindAllStringSubmatch(varietyContent, -1) { if len(m) > 1 { varietyTags = append(varietyTags, m[1]) } } } return } var aiAbstractTagMap = map[string]int{} // GetTagIdByName // @Description: 获取标签ID // @author: Roc // @datetime 2025-04-18 17:25:46 // @param tagName string // @return tagId int // @return err error func GetTagIdByName(tagName string) (tagId int, err error) { tagName = strings.TrimSpace(tagName) tagId, ok := aiAbstractTagMap[tagName] if ok { return } obj := rag.Tag{} item, err := obj.GetByCondition(fmt.Sprintf(` AND %s = ? `, rag.TagColumns.TagName), []interface{}{tagName}) if err != nil { if !utils.IsErrNoRow(err) { err = fmt.Errorf("获取标签失败,Err:" + err.Error()) return } item = &rag.Tag{ TagId: 0, TagName: tagName, Sort: 0, ModifyTime: time.Now(), CreateTime: time.Now(), } err = item.Create() if err != nil { err = fmt.Errorf("添加标签失败,Err:" + err.Error()) return } } tagId = item.TagId aiAbstractTagMap[tagName] = tagId return }