tactics.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463
  1. package services
  2. import (
  3. "fmt"
  4. "github.com/PuerkitoBio/goquery"
  5. "hongze/hongze_cygx/models"
  6. "hongze/hongze_cygx/utils"
  7. "html"
  8. "strconv"
  9. "strings"
  10. "time"
  11. )
  12. //同步策略文章
  13. func SyncTacticsList() (err error) {
  14. defer func() {
  15. if err != nil {
  16. fmt.Println("同步失败,Err:", err.Error())
  17. }
  18. }()
  19. fmt.Println("同步数据")
  20. indexName := utils.IndexName
  21. endDate := time.Now().AddDate(0, 0, -30).Format(utils.FormatDate)
  22. list, err := models.GetTacticsList2(endDate)
  23. //list, err := models.GetTacticsListAll()
  24. if err != nil {
  25. fmt.Println("GetTacticsList Err:", err.Error())
  26. return
  27. }
  28. fmt.Println("list len:", len(list))
  29. for k, v := range list {
  30. v.Department = "弘则权益研究"
  31. fmt.Println(k, v.ArticleId)
  32. //
  33. //publishDate, err := time.Parse(utils.FormatDateTime, v.PublishDate)
  34. //if err != nil {
  35. // fmt.Println("time.Parse:", err.Error())
  36. // return err
  37. //}
  38. //fmt.Println(publishDate)
  39. hh, _ := time.ParseDuration("8h")
  40. //pDate := publishDate.Add(hh)
  41. v.PublishDate = v.PublishDate.Add(hh)
  42. //判断是否已经存在
  43. if v.ArticleId < 0 {
  44. fmt.Println("参数错误")
  45. return err
  46. }
  47. count, err := models.GetArticleCountById(v.ArticleId)
  48. if err != nil && err.Error() != utils.ErrNoRow() {
  49. return err
  50. }
  51. v.Body = strings.Replace(v.Body, "http://vmp.hzinsights.com", "https://vmp.hzinsights.com", -1)
  52. expertNumStr, expertContentStr, interviewDateStr := BodyAnalysis(v.Body)
  53. if count > 0 {
  54. bodyText, _ := GetReportContentTextSub(v.Body)
  55. updateParams := make(map[string]interface{})
  56. updateParams["Title"] = v.Title
  57. updateParams["TitleEn"] = v.TitleEn
  58. updateParams["UpdateFrequency"] = v.UpdateFrequency
  59. updateParams["CreateDate"] = v.CreateDate
  60. updateParams["PublishDate"] = v.PublishDate
  61. updateParams["Body"] = html.EscapeString(v.Body)
  62. updateParams["BodyText"] = bodyText
  63. updateParams["Abstract"] = html.EscapeString(v.Abstract)
  64. updateParams["CategoryName"] = v.CategoryName
  65. updateParams["SubCategoryName"] = v.SubCategoryName
  66. updateParams["CategoryId"] = v.CategoryId
  67. updateParams["PublishStatus"] = v.PublishStatus
  68. updateParams["ExpertBackground"] = expertContentStr
  69. updateParams["ExpertNumber"] = expertNumStr
  70. updateParams["InterviewDate"] = interviewDateStr
  71. if v.Department != "弘则权益研究" {
  72. v.Department = "弘则权益研究"
  73. }
  74. updateParams["Department"] = v.Department
  75. whereParam := map[string]interface{}{"article_id": v.ArticleId}
  76. err = models.UpdateByExpr(models.CygxArticle{}, whereParam, updateParams)
  77. if err != nil {
  78. fmt.Println("UpdateByExpr Err:" + err.Error())
  79. }
  80. } else {
  81. fmt.Println(k, v.ArticleId, "add")
  82. item := new(models.CygxArticle)
  83. articleIdInt := v.ArticleId
  84. item.ArticleId = articleIdInt
  85. item.Title = v.Title
  86. item.TitleEn = v.TitleEn
  87. item.UpdateFrequency = v.UpdateFrequency
  88. item.CreateDate = v.CreateDate
  89. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  90. item.Body = html.EscapeString(v.Body)
  91. item.Abstract = html.EscapeString(v.Abstract)
  92. item.CategoryName = v.CategoryName
  93. item.SubCategoryName = v.SubCategoryName
  94. item.CategoryId = v.CategoryId
  95. item.PublishStatus = v.PublishStatus
  96. item.ExpertBackground = expertContentStr
  97. item.ExpertNumber = expertNumStr
  98. item.InterviewDate = interviewDateStr
  99. item.Department = v.Department
  100. item.ArticleIdMd5 = utils.MD5(strconv.Itoa(articleIdInt))
  101. _, err = models.AddCygxArticle(item)
  102. if err != nil {
  103. fmt.Println("AddCygxArticle Err:", err.Error())
  104. return err
  105. }
  106. }
  107. content := html.UnescapeString(v.Body)
  108. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  109. if err != nil {
  110. fmt.Println("create doc err:", err.Error())
  111. return err
  112. }
  113. doc.Find("a").Each(func(i int, a *goquery.Selection) {
  114. a.Remove()
  115. })
  116. bodyText := doc.Text()
  117. item := new(ElasticTestArticleDetail)
  118. item.ArticleId = v.ArticleId
  119. item.Title = v.Title
  120. item.BodyText = bodyText
  121. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  122. EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  123. }
  124. return
  125. }
  126. //同步策略文章
  127. func SyncCygxArticleList() (err error) {
  128. defer func() {
  129. if err != nil {
  130. fmt.Println("同步失败,Err:", err.Error())
  131. }
  132. }()
  133. fmt.Println("同步数据")
  134. indexName := utils.IndexName
  135. fmt.Println("indexName:", indexName)
  136. time.Sleep(5 * time.Second)
  137. list, err := models.GetCygxArticleListAll()
  138. if err != nil {
  139. fmt.Println("GetTacticsList Err:", err.Error())
  140. return
  141. }
  142. fmt.Println("list len:", len(list))
  143. for k, v := range list {
  144. v.Department = "弘则权益研究"
  145. fmt.Println(k, v.ArticleId)
  146. //判断是否已经存在
  147. if v.ArticleId < 0 {
  148. fmt.Println("参数错误")
  149. return err
  150. }
  151. content := html.UnescapeString(v.Body)
  152. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  153. if err != nil {
  154. fmt.Println("create doc err:", err.Error())
  155. return err
  156. }
  157. doc.Find("a").Each(func(i int, a *goquery.Selection) {
  158. a.Remove()
  159. })
  160. bodyText := doc.Text()
  161. item := new(ElasticTestArticleDetail)
  162. item.ArticleId = v.ArticleId
  163. item.Title = v.Title
  164. item.BodyText = bodyText
  165. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  166. EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  167. }
  168. return
  169. }
  170. //body 解析
  171. func BodyAnalysis(body string) (expertNumStr, expertContentStr, interviewDateStr string) {
  172. body = html.UnescapeString(body)
  173. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  174. if err != nil {
  175. fmt.Println("create doc err:", err.Error())
  176. return
  177. }
  178. var expertNumArr []string
  179. var expertContentArr []string
  180. var interviewDateArr []string
  181. doc.Find("p").Each(func(i int, s *goquery.Selection) {
  182. contentTxt := s.Text()
  183. if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
  184. interviewDate := s.Next().Text()
  185. interviewDateArr = append(interviewDateArr, interviewDate)
  186. }
  187. if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
  188. expertContent := s.Next().Text()
  189. if expertContent == "" {
  190. expertContent = contentTxt
  191. }
  192. if expertContent != "" {
  193. rightIndex := strings.Index(expertContent, ")")
  194. if rightIndex == 0 {
  195. rightIndex = strings.Index(expertContent, ")")
  196. }
  197. if rightIndex > 0 {
  198. expertNum := expertContent[:rightIndex]
  199. expertNum = strings.Replace(expertNum, "(", "", -1)
  200. expertNum = strings.Replace(expertNum, "(", "", -1)
  201. expertNum = strings.Replace(expertNum, "专家评价", "", -1)
  202. if expertNum != "" {
  203. expertNumArr = append(expertNumArr, expertNum)
  204. rightIndex = rightIndex
  205. expertContentStr := expertContent[rightIndex:]
  206. expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
  207. expertContentStr = strings.TrimLeft(expertContentStr, ":")
  208. expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
  209. expertContentArr = append(expertContentArr, expertContentStr)
  210. }
  211. }
  212. }
  213. }
  214. })
  215. if len(expertContentArr) == 0 {
  216. doc.Find("pre").Each(func(i int, s *goquery.Selection) {
  217. contentTxt := s.Text()
  218. if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
  219. interviewDate := s.Next().Text()
  220. if interviewDate != "" {
  221. interviewDateArr = append(interviewDateArr, interviewDate)
  222. }
  223. }
  224. if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
  225. expertContent := s.Next().Text()
  226. if expertContent == "" {
  227. expertContent = contentTxt
  228. }
  229. if expertContent != "" {
  230. rightIndex := strings.Index(expertContent, ")")
  231. if rightIndex == 0 {
  232. rightIndex = strings.Index(expertContent, ")")
  233. }
  234. expertNum := expertContent[:rightIndex]
  235. expertNum = strings.Replace(expertNum, "(", "", -1)
  236. expertNum = strings.Replace(expertNum, "(", "", -1)
  237. expertNum = strings.Replace(expertNum, "专家评价", "", -1)
  238. if expertNum != "" {
  239. expertNumArr = append(expertNumArr, expertNum)
  240. rightIndex = rightIndex
  241. expertContentStr := expertContent[rightIndex:]
  242. expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
  243. expertContentStr = strings.TrimLeft(expertContentStr, ":")
  244. expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
  245. if expertContentStr != "" {
  246. expertContentArr = append(expertContentArr, expertContentStr)
  247. }
  248. }
  249. }
  250. }
  251. })
  252. }
  253. if len(expertNumArr) > 0 {
  254. expertNumStr = expertNumArr[0]
  255. if expertNumStr != "" {
  256. expertNumStr = strings.Replace(expertNumStr, "#:", "", -1)
  257. expertNumStr = strings.Replace(expertNumStr, "# ", "", -1)
  258. expertNumStr = strings.Trim(expertNumStr, "")
  259. }
  260. }
  261. if len(expertContentArr) > 0 {
  262. expertContentStr = expertContentArr[0]
  263. }
  264. if len(interviewDateArr) > 0 {
  265. interviewDateStr = interviewDateArr[0]
  266. }
  267. return
  268. }
  269. //同步策略,报告文章
  270. func SyncTacticsListAddreport() (err error) {
  271. defer func() {
  272. if err != nil {
  273. fmt.Println("同步失败,Err:", err.Error())
  274. }
  275. }()
  276. fmt.Println("同步数据")
  277. indexName := utils.IndexName
  278. //endDate := time.Now().AddDate(0, 0, -30).Format(utils.FormatDate)
  279. //list, err := models.GetTacticsList2(endDate)
  280. list, err := models.GetTacticsListAll2()
  281. if err != nil {
  282. fmt.Println("GetTacticsList Err:", err.Error())
  283. return
  284. }
  285. fmt.Println("list len:", len(list))
  286. summaryCategoryIds := "28,32,45,50,57,62,72,74,79,84,86,88,90,95,96" //纪要库的文章类型categoty_id
  287. listSummary := strings.Split(summaryCategoryIds, ",")
  288. //listSummary, errsu := models.GetReportMappingCategoryID()
  289. //if errsu != nil {
  290. // fmt.Println("GetTacticsList Err:", errsu.Error())
  291. // return
  292. //}
  293. noSummaryArticleIds := "3454,3456,3457,3459,2449,2450,2453,2454,2459,2530,2583,2663,2670,2699,2715,2732,2748,2759,2399,2356,2870,3173,2978,2826,3470" //非纪要库类型的文章ID
  294. listNoSummaryArticleIds := strings.Split(noSummaryArticleIds, ",")
  295. listPermission, errper := models.GetPermissionMappingCategoryID()
  296. if errper != nil {
  297. fmt.Println("GetTacticsList Err:", errper.Error())
  298. return
  299. }
  300. for k, v := range list {
  301. //是否属于纪要库的数据
  302. for _, vSum := range listSummary {
  303. vSumInt, _ := strconv.Atoi(vSum)
  304. if v.CategoryId == vSumInt {
  305. v.IsSummary = 1
  306. }
  307. }
  308. //排除不属于纪要库类型的文章
  309. for _, vArt := range listNoSummaryArticleIds {
  310. vArtInt, _ := strconv.Atoi(vArt)
  311. if v.ArticleId == vArtInt {
  312. v.IsSummary = 0
  313. }
  314. }
  315. for _, vPer := range listPermission {
  316. if v.CategoryId == vPer.CategoryId {
  317. v.IsReport = 1
  318. }
  319. }
  320. if v.IsReport > 0 {
  321. //是否属于策略 策略自动归类
  322. //是否属于行业报告 行业报告自动归类
  323. if v.CategoryId == 7 || v.CategoryId == 11 || v.CategoryId == 51 || v.CategoryId == 52 || v.CategoryId == 64 || v.CategoryId == 80 || v.CategoryId == 87 {
  324. v.IsClass = 1
  325. v.ReportType = 1 //是否属于行业报告
  326. } else {
  327. v.ReportType = 2 //是否属于产业报告
  328. }
  329. }
  330. v.Department = "弘则权益研究"
  331. fmt.Println(k, v.ArticleId)
  332. hh, _ := time.ParseDuration("8h")
  333. //pDate := publishDate.Add(hh)
  334. v.PublishDate = v.PublishDate.Add(hh)
  335. //判断是否已经存在
  336. if v.ArticleId < 0 {
  337. fmt.Println("AddCygxArticle Err:")
  338. return err
  339. }
  340. count, err := models.GetArticleCountById(v.ArticleId)
  341. if err != nil && err.Error() != utils.ErrNoRow() {
  342. fmt.Println("AddCygxArticle Err:", err.Error())
  343. return err
  344. }
  345. v.Body = strings.Replace(v.Body, "http://vmp.hzinsights.com", "https://vmp.hzinsights.com", -1)
  346. expertNumStr, expertContentStr, interviewDateStr := BodyAnalysis(v.Body)
  347. var titleNew string
  348. titleNew = v.Title
  349. // 7资金流向 、11大类资产 、51每日复盘 、80医药周报
  350. if v.CategoryId == 7 || v.CategoryId == 11 || v.CategoryId == 51 {
  351. if v.UpdateFrequency == "daily" {
  352. titleNew = v.Title + "(" + strconv.Itoa(v.PublishDate.Year())[2:len(strconv.Itoa(v.PublishDate.Year()))-0] + v.PublishDate.Format("01") + strconv.Itoa(v.PublishDate.Day()) + ")"
  353. } else if v.UpdateFrequency == "weekly" {
  354. titleNew = v.Title + utils.WeekByDate(v.PublishDate)
  355. }
  356. }
  357. if v.CategoryId == 80 {
  358. titleNew = v.Title + utils.WeekByDate(v.PublishDate)
  359. }
  360. if count > 0 {
  361. fmt.Println(k, v.ArticleId, "edit")
  362. bodyText, _ := GetReportContentTextSub(v.Body)
  363. updateParams := make(map[string]interface{})
  364. //updateParams["Title"] = v.Title
  365. updateParams["Title"] = titleNew
  366. updateParams["TitleEn"] = v.TitleEn
  367. updateParams["UpdateFrequency"] = v.UpdateFrequency
  368. updateParams["CreateDate"] = v.CreateDate
  369. updateParams["PublishDate"] = v.PublishDate
  370. updateParams["Body"] = html.EscapeString(v.Body)
  371. updateParams["BodyText"] = bodyText
  372. updateParams["Abstract"] = html.EscapeString(v.Abstract)
  373. updateParams["CategoryName"] = v.CategoryName
  374. updateParams["SubCategoryName"] = v.SubCategoryName
  375. updateParams["CategoryId"] = v.CategoryId
  376. updateParams["PublishStatus"] = v.PublishStatus
  377. updateParams["ExpertBackground"] = expertContentStr
  378. updateParams["ExpertNumber"] = expertNumStr
  379. updateParams["InterviewDate"] = interviewDateStr
  380. //updateParams["IsClass"] = v.IsClass
  381. updateParams["IsSummary"] = v.IsSummary
  382. updateParams["IsReport"] = v.IsReport
  383. updateParams["ReportType"] = v.ReportType
  384. if v.Department != "弘则权益研究" {
  385. v.Department = "弘则权益研究"
  386. }
  387. updateParams["Department"] = v.Department
  388. whereParam := map[string]interface{}{"article_id": v.ArticleId}
  389. err = models.UpdateByExpr(models.CygxArticle{}, whereParam, updateParams)
  390. if err != nil {
  391. fmt.Println("UpdateByExpr Err:" + err.Error())
  392. }
  393. } else {
  394. fmt.Println(k, v.ArticleId, "add")
  395. item := new(models.CygxArticle)
  396. articleIdInt := v.ArticleId
  397. item.ArticleId = articleIdInt
  398. //item.Title = v.Title
  399. item.Title = titleNew
  400. item.TitleEn = v.TitleEn
  401. item.UpdateFrequency = v.UpdateFrequency
  402. item.CreateDate = v.CreateDate
  403. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  404. item.Body = html.EscapeString(v.Body)
  405. item.Abstract = html.EscapeString(v.Abstract)
  406. item.CategoryName = v.CategoryName
  407. item.SubCategoryName = v.SubCategoryName
  408. item.CategoryId = v.CategoryId
  409. item.PublishStatus = v.PublishStatus
  410. item.ExpertBackground = expertContentStr
  411. item.ExpertNumber = expertNumStr
  412. item.InterviewDate = interviewDateStr
  413. item.Department = v.Department
  414. item.ArticleIdMd5 = utils.MD5(strconv.Itoa(articleIdInt))
  415. item.IsClass = v.IsClass
  416. item.IsSummary = v.IsSummary
  417. item.IsReport = v.IsReport
  418. item.ReportType = v.ReportType
  419. _, err = models.AddCygxArticles(item)
  420. if err != nil {
  421. fmt.Println("AddCygxArticle Err:", err.Error())
  422. return err
  423. }
  424. }
  425. //纪要库的数据同步到Es
  426. if v.IsSummary == 1 {
  427. content := html.UnescapeString(v.Body)
  428. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  429. if err != nil {
  430. fmt.Println("AddCygxArticle Err:", err.Error())
  431. return err
  432. }
  433. doc.Find("a").Each(func(i int, a *goquery.Selection) {
  434. a.Remove()
  435. })
  436. bodyText := doc.Text()
  437. item := new(ElasticTestArticleDetail)
  438. item.ArticleId = v.ArticleId
  439. item.Title = v.Title
  440. item.BodyText = bodyText
  441. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  442. EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  443. }
  444. }
  445. return
  446. }