tactics.go 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769
  1. package services
  2. import (
  3. "context"
  4. "fmt"
  5. "github.com/PuerkitoBio/goquery"
  6. "hongze/hongze_cygx/models"
  7. "hongze/hongze_cygx/utils"
  8. "html"
  9. "regexp"
  10. "strconv"
  11. "strings"
  12. "time"
  13. )
  14. // 同步策略文章
  15. func SyncTacticsList() (err error) {
  16. defer func() {
  17. if err != nil {
  18. fmt.Println("同步失败,Err:", err.Error())
  19. }
  20. }()
  21. fmt.Println("同步数据")
  22. indexName := utils.IndexName
  23. endDate := time.Now().AddDate(0, 0, -30).Format(utils.FormatDate)
  24. list, err := models.GetTacticsList2(endDate)
  25. //list, err := models.GetTacticsListAll()
  26. if err != nil {
  27. fmt.Println("GetTacticsList Err:", err.Error())
  28. return
  29. }
  30. fmt.Println("list len:", len(list))
  31. for k, v := range list {
  32. v.Department = "弘则权益研究"
  33. fmt.Println(k, v.ArticleId)
  34. //
  35. //publishDate, err := time.Parse(utils.FormatDateTime, v.PublishDate)
  36. //if err != nil {
  37. // fmt.Println("time.Parse:", err.Error())
  38. // return err
  39. //}
  40. //fmt.Println(publishDate)
  41. hh, _ := time.ParseDuration("8h")
  42. //pDate := publishDate.Add(hh)
  43. v.PublishDate = v.PublishDate.Add(hh)
  44. //判断是否已经存在
  45. if v.ArticleId < 0 {
  46. fmt.Println("参数错误")
  47. return err
  48. }
  49. count, err := models.GetArticleCountById(v.ArticleId)
  50. if err != nil && err.Error() != utils.ErrNoRow() {
  51. return err
  52. }
  53. v.Body = strings.Replace(v.Body, "http://vmp.hzinsights.com", "https://vmp.hzinsights.com", -1)
  54. expertNumStr, expertContentStr, interviewDateStr := BodyAnalysis(v.Body)
  55. if count > 0 {
  56. bodyText, _ := GetReportContentTextSub(v.Body)
  57. updateParams := make(map[string]interface{})
  58. updateParams["Title"] = v.Title
  59. updateParams["TitleEn"] = v.TitleEn
  60. updateParams["UpdateFrequency"] = v.UpdateFrequency
  61. updateParams["CreateDate"] = v.CreateDate
  62. updateParams["PublishDate"] = v.PublishDate
  63. updateParams["Body"] = html.EscapeString(v.Body)
  64. updateParams["BodyText"] = bodyText
  65. updateParams["Abstract"] = html.EscapeString(v.Abstract)
  66. updateParams["CategoryName"] = v.CategoryName
  67. updateParams["SubCategoryName"] = v.SubCategoryName
  68. updateParams["CategoryId"] = v.CategoryId
  69. updateParams["PublishStatus"] = v.PublishStatus
  70. updateParams["ExpertBackground"] = expertContentStr
  71. updateParams["ExpertNumber"] = expertNumStr
  72. updateParams["InterviewDate"] = interviewDateStr
  73. if v.Department != "弘则权益研究" {
  74. v.Department = "弘则权益研究"
  75. }
  76. updateParams["Department"] = v.Department
  77. whereParam := map[string]interface{}{"article_id": v.ArticleId}
  78. err = models.UpdateByExpr(models.CygxArticle{}, whereParam, updateParams)
  79. if err != nil {
  80. fmt.Println("UpdateByExpr Err:" + err.Error())
  81. }
  82. } else {
  83. fmt.Println(k, v.ArticleId, "add")
  84. item := new(models.CygxArticle)
  85. articleIdInt := v.ArticleId
  86. item.ArticleId = articleIdInt
  87. item.Title = v.Title
  88. item.TitleEn = v.TitleEn
  89. item.UpdateFrequency = v.UpdateFrequency
  90. item.CreateDate = v.CreateDate
  91. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  92. item.Body = html.EscapeString(v.Body)
  93. item.Abstract = html.EscapeString(v.Abstract)
  94. item.CategoryName = v.CategoryName
  95. item.SubCategoryName = v.SubCategoryName
  96. item.CategoryId = v.CategoryId
  97. item.PublishStatus = v.PublishStatus
  98. item.ExpertBackground = expertContentStr
  99. item.ExpertNumber = expertNumStr
  100. item.InterviewDate = interviewDateStr
  101. item.Department = v.Department
  102. item.ArticleIdMd5 = utils.MD5(strconv.Itoa(articleIdInt))
  103. _, err = models.AddCygxArticle(item)
  104. if err != nil {
  105. fmt.Println("AddCygxArticle Err:", err.Error())
  106. return err
  107. }
  108. }
  109. content := html.UnescapeString(v.Body)
  110. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  111. if err != nil {
  112. fmt.Println("create doc err:", err.Error())
  113. return err
  114. }
  115. doc.Find("a").Each(func(i int, a *goquery.Selection) {
  116. a.Remove()
  117. })
  118. bodyText := doc.Text()
  119. item := new(ElasticTestArticleDetail)
  120. item.ArticleId = v.ArticleId
  121. item.Title = v.Title
  122. item.BodyText = bodyText
  123. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  124. EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  125. }
  126. return
  127. }
  128. // 同步策略文章
  129. func SyncCygxArticleList() (err error) {
  130. defer func() {
  131. if err != nil {
  132. fmt.Println("同步失败,Err:", err.Error())
  133. }
  134. }()
  135. fmt.Println("同步数据")
  136. indexName := utils.IndexName
  137. fmt.Println("indexName:", indexName)
  138. time.Sleep(5 * time.Second)
  139. list, err := models.GetCygxArticleListAll()
  140. if err != nil {
  141. fmt.Println("GetTacticsList Err:", err.Error())
  142. return
  143. }
  144. fmt.Println("list len:", len(list))
  145. for k, v := range list {
  146. v.Department = "弘则权益研究"
  147. fmt.Println(k, v.ArticleId)
  148. //判断是否已经存在
  149. if v.ArticleId < 0 {
  150. fmt.Println("参数错误")
  151. return err
  152. }
  153. content := html.UnescapeString(v.Body)
  154. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  155. if err != nil {
  156. fmt.Println("create doc err:", err.Error())
  157. return err
  158. }
  159. doc.Find("a").Each(func(i int, a *goquery.Selection) {
  160. a.Remove()
  161. })
  162. bodyText := doc.Text()
  163. item := new(ElasticTestArticleDetail)
  164. item.ArticleId = v.ArticleId
  165. item.Title = v.Title
  166. item.BodyText = bodyText
  167. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  168. EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  169. }
  170. return
  171. }
  172. // body 解析
  173. func BodyAnalysis(body string) (expertNumStr, expertContentStr, interviewDateStr string) {
  174. body = html.UnescapeString(body)
  175. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  176. if err != nil {
  177. fmt.Println("create doc err:", err.Error())
  178. return
  179. }
  180. var expertNumArr []string
  181. var expertContentArr []string
  182. var interviewDateArr []string
  183. doc.Find("p").Each(func(i int, s *goquery.Selection) {
  184. contentTxt := s.Text()
  185. if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
  186. interviewDate := s.Next().Text()
  187. interviewDateArr = append(interviewDateArr, interviewDate)
  188. }
  189. if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
  190. expertContent := s.Next().Text()
  191. if expertContent == "" {
  192. expertContent = contentTxt
  193. }
  194. if expertContent != "" {
  195. rightIndex := strings.Index(expertContent, ")")
  196. if rightIndex == 0 {
  197. rightIndex = strings.Index(expertContent, ")")
  198. }
  199. if rightIndex > 0 {
  200. expertNum := expertContent[:rightIndex]
  201. expertNum = strings.Replace(expertNum, "(", "", -1)
  202. expertNum = strings.Replace(expertNum, "(", "", -1)
  203. expertNum = strings.Replace(expertNum, "专家评价", "", -1)
  204. if expertNum != "" {
  205. expertNumArr = append(expertNumArr, expertNum)
  206. rightIndex = rightIndex
  207. expertContentStr := expertContent[rightIndex:]
  208. expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
  209. expertContentStr = strings.TrimLeft(expertContentStr, ":")
  210. expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
  211. expertContentArr = append(expertContentArr, expertContentStr)
  212. }
  213. }
  214. }
  215. }
  216. })
  217. if len(expertContentArr) == 0 {
  218. doc.Find("pre").Each(func(i int, s *goquery.Selection) {
  219. contentTxt := s.Text()
  220. if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
  221. interviewDate := s.Next().Text()
  222. if interviewDate != "" {
  223. interviewDateArr = append(interviewDateArr, interviewDate)
  224. }
  225. }
  226. if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
  227. expertContent := s.Next().Text()
  228. if expertContent == "" {
  229. expertContent = contentTxt
  230. }
  231. if expertContent != "" {
  232. rightIndex := strings.Index(expertContent, ")")
  233. if rightIndex == 0 {
  234. rightIndex = strings.Index(expertContent, ")")
  235. }
  236. expertNum := expertContent[:rightIndex]
  237. expertNum = strings.Replace(expertNum, "(", "", -1)
  238. expertNum = strings.Replace(expertNum, "(", "", -1)
  239. expertNum = strings.Replace(expertNum, "专家评价", "", -1)
  240. if expertNum != "" {
  241. expertNumArr = append(expertNumArr, expertNum)
  242. rightIndex = rightIndex
  243. expertContentStr := expertContent[rightIndex:]
  244. expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
  245. expertContentStr = strings.TrimLeft(expertContentStr, ":")
  246. expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
  247. if expertContentStr != "" {
  248. expertContentArr = append(expertContentArr, expertContentStr)
  249. }
  250. }
  251. }
  252. }
  253. })
  254. }
  255. if len(expertNumArr) > 0 {
  256. expertNumStr = expertNumArr[0]
  257. if expertNumStr != "" {
  258. expertNumStr = strings.Replace(expertNumStr, "#:", "", -1)
  259. expertNumStr = strings.Replace(expertNumStr, "# ", "", -1)
  260. expertNumStr = strings.Trim(expertNumStr, "")
  261. }
  262. }
  263. if len(expertContentArr) > 0 {
  264. expertContentStr = expertContentArr[0]
  265. }
  266. if len(interviewDateArr) > 0 {
  267. interviewDateStr = interviewDateArr[0]
  268. }
  269. return
  270. }
  271. // 同步策略,报告文章
  272. func SyncTacticsListAddreport(cont context.Context) (err error) {
  273. defer func() {
  274. if err != nil {
  275. fmt.Println("同步失败,Err:", err.Error())
  276. }
  277. }()
  278. fmt.Println("同步数据")
  279. //indexName := utils.IndexName
  280. endDate := time.Now().AddDate(0, 0, -30).Format(utils.FormatDate)
  281. list, err := models.GetTacticsList2(endDate)
  282. //list, err := models.GetTacticsListAll2()
  283. if err != nil {
  284. fmt.Println("GetTacticsList Err:", err.Error())
  285. return
  286. }
  287. listCustomArticle, err := models.GetCustomArticleId() //手动归类的文章,不替换文章类型
  288. if err != nil {
  289. fmt.Println("GetTacticsList Err:", err.Error())
  290. return
  291. }
  292. listGetMatchTypeName, errMatch := models.GetMatchTypeNamenNotNull() //手动归类的文章,不替换文章类型
  293. if errMatch != nil {
  294. fmt.Println("GetTacticsList Err:", errMatch.Error())
  295. return
  296. }
  297. fmt.Println("list len:", len(list))
  298. summaryCategoryIds := "28,32,45,50,57,62,72,74,79,84,86,88,90,93,95,96" //纪要库的文章类型categoty_id
  299. listSummary := strings.Split(summaryCategoryIds, ",")
  300. noSummaryArticleIds := "3454,3456,3457,3459,2449,2450,2453,2454,2459,2530,2583,2663,2670,2699,2715,2732,2748,2759,2399,2356,2870,3173,2978,2826,3470" //非纪要库类型的文章ID
  301. listNoSummaryArticleIds := strings.Split(noSummaryArticleIds, ",")
  302. listPermission, errper := models.GetPermissionMappingCategoryID()
  303. if errper != nil {
  304. fmt.Println("GetTacticsList Err:", errper.Error())
  305. return
  306. }
  307. summaryMap := make(map[int]int)
  308. for _, vSum := range listSummary {
  309. vSumInt, _ := strconv.Atoi(vSum)
  310. summaryMap[vSumInt] = 1
  311. }
  312. for k, v := range list {
  313. //同步匹配类型
  314. matchTypeName := ""
  315. for _, vMatch := range listGetMatchTypeName {
  316. if v.CategoryId == vMatch.CategoryId {
  317. matchTypeName = vMatch.MatchTypeName
  318. }
  319. }
  320. //是否属于纪要库的数据
  321. if _, has := summaryMap[v.CategoryId]; has {
  322. v.IsSummary = 1
  323. }
  324. //for _, vSum := range listSummary {
  325. // vSumInt, _ := strconv.Atoi(vSum)
  326. // if v.CategoryId == vSumInt {
  327. // v.IsSummary = 1
  328. // }
  329. //}
  330. //排除不属于纪要库类型的文章
  331. for _, vArt := range listNoSummaryArticleIds {
  332. vArtInt, _ := strconv.Atoi(vArt)
  333. if v.ArticleId == vArtInt {
  334. v.IsSummary = 0
  335. }
  336. }
  337. for _, vPer := range listPermission {
  338. if v.CategoryId == vPer.CategoryId {
  339. v.IsReport = 1
  340. }
  341. }
  342. if v.IsReport > 0 {
  343. //是否属于策略 策略自动归类
  344. //是否属于行业报告 行业报告自动归类
  345. if v.CategoryId == 7 || v.CategoryId == 9 || v.CategoryId == 11 || v.CategoryId == 51 || v.CategoryId == 52 || v.CategoryId == 64 || v.CategoryId == 80 || v.CategoryId == 87 {
  346. v.IsClass = 1
  347. v.ReportType = 1 //是否属于行业报告
  348. } else {
  349. v.ReportType = 2 //是否属于产业报告
  350. }
  351. }
  352. v.Department = "弘则权益研究"
  353. //fmt.Println(k, v.ArticleId)
  354. hh, _ := time.ParseDuration("8h")
  355. //pDate := publishDate.Add(hh)
  356. v.PublishDate = v.PublishDate.Add(hh)
  357. //判断是否已经存在
  358. if v.ArticleId < 0 {
  359. fmt.Println("AddCygxArticle Err:")
  360. return err
  361. }
  362. count, err := models.GetArticleCountById(v.ArticleId)
  363. if err != nil && err.Error() != utils.ErrNoRow() {
  364. fmt.Println("AddCygxArticle Err:", err.Error())
  365. return err
  366. }
  367. v.Body = strings.Replace(v.Body, "http://vmp.hzinsights.com", "https://vmp.hzinsights.com", -1)
  368. expertNumStr, expertContentStr, interviewDateStr, fileLink, bodyReturn := BodyAnalysis2(v.Body)
  369. if strings.Index(v.Body, "报告全文(") > 0 && strings.Index(v.Body, "PDF格式报告下载.pdf") > 0 {
  370. v.Body = strings.Replace(v.Body, "报告全文(", "", -1)
  371. v.Body = strings.Replace(v.Body, "PDF格式报告下载.pdf", "", -1)
  372. v.Body = strings.Replace(v.Body, "):", "", -1)
  373. }
  374. //fmt.Println(fileLink)
  375. var titleNew string
  376. titleNew = v.Title
  377. // 7资金流向 、11大类资产 、51每日复盘 、80医药周报、9估值研究
  378. if v.CategoryId == 7 || v.CategoryId == 11 || v.CategoryId == 51 || v.CategoryId == 9 {
  379. if v.UpdateFrequency == "daily" {
  380. var daystr string
  381. daystr = strconv.Itoa(v.PublishDate.Day())
  382. if len(daystr) == 1 {
  383. daystr = "0" + daystr
  384. }
  385. titleNew = v.Title + "(" + strconv.Itoa(v.PublishDate.Year())[2:len(strconv.Itoa(v.PublishDate.Year()))-0] + v.PublishDate.Format("01") + daystr + ")"
  386. } else if v.UpdateFrequency == "weekly" {
  387. titleNew = v.Title + utils.WeekByDate(v.PublishDate)
  388. }
  389. }
  390. if v.CategoryId == 80 {
  391. titleNew = v.Title + utils.WeekByDate(v.PublishDate)
  392. }
  393. //fmt.Println(k)
  394. //fmt.Println(expertContentStr)
  395. if count > 0 {
  396. fmt.Println(k, v.ArticleId, "edit")
  397. var isCustom bool
  398. bodyText, _ := GetReportContentTextSub(v.Body)
  399. updateParams := make(map[string]interface{})
  400. //updateParams["Title"] = v.Title
  401. updateParams["Title"] = titleNew
  402. updateParams["TitleEn"] = v.TitleEn
  403. updateParams["UpdateFrequency"] = v.UpdateFrequency
  404. updateParams["CreateDate"] = v.CreateDate
  405. updateParams["PublishDate"] = v.PublishDate
  406. //updateParams["Body"] = html.EscapeString(v.Body)
  407. updateParams["Body"] = html.EscapeString(bodyReturn)
  408. updateParams["BodyText"] = bodyText
  409. updateParams["Abstract"] = html.EscapeString(v.Abstract)
  410. updateParams["CategoryName"] = v.CategoryName
  411. for _, vCustom := range listCustomArticle {
  412. if v.ArticleId == vCustom.ArticleId {
  413. fmt.Println("手动归类的文章:" + strconv.Itoa(v.ArticleId))
  414. isCustom = true
  415. }
  416. }
  417. if isCustom == false {
  418. updateParams["CategoryId"] = v.CategoryId
  419. updateParams["MatchTypeName"] = matchTypeName
  420. updateParams["IsSummary"] = v.IsSummary
  421. updateParams["IsReport"] = v.IsReport
  422. updateParams["ReportType"] = v.ReportType
  423. updateParams["SubCategoryName"] = v.SubCategoryName
  424. }
  425. //updateParams["CategoryId"] = v.CategoryId
  426. updateParams["PublishStatus"] = v.PublishStatus
  427. updateParams["ExpertBackground"] = expertContentStr
  428. updateParams["ExpertNumber"] = expertNumStr
  429. updateParams["InterviewDate"] = interviewDateStr
  430. //updateParams["IsClass"] = v.IsClass
  431. if v.Department != "弘则权益研究" {
  432. v.Department = "弘则权益研究"
  433. }
  434. updateParams["Department"] = v.Department
  435. updateParams["FileLink"] = fileLink
  436. whereParam := map[string]interface{}{"article_id": v.ArticleId}
  437. err = models.UpdateByExpr(models.CygxArticle{}, whereParam, updateParams)
  438. if err != nil {
  439. fmt.Println("UpdateByExpr Err:" + err.Error())
  440. }
  441. } else {
  442. fmt.Println(k, v.ArticleId, "add")
  443. item := new(models.CygxArticle)
  444. articleIdInt := v.ArticleId
  445. item.ArticleId = articleIdInt
  446. //item.Title = v.Title
  447. item.Title = titleNew
  448. item.TitleEn = v.TitleEn
  449. item.UpdateFrequency = v.UpdateFrequency
  450. item.CreateDate = v.CreateDate
  451. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  452. //item.Body = html.EscapeString(v.Body)
  453. item.Body = html.EscapeString(bodyReturn)
  454. item.Abstract = html.EscapeString(v.Abstract)
  455. item.CategoryName = v.CategoryName
  456. item.SubCategoryName = v.SubCategoryName
  457. item.CategoryId = v.CategoryId
  458. item.PublishStatus = v.PublishStatus
  459. item.ExpertBackground = expertContentStr
  460. item.ExpertNumber = expertNumStr
  461. item.InterviewDate = interviewDateStr
  462. item.Department = v.Department
  463. item.ArticleIdMd5 = utils.MD5(strconv.Itoa(articleIdInt))
  464. item.IsClass = v.IsClass
  465. item.IsSummary = v.IsSummary
  466. item.IsReport = v.IsReport
  467. item.ReportType = v.ReportType
  468. item.FileLink = fileLink
  469. item.MatchTypeName = matchTypeName
  470. _, err = models.AddCygxArticles(item)
  471. if err != nil {
  472. fmt.Println("AddCygxArticle Err:", err.Error())
  473. return err
  474. }
  475. }
  476. ////纪要库的数据同步到Es
  477. //if v.IsSummary == 1 {
  478. // content := html.UnescapeString(v.Body)
  479. // doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  480. // if err != nil {
  481. // fmt.Println("AddCygxArticle Err:", err.Error())
  482. // return err
  483. // }
  484. // doc.Find("a").Each(func(i int, a *goquery.Selection) {
  485. // a.Remove()
  486. // })
  487. // bodyText := doc.Text()
  488. // item := new(ElasticTestArticleDetail)
  489. // item.ArticleId = v.ArticleId
  490. // item.Title = v.Title
  491. // item.BodyText = bodyText
  492. // item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  493. // EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  494. //}
  495. }
  496. return
  497. }
  498. // 同步策略到Es
  499. func SyncTacticsListToEs() (err error) {
  500. defer func() {
  501. if err != nil {
  502. fmt.Println("同步失败,Err:", err.Error())
  503. }
  504. }()
  505. fmt.Println("同步数据到Es")
  506. indexName := utils.IndexName
  507. endDate := time.Now().AddDate(0, 0, -30).Format(utils.FormatDate)
  508. list, err := models.GetTacticsList(endDate)
  509. //list, err := models.GetTacticsListAll()
  510. if err != nil {
  511. fmt.Println("GetTacticsList Err:", err.Error())
  512. return
  513. }
  514. fmt.Println("list len:", len(list))
  515. for k, v := range list {
  516. //是否属于纪要库的数据
  517. v.Department = "弘则权益研究"
  518. fmt.Println(k, v.ArticleId)
  519. hh, _ := time.ParseDuration("8h")
  520. //pDate := publishDate.Add(hh)
  521. v.PublishDate = v.PublishDate.Add(hh)
  522. //判断是否已经存在
  523. if v.ArticleId < 0 {
  524. fmt.Println("AddCygxArticle Err:")
  525. return err
  526. }
  527. //纪要库的数据同步到Es
  528. content := html.UnescapeString(v.Body)
  529. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  530. if err != nil {
  531. fmt.Println("AddCygxArticle Err:", err.Error())
  532. return err
  533. }
  534. doc.Find("a").Each(func(i int, a *goquery.Selection) {
  535. a.Remove()
  536. })
  537. bodyText := doc.Text()
  538. item := new(ElasticTestArticleDetail)
  539. item.ArticleId = v.ArticleId
  540. item.Title = v.Title
  541. item.BodyText = bodyText
  542. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  543. EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  544. }
  545. return
  546. }
  547. // body 解析
  548. func BodyAnalysis2(body string) (expertNumStr, expertContentStr, interviewDateStr, fileLink, bodyReturn string) {
  549. body = html.UnescapeString(body)
  550. //fmt.Println(body)
  551. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  552. if err != nil {
  553. fmt.Println("create doc err:", err.Error())
  554. return
  555. }
  556. var expertNumArr []string
  557. var expertContentArr []string
  558. var interviewDateArr []string
  559. //var fileLink string
  560. doc.Find("p").Each(func(i int, s *goquery.Selection) {
  561. contentTxt := s.Text()
  562. if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
  563. interviewDate := s.Next().Text()
  564. interviewDateArr = append(interviewDateArr, interviewDate)
  565. }
  566. if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
  567. expertContent := s.Next().Text()
  568. if expertContent == "" {
  569. expertContent = contentTxt
  570. }
  571. if expertContent != "" {
  572. rightIndex := strings.Index(expertContent, ")")
  573. if rightIndex == 0 {
  574. rightIndex = strings.Index(expertContent, ")")
  575. }
  576. if rightIndex > 0 {
  577. expertNum := expertContent[:rightIndex]
  578. expertNum = strings.Replace(expertNum, "(", "", -1)
  579. expertNum = strings.Replace(expertNum, "(", "", -1)
  580. expertNum = strings.Replace(expertNum, "专家评价", "", -1)
  581. if expertNum != "" {
  582. expertNumArr = append(expertNumArr, expertNum)
  583. rightIndex = rightIndex
  584. expertContentStr := expertContent[rightIndex:]
  585. expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
  586. expertContentStr = strings.TrimLeft(expertContentStr, ":")
  587. expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
  588. expertContentArr = append(expertContentArr, expertContentStr)
  589. }
  590. }
  591. }
  592. }
  593. })
  594. if len(expertContentArr) == 0 {
  595. doc.Find("pre").Each(func(i int, s *goquery.Selection) {
  596. contentTxt := s.Text()
  597. if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
  598. interviewDate := s.Next().Text()
  599. if interviewDate != "" {
  600. interviewDateArr = append(interviewDateArr, interviewDate)
  601. }
  602. }
  603. if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
  604. expertContent := s.Next().Text()
  605. if expertContent == "" {
  606. expertContent = contentTxt
  607. }
  608. if expertContent != "" {
  609. rightIndex := strings.Index(expertContent, ")")
  610. if rightIndex == 0 {
  611. rightIndex = strings.Index(expertContent, ")")
  612. }
  613. expertNum := expertContent[:rightIndex]
  614. expertNum = strings.Replace(expertNum, "(", "", -1)
  615. expertNum = strings.Replace(expertNum, "(", "", -1)
  616. expertNum = strings.Replace(expertNum, "专家评价", "", -1)
  617. if expertNum != "" {
  618. expertNumArr = append(expertNumArr, expertNum)
  619. rightIndex = rightIndex
  620. expertContentStr := expertContent[rightIndex:]
  621. expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
  622. expertContentStr = strings.TrimLeft(expertContentStr, ":")
  623. expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
  624. if expertContentStr != "" {
  625. expertContentArr = append(expertContentArr, expertContentStr)
  626. }
  627. }
  628. }
  629. }
  630. })
  631. }
  632. if len(expertNumArr) > 0 {
  633. expertNumStr = expertNumArr[0]
  634. if expertNumStr != "" {
  635. expertNumStr = strings.Replace(expertNumStr, "#:", "", -1)
  636. expertNumStr = strings.Replace(expertNumStr, "# ", "", -1)
  637. expertNumStr = strings.Replace(expertNumStr, " ", "", -1)
  638. expertNumStr = strings.Replace(expertNumStr, "\n", "", -1)
  639. expertNumStr = strings.Trim(expertNumStr, "")
  640. }
  641. }
  642. if len(expertContentArr) > 0 {
  643. expertContentStr = expertContentArr[0]
  644. }
  645. //当处理过之后的专家背景长度大于600的时候,说明他的格式跟之前的不一样,还要做二次处理 600 是一个约值,先运行看看
  646. if len(expertContentStr) > 600 {
  647. strnum := strings.Index(expertContentStr, "#专家评价:")
  648. content := expertContentStr[strnum:len(expertContentStr)]
  649. strnum2 := strings.Index(content, "(")
  650. content = content[strnum2+9 : len(content)] //中文括号3位 专家编号6位
  651. expertContentStr = content
  652. }
  653. //if strings.Index(body, "报告全文(") > 0 && strings.Index(body, "PDF格式报告下载.pdf") > 0 {
  654. // numStar := strings.Index(body, "http")
  655. // numEnd := strings.Index(body, ".pdf")
  656. // fmt.Println("获取PDF链接")
  657. // fileLink = body[numStar : numEnd+4]
  658. //}
  659. var hrefRegexp = regexp.MustCompile("(?m)<a.*?[^<]>.*?</a>")
  660. match := hrefRegexp.FindAllString(body, -1)
  661. if match != nil {
  662. for _, v := range match {
  663. //if k == 0 && strings.Index(v, ".pdf") > 0 {
  664. // numStar := strings.Index(v, "http")
  665. // numEnd := strings.Index(v, ".pdf")
  666. // fileLink = v[numStar : numEnd+4]
  667. //}
  668. //处理a标签中的PDF
  669. numStarAcount := strings.Index(v, "<a")
  670. numEndAcount := strings.Index(v, "<img")
  671. if numStarAcount < numEndAcount && strings.Index(fileLink, ".pdf") > 0 {
  672. Acount := v[numStarAcount:numEndAcount]
  673. if Acount != "" {
  674. body = strings.Replace(body, Acount, "", -1)
  675. }
  676. }
  677. }
  678. if !strings.HasPrefix(fileLink, "https") && len(fileLink) > 0 {
  679. fileLink = "https" + fileLink[4:len(fileLink)]
  680. }
  681. fileLink = strings.Replace(fileLink, "https://vmp.hzinsights.com/article/pdfviewer/?file=", "", -1)
  682. fileLink = strings.Replace(fileLink, "http://vmp.hzinsights.com/article/pdfviewer/?file=", "", -1)
  683. body = strings.Replace(body, "完整报告请点击链接:", "", -1)
  684. body = strings.Replace(body, "PDF格式报告下载.pdf", "", -1)
  685. body = strings.Replace(body, "报告全文():", "", -1)
  686. }
  687. bodyReturn = body
  688. if len(interviewDateArr) > 0 {
  689. interviewDateStr = interviewDateArr[0]
  690. }
  691. return
  692. }
  693. //func init() {
  694. // GetSummarytoEs(7720)
  695. //}
  696. func GetSummarytoEs(articleId int) (err error) {
  697. defer func() {
  698. if err != nil {
  699. fmt.Println("同步ES记录失败" + err.Error())
  700. go utils.SendAlarmMsg("同步ES记录失败"+err.Error(), 2)
  701. }
  702. }()
  703. //endDate := time.Now().AddDate(0, 0, -30).Format(utils.FormatDate)
  704. v, err := models.GetArticleDetailTestById(articleId)
  705. //allList, err := models.GetArticleAllDate(endDate)
  706. if err != nil && err.Error() != utils.ErrNoRow() {
  707. fmt.Println("GetArticleAll Err:", err.Error())
  708. return
  709. }
  710. if v == nil {
  711. return
  712. }
  713. if v.IsSummary != 1 {
  714. return
  715. }
  716. indexName := utils.IndexName
  717. //for _, v := range allList {
  718. content := html.UnescapeString(v.Body)
  719. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  720. if err != nil {
  721. fmt.Println("create doc err:", err.Error())
  722. return err
  723. }
  724. bodyText := doc.Text()
  725. item := new(ElasticTestArticleDetail)
  726. item.ArticleId = v.ArticleId
  727. item.Title = v.Title
  728. item.PublishDate = v.PublishDate
  729. bodyText, _ = GetReportContentTextSubNew(v.Body)
  730. item.BodyText = bodyText
  731. item.CategoryId = strconv.Itoa(v.CategoryId)
  732. item.ExpertBackground = v.ExpertBackground
  733. item.Annotation, _ = GetReportContentTextSubNew(v.Annotation)
  734. item.Abstract, _ = GetReportContentTextSubNew(v.Abstract)
  735. EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  736. fmt.Println(v.ArticleId)
  737. //}
  738. return
  739. }