// tactics.go (24 KB)

  1. package services
  2. import (
  3. "fmt"
  4. "github.com/PuerkitoBio/goquery"
  5. "hongze/hongze_cygx/models"
  6. "hongze/hongze_cygx/utils"
  7. "html"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "time"
  12. )
  13. //同步策略文章
  14. func SyncTacticsList() (err error) {
  15. defer func() {
  16. if err != nil {
  17. fmt.Println("同步失败,Err:", err.Error())
  18. }
  19. }()
  20. fmt.Println("同步数据")
  21. indexName := utils.IndexName
  22. endDate := time.Now().AddDate(0, 0, -30).Format(utils.FormatDate)
  23. list, err := models.GetTacticsList2(endDate)
  24. //list, err := models.GetTacticsListAll()
  25. if err != nil {
  26. fmt.Println("GetTacticsList Err:", err.Error())
  27. return
  28. }
  29. fmt.Println("list len:", len(list))
  30. for k, v := range list {
  31. v.Department = "弘则权益研究"
  32. fmt.Println(k, v.ArticleId)
  33. //
  34. //publishDate, err := time.Parse(utils.FormatDateTime, v.PublishDate)
  35. //if err != nil {
  36. // fmt.Println("time.Parse:", err.Error())
  37. // return err
  38. //}
  39. //fmt.Println(publishDate)
  40. hh, _ := time.ParseDuration("8h")
  41. //pDate := publishDate.Add(hh)
  42. v.PublishDate = v.PublishDate.Add(hh)
  43. //判断是否已经存在
  44. if v.ArticleId < 0 {
  45. fmt.Println("参数错误")
  46. return err
  47. }
  48. count, err := models.GetArticleCountById(v.ArticleId)
  49. if err != nil && err.Error() != utils.ErrNoRow() {
  50. return err
  51. }
  52. v.Body = strings.Replace(v.Body, "http://vmp.hzinsights.com", "https://vmp.hzinsights.com", -1)
  53. expertNumStr, expertContentStr, interviewDateStr := BodyAnalysis(v.Body)
  54. if count > 0 {
  55. bodyText, _ := GetReportContentTextSub(v.Body)
  56. updateParams := make(map[string]interface{})
  57. updateParams["Title"] = v.Title
  58. updateParams["TitleEn"] = v.TitleEn
  59. updateParams["UpdateFrequency"] = v.UpdateFrequency
  60. updateParams["CreateDate"] = v.CreateDate
  61. updateParams["PublishDate"] = v.PublishDate
  62. updateParams["Body"] = html.EscapeString(v.Body)
  63. updateParams["BodyText"] = bodyText
  64. updateParams["Abstract"] = html.EscapeString(v.Abstract)
  65. updateParams["CategoryName"] = v.CategoryName
  66. updateParams["SubCategoryName"] = v.SubCategoryName
  67. updateParams["CategoryId"] = v.CategoryId
  68. updateParams["PublishStatus"] = v.PublishStatus
  69. updateParams["ExpertBackground"] = expertContentStr
  70. updateParams["ExpertNumber"] = expertNumStr
  71. updateParams["InterviewDate"] = interviewDateStr
  72. if v.Department != "弘则权益研究" {
  73. v.Department = "弘则权益研究"
  74. }
  75. updateParams["Department"] = v.Department
  76. whereParam := map[string]interface{}{"article_id": v.ArticleId}
  77. err = models.UpdateByExpr(models.CygxArticle{}, whereParam, updateParams)
  78. if err != nil {
  79. fmt.Println("UpdateByExpr Err:" + err.Error())
  80. }
  81. } else {
  82. fmt.Println(k, v.ArticleId, "add")
  83. item := new(models.CygxArticle)
  84. articleIdInt := v.ArticleId
  85. item.ArticleId = articleIdInt
  86. item.Title = v.Title
  87. item.TitleEn = v.TitleEn
  88. item.UpdateFrequency = v.UpdateFrequency
  89. item.CreateDate = v.CreateDate
  90. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  91. item.Body = html.EscapeString(v.Body)
  92. item.Abstract = html.EscapeString(v.Abstract)
  93. item.CategoryName = v.CategoryName
  94. item.SubCategoryName = v.SubCategoryName
  95. item.CategoryId = v.CategoryId
  96. item.PublishStatus = v.PublishStatus
  97. item.ExpertBackground = expertContentStr
  98. item.ExpertNumber = expertNumStr
  99. item.InterviewDate = interviewDateStr
  100. item.Department = v.Department
  101. item.ArticleIdMd5 = utils.MD5(strconv.Itoa(articleIdInt))
  102. _, err = models.AddCygxArticle(item)
  103. if err != nil {
  104. fmt.Println("AddCygxArticle Err:", err.Error())
  105. return err
  106. }
  107. }
  108. content := html.UnescapeString(v.Body)
  109. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  110. if err != nil {
  111. fmt.Println("create doc err:", err.Error())
  112. return err
  113. }
  114. doc.Find("a").Each(func(i int, a *goquery.Selection) {
  115. a.Remove()
  116. })
  117. bodyText := doc.Text()
  118. item := new(ElasticTestArticleDetail)
  119. item.ArticleId = v.ArticleId
  120. item.Title = v.Title
  121. item.BodyText = bodyText
  122. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  123. EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  124. }
  125. return
  126. }
  127. //同步策略文章
  128. func SyncCygxArticleList() (err error) {
  129. defer func() {
  130. if err != nil {
  131. fmt.Println("同步失败,Err:", err.Error())
  132. }
  133. }()
  134. fmt.Println("同步数据")
  135. indexName := utils.IndexName
  136. fmt.Println("indexName:", indexName)
  137. time.Sleep(5 * time.Second)
  138. list, err := models.GetCygxArticleListAll()
  139. if err != nil {
  140. fmt.Println("GetTacticsList Err:", err.Error())
  141. return
  142. }
  143. fmt.Println("list len:", len(list))
  144. for k, v := range list {
  145. v.Department = "弘则权益研究"
  146. fmt.Println(k, v.ArticleId)
  147. //判断是否已经存在
  148. if v.ArticleId < 0 {
  149. fmt.Println("参数错误")
  150. return err
  151. }
  152. content := html.UnescapeString(v.Body)
  153. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  154. if err != nil {
  155. fmt.Println("create doc err:", err.Error())
  156. return err
  157. }
  158. doc.Find("a").Each(func(i int, a *goquery.Selection) {
  159. a.Remove()
  160. })
  161. bodyText := doc.Text()
  162. item := new(ElasticTestArticleDetail)
  163. item.ArticleId = v.ArticleId
  164. item.Title = v.Title
  165. item.BodyText = bodyText
  166. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  167. EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  168. }
  169. return
  170. }
  171. //body 解析
  172. func BodyAnalysis(body string) (expertNumStr, expertContentStr, interviewDateStr string) {
  173. body = html.UnescapeString(body)
  174. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  175. if err != nil {
  176. fmt.Println("create doc err:", err.Error())
  177. return
  178. }
  179. var expertNumArr []string
  180. var expertContentArr []string
  181. var interviewDateArr []string
  182. doc.Find("p").Each(func(i int, s *goquery.Selection) {
  183. contentTxt := s.Text()
  184. if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
  185. interviewDate := s.Next().Text()
  186. interviewDateArr = append(interviewDateArr, interviewDate)
  187. }
  188. if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
  189. expertContent := s.Next().Text()
  190. if expertContent == "" {
  191. expertContent = contentTxt
  192. }
  193. if expertContent != "" {
  194. rightIndex := strings.Index(expertContent, ")")
  195. if rightIndex == 0 {
  196. rightIndex = strings.Index(expertContent, ")")
  197. }
  198. if rightIndex > 0 {
  199. expertNum := expertContent[:rightIndex]
  200. expertNum = strings.Replace(expertNum, "(", "", -1)
  201. expertNum = strings.Replace(expertNum, "(", "", -1)
  202. expertNum = strings.Replace(expertNum, "专家评价", "", -1)
  203. if expertNum != "" {
  204. expertNumArr = append(expertNumArr, expertNum)
  205. rightIndex = rightIndex
  206. expertContentStr := expertContent[rightIndex:]
  207. expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
  208. expertContentStr = strings.TrimLeft(expertContentStr, ":")
  209. expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
  210. expertContentArr = append(expertContentArr, expertContentStr)
  211. }
  212. }
  213. }
  214. }
  215. })
  216. if len(expertContentArr) == 0 {
  217. doc.Find("pre").Each(func(i int, s *goquery.Selection) {
  218. contentTxt := s.Text()
  219. if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
  220. interviewDate := s.Next().Text()
  221. if interviewDate != "" {
  222. interviewDateArr = append(interviewDateArr, interviewDate)
  223. }
  224. }
  225. if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
  226. expertContent := s.Next().Text()
  227. if expertContent == "" {
  228. expertContent = contentTxt
  229. }
  230. if expertContent != "" {
  231. rightIndex := strings.Index(expertContent, ")")
  232. if rightIndex == 0 {
  233. rightIndex = strings.Index(expertContent, ")")
  234. }
  235. expertNum := expertContent[:rightIndex]
  236. expertNum = strings.Replace(expertNum, "(", "", -1)
  237. expertNum = strings.Replace(expertNum, "(", "", -1)
  238. expertNum = strings.Replace(expertNum, "专家评价", "", -1)
  239. if expertNum != "" {
  240. expertNumArr = append(expertNumArr, expertNum)
  241. rightIndex = rightIndex
  242. expertContentStr := expertContent[rightIndex:]
  243. expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
  244. expertContentStr = strings.TrimLeft(expertContentStr, ":")
  245. expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
  246. if expertContentStr != "" {
  247. expertContentArr = append(expertContentArr, expertContentStr)
  248. }
  249. }
  250. }
  251. }
  252. })
  253. }
  254. if len(expertNumArr) > 0 {
  255. expertNumStr = expertNumArr[0]
  256. if expertNumStr != "" {
  257. expertNumStr = strings.Replace(expertNumStr, "#:", "", -1)
  258. expertNumStr = strings.Replace(expertNumStr, "# ", "", -1)
  259. expertNumStr = strings.Trim(expertNumStr, "")
  260. }
  261. }
  262. if len(expertContentArr) > 0 {
  263. expertContentStr = expertContentArr[0]
  264. }
  265. if len(interviewDateArr) > 0 {
  266. interviewDateStr = interviewDateArr[0]
  267. }
  268. return
  269. }
  270. //同步策略,报告文章
  271. func SyncTacticsListAddreport() (err error) {
  272. defer func() {
  273. if err != nil {
  274. fmt.Println("同步失败,Err:", err.Error())
  275. }
  276. }()
  277. fmt.Println("同步数据")
  278. //indexName := utils.IndexName
  279. endDate := time.Now().AddDate(0, 0, -30).Format(utils.FormatDate)
  280. list, err := models.GetTacticsList2(endDate)
  281. //list, err := models.GetTacticsListAll2()
  282. if err != nil {
  283. fmt.Println("GetTacticsList Err:", err.Error())
  284. return
  285. }
  286. listCustomArticle, err := models.GetCustomArticleId() //手动归类的文章,不替换文章类型
  287. if err != nil {
  288. fmt.Println("GetTacticsList Err:", err.Error())
  289. return
  290. }
  291. listGetMatchTypeName, errMatch := models.GetMatchTypeNamenNotNull() //手动归类的文章,不替换文章类型
  292. if errMatch != nil {
  293. fmt.Println("GetTacticsList Err:", errMatch.Error())
  294. return
  295. }
  296. fmt.Println("list len:", len(list))
  297. summaryCategoryIds := "28,32,45,50,57,62,72,74,79,84,86,88,90,93,95,96" //纪要库的文章类型categoty_id
  298. listSummary := strings.Split(summaryCategoryIds, ",")
  299. noSummaryArticleIds := "3454,3456,3457,3459,2449,2450,2453,2454,2459,2530,2583,2663,2670,2699,2715,2732,2748,2759,2399,2356,2870,3173,2978,2826,3470" //非纪要库类型的文章ID
  300. listNoSummaryArticleIds := strings.Split(noSummaryArticleIds, ",")
  301. listPermission, errper := models.GetPermissionMappingCategoryID()
  302. if errper != nil {
  303. fmt.Println("GetTacticsList Err:", errper.Error())
  304. return
  305. }
  306. summaryMap := make(map[int]int)
  307. for _, vSum := range listSummary {
  308. vSumInt, _ := strconv.Atoi(vSum)
  309. summaryMap[vSumInt] = 1
  310. }
  311. for k, v := range list {
  312. //同步匹配类型
  313. matchTypeName := ""
  314. for _, vMatch := range listGetMatchTypeName {
  315. if v.CategoryId == vMatch.CategoryId {
  316. matchTypeName = vMatch.MatchTypeName
  317. }
  318. }
  319. //是否属于纪要库的数据
  320. if _, has := summaryMap[v.CategoryId]; has {
  321. v.IsSummary = 1
  322. }
  323. //for _, vSum := range listSummary {
  324. // vSumInt, _ := strconv.Atoi(vSum)
  325. // if v.CategoryId == vSumInt {
  326. // v.IsSummary = 1
  327. // }
  328. //}
  329. //排除不属于纪要库类型的文章
  330. for _, vArt := range listNoSummaryArticleIds {
  331. vArtInt, _ := strconv.Atoi(vArt)
  332. if v.ArticleId == vArtInt {
  333. v.IsSummary = 0
  334. }
  335. }
  336. for _, vPer := range listPermission {
  337. if v.CategoryId == vPer.CategoryId {
  338. v.IsReport = 1
  339. }
  340. }
  341. if v.IsReport > 0 {
  342. //是否属于策略 策略自动归类
  343. //是否属于行业报告 行业报告自动归类
  344. if v.CategoryId == 7 || v.CategoryId == 9 || v.CategoryId == 11 || v.CategoryId == 51 || v.CategoryId == 52 || v.CategoryId == 64 || v.CategoryId == 80 || v.CategoryId == 87 {
  345. v.IsClass = 1
  346. v.ReportType = 1 //是否属于行业报告
  347. } else {
  348. v.ReportType = 2 //是否属于产业报告
  349. }
  350. }
  351. v.Department = "弘则权益研究"
  352. //fmt.Println(k, v.ArticleId)
  353. hh, _ := time.ParseDuration("8h")
  354. //pDate := publishDate.Add(hh)
  355. v.PublishDate = v.PublishDate.Add(hh)
  356. //判断是否已经存在
  357. if v.ArticleId < 0 {
  358. fmt.Println("AddCygxArticle Err:")
  359. return err
  360. }
  361. count, err := models.GetArticleCountById(v.ArticleId)
  362. if err != nil && err.Error() != utils.ErrNoRow() {
  363. fmt.Println("AddCygxArticle Err:", err.Error())
  364. return err
  365. }
  366. v.Body = strings.Replace(v.Body, "http://vmp.hzinsights.com", "https://vmp.hzinsights.com", -1)
  367. expertNumStr, expertContentStr, interviewDateStr, fileLink, bodyReturn := BodyAnalysis2(v.Body)
  368. if strings.Index(v.Body, "报告全文(") > 0 && strings.Index(v.Body, "PDF格式报告下载.pdf") > 0 {
  369. v.Body = strings.Replace(v.Body, "报告全文(", "", -1)
  370. v.Body = strings.Replace(v.Body, "PDF格式报告下载.pdf", "", -1)
  371. v.Body = strings.Replace(v.Body, "):", "", -1)
  372. }
  373. //fmt.Println(fileLink)
  374. var titleNew string
  375. titleNew = v.Title
  376. // 7资金流向 、11大类资产 、51每日复盘 、80医药周报、9估值研究
  377. if v.CategoryId == 7 || v.CategoryId == 11 || v.CategoryId == 51 || v.CategoryId == 9 {
  378. if v.UpdateFrequency == "daily" {
  379. var daystr string
  380. daystr = strconv.Itoa(v.PublishDate.Day())
  381. if len(daystr) == 1 {
  382. daystr = "0" + daystr
  383. }
  384. titleNew = v.Title + "(" + strconv.Itoa(v.PublishDate.Year())[2:len(strconv.Itoa(v.PublishDate.Year()))-0] + v.PublishDate.Format("01") + daystr + ")"
  385. } else if v.UpdateFrequency == "weekly" {
  386. titleNew = v.Title + utils.WeekByDate(v.PublishDate)
  387. }
  388. }
  389. if v.CategoryId == 80 {
  390. titleNew = v.Title + utils.WeekByDate(v.PublishDate)
  391. }
  392. //fmt.Println(k)
  393. //fmt.Println(expertContentStr)
  394. if count > 0 {
  395. fmt.Println(k, v.ArticleId, "edit")
  396. var isCustom bool
  397. bodyText, _ := GetReportContentTextSub(v.Body)
  398. updateParams := make(map[string]interface{})
  399. //updateParams["Title"] = v.Title
  400. updateParams["Title"] = titleNew
  401. updateParams["TitleEn"] = v.TitleEn
  402. updateParams["UpdateFrequency"] = v.UpdateFrequency
  403. updateParams["CreateDate"] = v.CreateDate
  404. updateParams["PublishDate"] = v.PublishDate
  405. //updateParams["Body"] = html.EscapeString(v.Body)
  406. updateParams["Body"] = html.EscapeString(bodyReturn)
  407. updateParams["BodyText"] = bodyText
  408. updateParams["Abstract"] = html.EscapeString(v.Abstract)
  409. updateParams["CategoryName"] = v.CategoryName
  410. updateParams["SubCategoryName"] = v.SubCategoryName
  411. for _, vCustom := range listCustomArticle {
  412. if v.ArticleId == vCustom.ArticleId {
  413. fmt.Println("手动归类的文章:" + strconv.Itoa(v.ArticleId))
  414. isCustom = true
  415. }
  416. }
  417. if isCustom == false {
  418. updateParams["CategoryId"] = v.CategoryId
  419. updateParams["MatchTypeName"] = matchTypeName
  420. }
  421. //updateParams["CategoryId"] = v.CategoryId
  422. updateParams["PublishStatus"] = v.PublishStatus
  423. updateParams["ExpertBackground"] = expertContentStr
  424. updateParams["ExpertNumber"] = expertNumStr
  425. updateParams["InterviewDate"] = interviewDateStr
  426. //updateParams["IsClass"] = v.IsClass
  427. updateParams["IsSummary"] = v.IsSummary
  428. updateParams["IsReport"] = v.IsReport
  429. updateParams["ReportType"] = v.ReportType
  430. if v.Department != "弘则权益研究" {
  431. v.Department = "弘则权益研究"
  432. }
  433. updateParams["Department"] = v.Department
  434. updateParams["FileLink"] = fileLink
  435. whereParam := map[string]interface{}{"article_id": v.ArticleId}
  436. err = models.UpdateByExpr(models.CygxArticle{}, whereParam, updateParams)
  437. if err != nil {
  438. fmt.Println("UpdateByExpr Err:" + err.Error())
  439. }
  440. } else {
  441. fmt.Println(k, v.ArticleId, "add")
  442. item := new(models.CygxArticle)
  443. articleIdInt := v.ArticleId
  444. item.ArticleId = articleIdInt
  445. //item.Title = v.Title
  446. item.Title = titleNew
  447. item.TitleEn = v.TitleEn
  448. item.UpdateFrequency = v.UpdateFrequency
  449. item.CreateDate = v.CreateDate
  450. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  451. //item.Body = html.EscapeString(v.Body)
  452. item.Body = html.EscapeString(bodyReturn)
  453. item.Abstract = html.EscapeString(v.Abstract)
  454. item.CategoryName = v.CategoryName
  455. item.SubCategoryName = v.SubCategoryName
  456. item.CategoryId = v.CategoryId
  457. item.PublishStatus = v.PublishStatus
  458. item.ExpertBackground = expertContentStr
  459. item.ExpertNumber = expertNumStr
  460. item.InterviewDate = interviewDateStr
  461. item.Department = v.Department
  462. item.ArticleIdMd5 = utils.MD5(strconv.Itoa(articleIdInt))
  463. item.IsClass = v.IsClass
  464. item.IsSummary = v.IsSummary
  465. item.IsReport = v.IsReport
  466. item.ReportType = v.ReportType
  467. item.FileLink = fileLink
  468. item.MatchTypeName = matchTypeName
  469. _, err = models.AddCygxArticles(item)
  470. if err != nil {
  471. fmt.Println("AddCygxArticle Err:", err.Error())
  472. return err
  473. }
  474. }
  475. ////纪要库的数据同步到Es
  476. //if v.IsSummary == 1 {
  477. // content := html.UnescapeString(v.Body)
  478. // doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  479. // if err != nil {
  480. // fmt.Println("AddCygxArticle Err:", err.Error())
  481. // return err
  482. // }
  483. // doc.Find("a").Each(func(i int, a *goquery.Selection) {
  484. // a.Remove()
  485. // })
  486. // bodyText := doc.Text()
  487. // item := new(ElasticTestArticleDetail)
  488. // item.ArticleId = v.ArticleId
  489. // item.Title = v.Title
  490. // item.BodyText = bodyText
  491. // item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  492. // EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  493. //}
  494. }
  495. return
  496. }
  497. //同步策略到Es
  498. func SyncTacticsListToEs() (err error) {
  499. defer func() {
  500. if err != nil {
  501. fmt.Println("同步失败,Err:", err.Error())
  502. }
  503. }()
  504. fmt.Println("同步数据到Es")
  505. indexName := utils.IndexName
  506. endDate := time.Now().AddDate(0, 0, -30).Format(utils.FormatDate)
  507. list, err := models.GetTacticsList(endDate)
  508. //list, err := models.GetTacticsListAll()
  509. if err != nil {
  510. fmt.Println("GetTacticsList Err:", err.Error())
  511. return
  512. }
  513. fmt.Println("list len:", len(list))
  514. for k, v := range list {
  515. //是否属于纪要库的数据
  516. v.Department = "弘则权益研究"
  517. fmt.Println(k, v.ArticleId)
  518. hh, _ := time.ParseDuration("8h")
  519. //pDate := publishDate.Add(hh)
  520. v.PublishDate = v.PublishDate.Add(hh)
  521. //判断是否已经存在
  522. if v.ArticleId < 0 {
  523. fmt.Println("AddCygxArticle Err:")
  524. return err
  525. }
  526. //纪要库的数据同步到Es
  527. content := html.UnescapeString(v.Body)
  528. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  529. if err != nil {
  530. fmt.Println("AddCygxArticle Err:", err.Error())
  531. return err
  532. }
  533. doc.Find("a").Each(func(i int, a *goquery.Selection) {
  534. a.Remove()
  535. })
  536. bodyText := doc.Text()
  537. item := new(ElasticTestArticleDetail)
  538. item.ArticleId = v.ArticleId
  539. item.Title = v.Title
  540. item.BodyText = bodyText
  541. item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
  542. EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  543. }
  544. return
  545. }
  546. //body 解析
  547. func BodyAnalysis2(body string) (expertNumStr, expertContentStr, interviewDateStr, fileLink, bodyReturn string) {
  548. body = html.UnescapeString(body)
  549. //fmt.Println(body)
  550. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  551. if err != nil {
  552. fmt.Println("create doc err:", err.Error())
  553. return
  554. }
  555. var expertNumArr []string
  556. var expertContentArr []string
  557. var interviewDateArr []string
  558. //var fileLink string
  559. doc.Find("p").Each(func(i int, s *goquery.Selection) {
  560. contentTxt := s.Text()
  561. if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
  562. interviewDate := s.Next().Text()
  563. interviewDateArr = append(interviewDateArr, interviewDate)
  564. }
  565. if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
  566. expertContent := s.Next().Text()
  567. if expertContent == "" {
  568. expertContent = contentTxt
  569. }
  570. if expertContent != "" {
  571. rightIndex := strings.Index(expertContent, ")")
  572. if rightIndex == 0 {
  573. rightIndex = strings.Index(expertContent, ")")
  574. }
  575. if rightIndex > 0 {
  576. expertNum := expertContent[:rightIndex]
  577. expertNum = strings.Replace(expertNum, "(", "", -1)
  578. expertNum = strings.Replace(expertNum, "(", "", -1)
  579. expertNum = strings.Replace(expertNum, "专家评价", "", -1)
  580. if expertNum != "" {
  581. expertNumArr = append(expertNumArr, expertNum)
  582. rightIndex = rightIndex
  583. expertContentStr := expertContent[rightIndex:]
  584. expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
  585. expertContentStr = strings.TrimLeft(expertContentStr, ":")
  586. expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
  587. expertContentArr = append(expertContentArr, expertContentStr)
  588. }
  589. }
  590. }
  591. }
  592. })
  593. if len(expertContentArr) == 0 {
  594. doc.Find("pre").Each(func(i int, s *goquery.Selection) {
  595. contentTxt := s.Text()
  596. if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
  597. interviewDate := s.Next().Text()
  598. if interviewDate != "" {
  599. interviewDateArr = append(interviewDateArr, interviewDate)
  600. }
  601. }
  602. if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
  603. expertContent := s.Next().Text()
  604. if expertContent == "" {
  605. expertContent = contentTxt
  606. }
  607. if expertContent != "" {
  608. rightIndex := strings.Index(expertContent, ")")
  609. if rightIndex == 0 {
  610. rightIndex = strings.Index(expertContent, ")")
  611. }
  612. expertNum := expertContent[:rightIndex]
  613. expertNum = strings.Replace(expertNum, "(", "", -1)
  614. expertNum = strings.Replace(expertNum, "(", "", -1)
  615. expertNum = strings.Replace(expertNum, "专家评价", "", -1)
  616. if expertNum != "" {
  617. expertNumArr = append(expertNumArr, expertNum)
  618. rightIndex = rightIndex
  619. expertContentStr := expertContent[rightIndex:]
  620. expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
  621. expertContentStr = strings.TrimLeft(expertContentStr, ":")
  622. expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
  623. if expertContentStr != "" {
  624. expertContentArr = append(expertContentArr, expertContentStr)
  625. }
  626. }
  627. }
  628. }
  629. })
  630. }
  631. if len(expertNumArr) > 0 {
  632. expertNumStr = expertNumArr[0]
  633. if expertNumStr != "" {
  634. expertNumStr = strings.Replace(expertNumStr, "#:", "", -1)
  635. expertNumStr = strings.Replace(expertNumStr, "# ", "", -1)
  636. expertNumStr = strings.Trim(expertNumStr, "")
  637. }
  638. }
  639. if len(expertContentArr) > 0 {
  640. expertContentStr = expertContentArr[0]
  641. }
  642. if len(expertContentStr) > 600 {
  643. strnum := strings.Index(expertContentStr, "#专家评价:")
  644. content := expertContentStr[strnum:len(expertContentStr)]
  645. strnum2 := strings.Index(content, "(")
  646. content = content[strnum2+9 : len(content)] //中文括号3位 专家编号6位
  647. expertContentStr = content
  648. }
  649. //if strings.Index(body, "报告全文(") > 0 && strings.Index(body, "PDF格式报告下载.pdf") > 0 {
  650. // numStar := strings.Index(body, "http")
  651. // numEnd := strings.Index(body, ".pdf")
  652. // fmt.Println("获取PDF链接")
  653. // fileLink = body[numStar : numEnd+4]
  654. //}
  655. var hrefRegexp = regexp.MustCompile("(?m)<a.*?[^<]>.*?</a>")
  656. match := hrefRegexp.FindAllString(body, -1)
  657. if match != nil {
  658. for k, v := range match {
  659. if k == 0 && strings.Index(v, ".pdf") > 0 {
  660. numStar := strings.Index(v, "http")
  661. numEnd := strings.Index(v, ".pdf")
  662. fileLink = v[numStar : numEnd+4]
  663. }
  664. if strings.Index(v, ".pdf") > 0 {
  665. body = strings.Replace(body, v, "", -1)
  666. }
  667. }
  668. if !strings.HasPrefix(fileLink, "https") && len(fileLink) > 0 {
  669. fileLink = "https" + fileLink[4:len(fileLink)]
  670. }
  671. body = strings.Replace(body, "完整报告请点击链接:", "", -1)
  672. body = strings.Replace(body, "PDF格式报告下载.pdf", "", -1)
  673. body = strings.Replace(body, "报告全文():", "", -1)
  674. }
  675. bodyReturn = body
  676. //fmt.Println(bodyReturn)
  677. //fmt.Println("_________________")
  678. //fmt.Println(fileLink)
  679. //body = strings.Replace(body, "报告全文(", "", -1)
  680. //body = strings.Replace(body, "PDF格式报告下载.pdf", "", -1)
  681. //body = strings.Replace(body, "):", "", -1)
  682. if len(interviewDateArr) > 0 {
  683. interviewDateStr = interviewDateArr[0]
  684. }
  685. return
  686. }