// Source file: task.go

  1. package services
  2. import (
  3. "fmt"
  4. "github.com/PuerkitoBio/goquery"
  5. "github.com/astaxie/beego/toolbox"
  6. "hongze/hongze_cygx/models"
  7. "hongze/hongze_cygx/utils"
  8. "html"
  9. "strconv"
  10. "strings"
  11. //"github.com/astaxie/beego/cache"
  12. )
  13. func Task() {
  14. fmt.Println("start")
  15. //同步数据
  16. if utils.RunMode == "release" {
  17. //syncTacticsList := toolbox.NewTask("syncTacticsList", "0 */5 * * * *", SyncTacticsList)
  18. //toolbox.AddTask("syncTacticsList", syncTacticsList)
  19. //toolbox.StartTask()
  20. syncTacticsListAddreport := toolbox.NewTask("syncTacticsListAddreport", "0 */5 * * * *", SyncTacticsListAddreport)
  21. toolbox.AddTask("syncTacticsListAddreport", syncTacticsListAddreport)
  22. //同步纪要库中的Es
  23. syncTacticsListToEs := toolbox.NewTask("syncTacticsListToEs", "0 */30 * * * *", SyncTacticsListToEs)
  24. toolbox.AddTask("syncTacticsListToEs", syncTacticsListToEs)
  25. toolbox.StartTask()
  26. }
  27. //if utils.RunMode == "debug" {
  28. // //syncTacticsList := toolbox.NewTask("syncTacticsList", "0 */5 * * * *", SyncTacticsList)
  29. // //toolbox.AddTask("syncTacticsList", syncTacticsList)
  30. // //toolbox.StartTask()
  31. // syncTacticsListAddreport := toolbox.NewTask("syncTacticsListAddreport", "0 01 01 * * *", SyncTacticsListAddreport)
  32. // toolbox.AddTask("syncTacticsListAddreport", syncTacticsListAddreport)
  33. //
  34. // //同步纪要库中的Es
  35. // syncTacticsListToEs := toolbox.NewTask("syncTacticsListToEs", "0 01 01 * * *", SyncTacticsListToEs)
  36. // toolbox.AddTask("syncTacticsListToEs", syncTacticsListToEs)
  37. // toolbox.StartTask()
  38. //}
  39. //syncTacticsListAddreport := toolbox.NewTask("syncTacticsListAddreport", "0 42 09 * * *", SyncTacticsListAddreport)
  40. //toolbox.AddTask("syncTacticsListAddreport", syncTacticsListAddreport)
  41. //toolbox.StartTask()
  42. fmt.Println("end")
  43. }
  44. func ElasticOption() {
  45. //SyncTacticsList()
  46. //toolbox.StartTask()
  47. //keyWord := "专家"
  48. //pageSize := 20
  49. //SearchByKeyWord(keyWord, pageSize)
  50. //SaveData()
  51. //GetIndustryMap()
  52. //解析报告
  53. //GetArticleExpert()
  54. //SearchByKeyWordQuery("")
  55. //AddMap()
  56. //同步数据
  57. //ImportData()
  58. //AddMap()
  59. //Search("光伏行业")
  60. //修复报告内容
  61. //GetArticleAll()
  62. //indexName := "article_list"
  63. // mappingJson := `{
  64. //"mappings": {
  65. // "dynamic": true,
  66. // "properties": {
  67. // "ArticleId": {
  68. // "type": "integer"
  69. // },
  70. // "Title": {
  71. // "type": "text",
  72. // "analyzer": "ik_smart"
  73. // },
  74. // "TitleEn": {
  75. // "type": "text",
  76. // "analyzer": "ik_smart"
  77. // },
  78. // "UpdateFrequency": {
  79. // "type": "text",
  80. // "analyzer": "ik_smart"
  81. // },
  82. // "CreateDate": {
  83. // "type": "text",
  84. // "analyzer": "ik_smart"
  85. // },
  86. // "PublishDate": {
  87. // "type": "text",
  88. // "analyzer": "ik_smart"
  89. // },
  90. // "Abstract": {
  91. // "type": "text",
  92. // "analyzer": "ik_smart"
  93. // },
  94. // "CategoryName": {
  95. // "type": "text",
  96. // "analyzer": "ik_smart"
  97. // },
  98. // "SubCategoryName": {
  99. // "type": "text",
  100. // "analyzer": "ik_smart"
  101. // },
  102. // "InterviewDate": {
  103. // "type": "text",
  104. // "analyzer": "ik_smart"
  105. // },
  106. // "ExpertBackground": {
  107. // "type": "text",
  108. // "analyzer": "ik_smart"
  109. // },
  110. // "ExpertNumber": {
  111. // "type": "text",
  112. // "analyzer": "ik_smart"
  113. // },
  114. // "Department": {
  115. // "type": "text",
  116. // "analyzer": "ik_smart"
  117. // },
  118. // "BodyText": {
  119. // "type": "text",
  120. // "analyzer": "ik_smart"
  121. // },
  122. // "ArticleIdMd5": {
  123. // "type": "text",
  124. // "analyzer": "ik_smart"
  125. // }
  126. // }
  127. //}
  128. //}`
  129. //EsCreateIndex(indexName,mappingJson)
  130. //插入数据
  131. //allList, err := models.GetArticleAll()
  132. //if err != nil {
  133. // fmt.Println("GetArticleAll Err:", err.Error())
  134. // return
  135. //}
  136. //
  137. //indexName := "article_list"
  138. //
  139. //for _, v := range allList {
  140. // content := html.UnescapeString(v.Body)
  141. // doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  142. // if err != nil {
  143. // fmt.Println("create doc err:", err.Error())
  144. // return
  145. // }
  146. // bodyText := doc.Text()
  147. //
  148. // item := new(ElasticArticleDetail)
  149. // item.ArticleId = v.ArticleId
  150. // item.Title = v.Title
  151. // item.TitleEn = v.TitleEn
  152. // item.UpdateFrequency = v.UpdateFrequency
  153. // item.CreateDate = v.CreateDate
  154. // item.PublishDate = v.PublishDate
  155. // item.Abstract = v.Abstract
  156. // item.CategoryName = v.CategoryName
  157. // item.SubCategoryName = v.SubCategoryName
  158. // item.InterviewDate = v.InterviewDate
  159. // item.ExpertBackground = v.ExpertBackground
  160. // item.ExpertNumber = v.ExpertNumber
  161. // item.Department = v.Department
  162. // item.ArticleIdMd5 = v.ArticleIdMd5
  163. // item.BodyText = bodyText
  164. // EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  165. //}
  166. //fmt.Println("bm")
  167. //bm, err := cache.NewCache("file", `{"CachePath":"./cache","FileSuffix":".cache", "EmbedExpiry": "120"}`)
  168. ////
  169. //if err != nil {
  170. // fmt.Println("false",err)
  171. //}
  172. ////fmt.Println(bm)
  173. //key:="put_key_one"
  174. ////bm.Put(key,1,1*time.Second)
  175. //b:=bm.IsExist(key)
  176. //fmt.Println(b)
  177. //同步策略文章
  178. //SyncTacticsList()
  179. //indexName := "article_list"
  180. //EsDeleteData(indexName,)
  181. }
/*
Unused reference snippet (not compiled). It appears to show how a
models.SearchItem was populated from a search hit `v` — TODO confirm or remove.

	searchItem := new(models.SearchItem)
	searchItem.ArticleId, _ = strconv.Atoi(v.Id)
	searchItem.Body = v.Highlight["BodyText"]
	searchItem.Title = title
	searchItem.PublishDate = article.PublishDate
*/

  189. func CreateIndex() {
  190. indexName := "cygx_article_v4" //utils.IndexName
  191. mappingJson := `{
  192. "mappings": {
  193. "dynamic": true,
  194. "properties": {
  195. "ArticleId": {
  196. "type": "integer"
  197. },
  198. "BodyText": {
  199. "type": "text",
  200. "term_vector": "with_positions_offsets",
  201. "analyzer": "ik_smart"
  202. },
  203. "PublishDate": {
  204. "type": "keyword"
  205. },
  206. "SubCategoryName": {
  207. "type": "text",
  208. "term_vector": "with_positions_offsets",
  209. "analyzer": "ik_smart"
  210. },
  211. "Title": {
  212. "type": "text",
  213. "term_vector": "with_positions_offsets",
  214. "analyzer": "ik_smart"
  215. },
  216. "TitleEn": {
  217. "type": "text",
  218. "term_vector": "with_positions_offsets"
  219. },
  220. "UpdateFrequency": {
  221. "type": "text",
  222. "term_vector": "with_positions_offsets"
  223. }
  224. }
  225. }
  226. }`
  227. EsCreateIndex(indexName, mappingJson)
  228. }
  229. func AddAllArticle() {
  230. allList, err := models.GetArticleAll()
  231. if err != nil {
  232. fmt.Println("GetArticleAll Err:", err.Error())
  233. return
  234. }
  235. indexName := "cygx_article_v3"
  236. for _, v := range allList {
  237. content := html.UnescapeString(v.Body)
  238. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  239. if err != nil {
  240. fmt.Println("create doc err:", err.Error())
  241. return
  242. }
  243. bodyText := doc.Text()
  244. item := new(ElasticTestArticleDetail)
  245. item.ArticleId = v.ArticleId
  246. item.Title = v.Title
  247. item.PublishDate = v.PublishDate
  248. item.BodyText = bodyText
  249. EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  250. fmt.Println(v.ArticleId)
  251. }
  252. }
  253. // "term_vector": "with_positions_offsets"
  254. type ElasticArticleDetail struct {
  255. ArticleId int `description:"报告id"`
  256. Title string `description:"标题"`
  257. TitleEn string `description:"英文标题 "`
  258. UpdateFrequency string `description:"更新周期"`
  259. CreateDate string `description:"创建时间"`
  260. PublishDate string `description:"发布时间"`
  261. Abstract string `description:"摘要"`
  262. CategoryName string `description:"一级分类"`
  263. SubCategoryName string `description:"二级分类"`
  264. InterviewDate string `description:"访谈时间"`
  265. ExpertBackground string `description:"专家背景"`
  266. ExpertNumber string `description:"专家编号"`
  267. Department string `description:"作者"`
  268. ArticleIdMd5 string `description:"纪要id"`
  269. BodyText string `description:"内容"`
  270. }
  271. type ElasticTestArticleDetail struct {
  272. ArticleId int `description:"报告id"`
  273. Title string `description:"标题"`
  274. BodyText string `description:"内容"`
  275. PublishDate string `description:"发布时间"`
  276. }