task.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. package services
  2. import (
  3. "fmt"
  4. "github.com/PuerkitoBio/goquery"
  5. "github.com/astaxie/beego/toolbox"
  6. "hongze/hongze_cygx/models"
  7. "hongze/hongze_cygx/utils"
  8. "html"
  9. "strconv"
  10. "strings"
  11. //"github.com/astaxie/beego/cache"
  12. )
  13. func Task() {
  14. fmt.Println("start")
  15. //同步数据
  16. if utils.RunMode == "release" {
  17. //syncTacticsList := toolbox.NewTask("syncTacticsList", "0 */5 * * * *", SyncTacticsList)
  18. //toolbox.AddTask("syncTacticsList", syncTacticsList)
  19. //toolbox.StartTask()
  20. syncTacticsListAddreport := toolbox.NewTask("syncTacticsListAddreport", "0 */5 * * * *", SyncTacticsListAddreport)
  21. toolbox.AddTask("syncTacticsListAddreport", syncTacticsListAddreport)
  22. //同步纪要库中的Es
  23. syncTacticsListToEs := toolbox.NewTask("syncTacticsListToEs", "0 */30 * * * *", SyncTacticsListToEs)
  24. toolbox.AddTask("syncTacticsListToEs", syncTacticsListToEs)
  25. toolbox.StartTask()
  26. }
  27. //updateActivitySattus := toolbox.NewTask("updateActivitySattus", "0 */5 * * * *", UpdateActivitySattus)
  28. //toolbox.AddTask("updateActivitySattus", updateActivitySattus)
  29. //SyncTacticsListAddreport()
  30. //UpdateActivitySattus()
  31. updateActivitySattus := toolbox.NewTask("syncTacticsListAddreport", "0 */10 * * * *", UpdateActivitySattus)
  32. toolbox.AddTask("updateActivitySattus", updateActivitySattus)
  33. //
  34. //updateIndustrialManagementLabel := toolbox.NewTask("updateIndustrialManagementLabel", "0 */30 * * * *", UpdateIndustrialManagementLabel)
  35. //toolbox.AddTask("updateIndustrialManagementLabel", updateIndustrialManagementLabel)
  36. //UpdateIndustrialManagementSubjectNnames()
  37. sendActivityBeginMsg := toolbox.NewTask("sendActivityBeginMsg", "0 */10 * * * *", SendActivityBeginMsg) //会议前30分钟的提醒
  38. toolbox.AddTask("sendActivityBeginMsg", sendActivityBeginMsg)
  39. sendActivityBeginMsgMeeting := toolbox.NewTask("sendActivityBeginMsgMeeting", "0 */5 * * * *", SendActivityBeginMsgMeeting) //会议前15分钟的提醒
  40. toolbox.AddTask("sendActivityBeginMsgMeeting", sendActivityBeginMsgMeeting)
  41. toolbox.StartTask()
  42. //if utils.RunMode == "debug" {
  43. // //syncTacticsList := toolbox.NewTask("syncTacticsList", "0 */5 * * * *", SyncTacticsList)
  44. // //toolbox.AddTask("syncTacticsList", syncTacticsList)
  45. // //toolbox.StartTask()
  46. // syncTacticsListAddreport := toolbox.NewTask("syncTacticsListAddreport", "0 01 01 * * *", SyncTacticsListAddreport)
  47. // toolbox.AddTask("syncTacticsListAddreport", syncTacticsListAddreport)
  48. //
  49. // //同步纪要库中的Es
  50. // syncTacticsListToEs := toolbox.NewTask("syncTacticsListToEs", "0 01 01 * * *", SyncTacticsListToEs)
  51. // toolbox.AddTask("syncTacticsListToEs", syncTacticsListToEs)
  52. // toolbox.StartTask()
  53. //}
  54. //syncTacticsListAddreport := toolbox.NewTask("syncTacticsListAddreport", "0 42 09 * * *", SyncTacticsListAddreport)
  55. //toolbox.AddTask("syncTacticsListAddreport", syncTacticsListAddreport)
  56. //toolbox.StartTask()
  57. //AddAllArticle()
  58. //SyncTacticsListToEs()
  59. fmt.Println("end")
  60. }
// ElasticOption currently does nothing: its body contains no executable
// statements. Everything below is commented-out scratch code that was
// apparently used for one-off Elasticsearch maintenance (creating an index,
// bulk-inserting articles, probing a file cache).
func ElasticOption() {
	//SyncTacticsList()
	//toolbox.StartTask()
	//keyWord := "专家"
	//pageSize := 20
	//SearchByKeyWord(keyWord, pageSize)
	//SaveData()
	//GetIndustryMap()
	// Parse reports.
	//GetArticleExpert()
	//SearchByKeyWordQuery("")
	//AddMap()
	// Sync data.
	//ImportData()
	//AddMap()
	//Search("光伏行业")
	// Repair report content.
	//GetArticleAll()
	//indexName := "article_list"
	//	mappingJson := `{
	//"mappings": {
	//	"dynamic": true,
	//	"properties": {
	//		"ArticleId": {
	//			"type": "integer"
	//		},
	//		"Title": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"TitleEn": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"UpdateFrequency": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"CreateDate": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"PublishDate": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"Abstract": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"CategoryName": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"SubCategoryName": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"InterviewDate": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"ExpertBackground": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"ExpertNumber": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"Department": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"BodyText": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		},
	//		"ArticleIdMd5": {
	//			"type": "text",
	//			"analyzer": "ik_smart"
	//		}
	//	}
	//}
	//}`
	//EsCreateIndex(indexName,mappingJson)
	// Insert data.
	//allList, err := models.GetArticleAll()
	//if err != nil {
	//	fmt.Println("GetArticleAll Err:", err.Error())
	//	return
	//}
	//
	//indexName := "article_list"
	//
	//for _, v := range allList {
	//	content := html.UnescapeString(v.Body)
	//	doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
	//	if err != nil {
	//		fmt.Println("create doc err:", err.Error())
	//		return
	//	}
	//	bodyText := doc.Text()
	//
	//	item := new(ElasticArticleDetail)
	//	item.ArticleId = v.ArticleId
	//	item.Title = v.Title
	//	item.TitleEn = v.TitleEn
	//	item.UpdateFrequency = v.UpdateFrequency
	//	item.CreateDate = v.CreateDate
	//	item.PublishDate = v.PublishDate
	//	item.Abstract = v.Abstract
	//	item.CategoryName = v.CategoryName
	//	item.SubCategoryName = v.SubCategoryName
	//	item.InterviewDate = v.InterviewDate
	//	item.ExpertBackground = v.ExpertBackground
	//	item.ExpertNumber = v.ExpertNumber
	//	item.Department = v.Department
	//	item.ArticleIdMd5 = v.ArticleIdMd5
	//	item.BodyText = bodyText
	//	EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
	//}
	//fmt.Println("bm")
	//bm, err := cache.NewCache("file", `{"CachePath":"./cache","FileSuffix":".cache", "EmbedExpiry": "120"}`)
	////
	//if err != nil {
	//	fmt.Println("false",err)
	//}
	////fmt.Println(bm)
	//key:="put_key_one"
	////bm.Put(key,1,1*time.Second)
	//b:=bm.IsExist(key)
	//fmt.Println(b)
	// Sync strategy articles.
	//SyncTacticsList()
	//indexName := "article_list"
	//EsDeleteData(indexName,)
}
  199. /*
  200. searchItem := new(models.SearchItem)
  201. searchItem.ArticleId, _ = strconv.Atoi(v.Id)
  202. searchItem.Body = v.Highlight["BodyText"]
  203. searchItem.Title = title
  204. searchItem.PublishDate = article.PublishDate
  205. */
  206. func CreateIndex() {
  207. indexName := "cygx_article_v4" //utils.IndexName
  208. mappingJson := `{
  209. "mappings": {
  210. "dynamic": true,
  211. "properties": {
  212. "ArticleId": {
  213. "type": "integer"
  214. },
  215. "BodyText": {
  216. "type": "text",
  217. "term_vector": "with_positions_offsets",
  218. "analyzer": "ik_smart"
  219. },
  220. "PublishDate": {
  221. "type": "keyword"
  222. },
  223. "SubCategoryName": {
  224. "type": "text",
  225. "term_vector": "with_positions_offsets",
  226. "analyzer": "ik_smart"
  227. },
  228. "Title": {
  229. "type": "text",
  230. "term_vector": "with_positions_offsets",
  231. "analyzer": "ik_smart"
  232. },
  233. "TitleEn": {
  234. "type": "text",
  235. "term_vector": "with_positions_offsets"
  236. },
  237. "UpdateFrequency": {
  238. "type": "text",
  239. "term_vector": "with_positions_offsets"
  240. }
  241. }
  242. }
  243. }`
  244. EsCreateIndex(indexName, mappingJson)
  245. }
  246. func AddAllArticle() {
  247. allList, err := models.GetArticleAll()
  248. if err != nil {
  249. fmt.Println("GetArticleAll Err:", err.Error())
  250. return
  251. }
  252. indexName := "cygx_article_v3"
  253. for _, v := range allList {
  254. content := html.UnescapeString(v.Body)
  255. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  256. if err != nil {
  257. fmt.Println("create doc err:", err.Error())
  258. return
  259. }
  260. bodyText := doc.Text()
  261. item := new(ElasticTestArticleDetail)
  262. item.ArticleId = v.ArticleId
  263. item.Title = v.Title
  264. item.PublishDate = v.PublishDate
  265. item.BodyText = bodyText
  266. EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
  267. fmt.Println(v.ArticleId)
  268. }
  269. }
  270. func AddAllArticleV4() {
  271. allList, err := models.GetArticleAll2()
  272. if err != nil {
  273. fmt.Println("GetArticleAll Err:", err.Error())
  274. return
  275. }
  276. indexName := "cygx_article_v4"
  277. for _, v := range allList {
  278. content := html.UnescapeString(v.Body)
  279. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  280. if err != nil {
  281. fmt.Println("create doc err:", err.Error())
  282. return
  283. }
  284. bodyText := doc.Text()
  285. item := new(ElasticTestArticleDetailV4)
  286. item.ArticleId = v.ArticleId
  287. item.Title = v.Title
  288. item.PublishDate = v.PublishDate
  289. item.BodyText = bodyText
  290. item.IsSummary = v.IsSummary
  291. item.IsReport = v.IsReport
  292. //if v.IsReport == 1 {
  293. // item.IsReport = true
  294. //}
  295. //if v.IsSummary == 1 {
  296. // item.IsSummary = true
  297. //}
  298. EsAddOrEditDataV4(indexName, strconv.Itoa(v.ArticleId), item)
  299. fmt.Println(v.ArticleId)
  300. }
  301. }
// "term_vector": "with_positions_offsets"

// ElasticArticleDetail is the full article document shape. Within this file
// it is referenced only from commented-out code in ElasticOption.
// The description tags carry the original Chinese field descriptions; the
// trailing comments are their English translations.
type ElasticArticleDetail struct {
	ArticleId        int    `description:"报告id"`  // report id
	Title            string `description:"标题"`    // title
	TitleEn          string `description:"英文标题 "` // English title
	UpdateFrequency  string `description:"更新周期"`  // update frequency
	CreateDate       string `description:"创建时间"`  // creation time
	PublishDate      string `description:"发布时间"`  // publish time
	Abstract         string `description:"摘要"`    // abstract
	CategoryName     string `description:"一级分类"`  // first-level category
	SubCategoryName  string `description:"二级分类"`  // second-level category
	InterviewDate    string `description:"访谈时间"`  // interview time
	ExpertBackground string `description:"专家背景"`  // expert background
	ExpertNumber     string `description:"专家编号"`  // expert number
	Department       string `description:"作者"`    // author
	ArticleIdMd5     string `description:"纪要id"`  // minutes id
	BodyText         string `description:"内容"`    // content
}
// ElasticTestArticleDetail is the reduced article document that
// AddAllArticle passes to EsAddOrEditData for the "cygx_article_v3" index.
// The description tags carry the original Chinese field descriptions; the
// trailing comments are their English translations.
type ElasticTestArticleDetail struct {
	ArticleId   int    `description:"报告id"` // report id
	Title       string `description:"标题"`   // title
	BodyText    string `description:"内容"`   // content (plain text extracted from the HTML body)
	PublishDate string `description:"发布时间"` // publish time
}
// ElasticTestArticleDetailV4 is the article document that AddAllArticleV4
// passes to EsAddOrEditDataV4 for the "cygx_article_v4" index. It extends
// ElasticTestArticleDetail with the IsSummary and IsReport flags.
// The description tags carry the original Chinese field descriptions; the
// trailing comments are their English translations.
type ElasticTestArticleDetailV4 struct {
	ArticleId   int    `description:"报告id"`    // report id
	Title       string `description:"标题"`      // title
	BodyText    string `description:"内容"`      // content (plain text extracted from the HTML body)
	PublishDate string `description:"发布时间"`    // publish time
	IsSummary   int    `description:"是否属于纪要库"` // whether the article belongs to the minutes library
	IsReport    int    `description:"是否属于报告"`  // whether the article belongs to reports
}