Browse Source

煤矿Excel表格解析

xingzai 3 years ago
parent
commit
593c932068
7 changed files with 407 additions and 1 deletions
  1. 1 0
      .gitignore
  2. 1 0
      go.mod
  3. 2 0
      go.sum
  4. 118 0
      models/base_from_coalmine.go
  5. 5 0
      models/db.go
  6. 278 0
      services/commodity_coal_mine.go
  7. 2 1
      services/task.go

+ 1 - 0
.gitignore

@@ -7,3 +7,4 @@
 /hongze_data_crawler
 /hongze_data_crawler.exe
 /binlog/*.log
+/static/file/*

+ 1 - 0
go.mod

@@ -10,5 +10,6 @@ require (
 	github.com/go-sql-driver/mysql v1.6.0
 	github.com/mozillazg/go-pinyin v0.19.0
 	github.com/rdlucklib/rdluck_tools v1.0.2
+	github.com/tealeg/xlsx v1.0.5
 	gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df
 )

+ 2 - 0
go.sum

@@ -295,6 +295,8 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
 github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
 github.com/syndtr/goleveldb v0.0.0-20160425020131-cfa635847112/go.mod h1:Z4AUp2Km+PwemOoO/VB5AOx9XSsIItzFjoJlOSiYmn0=
 github.com/syndtr/goleveldb v0.0.0-20181127023241-353a9fca669c/go.mod h1:Z4AUp2Km+PwemOoO/VB5AOx9XSsIItzFjoJlOSiYmn0=
+github.com/tealeg/xlsx v1.0.5 h1:+f8oFmvY8Gw1iUXzPk+kz+4GpbDZPK1FhPiQRd+ypgE=
+github.com/tealeg/xlsx v1.0.5/go.mod h1:btRS8dz54TDnvKNosuAqxrM1QgN1udgk9O34bDCnORM=
 github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
 github.com/ugorji/go v0.0.0-20171122102828-84cb69a8af83/go.mod h1:hnLbHMwcvSihnDhEfx2/BzKp2xb0Y+ErdfYcrs9tkJQ=
 github.com/wendal/errors v0.0.0-20130201093226-f66c77a7882b/go.mod h1:Q12BUT7DqIlHRmgv3RskH+UCM/4eqVMgI0EMmlSpAXc=

+ 118 - 0
models/base_from_coalmine.go

@@ -0,0 +1,118 @@
+package models
+
+import (
+	"github.com/rdlucklib/rdluck_tools/orm"
+	"time"
+)
+
+// BaseFromCoalmineMapping is the ORM model for one coal-mine indicator definition;
+// GetBaseFromCoalmineMapping scans rows of base_from_coalmine_mapping into it.
+// The description tags are the Chinese field labels.
+// NOTE(review): the labels on IndexName/IndexCode mention "持买单量" (buy-side open
+// interest), which looks carried over from another model — confirm.
+type BaseFromCoalmineMapping struct {
+	BaseFromCoalmineMappingId int       `orm:"column(base_from_coalmine_mapping_id);pk"`
+	IndexName                 string    `description:"持买单量指标名称"`
+	IndexCode                 string    `description:"持买单量指标编码"`
+	Exchange                  string    `description:"样本统计类别"`
+	Source                    string    `description:"来源"`
+	Province                  string    `description:"省份"`
+	Description               string    `description:"描述"`
+	CreateTime                time.Time `description:"时间"`
+}
+
+// BaseFromCoalmineIndex is the ORM model for one dated value of a coal-mine
+// indicator; GetBaseFromCoalmineIndex scans rows of base_from_coalmine_index into it.
+// DealValue and DataTime are kept as strings. The description tags are the Chinese
+// field labels.
+// NOTE(review): the labels on IndexName/IndexCode mention "持买单量" (buy-side open
+// interest), which looks carried over from another model — confirm.
+type BaseFromCoalmineIndex struct {
+	BaseFromCoalmineIndexId int       `orm:"column(base_from_coalmine_index_id);pk"`
+	IndexName               string    `description:"持买单量指标名称"`
+	IndexCode               string    `description:"持买单量指标编码"`
+	Exchange                string    `description:"样本统计类别"`
+	DealValue               string    `description:"成交量"`
+	DataTime                string    `description:"数据日期"`
+	CreateTime              time.Time `description:"插入时间"`
+	ModifyTime              time.Time `description:"修改时间"`
+}
+
+// BaseFromCoalmineCompanyMapping is the ORM model for one company-level coal-mine
+// indicator definition; GetBaseFromCoalmineCompanyMapping scans rows of
+// base_from_coalmine_company_mapping into it. The description tags are the Chinese
+// field labels.
+// NOTE(review): the labels on IndexName/IndexCode mention "持买单量" (buy-side open
+// interest), which looks carried over from another model — confirm.
+type BaseFromCoalmineCompanyMapping struct {
+	BaseFromCoalmineCompanyMappingId int       `orm:"column(base_from_coalmine_company_mapping_id);pk"`
+	IndexName                        string    `description:"持买单量指标名称"`
+	IndexCode                        string    `description:"持买单量指标编码"`
+	Source                           string    `description:"来源"`
+	Rank                             string    `description:"序号"`
+	Province                         string    `description:"省份"`
+	City                             string    `description:"城市"`
+	CompanyName                      string    `description:"公司名称"`
+	GroupName                        string    `description:"集团名称"`
+	CreateTime                       time.Time `description:"时间"`
+}
+
+// BaseFromCoalmineCompanyIndex is the ORM model for one dated value of a
+// company-level coal-mine indicator; AddBaseFromCoalmineCompanyIndex inserts values
+// of this type. DealValue and DataTime are kept as strings. The description tags are
+// the Chinese field labels.
+// NOTE(review): the labels on IndexName/IndexCode mention "持买单量" (buy-side open
+// interest), which looks carried over from another model — confirm.
+type BaseFromCoalmineCompanyIndex struct {
+	BaseFromCoalmineCompanyIndexId int       `orm:"column(base_from_coalmine_company_index_id);pk"`
+	IndexName                      string    `description:"持买单量指标名称"`
+	IndexCode                      string    `description:"持买单量指标编码"`
+	DealValue                      string    `description:"成交量"`
+	DataTime                       string    `description:"数据日期"`
+	CreateTime                     time.Time `description:"插入时间"`
+	ModifyTime                     time.Time `description:"修改时间"`
+}
+
+// AddBaseFromCoalmineMapping inserts item through an ORM handle switched to the
+// "data" database alias and returns the id and error reported by Insert.
+func AddBaseFromCoalmineMapping(item *BaseFromCoalmineMapping) (lastId int64, err error) {
+	ormer := orm.NewOrm()
+	ormer.Using("data")
+	return ormer.Insert(item)
+}
+
+//查询指标
+func GetBaseFromCoalmineMapping() (items []*BaseFromCoalmineMapping, err error) {
+	o := orm.NewOrm()
+	o.Using("data")
+	sql := `SELECT * FROM base_from_coalmine_mapping`
+	_, err = o.Raw(sql).QueryRows(&items)
+	return
+}
+
+//查询数据
+func GetBaseFromCoalmineIndex() (items []*BaseFromCoalmineIndex, err error) {
+	o := orm.NewOrm()
+	o.Using("data")
+	sql := `SELECT * FROM base_from_coalmine_index`
+	_, err = o.Raw(sql).QueryRows(&items)
+	return
+}
+
+func UpdateBaseFromCoalmineIndex(item *BaseFromCoalmineIndex) (err error) {
+	o := orm.NewOrm()
+	o.Using("data")
+	sql := `UPDATE base_from_coalmine_index SET deal_value=?  WHERE index_name=?  AND  data_time = ?`
+	_, err = o.Raw(sql, item.DealValue, item.IndexName, item.DataTime).Exec()
+	return
+}
+
+// AddBaseFromCoalmineIndex inserts item through an ORM handle switched to the
+// "data" database alias and returns the id and error reported by Insert.
+func AddBaseFromCoalmineIndex(item *BaseFromCoalmineIndex) (lastId int64, err error) {
+	ormer := orm.NewOrm()
+	ormer.Using("data")
+	return ormer.Insert(item)
+}
+
+// AddBaseFromCoalmineCompanyMapping inserts item through an ORM handle switched to
+// the "data" database alias and returns the id and error reported by Insert.
+func AddBaseFromCoalmineCompanyMapping(item *BaseFromCoalmineCompanyMapping) (lastId int64, err error) {
+	ormer := orm.NewOrm()
+	ormer.Using("data")
+	return ormer.Insert(item)
+}
+
+//查询指标
+func GetBaseFromCoalmineCompanyMapping() (items []*BaseFromCoalmineCompanyMapping, err error) {
+	o := orm.NewOrm()
+	o.Using("data")
+	sql := `SELECT * FROM base_from_coalmine_company_mapping`
+	_, err = o.Raw(sql).QueryRows(&items)
+	return
+}
+
+// AddBaseFromCoalmineCompanyIndex inserts one company-level indicator value through
+// an ORM handle switched to the "data" database alias and returns the id and error
+// reported by Insert.
+func AddBaseFromCoalmineCompanyIndex(item *BaseFromCoalmineCompanyIndex) (lastId int64, err error) {
+	ormer := orm.NewOrm()
+	ormer.Using("data")
+	return ormer.Insert(item)
+}

+ 5 - 0
models/db.go

@@ -35,6 +35,11 @@ func init() {
 		new(BaseFromTradeMapping),
 		//
 		new(BaseFromTradeDalianIndex),
+		new(BaseFromCoalmineMapping),
+		new(BaseFromCoalmineIndex),
+		new(BaseFromCoalmineCompanyMapping),
+		new(BaseFromCoalmineCompanyIndex),
+
 		//new(BaseFromTradeDalianData),
 	)
 }

+ 278 - 0
services/commodity_coal_mine.go

@@ -0,0 +1,278 @@
+package services
+
+import (
+	"fmt"
+	"github.com/tealeg/xlsx"
+	"hongze/hongze_data_crawler/models"
+	"hongze/hongze_data_crawler/utils"
+	"strings"
+	"time"
+)
+
+// FileCoalJsm imports the coal-mine workbook static/file/jsm.xlsx.
+//
+// Every sheet is read with a fixed, zero-based row layout:
+//   - row 1: the issuing organisation (cell 0, with the "制表单位:" label removed);
+//   - row 3: the date header; cells after column 4 are parsed with layout "01-02-06";
+//   - rows 4-15: one sample category per row (province in column 1, description in
+//     column 3, category in column 4), followed by one value per date column;
+//   - rows 18 and later: one company per row (rank, province, city, company and group
+//     in columns 0-4); value cells of rows 19 and later are collected as company data.
+//
+// Empty label cells keep the value read from an earlier row. New indicator mappings,
+// new indicator values and new company mappings are inserted, and a stored indicator
+// value that differs from the workbook is updated. Collected company values are not
+// persisted yet (see the disabled loop at the end).
+//
+// Failed inserts are only printed. An unreadable workbook, a failed lookup or a failed
+// update is returned; any returned error is also printed and handed to utils.SendEmail
+// for utils.EmailSendToUsers.
+func FileCoalJsm() (err error) {
+	defer func() {
+		if err != nil {
+			fmt.Println("FileCoalJsm Err:" + err.Error())
+			go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "FileCoalJsm ErrMsg:"+err.Error(), utils.EmailSendToUsers)
+		}
+	}()
+
+	// Zero-based positions of the fixed sheet layout.
+	const (
+		sourceRow    = 1  // row holding the issuing organisation
+		dateRow      = 3  // row holding the date header
+		sampleEnd    = 16 // sample rows are dateRow < i < sampleEnd
+		companyStart = 17 // company rows are i > companyStart
+		lastLabelCol = 4  // value cells start after this column
+	)
+
+	path := "static/file/jsm.xlsx"
+	xlFile, err := xlsx.OpenFile(path)
+	if err != nil {
+		return err
+	}
+
+	// Label values carry over between rows (and sheets) when a cell is empty.
+	var source, province, description, exchange string
+	var rank, city, companyName, group string
+
+	dateMap := make(map[int]string)           // column -> formatted date
+	codeMap := make(map[string]string)        // indicator name -> code
+	indexMap := make(map[string]string)       // indicator name + date -> stored value
+	codeCompanyMap := make(map[string]string) // company indicator name -> code
+	var items []*models.BaseFromCoalmineMapping
+	var itemsCompany []*models.BaseFromCoalmineCompanyMapping
+	var itemsIndex []*models.BaseFromCoalmineIndex
+	var itemsCompanyIndex []*models.BaseFromCoalmineCompanyIndex
+
+	codeList, err := models.GetBaseFromCoalmineMapping()
+	if err != nil && err.Error() != utils.ErrNoRow() {
+		utils.FileLog.Info("获取煤炭指标失败:", err)
+		return err
+	}
+	for _, v := range codeList {
+		codeMap[v.IndexName] = v.IndexCode
+	}
+
+	codeCompanyList, err := models.GetBaseFromCoalmineCompanyMapping()
+	if err != nil && err.Error() != utils.ErrNoRow() {
+		utils.FileLog.Info("获取煤炭公司指标失败:", err)
+		return err
+	}
+	for _, v := range codeCompanyList {
+		codeCompanyMap[v.IndexName] = v.IndexCode
+	}
+
+	indexList, err := models.GetBaseFromCoalmineIndex()
+	if err != nil && err.Error() != utils.ErrNoRow() {
+		utils.FileLog.Info("获取煤炭指标数据失败:", err)
+		return err
+	}
+	for _, v := range indexList {
+		indexMap[v.IndexName+v.DataTime] = v.DealValue
+	}
+
+	for _, sheet := range xlFile.Sheets {
+		maxRow := sheet.MaxRow
+		for i := 0; i < maxRow; i++ {
+			switch {
+			case i == sourceRow:
+				// Issuing organisation.
+				if cells := sheet.Row(i).Cells; len(cells) > 0 {
+					source = cells[0].String()
+				}
+				source = strings.Replace(source, "制表单位:", "", -1)
+
+			case i == dateRow:
+				// Date header: remember the formatted date of every value column.
+				for k, cell := range sheet.Row(i).Cells {
+					if k <= lastLabelCol {
+						continue
+					}
+					text := cell.String()
+					stamp, parseErr := time.ParseInLocation("01-02-06", text, time.Local)
+					if parseErr != nil {
+						// A column without a usable date is dropped instead of being
+						// stored under the zero time.
+						if text != "" {
+							fmt.Println("日期解析失败", text, parseErr)
+						}
+						delete(dateMap, k)
+						continue
+					}
+					dateMap[k] = stamp.Format(utils.FormatDate)
+				}
+
+			case i > dateRow && i < sampleEnd:
+				cells := sheet.Row(i).Cells
+				// Sample labels: a non-empty category cell completes one mapping.
+			sampleLabels:
+				for k, cell := range cells {
+					text := cell.String()
+					if text == "" {
+						continue
+					}
+					switch k {
+					case 1:
+						province = text
+					case 3:
+						description = text
+					case lastLabelCol:
+						exchange = text
+						item := new(models.BaseFromCoalmineMapping)
+						item.IndexName = source + "_" + province + "_" + exchange
+						item.IndexCode = utils.MD5(item.IndexName)
+						item.Exchange = exchange
+						item.Description = description
+						item.Province = province
+						item.Source = source
+						item.CreateTime = time.Now()
+						items = append(items, item)
+						break sampleLabels
+					}
+				}
+				// Sample values: one entry per dated column.
+				for k, cell := range cells {
+					dataTime, dated := dateMap[k]
+					if k <= lastLabelCol || !dated {
+						continue
+					}
+					item := new(models.BaseFromCoalmineIndex)
+					item.IndexName = source + "_" + province + "_" + exchange
+					item.IndexCode = codeMap[item.IndexName]
+					item.Exchange = exchange
+					item.DealValue = cell.String()
+					item.DataTime = dataTime
+					item.ModifyTime = time.Now()
+					item.CreateTime = time.Now()
+					itemsIndex = append(itemsIndex, item)
+				}
+
+			case i > companyStart:
+				cells := sheet.Row(i).Cells
+				// Company labels: a non-empty group cell completes one mapping.
+			companyLabels:
+				for k, cell := range cells {
+					text := cell.String()
+					if text == "" {
+						continue
+					}
+					switch k {
+					case 0:
+						rank = text
+					case 1:
+						province = text
+					case 2:
+						city = text
+					case 3:
+						companyName = text
+					case lastLabelCol:
+						group = text
+						item := new(models.BaseFromCoalmineCompanyMapping)
+						item.IndexName = source + "_" + province + "_" + city + "_" + companyName
+						item.IndexCode = utils.MD5(item.IndexName)
+						item.Rank = rank
+						item.Province = province
+						item.City = city
+						item.CompanyName = companyName
+						item.GroupName = group
+						item.Source = source
+						item.CreateTime = time.Now()
+						itemsCompany = append(itemsCompany, item)
+						break companyLabels
+					}
+				}
+				// Company values are collected from the row after the first company
+				// row onwards, as in the original layout handling.
+				if i > companyStart+1 {
+					for k, cell := range cells {
+						dataTime, dated := dateMap[k]
+						if k <= lastLabelCol || !dated {
+							continue
+						}
+						item := new(models.BaseFromCoalmineCompanyIndex)
+						item.IndexName = source + "_" + province + "_" + city + "_" + companyName
+						item.IndexCode = codeCompanyMap[item.IndexName]
+						item.DealValue = cell.String()
+						item.DataTime = dataTime
+						item.ModifyTime = time.Now()
+						item.CreateTime = time.Now()
+						itemsCompanyIndex = append(itemsCompanyIndex, item)
+					}
+				}
+			}
+		}
+	}
+
+	// Persist indicator mappings that are not known yet (best effort).
+	for _, v := range items {
+		if codeMap[v.IndexName] != "" {
+			continue
+		}
+		codeMap[v.IndexName] = v.IndexCode
+		newId, addErr := models.AddBaseFromCoalmineMapping(v)
+		if addErr != nil {
+			fmt.Println("添加指标名称错误", addErr)
+		} else {
+			fmt.Println("添加指标名称成功", newId)
+		}
+	}
+	fmt.Println("指标操作完成")
+
+	// Persist indicator values: insert unknown ones, update changed ones.
+	for _, v := range itemsIndex {
+		v.IndexCode = codeMap[v.IndexName]
+		if v.DealValue == "" {
+			// An empty cell neither creates nor overwrites a stored value.
+			continue
+		}
+		key := v.IndexName + v.DataTime
+		stored := indexMap[key]
+		switch {
+		case stored == "":
+			newId, addErr := models.AddBaseFromCoalmineIndex(v)
+			if addErr != nil {
+				fmt.Println("添加数据错误", addErr)
+				continue
+			}
+			fmt.Println("新增成功", newId)
+			// Remember the value so a repeated key is not inserted twice.
+			indexMap[key] = v.DealValue
+		case stored != v.DealValue:
+			if err = models.UpdateBaseFromCoalmineIndex(v); err != nil {
+				fmt.Println("修改数据错误错误", err)
+				return err
+			}
+			indexMap[key] = v.DealValue
+		}
+	}
+	fmt.Println("数据操作完成")
+
+	// Persist company mappings that are not known yet (best effort).
+	for _, v := range itemsCompany {
+		if codeCompanyMap[v.IndexName] != "" {
+			continue
+		}
+		codeCompanyMap[v.IndexName] = v.IndexCode
+		newId, addErr := models.AddBaseFromCoalmineCompanyMapping(v)
+		if addErr != nil {
+			fmt.Println("添加公司指标名称错误", addErr)
+		} else {
+			fmt.Println("新增公司成功", newId)
+		}
+	}
+	fmt.Println("公司指标操作完成")
+
+	// Persisting the collected company values is disabled for now:
+	//for _, v := range itemsCompanyIndex {
+	//	newId, err := models.AddBaseFromCoalmineCompanyIndex(v)
+	//	if err != nil {
+	//		fmt.Println("添加公司数据错误", err)
+	//return
+	//	} else {
+	//		fmt.Println("新增公司数据成功", newId)
+	//	}
+	//}
+	return nil
+}

+ 2 - 1
services/task.go

@@ -12,7 +12,8 @@ func Task() {
 	refreshEic := task.NewTask("refreshData", "0 0 1,6 * * *", RefreshEic)
 	task.AddTask("数据爬取", refreshData)
 	task.AddTask("欧洲天然气爬取", refreshEic)
-	task.StartTask()
+	//task.StartTask()
+	FileCoalJsm()
 	fmt.Println("end crawler")
 }