1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- package daf
// DFANode is one node of the rune-keyed keyword trie used by DFA.
// (DFA is short for Deterministic Finite Automaton.)
type DFANode struct {
	// children maps the next rune to the node reached by consuming it.
	children map[rune]*DFANode
	// isEnd reports whether a keyword terminates at this node.
	isEnd bool
	// keyword is the complete keyword that terminates at this node; it is
	// only meaningful when isEnd is true.
	keyword string
}
- type DFA struct {
- root *DFANode
- }
- func NewDFA() *DFA {
- return &DFA{
- root: &DFANode{
- children: make(map[rune]*DFANode),
- },
- }
- }
- func (d *DFA) AddKeyword(keyword string) {
- node := d.root
- for _, r := range keyword {
- if _, exists := node.children[r]; !exists {
- node.children[r] = &DFANode{
- children: make(map[rune]*DFANode),
- isEnd: false,
- }
- }
- node = node.children[r]
- }
- node.isEnd = true
- node.keyword = keyword
- }
- func (d *DFA) Build(keywords []string) {
- for _, kw := range keywords {
- d.AddKeyword(kw)
- }
- }
- func (d *DFA) Search(text string) map[string]int {
- result := make(map[string]int)
- runes := []rune(text)
- n := len(runes)
- for i := 0; i < n; {
- node := d.root
- longestMatch := ""
- matchEnd := i
- // 尝试寻找从i开始的最长匹配
- for j := i; j < n; j++ {
- r := runes[j]
- if nextNode, exists := node.children[r]; exists {
- node = nextNode
- if node.isEnd {
- // 检查边界条件 - 中文不需要严格的单词边界检查
- longestMatch = node.keyword
- matchEnd = j + 1
- }
- } else {
- break
- }
- }
- if longestMatch != "" {
- result[longestMatch]++
- i = matchEnd // 跳到匹配结束位置
- } else {
- i++
- }
- }
- return result
- }
- //单元测试用
- //func main() {
- // keywords := []string{"人工智能", "机器学习", "深度学习", "AI"}
- // text := "人工智能人工智能(AI)是机器学习的重要分支,深度学习则是机器学习的一个子领域。AI技术正在快速发展。机器学习AI机器学习机器学习机器学习机器学习机器学习机器学习机器学习机器学习机器学习AIAIAI机器学习机器学习AI"
- //
- // dfa := NewDFA()
- // dfa.Build(keywords)
- // result := dfa.Search(text)
- //
- // fmt.Println("关键词出现次数:")
- // for kw, count := range result {
- // fmt.Printf("%s: %d\n", kw, count)
- // }
- //}
|