dfa_algorithm.go 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. package daf
  2. // DFANode [DFA 全称为: Deterministic Finite Automaton(确定有穷自动机)算法]
  3. type DFANode struct {
  4. children map[rune]*DFANode
  5. isEnd bool
  6. keyword string
  7. }
  8. type DFA struct {
  9. root *DFANode
  10. }
  11. func NewDFA() *DFA {
  12. return &DFA{
  13. root: &DFANode{
  14. children: make(map[rune]*DFANode),
  15. },
  16. }
  17. }
  18. func (d *DFA) AddKeyword(keyword string) {
  19. node := d.root
  20. for _, r := range keyword {
  21. if _, exists := node.children[r]; !exists {
  22. node.children[r] = &DFANode{
  23. children: make(map[rune]*DFANode),
  24. isEnd: false,
  25. }
  26. }
  27. node = node.children[r]
  28. }
  29. node.isEnd = true
  30. node.keyword = keyword
  31. }
  32. func (d *DFA) Build(keywords []string) {
  33. for _, kw := range keywords {
  34. d.AddKeyword(kw)
  35. }
  36. }
  37. func (d *DFA) Search(text string) map[string]int {
  38. result := make(map[string]int)
  39. runes := []rune(text)
  40. n := len(runes)
  41. for i := 0; i < n; {
  42. node := d.root
  43. longestMatch := ""
  44. matchEnd := i
  45. // 尝试寻找从i开始的最长匹配
  46. for j := i; j < n; j++ {
  47. r := runes[j]
  48. if nextNode, exists := node.children[r]; exists {
  49. node = nextNode
  50. if node.isEnd {
  51. // 检查边界条件 - 中文不需要严格的单词边界检查
  52. longestMatch = node.keyword
  53. matchEnd = j + 1
  54. }
  55. } else {
  56. break
  57. }
  58. }
  59. if longestMatch != "" {
  60. result[longestMatch]++
  61. i = matchEnd // 跳到匹配结束位置
  62. } else {
  63. i++
  64. }
  65. }
  66. return result
  67. }
  68. //单元测试用
  69. //func main() {
  70. // keywords := []string{"人工智能", "机器学习", "深度学习", "AI"}
  71. // text := "人工智能人工智能(AI)是机器学习的重要分支,深度学习则是机器学习的一个子领域。AI技术正在快速发展。机器学习AI机器学习机器学习机器学习机器学习机器学习机器学习机器学习机器学习机器学习AIAIAI机器学习机器学习AI"
  72. //
  73. // dfa := NewDFA()
  74. // dfa.Build(keywords)
  75. // result := dfa.Search(text)
  76. //
  77. // fmt.Println("关键词出现次数:")
  78. // for kw, count := range result {
  79. // fmt.Printf("%s: %d\n", kw, count)
  80. // }
  81. //}