From deb1fb612b471789f5452f9536421ebee0f36155 Mon Sep 17 00:00:00 2001
From: zhangqian <zhangqian@123.com>
Date: Wed, 22 Nov 2023 18:16:14 +0800
Subject: [PATCH] 换一个纯go实现的中文分词包

---
 pkg/blevex/analyzer.go |   58 +++++++++++++++++++++++++---------------------------------
 1 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/pkg/blevex/analyzer.go b/pkg/blevex/analyzer.go
index 29cb8a4..2839517 100644
--- a/pkg/blevex/analyzer.go
+++ b/pkg/blevex/analyzer.go
@@ -7,38 +7,14 @@
 	"github.com/blevesearch/bleve/v2/registry"
 )
 
-func analyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
-	tokenizerName, ok := config["tokenizer"].(string)
-	if !ok {
-		return nil, errors.New("must specify tokenizer")
-	}
-	tokenizer, err := cache.TokenizerNamed(tokenizerName)
-	if err != nil {
-		return nil, err
-	}
-
-	jbtk, ok := tokenizer.(*JiebaTokenizer)
-	if !ok {
-		return nil, errors.New("tokenizer must be of type jieba")
-	}
-	alz := &JiebaAnalyzer{
-		Tokenizer: jbtk,
-	}
-	return alz, nil
-}
-
-func init() {
-	registry.RegisterAnalyzer("gojieba", analyzerConstructor)
-}
-
-// JiebaAnalyzer from analysis.DefaultAnalyzer
-type JiebaAnalyzer struct {
+// SegoAnalyzer is modeled on analysis.DefaultAnalyzer, with the tokenizer fixed to *SegoTokenizer.
+type SegoAnalyzer struct {
 	CharFilters  []analysis.CharFilter
-	Tokenizer    *JiebaTokenizer
+	Tokenizer    *SegoTokenizer
 	TokenFilters []analysis.TokenFilter
 }
 
-func (a *JiebaAnalyzer) Analyze(input []byte) analysis.TokenStream {
+func (a *SegoAnalyzer) Analyze(input []byte) analysis.TokenStream {
 	if a.CharFilters != nil {
 		for _, cf := range a.CharFilters {
 			input = cf.Filter(input)
@@ -53,10 +29,26 @@
 	return tokens
 }
 
-func (a *JiebaAnalyzer) Free() {
-	if a.Tokenizer != nil {
-		a.Tokenizer.Free()
-	} else {
-		panic("JiebaAnalyzer.Tokenizer is nil, this should not happen")
+func analyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
+	tokenizerName, ok := config["tokenizer"].(string)
+	if !ok {
+		return nil, errors.New("must specify tokenizer")
 	}
+	tokenizer, err := cache.TokenizerNamed(tokenizerName)
+	if err != nil {
+		return nil, err
+	}
+
+	segoTokenizer, ok := tokenizer.(*SegoTokenizer)
+	if !ok {
+		return nil, errors.New("tokenizer must be of type sego")
+	}
+	alz := &SegoAnalyzer{
+		Tokenizer: segoTokenizer,
+	}
+	return alz, nil
+}
+
+func init() {
+	registry.RegisterAnalyzer("sego", analyzerConstructor)
 }

--
Gitblit v1.8.0