From deb1fb612b471789f5452f9536421ebee0f36155 Mon Sep 17 00:00:00 2001
From: zhangqian <zhangqian@123.com>
Date: Wed, 22 Nov 2023 18:16:14 +0800
Subject: [PATCH] 换一个纯go实现的中文分词包

---
 pkg/blevex/bleve.go |   25 ++++++++-----------------
 1 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/pkg/blevex/bleve.go b/pkg/blevex/bleve.go
index 3afe948..40b334e 100644
--- a/pkg/blevex/bleve.go
+++ b/pkg/blevex/bleve.go
@@ -4,43 +4,34 @@
 	"fmt"
 	"github.com/blevesearch/bleve/v2"
 	"github.com/blevesearch/bleve/v2/mapping"
-	"github.com/yanyiwu/gojieba"
 	"sync"
 )
 
-// InitAnalyzer 加载自定义分词器（结巴分词）
+// InitAnalyzer 加载自定义分词器（sego）
 
 var defaultAnalyzer *mapping.IndexMappingImpl
 
 func InitAnalyzer() {
 	indexMapping := bleve.NewIndexMapping()
-	//os.RemoveAll(IndexDir)
-	//// clean index when example finished
-	//defer os.RemoveAll(IndexDir)
-
-	err := indexMapping.AddCustomTokenizer("gojieba",
+	err := indexMapping.AddCustomTokenizer("sego",
 		map[string]interface{}{
-			"dictpath":     gojieba.DICT_PATH,
-			"hmmpath":      gojieba.HMM_PATH,
-			"userdictpath": gojieba.USER_DICT_PATH,
-			"idf":          gojieba.IDF_PATH,
-			"stop_words":   gojieba.STOP_WORDS_PATH,
-			"type":         "gojieba",
+			"dictpath": "conf/dictionary.txt", // 替换为实际的字典路径
+			"type":     "sego",
 		},
 	)
 	if err != nil {
 		panic(err)
 	}
-	err = indexMapping.AddCustomAnalyzer("gojieba",
+	err = indexMapping.AddCustomAnalyzer("sego",
 		map[string]interface{}{
-			"type":      "gojieba",
-			"tokenizer": "gojieba",
+			"type":      "sego",
+			"tokenizer": "sego",
 		},
 	)
 	if err != nil {
 		panic(err)
 	}
-	indexMapping.DefaultAnalyzer = "gojieba"
+	indexMapping.DefaultAnalyzer = "sego"
 	defaultAnalyzer = indexMapping
 }
 

--
Gitblit v1.8.0