From deb1fb612b471789f5452f9536421ebee0f36155 Mon Sep 17 00:00:00 2001 From: zhangqian <zhangqian@123.com> Date: 星期三, 22 十一月 2023 18:16:14 +0800 Subject: [PATCH] 换一个纯go实现的中文分词包 --- pkg/blevex/bleve.go | 25 ++++++++----------------- 1 files changed, 8 insertions(+), 17 deletions(-) diff --git a/pkg/blevex/bleve.go b/pkg/blevex/bleve.go index 3afe948..40b334e 100644 --- a/pkg/blevex/bleve.go +++ b/pkg/blevex/bleve.go @@ -4,43 +4,34 @@ "fmt" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/mapping" - "github.com/yanyiwu/gojieba" "sync" ) -// InitAnalyzer 加载自定义分词器（结巴分词） +// InitAnalyzer 加载自定义分词器（sego） var defaultAnalyzer *mapping.IndexMappingImpl func InitAnalyzer() { indexMapping := bleve.NewIndexMapping() - //os.RemoveAll(IndexDir) - //// clean index when example finished - //defer os.RemoveAll(IndexDir) - - err := indexMapping.AddCustomTokenizer("gojieba", + err := indexMapping.AddCustomTokenizer("sego", map[string]interface{}{ - "dictpath": gojieba.DICT_PATH, - "hmmpath": gojieba.HMM_PATH, - "userdictpath": gojieba.USER_DICT_PATH, - "idf": gojieba.IDF_PATH, - "stop_words": gojieba.STOP_WORDS_PATH, - "type": "gojieba", + "dictpath": "conf/dictionary.txt", // 替换为实际的字典路径 + "type": "sego", }, ) if err != nil { panic(err) } - err = indexMapping.AddCustomAnalyzer("gojieba", + err = indexMapping.AddCustomAnalyzer("sego", map[string]interface{}{ - "type": "gojieba", - "tokenizer": "gojieba", + "type": "sego", + "tokenizer": "sego", }, ) if err != nil { panic(err) } - indexMapping.DefaultAnalyzer = "gojieba" + indexMapping.DefaultAnalyzer = "sego" defaultAnalyzer = indexMapping } -- Gitblit v1.8.0