package blevex import ( "errors" "github.com/blevesearch/bleve/v2/analysis" "github.com/blevesearch/bleve/v2/registry" ) func analyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) { tokenizerName, ok := config["tokenizer"].(string) if !ok { return nil, errors.New("must specify tokenizer") } tokenizer, err := cache.TokenizerNamed(tokenizerName) if err != nil { return nil, err } jbtk, ok := tokenizer.(*JiebaTokenizer) if !ok { return nil, errors.New("tokenizer must be of type jieba") } alz := &JiebaAnalyzer{ Tokenizer: jbtk, } return alz, nil } func init() { registry.RegisterAnalyzer("gojieba", analyzerConstructor) } // JiebaAnalyzer from analysis.DefaultAnalyzer type JiebaAnalyzer struct { CharFilters []analysis.CharFilter Tokenizer *JiebaTokenizer TokenFilters []analysis.TokenFilter } func (a *JiebaAnalyzer) Analyze(input []byte) analysis.TokenStream { if a.CharFilters != nil { for _, cf := range a.CharFilters { input = cf.Filter(input) } } tokens := a.Tokenizer.Tokenize(input) if a.TokenFilters != nil { for _, tf := range a.TokenFilters { tokens = tf.Filter(tokens) } } return tokens } func (a *JiebaAnalyzer) Free() { if a.Tokenizer != nil { a.Tokenizer.Free() } else { panic("JiebaAnalyzer.Tokenizer is nil, this should not happen") } }