package blevex import ( "errors" "github.com/blevesearch/bleve/v2/analysis" "github.com/blevesearch/bleve/v2/registry" ) // SegoAnalyzer from analysis.DefaultAnalyzer type SegoAnalyzer struct { CharFilters []analysis.CharFilter Tokenizer *SegoTokenizer TokenFilters []analysis.TokenFilter } func (a *SegoAnalyzer) Analyze(input []byte) analysis.TokenStream { if a.CharFilters != nil { for _, cf := range a.CharFilters { input = cf.Filter(input) } } tokens := a.Tokenizer.Tokenize(input) if a.TokenFilters != nil { for _, tf := range a.TokenFilters { tokens = tf.Filter(tokens) } } return tokens } func analyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) { tokenizerName, ok := config["tokenizer"].(string) if !ok { return nil, errors.New("must specify tokenizer") } tokenizer, err := cache.TokenizerNamed(tokenizerName) if err != nil { return nil, err } segoTokenizer, ok := tokenizer.(*SegoTokenizer) if !ok { return nil, errors.New("tokenizer must be of type sego") } alz := &SegoAnalyzer{ Tokenizer: segoTokenizer, } return alz, nil } func init() { registry.RegisterAnalyzer("sego", analyzerConstructor) }