package blevex

import (
	"errors"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/registry"
)

// SegoAnalyzer is modeled on analysis.DefaultAnalyzer: it runs the input
// through any configured character filters, tokenizes it with a
// SegoTokenizer, and then applies any configured token filters.
type SegoAnalyzer struct {
	CharFilters  []analysis.CharFilter
	Tokenizer    *SegoTokenizer
	TokenFilters []analysis.TokenFilter
}

// Analyze implements the analysis.Analyzer interface.
func (a *SegoAnalyzer) Analyze(input []byte) analysis.TokenStream {
	// Apply character filters to the raw input first.
	if a.CharFilters != nil {
		for _, cf := range a.CharFilters {
			input = cf.Filter(input)
		}
	}
	// Tokenize the (possibly filtered) input.
	tokens := a.Tokenizer.Tokenize(input)
	// Apply token filters to the resulting token stream.
	if a.TokenFilters != nil {
		for _, tf := range a.TokenFilters {
			tokens = tf.Filter(tokens)
		}
	}
	return tokens
}

// analyzerConstructor builds a SegoAnalyzer from its configuration.
// The config must name a registered tokenizer of type *SegoTokenizer.
func analyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizerName, ok := config["tokenizer"].(string)
	if !ok {
		return nil, errors.New("must specify tokenizer")
	}
	tokenizer, err := cache.TokenizerNamed(tokenizerName)
	if err != nil {
		return nil, err
	}

	segoTokenizer, ok := tokenizer.(*SegoTokenizer)
	if !ok {
		return nil, errors.New("tokenizer must be of type sego")
	}
	alz := &SegoAnalyzer{
		Tokenizer: segoTokenizer,
	}
	return alz, nil
}
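
// Example usage (a sketch, not taken from this repository): it assumes the
// companion SegoTokenizer in this package is registered under the tokenizer
// type "sego" and accepts a "dictpath" option pointing at a sego dictionary
// file; adjust both to the actual tokenizer configuration.
//
//	indexMapping := bleve.NewIndexMapping()
//	err := indexMapping.AddCustomTokenizer("sego", map[string]interface{}{
//		"type":     "sego",              // hypothetical: tokenizer type name
//		"dictpath": "/path/to/dict.txt", // hypothetical: sego dictionary location
//	})
//	if err != nil {
//		return err
//	}
//	err = indexMapping.AddCustomAnalyzer("sego", map[string]interface{}{
//		"type":      "sego",
//		"tokenizer": "sego",
//	})
//	if err != nil {
//		return err
//	}
//	indexMapping.DefaultAnalyzer = "sego"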

// init registers the analyzer with bleve under the name "sego".
func init() {
	registry.RegisterAnalyzer("sego", analyzerConstructor)
}