| | |
| | | github.com/gin-gonic/gin v1.9.0 |
| | | github.com/golang-jwt/jwt/v4 v4.5.0 |
| | | github.com/google/uuid v1.3.1 |
| | | github.com/huichen/sego v0.0.0-20210824061530-c87651ea5c76 |
| | | github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 |
| | | github.com/nsqio/go-nsq v1.1.0 |
| | | github.com/open-policy-agent/opa v0.57.1 |
| | |
| | | github.com/swaggo/gin-swagger v1.6.0 |
| | | github.com/swaggo/swag v1.16.1 |
| | | github.com/xuri/excelize/v2 v2.8.0 |
| | | github.com/yanyiwu/gojieba v1.3.0 |
| | | go.uber.org/zap v1.24.0 |
| | | golang.org/x/crypto v0.15.0 |
| | | google.golang.org/genproto v0.0.0-20230711160842-782d3b101e98 |
| | |
| | | github.com/KyleBanks/depth v1.2.1 // indirect |
| | | github.com/OneOfOne/xxhash v1.2.8 // indirect |
| | | github.com/RoaringBitmap/roaring v1.2.3 // indirect |
| | | github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d // indirect |
| | | github.com/agnivade/levenshtein v1.1.1 // indirect |
| | | github.com/beorn7/perks v1.0.1 // indirect |
| | | github.com/bits-and-blooms/bitset v1.2.0 // indirect |
| | |
| | | github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= |
| | | github.com/RoaringBitmap/roaring v1.2.3 h1:yqreLINqIrX22ErkKI0vY47/ivtJr6n+kMhVOVmhWBY= |
| | | github.com/RoaringBitmap/roaring v1.2.3/go.mod h1:plvDsJQpxOC5bw8LRteu/MLWHsHez/3y6cubLI4/1yE= |
| | | github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d h1:ir/IFJU5xbja5UaBEQLjcvn7aAU01nqU/NUyOBEU+ew= |
| | | github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d/go.mod h1:PRWNwWq0yifz6XDPZu48aSld8BWwBfr2JKB2bGWiEd4= |
| | | github.com/adamzy/sego v0.0.0-20151004184924-5eab9a44f8e8/go.mod h1:KQxo+Xesl2wLJ3yJcX443KaoWzXpbPzU1GNRyE8kNEY= |
| | | github.com/agnivade/levenshtein v1.1.1 h1:QY8M92nrzkmr798gCo3kmMyqXFzdQVpxLlGPRBij0P8= |
| | | github.com/agnivade/levenshtein v1.1.1/go.mod h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVbJomOvKkmgYbo= |
| | | github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= |
| | |
| | | github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= |
| | | github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= |
| | | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= |
| | | github.com/huichen/sego v0.0.0-20210824061530-c87651ea5c76 h1:qNQ2+1IQT9Mor/vfEHePOQSbiapLoNI7sQmpxM7l1Ew= |
| | | github.com/huichen/sego v0.0.0-20210824061530-c87651ea5c76/go.mod h1:Fymg8+khR/cKSuIwqRxy/jmZg7PIPLk7CauXzrbcMUM= |
| | | github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= |
| | | github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= |
| | | github.com/issue9/assert v1.4.1 h1:gUtOpMTeaE4JTe9kACma5foOHBvVt1p5XTFrULDwdXI= |
| | | github.com/issue9/assert v1.4.1/go.mod h1:Yktk83hAVl1SPSYtd9kjhBizuiBIqUQyj+D5SE2yjVY= |
| | | github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= |
| | | github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= |
| | | github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= |
| | |
| | | github.com/xuri/nfp v0.0.0-20230819163627-dc951e3ffe1a/go.mod h1:WwHg+CVyzlv/TX9xqBFXEZAuxOPxn2k1GNHwG41IIUQ= |
| | | github.com/xuri/nfp v0.0.0-20230919160717-d98342af3f05 h1:qhbILQo1K3mphbwKh1vNm4oGezE1eF9fQWmNiIpSfI4= |
| | | github.com/xuri/nfp v0.0.0-20230919160717-d98342af3f05/go.mod h1:WwHg+CVyzlv/TX9xqBFXEZAuxOPxn2k1GNHwG41IIUQ= |
| | | github.com/yanyiwu/gojieba v1.3.0 h1:6VeaPOR+MawnImdeSvWNr7rP4tvUfnGlEKaoBnR33Ds= |
| | | github.com/yanyiwu/gojieba v1.3.0/go.mod h1:54wkP7sMJ6bklf7yPl6F+JG71dzVUU1WigZbR47nGdY= |
| | | github.com/yashtewari/glob-intersection v0.2.0 h1:8iuHdN88yYuCzCdjt0gDe+6bAhUwBeEWqThExu54RFg= |
| | | github.com/yashtewari/glob-intersection v0.2.0/go.mod h1:LK7pIC3piUjovexikBbJ26Yml7g8xa5bsjfx2v1fwok= |
| | | github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= |
| | |
| | | "github.com/blevesearch/bleve/v2/registry" |
| | | ) |
| | | |
| | | func analyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) { |
| | | tokenizerName, ok := config["tokenizer"].(string) |
| | | if !ok { |
| | | return nil, errors.New("must specify tokenizer") |
| | | } |
| | | tokenizer, err := cache.TokenizerNamed(tokenizerName) |
| | | if err != nil { |
| | | return nil, err |
| | | } |
| | | |
| | | jbtk, ok := tokenizer.(*JiebaTokenizer) |
| | | if !ok { |
| | | return nil, errors.New("tokenizer must be of type jieba") |
| | | } |
| | | alz := &JiebaAnalyzer{ |
| | | Tokenizer: jbtk, |
| | | } |
| | | return alz, nil |
| | | } |
| | | |
// init registers the jieba analyzer constructor with the bleve registry
// under the name "gojieba" so index mappings can refer to it by name.
func init() {
	registry.RegisterAnalyzer("gojieba", analyzerConstructor)
}
| | | |
| | | // JiebaAnalyzer from analysis.DefaultAnalyzer |
| | | type JiebaAnalyzer struct { |
| | | // SegoAnalyzer from analysis.DefaultAnalyzer |
| | | type SegoAnalyzer struct { |
| | | CharFilters []analysis.CharFilter |
| | | Tokenizer *JiebaTokenizer |
| | | Tokenizer *SegoTokenizer |
| | | TokenFilters []analysis.TokenFilter |
| | | } |
| | | |
| | | func (a *JiebaAnalyzer) Analyze(input []byte) analysis.TokenStream { |
| | | func (a *SegoAnalyzer) Analyze(input []byte) analysis.TokenStream { |
| | | if a.CharFilters != nil { |
| | | for _, cf := range a.CharFilters { |
| | | input = cf.Filter(input) |
| | |
| | | return tokens |
| | | } |
| | | |
| | | func (a *JiebaAnalyzer) Free() { |
| | | if a.Tokenizer != nil { |
| | | a.Tokenizer.Free() |
| | | } else { |
| | | panic("JiebaAnalyzer.Tokenizer is nil, this should not happen") |
| | | func analyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) { |
| | | tokenizerName, ok := config["tokenizer"].(string) |
| | | if !ok { |
| | | return nil, errors.New("must specify tokenizer") |
| | | } |
| | | tokenizer, err := cache.TokenizerNamed(tokenizerName) |
| | | if err != nil { |
| | | return nil, err |
| | | } |
| | | |
| | | segoTokenizer, ok := tokenizer.(*SegoTokenizer) |
| | | if !ok { |
| | | return nil, errors.New("tokenizer must be of type sego") |
| | | } |
| | | alz := &SegoAnalyzer{ |
| | | Tokenizer: segoTokenizer, |
| | | } |
| | | return alz, nil |
| | | } |
| | | |
// init registers the sego analyzer constructor with the bleve registry
// under the name "sego".
func init() {
	registry.RegisterAnalyzer("sego", analyzerConstructor)
}
| | |
| | | "fmt" |
| | | "github.com/blevesearch/bleve/v2" |
| | | "github.com/blevesearch/bleve/v2/mapping" |
| | | "github.com/yanyiwu/gojieba" |
| | | "sync" |
| | | ) |
| | | |
| | | // InitAnalyzer 加载自定义分词器(结巴分词) |
| | | // InitAnalyzer 加载自定义分词器(sego) |
| | | |
| | | var defaultAnalyzer *mapping.IndexMappingImpl |
| | | |
| | | func InitAnalyzer() { |
| | | indexMapping := bleve.NewIndexMapping() |
| | | //os.RemoveAll(IndexDir) |
| | | //// clean index when example finished |
| | | //defer os.RemoveAll(IndexDir) |
| | | |
| | | err := indexMapping.AddCustomTokenizer("gojieba", |
| | | err := indexMapping.AddCustomTokenizer("sego", |
| | | map[string]interface{}{ |
| | | "dictpath": gojieba.DICT_PATH, |
| | | "hmmpath": gojieba.HMM_PATH, |
| | | "userdictpath": gojieba.USER_DICT_PATH, |
| | | "idf": gojieba.IDF_PATH, |
| | | "stop_words": gojieba.STOP_WORDS_PATH, |
| | | "type": "gojieba", |
| | | "dictpath": "conf/dictionary.txt", // 替换为实际的字典路径 |
| | | "type": "sego", |
| | | }, |
| | | ) |
| | | if err != nil { |
| | | panic(err) |
| | | } |
| | | err = indexMapping.AddCustomAnalyzer("gojieba", |
| | | err = indexMapping.AddCustomAnalyzer("sego", |
| | | map[string]interface{}{ |
| | | "type": "gojieba", |
| | | "tokenizer": "gojieba", |
| | | "type": "sego", |
| | | "tokenizer": "sego", |
| | | }, |
| | | ) |
| | | if err != nil { |
| | | panic(err) |
| | | } |
| | | indexMapping.DefaultAnalyzer = "gojieba" |
| | | indexMapping.DefaultAnalyzer = "sego" |
| | | defaultAnalyzer = indexMapping |
| | | } |
| | | |
| | |
| | | |
| | | import ( |
| | | "errors" |
| | | "github.com/huichen/sego" |
| | | |
| | | "github.com/blevesearch/bleve/v2/analysis" |
| | | "github.com/blevesearch/bleve/v2/registry" |
| | | "github.com/yanyiwu/gojieba" |
| | | ) |
| | | |
| | | type JiebaTokenizer struct { |
| | | handle *gojieba.Jieba |
| | | type SegoTokenizer struct { |
| | | segmenter sego.Segmenter |
| | | } |
| | | |
// Compile-time checks that both tokenizers satisfy analysis.Tokenizer.
var _ analysis.Tokenizer = &JiebaTokenizer{}
var _ analysis.Tokenizer = &SegoTokenizer{}
| | | |
| | | func NewJiebaTokenizer(dictpath, hmmpath, userdictpath, idf, stop_words string) *JiebaTokenizer { |
| | | x := gojieba.NewJieba(dictpath, hmmpath, userdictpath, idf, stop_words) |
| | | return &JiebaTokenizer{x} |
| | | func NewSegoTokenizer(dictpath string) *SegoTokenizer { |
| | | segmenter := sego.Segmenter{} |
| | | segmenter.LoadDictionary(dictpath) |
| | | return &SegoTokenizer{segmenter: segmenter} |
| | | } |
| | | |
| | | func (x *JiebaTokenizer) Free() { |
| | | x.handle.Free() |
| | | } |
| | | |
| | | // Analyze([]byte) TokenStream |
| | | func (x *JiebaTokenizer) Tokenize(sentence []byte) analysis.TokenStream { |
| | | func (st *SegoTokenizer) Tokenize(sentence []byte) analysis.TokenStream { |
| | | result := make(analysis.TokenStream, 0) |
| | | pos := 1 |
| | | words := x.handle.Tokenize(string(sentence), gojieba.SearchMode, true) |
| | | for _, word := range words { |
| | | segments := st.segmenter.Segment(sentence) |
| | | for _, segment := range segments { |
| | | token := analysis.Token{ |
| | | Term: []byte(word.Str), |
| | | Start: word.Start, |
| | | End: word.End, |
| | | Term: []byte(segment.Token().Text()), |
| | | Start: segment.Start(), |
| | | End: segment.End(), |
| | | Position: pos, |
| | | Type: analysis.Ideographic, |
| | | } |
| | |
| | | if !ok { |
| | | return nil, errors.New("config dictpath not found") |
| | | } |
| | | hmmpath, ok := config["hmmpath"].(string) |
| | | if !ok { |
| | | return nil, errors.New("config hmmpath not found") |
| | | } |
| | | userdictpath, ok := config["userdictpath"].(string) |
| | | if !ok { |
| | | return nil, errors.New("config userdictpath not found") |
| | | } |
| | | idf, ok := config["idf"].(string) |
| | | if !ok { |
| | | return nil, errors.New("config idf not found") |
| | | } |
| | | stop_words, ok := config["stop_words"].(string) |
| | | if !ok { |
| | | return nil, errors.New("config stop_words not found") |
| | | } |
| | | return NewJiebaTokenizer(dictpath, hmmpath, userdictpath, idf, stop_words), nil |
| | | return NewSegoTokenizer(dictpath), nil |
| | | } |
| | | |
// init registers tokenizerConstructor with the bleve registry.
// NOTE(review): merge residue — the same constructor is registered
// under both "gojieba" and "sego". After the sego migration only the
// "sego" registration should remain; confirm which callers still
// request "gojieba" before dropping it.
func init() {
	registry.RegisterTokenizer("gojieba", tokenizerConstructor)
	registry.RegisterTokenizer("sego", tokenizerConstructor)
}