zhangqian
2024-12-13 233bd12749d48c0b2cd8c305e2428277bbe6b5fd
models/gather_model.go
@@ -4,6 +4,7 @@
   "bytes"
   "context"
   "encoding/json"
   "errors"
   "fmt"
   "github.com/elastic/go-elasticsearch/v6"
   "log"
@@ -29,8 +30,15 @@
   Task           *db.ModelTask
}
func (m *GatherModel) Init(task *db.ModelTask) error {
// ProcessedRecord tracks one already-handled gather record so repeated
// Elasticsearch query results are not aggregated twice across Run calls.
type ProcessedRecord struct {
   UniqueKey string    // unique identifier; built as "DocumentNumber-PicDate" by Run
   Timestamp time.Time // wall-clock time the record was processed (used for retention cleanup)
}
func (m *GatherModel) Init(task *db.ModelTask) error {
   if len(task.DomainUnitIds) == 0 {
      return errors.New("empty domain set")
   }
   orgIds, areaIds, err := service.GetOrgIdsAndAreaIdsByDomainUnitIds(task.DomainUnitIds)
   if err != nil {
      return err
@@ -63,7 +71,22 @@
   AppearInterval int    `gorm:"type:int;" json:"appearInterval"` //出现间隔,单位为秒
}
// Package-level dedup state shared by every GatherModel run.
var (
   processed        map[string]ProcessedRecord         // already-processed records, keyed by ProcessedRecord.UniqueKey
   cleanupThreshold = time.Now().Add(-100 * time.Hour) // retention cutoff: keep only records from the last 100 hours
   // NOTE(review): cleanupThreshold is evaluated once at package init and is
   // never refreshed, so the 100-hour window does not slide while the process
   // runs — entries added after startup will effectively never expire. Confirm
   // whether Run() should recompute the cutoff (time.Now().Add(-100*time.Hour))
   // on each invocation instead.
)
func init() {
   processed = make(map[string]ProcessedRecord)
}
func (m *GatherModel) Run() error {
   // 清理过期的记录
   for key, record := range processed {
      if record.Timestamp.Before(cleanupThreshold) {
         delete(processed, key)
      }
   }
   records, err := queryElasticsearch(db.GetEsClient(), m)
   if err != nil {
      log.Fatalf("Failed to query Elasticsearch: %v", err)
@@ -73,7 +96,27 @@
      return nil
   }
   aggregation, err := analyzeAndAggregate(records)
   newRecords := make([]*GatherRecord, 0)
   // 聚合逻辑
   for _, record := range records {
      // 生成唯一标识
      uniqueKey := fmt.Sprintf("%s-%s", record.DocumentNumber, record.PicDate)
      // 如果已经处理过,跳过
      if _, exists := processed[uniqueKey]; exists {
         continue
      }
      // 添加到已处理记录
      processed[uniqueKey] = ProcessedRecord{
         UniqueKey: uniqueKey,
         Timestamp: time.Now(),
      }
      newRecords = append(newRecords, record)
   }
   aggregation, err := analyzeAndAggregate(newRecords)
   if err != nil {
      log.Fatalf("Failed to analyze and aggregate data: %v", err)
   }
@@ -89,18 +132,19 @@
      return err
   }
   event := strings.Join(typeNames, ",")
   for location, persons := range aggregation {
   for lt, persons := range aggregation {
      result := &db.ModelTaskResults{
         Title:       m.Task.Name,
         Event:       fmt.Sprintf("%s/%d人", event, len(persons)),
         ModelID:     m.Task.ModelID,
         ModelTaskID: m.Task.ID,
         CommunityId: location.CommunityId,
         OrgID:       location.OrgId,
         CommunityId: lt.CommunityId,
         OrgID:       lt.OrgId,
         ObjectIds:   strings.Join(persons.Elements(), ","),
         Location:    location.Location,
         Building:    location.Building,
         Floor:       location.Floor,
         Location:    lt.Location,
         Building:    lt.Building,
         Floor:       lt.Floor,
         PicDate:     lt.Time,
      }
      results = append(results, result)
   }
@@ -113,7 +157,7 @@
   return nil
}
func queryElasticsearch(esClient *elasticsearch.Client, gatherModel *GatherModel) ([]GatherRecord, error) {
func queryElasticsearch(esClient *elasticsearch.Client, gatherModel *GatherModel) ([]*GatherRecord, error) {
   var buf bytes.Buffer
   now := time.Now()
   start := now.Add(-time.Duration(gatherModel.DaysWindow) * 24 * time.Hour)
@@ -227,6 +271,19 @@
                                             },
                                          },
                                       },
                                       "frequency_filter": map[string]interface{}{ // 添加频率过滤
                                          "bucket_selector": map[string]interface{}{
                                             "buckets_path": map[string]interface{}{
                                                "eventCount": "_count", // 聚合事件次数
                                             },
                                             "script": map[string]interface{}{
                                                "source": "params.eventCount >= params.threshold", // 筛选频率达到阈值的事件
                                                "params": map[string]interface{}{
                                                   "threshold": gatherModel.Threshold,
                                                },
                                             },
                                          },
                                       },
                                    },
                                 },
                              },
@@ -269,7 +326,7 @@
   }
   // 解析聚合结果
   var records []GatherRecord
   var records []*GatherRecord
   if aggs, ok := result["aggregations"].(map[string]interface{}); ok {
      if orgBuckets, ok := aggs["orgs"].(map[string]interface{})["buckets"].([]interface{}); ok {
         for _, orgBucket := range orgBuckets {
@@ -294,7 +351,7 @@
                              if gatherEvents, ok := floorBucket.(map[string]interface{})["gather_events"].(map[string]interface{})["buckets"].([]interface{}); ok {
                                 for _, eventBucket := range gatherEvents {
                                    key := int64(eventBucket.(map[string]interface{})["key"].(float64)) / 1000 // 将毫秒转换为秒
                                    timestamp := time.Unix(key, 0).Format("2006-01-02T15:04:05")
                                    timestamp := time.Unix(key, 0).UTC().Format("2006-01-02 15:04:05")
                                    // 解析人员
                                    if peopleBuckets, ok := eventBucket.(map[string]interface{})["people"].(map[string]interface{})["buckets"].([]interface{}); ok {
@@ -302,7 +359,7 @@
                                          documentNumber := person.(map[string]interface{})["key"].(string)
                                          // 构建 GatherRecord 结构体
                                          record := GatherRecord{
                                          record := &GatherRecord{
                                             PicDate:        timestamp,
                                             DocumentNumber: documentNumber,
                                             CommunityId:    communityId,
@@ -331,16 +388,17 @@
   return records, nil
}
type GatherLocation struct {
// GatherLocationTime is the aggregation key for gather events: one bucket per
// (community, org, building, floor, location, event time) combination.
type GatherLocationTime struct {
   CommunityId string // community the event belongs to
   OrgId       string // owning organization
   Building    string // building within the community
   Floor       string // floor within the building
   Location    string // human-readable label: community name + building + floor (see analyzeAndAggregate)
   Time        string // event time taken from GatherRecord.PicDate, formatted "2006-01-02 15:04:05" (UTC)
}
func analyzeAndAggregate(records []GatherRecord) (map[GatherLocation]set.StringSet, error) {
   aggregation := make(map[GatherLocation]set.StringSet)
func analyzeAndAggregate(records []*GatherRecord) (map[GatherLocationTime]set.StringSet, error) {
   aggregation := make(map[GatherLocationTime]set.StringSet)
   domainIds := set.NewStringSet()
   for _, record := range records {
      domainIds.Add(record.CommunityId)
@@ -352,16 +410,20 @@
   }
   for _, record := range records {
      if record.DocumentNumber == "" {
         continue
      }
      if domains[record.CommunityId] == nil {
         continue
      }
      location := GatherLocation{
      location := GatherLocationTime{
         CommunityId: record.CommunityId,
         OrgId:       record.OrgId,
         Building:    record.Building,
         Floor:       record.Floor,
         Location:    fmt.Sprintf("%s%s%s", domains[record.CommunityId].Name, record.Building, record.Floor),
         Time:        record.PicDate,
      }
      if aggregation[location] == nil {
         aggregation[location] = set.NewStringSet()