admin管理员组

文章数量:1278978

I am querying product links from an Elasticsearch database, where I previously imported results from various online shops. The query returns 123 records, but many products are sets, different colors, or similar variations. I want to limit the results to a maximum of 3 products per shop. However, all attempts still return the full list. How can I modify my query to achieve this per-shop limit? Here is my current query:

    {
      "size": 100,
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "key": {
                  "query": "Some Keyword",
                  "fuzziness": "1",
                  "prefix_length": "0",
                  "max_expansions": 3,
                  "operator": "and",
                  "analyzer": "product_analyzer"
                }
              }
            }
          ]
        }
      },
      "aggs": {
        "group_by_shop": {
          "terms": {
            "field": "shop_id.keyword",
            "size": 100
          },
          "aggs": {
            "top_hits_per_shop": {
              "top_hits": {
                "size": 100,
                "sort": [
                  { "_score": { "order": "desc" } }
                ]
              }
            }
          }
        }
      }
    }

I am querying product links from an Elasticsearch database, where I previously imported results from various online shops. The query returns 123 records, but many products are sets, different colors, or similar variations. I want to limit the results to a maximum of 3 products per shop. However, all attempts still return the full list. How can I modify my query to achieve this per-shop limit? Here is my current query:

    {
      "size": 100,
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "key": {
                  "query": "Some Keyword",
                  "fuzziness": "1",
                  "prefix_length": "0",
                  "max_expansions": 3,
                  "operator": "and",
                  "analyzer": "product_analyzer"
                }
              }
            }
          ]
        }
      },
      "aggs": {
        "group_by_shop": {
          "terms": {
            "field": "shop_id.keyword",
            "size": 100
          },
          "aggs": {
            "top_hits_per_shop": {
              "top_hits": {
                "size": 100,
                "sort": [
                  { "_score": { "order": "desc" } }
                ]
              }
            }
          }
        }
      }
    }
Share Improve this question edited Feb 24 at 8:29 Paulo 10.7k5 gold badges23 silver badges37 bronze badges asked Feb 23 at 22:19 Michael MüllerMichael Müller 4018 silver badges23 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 0

TL;DR

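I think you are very close to what you want to achieve. You only need to tweak the two size parameters: set the top-level "size" to 0 so the flat list of all matching hits is not returned, and set the "size" of the top_hits aggregation to the maximum number of products you want per shop (3 in your case). The per-shop results are then read from the "aggregations" section of the response rather than from the top-level "hits".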

Demo

Set up

Below is a small dataset for running the demo

DELETE 79462081

POST 79462081/_bulk
{"index":{}}
{"shop_id": 1, "product": "product_1", "priority": 1}
{"index":{}}
{"shop_id": 1, "product": "product_2", "priority": 2}
{"index":{}}
{"shop_id": 1, "product": "product_3", "priority": 3}
{"index":{}}
{"shop_id": 1, "product": "product_4", "priority": 4}
{"index":{}}
{"shop_id": 2, "product": "product_1", "priority": 1}
{"index":{}}
{"shop_id": 2, "product": "product_2", "priority": 2}
{"index":{}}
{"shop_id": 2, "product": "product_3", "priority": 3}
{"index":{}}
{"shop_id": 2, "product": "product_4", "priority": 4}

Query

Here is a variation of your query

GET 79462081/_search
{
  "size": 0, # Do not return the flat top-level hits; only the per-shop aggregation results are of interest.
  "query": {
    "match_all": {} # For demo purposes, match the whole dataset.
  },
  "aggs": {
    "top_shops": {
      "terms": {
        "field": "shop_id" # One bucket per shop. By default only the top 10 buckets are returned; set "size" here to get more shops.
      },
      "aggs": {
        "top_products": {
          "top_hits": {
            "size": 2, # Maximum number of products returned per shop. Ranked by priority here, but you could also sort by _score.
            "sort": { "priority": { "order": "desc" } }
          }
        }
      }
    }
  }
}

Results

{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 8,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [] # Notice that this section is empty even though "value" is 8. This is because of "size": 0: the documents are matched and used in the aggregation, but not returned here.
  },
  "aggregations": {
    "top_shops": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": 1,
          "doc_count": 4,
          "top_products": {
            "hits": {
              "total": {
                "value": 4,
                "relation": "eq"
              },
              "max_score": null,
              "hits": [ # Only the top 2 documents by priority are returned, out of the 4 that matched for this shop.
                {
                  "_index": "79462081",
                  "_id": "CnUaN5UBJxYdI_ZD5ifg",
                  "_score": null,
                  "_source": {
                    "shop_id": 1,
                    "product": "product_4",
                    "priority": 4
                  },
                  "sort": [
                    4
                  ]
                },
                {
                  "_index": "79462081",
                  "_id": "CXUaN5UBJxYdI_ZD5ifg",
                  "_score": null,
                  "_source": {
                    "shop_id": 1,
                    "product": "product_3",
                    "priority": 3
                  },
                  "sort": [
                    3
                  ]
                }
              ]
            }
          }
        },
        {
          "key": 2,
          "doc_count": 4,
          "top_products": {
            "hits": {
              "total": {
                "value": 4,
                "relation": "eq"
              },
              "max_score": null,
              "hits": [
                {
                  "_index": "79462081",
                  "_id": "DnUaN5UBJxYdI_ZD5ifg",
                  "_score": null,
                  "_source": {
                    "shop_id": 2,
                    "product": "product_4",
                    "priority": 4
                  },
                  "sort": [
                    4
                  ]
                },
                {
                  "_index": "79462081",
                  "_id": "DXUaN5UBJxYdI_ZD5ifg",
                  "_score": null,
                  "_source": {
                    "shop_id": 2,
                    "product": "product_3",
                    "priority": 3
                  },
                  "sort": [
                    3
                  ]
                }
              ]
            }
          }
        }
      ]
    }
  }
}

本文标签: elasticsearchElastic Search 8 queryStack Overflow