admin管理员组

文章数量:1123197

I am trying to figure out how to index nested documents in Solr 8. I have found examples that talk about doing this but none provide a schema.xml.

schema.xml

<schema name="example-data-driven-schema" version="1.6">
  <!-- Schema for indexing posts together with their comment and contributor
       documents. Only the fields declared explicitly below are kept; every
       other field name falls through to the catch-all dynamicField. -->
  
  <fields>
        <!-- Bookkeeping fields. _root_ is declared with stored="false" and
             docValues="false", so its value is not returned in query results. -->
        <field name="_version_" type="long" indexed="true" stored="true" required="true"/> 
        <field name="_root_" type="string" indexed="true" stored="false" docValues="false" />
        <!-- Unique key of every document, parent or child (see uniqueKey below). -->
        <field name="id" type="string" indexed="true" stored="true" required="true"/>
        <!-- Fields carried by the post documents. -->
        <field name="title" type="text_general" indexed="true" stored="true"/>
        <field name="author" type="text_general" indexed="true" stored="true"/>
        <!-- Fields carried by the comment documents. -->
        <field name="comment" type="text_general" indexed="true" stored="true"/>
        <field name="commenter" type="text_general" indexed="true" stored="true"/>
        <!-- Fields carried by the contributor documents. -->
        <field name="contributor_name" type="text_general" indexed="true" stored="true"/>
        <field name="contributor_role" type="text_general" indexed="true" stored="true"/>
        <!-- Nesting metadata. _nest_path_ uses the solr.NestPathField type declared
             below; _nest_parent_ is a plain indexed and stored string.
             NOTE(review): presumably both are meant to be filled in by Solr at index
             time rather than by the client; confirm against the Solr nested
             documents guide. -->
        <field name="_nest_path_" type="_nest_path_" />
        <field name="_nest_parent_" type="string" indexed="true" stored="true" />
    
    <!-- Catch-all: any field name not declared above matches "*" and gets the
         "ignored" type, which is neither indexed nor stored. -->
    <dynamicField name="*" type="ignored"/>

  </fields>  
  
  <uniqueKey>id</uniqueKey>
 
  
  <!-- Field types. Only ignored, _nest_path_, long, string and text_general are
       referenced by the fields above; booleans, tdates, tdoubles and tlongs are
       declared but unused in this schema. -->
  <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>
  <fieldType name="_nest_path_" class="solr.NestPathField" />
  <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
  <!-- NOTE(review): the solr.Trie*Field classes used here and below are deprecated
       since Solr 7 in favour of the *PointField classes; confirm before upgrading. -->
  <fieldType name="long" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" precisionStep="0"/>
  <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/>
  <fieldType name="tdates" class="solr.TrieDateField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="6"/>
  <fieldType name="tdoubles" class="solr.TrieDoubleField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>
  <!-- NOTE(review): no analyzer element is declared for this TextField, so no
       tokenizer or filters are configured here; confirm that the resulting
       tokenization is what full-text search on these fields needs. -->
  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"/>
  <fieldType name="tlongs" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>

</schema>

I am adding documents with this command:

curl -X POST -k -H 'Content-Type: application/json' -i 'http://localhost:8983/solr/nested_documents_example/update?commit=true' --data '[
  {
    "id": "post101",
    "title": "How to Optimize Solr Queries",
    "author": "Mike Johnson",
    "_nest_parent_": "post101",
    "_childDocuments_": [
      {
        "id": "comment101",
        "comment": "This article helped me a lot!",
        "commenter": "Sophie"
      },
      {
        "id": "contributor101",
        "contributor_name": "Karen",
        "contributor_role": "Reviewer"
      }
    ]
  },
  {
    "id": "post102",
    "title": "Advanced Solr Schema Design",
    "author": "Sarah Brown",
    "_nest_parent_": "post102",
    "_childDocuments_": [
      {
        "id": "comment102",
        "comment": "Great schema design tips!",
        "commenter": "James"
      }
    ]
  }
]'

This is what I get when I query Solr (http://localhost:8983/solr/nested_documents_example/select?q=*:*):

{

      "responseHeader": {
            "zkConnected": true,
            "status": 0,
            "QTime": 0,
            "params": {
                  "q": "*:*",
                  "indent": "true",
                  "q.op": "OR"
            }
      },
      "response": {
            "numFound": 5,
            "start": 0,
            "numFoundExact": true,
            "docs": [
                  {
                        "id": "comment101",
                        "comment": "This article helped me a lot!",
                        "commenter": "Sophie",
                        "_version_": 1820876636938043400
                  },
                  {
                        "id": "contributor101",
                        "contributor_name": "Karen",
                        "contributor_role": "Reviewer",
                        "_version_": 1820876636938043400
                  },
                  {
                        "id": "post101",
                        "title": "How to Optimize Solr Queries",
                        "author": "Mike Johnson",
                        "_nest_parent_": "post101",
                        "_version_": 1820876636938043400
                  },
                  {
                        "id": "comment102",
                        "comment": "Great schema design tips!",
                        "commenter": "James",
                        "_version_": 1820876636939092000
                  },
                  {
                        "id": "post102",
                        "title": "Advanced Solr Schema Design",
                        "author": "Sarah Brown",
                        "_nest_parent_": "post102",
                        "_version_": 1820876636939092000
                  }
            ]
      }

}

Instead of nested results, everything is at the top level (flattened). Can anyone shed light on what I'm doing wrong?

I am trying to figure out how to index nested documents in Solr 8. I have found examples that talk about doing this but none provide a schema.xml.

schema.xml

<schema name="example-data-driven-schema" version="1.6">
  <!-- Schema for indexing posts together with their comment and contributor
       documents. Only the fields declared explicitly below are kept; every
       other field name falls through to the catch-all dynamicField. -->
  
  <fields>
        <!-- Bookkeeping fields. _root_ is declared with stored="false" and
             docValues="false", so its value is not returned in query results. -->
        <field name="_version_" type="long" indexed="true" stored="true" required="true"/> 
        <field name="_root_" type="string" indexed="true" stored="false" docValues="false" />
        <!-- Unique key of every document, parent or child (see uniqueKey below). -->
        <field name="id" type="string" indexed="true" stored="true" required="true"/>
        <!-- Fields carried by the post documents. -->
        <field name="title" type="text_general" indexed="true" stored="true"/>
        <field name="author" type="text_general" indexed="true" stored="true"/>
        <!-- Fields carried by the comment documents. -->
        <field name="comment" type="text_general" indexed="true" stored="true"/>
        <field name="commenter" type="text_general" indexed="true" stored="true"/>
        <!-- Fields carried by the contributor documents. -->
        <field name="contributor_name" type="text_general" indexed="true" stored="true"/>
        <field name="contributor_role" type="text_general" indexed="true" stored="true"/>
        <!-- Nesting metadata. _nest_path_ uses the solr.NestPathField type declared
             below; _nest_parent_ is a plain indexed and stored string.
             NOTE(review): presumably both are meant to be filled in by Solr at index
             time rather than by the client; confirm against the Solr nested
             documents guide. -->
        <field name="_nest_path_" type="_nest_path_" />
        <field name="_nest_parent_" type="string" indexed="true" stored="true" />
    
    <!-- Catch-all: any field name not declared above matches "*" and gets the
         "ignored" type, which is neither indexed nor stored. -->
    <dynamicField name="*" type="ignored"/>

  </fields>  
  
  <uniqueKey>id</uniqueKey>
 
  
  <!-- Field types. Only ignored, _nest_path_, long, string and text_general are
       referenced by the fields above; booleans, tdates, tdoubles and tlongs are
       declared but unused in this schema. -->
  <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>
  <fieldType name="_nest_path_" class="solr.NestPathField" />
  <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
  <!-- NOTE(review): the solr.Trie*Field classes used here and below are deprecated
       since Solr 7 in favour of the *PointField classes; confirm before upgrading. -->
  <fieldType name="long" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" precisionStep="0"/>
  <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/>
  <fieldType name="tdates" class="solr.TrieDateField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="6"/>
  <fieldType name="tdoubles" class="solr.TrieDoubleField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>
  <!-- NOTE(review): no analyzer element is declared for this TextField, so no
       tokenizer or filters are configured here; confirm that the resulting
       tokenization is what full-text search on these fields needs. -->
  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"/>
  <fieldType name="tlongs" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>

</schema>

I am adding documents with this command:

curl -X POST -k -H 'Content-Type: application/json' -i 'http://localhost:8983/solr/nested_documents_example/update?commit=true' --data '[
  {
    "id": "post101",
    "title": "How to Optimize Solr Queries",
    "author": "Mike Johnson",
    "_nest_parent_": "post101",
    "_childDocuments_": [
      {
        "id": "comment101",
        "comment": "This article helped me a lot!",
        "commenter": "Sophie"
      },
      {
        "id": "contributor101",
        "contributor_name": "Karen",
        "contributor_role": "Reviewer"
      }
    ]
  },
  {
    "id": "post102",
    "title": "Advanced Solr Schema Design",
    "author": "Sarah Brown",
    "_nest_parent_": "post102",
    "_childDocuments_": [
      {
        "id": "comment102",
        "comment": "Great schema design tips!",
        "commenter": "James"
      }
    ]
  }
]'

This is what I get when I query Solr (http://localhost:8983/solr/nested_documents_example/select?q=*:*):

{

      "responseHeader": {
            "zkConnected": true,
            "status": 0,
            "QTime": 0,
            "params": {
                  "q": "*:*",
                  "indent": "true",
                  "q.op": "OR"
            }
      },
      "response": {
            "numFound": 5,
            "start": 0,
            "numFoundExact": true,
            "docs": [
                  {
                        "id": "comment101",
                        "comment": "This article helped me a lot!",
                        "commenter": "Sophie",
                        "_version_": 1820876636938043400
                  },
                  {
                        "id": "contributor101",
                        "contributor_name": "Karen",
                        "contributor_role": "Reviewer",
                        "_version_": 1820876636938043400
                  },
                  {
                        "id": "post101",
                        "title": "How to Optimize Solr Queries",
                        "author": "Mike Johnson",
                        "_nest_parent_": "post101",
                        "_version_": 1820876636938043400
                  },
                  {
                        "id": "comment102",
                        "comment": "Great schema design tips!",
                        "commenter": "James",
                        "_version_": 1820876636939092000
                  },
                  {
                        "id": "post102",
                        "title": "Advanced Solr Schema Design",
                        "author": "Sarah Brown",
                        "_nest_parent_": "post102",
                        "_version_": 1820876636939092000
                  }
            ]
      }

}

Instead of nested results, everything is at the top level (flattened). Can anyone shed light on what I'm doing wrong?

Asked and last edited 8 hours ago by Mark Sholund (reputation 1,302; 3 gold, 18 silver, 33 bronze badges)
Add a comment  | 

1 Answer

Reset to default 0

Here's what I came up with; it seems to work:

schema.xml

<schema name="example-data-driven-schema" version="1.6">
  <!-- Schema for indexing posts with nested comment and contributor documents.
       Only the fields declared explicitly below are kept; every other field
       name falls through to the catch-all dynamicField. -->
  <fields>
        <!-- Bookkeeping fields. _root_ carries no stored or docValues override here,
             so it inherits docValues="true" from the string type declared below. -->
        <field name="_version_" type="long" indexed="true" stored="true" required="true"/> 
        <field name="_root_" type="string" indexed="true" />
        <!-- Unique key of every document, parent or child (see uniqueKey below). -->
        <field name="id" type="string" indexed="true" stored="true" required="true"/>
        <!-- Fields carried by the post documents. -->
        <field name="title" type="text_general" indexed="true" stored="true"/>
        <field name="author" type="text_general" indexed="true" stored="true"/>
        <!-- Fields carried by the comment documents. -->
        <field name="comment" type="text_general" indexed="true" stored="true"/>
        <field name="commenter" type="text_general" indexed="true" stored="true"/>
        <!-- Fields carried by the contributor documents. -->
        <field name="contributor_name" type="text_general" indexed="true" stored="true"/>
        <field name="contributor_role" type="text_general" indexed="true" stored="true"/>
        <!-- Nesting metadata. _nest_path_ uses the solr.NestPathField type declared
             below; _nest_parent_ is a string with no per-field overrides.
             NOTE(review): presumably both are filled in by Solr at index time
             rather than by the client; confirm against the Solr nested documents
             guide. -->
        <field name="_nest_path_" type="_nest_path_" />
        <field name="_nest_parent_" type="string"/>
        <!-- Catch-all: any field name not declared above matches "*" and gets the
             "ignored" type, which is neither indexed nor stored. -->
        <dynamicField name="*" type="ignored"/>
  </fields>  
  <uniqueKey>id</uniqueKey>
  <!-- Field types. Only ignored, _nest_path_, long, string and text_general are
       referenced by the fields above; booleans, tdates, tdoubles and tlongs are
       declared but unused in this schema. -->
  <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>
  <fieldType name="_nest_path_" class="solr.NestPathField" />
  <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
  <!-- NOTE(review): the solr.Trie*Field classes used here and below are deprecated
       since Solr 7 in favour of the *PointField classes; confirm before upgrading. -->
  <fieldType name="long" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" precisionStep="0"/>
  <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/>
  <fieldType name="tdates" class="solr.TrieDateField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="6"/>
  <fieldType name="tdoubles" class="solr.TrieDoubleField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>
  <!-- NOTE(review): no analyzer element is declared for this TextField, so no
       tokenizer or filters are configured here; confirm that the resulting
       tokenization is what full-text search on these fields needs. -->
  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"/>
  <fieldType name="tlongs" class="solr.TrieLongField" positionIncrementGap="0" docValues="true" multiValued="true" precisionStep="8"/>
</schema>

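Insert these documents (the children are now nested under the named keys "comments" and "contributors" instead of "_childDocuments_", and "_nest_parent_" is no longer sent by the client):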

[
  {
    "id": "post101",
    "title": "How to Optimize Solr Queries",
    "author": "Mike Johnson",
    "comments": [
      {
        "id": "comment101",
        "comment": "This article helped me a lot!",
        "commenter": "Sophie"
      }],
      "contributors": [{
        "id": "contributor101",
        "contributor_name": "Karen",
        "contributor_role": "Reviewer"
      }
    ]
  },
  {
    "id": "post102",
    "title": "Advanced Solr Schema Design",
    "author": "Sarah Brown",
    "comments": [
      {
        "id": "comment102",
        "comment": "Great schema design tips!",
        "commenter": "James"
      }
    ]
  }
]

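The select I ran was correct to return everything "flattened": a plain q=*:* matches parent and child documents alike. The problem was that I needed to add fl=*,[child] to the request so that each parent is returned with its children nested under it. After doing that, my results are: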

.
.
.
"docs": [

          {
                "id": "comment101",
                "comment": "This article helped me a lot!",
                "commenter": "Sophie",
                "_nest_parent_": "post101",
                "_root_": "post101",
                "_version_": 1820881500429615000
          },
          {
                "id": "contributor101",
                "contributor_name": "Karen",
                "contributor_role": "Reviewer",
                "_nest_parent_": "post101",
                "_root_": "post101",
                "_version_": 1820881500429615000
          },
          {
                "id": "post101",
                "title": "How to Optimize Solr Queries",
                "author": "Mike Johnson",
                "_version_": 1820881500429615000,
                "_root_": "post101",
                "comments": [
                      {
                            "id": "comment101",
                            "comment": "This article helped me a lot!",
                            "commenter": "Sophie",
                            "_nest_parent_": "post101",
                            "_root_": "post101",
                            "_version_": 1820881500429615000
                      }
                ],
                "contributors": [
                      {
                            "id": "contributor101",
                            "contributor_name": "Karen",
                            "contributor_role": "Reviewer",
                            "_nest_parent_": "post101",
                            "_root_": "post101",
                            "_version_": 1820881500429615000
                      }
                ]
          },
          {
                "id": "comment102",
                "comment": "Great schema design tips!",
                "commenter": "James",
                "_nest_parent_": "post102",
                "_root_": "post102",
                "_version_": 1820881500430663700
          },
          {
                "id": "post102",
                "title": "Advanced Solr Schema Design",
                "author": "Sarah Brown",
                "_version_": 1820881500430663700,
                "_root_": "post102",
                "comments": [
                      {
                            "id": "comment102",
                            "comment": "Great schema design tips!",
                            "commenter": "James",
                            "_nest_parent_": "post102",
                            "_root_": "post102",
                            "_version_": 1820881500430663700
                      }
                ]
          }

    ]

本文标签: luceneSolr Nested DocumentsStack Overflow