admin管理员组文章数量:1122826
Need to parse nested XML in Snowflake. Example:
WITH xml_table
AS (
SELECT 1 AS ID
, PARSE_XML(
'<root>
<docs>
<doc>
<Id>1</Id>
<Name>
<Kvp>
<Key>k1</Key>
<Value>v1</Value>
</Kvp>
<Kvp>
<Key>k2</Key>
<Value>v2</Value>
</Kvp>
<Kvp>
<Key>k3</Key>
<Value>v3</Value>
</Kvp>
</Name>
</doc>
<doc>
<Id>2</Id>
<Name>
<Kvp>
<Key>k4</Key>
<Value>v4</Value>
</Kvp>
<Kvp>
<Key>k5</Key>
<Value>v5</Value>
</Kvp>
</Name>
</doc>
<doc>
<Id>3</Id>
<Name>
<Kvp>
<Key>k6</Key>
<Value>v6</Value>
</Kvp>
</Name>
</doc>
</docs>
</root>'
) AS XML_COL
)
SELECT docs.ID
, docs.docId
--, docs.docName
, GET(XMLGET(kvps.VALUE, 'Key'), '$')::STRING AS docNameKey
, GET(XMLGET(kvps.VALUE, 'Value'), '$')::STRING AS docNameValue
--, kvps.*
FROM --docs
(
SELECT xml_table.ID
, xml_table.XML_COL
, GET(xml_table.XML_COL, '@')::STRING AS ROOT_NODE_NAME
, XMLGET(docs.VALUE, 'Id') : "$"::STRING AS docId
, XMLGET(docs.VALUE, 'Name') AS docName
FROM xml_table
, LATERAL FLATTEN(GET(XMLGET(xml_table.XML_COL, 'docs'), '$')) AS docs
WHERE 1 = 1
) docs
, LATERAL FLATTEN(GET(docs.docName, '$')) AS kvps
WHERE 1 = 1
So i need to parse it to table format:
doc.Id AS docId
doc.Name.Kvp.Key AS docNameKey
doc.Name.Kvp.Value AS docNameValue
The given example seesm to work as expected when there is more than one key-value-pair under KVP(like the docId IN (1,2) in the example), but then when there is one key-value-pair then the result is not as expected. Why?
The query returns:
ID DOCID DOCNAMEKEY DOCNAMEVALUE
1 1 k1 v1
1 1 k2 v2
1 1 k3 v3
1 2 k4 v4
1 2 k5 v5
1 3 null null
1 3 null null
1 3 null null
1 3 null null
As you see the docId=3 has been "blown" to 4 records and no key-value-pair is found. But it has one key value pair k6-v6.
So the expected output (what i would like to get) is:
ID DOCID DOCNAMEKEY DOCNAMEVALUE
1 1 k1 v1
1 1 k2 v2
1 1 k3 v3
1 2 k4 v4
1 2 k5 v5
1 3 k6 v6
Why does the xml parsing depend on the number of sub-elements?
Need to parse nested XML in Snowflake. Example:
WITH xml_table
AS (
SELECT 1 AS ID
, PARSE_XML(
'<root>
<docs>
<doc>
<Id>1</Id>
<Name>
<Kvp>
<Key>k1</Key>
<Value>v1</Value>
</Kvp>
<Kvp>
<Key>k2</Key>
<Value>v2</Value>
</Kvp>
<Kvp>
<Key>k3</Key>
<Value>v3</Value>
</Kvp>
</Name>
</doc>
<doc>
<Id>2</Id>
<Name>
<Kvp>
<Key>k4</Key>
<Value>v4</Value>
</Kvp>
<Kvp>
<Key>k5</Key>
<Value>v5</Value>
</Kvp>
</Name>
</doc>
<doc>
<Id>3</Id>
<Name>
<Kvp>
<Key>k6</Key>
<Value>v6</Value>
</Kvp>
</Name>
</doc>
</docs>
</root>'
) AS XML_COL
)
SELECT docs.ID
, docs.docId
--, docs.docName
, GET(XMLGET(kvps.VALUE, 'Key'), '$')::STRING AS docNameKey
, GET(XMLGET(kvps.VALUE, 'Value'), '$')::STRING AS docNameValue
--, kvps.*
FROM --docs
(
SELECT xml_table.ID
, xml_table.XML_COL
, GET(xml_table.XML_COL, '@')::STRING AS ROOT_NODE_NAME
, XMLGET(docs.VALUE, 'Id') : "$"::STRING AS docId
, XMLGET(docs.VALUE, 'Name') AS docName
FROM xml_table
, LATERAL FLATTEN(GET(XMLGET(xml_table.XML_COL, 'docs'), '$')) AS docs
WHERE 1 = 1
) docs
, LATERAL FLATTEN(GET(docs.docName, '$')) AS kvps
WHERE 1 = 1
So i need to parse it to table format:
doc.Id AS docId
doc.Name.Kvp.Key AS docNameKey
doc.Name.Kvp.Value AS docNameValue
The given example seesm to work as expected when there is more than one key-value-pair under KVP(like the docId IN (1,2) in the example), but then when there is one key-value-pair then the result is not as expected. Why?
The query returns:
ID DOCID DOCNAMEKEY DOCNAMEVALUE
1 1 k1 v1
1 1 k2 v2
1 1 k3 v3
1 2 k4 v4
1 2 k5 v5
1 3 null null
1 3 null null
1 3 null null
1 3 null null
As you see the docId=3 has been "blown" to 4 records and no key-value-pair is found. But it has one key value pair k6-v6.
So the expected output (what i would like to get) is:
ID DOCID DOCNAMEKEY DOCNAMEVALUE
1 1 k1 v1
1 1 k2 v2
1 1 k3 v3
1 2 k4 v4
1 2 k5 v5
1 3 k6 v6
Why does the xml parsing depend on the number of sub-elements?
Share Improve this question edited Nov 25, 2024 at 7:54 user12761950 asked Nov 22, 2024 at 8:20 user12761950user12761950 1491 gold badge2 silver badges10 bronze badges 2- Can you share expected output based on input, so that we can test – samhita Commented Nov 24, 2024 at 10:05
- @samhita updated the original question with the result i get and what i would like to get. The docId=3 which has one key-value-pair is parsed differently than the other ones which have more sub elements. Is this expected behaviour? – user12761950 Commented Nov 25, 2024 at 7:52
1 Answer
Reset to default 0The way I solved it currently:
WITH xml_table
AS (
SELECT 1 AS ID
, PARSE_XML(
'<root>
<docs>
<doc>
<Id>1</Id>
<Name>
<Kvp>
<Key>k1</Key>
<Value>v1</Value>
</Kvp>
<Kvp>
<Key>k2</Key>
<Value>v2</Value>
</Kvp>
<Kvp>
<Key>k3</Key>
<Value>v3</Value>
</Kvp>
</Name>
</doc>
<doc>
<Id>2</Id>
<Name>
<Kvp>
<Key>k4</Key>
<Value>v4</Value>
</Kvp>
<Kvp>
<Key>k5</Key>
<Value>v5</Value>
</Kvp>
</Name>
</doc>
<doc>
<Id>3</Id>
<Name>
<Kvp>
<Key>k6</Key>
<Value>v6</Value>
</Kvp>
</Name>
</doc>
</docs>
</root>'
) AS XML_COL
)
SELECT docs.ID
, docs.docId
, CASE
WHEN kvps.KEY = '$' --For some reason Snowflake works differently if there is 1 sub element than when multiple
THEN CAST(GET(XMLGET(kvps.THIS, 'Key'), '$') AS STRING)
ELSE CAST(GET(XMLGET(kvps.VALUE, 'Key'), '$') AS STRING)
END AS docNameKey
, CASE
WHEN kvps.KEY = '$' --For some reason Snowflake works differently if there is 1 sub element than when multiple
THEN CAST(GET(XMLGET(kvps.THIS, 'Value'), '$') AS STRING)
ELSE CAST(GET(XMLGET(kvps.VALUE, 'Value'), '$') AS STRING)
END AS docNameValue
FROM --docs
(
SELECT xml_table.ID
, xml_table.XML_COL
, CAST(GET(xml_table.XML_COL, '@') AS STRING) AS ROOT_NODE_NAME
, CAST(GET(XMLGET(docs.VALUE, 'Id'), '$') AS STRING) AS docId
, XMLGET(docs.VALUE, 'Name') AS docName
FROM xml_table
, LATERAL FLATTEN(GET(XMLGET(xml_table.XML_COL, 'docs'), '$')) AS docs
WHERE 1 = 1
) docs
, LATERAL FLATTEN(GET(docs.docName, '$')) AS kvps
WHERE 1 = 1
AND (
kvps.KEY IS NULL
OR kvps.KEY = '$'
) --For some reason Snowflake works differently if there is 1 sub element than when multiple
So added the WHERE clause condition to exclude the "extra" lines Snowflake adds when there is only one sub set of value pairs. And then in SELECT clause extra CASE statements to pull the values either from THIS or VALUE attributes depending on is there one or many value pairs.
本文标签: xml parsingParse nested XML with key value pairs in SnowflakeStack Overflow
版权声明:本文标题:xml parsing - Parse nested XML with key value pairs in Snowflake - Stack Overflow 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1736305180a1932499.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论