admin管理员组文章数量:1122826
Please see below for what I have and what I would like to have. I have tried a subquery using LEAD and LAG to create a flag for partitioning but couldn't get that to work. Thanks.
CREATE TABLE #t (PackedStr VARCHAR(100), Ident INT, BeginDate DATE, EndDate DATE);
INSERT #t
VALUES ('A,B,C,D,E', 86, '2019-03-18', '2019-03-27')
, ('A,B,C,D,E', 87, '2019-03-28', '2019-04-09')
, ('A,B,C,D,E,F,G', 88, '2019-04-10', '2019-04-15')
, ('A,B,C,D,E', 89, '2019-04-16', '2019-04-24')
, ('A,B,C,D,E', 90, '2019-04-25', '2019-05-14');
SELECT *
, ROW_NUMBER() OVER (PARTITION BY [PackedStr] ORDER BY [Ident]) RowNumber
, MIN( BeginDate ) OVER (PARTITION BY [PackedStr] ORDER BY [Ident]) FirstDate
, MAX( EndDate ) OVER (PARTITION BY [PackedStr] ORDER BY [Ident] DESC) LastDate
FROM [#t]
ORDER BY [Ident];
/*
--Current results
PackedStr Ident BeginDate EndDate RowNumber FirstDate LastDate
-------------- ------ ---------- ---------- ---------- ---------- ----------
A,B,C,D,E 86 2019-03-18 2019-03-27 1 2019-03-18 2019-05-14
A,B,C,D,E 87 2019-03-28 2019-04-09 2 2019-03-18 2019-05-14
A,B,C,D,E,F,G 88 2019-04-10 2019-04-15 1 2019-04-10 2019-04-15
A,B,C,D,E 89 2019-04-16 2019-04-24 3 2019-03-18 2019-05-14
A,B,C,D,E 90 2019-04-25 2019-05-14 4 2019-03-18 2019-05-14
--Desired results should be:
PackedStr Ident BeginDate EndDate RowNumber FirstDate LastDate
-------------- ------ ---------- ---------- ---------- ---------- ----------
A,B,C,D,E 86 2019-03-18 2019-03-27 1 2019-03-18 2019-04-09
A,B,C,D,E 87 2019-03-28 2019-04-09 2 2019-03-18 2019-04-09
A,B,C,D,E,F,G 88 2019-04-10 2019-04-15 1 2019-04-10 2019-04-15
A,B,C,D,E 89 2019-04-16 2019-04-24 1 2019-04-16 2019-05-14
A,B,C,D,E 90 2019-04-25 2019-05-14 2 2019-04-16 2019-05-14
*/
Please see below for what I have and what I would like to have. I have tried a subquery using LEAD and LAG to create a flag for partitioning but couldn't get that to work. Thanks.
CREATE TABLE #t (PackedStr VARCHAR(100), Ident INT, BeginDate DATE, EndDate DATE);
INSERT #t
VALUES ('A,B,C,D,E', 86, '2019-03-18', '2019-03-27')
, ('A,B,C,D,E', 87, '2019-03-28', '2019-04-09')
, ('A,B,C,D,E,F,G', 88, '2019-04-10', '2019-04-15')
, ('A,B,C,D,E', 89, '2019-04-16', '2019-04-24')
, ('A,B,C,D,E', 90, '2019-04-25', '2019-05-14');
SELECT *
, ROW_NUMBER() OVER (PARTITION BY [PackedStr] ORDER BY [Ident]) RowNumber
, MIN( BeginDate ) OVER (PARTITION BY [PackedStr] ORDER BY [Ident]) FirstDate
, MAX( EndDate ) OVER (PARTITION BY [PackedStr] ORDER BY [Ident] DESC) LastDate
FROM [#t]
ORDER BY [Ident];
/*
--Current results
PackedStr Ident BeginDate EndDate RowNumber FirstDate LastDate
-------------- ------ ---------- ---------- ---------- ---------- ----------
A,B,C,D,E 86 2019-03-18 2019-03-27 1 2019-03-18 2019-05-14
A,B,C,D,E 87 2019-03-28 2019-04-09 2 2019-03-18 2019-05-14
A,B,C,D,E,F,G 88 2019-04-10 2019-04-15 1 2019-04-10 2019-04-15
A,B,C,D,E 89 2019-04-16 2019-04-24 3 2019-03-18 2019-05-14
A,B,C,D,E 90 2019-04-25 2019-05-14 4 2019-03-18 2019-05-14
--Desired results should be:
PackedStr Ident BeginDate EndDate RowNumber FirstDate LastDate
-------------- ------ ---------- ---------- ---------- ---------- ----------
A,B,C,D,E 86 2019-03-18 2019-03-27 1 2019-03-18 2019-04-09
A,B,C,D,E 87 2019-03-28 2019-04-09 2 2019-03-18 2019-04-09
A,B,C,D,E,F,G 88 2019-04-10 2019-04-15 1 2019-04-10 2019-04-15
A,B,C,D,E 89 2019-04-16 2019-04-24 1 2019-04-16 2019-05-14
A,B,C,D,E 90 2019-04-25 2019-05-14 2 2019-04-16 2019-05-14
*/
Share
Improve this question
edited Nov 22, 2024 at 21:28
plntreltn
asked Nov 22, 2024 at 17:27
plntreltnplntreltn
213 bronze badges
1
- Please tag with dbms. – keithwalsh Commented Nov 22, 2024 at 18:27
2 Answers
Reset to default 1For your initial idea - using lag(...)
Solve like a regular task gaps an islands
.
Each change in PackedStr (ordered by Ident) is gap
.
Calculate number of gap as sum(isGap) - gapN.
Calculate row_number() within gap - rn.
See example
select *
,row_number()over(partition by gapN order by Ident) rn
,MIN( BeginDate ) OVER (PARTITION BY gapN) FirstDate
,MAX( EndDate ) OVER (PARTITION BY gapN) LastDate
from(
select * ,sum(isGap)over(order by Ident)gapN
from(
select *
,case when PackedStr=lag(PackedStr)over(order by Ident) then 0 else 1 end as isGap
from #t
)a
)b
PackedStr | Ident | BeginDate | EndDate | isGap | gapN | rn | FirstDate | LastDate |
---|---|---|---|---|---|---|---|---|
A,B,C,D,E | 86 | 2019-03-18 | 2019-03-27 | 1 | 1 | 1 | 2019-03-18 | 2019-04-09 |
A,B,C,D,E | 87 | 2019-03-28 | 2019-04-09 | 0 | 1 | 2 | 2019-03-18 | 2019-04-09 |
A,B,C,D,E,F,G | 88 | 2019-04-10 | 2019-04-15 | 1 | 2 | 1 | 2019-04-10 | 2019-04-15 |
A,B,C,D,E | 89 | 2019-04-16 | 2019-04-24 | 1 | 3 | 1 | 2019-04-16 | 2019-05-14 |
A,B,C,D,E | 90 | 2019-04-25 | 2019-05-14 | 0 | 3 | 2 | 2019-04-16 | 2019-05-14 |
fiddle
I would try with "sessionisation": Create a "session identifier" that changes every time a given column, packedstr
in our case, changes. Usually that is the running sum (in an outer query) of a counter (in an inner query) that is at 1 in case of change, at 0 otherwise.
Then, partition by that session id, and order by begindate
.
WITH
-- your input, not to use in query ...
indata(packedstr,ident,begindate,enddate) AS (
SELECT 'A,B,C,D,E' , 86, DATE '2019-03-18', DATE '2019-03-27'
UNION ALL SELECT 'A,B,C,D,E' , 87, DATE '2019-03-28', DATE '2019-04-09'
UNION ALL SELECT 'A,B,C,D,E,F,G', 88, DATE '2019-04-10', DATE '2019-04-15'
UNION ALL SELECT 'A,B,C,D,E' , 89, DATE '2019-04-16', DATE '2019-04-24'
UNION ALL SELECT 'A,B,C,D,E' , 90, DATE '2019-04-25', DATE '2019-05-14'
)
-- end of input, real query starts here, replace following comma with "WITH"
,
-- sessionisation if the DBMS is not Vertica, in two nested queries
-- q1: counter at 1 when packedstr changes, else at 0)
w_counter AS (
SELECT
packedstr
, ident
, begindate
, enddate
, CASE WHEN LAG(packedstr) OVER(ORDER BY begindate) = packedstr
THEN 0
ELSE 1
END AS counter
FROM indata
-- dbg packedstr | ident | begindate | enddate | counter
-- dbg ---------------+-------+------------+------------+---------
-- dbg A,B,C,D,E | 86 | 2019-03-18 | 2019-03-27 | 1
-- dbg A,B,C,D,E | 87 | 2019-03-28 | 2019-04-09 | 0
-- dbg A,B,C,D,E,F,G | 88 | 2019-04-10 | 2019-04-15 | 1
-- dbg A,B,C,D,E | 89 | 2019-04-16 | 2019-04-24 | 1
-- dbg A,B,C,D,E | 90 | 2019-04-25 | 2019-05-14 | 0
)
,
-- sessionisation if the DBMS is not Vertica, in two nested queries
-- q2: running sum of "counter" of q1 as session
w_session AS (
SELECT
packedstr
, ident
, begindate
, enddate
, SUM(counter) OVER(ORDER BY begindate) AS session_id
FROM w_counter
-- dbg packedstr | ident | begindate | enddate | session_id
-- dbg ---------------+-------+------------+------------+------------
-- dbg A,B,C,D,E | 86 | 2019-03-18 | 2019-03-27 | 1
-- dbg A,B,C,D,E | 87 | 2019-03-28 | 2019-04-09 | 1
-- dbg A,B,C,D,E,F,G | 88 | 2019-04-10 | 2019-04-15 | 2
-- dbg A,B,C,D,E | 89 | 2019-04-16 | 2019-04-24 | 3
-- dbg A,B,C,D,E | 90 | 2019-04-25 | 2019-05-14 | 3
)
SELECT
packedstr
, ident
, begindate
, enddate
, ROW_NUMBER() OVER(PARTITION BY session_id ORDER BY begindate) FROM w_session;
packedstr | ident | begindate | enddate | ROW_NUMBER |
---|---|---|---|---|
A,B,C,D,E | 86 | 2019-03-18 | 2019-03-27 | 1 |
A,B,C,D,E | 87 | 2019-03-28 | 2019-04-09 | 2 |
A,B,C,D,E,F,G | 88 | 2019-04-10 | 2019-04-15 | 1 |
A,B,C,D,E | 89 | 2019-04-16 | 2019-04-24 | 1 |
A,B,C,D,E | 90 | 2019-04-25 | 2019-05-14 | 2 |
本文标签:
版权声明:本文标题:sql - How do you get ROW_NUMBER to start over at different value between equal values in the partition? - Stack Overflow 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1736301977a1931368.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论