admin管理员组

文章数量:1122826

Please see below for what I have and what I would like to have. I have tried a subquery using LEAD and LAG to create a flag for partitioning but couldn't get that to work. Thanks.

CREATE TABLE #t (PackedStr VARCHAR(100), Ident INT, BeginDate DATE, EndDate DATE);
INSERT #t
VALUES ('A,B,C,D,E', 86, '2019-03-18', '2019-03-27')
     , ('A,B,C,D,E', 87, '2019-03-28', '2019-04-09')
     , ('A,B,C,D,E,F,G', 88, '2019-04-10', '2019-04-15')
     , ('A,B,C,D,E', 89, '2019-04-16', '2019-04-24')
     , ('A,B,C,D,E', 90, '2019-04-25', '2019-05-14');

SELECT *
     , ROW_NUMBER() OVER (PARTITION BY [PackedStr] ORDER BY [Ident]) RowNumber
     , MIN( BeginDate ) OVER (PARTITION BY [PackedStr] ORDER BY [Ident]) FirstDate
     , MAX( EndDate ) OVER (PARTITION BY [PackedStr] ORDER BY [Ident] DESC) LastDate
FROM [#t]
ORDER BY [Ident];

/*
--Current results
PackedStr      Ident  BeginDate  EndDate    RowNumber  FirstDate  LastDate
-------------- ------ ---------- ---------- ---------- ---------- ----------
A,B,C,D,E      86     2019-03-18 2019-03-27 1          2019-03-18 2019-05-14
A,B,C,D,E      87     2019-03-28 2019-04-09 2          2019-03-18 2019-05-14
A,B,C,D,E,F,G  88     2019-04-10 2019-04-15 1          2019-04-10 2019-04-15
A,B,C,D,E      89     2019-04-16 2019-04-24 3          2019-03-18 2019-05-14
A,B,C,D,E      90     2019-04-25 2019-05-14 4          2019-03-18 2019-05-14

--Desired results should be:
PackedStr      Ident  BeginDate  EndDate    RowNumber  FirstDate  LastDate
-------------- ------ ---------- ---------- ---------- ---------- ----------
A,B,C,D,E      86     2019-03-18 2019-03-27 1          2019-03-18 2019-04-09
A,B,C,D,E      87     2019-03-28 2019-04-09 2          2019-03-18 2019-04-09
A,B,C,D,E,F,G  88     2019-04-10 2019-04-15 1          2019-04-10 2019-04-15
A,B,C,D,E      89     2019-04-16 2019-04-24 1          2019-04-16 2019-05-14
A,B,C,D,E      90     2019-04-25 2019-05-14 2          2019-04-16 2019-05-14
*/

Please see below for what I have and what I would like to have. I have tried a subquery using LEAD and LAG to create a flag for partitioning but couldn't get that to work. Thanks.

CREATE TABLE #t (PackedStr VARCHAR(100), Ident INT, BeginDate DATE, EndDate DATE);
INSERT #t
VALUES ('A,B,C,D,E', 86, '2019-03-18', '2019-03-27')
     , ('A,B,C,D,E', 87, '2019-03-28', '2019-04-09')
     , ('A,B,C,D,E,F,G', 88, '2019-04-10', '2019-04-15')
     , ('A,B,C,D,E', 89, '2019-04-16', '2019-04-24')
     , ('A,B,C,D,E', 90, '2019-04-25', '2019-05-14');

SELECT *
     , ROW_NUMBER() OVER (PARTITION BY [PackedStr] ORDER BY [Ident]) RowNumber
     , MIN( BeginDate ) OVER (PARTITION BY [PackedStr] ORDER BY [Ident]) FirstDate
     , MAX( EndDate ) OVER (PARTITION BY [PackedStr] ORDER BY [Ident] DESC) LastDate
FROM [#t]
ORDER BY [Ident];

/*
--Current results
PackedStr      Ident  BeginDate  EndDate    RowNumber  FirstDate  LastDate
-------------- ------ ---------- ---------- ---------- ---------- ----------
A,B,C,D,E      86     2019-03-18 2019-03-27 1          2019-03-18 2019-05-14
A,B,C,D,E      87     2019-03-28 2019-04-09 2          2019-03-18 2019-05-14
A,B,C,D,E,F,G  88     2019-04-10 2019-04-15 1          2019-04-10 2019-04-15
A,B,C,D,E      89     2019-04-16 2019-04-24 3          2019-03-18 2019-05-14
A,B,C,D,E      90     2019-04-25 2019-05-14 4          2019-03-18 2019-05-14

--Desired results should be:
PackedStr      Ident  BeginDate  EndDate    RowNumber  FirstDate  LastDate
-------------- ------ ---------- ---------- ---------- ---------- ----------
A,B,C,D,E      86     2019-03-18 2019-03-27 1          2019-03-18 2019-04-09
A,B,C,D,E      87     2019-03-28 2019-04-09 2          2019-03-18 2019-04-09
A,B,C,D,E,F,G  88     2019-04-10 2019-04-15 1          2019-04-10 2019-04-15
A,B,C,D,E      89     2019-04-16 2019-04-24 1          2019-04-16 2019-05-14
A,B,C,D,E      90     2019-04-25 2019-05-14 2          2019-04-16 2019-05-14
*/
Share Improve this question edited Nov 22, 2024 at 21:28 plntreltn asked Nov 22, 2024 at 17:27 plntreltnplntreltn 213 bronze badges 1
  • Please tag with dbms. – keithwalsh Commented Nov 22, 2024 at 18:27
Add a comment  | 

2 Answers 2

Reset to default 1

For your initial idea - using lag(...) Solve like a regular task gaps an islands.
Each change in PackedStr (ordered by Ident) is gap.
Calculate number of gap as sum(isGap) - gapN.
Calculate row_number() within gap - rn.
See example

select *
  ,row_number()over(partition by gapN order by Ident) rn
  ,MIN( BeginDate ) OVER (PARTITION BY gapN) FirstDate
  ,MAX( EndDate ) OVER (PARTITION BY gapN) LastDate
from(
   select * ,sum(isGap)over(order by Ident)gapN
   from(
     select *
       ,case when PackedStr=lag(PackedStr)over(order by Ident) then 0 else 1 end as isGap
     from #t
   )a
)b
PackedStr Ident BeginDate EndDate isGap gapN rn FirstDate LastDate
A,B,C,D,E 86 2019-03-18 2019-03-27 1 1 1 2019-03-18 2019-04-09
A,B,C,D,E 87 2019-03-28 2019-04-09 0 1 2 2019-03-18 2019-04-09
A,B,C,D,E,F,G 88 2019-04-10 2019-04-15 1 2 1 2019-04-10 2019-04-15
A,B,C,D,E 89 2019-04-16 2019-04-24 1 3 1 2019-04-16 2019-05-14
A,B,C,D,E 90 2019-04-25 2019-05-14 0 3 2 2019-04-16 2019-05-14

fiddle

I would try with "sessionisation": Create a "session identifier" that changes every time a given column, packedstr in our case, changes. Usually that is the running sum (in an outer query) of a counter (in an inner query) that is at 1 in case of change, at 0 otherwise.

Then, partition by that session id, and order by begindate.

WITH                                                                                                                            
-- your input, not to use in query ...
indata(packedstr,ident,begindate,enddate)  AS (
          SELECT 'A,B,C,D,E'    , 86, DATE '2019-03-18', DATE '2019-03-27'
UNION ALL SELECT 'A,B,C,D,E'    , 87, DATE '2019-03-28', DATE '2019-04-09'
UNION ALL SELECT 'A,B,C,D,E,F,G', 88, DATE '2019-04-10', DATE '2019-04-15'
UNION ALL SELECT 'A,B,C,D,E'    , 89, DATE '2019-04-16', DATE '2019-04-24'
UNION ALL SELECT 'A,B,C,D,E'    , 90, DATE '2019-04-25', DATE '2019-05-14'
)
-- end of input, real query starts here, replace following comma with "WITH"
,
-- sessionisation if the DBMS is not Vertica, in two nested queries
-- q1: counter at 1 when packedstr changes, else at 0)
w_counter AS (
  SELECT
    packedstr
  , ident
  , begindate
  , enddate
  , CASE WHEN LAG(packedstr) OVER(ORDER BY begindate) = packedstr 
      THEN 0 
      ELSE 1 
    END AS counter
  FROM indata
-- dbg    packedstr   | ident | begindate  |  enddate   | counter 
-- dbg ---------------+-------+------------+------------+---------
-- dbg  A,B,C,D,E     |    86 | 2019-03-18 | 2019-03-27 |       1
-- dbg  A,B,C,D,E     |    87 | 2019-03-28 | 2019-04-09 |       0
-- dbg  A,B,C,D,E,F,G |    88 | 2019-04-10 | 2019-04-15 |       1
-- dbg  A,B,C,D,E     |    89 | 2019-04-16 | 2019-04-24 |       1
-- dbg  A,B,C,D,E     |    90 | 2019-04-25 | 2019-05-14 |       0
)
,
-- sessionisation if the DBMS is not Vertica, in two nested queries
-- q2: running sum of "counter" of q1 as session
w_session AS (
  SELECT
    packedstr
  , ident
  , begindate
  , enddate
  , SUM(counter)  OVER(ORDER BY begindate) AS session_id
  FROM w_counter
-- dbg    packedstr   | ident | begindate  |  enddate   | session_id 
-- dbg ---------------+-------+------------+------------+------------
-- dbg  A,B,C,D,E     |    86 | 2019-03-18 | 2019-03-27 |          1
-- dbg  A,B,C,D,E     |    87 | 2019-03-28 | 2019-04-09 |          1
-- dbg  A,B,C,D,E,F,G |    88 | 2019-04-10 | 2019-04-15 |          2
-- dbg  A,B,C,D,E     |    89 | 2019-04-16 | 2019-04-24 |          3
-- dbg  A,B,C,D,E     |    90 | 2019-04-25 | 2019-05-14 |          3
)
SELECT
  packedstr
, ident
, begindate
, enddate
, ROW_NUMBER() OVER(PARTITION BY session_id ORDER BY begindate) FROM w_session;
packedstr ident begindate enddate ROW_NUMBER
A,B,C,D,E 86 2019-03-18 2019-03-27 1
A,B,C,D,E 87 2019-03-28 2019-04-09 2
A,B,C,D,E,F,G 88 2019-04-10 2019-04-15 1
A,B,C,D,E 89 2019-04-16 2019-04-24 1
A,B,C,D,E 90 2019-04-25 2019-05-14 2

本文标签: