SET NOCOUNT ON; IF OBJECT_ID('tempdb.dbo.#Events', 'U') IS NOT NULL DROP TABLE #Events; CREATE TABLE #Events ( ID BIGINT IDENTITY PRIMARY KEY ,UserID INT ,EventTime DATETIME ,[Event] TINYINT -- 0=Login, 1=Logout ); DECLARE @StartDate DATETIME = '2005-01-01' ,@UserIDs INT = 1000 -- 500 days x 1000 users x 2 endpoints = 1,000,000 rows ,@Days INT = 2000 ,@MeanLoginSeconds INT = 240 ,@StdDevLoginSeconds INT = 60; WITH Tally (n) AS ( SELECT TOP ( SELECT CASE WHEN @UserIDs > @Days THEN @UserIDs ELSE @Days END ) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM sys.all_columns a CROSS JOIN sys.all_columns b ) INSERT INTO #Events SELECT UserID=f.n, EventTime, [Event] -- Create 2 uniform random numbers FROM (SELECT RAND(CHECKSUM(NEWID()))) a(URN1) CROSS APPLY (SELECT RAND(CHECKSUM(NEWID()))) b(URN2) -- Create 1 Normally-distributed random number from the 2 URNs using the -- using the Box-Muller transform function CROSS APPLY ( SELECT dbo.RN_NORMAL(@MeanLoginSeconds, @StdDevLoginSeconds, URN1, URN2) ) c(NRN) -- Use our Tally table to generate the required number of @Days then convert to a date CROSS APPLY (SELECT n FROM Tally WHERE n BETWEEN 1 AND @Days) d CROSS APPLY (SELECT DATEADD(day, d.n-1, @StartDate)) e([Date]) -- Use our Tally table to generate the required number of @users and logins/logouts CROSS APPLY (SELECT n FROM Tally WHERE n BETWEEN 1 AND @UserIDs) f CROSS APPLY (SELECT DATEADD(second, ABS(CHECKSUM(NEWID()))%84000, e.[Date])) g(LoginStart) CROSS APPLY ( VALUES (0, LoginStart),(1, DATEADD(second, NRN, LoginStart)) ) h([Event], EventTime); PRINT 'Number of Test Rows: ' + CAST(@@ROWCOUNT AS VARCHAR(15)); -- Remove some logouts to create "open" logins WITH Logouts AS ( SELECT *, rn=ROW_NUMBER() OVER (ORDER BY EventTime) FROM #Events WHERE [Event] = 1 ) DELETE FROM Logouts WHERE rn % 300 = 2; PRINT 'Number of Test Rows Removed: ' + CAST(@@ROWCOUNT AS VARCHAR(15)) + CHAR(10); PRINT 'Remove Missing End Points'; SET STATISTICS TIME ON; SELECT UserID, LoginTime, LogoutTime INTO #Results_1 FROM ( SELECT ID, UserID ,LoginTime=CASE WHEN [Event] = 0 THEN EventTime END ,LogoutTime=CASE -- If the next Event by EventTime is a logout WHEN LEAD([Event], 1) OVER (PARTITION BY UserID ORDER BY EventTime) = 1 -- Then we pick up the corresponding EventTime THEN LEAD(EventTime, 1) OVER (PARTITION BY UserID ORDER BY EventTime) -- Otherwise logout time will be set to NULL END ,[Event] FROM #Events ) a WHERE [Event] = 0 AND LogoutTime IS NOT NULL; SET STATISTICS TIME OFF; PRINT 'Packing Intervals'; SET STATISTICS TIME ON; WITH GroupedLogins AS ( SELECT UserID, LoginTime, LogoutTime FROM ( SELECT ID, UserID ,LoginTime=CASE WHEN [Event] = 0 THEN EventTime END ,LogoutTime=CASE -- If the next Event by EventTime is a logout WHEN LEAD([Event], 1) OVER (PARTITION BY UserID ORDER BY EventTime) = 1 -- Then we pick up the corresponding EventTime THEN LEAD(EventTime, 1) OVER (PARTITION BY UserID ORDER BY EventTime) -- Otherwise logout time will be set to NULL END ,[Event] FROM #Events ) a -- Filter only on login events that are not "open" (no logout) WHERE [Event] = 0 AND LogoutTime IS NOT NULL ), C1 AS ( -- Since the CTE above produces rows with a start and end date, we'll first unpivot -- those using CROSS APPLY VALUES and assign a type to each. The columns e and s will -- either be NULL (s NULL for an end date, e NULL for a start date) or row numbers -- sequenced by the time. UserID has been eliminated because we're looking for overlapping -- intervals across all users. SELECT ts, [Type] ,e=CASE [Type] WHEN 1 THEN NULL ELSE ROW_NUMBER() OVER (PARTITION BY [Type] ORDER BY LogoutTime) END ,s=CASE [Type] WHEN -1 THEN NULL ELSE ROW_NUMBER() OVER (PARTITION BY [Type] ORDER BY LoginTime) END FROM GroupedLogins CROSS APPLY (VALUES (1, LoginTime), (-1, LogoutTime)) a([Type], ts) ), C2 AS ( -- Add a row number ordered as shown SELECT C1.*, se=ROW_NUMBER() OVER (ORDER BY ts, [Type] DESC) FROM C1 ), C3 AS ( -- Create a grpnm that pairs the rows SELECT ts, grpnm=FLOOR((ROW_NUMBER() OVER (ORDER BY ts)-1) / 2 + 1) FROM C2 -- This filter is the magic that eliminates the overlaps WHERE COALESCE(s-(se-s)-1, (se-e)-e) = 0 ), C4 AS ( -- Grouping by grpnm restores the records to only non-overlapped intervals SELECT StartDate=MIN(ts), EndDate=MAX(ts) FROM C3 GROUP BY grpnm ) SELECT * INTO #Results_2 FROM C4; SET STATISTICS TIME OFF; PRINT 'Interval Gaps'; SET STATISTICS TIME ON; WITH GroupedLogins AS ( SELECT UserID, LoginTime, LogoutTime FROM ( SELECT ID, UserID ,LoginTime=CASE WHEN [Event] = 0 THEN EventTime END ,LogoutTime=CASE -- If the next Event by EventTime is a logout WHEN LEAD([Event], 1) OVER (PARTITION BY UserID ORDER BY EventTime) = 1 -- Then we pick up the corresponding EventTime THEN LEAD(EventTime, 1) OVER (PARTITION BY UserID ORDER BY EventTime) -- Otherwise logout time will be set to NULL END ,[Event] FROM #Events ) a WHERE [Event] = 0 AND LogoutTime IS NOT NULL ), C1 AS ( -- Since the CTE above produces rows with a start and end date, we'll first unpivot -- those using CROSS APPLY VALUES and assign a type to each. The columns e and s will -- either be NULL (s NULL for an end date, e NULL for a start date) or row numbers -- sequenced by the time. UserID has been eliminated because we're looking for overlapping -- intervals across all users. SELECT ts, [Type] ,e=CASE [Type] WHEN 1 THEN NULL ELSE ROW_NUMBER() OVER (PARTITION BY [Type] ORDER BY LogoutTime) END ,s=CASE [Type] WHEN -1 THEN NULL ELSE ROW_NUMBER() OVER (PARTITION BY [Type] ORDER BY LoginTime) END FROM GroupedLogins CROSS APPLY (VALUES (1, LoginTime), (-1, LogoutTime)) a([Type], ts) ), C2 AS ( -- Add a row number ordered as shown SELECT C1.*, se=ROW_NUMBER() OVER (ORDER BY ts, [Type] DESC) FROM C1 ), C3 AS ( -- Create a grpnm that pairs the rows SELECT ts, grpnm=FLOOR((ROW_NUMBER() OVER (ORDER BY ts)-1) / 2 + 1) FROM C2 -- This filter is the magic that eliminates the overlaps WHERE COALESCE(s-(se-s)-1, (se-e)-e) = 0 ), C4 AS ( -- Grouping by grpnm restores the records to only non-overlapped intervals SELECT StartDate=MIN(ts), EndDate=MAX(ts) FROM C3 GROUP BY grpnm ) -- CROSS APPLY VALUES to convert islands to gaps SELECT StartTime=MIN(EventTime), EndTime=MAX(EventTime) INTO #Results_3 FROM ( SELECT EventTime ,rn=ROW_NUMBER() OVER (ORDER BY EventTime)/2 FROM C4 a CROSS APPLY (VALUES (StartDate), (EndDate)) b(EventTime) ) a GROUP BY rn HAVING COUNT(*) = 2; SET STATISTICS TIME OFF; GO DROP TABLE #Events; DROP TABLE #Results_1; DROP TABLE #Results_2; DROP TABLE #Results_3;