Time Series Analysis

Working with Dates and Times

Building dates

Execises 🧮

DECLARE
 @SomeTime DATETIME2(7) = SYSUTCDATETIME();

-- Retrieve the year, month, and day
SELECT
 Year(@SomeTime) AS TheYear,
 Month(@SomeTime) AS TheMonth,
 DAy(@SomeTime) AS TheDay;



DECLARE
 @BerlinWallFalls DATETIME2(7) = '1989-11-09 23:49:36.2294852';

-- Fill in each date part
SELECT
 DATEPART(year, @BerlinWallFalls) AS TheYear,
 DATEPART(month, @BerlinWallFalls) AS TheMonth,
 DATEPART(day, @BerlinWallFalls) AS TheDay,
 DATEPART(dayofyear, @BerlinWallFalls) AS TheDayOfYear,
    -- Day of week is WEEKDAY
 DATEPART(WEEKDAY, @BerlinWallFalls) AS TheDayOfWeek,
 DATEPART(week, @BerlinWallFalls) AS TheWeek,
 DATEPART(second, @BerlinWallFalls) AS TheSecond,
 DATEPART(nanosecond, @BerlinWallFalls) AS TheNanosecond;

DECLARE
 @BerlinWallFalls DATETIME2(7) = '1989-11-09 23:49:36.2294852';

-- Fill in the function to show the name of each date part
SELECT
 DATENAME(YEAR, @BerlinWallFalls) AS TheYear,
 DATENAME(MONTH, @BerlinWallFalls) AS TheMonth,
 DATENAME(DAY, @BerlinWallFalls) AS TheDay,
 DATENAME(DAYOFYEAR, @BerlinWallFalls) AS TheDayOfYear,
    -- Day of week is WEEKDAY
 DATENAME(WEEKDAY, @BerlinWallFalls) AS TheDayOfWeek,
 DATENAME(WEEK, @BerlinWallFalls) AS TheWeek,
 DATENAME(SECOND, @BerlinWallFalls) AS TheSecond,
 DATENAME(NANOSECOND, @BerlinWallFalls) AS TheNanosecond;

Dealing with leap years

DECLARE
 @LeapDay DATETIME2(7) = '2012-02-29 18:00:00';

-- Fill in the date parts and intervals as needed
SELECT
 DATEADD(DAY, -1, @LeapDay) AS PriorDay,
 DATEADD(DAY, 1, @LeapDay) AS NextDay,
    -- For leap years, we need to move 4 years, not just 1
 DATEADD(YEAR, -4, @LeapDay) AS PriorLeapYear,
 DATEADD(YEAR, 4, @LeapDay) AS NextLeapYear,
 DATEADD(Year, -1, @LeapDay) AS PriorYear;

 DECLARE
 @PostLeapDay DATETIME2(7) = '2012-03-01 18:00:00';

-- Fill in the date parts and intervals as needed
SELECT
 DATEADD(DAY, -1, @PostLeapDay) AS PriorDay,
 DATEADD(DAY, 1, @PostLeapDay) AS NextDay,
 DATEADD(YEAR, -4, @PostLeapDay) AS PriorLeapYear,
 DATEADD(YEAR, 4, @PostLeapDay) AS NextLeapYear,
 DATEADD(YEAR, -1, @PostLeapDay) AS PriorYear,
    -- Move 4 years forward and one day back
 DATEADD(day, -1, DATEADD(year, 4, @PostLeapDay)) AS NextLeapDay,
    DATEADD(day, -2, @PostLeapDay) AS TwoDaysAgo;


 DECLARE
 @PostLeapDay DATETIME2(7) = '2012-03-01 18:00:00',
    @TwoDaysAgo DATETIME2(7);

SELECT
 @TwoDaysAgo = DATEADD(DAY, -2, @PostLeapDay);

SELECT
 @TwoDaysAgo AS TwoDaysAgo,
 @PostLeapDay AS SomeTime,
    -- Fill in the appropriate function and date types
 Datediff(day, @TwoDaysAgo, @PostLeapDay) AS DaysDifference,
 Datediff(hour, @TwoDaysAgo, @PostLeapDay) AS HoursDifference,
 Datediff(minute, @TwoDaysAgo, @PostLeapDay) AS MinutesDifference;

Rounding dates SQL Server does not have an intuitive way to round down to the month, hour, or minute. You can, however, combine the DATEADD() and DATEDIFF() functions to perform this rounding. o round the date 1914-08-16 down to the year, we would call DATEADD(YEAR, DATEDIFF(YEAR, 0, '1914-08-16'), 0). To round that date down to the month, we would call DATEADD(MONTH, DATEDIFF(MONTH, 0, '1914-08-16'), 0). This works for several other date parts as well.

DECLARE
 @SomeTime DATETIME2(7) = '2018-06-14 16:29:36.2248991';

-- Fill in the appropriate functions and date parts
SELECT
 DATEADD(day, DATEDIFF(day, 0, @SomeTime), 0) AS RoundedToDay,
 DATEADD(hour, datediff(hour, 0, @SomeTime), 0) AS RoundedToHour,
 DATEADD(minute, datediff(minute, 0, @SomeTime), 0) AS RoundedToMinute;

formatting dates for reporting

sql server has three formatting functions: CAST() FORMAT() CONVERT()

using the Convert(): it takes datatype, input, optional style FORMAT() is single threaded: (input, format code, optional culture)

DECLARE
 @CubsWinWorldSeries DATETIME2(3) = '2016-11-03 00:30:29.245',
 @OlderDateType DATETIME = '2016-11-03 00:30:29.245';

SELECT
 -- Fill in the missing function calls
 CAST(@CubsWinWorldSeries AS DATE) AS CubsWinDateForm,
 CAST(@CubsWinWorldSeries AS NVARCHAR(30)) AS CubsWinStringForm,
 CAST(@OlderDateType AS DATE) AS OlderDateForm,
 CAST(@OlderDateType AS NVARCHAR(30)) AS OlderStringForm;


 --example of inner and outer function casting
 DECLARE
 @CubsWinWorldSeries DATETIME2(3) = '2016-11-03 00:30:29.245';

SELECT
 CAST(CAST(@CubsWinWorldSeries AS DATE) AS NVARCHAR(30)) AS DateStringForm;



 --convert function
 DECLARE
 @CubsWinWorldSeries DATETIME2(3) = '2016-11-03 00:30:29.245';

SELECT
 CONVERT(DATE, @CubsWinWorldSeries) AS CubsWinDateForm,
 CONVERT(NVARCHAR(30), @CubsWinWorldSeries) AS CubsWinStringForm;

 --using optional style
 DECLARE
 @CubsWinWorldSeries DATETIME2(3) = '2016-11-03 00:30:29.245';

SELECT
 convert(NVARCHAR(30), @CubsWinWorldSeries, 0) AS DefaultForm,
 convert(NVARCHAR(30), @CubsWinWorldSeries, 3) AS UK_dmy,
 convert(NVARCHAR(30), @CubsWinWorldSeries, 1) AS US_mdy,
 convert(NVARCHAR(30), @CubsWinWorldSeries, 103) AS UK_dmyyyy,
 convert(NVARCHAR(30), @CubsWinWorldSeries, 101) AS US_mdyyyy;

 --using format
 DECLARE
 @Python3ReleaseDate DATETIME2(3) = '2008-12-03 19:45:00.033';

SELECT
 -- Fill in the function call and format parameter
 format(@Python3ReleaseDate, 'd', 'en-US') AS US_d,
 format(@Python3ReleaseDate, 'd', 'de-DE') AS DE_d,
 -- Fill in the locale for Japan
 format(@Python3ReleaseDate, 'd', 'jp-JP') AS JP_d,
 format(@Python3ReleaseDate, 'd', 'zh-cn') AS CN_d;

 DECLARE
 @Python3ReleaseDate DATETIME2(3) = '2008-12-03 19:45:00.033';

SELECT
 -- Fill in the format parameter
 FORMAT(@Python3ReleaseDate, 'D', 'en-US') AS US_D,
 FORMAT(@Python3ReleaseDate, 'D', 'de-DE') AS DE_D,
 -- Fill in the locale for Indonesia
 FORMAT(@Python3ReleaseDate, 'D', 'id-ID') AS ID_D,
 FORMAT(@Python3ReleaseDate, 'D', 'zh-cn') AS CN_D;

working with calender tables

also knows as data dimensions calender table: is a table that stores dates for easy retrieval this is a table you build once and never update (nearly) Afte building the table, you can easily retrieve data from it

SELECT c.Date FROM dbo.Calender
where
 c.MonthName = 'April'
 AND c.DayName = 'Saturday'
 AND c.CalenderYear = '2020'
order by
c.Date

-- Find fiscal week 29 of fiscal year 2019
SELECT
 c.Date
FROM dbo.Calendar c
WHERE
    -- Instead of month, use the fiscal week
 c.FiscalWeekOfYear = 29
    -- Instead of calendar year, use fiscal year
 AND c.FiscalYear = 2019
ORDER BY
 c.Date ASC;

Apply() opertor: it excuete a function for each row on the result set, it simplifed calculations look at the slides, very important

we can use Calendar table to join other tables 🏓 :

SELECT
 ir.IncidentDate,
 c.FiscalDayOfYear,
 c.FiscalWeekOfYear
FROM dbo.IncidentRollup ir
 INNER JOIN dbo.Calendar c
  ON ir.IncidentDate = c.Date
WHERE
    -- Incident type 4
 ir.IncidentTypeID = 4
    -- Fiscal year 2019
 AND c.FiscalYear = 2019
    -- Beyond fiscal week of year 30
 AND c.FiscalWeekOfYear > 30
    -- Only return weekends
 AND c.IsWeekend = 1;

Converting to Dates and Times

I this chapter we will learn how to create dates

Building Dates from parts

SQL Server has six function to build dates and times from component parts "the from parts series" look at the slides

If any of your input is null, the result will be null

SELECT TOP(10)
 c.CalendarQuarterName,
 c.MonthName,
 c.CalendarDayOfYear
FROM dbo.Calendar c
WHERE
 -- Create dates from component parts
 DATEFROMPARTS(c.CalendarYear, c.CalendarMonth, c.Day) >= '2018-06-01'
 AND c.DayName = 'Tuesday'
ORDER BY
 c.FiscalYear,
 c.FiscalDayOfYear ASC;

exercise: Build data that Neil armstrong landed the moon at [date]

SELECT
 -- Mark the date and time the lunar module touched down
    -- Use 24-hour notation for hours, so e.g., 9 PM is 21
 DATETIME2FROMPARTS(1969, 07, 20, 20, 17, 00, 000, 0) AS TheEagleHasLanded,
 -- Mark the date and time the lunar module took back off
    -- Use 24-hour notation for hours, so e.g., 9 PM is 21
 DATETIMEFROMPARTS(1969, 07, 21, 18, 54, 00, 000) AS MoonDeparture;
SELECT
 -- Fill in the millisecond PRIOR TO chaos
 DATETIMEOFFSETFROMPARTS(2038, 01, 19, 03, 14, 07, 999, 0, 0, 3) AS LastMoment,
    -- Fill in the date and time when we will experience the Y2.038K problem
    -- Then convert to the Eastern Standard Time time zone
 DATETIMEOFFSETFROMPARTS(2038, 01, 19, 03, 14, 08, 0, 0, 0, 3) AT TIME ZONE 'Eastern Standard Time' AS TimeForChaos;

Translating Date Strings


SELECT
 d.DateText AS String,
 -- Cast as DATE
 CAST(d.DateText AS DATE) AS StringAsDate,
 -- Cast as DATETIME2(7)
 CAST(d.DateText AS DATETIME2(7)) AS StringAsDateTime2
FROM dbo.Dates d;
SET LANGUAGE 'GERMAN'

SELECT
 d.DateText AS String,
 -- Convert to DATE
 CONVERT(DATE, d.DateText) AS StringAsDate,
 -- Convert to DATETIME2(7)
 CONVERT(DATETIME2(7), d.DateText) AS StringAsDateTime2
FROM dbo.Dates d;
SELECT
 d.DateText AS String,
 -- Parse as DATE using German
 PARSE(d.DateText AS DATE USING 'de-de') AS StringAsDate,
 -- Parse as DATETIME2(7) using German
 PARSE(d.DateText AS DATETIME2(7) USING 'de-de') AS StringAsDateTime2
FROM dbo.Dates d;

working with offsets

DECLARE
 @OlympicsUTC NVARCHAR(50) = N'2016-08-08 23:00:00';

SELECT
 -- Fill in the time zone for Brasilia, Brazil
 SWITCHOFFSET(@OlympicsUTC, '-03:00') AS BrasiliaTime,
 -- Fill in the time zone for Chicago, Illinois
 SWITCHOFFSET(@OlympicsUTC, '-05:00') AS ChicagoTime,
 -- Fill in the time zone for New Delhi, India
 SWITCHOFFSET(@OlympicsUTC, '+05:30') AS NewDelhiTime;
DECLARE
 @OlympicsClosingUTC DATETIME2(0) = '2016-08-21 23:00:00';

SELECT
 -- Fill in 7 hours back and a '-07:00' offset
 TODATETIMEOFFSET(DATEADD(HOUR, -7, @OlympicsClosingUTC), '-07:00') AS PhoenixTime,
 -- Fill in 12 hours forward and a '+12:00' offset.  
 TODATETIMEOFFSET(DATEADD(HOUR, 12, @OlympicsClosingUTC), '+12:00') AS TuvaluTime;

handling invalid Dates

DECLARE
 @GoodDateINTL NVARCHAR(30) = '2019-03-01 18:23:27.920',
 @GoodDateDE NVARCHAR(30) = '13.4.2019',
 @GoodDateUS NVARCHAR(30) = '4/13/2019',
 @BadDate NVARCHAR(30) = N'SOME BAD DATE';

SELECT
 -- Fill in the correct data type based on our input
 TRY_CONVERT(DATETIME2(3), @GoodDateINTL) AS GoodDateINTL,
 -- Fill in the correct function
 TRY_CONVERT(DATE, @GoodDateDE) AS GoodDateDE,
 TRY_CONVERT(DATE, @GoodDateUS) AS GoodDateUS,
 -- Fill in the correct input parameter for BadDate
 TRY_CONVERT(DATETIME2(3), @BadDate) AS BadDate;
DECLARE
 @GoodDateINTL NVARCHAR(30) = '2019-03-01 18:23:27.920',
 @GoodDateDE NVARCHAR(30) = '13.4.2019',
 @GoodDateUS NVARCHAR(30) = '4/13/2019',
 @BadDate NVARCHAR(30) = N'SOME BAD DATE';

-- The prior solution using TRY_CONVERT
SELECT
 TRY_CONVERT(DATETIME2(3), @GoodDateINTL) AS GoodDateINTL,
 TRY_CONVERT(DATE, @GoodDateDE) AS GoodDateDE,
 TRY_CONVERT(DATE, @GoodDateUS) AS GoodDateUS,
 TRY_CONVERT(DATETIME2(3), @BadDate) AS BadDate;

SELECT
 -- Fill in the correct data type based on our input
 TRY_CAST(@GoodDateINTL AS DATETIME2(3)) AS GoodDateINTL,
    -- Be sure to match these data types with the
    -- TRY_CONVERT() examples above!
 TRY_CAST(@GoodDateDE AS DATE) AS GoodDateDE,
 TRY_CAST(@GoodDateUS AS DATE) AS GoodDateUS,
 TRY_CAST(@BadDate AS DATETIME2(3)) AS BadDate;
DECLARE
 @GoodDateINTL NVARCHAR(30) = '2019-03-01 18:23:27.920',
 @GoodDateDE NVARCHAR(30) = '13.4.2019',
 @GoodDateUS NVARCHAR(30) = '4/13/2019',
 @BadDate NVARCHAR(30) = N'SOME BAD DATE';

-- The prior solution using TRY_CAST
SELECT
 TRY_CAST(@GoodDateINTL AS DATETIME2(3)) AS GoodDateINTL,
 TRY_CAST(@GoodDateDE AS DATE) AS GoodDateDE,
 TRY_CAST(@GoodDateUS AS DATE) AS GoodDateUS,
 TRY_CAST(@BadDate AS DATETIME2(3)) AS BadDate;

SELECT
 TRY_PARSE(@GoodDateINTL AS DATETIME2(3)) AS GoodDateINTL,
    -- Fill in the correct region based on our input
    -- Be sure to match these data types with the
    -- TRY_CAST() examples above!
 TRY_PARSE(@GoodDateDE AS DATE USING 'de-de') AS GoodDateDE,
 TRY_PARSE(@GoodDateUS AS DATE USING 'en-us') AS GoodDateUS,
    -- TRY_PARSE can't fix completely invalid dates
 TRY_PARSE(@BadDate AS DATETIME2(3) USING 'sk-sk') AS BadDate;
WITH EventDates AS
(
    SELECT
        -- Fill in the missing try-conversion function
        TRY_CONVERT(DATETIME2(3), it.EventDate) AT TIME ZONE it.TimeZone AS EventDateOffset,
        it.TimeZone
    FROM dbo.ImportedTime it
        INNER JOIN sys.time_zone_info tzi
   ON it.TimeZone = tzi.name
)
SELECT
    -- Fill in the approppriate event date to convert
 CONVERT(NVARCHAR(50), ed.EventDateOffset) AS EventDateOffsetString,
 CONVERT(DATETIME2(0), ed.EventDateOffset) AS EventDateLocal,
 ed.TimeZone,
    -- Convert from a DATETIMEOFFSET to DATETIME at UTC
 CAST(ed.EventDateOffset AT TIME ZONE 'UTC' AS DATETIME2(0)) AS EventDateUTC,
    -- Convert from a DATETIMEOFFSET to DATETIME with time zone
 CAST(ed.EventDateOffset AT TIME ZONE 'US Eastern Standard Time'  AS DATETIME2(0)) AS EventDateUSEast
FROM EventDates ed;
-- Try out how fast the TRY_CAST() function is
-- by try-casting each DateText value to DATE
DECLARE @StartTimeCast DATETIME2(7) = SYSUTCDATETIME();
SELECT TRY_CAST(DateText AS DATE) AS TestDate FROM #DateText;
DECLARE @EndTimeCast DATETIME2(7) = SYSUTCDATETIME();

-- Determine how much time the conversion took by
-- calculating the date difference from @StartTimeCast to @EndTimeCast
SELECT
    DATEDIFF(MILLISECOND, @StartTimeCast, @EndTimeCast) AS ExecutionTimeCast;
-- Try out how fast the TRY_PARSE() function is
-- by try-parsing each DateText value to DATE
DECLARE @StartTimeParse DATETIME2(7) = SYSUTCDATETIME();
SELECT TRY_PARSE(DateText AS DATE) AS TestDate FROM #DateText;
DECLARE @EndTimeParse DATETIME2(7) = SYSUTCDATETIME();

-- Determine how much time the conversion took by
-- calculating the difference from start time to end time
SELECT
    DATEDIFF(MILLISECOND, @StartTimeParse, @EndTimeParse) AS ExecutionTimeParse;

Aggregating Time Series Data

Basic Aggregate function

Exercise

-- Fill in the appropriate aggregate functions
SELECT
 it.IncidentType,
 COUNT(1) AS NumberOfRows,
 SUM(ir.NumberOfIncidents) AS TotalNumberOfIncidents,
 MIN(ir.NumberOfIncidents) AS MinNumberOfIncidents,
 MAX(ir.NumberOfIncidents) AS MaxNumberOfIncidents,
 MIN(ir.IncidentDate) As MinIncidentDate,
 MAX(ir.IncidentDate) AS MaxIncidentDate
FROM dbo.IncidentRollup ir
 INNER JOIN dbo.IncidentType it
  ON ir.IncidentTypeID = it.IncidentTypeID
WHERE
 ir.IncidentDate BETWEEN '2019-08-01' AND '2019-10-31'
GROUP BY
 it.IncidentType;
-- Fill in the functions and columns
SELECT
 COUNT(DISTINCT ir.IncidentTypeID) AS NumberOfIncidentTypes,
 COUNT(DISTINCT ir.IncidentDate) AS NumberOfDaysWithIncidents
FROM dbo.IncidentRollup ir
WHERE
ir.IncidentDate BETWEEN '2019-08-01' AND '2019-10-31';
SELECT
 it.IncidentType,
    -- Fill in the appropriate expression
 SUM(CASE WHEN ir.NumberOfIncidents > 5 THEN 1 ELSE 0 END) AS NumberOfBigIncidentDays,
    -- Number of incidents will always be at least 1, so
    -- no need to check the minimum value, just that it's
    -- less than or equal to 5
    SUM(CASE WHEN ir.NumberOfIncidents <= 5 THEN 1 ELSE 0 END) AS NumberOfSmallIncidentDays
FROM dbo.IncidentRollup ir
 INNER JOIN dbo.IncidentType it
  ON ir.IncidentTypeID = it.IncidentTypeID
WHERE
 ir.IncidentDate BETWEEN '2019-08-01' AND '2019-10-31'
GROUP BY
it.IncidentType;

Statistical aggreate functionality

Exercise

-- Fill in the missing function names
SELECT
 it.IncidentType,
 AVG(ir.NumberOfIncidents) AS MeanNumberOfIncidents,
 AVG(CAST(ir.NumberOfIncidents AS DECIMAL(4,2))) AS MeanNumberOfIncidents,
 STDEV(ir.NumberOfIncidents) AS NumberOfIncidentsStandardDeviation,
 VAR(ir.NumberOfIncidents) AS NumberOfIncidentsVariance,
 COUNT(1) AS NumberOfRows
FROM dbo.IncidentRollup ir
 INNER JOIN dbo.IncidentType it
  ON ir.IncidentTypeID = it.IncidentTypeID
 INNER JOIN dbo.Calendar c
  ON ir.IncidentDate = c.Date
WHERE
 c.CalendarQuarter = 2
 AND c.CalendarYear = 2020
GROUP BY
it.IncidentType;
SELECT DISTINCT
 it.IncidentType,
 AVG(CAST(ir.NumberOfIncidents AS DECIMAL(4,2)))
     OVER(PARTITION BY it.IncidentType) AS MeanNumberOfIncidents,
    --- Fill in the missing value
 PERCENTILE_CONT(0.5)
     -- Inside our group, order by number of incidents DESC
     WITHIN GROUP (ORDER BY ir.NumberOfIncidents DESC)
        -- Do this for each IncidentType value
        OVER (PARTITION BY it.IncidentType) AS MedianNumberOfIncidents,
 COUNT(1) OVER (PARTITION BY it.IncidentType) AS NumberOfRows
FROM dbo.IncidentRollup ir
 INNER JOIN dbo.IncidentType it
  ON ir.IncidentTypeID = it.IncidentTypeID
 INNER JOIN dbo.Calendar c
  ON ir.IncidentDate = c.Date
WHERE
 c.CalendarQuarter = 2
 AND c.CalendarYear = 2020;

Downsampling and upsampling date

Exercise

SELECT
 -- Downsample to a daily grain
    -- Cast CustomerVisitStart as a date
 CAST(dsv.CustomerVisitStart AS DATE) AS Day,
 SUM(dsv.AmenityUseInMinutes) AS AmenityUseInMinutes,
 COUNT(1) AS NumberOfAttendees
FROM dbo.DaySpaVisit dsv
WHERE
 dsv.CustomerVisitStart >= '2020-06-11'
 AND dsv.CustomerVisitStart < '2020-06-23'
GROUP BY
 -- When we use aggregation functions like SUM or COUNT,
    -- we need to GROUP BY the non-aggregated columns
 CAST(dsv.CustomerVisitStart AS DATE)
ORDER BY
 Day;
SELECT
 -- Downsample to a weekly grain
 DATEPART(WEEK, dsv.CustomerVisitStart) AS Week,
 SUM(dsv.AmenityUseInMinutes) AS AmenityUseInMinutes,
 -- Find the customer with the largest customer ID for that week
 MAX(dsv.CustomerID) AS HighestCustomerID,
 COUNT(1) AS NumberOfAttendees
FROM dbo.DaySpaVisit dsv
WHERE
 dsv.CustomerVisitStart >= '2020-01-01'
 AND dsv.CustomerVisitStart < '2021-01-01'
GROUP BY
 -- When we use aggregation functions like SUM or COUNT,
    -- we need to GROUP BY the non-aggregated columns
 DATEPART(WEEK, dsv.CustomerVisitStart)
ORDER BY
 Week;
SELECT
 -- Determine the week of the calendar year
 c.CalendarWeekOfYear,
 -- Determine the earliest DATE in this group
    -- This is NOT the DayOfWeek column
 MIN(c.Date) AS FirstDateOfWeek,
 ISNULL(SUM(dsv.AmenityUseInMinutes), 0) AS AmenityUseInMinutes,
 ISNULL(MAX(dsv.CustomerID), 0) AS HighestCustomerID,
 COUNT(dsv.CustomerID) AS NumberOfAttendees
FROM dbo.Calendar c
 LEFT OUTER JOIN dbo.DaySpaVisit dsv
  -- Connect dbo.Calendar with dbo.DaySpaVisit
  -- To join on CustomerVisitStart, we need to turn 
        -- it into a DATE type
  ON c.Date = CAST(dsv.CustomerVisitStart AS DATE)
WHERE
 c.CalendarYear = 2020
GROUP BY
 -- When we use aggregation functions like SUM or COUNT,
    -- we need to GROUP BY the non-aggregated columns
 c.CalendarWeekOfYear
ORDER BY
 c.CalendarWeekOfYear;

Grouping by Rollup, cube, grouping sets

Exercise

SELECT
 c.CalendarYear,
 c.CalendarQuarterName,
 c.CalendarMonth,
    -- Include the sum of incidents by day over each range
 SUM(ir.NumberOfIncidents) AS NumberOfIncidents
FROM dbo.IncidentRollup ir
 INNER JOIN dbo.Calendar c
  ON ir.IncidentDate = c.Date
WHERE
 ir.IncidentTypeID = 2
GROUP BY
 -- GROUP BY needs to include all non-aggregated columns
 c.CalendarYear,
 c.CalendarQuarterName,
 c.CalendarMonth
-- Fill in your grouping operator
WITH ROLLUP
ORDER BY
 c.CalendarYear,
 c.CalendarQuarterName,
 c.CalendarMonth;
SELECT
 -- Use the ORDER BY clause as a guide for these columns
    -- Don't forget that comma after the third column if you
    -- copy from the ORDER BY clause!
 ir.IncidentTypeID,
 c.CalendarQuarterName,
 c.WeekOfMonth,
 SUM(ir.NumberOfIncidents) AS NumberOfIncidents
FROM dbo.IncidentRollup ir
 INNER JOIN dbo.Calendar c
  ON ir.IncidentDate = c.Date
WHERE
 ir.IncidentTypeID IN (3, 4)
GROUP BY
 -- GROUP BY should include all non-aggregated columns
 ir.IncidentTypeID,
 c.CalendarQuarterName,
 c.WeekOfMonth
-- Fill in your grouping operator
WITH CUBE
ORDER BY
 ir.IncidentTypeID,
 c.CalendarQuarterName,
 c.WeekOfMonth;
SELECT
 c.CalendarYear,
 c.CalendarQuarterName,
 c.CalendarMonth,
 SUM(ir.NumberOfIncidents) AS NumberOfIncidents
FROM dbo.IncidentRollup ir
 INNER JOIN dbo.Calendar c
  ON ir.IncidentDate = c.Date
WHERE
 ir.IncidentTypeID = 2
-- Fill in your grouping operator here
GROUP BY GROUPING SETS
(
   -- Group in hierarchical order:  calendar year,
    -- calendar quarter name, calendar month
 (c.CalendarYear, c.CalendarQuarterName, c.CalendarMonth),
   -- Group by calendar year
 (c.CalendarYear),
    -- This remains blank; it gives us the grand total
 ()
)
ORDER BY
 c.CalendarYear,
 c.CalendarQuarterName,
 c.CalendarMonth;
SELECT
 c.CalendarYear,
 c.CalendarMonth,
 c.DayOfWeek,
 c.IsWeekend,
 SUM(ir.NumberOfIncidents) AS NumberOfIncidents
FROM dbo.IncidentRollup ir
 INNER JOIN dbo.Calendar c
  ON ir.IncidentDate = c.Date
GROUP BY GROUPING SETS
(
    -- Each non-aggregated column from above should appear once
   -- Calendar year and month
 (c.CalendarYear, c.CalendarMonth),
   -- Day of week
 (c.DayOfWeek),
   -- Is weekend or not
 (c.IsWeekend),
    -- This remains empty; it gives us the grand total
 ()
)
ORDER BY
 c.CalendarYear,
 c.CalendarMonth,
 c.DayOfWeek,
 c.IsWeekend;

Answering Time Series Questions with Window Functions

using aggregate functions over windows

Exercise

SELECT
 ir.IncidentDate,
 ir.NumberOfIncidents,
    -- Fill in each window function and ordering
 -- Note that all of these are in descending order!
 ROW_NUMBER() OVER (ORDER BY ir.NumberOfIncidents DESC) AS rownum,
 RANK() OVER (ORDER BY ir.NumberOfIncidents DESC) AS rk,
 DENSE_RANK() OVER (ORDER BY ir.NumberOfIncidents DESC) AS dr
FROM dbo.IncidentRollup ir
WHERE
 ir.IncidentTypeID = 3
 AND ir.NumberOfIncidents >= 8
ORDER BY
 ir.NumberOfIncidents DESC;
SELECT
 ir.IncidentDate,
 ir.NumberOfIncidents,
    -- Fill in the correct aggregate functions
    -- You do not need to fill in the OVER clause
 SUM(ir.NumberOfIncidents) OVER () AS SumOfIncidents,
 MIN(ir.NumberOfIncidents) OVER () AS LowestNumberOfIncidents,
 MAX(ir.NumberOfIncidents) OVER () AS HighestNumberOfIncidents,
 COUNT(ir.NumberOfIncidents) OVER () AS CountOfIncidents
FROM dbo.IncidentRollup ir
WHERE
 ir.IncidentDate BETWEEN '2019-07-01' AND '2019-07-31'
AND ir.IncidentTypeID = 3;

calculate running totals and moving averages

Exercise

SELECT
 ir.IncidentDate,
 ir.IncidentTypeID,
 ir.NumberOfIncidents,
    -- Get the total number of incidents
 SUM(ir.NumberOfIncidents) OVER (
       -- Do this for each incident type ID
  PARTITION BY ir.IncidentTypeID
       -- Sort by the incident date
  ORDER BY ir.IncidentDate
 ) AS NumberOfIncidents
FROM dbo.IncidentRollup ir
 INNER JOIN dbo.Calendar c
  ON ir.IncidentDate = c.Date
WHERE
 c.CalendarYear = 2019
 AND c.CalendarMonth = 7
 AND ir.IncidentTypeID IN (1, 2)
ORDER BY
 ir.IncidentTypeID,
 ir.IncidentDate; 
SELECT
 ir.IncidentDate,
 ir.IncidentTypeID,
 ir.NumberOfIncidents,
    -- Fill in the correct window function
 AVG(ir.NumberOfIncidents) OVER (
  PARTITION BY ir.IncidentTypeID
  ORDER BY ir.IncidentDate
       -- Fill in the three parts of the window frame
  ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
 ) AS MeanNumberOfIncidents
FROM dbo.IncidentRollup ir
 INNER JOIN dbo.Calendar c
  ON ir.IncidentDate = c.Date
WHERE
 c.CalendarYear = 2019
 AND c.CalendarMonth IN (7, 8)
 AND ir.IncidentTypeID = 1
ORDER BY
 ir.IncidentTypeID,
 ir.IncidentDate;

Working with LAG() and LEAD()

Exercise

SELECT
 ir.IncidentDate,
 ir.IncidentTypeID,
    -- Get the prior day's number of incidents
 LAG(ir.NumberOfIncidents, 1) OVER (
       -- Partition by incident type ID
  PARTITION BY ir.IncidentTypeID
       -- Order by incident date
  ORDER BY ir.IncidentDate
 ) AS PriorDayIncidents,
 ir.NumberOfIncidents AS CurrentDayIncidents,
    -- Get the next day's number of incidents
 LEAD(ir.NumberOfIncidents, 1) OVER (
       -- Partition by incident type ID
  PARTITION BY ir.IncidentTypeID
       -- Order by incident date
  ORDER BY ir.IncidentDate
 ) AS NextDayIncidents
FROM dbo.IncidentRollup ir
WHERE
 ir.IncidentDate >= '2019-07-02'
 AND ir.IncidentDate <= '2019-07-31'
 AND ir.IncidentTypeID IN (1, 2)
ORDER BY
 ir.IncidentTypeID,
 ir.IncidentDate;
SELECT
 ir.IncidentDate,
 ir.IncidentTypeID,
    -- Fill in two periods ago
 LAG(ir.NumberOfIncidents, 2) OVER (
  PARTITION BY ir.IncidentTypeID
  ORDER BY ir.IncidentDate
 ) AS Trailing2Day,
    -- Fill in one period ago
 LAG(ir.NumberOfIncidents, 1) OVER (
  PARTITION BY ir.IncidentTypeID
  ORDER BY ir.IncidentDate
 ) AS Trailing1Day,
 ir.NumberOfIncidents AS CurrentDayIncidents,
    -- Fill in next period
 LEAD(ir.NumberOfIncidents, 1) OVER (
  PARTITION BY ir.IncidentTypeID
  ORDER BY ir.IncidentDate
 ) AS NextDay
FROM dbo.IncidentRollup ir
WHERE
 ir.IncidentDate >= '2019-07-01'
 AND ir.IncidentDate <= '2019-07-31'
 AND ir.IncidentTypeID IN (1, 2)
ORDER BY
 ir.IncidentTypeID,
 ir.IncidentDate;
SELECT
 ir.IncidentDate,
 ir.IncidentTypeID,
    -- Fill in the days since last incident
 DATEDIFF(DAY, LAG(ir.IncidentDate, 1) OVER (
  PARTITION BY ir.IncidentTypeID
  ORDER BY ir.IncidentDate
 ), ir.IncidentDate) AS DaysSinceLastIncident,
    -- Fill in the days until next incident
 DATEDIFF(DAY, ir.IncidentDate, LEAD(ir.IncidentDate, 1) OVER (
  PARTITION BY ir.IncidentTypeID
  ORDER BY ir.IncidentDate
 )) AS DaysUntilNextIncident
FROM dbo.IncidentRollup ir
WHERE
 ir.IncidentDate >= '2019-07-02'
 AND ir.IncidentDate <= '2019-07-31'
 AND ir.IncidentTypeID IN (1, 2)
ORDER BY
 ir.IncidentTypeID,
 ir.IncidentDate;

Finding maximum level of overlap

Exercise

-- This section focuses on entrances:  CustomerVisitStart
SELECT
 dsv.CustomerID,
 dsv.CustomerVisitStart AS TimeUTC,
 1 AS EntryCount,
    -- We want to know each customer's entrance stream
    -- Get a unique, ascending row number
 ROW_NUMBER() OVER (
      -- Break this out by customer ID
      PARTITION BY dsv.CustomerID
      -- Ordered by the customer visit start date
      ORDER BY dsv.CustomerVisitStart
    ) AS StartOrdinal
FROM dbo.DaySpaVisit dsv
UNION ALL
-- This section focuses on departures:  CustomerVisitEnd
SELECT
 dsv.CustomerID,
 dsv.CustomerVisitEnd AS TimeUTC,
 -1 AS EntryCount,
 NULL AS StartOrdinal
FROM dbo.DaySpaVisit dsv
SELECT s.*,
    -- Build a stream of all check-in and check-out events
 ROW_NUMBER() OVER (
      -- Break this out by customer ID
      PARTITION BY s.CustomerID
      -- Order by event time and then the start ordinal
      -- value (in case of exact time matches)
      ORDER BY s.TimeUTC, s.StartOrdinal
    ) AS StartOrEndOrdinal
FROM #StartStopPoints s;
SELECT
 s.CustomerID,
 MAX(2 * s.StartOrdinal - s.StartOrEndOrdinal) AS MaxConcurrentCustomerVisits
FROM #StartStopOrder s
WHERE s.EntryCount = 1
GROUP BY s.CustomerID
-- The difference between 2 * start ordinal and the start/end
-- ordinal represents the number of concurrent visits
HAVING MAX(2 * s.StartOrdinal - s.StartOrEndOrdinal) > 2
-- Sort by the largest number of max concurrent customer visits
ORDER BY MaxConcurrentCustomerVisits DESC;

Wrapping up

Last updated