SQL Hierarchy Comparative Performance

Posted on March 1, 2011

2



This is the last post I will be doing on SQL Hierarchies. Since I’ve covered the most common methods for dealing with hierarchies I thought it worthwhile to do some performance comparisons. I will be testing displaying the descendants of the root node, as well as testing aggregations from a sales table for both the root node and a node three levels from the bottom.

While I’ve already covered most of the code shown here, I will also show how to get aggregations for nodes that are not the root node.

So firstly we need a decent set of data to test. I modified the below code based on some code from this link:

http://www.sqlservercentral.com/columnists/phe/CTETestPopulateData2.txt

The base data set we will be using has 10 levels and 29,524 nodes.

Here’s the code to create and populate the product table with an adjacency list (parent-child), and the other hierarchy methods populated from the initial product table. I’ve broken some of the code into temp tables to improve the performance.

/**************************
-- Set up
**************************/
SET NOCOUNT ON;
IF OBJECT_ID('dbo.Product') IS NOT NULL DROP TABLE dbo.Product
GO
CREATE TABLE dbo.Product(ProductID INT IDENTITY(1,1) NOT NULL PRIMARY KEY
, ProductName VARCHAR(200)
, ParentProductID INT
, ProductDescr VARCHAR(200))
GO
CREATE INDEX IX_Product_ParentProductID ON dbo.Product (ParentProductID)
GO
CREATE INDEX IX_Product_Cover ON dbo.Product (ParentProductId)
INCLUDE (ProductId, ProductName)
GO
-- Populate Product Table
DECLARE  @TotalLevels int
SET @TotalLevels=10
DECLARE @Seed TABLE (ProductName VARCHAR(200) NOT NULL, ProductDescr VARCHAR(200))
DECLARE @Parents1 TABLE (ProductID int NOT NULL,ProductName VARCHAR(200) NOT NULL)
DECLARE @Parents2 TABLE (ProductID int NOT NULL,ProductName VARCHAR(200) NOT NULL)
DECLARE @Level int
INSERT @Seed (ProductName, ProductDescr) SELECT N'ProductX_','GroupX'
INSERT @Seed (ProductName, ProductDescr) SELECT N'ProductY_','GroupY'
INSERT @Seed (ProductName, ProductDescr) SELECT N'ProductZ_','GroupZ'

INSERT dbo.Product (ParentProductID,ProductName, ProductDescr)
OUTPUT INSERTED.ProductID,INSERTED.ProductName INTO @Parents1
SELECT NULL,'ProductA_L1','Root 0'

SET @Level=2
WHILE @Level<=@TotalLevels
BEGIN
	IF @Level%2=0
	BEGIN
		INSERT dbo.Product (ParentProductID,ProductName, ProductDescr)
		OUTPUT INSERTED.ProductID,INSERTED.ProductName INTO @Parents2
		SELECT P.ProductID,P.ProductName+N'.'+S.ProductName+CAST(@Level as VARCHAR(20)),S.ProductDescr+' in Level '+CAST(@Level as VARCHAR(20))
		FROM @Parents1 P CROSS JOIN @Seed S
		DELETE @Parents1
	END
	ELSE
	BEGIN
		INSERT dbo.Product (ParentProductID,ProductName, ProductDescr)
		OUTPUT INSERTED.ProductID,INSERTED.ProductName INTO @Parents1
		SELECT P.ProductID,P.ProductName+N'.'+S.ProductName+CAST(@Level as VARCHAR(20)),S.ProductDescr+' in Level '+CAST(@Level as VARCHAR(20))
		FROM @Parents2 P CROSS JOIN @Seed S
		DELETE @Parents2
	END
	SET @Level=@Level+1
END
GO
--====================
--Enumerated Path
--====================
IF OBJECT_ID('dbo.ProductEnumeratedPath') IS NOT NULL DROP TABLE ProductEnumeratedPath
GO
;WITH ProductLevels
AS
(	SELECT  p.ProductName
		, p.ProductID
		, 1 AS HLevel
		, '.' + CONVERT(VARCHAR(MAX), ProductID) AS TreePath
		, ParentProductID
	FROM dbo.Product p
	WHERE p.ParentProductID IS NULL
	UNION ALL
	SELECT p.ProductName
		, p.ProductID
		, x.HLevel+1
		, x.TreePath + '.' + CONVERT(VARCHAR(MAX), p.ProductID) AS TreePath
		, p.ParentProductID
	FROM dbo.Product p
	INNER JOIN ProductLevels x
	ON x.ProductID=p.ParentProductID
)
SELECT ProductName, ProductID, ParentProductID, Hlevel, CAST(TreePath AS VARCHAR(50))+ '.' AS TreePath
INTO ProductEnumeratedPath
FROM ProductLevels
GO
CREATE CLUSTERED INDEX CIX_ProdEnum ON ProductEnumeratedPath(TreePath, ProductID)
GO
CREATE INDEX IX_ProdEnum ON ProductEnumeratedPath(ProductID,ProductName)
GO
CREATE INDEX IX1 ON dbo.ProductEnumeratedPath(ParentProductID)
GO
--====================
--Nested Sets
--====================
IF OBJECT_ID('dbo.ProductsNested') IS NOT NULL DROP TABLE dbo.ProductsNested
GO
;WITH ProductLevels
AS
(	SELECT  p.ProductName
		, p.ProductID
		, 1 AS HLevel
		, CONVERT(VARCHAR(MAX), ProductID) AS TreePath
		, ParentProductID
	FROM dbo.Product p
	WHERE p.ParentProductID IS NULL
	UNION ALL
	SELECT p.ProductName
		, p.ProductID
		, x.HLevel+1
		, x.TreePath + '.' + CONVERT(VARCHAR(MAX), p.ProductID) AS TreePath
		, p.ParentProductID
	FROM dbo.Product p
	INNER JOIN ProductLevels x
	ON x.ProductID=p.ParentProductID
)
SELECT *
INTO #A
FROM ProductLevels
GO
ALTER TABLE #a
ALTER COLUMN TreePath varchar(500)
GO
CREATE INDEX ix ON #a(TreePath)
GO
SELECT
     *,
     ROW_NUMBER() OVER (ORDER BY TreePath) AS Row
into #ProductRows
FROM #a
GO
CREATE CLUSTERED INDEX cix ON #ProductRows(TreePath,Row, HLevel)
GO
SELECT ProductName
     ,ER.ProductID
     ,ParentProductID
     ,ER.TreePath
     ,ER.HLevel
     ,ER.Row
     ,(ER.Row * 2) - ER.HLevel AS Lft,
     ((ER.Row * 2) - ER.HLevel) +
        (
            SELECT COUNT(*) * 2
            FROM #ProductRows ER2
            WHERE ER2.TreePath LIKE ER.TreePath + '.%'
        ) + 1 AS Rgt
INTO dbo.ProductsNested
FROM #ProductRows ER
ORDER BY TreePath
GO
CREATE CLUSTERED INDEX CIX_Nested ON dbo.ProductsNested (lft, ProductID, rgt)
GO
CREATE INDEX IX_Nested ON dbo.ProductsNested (ProductID,ProductName)
GO
--====================
-- HierarchyID
--====================
IF OBJECT_ID('dbo.ProductHID') IS NOT NULL DROP TABLE ProductHID
GO
CREATE TABLE dbo.ProductHID(
ProductID int not null,
ProductName VARCHAR(250),
hid     HIERARCHYID NOT NULL,
lvl AS hid.GetLevel() PERSISTED)
GO
--Insert values from Adjacency List to HierarchyID
;WITH ProdRowNum AS
	(SELECT ProductID, ParentProductID, ProductName
	, ROW_NUMBER() OVER (PARTITION BY ParentProductID ORDER BY ProductID) AS rn
	FROM dbo.Product)
, Subs
AS
(
	SELECT  p.ProductName
		, p.ProductID
		, '/' + CAST(p.ProductID AS VARCHAR(MAX)) + '/'  AS TreePath
		, ParentProductID
	FROM dbo.Product p
	WHERE p.ParentProductID IS NULL
	UNION ALL
	SELECT p.ProductName
		, p.ProductID
		 ,CAST(H.TreePath + CAST(p.rn AS VARCHAR(100)) + '/' AS VARCHAR(MAX)) AS TreePath
		, p.ParentProductID
	FROM ProdRowNum p
	INNER JOIN subs H
	ON H.ProductID=p.ParentProductID
)
INSERT INTO dbo.ProductHID
SELECT ProductID, ProductName,CAST(Treepath AS HIERARCHYID) AS hid
FROM Subs
GO
CREATE CLUSTERED INDEX CIX_HID ON dbo.ProductHID(hid,ProductID)
GO
CREATE NONCLUSTERED INDEX ix_hid ON dbo.ProductHID(ProductID, ProductName)
GO
--======================
-- Kimball Bridge Table
--======================
IF OBJECT_ID('dbo.ProductBridge') IS NOT NULL DROP TABLE dbo.ProductBridge
GO
CREATE TABLE dbo.ProductBridge(
   AncestorProductID INT
 , ProductID INT
 , HLevel INT)
GO
DECLARE @Node INT
DECLARE cur1 CURSOR LOCAL FAST_FORWARD FOR
SELECT ProductID
FROM dbo.Product a
 OPEN cur1
 FETCH NEXT FROM cur1 INTO @Node
 WHILE @@FETCH_STATUS = 0
 BEGIN
  ;WITH Ancestors
  As
  ( SELECT  p.ProductID
    , ParentProductID
    , 0 AS Hlevel
   FROM dbo.Product p
   WHERE ProductID = @Node
   UNION ALL
   SELECT p.ProductID
    , p.ParentProductID
    , h.Hlevel + 1
   FROM dbo.Product p
   INNER JOIN Ancestors H
   ON H.ParentProductID = p.ProductID
  )
   , FixLevels AS (
   SELECT ParentProductID, ProductID, MAX(HLevel) OVER() AS MaxLvl
   FROM Ancestors c
   )
  INSERT INTO dbo.ProductBridge(AncestorProductID, ProductID, HLevel)
  SELECT COALESCE(ParentProductID, @Node) AS AncestorProductID
   , @Node AS ProductID
   , MaxLvl AS HLevel
  FROM FixLevels
  FETCH NEXT FROM cur1 INTO @Node
 END
 CLOSE cur1
 DEALLOCATE cur1
 GO
 CREATE CLUSTERED INDEX CIX ON dbo.ProductBridge(AncestorProductID, ProductID)
 GO
 

The first test was to measure the performance of displaying all of the descendants of the root node. To do this I set statistics time on, cleared the query plan cache and ran each query ten times and got an average.


SET STATISTICS TIME ON
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

--Recursive CTE on Adjacency List
;WITH Descendants
AS
(	SELECT  p.ProductName
		, p.ProductID
		, 0 AS HLevel
	FROM dbo.Product p
	WHERE p.ParentProductID IS NULL
	UNION ALL
	SELECT p.ProductName
		, p.ProductID
		, h.HLevel + 1
	FROM dbo.Product p
	INNER JOIN Descendants H
	ON H.ProductID=p.ParentProductID
)
SELECT ProductID, ProductName AS ProductName
FROM Descendants d
GO 10

--Enumerated Path
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

SELECT child.ProductID, child.ProductName AS ProductName
FROM ProductEnumeratedPath child
INNER JOIN  ProductEnumeratedPath parent
ON child.TreePath LIKE parent.TreePath + '%'
WHERE parent.ProductID = 1
GO 10

-- Nested Sets
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

SELECT child.ProductID, child.ProductName AS ProductName
FROM ProductsNested parent
INNER JOIN ProductsNested child
ON parent.ProductID = 1
AND child.lft BETWEEN parent.lft AND parent.rgt
GO 10

-- HierarchyID
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

SELECT c.ProductID, C.ProductName
FROM dbo.ProductHID c
JOIN dbo.ProductHID p
ON p.ProductID = 1
AND c.hid.IsDescendantOf(p.hid) = 1
GO 10

-- Kimball Product Bridge join to Dimension Table
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

SELECT p.ProductID, p.ProductName
FROM ProductBridge a
JOIN dbo.Product p
on a.ProductID = p.ProductID
WHERE a.AncestorProductID = 1
GO 10
  

The results returned were this:

We can see that the Nested Sets method is clearly the winner. So I guess there was a reason for Joe Celko banging on about them for years 🙂
Let’s look at creating a Product Sales table and including some random numbers on the leaf nodes of our hierarchy table.


--Create Product sale data
IF OBJECT_ID('dbo.ProductSale') IS NOT NULL DROP TABLE dbo.ProductSale
GO
;WITH Descendants
AS
(	SELECT  p.ProductName
		, p.ProductID
	FROM dbo.Product p
	WHERE p.ParentProductID IS NULL
	UNION ALL
	SELECT p.ProductName
		, p.ProductID
	FROM dbo.Product p
	INNER JOIN Descendants H
	ON H.ProductID=p.ParentProductID
)
SELECT ProductID, ABS(CHECKSUM(NEWID()))%30 AS SaleQty
INTO dbo.ProductSale
FROM Descendants d
WHERE NOT EXISTS (SELECT 1
				FROM dbo.Product p
				WHERE p.ParentProductID = d.ProductID)
GO
CREATE CLUSTERED INDEX CIX_ProdSale ON dbo.ProductSale(ProductID)
GO
 

So let’s test each of the hierarchy methods against this Sales Product table, providing aggregations all the way to the root node:

--======================
-- CTE ON Adjacency List
--======================
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

;WITH CTE
As
( SELECT  p.ProductName
       , p.ProductID
       , ParentProductID
       , ps.SaleQty
 FROM dbo.Product p
 LEFT OUTER JOIN dbo.ProductSale ps
 ON ps.ProductID = p.ProductID
 WHERE ParentProductID IS NOT NULL
 UNION ALL
 SELECT p.ProductName
       , p.ProductID
       , p.ParentProductID
       , H.SaleQty
 FROM dbo.Product p
 INNER JOIN CTE H
 ON H.ParentProductID=p.ProductID
)
SELECT c.ProductName, c.ProductID, SUM(SaleQty)  AS SaleQty
FROM CTE c
WHERE SaleQty IS NOT NULL
GROUP BY c.ProductName, c.ProductID
GO 10
--=================
-- Enumerated Path
--=================
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

;WITH CTE AS (
 SELECT a.ProductID, b.ParentProductID , b.ProductName
 FROM dbo.ProductEnumeratedPath a
 JOIN dbo.ProductEnumeratedPath b
 on a.TreePath LIKE b.TreePath + '%'
 WHERE NOT EXISTS (  SELECT 1
					  FROM dbo.ProductEnumeratedPath c
					  WHERE c.ParentProductID = a.ProductID)
    )
SELECT ProductName, SUM(ps.SaleQty) AS SaleQty
FROM dbo.ProductSale ps
JOIN CTE c
ON c.ProductID = ps.ProductID
GROUP BY c.ParentProductID,ProductName
GO 10

--=============
-- Nested Sets
--=============
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

;WITH Sales AS (
 SELECT lft, ProductName,COALESCE(ps.SaleQty,0) AS SaleQty
 FROM dbo.ProductsNested child
 LEFT OUTER JOIN dbo.ProductSale ps
 ON ps.ProductID = child.ProductID
 )
SELECT parent.ProductName AS ProductName
 , SUM(SaleQty) AS SaleQty, parent.lft
FROM ProductsNested parent
INNER JOIN Sales child
ON child.lft BETWEEN parent.lft AND parent.rgt
GROUP BY parent.ProductName, parent.lft
GO 10

--=============
-- HierarchyID
--=============
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

SELECT p.ProductID
, p.ProductName
, SUM(COALESCE(SaleQty,0)) AS SaleQty
FROM dbo.ProductHID c
INNER JOIN dbo.ProductHID p
ON c.hid.IsDescendantOf(p.hid) = 1
LEFT JOIN dbo.ProductSale s
ON s.ProductID = c.ProductID
GROUP BY p.ProductID,p.ProductName
GO 10

--===============================
-- Kimball Hierarchy Bridge Table
--===============================
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

;WITH BridgeCTE AS (
SELECT p.AncestorProductID
 , p.ProductID
 , COALESCE(SaleQty,0) AS SaleQty
FROM dbo.ProductBridge p
LEFT OUTER JOIN dbo.ProductSale ps
on p.ProductID = ps.ProductID
)
SELECT p.ProductName, SUM(SaleQty) AS SaleQty
FROM  BridgeCTE ps
LEFT OUTER JOIN Product p
ON ps.AncestorProductID = p.ProductID
GROUP BY p.ProductName
GO 10

 

I had originally used an INNER JOIN in the HierarchyID query which was quite unresponsive. I then used a temp table which gave a dramatic improvement before realizing my mistake and returning to the query shown with a LEFT OUTER JOIN. Even though the temp table gave far better performance, I didn’t record the results because I wanted to keep the tests similar.

Here’s the results:

We can see the Bridge table is the most performant, while the Nested Sets method is close behind.
Finally, let’s look at returning the aggregates for a node on Level 8 and all of its descendants.

--=========================
-- CTE ON Adjacency List
--=========================

DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

;WITH CTE
AS
( SELECT  p.ProductName
       , p.ProductID
       , ParentProductID
       , COALESCE(ps.SaleQty,0) AS SaleQty
 FROM dbo.Product p
 LEFT OUTER JOIN dbo.ProductSale ps
 ON ps.ProductID = p.ProductID
 WHERE ParentProductID IS NOT NULL
 UNION ALL
 SELECT p.ProductName
       , p.ProductID
       , p.ParentProductID
       , H.SaleQty
 FROM dbo.Product p
 INNER JOIN CTE H
 ON H.ParentProductID=p.ProductID
)
SELECT c.ProductName, c.ProductID, SUM(SaleQty)  AS SaleQty
FROM CTE c
WHERE ParentProductID = 3280 OR ProductID = 3280
GROUP BY c.ProductName, c.ProductID
GO 10

--=========================
-- Enumerated Path
--=========================
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

;WITH cte AS (
	SELECT a.ProductID, a.TreePath, COALESCE(ps.SaleQty,0) AS SaleQty
	FROM dbo.ProductEnumeratedPath a
	LEFT JOIN dbo.ProductSale ps
	ON ps.ProductID = a.ProductID
	)
, Sales AS (
SELECT p.ProductName,P.TreePath, SUM(c.SaleQty) AS SaleQty
FROM cte c
INNER JOIN dbo.ProductEnumeratedPath p
ON c.TreePath LIKE p.TreePath + '%'
GROUP BY p.ProductName,P.TreePath
)
SELECT c.ProductName, SaleQty
FROM Sales c
INNER JOIN dbo.ProductEnumeratedPath p
ON c.TreePath LIKE p.TreePath + '%'
WHERE p.ProductID = 3280
GO 10

--=========================
-- Nested Sets
--=========================
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

;WITH Sales AS (
 SELECT lft, ProductName,COALESCE(ps.SaleQty,0) AS SaleQty
 FROM dbo.ProductsNested child
 LEFT OUTER JOIN dbo.ProductSale ps
 ON ps.ProductID = child.ProductID
 )
SELECT parent.ProductName AS ProductName
 , SUM(SaleQty) AS SaleQty
FROM ProductsNested parent
INNER JOIN Sales child
ON child.lft BETWEEN parent.lft AND parent.rgt
WHERE parent.ProductID IN (SELECT child.ProductID
							FROM ProductsNested parent
							INNER JOIN ProductsNested child
							ON parent.ProductID = 3280
							AND child.lft BETWEEN parent.lft AND parent.rgt )
GROUP BY parent.ProductName
GO 10
--=========================
-- HierarchyID
--=========================
SET NOCOUNT ON;
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

;WITH CTE AS (SELECT c.hid
				, COALESCE(SaleQty,0) AS SaleQty
				FROM dbo.ProductHID c
				LEFT JOIN dbo.ProductSale s
				ON s.ProductID = c.ProductID )
SELECT p.ProductID
	, p.ProductName
	, SUM(SaleQty) AS SaleQty
FROM cte c
INNER JOIN dbo.ProductHID p
ON c.hid.IsDescendantOf(p.hid) = 1
WHERE P.ProductID IN (	SELECT c.ProductID
						FROM dbo.ProductHID c
						JOIN dbo.ProductHID p
						ON p.ProductID = 3280
						AND c.hid.IsDescendantOf(p.hid) = 1)
GROUP BY p.ProductID,p.ProductName
GO 10
--===============================
-- Kimball Hierarchy Bridge Table
--===============================
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

;WITH BridgeCTE AS (
	SELECT p.AncestorProductID
	 , p.ProductID
	 , COALESCE(SaleQty,0) AS SaleQty
	FROM dbo.ProductBridge p
	LEFT OUTER JOIN dbo.ProductSale ps
	ON p.ProductID = ps.ProductID
)
SELECT p.ProductName, SUM(SaleQty) AS SaleQty
FROM BridgeCTE ps
LEFT OUTER JOIN dbo.Product p
ON ps.AncestorProductID = p.ProductID
WHERE p.ProductID IN (SELECT p.ProductID
					FROM ProductBridge a
					JOIN dbo.Product p
					on a.ProductID = p.ProductID
					WHERE a.AncestorProductID  = 3280 )
GROUP BY p.ProductName
GO 10
 

Here are the results:

Again, the Kimball Bridge table is the fastest method to return results, followed by Nested Sets and a long way behind come the other methods.

Below is a graph showing the performance results.

It is quite clear that the Kimball Bridge table performs the fastest for the aggregations, and performs adequately to return the descendants of the root node. It is also clear that Nested Sets perform incredibly well, with the Recursive CTE performing the poorest.

Advertisements
Posted in: Data Modeling, SQL