How To Estimate The Compression Ratio For All Tables In Database

How To Estimate The Compression Ratio For All Tables In Database

Wondering how much space you could save by compression all those disk eating tables?

This snippet will look at all the tables in your database and estimate how much you would save by applying ROW or PAGE compression on them. There is nothing special about to it, it just makes use of the sp_estimate_data_compression_savings on all your user tables and organizes the results.

The Code

SET NOCOUNT ON;

DECLARE @Objects TABLE
(
	ItemKey INT IDENTITY (1, 1) PRIMARY KEY CLUSTERED,
	SchemaName SYSNAME NOT NULL,
	ObjectName SYSNAME NOT NULL,
	IndexID INT NOT NULL,
	PartitionNumber INT NOT NULL,
	CurrentCompressionType SYSNAME NOT NULL,
	EstimatedSizeWithNoCompressionKB INT NULL,
	EstimatedSizeWithRowCompressionKB INT NULL,
	EstimatedSizeWithPageCompressionKB INT NULL,
	EstimatedCompressionRatioRowVsNone AS 100 - ((EstimatedSizeWithRowCompressionKB * 100) / (NULLIF(EstimatedSizeWithNoCompressionKB, 0))),
	EstimatedCompressionRatioPageVsNone AS 100 - ((EstimatedSizeWithPageCompressionKB * 100) / (NULLIF(EstimatedSizeWithNoCompressionKB, 0)))
);

INSERT INTO @Objects
(
	SchemaName,
	ObjectName,
	IndexID,
	PartitionNumber,
	CurrentCompressionType
)
SELECT
	S.name,
	T.name,
	I.index_id,
	P.partition_number,
	P.data_compression_desc
FROM
	sys.tables AS T
	INNER JOIN sys.schemas AS S
		ON S.schema_id = T.schema_id
	INNER JOIN sys.indexes AS I
		ON I.object_id = T.object_id
	INNER JOIN sys.partitions AS P
		ON P.object_id = T.object_id
		AND P.index_id = I.index_id
WHERE
	T.type = 'U'
ORDER BY
	S.name,
	T.name,
	I.index_id,
	P.partition_number
;

DECLARE @Results TABLE
(
	ObjectName SYSNAME NOT NULL,
	SchemaName SYSNAME NOT NULL,
	IndexID INT NOT NULL,
	PartitionNumber INT NOT NULL,
	SizeWithCurrentCompression INT NOT NULL,
	SizeWithRequestedCompression INT NOT NULL,
	SampleSizeWithCurrentCompression INT NOT NULL,
	SampleSizeWithRequestedCompression INT NOT NULL
);

DECLARE @ItemKey INT = 1;
DECLARE @LastKey INT = (SELECT MAX(ItemKey) FROM @Objects);
WHILE @ItemKey <= @LastKey
BEGIN

	DECLARE @SchemaName SYSNAME, @ObjectName SYSNAME, @IndexID INT, @PartitionNumber INT;
	SELECT
		@SchemaName = SchemaName,
		@ObjectName = ObjectName,
		@IndexID = IndexID,
		@PartitionNumber = PartitionNumber
	FROM
		@Objects
	WHERE
		ItemKey = @ItemKey;

	DELETE FROM @Results;
	INSERT INTO @Results
	EXECUTE sys.sp_estimate_data_compression_savings
		@schema_name = @SchemaName,
		@object_name = @ObjectName,
		@index_id = @IndexID,
		@partition_number = @PartitionNumber,
		@data_compression = 'NONE';
	UPDATE O
	SET
		O.EstimatedSizeWithNoCompressionKB = R.SizeWithRequestedCompression
	FROM
		@Objects AS O
		CROSS JOIN @Results AS R
	WHERE
		O.ItemKey = @ItemKey

	DELETE FROM @Results;
	INSERT INTO @Results
	EXECUTE sys.sp_estimate_data_compression_savings
		@schema_name = @SchemaName,
		@object_name = @ObjectName,
		@index_id = @IndexID,
		@partition_number = @PartitionNumber,
		@data_compression = 'ROW';
	UPDATE O
	SET
		O.EstimatedSizeWithRowCompressionKB = R.SizeWithRequestedCompression
	FROM
		@Objects AS O
		CROSS JOIN @Results AS R
	WHERE
		O.ItemKey = @ItemKey

	DELETE FROM @Results;
	INSERT INTO @Results
	EXECUTE sys.sp_estimate_data_compression_savings
		@schema_name = @SchemaName,
		@object_name = @ObjectName,
		@index_id = @IndexID,
		@partition_number = @PartitionNumber,
		@data_compression = 'PAGE';
	UPDATE O
	SET
		O.EstimatedSizeWithPageCompressionKB = R.SizeWithRequestedCompression
	FROM
		@Objects AS O
		CROSS JOIN @Results AS R
	WHERE
		O.ItemKey = @ItemKey

	SET @ItemKey += 1;
END

SELECT
	ItemKey,
	SchemaName,
	ObjectName,
	IndexID,
	PartitionNumber,
	CurrentCompressionType,
	EstimatedSizeWithNoCompressionKB,
	EstimatedSizeWithRowCompressionKB,
	EstimatedSizeWithPageCompressionKB,
	EstimatedCompressionRatioRowVsNone,
	EstimatedCompressionRatioPageVsNone
FROM
	@Objects
ORDER BY
	ItemKey
;

Thoughts

Remember to evaluate CPU usage when choosing a compression type and be weary of it if you’re doing big loads into a table in an ETL process of some sort.

Jorge Candeias's Picture

About Jorge Candeias

Jorge helps organizations build high-performing solutions on the Microsoft tech stack.

London, United Kingdom https://jorgecandeias.github.io