Performance Issues using Aggregate MAX function within a temp table  
Author Message
kristi7717





PostPosted: Wed Apr 27 10:04:01 CDT 2005 Top

SQL Server Developer >> Performance Issues using Aggregate MAX function within a temp table

Is there a known performance issue using the aggregate SQL function MAX
within a temp table?

Assume that I create a temp table as follows:

create table #tempTable
(
value integer not null
)


and then SELECT the MAX value of the column within this table. If this
SELECT is performed a large number of times (i.e., within a loop) where the
table is "cleared" at the end (i.e., DELETE FROM #tempTable), it appears
that the average response time for using the aggregate function MAX is worse
than the average response time using the statement:

SELECT TOP 1 value from #tempTable ORDER BY value DESC



However, if I make one of the following changes:
(1) Create and then drop the table (i.e., DROP TABLE #tempTable) for each
iteration within the loop, or
(2) Eliminate the use of a temp table (i.e., change "#tempTable" to
"tempTable")

then the performance of the MAX function is as good as that of the "TOP 1"
statement above.


Is there any explanation for this? I cannot find any documented reason for
this behavior.

SQL code that exploits the performance issue is listed below. This defect
can be seen with as few as 1000 iterations and 5 rows. The response time is
measured in milliseconds. This may not seem like much, but even a small
number of rows over a high number of iterations (10,000 or higher) causes a
significant delay.



--
-- BEGIN SQL CODE BEGIN SQL CODE BEGIN SQL CODE
--

set nocount on

--
-- SQL Code used to exploit aggregate MAX function timing issue.
--

-- and then obtain the maximum value using MAX and TOP 1 and
-- record the response time in milliseconds.
--

-- results (stored in #tempTimeResults) and obtain the MIN, MAX
-- and AVG times for using both MAX and TOP 1.
--

-- of rows on each pass.
--
-- This test in the following configuration will take approximately 2
minutes:
-- Max Passes = 5
-- Max Iterations = 1000
-- Rows incremented by an additional 5 each pass
--
--

--
-- Temp table to hold results of each pass
--
-- Summary statistics collected once per pass: for each row count tested,
-- the min/avg/max response time (ms) of both query styles.
CREATE TABLE #tempResults
(
    MaxRows             INT,            -- number of rows in #tempTable during this pass
    MinDurationUsingMax NUMERIC(13, 2), -- fastest MAX() timing, ms
    AvgDurationUsingMax NUMERIC(13, 2), -- mean MAX() timing, ms
    MaxDurationUsingMax NUMERIC(13, 2), -- slowest MAX() timing, ms
    MinDurationUsingTop NUMERIC(13, 2), -- fastest TOP 1 ... ORDER BY DESC timing, ms
    AvgDurationUsingTop NUMERIC(13, 2), -- mean TOP 1 timing, ms
    MaxDurationUsingTop NUMERIC(13, 2)  -- slowest TOP 1 timing, ms
)


--
-- Temp table to hold response time from each iteration
--
-- Raw per-iteration timings; aggregated into #tempResults at the end of
-- each pass and then cleared.
CREATE TABLE #tempTimeResults
(
    DurationUsingMax INT, -- elapsed ms for the MAX() query in one iteration
    DurationUsingTop INT  -- elapsed ms for the TOP 1 ... ORDER BY query
)



--
-- Temp table to hold the data on which the test will be performed
--

-- For better performance comment out the creation of the
-- following table:
--
-- Test-data table the benchmark queries run against; repopulated and
-- emptied (DELETE FROM) on every iteration of the loop.
CREATE TABLE #tempTable
(
    value INT NOT NULL -- column scanned by both MAX(value) and TOP 1 ... ORDER BY value DESC
)

-- Constants



pass

-- Loop control variables






















begin





pass




+ cast(CURRENT_TIMESTAMP as varchar)




begin

begin transaction

-- For better performance uncomment the following lines of code:
--
-- create table #tempTable
-- (
-- value integer not null
-- )




begin

--
-- Generate a "random" integer: the millisecond component (0-999) of the
-- current time.  Not truly random -- successive values are correlated --
-- but sufficient to vary the test data between iterations.
--
insert into #tempTable
(value)
values
(DATEPART(ms, GETDATE()))






--
-- Obtain the response time using MAX
--








--
-- Obtain the response time using TOP 1
--







--
-- Record the Response time for this iteration
--
insert into #tempTimeResults
(DurationUsingMax, DurationUsingTop)
values







delete from #tempTable

-- For better performance comment out the preceding line
-- and uncomment the following line of code:
--
-- drop table #tempTable

commit




--
-- Record the statistics for this pass
--
insert into #tempResults

min(DurationUsingMax),
avg(DurationUsingMax),
max(DurationUsingMax),
min(DurationUsingTop),
avg(DurationUsingTop),
max(DurationUsingTop)
from #tempTimeResults


delete from #tempTimeResults








select * from #tempResults


-- For better performance comment the following line of code:
--
drop table #tempTable

drop table #tempTimeResults

drop table #tempResults

set nocount off

--
-- END SQL CODE END SQL CODE END SQL CODE
--

SQL Server215  
 
 
Uri





PostPosted: Wed Apr 27 10:04:01 CDT 2005 Top

SQL Server Developer >> Performance Issues using Aggregate MAX function within a temp table Hi
You can create an indexed view. See Steve Kass's example:
-- Demo: materialize an aggregate in an indexed view so that queries like
-- COUNT(*) can be answered from the tiny view index instead of scanning
-- the (deliberately wide) base table.
CREATE TABLE T (
    i      INT,
    filler CHAR(1000) DEFAULT 'abc'
)
GO

-- Schema-bound view over dbo.T; COUNT_BIG(*) is required for a view that
-- will be indexed.
CREATE VIEW T_count WITH SCHEMABINDING AS
SELECT
    CAST(i AS BIT) AS val,
    COUNT_BIG(*)   AS T_count
FROM dbo.T
GROUP BY CAST(i AS BIT)
GO

-- The unique clustered index is what materializes the view.
CREATE UNIQUE CLUSTERED INDEX T_count_uci ON T_count(val)
GO

INSERT INTO T(i)
SELECT OrderID
FROM Northwind..[Order Details]
GO

SET STATISTICS IO ON
SELECT COUNT(*) FROM T
GO
SELECT SUM(T_count) FROM T_count WITH (NOEXPAND)
GO
SET STATISTICS IO OFF
-- uses an efficient query plan on the materialized view

GO
DROP VIEW T_count
DROP TABLE T







> Is there a known performance issue using the aggregate SQL function MAX
> within a temp table?
>
> Assume that I create a temp table as follows:
>
> create table #tempTable
> (
> value integer not null
> )
>
>
> and then SELECT the MAX value of the column within this table. If this
> SELECT is performed a large number of times (i.e., within a loop) where
the
> table is "cleared" at the end (i.e., DELETE FROM #tempTable), it appears
> that the average response time for using the aggregate function MAX is
worse
> than the average response time using the statement:
>
> SELECT TOP 1 value from #tempTable ORDER BY value DESC
>
>
>
> However, if I make one of the following changes:
> (1) Create and then drop the table (i.e., DROP TABLE #tempTable) for each
> iteration within the loop, or
> (2) Eliminate the use of a temp table (i.e., change "#tempTable" to
> "tempTable")
>
> then the performance of the MAX function is as good as the using the "TOP
1"
> statement above.
>
>
> Is there any explanation for this? I cannot find any documented reason
for
> this behavior.
>
> SQL code that exploits the performance issue is listed below. This defect
> can be seen with as few as 1000 iterations and 5 rows. The response time
is
> measured in milliseconds. This may not seem like much, but even a small
> number of rows over a high number of iterations (10,000 or higher) causes
a
> significant delay.
>
>
>
> --
> -- BEGIN SQL CODE BEGIN SQL CODE BEGIN SQL CODE
> --
>
> set nocount on
>
> --
> -- SQL Code used to exploit aggregate MAX function timing issue.
> --

> -- and then take obtain the maximum value using MAX and TOP 1 and
> -- record the response time in milliseconds.
> --

> -- results (stored in #tempTimeResults) and obtain the MIN, MAX
> -- and AVG times for using both MAX and TOP 1.
> --

> -- of rows on each pass.
> --
> -- This test in the following configuration will take approximately 2
> minutes:
> -- Max Passes = 5
> -- Max Iterations = 1000
> -- Rows incremented by an additional 5 each pass
> --
> --
>
> --
> -- Temp table to hold results of each pass
> --
> create table #tempResults
> (
> MaxRows integer,
> MinDurationUsingMax numeric(13,2),
> AvgDurationUsingMax numeric(13,2),
> MaxDurationUsingMax numeric(13,2),
> MinDurationUsingTop numeric(13,2),
> AvgDurationUsingTop numeric(13,2),
> MaxDurationUsingTop numeric(13,2)
> )
>
>
> --
> -- Temp table to hold response time from each iteration
> --
> create table #tempTimeResults
> (
> DurationUsingMax integer,
> DurationUsingTop integer
> )
>
>
>
> --
> -- Temp table to hold the data on which the test will be performed
> --
>
> -- For better performance comment out the creation of the
> -- following table:
> --
> create table #tempTable
> (
> value integer not null
> )
>
> -- Constants



> pass
>
> -- Loop control variables



>


>


>

>
>



>


>
>

> begin

>

>

> pass
>



> + cast(CURRENT_TIMESTAMP as varchar)
>
>
>

> begin
>
> begin transaction
>
> -- For better performance uncomment the following lines of code:
> --
> -- create table #tempTable
> -- (
> -- value integer not null
> -- )
>
>


> begin
>
> --
> -- Generate "random" integer
> --
> insert into #tempTable
> (value)
> values
> (DATEPART(ms, GETDATE()))
>

>

>
>
> --
> -- Obtain the response time using MAX
> --



>

>
>
>
> --
> -- Obtain the response time using TOP 1
> --



>

>
>
> --
> -- Record the Response time for this iteration
> --
> insert into #tempTimeResults
> (DurationUsingMax, DurationUsingTop)
> values

>
>

>
>
>
> delete from #tempTable
>
> -- For better performance comment out the preceeding line
> -- and uncomment the following line of code:
> --
> -- drop table #tempTable
>
> commit
>

>
>
> --
> -- Record the statistics for this pass
> --
> insert into #tempResults

> min(DurationUsingMax),
> avg(DurationUsingMax),
> max(DurationUsingMax),
> min(DurationUsingTop),
> avg(DurationUsingTop),
> max(DurationUsingTop)
> from #tempTimeResults
>
>
> delete from #tempTimeResults
>
>
>

>
>
>
>
> select * from #tempResults
>
>
> -- For better performance comment the following line of code:
> --
> drop table #tempTable
>
> drop table #tempTimeResults
>
> drop table #tempResults
>
> set nocount off
>
> --
> -- END SQL CODE END SQL CODE END SQL CODE
> --
>
>


 
 
Fred





PostPosted: Wed Apr 27 10:53:26 CDT 2005 Top

SQL Server Developer >> Performance Issues using Aggregate MAX function within a temp table Please pardon my ignorance, but I fail to see how this affects my original
problem with MAX.





> Hi
> You can create an indexed view. See Steve's Kass example
> create table T (
> i int,
> filler char(1000) default 'abc'
> )
> go
>
> create view T_count with schemabinding as
> select
> cast(i as bit) as val,
> count_big(*) T_count
> from dbo.T group by cast(i as bit)
> go
>
> create unique clustered index T_count_uci on T_count(val)
> go
> insert into T(i)
> select OrderID
> from Northwind..[Order Details]
> go
> set statistics io on
> select count(*) from T
> go
> select sum(T_count) from T_count with (noexpand)
> go
> set statistics io off
> -- uses an efficient query plan on the materialized view
>
> go
> drop view T_count
> drop table T
>
>
>
>
>


> > Is there a known performance issue using the aggregate SQL function MAX
> > within a temp table?
> >
> > Assume that I create a temp table as follows:
> >
> > create table #tempTable
> > (
> > value integer not null
> > )
> >
> >
> > and then SELECT the MAX value of the column within this table. If this
> > SELECT is performed a large number of times (i.e., within a loop) where
> the
> > table is "cleared" at the end (i.e., DELETE FROM #tempTable), it appears
> > that the average response time for using the aggregate function MAX is
> worse
> > than the average response time using the statement:
> >
> > SELECT TOP 1 value from #tempTable ORDER BY value DESC
> >
> >
> >
> > However, if I make one of the following changes:
> > (1) Create and then drop the table (i.e., DROP TABLE #tempTable) for
each
> > iteration within the loop, or
> > (2) Eliminate the use of a temp table (i.e., change "#tempTable" to
> > "tempTable")
> >
> > then the performance of the MAX function is as good as the using the
"TOP
> 1"
> > statement above.
> >
> >
> > Is there any explanation for this? I cannot find any documented reason
> for
> > this behavior.
> >
> > SQL code that exploits the performance issue is listed below. This
defect
> > can be seen with as few as 1000 iterations and 5 rows. The response
time
> is
> > measured in milliseconds. This may not seem like much, but even a small
> > number of rows over a high number of iterations (10,000 or higher)
causes
> a
> > significant delay.
> >
> >
> >
> > --
> > -- BEGIN SQL CODE BEGIN SQL CODE BEGIN SQL CODE
> > --
> >
> > set nocount on
> >
> > --
> > -- SQL Code used to exploit aggregate MAX function timing issue.
> > --

> > -- and then take obtain the maximum value using MAX and TOP 1 and
> > -- record the response time in milliseconds.
> > --

> > -- results (stored in #tempTimeResults) and obtain the MIN, MAX
> > -- and AVG times for using both MAX and TOP 1.
> > --

> > -- of rows on each pass.
> > --
> > -- This test in the following configuration will take approximately 2
> > minutes:
> > -- Max Passes = 5
> > -- Max Iterations = 1000
> > -- Rows incremented by an additional 5 each pass
> > --
> > --
> >
> > --
> > -- Temp table to hold results of each pass
> > --
> > create table #tempResults
> > (
> > MaxRows integer,
> > MinDurationUsingMax numeric(13,2),
> > AvgDurationUsingMax numeric(13,2),
> > MaxDurationUsingMax numeric(13,2),
> > MinDurationUsingTop numeric(13,2),
> > AvgDurationUsingTop numeric(13,2),
> > MaxDurationUsingTop numeric(13,2)
> > )
> >
> >
> > --
> > -- Temp table to hold response time from each iteration
> > --
> > create table #tempTimeResults
> > (
> > DurationUsingMax integer,
> > DurationUsingTop integer
> > )
> >
> >
> >
> > --
> > -- Temp table to hold the data on which the test will be performed
> > --
> >
> > -- For better performance comment out the creation of the
> > -- following table:
> > --
> > create table #tempTable
> > (
> > value integer not null
> > )
> >
> > -- Constants

#tempTable


each
> > pass
> >
> > -- Loop control variables



> >


> >


> >

> >
> >



> >


> >
> >

> > begin

> >

> >

> > pass
> >



> > + cast(CURRENT_TIMESTAMP as varchar)
> >
> >
> >

> > begin
> >
> > begin transaction
> >
> > -- For better performance uncomment the following lines of code:
> > --
> > -- create table #tempTable
> > -- (
> > -- value integer not null
> > -- )
> >
> >


> > begin
> >
> > --
> > -- Generate "random" integer
> > --
> > insert into #tempTable
> > (value)
> > values
> > (DATEPART(ms, GETDATE()))
> >

> >

> >
> >
> > --
> > -- Obtain the response time using MAX
> > --



> >

@endTime)
> >
> >
> >
> > --
> > -- Obtain the response time using TOP 1
> > --



> >

@endTime)
> >
> >
> > --
> > -- Record the Response time for this iteration
> > --
> > insert into #tempTimeResults
> > (DurationUsingMax, DurationUsingTop)
> > values

> >
> >

> >
> >
> >
> > delete from #tempTable
> >
> > -- For better performance comment out the preceeding line
> > -- and uncomment the following line of code:
> > --
> > -- drop table #tempTable
> >
> > commit
> >

> >
> >
> > --
> > -- Record the statistics for this pass
> > --
> > insert into #tempResults

> > min(DurationUsingMax),
> > avg(DurationUsingMax),
> > max(DurationUsingMax),
> > min(DurationUsingTop),
> > avg(DurationUsingTop),
> > max(DurationUsingTop)
> > from #tempTimeResults
> >
> >
> > delete from #tempTimeResults
> >
> >
> >

> >
> >
> >
> >
> > select * from #tempResults
> >
> >
> > -- For better performance comment the following line of code:
> > --
> > drop table #tempTable
> >
> > drop table #tempTimeResults
> >
> > drop table #tempResults
> >
> > set nocount off
> >
> > --
> > -- END SQL CODE END SQL CODE END SQL CODE
> > --
> >
> >
>
>


 
 
Gert-Jan





PostPosted: Thu Apr 28 18:56:50 CDT 2005 Top

SQL Server Developer >> Performance Issues using Aggregate MAX function within a temp table Hi Fred,

What I saw when I looked at the query plan, I saw a Sort for the TOP 1
query and an Aggregate for the MAX query. In both plans, the estimated
table scan cost 13%. The estimation of the Sort was 4% and the
estimation of the Aggregate was 0%. This means that SQL-Server thinks
the Aggregate is faster than the Sort.

I ran your repro script, but I am very sceptical about the results. They
do not seem correct, and I think it has to do with the way the time
measurements are done. When I did something like this (pseudo code)

@Start = current_timestamp
SELECT TOP 1
Loop 1000 times: SELECT MAX
SELECT TOP 1
@Stop = current_timestamp

and

@Start = current_timestamp
SELECT MAX
Loop 1000 times: SELECT TOP 1
SELECT MAX
@Stop = current_timestamp

I got quite different results. Most of the times, the MAX approach is
marginally faster. But really, the only thing I could conclude was that
there is no significant performance difference.

Gert-Jan



>
> Is there a known performance issue using the aggregate SQL function MAX
> within a temp table?
>
> Assume that I create a temp table as follows:
>
> create table #tempTable
> (
> value integer not null
> )
>
> and then SELECT the MAX value of the column within this table. If this
> SELECT is performed a large number of times (i.e., within a loop) where the
> table is "cleared" at the end (i.e., DELETE FROM #tempTable), it appears
> that the average response time for using the aggregate function MAX is worse
> than the average response time using the statement:
>
> SELECT TOP 1 value from #tempTable ORDER BY value DESC
>
> However, if I make one of the following changes:
> (1) Create and then drop the table (i.e., DROP TABLE #tempTable) for each
> iteration within the loop, or
> (2) Eliminate the use of a temp table (i.e., change "#tempTable" to
> "tempTable")
>
> then the performance of the MAX function is as good as the using the "TOP 1"
> statement above.
>
> Is there any explanation for this? I cannot find any documented reason for
> this behavior.
>
> SQL code that exploits the performance issue is listed below. This defect
> can be seen with as few as 1000 iterations and 5 rows. The response time is
> measured in milliseconds. This may not seem like much, but even a small
> number of rows over a high number of iterations (10,000 or higher) causes a
> significant delay.
>
> --
> -- BEGIN SQL CODE BEGIN SQL CODE BEGIN SQL CODE
> --
>
> set nocount on
>
> --
> -- SQL Code used to exploit aggregate MAX function timing issue.
> --

> -- and then take obtain the maximum value using MAX and TOP 1 and
> -- record the response time in milliseconds.
> --

> -- results (stored in #tempTimeResults) and obtain the MIN, MAX
> -- and AVG times for using both MAX and TOP 1.
> --

> -- of rows on each pass.
> --
> -- This test in the following configuration will take approximately 2
> minutes:
> -- Max Passes = 5
> -- Max Iterations = 1000
> -- Rows incremented by an additional 5 each pass
> --
> --
>
> --
> -- Temp table to hold results of each pass
> --
> create table #tempResults
> (
> MaxRows integer,
> MinDurationUsingMax numeric(13,2),
> AvgDurationUsingMax numeric(13,2),
> MaxDurationUsingMax numeric(13,2),
> MinDurationUsingTop numeric(13,2),
> AvgDurationUsingTop numeric(13,2),
> MaxDurationUsingTop numeric(13,2)
> )
>
> --
> -- Temp table to hold response time from each iteration
> --
> create table #tempTimeResults
> (
> DurationUsingMax integer,
> DurationUsingTop integer
> )
>
> --
> -- Temp table to hold the data on which the test will be performed
> --
>
> -- For better performance comment out the creation of the
> -- following table:
> --
> create table #tempTable
> (
> value integer not null
> )
>
> -- Constants



> pass
>
> -- Loop control variables



>


>


>

>



>


>

> begin

>

>

> pass
>



> + cast(CURRENT_TIMESTAMP as varchar)
>

> begin
>
> begin transaction
>
> -- For better performance uncomment the following lines of code:
> --
> -- create table #tempTable
> -- (
> -- value integer not null
> -- )
>


> begin
>
> --
> -- Generate "random" integer
> --
> insert into #tempTable
> (value)
> values
> (DATEPART(ms, GETDATE()))
>

>

>
> --
> -- Obtain the response time using MAX
> --



>

>
> --
> -- Obtain the response time using TOP 1
> --



>

>
> --
> -- Record the Response time for this iteration
> --
> insert into #tempTimeResults
> (DurationUsingMax, DurationUsingTop)
> values

>

>
> delete from #tempTable
>
> -- For better performance comment out the preceeding line
> -- and uncomment the following line of code:
> --
> -- drop table #tempTable
>
> commit
>

>
> --
> -- Record the statistics for this pass
> --
> insert into #tempResults

> min(DurationUsingMax),
> avg(DurationUsingMax),
> max(DurationUsingMax),
> min(DurationUsingTop),
> avg(DurationUsingTop),
> max(DurationUsingTop)
> from #tempTimeResults
>
> delete from #tempTimeResults
>

>
> select * from #tempResults
>
> -- For better performance comment the following line of code:
> --
> drop table #tempTable
>
> drop table #tempTimeResults
>
> drop table #tempResults
>
> set nocount off
>
> --
> -- END SQL CODE END SQL CODE END SQL CODE
> --