Sunday, August 31, 2014

Looping is very slow on large data set

Question: Can there ever be more than two rows you have to merge in a single data set?


I have a simplified example. You will have to forgive me for not using your exact table structure for this example. I just wanted something simple to illustrate the concept.


you should be able to adapt this approach to your solution.








set ansi_warnings off

set nocount on



declare @t as table (

JobKey int not null

,colA char(1)

,colB char(1)

,colC char(1)

)



--these will merge
insert into @t values (1,'a' ,'' ,null)

insert into @t values (1,'a' ,null,'c' )

insert into @t values (1,' ' ,'b' ,null)



--these will NOT merge (i suppose you could argue that the 1st two could be merged)
insert into @t values (2,'x' ,null,null)

insert into @t values (2,'x' ,null,'z' )

insert into @t values (2,'w' ,'y' ,'K' )



--these will merge
insert into @t values (3,'c' ,'' ,' ' )

insert into @t values (3,'c' ,'d' ,' ' )

insert into @t values (3,'c' ,null,'e' )





--first, let's get replaces the spaces with null
update @t

set colA=case when LEN (ltrim (rtrim (colA))) > 0 then colA end

,colB=case when LEN (ltrim (rtrim (colB))) > 0 then colB end

,colC=case when LEN (ltrim (rtrim (colC))) > 0 then colC end

;

with cteClean(JobKey, colA, colB, colC)

AS

(

select JobKey,MIN(colA),MIN(colB),MIN(colC)

from @t

group by JobKey

having COUNT(distinct(colA))=1

and COUNT(distinct(colB))=1

and COUNT(distinct(colC))=1

)

select * from cteClean

union

select * from @t t where t.JobKey not in (select c.jobkey from cteClean c)

;






No comments:

Post a Comment