Friday, March 30, 2012

More ntext trouble

I'm trying to build a REPLACE() function for NTEXT values.
I'm facing a lot of trouble doing it, and I think it could be a very common problem.
I think the biggest problem is when one READTEXT a chunk of a NTEXT value, I couldn't find any way to put that chunk into an NVARCHAR in order to postprocess it.
Let me show you some source code:

BEGIN TRAN
DECLARE @.ptrval varbinary(16)
DECLARE @.datalen integer
DECLARE @.chunk nvarchar(4000)
DECLARE @.i integer
DECLARE @.q integer
DECLARE @.maxlenvarchar integer

-- LET THE POINTER @.ptrval POINTS TO THE NTEXT VALUE, GATHER THE REAL LENGTH INTO @.datalen (300000 bytes approx.)
SELECT @.ptrval = TEXTPTR(ntext_value), @.datalen = DATALENGTH(ntext_value)/2
FROM my_table
WHERE id = 8293

SELECT @.i = 0, @.maxlenvarchar = 4000

-- I WANT TO REPLACE ALL OCCURRENCIES OF CHARACTER 'A' WITH CHARACTER 'B'
-- INTO THE NTEXT FIELD. SO, I THOUGHT THAT IT COULD BE POSSIBLE DOING IT THIS WAY:
-- 1) READ THE NTEXT VALUE IN CHUNKS OF 4000 BYTES
-- 2) PUT EACH CHUNK INTO AN NVARCHAR VARIABLE (@.chunk)
-- 3) APPLY THE REPLACE FUNCTION TO EACH @.chunk
-- 4) REBUILD THE NTEXT VALUE AND STORE IT AGAIN INTO THE TABLE

-- HERE, IN THE WHILE LOOP IS PART OF THE JOB, I COULDN'T GO BEYOND THIS
WHILE @.datalen > 0
BEGIN
IF @.datalen > @.maxlenvarchar
SELECT @.q = @.maxlenvarchar
ELSE
SELECT @.q = @.datalen

-- THIS COMMAND READS 4000 BYTES OF THE NTEXT VALUE,..
-- BUT HOW CAN I STORE IT INTO A NVARCHAR IN ORDER TO USE REPLACE() ??
READTEXT my_table.ntext_value @.ptrval @.i @.q

SELECT @.i = @.i + @.maxlenvarchar, @.datalen = @.datalen - @.q

-- I DON'T KNOW HOW TO "PRINT" THE CHUNK, THIS COMMAND PRINTS THE POINTER VALUE (HEX)
PRINT @.ptrval

-- I CAN SEE THE TEXT POINTER IS ALWAYS VALID
PRINT TEXTVALID ('my_table.ntext_value', @.ptrval)
END

COMMIT

-- WHAT I HAVEN'T SEE YET IS THE PART OF REBUILDING THE NTEXT VAL AND WRITE IT AGAIN INTO THE DATABASE.Have you tried using the PATINDEX and UPDATETEXT functions ?|||I've read about them ... but I cannot imagine in which way I should use them in order to do what I want.|||Here you go:

code example (http://www.1aspstreet.com/xq/ASP/txtCodeId.395/lngWid.5/qx/vb/scripts/ShowCode.htm)|||/*
SP on WWW page linked in rnealejr's message would process only a table with one record.
It also "rereplace".
*/

/* DESCRIPTION
Replacing is thought to be very simple, but this is not right in blobs.

I use overlaying SUBSTRING frame of 4000 chars
sliding by (4000+1-len(@.OldStr))
-> 4000 chars sure
Cursor *MUST* be used, because of statement UPDATETEXT,
SQL pseudoprocedure, which does not have FROM part
and needs poiter to blob, unique for each blob column and table row.

Temp tables:
#BlobTable - table with Blobs (to be updated)
#BlobPos - current pos to be sure not to "rereplace" 'A'->'AA' ('XXAXX'->'XXAAAAAAAAAAAA...XX')
#BlobRes - results of single find
#BlobUpd - list for blob updates

Beware of replacing data with table option 'text in row' set !!!
4000 limit is for nvarchar(international,UNICODE), use 8000 varchar for single language.

*/

--INITIALIZATION (creating temp tables, generating some 16kB blob data)
set nocount on
set textsize 2147483647
if object_id('tempdb..#BlobTable') is not null drop table #BlobTable
if object_id('tempdb..#BlobPos') is not null drop table #BlobPos
if object_id('tempdb..#BlobRes') is not null drop table #BlobRes
if object_id('tempdb..#BlobUpd') is not null drop table #BlobUpd
GO
create table #BlobTable (
id int identity(1,1) primary key
,Blob ntext not null
,BlobCopy ntext not null
)
create table #BlobPos (
id int primary key
,ptr varbinary(16) not null
,BlobLen int not null
,BlobPos int not null
,BlobDelta int not null
)
create table #BlobRes (
id int primary key
,Pos int not null
)
create table #BlobUpd (
id int not null
,Pos int not null
)
GO
--generating test data (10 different blobs, about 80000 chars each)
declare @.ptr varbinary(16)
declare @.ptrCopy varbinary(16)
declare @.index int
declare @.addtext nvarchar(4000)
declare @.Counter int
set @.addtext=replicate('X',4000)
while 10>(select count(*) from #BlobTable) begin
insert #BlobTable(Blob,BlobCopy)
select
replicate('X',count(*))+'abcdefghijklmabcdefghijkl m'
,replicate('X',count(*))+'abcdefghijklmabcdefghijk lm'
from #BlobTable
select @.ptr=textptr(Blob),@.index=datalength(Blob)/2,@.ptrCopy=textptr(BlobCopy)
from #BlobTable
where id=SCOPE_IDENTITY()
set @.Counter=0
while @.Counter<20 begin
set @.index=@.index
updatetext #BlobTable.Blob @.ptr @.index 0 @.addtext
set @.Counter=@.Counter+1
end
end
update #BlobTable set
BlobCopy=Blob
GO
--INITIALIZATION-END

--REPLACE ('efgh' WITH 'efgh?')
declare @.OldStr nvarchar(4000) --up to 4000 UNICODE chars
declare @.OldStrLike nvarchar(4000)
declare @.length int
declare @.lengthDelta int
declare @.ptr varbinary(16)
declare @.NewStr nvarchar(4000) --up to 4000 UNICODE chars
declare @.index int
declare @.c cursor
--init of replace
set @.OldStr='efgh'
set @.NewStr='efgh?'
set @.lengthDelta=len(@.NewStr)-len(@.OldStr)
set @.length=len(@.OldStr)
insert #BlobPos(id,ptr,BlobLen,BlobPos,BlobDelta)
select id,ptr=textptr(Blob),BlobLen=datalength(Blob)/2, BlobPos=0,BlobDelta=0
from #BlobTable
--loop of finding
insert #BlobRes(id,Pos)
select bt.id,charindex(@.OldStr,substring(Blob,bp.BlobPos+ 1,4000))
from #BlobTable bt
join #BlobPos bp
on bt.id=bp.id and (bp.BlobPos+@.length)<=bp.BlobLen
while @.@.rowcount>0 begin
insert #BlobUpd(id,Pos)
select br.id,bp.BlobPos+br.Pos-1+bp.BlobDelta
from #BlobRes br
join #BlobPos bp
on br.id=bp.id and br.Pos>0
update bp set
bp.BlobPos=bp.BlobPos+case when br.Pos>0 then br.Pos else 4000+1-len(@.OldStr) end
,bp.BlobDelta=bp.BlobDelta+case when br.Pos>0 then @.lengthDelta else 0 end
from #BlobPos bp
join #BlobRes br
on bp.id=br.id
delete #BlobRes
insert #BlobRes(id,Pos)
select bt.id,charindex(@.OldStr,substring(Blob,bp.BlobPos+ 1,4000))
from #BlobTable bt
join #BlobPos bp
on bt.id=bp.id and (bp.BlobPos+@.length)<=bp.BlobLen
end
--update of blob by list
set @.c=cursor local forward_only static for
select bu.Pos,bp.ptr
from #BlobUpd bu
join #BlobPos bp
on bu.id=bp.id
open @.c
fetch next from @.c into @.index,@.ptr
while @.@.fetch_status=0 begin
updatetext #BlobTable.Blob @.ptr @.index @.length @.NewStr
fetch next from @.c into @.index,@.ptr
end
close @.c
deallocate @.c
GO
--END OF REPLACE

--FINALIZE
set nocount off
select * from #BlobTable
drop table #BlobTable
GO

/*
This algorithm is really slow, especially on large blobs.
I found many fast optimalizations for this almost simple algorithm,
but I realized that I am not so much interested in blobs.
I should spend a week of freetime or more to merge ideas and optimize, too much for fun.
*/

No comments:

Post a Comment