on 2009 Dec 01 6:26 PM
create function bludata.onlyNumber(in val long varchar)
returns long varchar
begin
declare sTemp long varchar;
declare iCount integer;
set iCount=1;
set sTemp='';
while iCount <= length(val) loop
if substring(val,iCount,1) in( '0','1','2','3','4','5','6','7','8','9') then
set sTemp=sTemp+substring(val,iCount,1)
end if;
set iCount=iCount+1
end loop;
return sTemp
end
Actually, set sTemp='' consumes 13.6% of exec time and if line 86.4%
I need to improve because this stored function is executed more than 10k times/minute.
I have rewrited using your tips. Thank you all!
ALTER FUNCTION "bludata"."onlyNumber"(in val long varchar)
RETURNS long varchar
DETERMINISTIC
BEGIN
declare sTemp long varchar;
declare iCount integer;
declare currentSize integer;
declare currentChar char(1);
if patindex('%[^0-9]%', val) = 0 then
return val;
end if;
set iCount = 1;
set sTemp = '';
set currentSize = length(val);
while iCount <= currentSize loop
set currentChar = substring(val, iCount, 1);
if isNumeric(currentChar) = 1 then
set sTemp = sTemp + currentChar;
end if;
set iCount = iCount + 1;
end loop;
return sTemp;
END
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
Did you run some tests and get give us a hint of the performance improvements ?
As usual, there are few "right" answers, only tradeoffs.
The direction to move to optimize this function depends on both your requirements and knowledge of the domain of input strings the function is to process.
For example, if a large proportion of the inputs are 100% numeric, and punctuation is not permitted, then a cheap way to optimize the execution is
create function bludata.onlyNumber(in val long varchar)
returns long varchar
begin
declare sTemp long varchar;
declare iCount integer;
if isnumeric(val) = 1 then return val endif;
set iCount=1;
set sTemp='';
while iCount <= length(val) loop
if substring(val,iCount,1) in( '0','1','2','3','4','5','6','7','8','9') then
set sTemp=sTemp+substring(val,iCount,1)
end if;
set iCount=iCount+1
end loop;
return sTemp
end
But the isnumeric() function only tests if conversion to a numeric is possible, not an integer - instead you can use a REGEXP search on the string to look for anything that is not the digits 0-9.
Another cheap test would be to attempt the conversion to a BIGINT if the input string is shorter than 19 characters (assuming a single-byte charset). Attempt the CAST, if it succeeds return val, otherwise process the string as you are.
Another possibility is to not consider the string character-by-character. If non-numeric characters are rare, use a substring search to find the next "chunk" of numbers; this will result in fewer concatentation operations, which will mean fewer memory allocations.
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
I tried Bruce's variant with the LIST aggregate and I was surprised that it is actually faster (with my test data):
create function BH_onlyNumber(in val long varchar)
returns long varchar
begin
declare @result long varchar;
if isnumeric( val ) = 1 and patindex('%[^0-9]%',val) = 0 then
return val;
end if;
select list( ch,'' order by row_num ) into @result
from (
select substr(val,row_num,1) as ch,row_num
from sa_rowgenerator(1,length(val))
where ch between '0' and '9') as T;
return @result;
end
I timed the two variants using fetchtst on 11.0.1.2001 using some fabricated strings from systabcol:
drop table if exists TestStrings;
create table TestStrings( s long varchar );
insert into TestStrings
select column_name s from systabcol
union all select column_name||object_id from systabcol
union all select object_id||column_name from systabcol;
commit;
The following file was used by fetchtst -ga:
select max( OnlyNumber( string(s,row_num) ) )
from TestStrings, sa_rowgenerator(1,10)
go
select max( BH_OnlyNumber( string(s,row_num) ) )
from TestStrings, sa_rowgenerator(1,10)
go
Calling the function 63180 times took 39.2s for the onlyNumber() implementation and 3.2s for the BH_onlyNumber() variant. These results may not hold if your data is distributed in a different way or if you are not calling many times from a single query.
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
Another approach is to convert the string's characters into a set of row values and use a WHERE clause to eliminate the non-numeric ones, then put the result back together using the LIST aggregate function.
create function bludata.onlyNumber( val long varchar )
returns long varchar
begin
declare @result long varchar;
if isnumeric( val ) = 1 and patindex('%[^0-9]%',val) = 0 then
return val;
end if;
select list( ch,'' order by row_num ) into @result
from (
select substr(val,row_num,1) as ch,row_num
from sa_rowgenerator(1,length(val))
where ch between '0' and '9') as T;
return @result;
end
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
You could try to find the next occurrence of an unwanted character with PATINDEX( '%[^0-9]%', val)
instead of examin each charater one after another.
If nothing unwanted is found in the string you can return immediately.
HTH
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
Similar to Bruce's approach, this solution may prove useful or inspire solutions to other problems:
CREATE OR REPLACE FUNCTION onlyNumber(IN val LONG VARCHAR) RETURNS LONG VARCHAR DETERMINISTIC BEGIN DECLARE ret LONG VARCHAR; SELECT LIST( IF isNumeric(SUBSTRING(val,row_num,1)) = 1 THEN SUBSTRING(val,row_num,1) //preserve digit ELSE '' //remove non-digit END IF ,'' ORDER BY row_num) INTO ret FROM sa_rowgenerator(1,LENGTH(val)); //1 "row" for each character RETURN ret; END;
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
I missed a couple days so didn't see this. I think the patindex is definitely the way to go. Although I don't like the loop at the end.
ALTER FUNCTION onlyNumber(in val long varchar)
RETURNS long varchar
DETERMINISTIC
BEGIN
declare sOut long varchar;
declare iCurPos integer;
declare iNonNum integer;
set sOut = '';
set iCurPos = 1;
set iNonNum = patindex('%[^0-9]%', val);
while iNonNum <> 0 loop
// pick up everything to the left of the non numeric
set sOut = sOut || substr(val, iCurPos, iNonNum - 1);
// check starting at next numeric
set iCurPos = iCurPos + iNonNum + patindex('%[0-9]%', substr(val, iNonNum + 1)) - 1;
set iNonNum = patindex('%[^0-9]%', substr(val, iCurPos));
end loop;
// return our string plus any remaining chars
return sOut || substr(val, iCurPos);
END
This might change based upon whether we expected lots of non numerics and whether non numerics might show up grouped together or not which is what I designed for above. If we're just looking at something like SSN 999-99-9999 then the set iCurPos should just be set iCurPos = iCurPos + iNonNum;
Jon
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
67 | |
8 | |
8 | |
6 | |
6 | |
6 | |
6 | |
6 | |
5 | |
5 |
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.