Thursday, 7 April 2011

Levenshtein Distance in SQL

For a description of Levenshtein distance see wikipedia article

CREATE Function lev_distance ( @source VARCHAR(100) ,@target VARCHAR(100) ) RETURNS int AS BEGIN DECLARE @distance varchar(30) ,@len_source int ,@len_target int ,@counter int DECLARE @matrix TABLE(x int, y int, value int) SET @len_source = (SELECT len(@source)) SET @len_target = (SELECT len(@target)) -- initialize the first row 0..len_source SET @counter = 0 WHILE @counter <= @len_source + 1 BEGIN INSERT INTO @matrix VALUES (@counter, 0, @counter) SET @counter = @counter + 1 END -- initialize the first column 0..len_target SET @counter = 1 WHILE @counter <= @len_target + 1 BEGIN INSERT INTO @matrix VALUES (0, @counter, @counter) SET @counter = @counter + 1 END -- evaluate character distance DECLARE @cost int ,@up int ,@left int ,@diag int ,@source_index int ,@target_index int SET @source_index = 1 SET @target_index = 1 WHILE @source_index <= @len_source + 1 BEGIN WHILE @target_index <= @len_target + 1 BEGIN SET @cost = (CASE WHEN SUBSTRING(@source, @source_index, 1) = SUBSTRING(@target, @target_index, 1) THEN 0 ELSE 1 END) SET @up = (SELECT [value] FROM @matrix WHERE [x] = @source_index AND [y] = @target_index - 1) + 1 SET @left = (SELECT [value] FROM @matrix WHERE [x] = @source_index - 1 AND [y] = @target_index) + 1 SET @diag = (SELECT [value] FROM @matrix WHERE [x] = @source_index - 1 AND [y] = @target_index - 1) + @cost SET @cost = (CASE WHEN (@up <= @left) AND (@up <= @diag) THEN @up WHEN (@left <= @up) AND (@left <= @diag) THEN @left WHEN (@diag <= @up) AND (@diag <= @left) THEN @diag END) INSERT INTO @matrix VALUES (@source_index, @target_index, @cost) SET @target_index = @target_index + 1 END SET @target_index = 1 SET @source_index = @source_index + 1 END -- get result SELECT @distance = value FROM @matrix WHERE x = len(@source) AND y = len(@target) RETURN @distance END GO

No comments:

Post a Comment