Here’s a simple fuzzy-search algorithm you can plug in to whatever you

decide to use.

Most of this code was stolen from php: http://us2.php.net/similar_text

(in comments section).

O(m*n), where m and n are lengths of the two target strings.

You may want to rewrite it to make it look more Rubyish.

# this is the main algorithm.

# compute longest common subsequence btn strings s1 and s2

def lcs_length(s1, s2)

m = [s1.length,128].min

n = [s2.length,128].min

x = 1 + [m,n].max

# this table will be used to compute the LCS-Length,

# only the first 128 chars per string are considered

lcs_tbl = []

# initialize 2-dimensional array to 0

(0…x).each { || lcs_tbl << Array.new(x,0) }

(1…m).each { |i|

(1…n).each { |j|

if (s1[i-1] == s2[j-1])

lcs_tbl[i][j] = lcs_tbl[i-1][j-1] + 1

elsif (lcs_tbl[i-1][j] >= lcs_tbl[i][j-1])

lcs_tbl[i][j] = lcs_tbl[i-1][j]

else

lcs_tbl[i][j] = lcs_tbl[i][j-1]

end

}

}

return lcs_tbl[m][n]

end

def get_lcs(s1, s2, verbose=false)

# ok, now replace all spaces and weird characters with ‘normal’

ascii-7

s1 = normalize_string(s1);

s2 = normalize_string(s2);

# check for pathological cases

return 0 if [s1.length, s2.length].max < 1

puts “comparing strings #{s1} and #{s2}” if verbose

lcs = lcs_length(s1,s2) # compute longest common subsequence

# average string length

ms = (s1.length + s2.length) / 2.0

(puts “ms: #{ms} lcs: #{lcs}”;puts) if verbose

return (lcs*100)/ms

end

# code stolen from one of the ruby MLs.

def normalize_string(foo)

foo = foo.downcase.strip

foo.gsub!(/[Ã?ÃÃ?Ã?Ã¢Ã¤Ã Ã£Ã¡Ã¤Ã¥ÄÄ?Ä?Ç?Ç?Ç¡Ç»ÈÈ?È§áºµáº·]/,‘a’)

foo.gsub!(/[Ã«ÃªÃ©Ã¨áº½Ä?Ä?Ä?áº»È?È?áº¹È©Ä?á¸?á¸?á»áº¿á»?á»?á¸?á¸?á»?á¸]/,‘e’)

foo.gsub!(/[Ã?ÃÃ?Ä¨ÃiÃ¬ÃÃ®Ä©Ä«ÄÃ¯á»?Çá»?Ä¯È?È?á¸É¨á¸¯]/,‘i’)

foo.gsub!(/[Ã?Ã?Ã?Ã?Ã?Ã²Ã³Ã´ÃµÅÅÈ¯Ã¶á»Å?Ç?ÈÈÆ¡Ç«á»ÉµÃ¸á»?á»?á»?á»?È±È«Èá¹á¹á¹?á¹?á»á»?á»¡á»?á»£Çá»?Ç¿]/,‘o’)

foo.gsub!(/[Ã?Ã?Ã?Å¨Ã?Ã¹ÃºÃ»Å©Å«ÅÃ¼á»§Å¯Å±Ç?È?È?Æ°á»¥á¹³Å³á¹·á¹µá¹¹á¹»Ç?Ç?Ç?Ç?Ç?á»«á»©á»¯á»á»±]/,‘u’)

foo.gsub!(/[á»³Ã½Å·á»¹È³áºÃ¿á»·áº?Æ´á»µ]/,‘y’)

foo.gsub!(/[Å?]/,‘oe’)

foo.gsub!(/[Ã?Ç¼Ç¢Ã¦]/,‘ae’)

foo.gsub!(/[Ã±Ç¹Å?]/,‘n’)

foo.gsub!(/[Ã?Ã§]/,‘c’)

foo.gsub!(/[Ã?]/,‘b’)

foo.gsub!(/[Å?]/,‘oe’)

foo.gsub!(/[Ä³]/,‘ij’)

foo.gsub!(/[\s’"\/?.=+&%]/,’*’)*

foo.gsub!(/+/,’_’)

foo

end

# a little test

[

[“a”,“a”],[“ab”,“ab”],[“abc”,“abc”],

[“a”,“b”],[“ab”,“bb”],[“abc”,“abb”],

[“a”,“1”],[“ab”,"_b"],[“abcd”,“a cd”],

[“abcd”,“abcde”],[“abcd”,“abcdefghi”],[“abcde”,“abcdf”],

[“abcd”,“zbcd”],[“abcd”,“efghiabcd”],[“abcde”,“fabcd”],

[“abc”,“acb”],[“abcde”,“abzde”],[“abcde”,“abde”],

[“abc”,“ac”],[“abcde”,“ab1de”],[“abcde”,“ab de”]

].each { |e|

puts get_lcs(e[0],e[1],true)

puts

}