module Ferret::Analysis
class TokenFilter < TokenStream
  # Builds a filter that sits on top of another token stream.
  #
  # input:: the upstream token stream; kept in @input so that subclasses
  #         can pull tokens from it.
  def initialize(input)
    @input = input
  end
end
# Replaces accented characters with their ASCII equivalents.
class ToASCIIFilter < TokenFilter
  # Pulls the next token from the wrapped stream, lowercases its text and
  # translates it through String#tr, with ACCENTUATED_CHARS as the source
  # set and REPLACEMENT_CHARS as the replacement set. Both constants are
  # not defined in this class and must be resolvable from here.
  #
  # Returns the rewritten token, or nil when the wrapped stream returns nil.
  def next
    tok = @input.next
    return tok if tok.nil?

    lowered = tok.text.downcase
    tok.text = lowered.tr(ACCENTUATED_CHARS, REPLACEMENT_CHARS)
    tok
  end
end
class EuropeanAnalyzer
  # Builds the token stream for a piece of text: a StandardTokenizer over
  # +string+, wrapped in a ToASCIIFilter.
  #
  # field::  not used by this implementation.
  # string:: the text to tokenize.
  #
  # Returns the ToASCIIFilter instance.
  def token_stream(field, string)
    tokenizer = StandardTokenizer.new(string)
    ToASCIIFilter.new(tokenizer)
  end
end
end
# Demo: run a sample sentence containing accented characters through the
# analyzer and print every token it produces.
analyzer = Ferret::Analysis::EuropeanAnalyzer.new
# The first argument is the field name; EuropeanAnalyzer#token_stream does
# not use it.
ts = analyzer.token_stream('xxx', "Let's see what " +
                                  "happens to ÅÄÀAÂåäàâaÖÔôöÉÈÊËéèêëÜüùç")
# Keep pulling tokens until ts.next returns a falsy value (nil).
while (t = ts.next)
  puts t
end