To answer my own question…
This is a hack to get unicode to work, and relies on the unicode gem.
Also, this, as opposed to my previous code listing, should work out of
the box… except that the constant INDEX_PATH must be set beforehand,
preferably in environment.rb
CODE for acts_as_ferret.rb
require ‘active_record’
require ‘ferret’
require ‘unicode’
# Token filter that lower-cases the text of every token coming from the
# wrapped stream using Unicode.downcase (from the unicode gem).
class UnicodeLowerCaseFilter < Ferret::Analysis::TokenFilter
  # Fetches the next token from the wrapped input and lower-cases its text.
  #
  # Returns the modified token, or nil when the input yields nil.
  def next
    token = @input.next
    return nil if token.nil?

    token.term_text = Unicode.downcase(token.term_text)
    token
  end
end
# Regexp-based tokenizer whose letter classes list the Swedish letters
# explicitly in addition to [[:alpha:]].
class SwedishTokenizer < Ferret::Analysis::RegExpTokenizer
  # Punctuation characters allowed between the parts of a numeric token.
  P = /[_\/.,-]/
  # A run of word characters that contains at least one digit.
  HASDIGIT = /\w*\d\w*/

  # Returns the token pattern (extended syntax, so the line breaks inside
  # the literal are insignificant).
  #
  # Reading the alternation: a run of letters optionally followed by
  # apostrophe-joined letter runs, dot-separated single letters, or an
  # "@"/"&" suffix; or a run of word characters optionally followed by an
  # e-mail-like tail, P-separated parts containing digits, dotted parts,
  # or nothing.
  #
  # NOTE(review): the three uppercase letters in each character class
  # arrived as U+FFFD replacement characters; they are restored here as
  # "ÅÖÄ" on the assumption that they mirror the lowercase "åöä" - confirm
  # against the author's original file.
  def token_re
    %r([[:alpha:]ÅÖÄåöä]+(('[[:alpha:]ÅÖÄåöä]+)+
                          |\.([[:alpha:]ÅÖÄåöä]\.)+
                          |(@|\&)\w+([-.]\w+)*
                          )
       |\w+(([\-._]\w+)*\@\w+([-.]\w+)+
            |#{P}#{HASDIGIT}(#{P}\w+#{P}#{HASDIGIT})*(#{P}\w+)?
            |(\.\w+)+
            |
           )
      )x
  end
end
# Analyzer that tokenizes with SwedishTokenizer and then lower-cases the
# tokens with UnicodeLowerCaseFilter.
class SwedishAnalyzer < Ferret::Analysis::Analyzer
  # Builds the token stream for +string+.
  #
  # field  - accepted but not used by this analyzer.
  # string - the text to tokenize.
  #
  # Returns a UnicodeLowerCaseFilter wrapping a SwedishTokenizer.
  def token_stream(field, string)
    tokenizer = SwedishTokenizer.new(string)
    UnicodeLowerCaseFilter.new(tokenizer)
  end
end
module FerretMixin
module Acts #:nodoc:
module ARFerret #:nodoc:
# Hook run when this module is included into +base+: performs the normal
# inclusion via super, then extends +base+ with MacroMethods so that
# acts_as_ferret becomes callable on it.
def self.append_features(base)
  super
  base.extend MacroMethods
end
# Declares the class-level helper methods,
# which load the relevant instance methods defined below
# when invoked.
# Macro made available on classes that include ARFerret.
module MacroMethods
  # Turns on Ferret indexing for the calling class.
  #
  # The ClassMethods module is both extended onto the class and included
  # into it, so its methods are reachable on the class and on instances.
  # The three ferret_* methods are then registered as after_create,
  # after_update and after_destroy callbacks, in that order.
  def acts_as_ferret
    ferret_methods = FerretMixin::Acts::ARFerret::ClassMethods

    extend ferret_methods
    include ferret_methods

    after_create :ferret_create
    after_update :ferret_update
    after_destroy :ferret_destroy
  end
end
module ClassMethods
include Ferret
# Always answers false.
# NOTE(review): presumably consulted by Rails' development-mode class
# reloading to keep this module loaded - confirm against the Rails version in use.
def self.reloadable?
  false
end
# Finds instances by searching the Ferret index stored at INDEX_PATH.
#
# query   - query string; parsed with SwedishAnalyzer against every field
#           name reported by the index reader.
# options - Hash of search options (default: {}):
#           :limit  - forwarded to the searcher as :num_docs unless blank.
#           :offset - forwarded to the searcher as :first_doc unless blank.
#
# Returns an Array of records loaded with find, in hit order. Hits whose
# document has no 'id' value are skipped.
#
# NOTE(review): the query is not restricted to this model's ferret_table
# value, so if several models share INDEX_PATH the ids of other tables may
# be passed to find - confirm whether that is intended.
def find_by_ferret(query, options = {})
  searcher = Search::IndexSearcher.new(INDEX_PATH)
  begin
    field_names = searcher.reader.get_field_names.to_a
    parser = QueryParser.new(field_names, { :analyzer => SwedishAnalyzer.new })

    conditions = {}
    conditions[:num_docs] = options[:limit] unless options[:limit].blank?
    conditions[:first_doc] = options[:offset] unless options[:offset].blank?

    records = []
    searcher.search(parser.parse(query), conditions).each do |hit, _score|
      id = searcher.reader.get_document(hit)['id']
      records << find(id) unless id.nil?
    end
    records
  ensure
    # Release the index reader opened for this search.
    searcher.close
  end
end
# Counts the documents in the Ferret index at INDEX_PATH that match +query+.
#
# query - query string; parsed with SwedishAnalyzer against every field
#         name reported by the index reader.
#
# Returns the total_hits value reported by the searcher.
def count_by_ferret(query)
  searcher = Search::IndexSearcher.new(INDEX_PATH)
  begin
    field_names = searcher.reader.get_field_names.to_a
    parser = QueryParser.new(field_names, { :analyzer => SwedishAnalyzer.new })
    searcher.search(parser.parse(query)).total_hits
  ensure
    # Release the index reader opened for this search.
    searcher.close
  end
end
# private
# Callback body for after_create: opens the index at INDEX_PATH (keyed on
# :id and :ferret_table, auto-flushing, analyzed with SwedishAnalyzer) and
# appends this record's document as built by to_doc.
def ferret_create
  index_options = {
    :key        => [:id, :ferret_table],
    :path       => INDEX_PATH,
    :auto_flush => true,
    :analyzer   => SwedishAnalyzer.new
  }
  Index::Index.new(index_options) << to_doc
end
# Callback body for after_update: replaces this record's document in the
# index at INDEX_PATH.
#
# The existing document is removed with a delete query on both the record
# id and the table name, then a fresh document from to_doc is appended.
def ferret_update
  index = Index::Index.new(:key        => [:id, :ferret_table],
                           :path       => INDEX_PATH,
                           :auto_flush => true,
                           :analyzer   => SwedishAnalyzer.new)
  # Single-line query: the two required clauses are separated by one space
  # (the wrapped literal used to embed a line break here).
  index.query_delete("+id:#{self.id} +ferret_table:#{self.class.table_name}")
  index << to_doc
end
# Callback body for after_destroy: removes this record's document from the
# index at INDEX_PATH with a delete query on both the record id and the
# table name.
def ferret_destroy
  index = Index::Index.new(:key        => [:id, :ferret_table],
                           :path       => INDEX_PATH,
                           :auto_flush => true,
                           :analyzer   => SwedishAnalyzer.new)
  # Single-line query: the two required clauses are separated by one space
  # (the wrapped literal used to embed a line break here).
  index.query_delete("+id:#{self.id} +ferret_table:#{self.class.table_name}")
end
# Builds the Ferret document for this record.
#
# The document gets:
# * a stored, untokenized 'ferret_table' field holding the class's table name;
# * a stored, untokenized 'id' field for the 'id' attribute;
# * an unstored, tokenized field for every other attribute.
# All attribute values are converted with to_s.
#
# Returns the populated Ferret::Document::Document.
def to_doc
  doc = Ferret::Document::Document.new
  doc << Ferret::Document::Field.new('ferret_table',
                                     self.class.table_name,
                                     Ferret::Document::Field::Store::YES,
                                     Ferret::Document::Field::Index::UNTOKENIZED)

  attributes.each_pair do |key, val|
    doc << if key == 'id'
             Ferret::Document::Field.new('id', val.to_s,
                                         Ferret::Document::Field::Store::YES,
                                         Ferret::Document::Field::Index::UNTOKENIZED)
           else
             Ferret::Document::Field.new(key, val.to_s,
                                         Ferret::Document::Field::Store::NO,
                                         Ferret::Document::Field::Index::TOKENIZED)
           end
  end

  doc
end
end
end
end
end
# Reopen ActiveRecord and include all the above to make
# them available to all our models if they want it.
# Include ARFerret into ActiveRecord::Base; its append_features hook then
# extends the base class with the acts_as_ferret macro.
ActiveRecord::Base.class_eval { include FerretMixin::Acts::ARFerret }
END acts_as_ferret.rb
And the rake task:
include FileUtils
# Rake task that rebuilds the Ferret index.
#
# Creates the INDEX_PATH directory when it does not exist, derives one class
# name from every *.rb file directly under app/models, and re-saves every
# record of each class that responds to ferret_create.
desc 'Perform ferret index'
task :indexer => :environment do
  unless File.exist?(INDEX_PATH)
    puts "Creating index dir in #{INDEX_PATH}"
    FileUtils.mkdir_p(INDEX_PATH)
  end

  model_files = Dir.glob(File.join(RAILS_ROOT, 'app', 'models', '*.rb'))
  class_names = model_files.map do |rbfile|
    Inflector.camelize(File.basename(rbfile, '.rb'))
  end

  class_names.each do |class_name|
    # Look the constant up by name instead of eval-ing the string.
    model = Object.const_get(class_name)
    next unless model.respond_to?(:ferret_create)

    puts "REBUILDING #{model.name}"
    # NOTE(review): saving presumably fires the after_update callback that
    # re-indexes the record - confirm the callbacks are registered for each model.
    model.find_all.each { |record| record.save }
  end
end