On Aug 8, 6:49 am, Damjan R. [email protected] wrote:
I would basically like to have some kind of full-text search.
In case it helps, here is my own ruby script for searching for files
with names and/or contents matching a particular regex:
Slim2:~ phrogz$ cat /usr/local/bin/findfile
#!/usr/bin/env ruby
# Short usage summary: the command synopsis followed by one entry per option.
# Long options are spelled with two ASCII hyphens so they can be typed as shown.
USAGE = <<ENDUSAGE
Usage:
  findfile [-d max_depth] [-a] [-c] [-i] name_regexp [content_regexp]
  -d,--depth        the maximum depth to recurse to
                    (defaults to no limit)
  -a,--showall      with content_regexp, show every match per file
                    (defaults to only show the first-match per file)
  -c,--usecase      with content_regexp, use case-sensitive matching
                    (defaults to case-insensitive)
  -i,--includedirs  also find directories matching name_regexp
                    (defaults to files only; not with content_regexp)
  -h,--help         show some help examples
ENDUSAGE
# Worked examples shown in addition to the usage summary when help is requested.
# Each example is a command line followed by an indented description.
EXAMPLES = <<ENDEXAMPLES
Examples:
  findfile foo
    Print the path to all files with 'foo' in the name
  findfile -i foo
    Print the path to all files and directories with 'foo' in the name
  findfile js$
    Print the path to all files whose name ends in "js"
  findfile js$ vector
    Print the path to all files ending in "js" with "Vector" or "vector"
    (or "vEcTOr", "VECTOR", etc.) in the contents, and print some of the
    first line that has that content.
  findfile js$ -c Vector
    Like above, but must match exactly "Vector"
    (not 'vector' or 'VECTOR').
  findfile . vector -a
    Print the path to every file with "Vector" (any case) in it somewhere
    printing every line (with line numbers) with that content.
  findfile -d 0 .
    Print the path to every file that is in the current directory.
  findfile -d 1 .
    Print the path to every file that is in the current directory or any
    of its child directories (but no subdirectories of the children).
ENDEXAMPLES
# Parsed command-line options, keyed by symbol (:max_depth, :showall, :usecase,
# :includedirs, :help, :name_regexp, :content_regexp).
ARGS = {}

# Positional arguments that have not been supplied yet, in the order they are
# expected on the command line. Entries are removed as they are filled in.
UNFLAGGED_ARGS = [ :name_regexp, :content_regexp ]

# Key under which the next non-flag argument is stored; nil once both
# positional arguments have been consumed (further values are ignored).
next_arg = UNFLAGGED_ARGS.first

ARGV.each do |arg|
  case arg
  when '-d', '--depth'
    # The value for this flag is the following argument.
    next_arg = :max_depth
  when '-a', '--showall'
    ARGS[:showall] = true
  when '-c', '--usecase'
    ARGS[:usecase] = true
  when '-i', '--includedirs'
    ARGS[:includedirs] = true
  when '-h', '--help'
    ARGS[:help] = true
  else
    if next_arg
      # Stored as depth + 1: the crawl starts at '.' with depth 0, so the
      # entries directly inside it sit at depth 1 and "-d 0" must reach them.
      arg = arg.to_i + 1 if next_arg == :max_depth
      ARGS[next_arg] = arg
      UNFLAGGED_ARGS.delete(next_arg)
    end
    next_arg = UNFLAGGED_ARGS.first
  end
end

# Without a name pattern there is nothing to search for: print the usage
# summary (plus the examples when help was requested) and stop.
if ARGS[:help] || !ARGS[:name_regexp]
  puts USAGE
  puts EXAMPLES if ARGS[:help]
  exit
end
class Dir
  # Recursively walks +path+, yielding every file found (and optionally every
  # directory) together with its depth below the starting point.
  #
  # path                - file or directory to start from
  # max_depth           - deepest level to visit, or nil for no limit; anything
  #                       at a depth greater than this is skipped
  # include_directories - when true, directories are yielded as well as files
  # depth               - depth of +path+ itself; callers normally leave this
  #                       at 0 and the recursion increments it
  #
  # Yields (path, depth) for each visited entry.
  #
  # Returns a flattened array of the results collected from the entries of a
  # directory, the block's result for a plain file, or nil when the depth limit
  # is exceeded or a system error was reported.
  #
  # SystemCallError (e.g. permission denied), whether raised while listing a
  # directory or from inside the block, is reported on stderr and does not
  # abort the rest of the walk.
  def self.crawl(path, max_depth = nil, include_directories = false, depth = 0, &blk)
    return if max_depth && depth > max_depth

    if File.directory?(path)
      yield(path, depth) if include_directories
      # Skip only the "." and ".." pseudo-entries; every real name, however
      # short, must be visited.
      children = Dir.entries(path).reject { |name| name == '.' || name == '..' }
      children.map { |name|
        Dir.crawl(File.join(path, name), max_depth, include_directories, depth + 1, &blk)
      }.flatten
    else
      yield(path, depth)
    end
  rescue SystemCallError => the_error
    warn "ERROR: #{the_error}"
  end
end
# Main search: walk the current directory, print every entry whose name (and,
# if requested, content) matches, then report totals and elapsed time.
start_time = Time.new

# Names are always matched case-insensitively.
name_match = Regexp.new(ARGS[:name_regexp], true)

# The content pattern is widened to capture up to 20 characters of context on
# each side of the hit; it is case-insensitive unless --usecase was given.
# nil when no content_regexp was supplied.
content_match = ARGS[:content_regexp] &&
  Regexp.new(".{0,20}#{ARGS[:content_regexp]}.{0,20}", !ARGS[:usecase])

file_count = 0      # every path yielded by the crawl
matching_count = 0  # paths that satisfied the name (and content) pattern

# Directories are only reported for name-only searches.
Dir.crawl('.', ARGS[:max_depth], ARGS[:includedirs] && !content_match) do |file_path, _depth|
  if File.basename(file_path) =~ name_match
    if content_match
      # NOTE(review): on Ruby 1.9+ a file containing bytes that are invalid in
      # the default external encoding may raise ArgumentError during matching;
      # confirm whether binary files should be skipped.
      if ARGS[:showall]
        # Print the path once, then every matching line with its line number.
        shown_file = false
        IO.readlines(file_path).each_with_index do |line_text, line_number|
          match = line_text[content_match]
          next unless match

          unless shown_file
            puts file_path
            matching_count += 1
            shown_file = true
          end
          puts format('%5d: ', line_number + 1) + match
        end
        puts ' ' if shown_file
      elsif (found = content_match.match(IO.read(file_path)))
        # Only the first hit in the file is shown, with its surrounding context.
        puts file_path, " #{found}", ' '
        matching_count += 1
      end
    else
      puts file_path
      matching_count += 1
    end
  end
  file_count += 1
end

elapsed = Time.new - start_time
puts format(
  'Found %d file%s (out of %d) in %.2f seconds',
  matching_count,
  matching_count == 1 ? '' : 's',
  file_count,
  elapsed
)