On Oct 2, 6:17 am, Remco Hh [email protected] wrote:
I want to search in a directory for files, matching a certain regular
expression. The script should not return true or false, but should give
me a list (array) of filenames which are found.
Here’s my ‘findfile’ script that I use daily. It lets you use a regexp
for the filename, file content, specify depth of search, whether or
not to show all matches inside a file, and so on.
(You may need to unwrap some of the longer lines after copy/paste.)
See additional notes at the end.
Slim2:/usr/local/bin phrogz$ cat findfile
#!/usr/bin/env ruby

# Short usage summary, printed when no name_regexp is given or when
# -h/--help is passed. Long options are spelled with two ASCII hyphens so
# that they match what the argument parser accepts.
USAGE = <<ENDUSAGE
Usage:
   findfile [-d max_depth] [-a] [-c] [-i] name_regexp [content_regexp]
   -d,--depth         the maximum depth to recurse to (defaults to no limit)
   -a,--showall       with content_regexp, show every match per file
                      (defaults to only show the first-match per file)
   -c,--usecase       with content_regexp, use case-sensitive matching
                      (defaults to case-insensitive)
   -i,--includedirs   also find directories matching name_regexp
                      (defaults to files only; incompatible with
                      content_regexp)
   -h,--help          show some help examples
ENDUSAGE
# Worked examples, printed after USAGE when -h/--help is passed.
EXAMPLES = <<ENDEXAMPLES
Examples:
  findfile foo
    Print the path to all files with 'foo' in the name
  findfile -i foo
    Print the path to all files and directories with 'foo' in the name
  findfile js$
    Print the path to all files whose name ends in "js"
  findfile js$ vector
    Print the path to all files ending in "js" with "Vector" or "vector"
    (or "vEcTOr", "VECTOR", etc.) in the contents, and print some of the
    first line that has that content.
  findfile js$ -c Vector
    Like above, but must match exactly "Vector" (not 'vector' or 'VECTOR').
  findfile . vector -a
    Print the path to every file with "Vector" (any case) in it somewhere
    printing every line in those files (with line numbers) with that
    content.
  findfile -d 0 .
    Print the path to every file that is in the current directory.
  findfile -d 1 .
    Print the path to every file that is in the current directory or any
    of its child directories (but no subdirectories of the children).
ENDEXAMPLES
# Parse the command line into ARGS.
#
# Flags may appear anywhere on the command line. The first two non-flag
# arguments are stored, in order, as :name_regexp and :content_regexp; any
# further non-flag arguments are ignored. The argument following -d/--depth
# is stored as :max_depth.
ARGS = {}
UNFLAGGED_ARGS = [ :name_regexp, :content_regexp ]
next_arg = UNFLAGGED_ARGS.first
ARGV.each{ |arg|
  case arg
    when '-d','--depth'
      # The next non-flag argument is the depth value, not a regexp.
      next_arg = :max_depth
    when '-a','--showall'
      ARGS[:showall] = true
    when '-c','--usecase'
      ARGS[:usecase] = true
    when '-i','--includedirs'
      ARGS[:includedirs] = true
    when '-h','--help'
      ARGS[:help] = true
    else
      if next_arg
        # Users count depth from 0 meaning "the current directory only",
        # while Dir.crawl treats the starting directory itself as depth 0
        # and its entries as depth 1, so shift the value by one.
        arg = arg.to_i + 1 if next_arg == :max_depth
        ARGS[next_arg] = arg
        UNFLAGGED_ARGS.delete( next_arg )
      end
      # Go back to filling the positional slots (nil once both are taken).
      next_arg = UNFLAGGED_ARGS.first
  end
}

# Without a name_regexp there is nothing to search for: show usage and quit.
if ARGS[:help] || !ARGS[:name_regexp]
  puts USAGE
  puts EXAMPLES if ARGS[:help]
  exit
end
class Dir
  # Recursively walk +path+, yielding every file found at or beneath it.
  #
  # path::                file or directory to start from
  # max_depth::           deepest level to visit, where +path+ itself is
  #                       level 0 and its entries are level 1; nil means
  #                       no limit
  # include_directories:: when true, directories are yielded too (before
  #                       their contents)
  # depth::               level of +path+ in the walk; used by the recursion
  #
  # Yields +path+ and +depth+ for each visited entry.
  #
  # Returns the block's value when +path+ is not a directory, a flat array
  # of the values produced beneath it when it is, and nil when +depth+
  # exceeds +max_depth+ or a SystemCallError was reported.
  #
  # A SystemCallError raised while visiting +path+ (including one raised
  # by the block) is reported with +warn+ and that path is abandoned.
  def self.crawl( path, max_depth=nil, include_directories=false, depth=0, &block )
    return if max_depth && depth > max_depth
    begin
      if File.directory?( path )
        yield( path, depth ) if include_directories
        # Skip only the self and parent links; every other name, however
        # short, is a real entry that must be visited.
        names = Dir.entries( path ).reject{ |name| name == '.' || name == '..' }
        names.map{ |name|
          Dir.crawl( File.join( path, name ), max_depth, include_directories, depth+1, &block )
        }.flatten
      else
        yield( path, depth )
      end
    rescue SystemCallError => the_error
      warn "ERROR: #{the_error}"
    end
  end
end
# Main search: walk the tree below the current directory, print each path
# whose base name matches name_regexp (and, when a content_regexp was
# given, whose contents match it), then print a one-line summary.
start_time = Time.now

# File names are always matched case-insensitively.
name_match = Regexp.new( ARGS[:name_regexp], Regexp::IGNORECASE )

# Wrap the content pattern so a match carries up to 20 characters of
# context on each side; case-insensitive unless -c/--usecase was given.
content_match = ARGS[:content_regexp] && Regexp.new(
  ".{0,20}#{ARGS[:content_regexp]}.{0,20}",
  ARGS[:usecase] ? 0 : Regexp::IGNORECASE
)

file_count = 0
matching_count = 0

# Directories are only reported when no content_regexp is in play.
Dir.crawl( '.', ARGS[:max_depth], ARGS[:includedirs] && !content_match ){ |file_path, _depth|
  if File.basename( file_path ) =~ name_match
    if content_match
      if ARGS[:showall]
        # Print the path once, then every matching line beneath it.
        shown_file = false
        IO.readlines( file_path ).each_with_index{ |line_text, line_index|
          match = line_text[content_match]
          next unless match
          unless shown_file
            puts file_path
            matching_count += 1
            shown_file = true
          end
          # each_with_index counts from 0; line numbers count from 1.
          puts( ( "%5d: " % ( line_index + 1 ) ) + match )
        }
        puts " " if shown_file
      elsif IO.read( file_path ) =~ content_match
        # Only the first match (with its surrounding context) is shown.
        puts file_path, " #{$~}", " "
        matching_count += 1
      end
    else
      puts file_path
      matching_count += 1
    end
  end
  # Counts every entry the crawl yielded, whether or not it matched.
  file_count += 1
}

elapsed = Time.now - start_time
puts "Found #{matching_count} file#{matching_count==1?'':'s'} (out of #{file_count}) in #{elapsed} seconds"
You do have to watch for shell escaping of the regexp, either escaping
chars as needed or quoting your regexp:
Slim2:/usr/local/bin phrogz$ findfile \d
./findfile
./index_gem_repository.rb
./p4d
./rdoc
./rdoc-osa
./svnadmin
./svndumpfilter
./update_rubygems
Found 8 files (out of 40) in 0.001228 seconds
Slim2:/usr/local/bin phrogz$ findfile \\d
./p4
./p4d
./rot13
./sqlite3
Found 4 files (out of 40) in 0.001088 seconds
Slim2:/usr/local/bin phrogz$ findfile \\d$
./p4
./rot13
./sqlite3
Found 3 files (out of 40) in 0.001118 seconds
Slim2:/usr/local/bin phrogz$ findfile "\d$"
./p4
./rot13
./sqlite3
Found 3 files (out of 40) in 0.001298 seconds