Here is the test. I’m kind of a newbie at writing tests, so it could
just be a newbie mistake. (FYI, “bawal” means “prohibited” in Tagalog
c.f. bawal.html below):
describe "Define a blacklist function that inputs a url and returns
true or false. If true, the page is on the blacklist and shouldn’t be
crawled: " do
before do
# Create the fake webpages
@html_start = File.read( RAILS_ROOT +
“/spec/fixtures/feeds/start_page.html” )
@html_okay = File.read( RAILS_ROOT +
“/spec/fixtures/feeds/okay.html” )
@html_bawal = File.read( RAILS_ROOT +
“/spec/fixtures/feeds/bawal.html” )
# Create the fake HTTP response objects for each webpage.
@response_start = mock_model( Net::HTTP, :body => @html_start,
:nil? => false )
@response_okay = mock_model( Net::HTTP, :body => @html_okay,
:nil? => false )
@response_bawal = mock_model( Net::HTTP, :body => @html_bawal,
:nil? => false )
# Create fake URL strings as names of the webpages.
@url_start = "http://sfbay.craigslist.org/start_page.html"
@url_okay = "http://sfbay.craigslist.org/okay.html"
@url_bawal = "http://sfbay.craigslist.org/bawal.html"
# Create the Ruby URI versions of the string URLs
@uri_start = URI.parse(@url_start)
@uri_okay = URI.parse(@url_okay)
@uri_bawal = URI.parse(@url_bawal)
# Create fake Hawlee instances for each page.
@hawlee_start = Hawlee.new(@uri_start, nil, 0)
@hawlee_okay = Hawlee.new(@uri_okay, nil, 0)
@hawlee_bawal = Hawlee.new(@uri_bawal, nil, 0)
@hawlee_start = mock_model( Hawlee, :uri => @uri_start, :referer
=> nil)
@hawlee_okay = mock_model( Hawlee, :uri => @uri_okay, :referer
=> @hawlee_start)
@hawlee_bawal = mock_model( Hawlee, :uri => @uri_bawal,
:referer => @hawlee_start)
# Create the fake Htgrep instance
@htgrep = Htgrep.new(@url_start.to_s)
# Create the Hawler instance that will be used by @htgrep
@hawler = Hawler.new(@url_start, @htgrep.method(:process_page),
@htgrep.method(:blacklist?))
@hawler.stub!(:uri).and_return(@url_start)
@hawler.stub!(:get_it).and_return(@response_start)
@hawler.stub!(:get_it).with(anything(),
@hawlee_start).and_return(@response_start)
@hawler.stub!(:get_it).with(anything(),
@hawlee_okay).and_return(@response_okay)
@hawler.stub!(:get_it).with(anything(),
@hawlee_bawal).and_return(@response_bawal)
# Assign the partial Hawler mock to be contained by the Htgrep
instance
@htgrep.hawler = @hawler
# Make sure that they pages are always considered to be onsite.
HawlerHelper.stub!(:offsite?).and_return(false)
# Make Hawler.new return the hawler stub that we created.
# If we don't do this, a new Hawler will be created in
Htgrep#start for every site in site_list.
Hawler.stub!(:new).and_return(@hawler)
# Skip peeking at it with HEAD.
@hawler.stub!(:peek_at_it).and_return(true)
# Make sure that the HARVEST step is not skipped.
Net::HTTPSuccess.stub!("===".to_sym).and_return(true)
end
it "Hawler's get_it method should be called with okay.html at
least once." do
@htgrep.hawler.should_receive(:get_it).with(anything(),
@hawlee_start).and_return(@response_start)
@htgrep.start
end
end
Spec::Mocks::MockExpectationError in ‘Htgrep Define a blacklist
function that inputs a url and returns true or false. If true, the
page is on the blacklist and shouldn’t be crawled: Hawler’s get_it
method should be called with okay.html at least once.’
Mock ‘Hawler’ expected :get_it with
(#Spec::Mocks::AnyArgConstraint:0x2ef093c, #<Hawlee:0x2f02088
@harvest=false, @depth=0, @uri=#<URI::HTTP:0x1781436
URL:http://sfbay.craigslist.org/start_page.html>, @get=false,
@analyze=false, @head=false, @referer=nil>) but received it with
(#<URI::HTTP:0x1777bfc
URL:http://sfbay.craigslist.org/start_page.html>, #<Hawlee:0x2eef488
@harvest=false, @depth=0, @uri=#<URI::HTTP:0x1777bfc
URL:http://sfbay.craigslist.org/start_page.html>, @get=false,
@analyze=false, @head=false, @referer=nil>)
/Users/dbit/Sites/scraper/vendor/plugins/hawler/lib/hawler.rb:84:in
hawl' /Users/dbit/Sites/scraper/vendor/plugins/hawler/lib/hawler.rb:129:in
start’
/Users/dbit/Sites/scraper/app/models/htgrep.rb:47:in start' /Users/dbit/Sites/scraper/app/models/htgrep.rb:43:in
each’
/Users/dbit/Sites/scraper/app/models/htgrep.rb:43:in `start’
./spec/models/htgrep_spec.rb:197:
./script/spec:4: