Hi,
I created a Mechanize action that fetches the raw HTML pages into a folder,
and then parsed them using Nokogiri to store the results in a database. The
actions below show how I did it. Please tell me how to use/connect this
database to my Rails app and display the required results: I want to open
the page, search for something, submit it, and display the matching records.
Below are the actions:
parsing action:
require 'rubygems'
require 'nokogiri'
require 'sqlite3'

# Parses every saved HTML page under data-hold/pages and writes each table
# row to both a tab-separated text file and a freshly created SQLite database.

# Column definitions as [name, SQL type]. An optional third element, when
# present, is used as the name of an index to create on that column.
FIELD_NAMES = [
  ['selectcity', 'VARCHAR'],
  ['match', 'VARCHAR'],
  ['phone_no', 'NUMERIC'],
  ['name', 'VARCHAR'],
  ['address', 'VARCHAR']
].freeze

# CSS selector of the element whose <tr> rows are extracted.
TABLE_DIV_ID = '#dgrSearch'

# Tab-separated dump of the same rows; starts with a header line.
OFILE = File.open('data-hold/tel-directory.txt', 'w')
OFILE.puts(FIELD_NAMES.map { |f| f[0] }.join("\t"))

# The database is rebuilt from scratch on every run.
DBNAME = 'data-hold/tel-directory.sqlite'
File.delete(DBNAME) if File.exist?(DBNAME)
DB = SQLite3::Database.new(DBNAME)

TABLE_NAME = 'telephone_records'
DB_INSERT_STATEMENT = "INSERT into #{TABLE_NAME} values (#{FIELD_NAMES.map { '?' }.join(',')})"

DB.execute "CREATE TABLE #{TABLE_NAME}(#{FIELD_NAMES.map { |f| "#{f[0]} #{f[1]}" }.join(', ')});"

# Only columns that declare an index name (third element) get an index;
# none of the current FIELD_NAMES entries do.
FIELD_NAMES.each do |column, _type, index_name|
  DB.execute "CREATE INDEX #{index_name} ON #{TABLE_NAME}(#{column})" if index_name
end

begin
  Dir.glob('data-hold/pages/*.html').each do |fname|
    # The file name (minus ".html") is split on the separator to recover the
    # leading column values; the separator must match the one used when the
    # pages were saved.
    meta_info = File.basename(fname, '.html').split('–')

    # Block form closes the file handle once the document is parsed.
    page = File.open(fname) { |f| Nokogiri::HTML(f) }

    page.css("#{TABLE_DIV_ID} tr").each do |tr|
      data_tds = tr.css('td').map do |td|
        # NOTE(review): /$,/ (end-of-line followed by a comma) can never
        # match, so the first gsub is a no-op; confirm the intended pattern.
        # The second gsub removes all whitespace, including the UTF-8 byte
        # sequence \302\240 (non-breaking space).
        td.text.gsub(/$,/, '').gsub(/\302\240|\s/, '').strip
      end

      # Rows without any <td> cells carry no record data; skip them instead
      # of storing a row that only holds the file-name metadata.
      next if data_tds.empty?

      data_row = meta_info + data_tds
      OFILE.puts(data_row.join("\t"))
      DB.execute(DB_INSERT_STATEMENT, data_row)
    end
  end
ensure
  # Release both handles even when parsing or inserting raises.
  OFILE.close
  DB.close
end
mechanize action:
require 'rubygems'
require 'mechanize'
require 'fileutils'

# Directory the scraped result pages are written to; created (with any
# missing parent directories) when this file is loaded.
DIR = 'data-hold/pages'
FileUtils.makedirs(DIR)
# Controller whose index action walks the directory search form: for every
# city option and every match option it submits the form and saves the
# resulting page as an HTML file under DIR.
class GoogleController < ApplicationController
  # Submits the search once per city/match option pair (the first option of
  # each select is skipped) and writes each result page to
  # "#{DIR}/<city>–<match>.html".
  def index
    # NOTE(review): the URL embeds a "(S(...))" segment that looks like a
    # session token — confirm it stays valid between runs.
    home_url =
      'http://www.chhattisgarh.bsnl.co.in/(S(jinhnz45memp4b2mtgdgk3ab))/directory_services/AreaWiseSearch.aspx?Area=04'

    # Logical field name => name attribute of the form's select element.
    select_field_names = {
      'selectcity' => 'DropDownList2',
      'match' => 'drpMatch'
    }

    agent = Mechanize.new
    agent.get(home_url)
    form = agent.page.form_with(:action => /AreaWiseSearch.aspx/)

    form.field_with(:name => select_field_names['selectcity']).options[1..-1].each do |selc_opt|
      form[select_field_names['selectcity']] = selc_opt.value
      form = form_submit_w_exception_handling(form)
      puts "selectcity #{selc_opt.value}: #{agent.page.parser.css('tr').length}"

      form.field_with(:name => select_field_names['match']).options[1..-1].each do |mat_opt|
        form[select_field_names['match']] = mat_opt.value
        form = form_submit_w_exception_handling(form)
        puts "match #{mat_opt.value}: #{agent.page.parser.css('tr').length}"

        fname = "#{DIR}/#{selc_opt.value}–#{mat_opt.value}.html"
        File.open(fname, 'w') { |f| f.puts agent.page.parser.to_html }
      end
    end
  end

  private

  # Submits +frm+ through its "Search" button, retrying up to three more
  # times when the submission raises.
  #
  # @param frm the Mechanize form to submit
  # @return the same form object that was passed in
  # @raise [RuntimeError] once the retries are exhausted
  def form_submit_w_exception_handling(frm)
    retries = 3
    begin
      frm.submit(frm.button_with(:value => 'Search'))
    rescue StandardError => e
      # StandardError rather than Exception, so interrupts and exit requests
      # are not swallowed by the retry loop.
      puts "Problem: #{e}"
      raise 'Unexpected, repeated errors. Shutting down' unless retries > 0

      retries -= 1
      # NOTE(review): the message mentions sleeping but no pause happens
      # here; add a sleep if a back-off between attempts is wanted.
      puts "Sleeping…#{retries} left"
      retry
    end
    frm
  end
end