Improving Code

Question

→ How would you improve this code?

  • The Error Handling sucks
  • Bad Usage of Net::HTTP
  • Bad Usage of Classes and Hierarchy
  • etc

Thanks in advance.

require 'rubygems'
require 'hpricot'

require 'net/http'
# Scrapes the proxy listing pages served by URL and stores the result as
# "ip:port" lines in FILE.
#
# The pages appear to obfuscate port numbers: a <head> script assigns values to
# short JavaScript variables (split here on ";" and "="), and each address cell
# carries a script fragment that spells the port as "+a+b+c...". The port is
# rebuilt by substituting every letter with the value assigned to it.
#
# NOTE(review): inheriting from Net::HTTP is only needed so the class can call
# `start` / `default_port` on itself; composition would be cleaner, but the
# superclass is kept so that existing callers relying on it keep working.
class ProxyList < Net::HTTP
  # Host that publishes the proxy listing pages.
  URL = 'www.samair.ru'.freeze
  # Output file, written next to this source file.
  FILE = File.join(File.dirname(__FILE__), 'proxylist.dat').freeze
  # Only every IPADDR_AT-th <td> of the listing table is read as an address cell.
  IPADDR_AT = 4

  # Class-level state, reset at the start of every retreive_proxylist run.
  @port_values = {}
  @proxylist = []

  # Picks a random entry of USER_AGENT.
  #
  # NOTE(review): USER_AGENT is not defined in this class; it has to be
  # provided elsewhere (e.g. as a top-level constant), otherwise calling this
  # raises NameError.
  #
  # @return [Object, nil] a random element, or nil when USER_AGENT is empty
  def self.random_useragent
    return nil if USER_AGENT.empty?

    # rand(size) covers 0...size; the former rand(size - 1) never returned the
    # last entry.
    USER_AGENT[rand(USER_AGENT.size)]
  end

  # Removes everything that looks like an HTML tag from +str+.
  #
  # @param str [String] text possibly containing markup
  # @return [String] a copy of +str+ without "<...>" / "</...>" sequences
  def self.strip_html(str)
    str.gsub(%r{</?[^>]*>}, '')
  end

  # Downloads the first +n+ listing pages, decodes the proxies found on them
  # and writes them to FILE, one "ip:port" per line (CRLF terminated).
  #
  # Errors are reported on stdout and never propagate to the caller. FILE is
  # only opened after every page was scraped, so a failed download does not
  # wipe a previously written list.
  #
  # @param n [Integer] number of pages ("/proxy/proxy-01.htm" .. n) to fetch
  # @return [Array<String>, nil] the collected proxies, or nil on failure
  def self.retreive_proxylist(n = 10)
    puts "Retreiving proxylist from server: #{URL}"
    # Start from a clean slate so repeated calls do not accumulate duplicates.
    @port_values.clear
    @proxylist.clear

    begin
      # One connection for all pages; the block form closes it for us.
      start(URL, default_port) do |http|
        1.upto(n) { |i| scrape_page(http, i) }
      end
    rescue StandardError => e
      puts "[http=#{e.inspect}]"
      return nil
    end

    begin
      File.open(FILE, 'w') do |file|
        @proxylist.each { |proxy| file.write(proxy + "\r\n") }
      end
    rescue SystemCallError, IOError => e
      puts "[file=#{e.inspect}]"
      return nil
    end

    @proxylist
  end

  # Fetches listing page number +index+ and feeds it to the two parsers.
  def self.scrape_page(http, index)
    page = '/proxy/proxy-%02d.htm' % index
    puts "Retreiving #{URL}#{page} from #{URL}"

    document = Hpricot(http.get2(page).body)
    read_port_values(document)
    read_proxies(document)
  end
  private_class_method :scrape_page

  # Records the "name=value" assignments of the page's <head> scripts in
  # @port_values (symbol key => string value). Statements without both a name
  # and a value are ignored.
  def self.read_port_values(document)
    document.search('//head/script[@type="text/javascript"]').each do |element|
      element.inner_html.scan(/[^;]+/) do |statement|
        # Surrounding whitespace is dropped so "\na = 1" is found under :a.
        key, value = statement.split('=').first(2).map { |part| part.strip }
        next if key.nil? || key.empty? || value.nil?

        @port_values[key.to_sym] = value
      end
    end
  end
  private_class_method :read_port_values

  # Appends "ip:port" to @proxylist for every address cell of the listing
  # table that carries a port script.
  def self.read_proxies(document)
    document.search('//table.tablelist/tr/td').each_with_index do |cell, index|
      next unless (index % IPADDR_AT).zero?

      # Cell content is "<ip><script ...>"; everything from "<script" onwards
      # is the obfuscated port.
      ip, port_script = cell.inner_html.split(/<script/, 2)
      next if port_script.nil?

      @proxylist << "#{ip}:#{decode_port(port_script)}"
    end
  end
  private_class_method :read_proxies

  # Rebuilds a port from the "+a+b+c" letter runs inside +script+ by looking
  # each letter up in @port_values. Letters without a known value are skipped.
  def self.decode_port(script)
    letters = script.scan(/(?:\+[a-z])+/).join.delete('+')
    letters.split(//).map { |letter| @port_values[letter.to_sym] }.compact.join
  end
  private_class_method :decode_port
end

Oops, sorry: http://paste-it.net/7646