I want to store the data instead of printing it out;
therefore I set up a MySQL database on the SUSE machine.
import urllib
import urlparse
import re
# Scrape an author index page, then visit each author's page and collect
# their link, the two captured name fields, and (when present) a mailto
# address. One dict per author is printed.

# Index page to scrape; author links found on it are resolved against it.
url = "http://search.cpan.org/author/?W"
html = urllib.urlopen(url).read()

# NOTE(review): the original pattern was mangled when this snippet was pasted
# (HTML tags stripped, '*' characters dropped, literal split across lines).
# It must yield three groups -- link, capname, name -- so the markup below is
# a best-effort reconstruction. Verify it against the live page source.
author_pattern = re.compile(
    r'<a href="([^"]*)"><b>(.*?)</b></a><br/><small>(.*?)</small>')

# First mailto: link on an author's page, if any.
email_pattern = re.compile(r'<a href="mailto:(.*?)">')

for lk, capname, name in author_pattern.findall(html):
    # Links on the index page may be relative; make them absolute.
    alk = urlparse.urljoin(url, lk)
    data = {'url': alk, 'name': name, 'cname': capname}

    phtml = urllib.urlopen(alk).read()
    memail = email_pattern.search(phtml)
    if memail:
        data['email'] = memail.group(1)

    # Printed for every author, whether or not an email was found
    # (indentation was lost in the paste; this placement is assumed).
    print(data)