Well, I am fairly new to Python and I want to store the results of a parsing job
in a database. I heard of peewee, which is said to be very useful and handy for
such tasks.
Since I want to use Python and peewee, I think I have to do something like the
following. After installing peewee correctly I ran the script below - now
see what happened.
import urllib
import urlparse
import re
import peewee
import json

db = MySQLDatabase('cpan', user='root', passwd='rimbaud')

class User(Model):
    name = TextField()
    cname = TextField()
    email = TextField()
    url = TextField()

    class Meta:
        database = db  # this model uses the cpan database

User.create_table()  # ensure table is created

url = "http://search.cpan.org/author/?W"
html = urllib.urlopen(url).read()

for lk, capname, name in re.findall('<a href="(/~.*?/)"><b>(.*?)</b></a><br/><small>(.*?)</small>', html):
    alk = urlparse.urljoin(url, lk)
    data = { 'url': alk, 'name': name, 'cname': capname }
    phtml = urllib.urlopen(alk).read()
    memail = re.search('<a href="mailto:(.*?)">', phtml)
    if memail:
        data['email'] = memail.group(1)

data = json.load()  # your json data file here
for entry in data:  # assuming your data is an array of JSON objects
    user = User.create(name=entry["name"], cname=entry["cname"],
                       email=entry["email"], url=entry["url"])
    user.save()
I got back this error:
Traceback (most recent call last):
  File "cpan5.py", line 10, in <module>
    db = MySQLDatabase('cpan', user='root',passwd='rimbaud')
NameError: name 'MySQLDatabase' is not defined
linux-70ce:/home/martin/perl #
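If I read the error right, a plain "import peewee" does not put MySQLDatabase
into my namespace, so I guess I either have to write peewee.MySQLDatabase(...)
or import the names explicitly. A minimal sketch of what I mean:

from peewee import MySQLDatabase, Model, TextField

db = MySQLDatabase('cpan', user='root', passwd='rimbaud')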
Assuming this is all right now, I have set up the following… but it still
fails at a certain point.
import urllib
import urlparse
import re
# import peewee
import json
from peewee import *
# from peewee import MySQLDatabase ('cpan', user='root',passwd='rimbaud')

db = MySQLDatabase('cpan', user='root', passwd='rimbaud')

class User(Model):
    name = TextField()
    cname = TextField()
    email = TextField()
    url = TextField()

    class Meta:
        database = db  # this model uses the cpan database

User.create_table()  # ensure table is created

url = "http://search.cpan.org/author/?W"
html = urllib.urlopen(url).read()

for lk, capname, name in re.findall('<a href="(/~.*?/)"><b>(.*?)</b></a><br/><small>(.*?)</small>', html):
    alk = urlparse.urljoin(url, lk)
    data = { 'url': alk, 'name': name, 'cname': capname }
    phtml = urllib.urlopen(alk).read()
    memail = re.search('<a href="mailto:(.*?)">', phtml)
    if memail:
        data['email'] = memail.group(1)

data = json.load('email')  # your json data file here
for entry in data:  # assuming your data is an array of JSON objects
    user = User.create(name=entry["name"], cname=entry["cname"],
                       email=entry["email"], url=entry["url"])
    user.save()
I guess that a data file must exist: one that has been created by the script
during the parsing… Is this right?
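If so, I guess I would have to write that file myself while parsing, roughly
like this ('authors.json' is just a placeholder name I made up):

import json

# collect every parsed dict in a list while scraping ...
authors = []
authors.append({'url': 'http://search.cpan.org/~foo/', 'name': 'Foo Bar',
                'cname': 'FOO', 'email': 'foo@example.com'})

# ... then write the whole list to one JSON file
with open('authors.json', 'w') as f:
    json.dump(authors, f)

# later the second half of the script could read it back in
with open('authors.json') as f:
    data = json.load(f)

Or could I skip the JSON detour completely and call User.create(**data) right
inside the parsing loop? Anyway, running the script as it is now gives me this: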
martin@linux-70ce:~/perl> python cpan_100.py
Traceback (most recent call last):
  File "cpan_100.py", line 47, in <module>
    data = json.load('email')  # your json data file here
  File "/usr/lib/python2.7/json/__init__.py", line 286, in load
    return loads(fp.read(),
AttributeError: 'str' object has no attribute 'read'
martin@linux-70ce:~/perl>
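Reading the traceback, it seems json.load() wants an open file object
(something with a .read() method) and I handed it a plain string, which would
explain the AttributeError. So I suppose it has to look more like this
(again, 'authors.json' is only my placeholder):

import json

# pass an open file object to json.load(), not the file name itself
with open('authors.json') as f:
    data = json.load(f)

# if the JSON were already in a string, json.loads() would be the right call:
# data = json.loads(json_text)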
Well, at the moment I do not really know why I run into so many errors.
I would be happy about any and all hints.
Love to hear from you!