Scrapy 实现 MySQLdb Pipeline 存储数据
这里使用的是 MySQL-python(即 MySQLdb 模块),用着还行,嘿嘿。
代码如下:
import datetime

import MySQLdb
import pymongo
from scrapy import signals, log
from scrapy.contrib.exporter import (
    XmlItemExporter,
    JsonItemExporter,
    JsonLinesItemExporter,
    CsvItemExporter,
)
from scrapy.exceptions import DropItem


def _format_mysql_error(error):
    """Return a printable message for a ``MySQLdb.Error``.

    MySQLdb errors usually carry ``(code, message)`` in ``args``, but some
    are raised with fewer arguments, so the two-field layout is only used
    when both fields are present.
    """
    args = error.args
    if len(args) >= 2:
        return "Mysql Error %d: %s" % (args[0], args[1])
    return "Mysql Error: %s" % (error,)


class MysqlDBPipline(object):
    """Scrapy item pipeline that writes scraped rows to a MySQL table.

    Connection parameters come from the crawler settings ``MYSQL_HOST``,
    ``MYSQL_DB``, ``MYSQL_USER`` and ``MYSQL_PASSWD``.  Each item is
    expected to carry parallel ``title`` and ``url`` sequences plus one
    ``create_time`` value; one row is inserted into the ``itunes`` table
    per title/url pair.

    Database failures are logged and never abort the crawl.
    """

    INSERT_SQL = ('INSERT INTO itunes (title,url,create_time) '
                  'VALUES (%s,%s,%s)')

    def __init__(self, mysql_host, mysql_db, mysql_user, mysql_passwd):
        """Store the connection parameters; no connection is opened yet."""
        self.mysql_host = mysql_host
        self.mysql_db = mysql_db
        self.mysql_user = mysql_user
        self.mysql_passwd = mysql_passwd
        # Defined up front so close_spider/process_item stay safe even when
        # open_spider fails to connect.
        self.conn = None
        self.cursor = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the ``MYSQL_*`` crawler settings."""
        settings = crawler.settings
        return cls(
            mysql_host=settings.get('MYSQL_HOST'),
            mysql_user=settings.get('MYSQL_USER'),
            mysql_passwd=settings.get('MYSQL_PASSWD'),
            mysql_db=settings.get('MYSQL_DB'),
        )

    def open_spider(self, spider):
        """Open the MySQL connection and cursor when the spider starts.

        A connection failure is logged rather than raised; the pipeline
        then passes items through without storing them.
        """
        try:
            self.conn = MySQLdb.connect(
                user=self.mysql_user,
                passwd=self.mysql_passwd,
                db=self.mysql_db,
                host=self.mysql_host,
                charset="utf8",
                use_unicode=True,
            )
            self.cursor = self.conn.cursor()
        except MySQLdb.Error as e:
            log.msg(_format_mysql_error(e), level=log.ERROR, spider=spider)

    def close_spider(self, spider):
        """Release the cursor and connection if they were opened."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def process_item(self, item, spider):
        """Validate ``item`` and insert one row per title/url pair.

        Returns the item unchanged so later pipelines can process it.

        Raises:
            DropItem: if any populated field of the item has an empty value.
        """
        # Iterating an item yields field names, so the value must be looked
        # up explicitly to detect empty fields.
        for field in item:
            if not item[field]:
                raise DropItem("Missing {0}!".format(field))

        if self.cursor is None:
            log.msg("No MySQL connection; item was not stored",
                    level=log.ERROR, spider=spider)
            return item

        create_time = item['create_time']
        stored = 0
        # zip() stops at the shorter list, so mismatched title/url lengths
        # cannot raise IndexError.
        for title, url in zip(item['title'], item['url']):
            try:
                self.cursor.execute(self.INSERT_SQL,
                                    [title, url, create_time])
                self.conn.commit()
            except MySQLdb.Error as e:
                log.msg(_format_mysql_error(e), level=log.ERROR,
                        spider=spider)
            else:
                stored += 1

        if stored:
            log.msg("Question added to MySQLdb database!",
                    level=log.DEBUG, spider=spider)
        return item
在配置文件(settings.py)中加入以下内容,并配置好 MYSQL_HOST、MYSQL_USER、MYSQL_PASSWD、MYSQL_DB:
# Enable MysqlDBPipline for the "apple" project; 100 is its order value
# among the configured item pipelines.
ITEM_PIPELINES = {
    'apple.pipelines.MysqlDBPipline': 100,
}
版权声明
由 durban创作并维护的 Gowhich博客采用创作共用保留署名-非商业-禁止演绎4.0国际许可证。
本文首发于 Gowhich博客( https://www.gowhich.com ),版权所有,侵权必究。
本文永久链接: https://www.gowhich.com/blog/625
版权声明
由 durban创作并维护的 Gowhich博客采用创作共用保留署名-非商业-禁止演绎4.0国际许可证。
本文首发于 Gowhich博客( https://www.gowhich.com ),版权所有,侵权必究。
本文永久链接: https://www.gowhich.com/blog/625