• items.py

from scrapy import Item,Field

class MoocspiderItem(Item):
    # Container for one scraped imooc.com course card.
    # Filled in MoocSpider.parse and consumed by the MySQL pipelines,
    # which insert the five fields positionally into the `books` table.
    # define the fields for your item here like:
    # name = scrapy.Field()
    Url = Field()           # absolute URL of the course detail page
    Title = Field()         # course title text
    Image_Url = Field()     # course cover image src
    Student = Field()       # enrolment figure taken from the card's second <span>
    Introduction = Field()  # short course description paragraph

  • settings.py

# Scrapy project identity: bot name and where spiders live / are generated.
BOT_NAME = 'MoocSpider'

SPIDER_MODULES = ['MoocSpider.spiders']
NEWSPIDER_MODULE = 'MoocSpider.spiders'

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'MoocSpider (+http://www.yourdomain.com)'

# Obey robots.txt rules
# NOTE(review): the ROBOTSTXT_OBEY assignment appears truncated out of this
# excerpt — confirm its value in the full settings.py.

# Database name the MySQL pipelines read via spider.settings.get('MYSQL_DB_NAME').
MYSQL_DB_NAME = 'python_data'

  • pipelines.py

# -*- coding: utf-8 -*-

# Define your item pipelines here
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import json
from twisted.enterprise import adbapi
import MySQLdb

class MysqlPipeline(object):
    """Asynchronous MySQL pipeline.

    Inserts each item through a Twisted ``adbapi.ConnectionPool`` so the
    database writes happen on pool threads and never block the crawl.
    Connection parameters come from the crawler settings, with the same
    defaults the tutorial project uses.
    """

    def open_spider(self, spider):
        """Build the connection pool when the spider starts."""
        db = spider.settings.get('MYSQL_DB_NAME', 'python_data')
        host = spider.settings.get('MYSQL_HOST', 'localhost')
        port = spider.settings.getint('MYSQL_PORT', 3306)
        user = spider.settings.get('MYSQL_DB_USER', 'root')
        passwd = spider.settings.get('MYSQL_DB_PASSWORD', 'root')

        # Fix: `port` was computed but never passed to the pool before.
        self.dbpool = adbapi.ConnectionPool(
            'MySQLdb', host=host, port=port, db=db,
            user=user, passwd=passwd, charset='utf8')

    def close_spider(self, spider):
        """Release every pooled connection when the crawl ends."""
        self.dbpool.close()

    def process_item(self, item, spider):
        """Schedule the insert on a pool thread; never blocks the reactor."""
        deferred = self.dbpool.runInteraction(self.insert_db, item)
        # Log failures instead of raising so one bad row cannot kill the crawl.
        deferred.addErrback(
            lambda failure: spider.logger.error('MySQL insert failed: %s', failure))
        return item

    def insert_db(self, tx, item):
        """Run inside a pool transaction: parameterized INSERT of one item.

        The tuple order must match the column order of the `books` table.
        """
        values = (
            item['Url'],
            item['Title'],
            item['Image_Url'],
            item['Student'],
            item['Introduction'],
        )
        sql = 'INSERT INTO books VALUES (%s,%s,%s,%s,%s)'
        tx.execute(sql, values)

class MysqlPipeline1(object):
    """Synchronous MySQL pipeline.

    Keeps one shared MySQLdb connection/cursor for the whole crawl and
    performs a blocking INSERT per item. Simpler than the adbapi variant
    but stalls the crawl for the duration of each write.
    """

    def open_spider(self, spider):
        """Open the connection and cursor when the spider starts."""
        db = spider.settings.get('MYSQL_DB_NAME', 'python_data')
        host = spider.settings.get('MYSQL_HOST', 'localhost')
        port = spider.settings.getint('MYSQL_PORT', 3306)
        user = spider.settings.get('MYSQL_DB_USER', 'root')
        passwd = spider.settings.get('MYSQL_DB_PASSWORD', 'root')

        self.db_conn = MySQLdb.connect(host=host, port=port, db=db,
                                       user=user, passwd=passwd,
                                       charset='utf8')
        self.db_cur = self.db_conn.cursor()

    def close_spider(self, spider):
        """Commit any pending rows and release the connection."""
        self.db_conn.commit()
        self.db_conn.close()

    def process_item(self, item, spider):
        """Insert the item; log failures so one bad row can't abort the crawl."""
        try:
            self.insert_db(item)
            self.db_conn.commit()
        except Exception as error:
            self.db_conn.rollback()
            spider.logger.error('MySQL insert failed for %r: %s',
                                item.get('Url'), error)
        return item

    def insert_db(self, item):
        """Parameterized INSERT of one item; order must match `books` columns."""
        values = (
            item['Url'],
            item['Title'],
            item['Image_Url'],
            item['Student'],
            item['Introduction'],
        )
        sql = 'INSERT INTO books VALUES (%s,%s,%s,%s,%s)'
        self.db_cur.execute(sql, values)

  • MoocSpider.py

import scrapy
from MoocSpider.items import MoocspiderItem
from scrapy.selector import Selector

class MoocSpider(scrapy.Spider):
    """Crawl the imooc.com course list and yield one item per course card."""

    name = 'MoocSpider'
    allowed_domains = ['imooc.com']
    start_urls = ['http://www.imooc.com/course/list']

    def parse(self, response):
        """Extract every course card on the page into a MoocspiderItem.

        Fixes vs. the original: dropped the redundant ``Selector(response)``
        wrapper and the throwaway item built before the loop, and guarded
        the ``extract()[1]`` lookup that raised IndexError on a card with
        fewer than two <span> elements.
        """
        base_url = 'http://www.imooc.com'

        for card in response.xpath('//div[@class="course-card-container"]'):
            item = MoocspiderItem()
            # extract_first('') keeps the concatenation safe if href is missing.
            item['Url'] = base_url + card.xpath('.//a/@href').extract_first('')
            item['Title'] = card.xpath('.//a/div/h3/text()').extract_first()
            item['Image_Url'] = card.xpath('.//a/div/img/@src').extract_first()
            # Index [1] picks the second <span> on the card — presumably the
            # student count; confirm against the live page markup.
            spans = card.xpath('.//a/div/div/div/span/text()').extract()
            item['Student'] = spans[1] if len(spans) > 1 else None
            item['Introduction'] = card.xpath('.//a/div/div/p/text()').extract_first()
            yield item

        # Pagination kept disabled, as in the original; re-enable once the
        # "next page" selector is confirmed.
        #url = response.xpath('//a[contains(text(),"下一页")]/@href').extract()
        #if url:
            #page = 'http://www.imooc.com' + url[0]
            #yield scrapy.Request(page,callback = self.parse)



  1. 腾讯云教程说道:

    朋友 交换链接吗

    1. walks说道:


  2. adidas nmd r1说道:

    I definitely wanted to jot down a brief message to express gratitude to you for those remarkable ideas you are posting here. My long internet look up has at the end of the day been rewarded with good strategies to exchange with my friends and family. I 'd claim that many of us site visitors are extremely fortunate to dwell in a perfect website with so many perfect individuals with helpful strategies. I feel quite fortunate to have encountered the web page and look forward to tons of more awesome moments reading here. Thanks again for everything.

  3. I simply wanted to post a small comment to appreciate you for these splendid tactics you are writing on this site. My incredibly long internet search has at the end of the day been compensated with awesome concept to talk about with my family. I would mention that many of us readers actually are unquestionably fortunate to exist in a good place with so many marvellous individuals with useful points. I feel really fortunate to have discovered the web pages and look forward to so many more cool minutes reading here. Thanks once again for all the details.

  4. I and also my pals were following the best tips located on your web page and so then came up with an awful suspicion I never expressed respect to you for those techniques. The young men became excited to study all of them and now have in reality been taking pleasure in them. Appreciate your actually being well accommodating and then for opting for some terrific tips millions of individuals are really needing to discover. My personal sincere apologies for not expressing appreciation to earlier.

  5. ultra boost说道:

    I want to get across my passion for your kindness giving support to those people who should have assistance with the field. Your personal dedication to getting the message across was particularly powerful and have without exception encouraged somebody like me to achieve their goals. Your personal important advice indicates much a person like me and a whole lot more to my office workers. With thanks; from all of us.

  6. yeezy boost说道:

    I definitely wanted to write down a comment to say thanks to you for those awesome facts you are writing on this site. My incredibly long internet lookup has now been compensated with excellent concept to talk about with my neighbours. I would suppose that many of us visitors are extremely endowed to exist in a perfect network with so many special people with valuable pointers. I feel very much happy to have come across your entire webpages and look forward to really more enjoyable minutes reading here. Thank you again for all the details.

  7. I in addition to my pals appeared to be taking note of the excellent tips and hints from your site while instantly I got a horrible suspicion I never expressed respect to the blog owner for those tips. My boys are already for that reason stimulated to read all of them and now have absolutely been having fun with them. We appreciate you indeed being considerably helpful as well as for figuring out this form of tremendous themes most people are really desperate to understand about. My very own honest apologies for not expressing appreciation to you sooner.


电子邮件地址不会被公开。 必填项已用*标注