Welcome, guest! Login / Register - Why register?
Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (thanks, IP reputation!), so please use OAuth login instead. :)

Paste

Pasted as Python by Belxjander (15 years ago)
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlXPathSelector

from Arachnid.items import ArachnidItem

class AnimeWallpaperSpider(CrawlSpider):
    """Crawl the anime wallpaper site and emit one item per ``href`` found.

    Every response (the start pages and every followed link) is handed to
    ``parse_item``, which collects each ``href`` attribute value on the page
    into its own ``ArachnidItem``.

    NOTE: the callback is deliberately *not* named ``parse``. ``CrawlSpider``
    implements its rule handling in ``parse``; overriding that method
    disables the rules entirely.
    """

    name = "www.animewallpapers.com"
    allowed_domains = [
        "www.animewallpapers.com",
        "media.animewallpapers.com",
    ]
    start_urls = [
        "http://www.animewallpapers.com/wallpapers/",
        "http://www.animewallpapers.com/ecchi/",
        "http://www.animewallpapers.com/game/",
    ]
    # A single rule: extract every link, scrape it, and keep following.
    # The previous two-rule setup could not work: ``deny=''`` is an empty
    # regex that matches (and therefore rejects) every URL, and a second
    # rule never receives links that an earlier rule already extracted.
    rules = (
        Rule(SgmlLinkExtractor(), callback='parse_item', follow=True),
    )

    def parse_start_url(self, response):
        """Scrape the start pages too, not only the pages reached via rules.

        :param response: response for one of ``start_urls``.
        :returns: the items produced by ``parse_item`` for that response.
        """
        return self.parse_item(response)

    def parse_item(self, response):
        """Build one ``ArachnidItem`` for each ``href`` attribute on the page.

        :param response: the downloaded page to scan.
        :returns: list of ``ArachnidItem``; each item's ``link`` field is a
            one-element list holding a single ``href`` value (kept as a list
            so the field has the same type ``extract()`` produced before).
        """
        hxs = HtmlXPathSelector(response)
        items = []
        # Extract once at document level. Re-running the absolute XPath
        # '//@href' on each node would return *all* hrefs for every item.
        for href in hxs.select('//@href').extract():
            item = ArachnidItem()
            item['link'] = [href]
            items.append(item)
        return items

 

Revise this Paste

Your Name: Code Language: