pip install scrapy
对于一个这样组织的网页(scrapy shell 'https://quotes.toscrape.com'):
<div class="quote">
<span class="text">“The world as we have created it is a process of our
thinking. It cannot be changed without changing our thinking.”</span>
<span>
by <small class="author">Albert Einstein</small>
<a href="/author/Albert-Einstein">(about)</a>
</span>
<div class="tags">
Tags:
<a class="tag" href="/tag/change/page/1/">change</a>
<a class="tag" href="/tag/deep-thoughts/page/1/">deep-thoughts</a>
<a class="tag" href="/tag/thinking/page/1/">thinking</a>
<a class="tag" href="/tag/world/page/1/">world</a>
</div>
</div>
<div class="quote">
……
</div>
……
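response.css("div.quote") -> [<Selector xpath="descendant-or-self::div[@class and contains(concat(' ', normalize-space(@class), ' '), ' quote ')]" data='<div class="quote" itemscope itemtype...'>,
<Selector xpath="descendant-or-self::div[@class and contains(concat(' ', normalize-space(@class), ' '), ' quote ')]" data='<div class="quote" itemscope itemtype...'>,
...]
quote = response.css("div.quote")[0] -> <Selector xpath="descendant-or-self::div[@class and contains(concat(' ', normalize-space(@class), ' '), ' quote ')]" data='<div class="quote" itemscope itemtype...'>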
quote.css("span.text") -> [<Selector xpath="descendant-or-self::span[@class and contains(concat(' ', normalize-space(@class), ' '), ' text ')]" data='<span class="text" itemprop="text">“T...'>]
quote.css("span.text").get() -> '<span class="text" itemprop="text">“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”</span>'
quote.css("span.text").getall() -> ['<span class="text" itemprop="text">“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”</span>']
quote.css("span.text::text") -> [<Selector xpath="descendant-or-self::span[@class and contains(concat(' ', normalize-space(@class), ' '), ' text ')]/text()" data='“The world as we have created it is a...'>]
quote.css("span.text::text").get() -> '“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”'
quote.css("span.text::text").getall() -> ['“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”']
quote.css("div.tags a.tag::text").get() -> 'change'
quote.css("div.tags a.tag::text").getall() -> ['change', 'deep-thoughts', 'thinking', 'world']
response.css('li.next a::attr(href)').get() -> '/page/2/'