{"id":518,"date":"2023-02-28T22:47:07","date_gmt":"2023-02-28T14:47:07","guid":{"rendered":"https:\/\/www.gladguang.cn\/?p=518"},"modified":"2023-02-28T22:47:54","modified_gmt":"2023-02-28T14:47:54","slug":"%e5%88%9d%e8%af%86scrapy","status":"publish","type":"post","link":"https:\/\/www.gladguang.cn\/index.php\/2023\/02\/28\/%e5%88%9d%e8%af%86scrapy\/","title":{"rendered":"Scrapy\u5b66\u4e60\u7b14\u8bb0"},"content":{"rendered":"<h2>\u5b89\u88c5<\/h2>\n<pre><code class=\"language-python line-numbers\">pip install scrapy\n<\/code><\/pre>\n<h2>\u57fa\u672c\u542f\u52a8\u547d\u4ee4<\/h2>\n<pre><code class=\"language-Scrapy line-numbers\"># \u521b\u5efa\u722c\u866b\u9879\u76ee\u540d\u79f0\nscrapy startproject \u9879\u76ee\u540d\u79f0\n# \u521b\u5efa\u722c\u866b\u7684\u540d\u5b57\nscrapy genspider \u722c\u866b\u540d\u5b57 \u57df\u540d\n# \u8fd0\u884c\u722c\u866b\nscrapy crawl \u722c\u866b\u540d\u5b57\n<\/code><\/pre>\n<h2>scrapy\u9879\u76ee\u7ed3\u6784<\/h2>\n<p><div class='fancybox-wrapper lazyload-container-unload' data-fancybox='post-images' href='https:\/\/cdn.jsdelivr.net\/gh\/gladguang\/photo\/markdown\/typora\/photo\/20230228223535.png'><img class=\"lazyload lazyload-style-1\" src=\"data:image\/svg+xml;base64,PCEtLUFyZ29uTG9hZGluZy0tPgo8c3ZnIHdpZHRoPSIxIiBoZWlnaHQ9IjEiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgc3Ryb2tlPSIjZmZmZmZmMDAiPjxnPjwvZz4KPC9zdmc+\"  decoding=\"async\" data-original=\"https:\/\/cdn.jsdelivr.net\/gh\/gladguang\/photo\/markdown\/typora\/photo\/20230228223535.png\" src=\"data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsQAAA7EAZUrDhsAAAANSURBVBhXYzh8+PB\/AAffA0nNPuCLAAAAAElFTkSuQmCC\" alt=\"\u9879\u76ee\u7ed3\u6784\" \/><\/div><\/p>\n<h3>\u5168\u5c40\u914d\u7f6e\u6587\u4ef6settings.py\u8be6\u89e3<\/h3>\n<ul>\n<li>\u5b9a\u4e49User-Agent<\/li>\n<\/ul>\n<p>\u3000\u3000USER_AGENT = 
'Mozilla\/5.0'<\/p>\n<ul>\n<li>\u662f\u5426\u9075\u5faarobots\u534f\u8bae\uff0c\u4e00\u5b9a\u8981\u8bbe\u7f6e\u4e3aFalse<\/li>\n<\/ul>\n<p>\u3000\u3000ROBOTSTXT_OBEY = False<\/p>\n<ul>\n<li>\u6700\u5927\u5e76\u53d1\u91cf\uff0c\u9ed8\u8ba4\u4e3a16<\/li>\n<\/ul>\n<p>\u3000\u3000CONCURRENT_REQUESTS = 32<\/p>\n<ul>\n<li>\u4e0b\u8f7d\u5ef6\u8fdf\u65f6\u95f4<\/li>\n<\/ul>\n<p>\u3000\u3000DOWNLOAD_DELAY = 1<\/p>\n<ul>\n<li>\u8bf7\u6c42\u5934\uff0c\u6b64\u5904\u4e5f\u53ef\u4ee5\u6dfb\u52a0User-Agent<\/li>\n<\/ul>\n<p>\u3000\u3000DEFAULT_REQUEST_HEADERS={}<\/p>\n<ul>\n<li>\u9879\u76ee\u7ba1\u9053\uff0c\u8fd0\u884c\u7ba1\u9053\u51fd\u6570<\/li>\n<\/ul>\n<p>\u3000\u3000ITEM_PIPELINES={<\/p>\n<p>\u3000\u3000'\u9879\u76ee\u76ee\u5f55\u540d.pipelines.\u7c7b\u540d':300<\/p>\n<p>\u3000\u3000}<\/p>\n<h3>\u521b\u5efa\u722c\u866b\u9879\u76ee\u6b65\u9aa4<\/h3>\n<ol>\n<li>\u65b0\u5efa\u9879\u76ee \uff1ascrapy startproject \u9879\u76ee\u540d<\/li>\n<li>cd \u9879\u76ee\u6587\u4ef6\u5939<\/li>\n<li>\u65b0\u5efa\u722c\u866b\u6587\u4ef6 \uff1ascrapy genspider \u6587\u4ef6\u540d \u57df\u540d<\/li>\n<li>\u660e\u786e\u76ee\u6807(items.py)<\/li>\n<li>\u5199\u722c\u866b\u7a0b\u5e8f(\u6587\u4ef6\u540d.py)<\/li>\n<li>\u7ba1\u9053\u6587\u4ef6(pipelines.py)<\/li>\n<li>\u5168\u5c40\u914d\u7f6e(settings.py)<\/li>\n<li>\u8fd0\u884c\u722c\u866b \uff1ascrapy crawl 
\u722c\u866b\u540d<\/li>\n<\/ol>\n","protected":false},"excerpt":{"rendered":"<p>Scrapy\u5b89\u88c5\u4e0e\u57fa\u672c\u542f\u52a8\u547d\u4ee4<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[59,64,6,7,76],"tags":[23,24,84,32],"class_list":{"0":"post-518","1":"post","2":"type-post","3":"status-publish","4":"format-standard","5":"hentry","6":"category-59","7":"category-64","8":"category-python","10":"category-programming-language","11":"tag-python","13":"tag-scrapy","14":"tag-32"},"_links":{"self":[{"href":"https:\/\/www.gladguang.cn\/index.php\/wp-json\/wp\/v2\/posts\/518","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.gladguang.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.gladguang.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.gladguang.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.gladguang.cn\/index.php\/wp-json\/wp\/v2\/comments?post=518"}],"version-history":[{"count":3,"href":"https:\/\/www.gladguang.cn\/index.php\/wp-json\/wp\/v2\/posts\/518\/revisions"}],"predecessor-version":[{"id":552,"href":"https:\/\/www.gladguang.cn\/index.php\/wp-json\/wp\/v2\/posts\/518\/revisions\/552"}],"wp:attachment":[{"href":"https:\/\/www.gladguang.cn\/index.php\/wp-json\/wp\/v2\/media?parent=518"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.gladguang.cn\/index.php\/wp-json\/wp\/v2\/categories?post=518"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.gladguang.cn\/index.php\/wp-json\/wp\/v2\/tags?post=518"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}