99import json
1010import scrapy
1111from scrapy .selector import Selector
12- from ..items import StatisticsItem , NoticeItem , WHOArticleItem , \
13- RecommendItem , ProvinceItem , CountryItem , CityItem , \
14- TimelineItem , WikiItem , GoodsGuideItem , RumorItem
12+ from .. import items
1513
1614from django .utils .timezone import datetime , make_aware
1715
@@ -31,13 +29,15 @@ def parse(self, response):
3129 provinces = self .get_list (scripts , '#getAreaStat' )
3230 for province in provinces :
3331 cities = province .pop ('cities' , [])
34- province = ProvinceItem (** province )
32+ province = items . ProvinceItem (** province )
3533 yield province
3634 for city in cities :
37- yield CityItem (province = province ['locationId' ], ** city )
35+ location_id = province ['locationId' ]
36+ yield items .CityItem (province = location_id , ** city )
3837
3938 # 国外数据
40- countries = self .get_list (scripts , '#getListByCountryTypeService2true' )
39+ countries = self .get_list (
40+ scripts , '#getListByCountryTypeService2true' )
4141 for country in countries :
4242 country .pop ('id' , None )
4343 country ['countryName' ] = country .pop ('provinceName' , None )
@@ -46,7 +46,7 @@ def parse(self, response):
4646 country .pop ('provinceId' )
4747 country .pop ('provinceName' )
4848 country .pop ('provinceShortName' )
49- yield CountryItem (** country )
49+ yield items . CountryItem (** country )
5050
5151 # 统计信息
5252 statistics = self .get_statistics (scripts , '#getStatisticsService' )
@@ -60,29 +60,24 @@ def parse(self, response):
6060 for key in ('title' , 'summary' , 'infoSource' , 'sourceUrl' ,
6161 'pubDate' , 'pubDateStr' ):
6262 timeline [key ] = item .get (key )
63- yield TimelineItem (** timeline )
64-
63+ yield items .TimelineItem (** timeline )
6564
6665 # 建议,id=“#getIndexRecommendList2” 为英文内容
6766 recommends = self .get_list (
6867 scripts , '#getIndexRecommendListundefined' )
6968 for item in recommends :
70- recommend = {
71- 'title' : item ['title' ],
72- 'linkUrl' : item ['linkUrl' ],
73- 'imgUrl' : item ['imgUrl' ],
74- 'countryType' : item ['countryType' ],
75- 'contentType' : item ['contentType' ],
76- 'recordStatus' : item ['recordStatus' ],
77- 'sort' : item ['sort' ]
78- }
79- yield RecommendItem (** recommend )
80-
81- # # WHO 文章
82- article = self .get_dict (scripts , '#fetchWHOArticle' )
83- yield WHOArticleItem (
84- title = article ['title' ], linkUrl = article ['linkUrl' ],
85- imgUrl = article ['imgUrl' ])
69+ recommend = {}
70+ for key in ('title' , 'linkUrl' , 'imgUrl' , 'countryType' ,
71+ 'contentType' , 'recordStatus' , 'sort' ):
72+ recommend [key ] = item .get (key )
73+ yield items .RecommendItem (** recommend )
74+
75+ # WHO 文章
76+ item = self .get_dict (scripts , '#fetchWHOArticle' )
77+ article = {}
78+ for key in ('title' , 'linkUrl' , 'imgUrl' ):
79+ article [key ] = item .get (key )
80+ yield items .WHOArticleItem (** article )
8681
8782 # wiki
8883 wiki_result = self .get_dict (scripts , '#getWikiList' )
@@ -91,7 +86,7 @@ def parse(self, response):
9186 wiki = {}
9287 for key in ('title' , 'linkUrl' , 'imgUrl' , 'description' ):
9388 wiki [key ] = item .get (key )
94- yield WikiItem (** wiki )
89+ yield items . WikiItem (** wiki )
9590
9691 # 购物指南
9792 guides = self .get_list (scripts , '#fetchGoodsGuide' )
@@ -100,7 +95,7 @@ def parse(self, response):
10095 for key in ('categoryName' , 'title' , 'recordStatus' ,
10196 'contentImgUrls' ):
10297 guide [key ] = item .get (key )
103- yield GoodsGuideItem (** guide )
98+ yield items . GoodsGuideItem (** guide )
10499
105100 # 辟谣与防护
106101 rumors = self .get_list (scripts , '#getIndexRumorList' )
@@ -109,15 +104,7 @@ def parse(self, response):
109104 for key in ('title' , 'mainSummary' , 'summary' , 'body' ,
110105 'sourceUrl' , 'score' , 'rumorType' ):
111106 rumor [key ] = item .get (key )
112- yield RumorItem (** rumor )
113-
114- def get_list (self , scripts , data_id ):
115- ret = scripts .css (data_id ).re (r'(\[.+\])' )
116- return json .loads (ret [0 ])
117-
118- def get_dict (self , scripts , data_id ):
119- ret = scripts .css (data_id ).re (r'\=\s*(\{.+\})\}catch\(e\)\{\}' )
120- return json .loads (ret [0 ])
107+ yield items .RumorItem (** rumor )
121108
122109 def get_statistics (self , scripts , data_id ):
123110 data = self .get_dict (scripts , data_id )
@@ -127,29 +114,27 @@ def get_statistics(self, scripts, data_id):
127114 'currentConfirmedCount' , 'curedCount' , 'confirmedCount' ,
128115 'seriousCount' , 'suspectedCount' , 'deadCount' ):
129116 item [key ] = statistics .get (key , 0 )
130- item ['countryType' ] = StatisticsItem .django_model .GLOBAL
131- yield StatisticsItem (** item )
132-
117+ item ['countryType' ] = items .StatisticsItem .django_model .GLOBAL
118+ yield items .StatisticsItem (** item )
133119
134120 statistics = data ['foreignStatistics' ]
135121 item = {}
136122 for key in (
137123 'currentConfirmedCount' , 'curedCount' , 'confirmedCount' ,
138124 'seriousCount' , 'suspectedCount' , 'deadCount' ):
139125 item [key ] = statistics .get (key , 0 )
140- item ['countryType' ] = StatisticsItem . django_model . INTERNATIONAL
141- yield StatisticsItem ( ** item )
142-
126+ item ['countryType' ] \
127+ = items . StatisticsItem . django_model . INTERNATIONAL
128+ yield items . StatisticsItem ( ** item )
143129
144130 statistics = data
145131 item = {}
146132 for key in (
147133 'currentConfirmedCount' , 'curedCount' , 'confirmedCount' ,
148134 'seriousCount' , 'suspectedCount' , 'deadCount' ):
149135 item [key ] = statistics .get (key , 0 )
150- item ['countryType' ] = StatisticsItem .django_model .DOMESTIC
151- yield StatisticsItem (** item )
152-
136+ item ['countryType' ] = items .StatisticsItem .django_model .DOMESTIC
137+ yield items .StatisticsItem (** item )
153138
154139 # Remark and Note
155140 remarks = []
@@ -169,11 +154,18 @@ def get_statistics(self, scripts, data_id):
169154 'notes' : notes ,
170155 'generalRemark' : data .get ('generalRemark' )
171156 }
172- yield NoticeItem (** item )
173-
157+ yield items .NoticeItem (** item )
174158
175- self .crawler .createTime \
176- = make_aware ( datetime .fromtimestamp (data ['createTime' ] / 1000.0 ))
177- self .crawler .modifyTime \
178- = make_aware ( datetime .fromtimestamp (data ['modifyTime' ] / 1000.0 ))
159+ self .crawler .createTime = make_aware (
160+ datetime .fromtimestamp (data ['createTime' ] / 1000.0 ))
161+ self .crawler .modifyTime = make_aware (
162+ datetime .fromtimestamp (data ['modifyTime' ] / 1000.0 ))
179163 self .crawler .save ()
164+
def get_list(self, scripts, data_id):
    """Extract and decode the JSON array embedded in a script element.

    Args:
        scripts: Selector-like object; ``scripts.css(data_id)`` must return
            an object whose ``re(pattern)`` yields the captured strings.
        data_id: CSS selector of the target element, e.g. ``'#getAreaStat'``.

    Returns:
        The value produced by ``json.loads`` for the first captured match.

    Raises:
        IndexError: If the selector/regex pair yields no match at all.
        json.JSONDecodeError: If the captured text is not valid JSON.
    """
    # The pattern is greedy: it captures from the first '[' to the last ']'
    # found on a single line of the element's text.
    matches = scripts.css(data_id).re(r'(\[.+\])')
    if not matches:
        # Keep the exception type of the former bare ``ret[0]`` lookup, but
        # name the selector so a change in the page layout is easy to spot.
        raise IndexError(
            'no JSON array found for selector %r' % (data_id,))
    return json.loads(matches[0])
168+
def get_dict(self, scripts, data_id):
    """Extract and decode the JSON object embedded in a script element.

    The object is expected to follow an ``=`` sign and to be immediately
    followed by the literal text ``}catch(e){}``.

    Args:
        scripts: Selector-like object; ``scripts.css(data_id)`` must return
            an object whose ``re(pattern)`` yields the captured strings.
        data_id: CSS selector of the target element, e.g. ``'#getWikiList'``.

    Returns:
        The value produced by ``json.loads`` for the first captured match.

    Raises:
        IndexError: If the selector/regex pair yields no match at all.
        json.JSONDecodeError: If the captured text is not valid JSON.
    """
    matches = scripts.css(data_id).re(r'\=\s*(\{.+\})\}catch\(e\)\{\}')
    if not matches:
        # Keep the exception type of the former bare ``ret[0]`` lookup, but
        # name the selector so a change in the page layout is easy to spot.
        raise IndexError(
            'no JSON object found for selector %r' % (data_id,))
    return json.loads(matches[0])
0 commit comments