Skip to content
This repository was archived by the owner on May 5, 2023. It is now read-only.

Commit be16a2a

Browse files
committed
Merge branch 'develop'
2 parents 24c91cd + f70b611 commit be16a2a

File tree

3 files changed

+51
-103
lines changed

3 files changed

+51
-103
lines changed

README.md

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,10 +13,6 @@
1313
$ ./manage.py makemigrations ncovapi
1414
$ ./manage.py migrate
1515

16-
## 创建缓存表
17-
18-
$ ./manage.py createcachetable
19-
2016
## 搜集静态文件
2117

2218
$ ./manage.py collectstatic

spider/nCoV/pipelines.py

Lines changed: 8 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -20,12 +20,7 @@ def open_spider(self, spider):
2020
spider.crawler = items.CrawlerItem.django_model.objects.create()
2121

2222
def process_item(self, item, spider):
23-
if isinstance(item, items.ProvinceItem):
24-
items.ProvinceItem.django_model.objects.create(
25-
crawler=spider.crawler, **item
26-
)
27-
return item
28-
elif isinstance(item, items.CityItem):
23+
if isinstance(item, items.CityItem):
2924
provice_location_id = item.pop('province')
3025
province = items.ProvinceItem.django_model.objects.filter(
3126
locationId=provice_location_id,
@@ -35,48 +30,13 @@ def process_item(self, item, spider):
3530
crawler=spider.crawler, **item
3631
)
3732
return item
38-
elif isinstance(item, items.CountryItem):
39-
items.CountryItem.django_model.objects.create(
40-
crawler=spider.crawler, **item
41-
)
42-
return item
43-
elif isinstance(item, items.StatisticsItem):
44-
items.StatisticsItem.django_model.objects.create(
45-
crawler=spider.crawler, **item
46-
)
47-
return item
48-
elif isinstance(item, items.NoticeItem):
49-
items.NoticeItem.django_model.objects.create(
50-
crawler=spider.crawler, **item
51-
)
52-
return item
53-
elif isinstance(item, items.WHOArticleItem):
54-
items.WHOArticleItem.django_model.objects.create(
55-
crawler=spider.crawler, **item
56-
)
57-
return item
58-
elif isinstance(item, items.RecommendItem):
59-
items.RecommendItem.django_model.objects.create(
60-
crawler=spider.crawler, **item
61-
)
62-
return item
63-
elif isinstance(item, items.TimelineItem):
64-
items.TimelineItem.django_model.objects.create(
65-
crawler=spider.crawler, **item
66-
)
67-
return item
68-
elif isinstance(item, items.WikiItem):
69-
items.WikiItem.django_model.objects.create(
70-
crawler=spider.crawler, **item
71-
)
72-
return item
73-
elif isinstance(item, items.GoodsGuideItem):
74-
items.GoodsGuideItem.django_model.objects.create(
75-
crawler=spider.crawler, **item
76-
)
77-
return item
78-
elif isinstance(item, items.RumorItem):
79-
items.RumorItem.django_model.objects.create(
33+
elif isinstance(item, (items.ProvinceItem, items.CountryItem,
34+
items.StatisticsItem, items.NoticeItem,
35+
items.WHOArticleItem, items.RecommendItem,
36+
items.TimelineItem, items.WikiItem,
37+
items.GoodsGuideItem, items.RumorItem)):
38+
klass = item.__class__
39+
klass.django_model.objects.create(
8040
crawler=spider.crawler, **item
8141
)
8242
return item

spider/nCoV/spiders/dxy.py

Lines changed: 43 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,7 @@
99
import json
1010
import scrapy
1111
from scrapy.selector import Selector
12-
from ..items import StatisticsItem, NoticeItem, WHOArticleItem, \
13-
RecommendItem, ProvinceItem, CountryItem, CityItem, \
14-
TimelineItem, WikiItem, GoodsGuideItem, RumorItem
12+
from .. import items
1513

1614
from django.utils.timezone import datetime, make_aware
1715

@@ -31,13 +29,15 @@ def parse(self, response):
3129
provinces = self.get_list(scripts, '#getAreaStat')
3230
for province in provinces:
3331
cities = province.pop('cities', [])
34-
province = ProvinceItem(**province)
32+
province = items.ProvinceItem(**province)
3533
yield province
3634
for city in cities:
37-
yield CityItem(province=province['locationId'], **city)
35+
location_id = province['locationId']
36+
yield items.CityItem(province=location_id, **city)
3837

3938
# 国外数据
40-
countries = self.get_list(scripts, '#getListByCountryTypeService2true')
39+
countries = self.get_list(
40+
scripts, '#getListByCountryTypeService2true')
4141
for country in countries:
4242
country.pop('id', None)
4343
country['countryName'] = country.pop('provinceName', None)
@@ -46,7 +46,7 @@ def parse(self, response):
4646
country.pop('provinceId')
4747
country.pop('provinceName')
4848
country.pop('provinceShortName')
49-
yield CountryItem(**country)
49+
yield items.CountryItem(**country)
5050

5151
# 统计信息
5252
statistics = self.get_statistics(scripts, '#getStatisticsService')
@@ -60,29 +60,24 @@ def parse(self, response):
6060
for key in ('title', 'summary', 'infoSource', 'sourceUrl',
6161
'pubDate', 'pubDateStr'):
6262
timeline[key] = item.get(key)
63-
yield TimelineItem(**timeline)
64-
63+
yield items.TimelineItem(**timeline)
6564

6665
# 建议,id=“#getIndexRecommendList2” 为英文内容
6766
recommends = self.get_list(
6867
scripts, '#getIndexRecommendListundefined')
6968
for item in recommends:
70-
recommend = {
71-
'title': item['title'],
72-
'linkUrl': item['linkUrl'],
73-
'imgUrl': item['imgUrl'],
74-
'countryType': item['countryType'],
75-
'contentType': item['contentType'],
76-
'recordStatus': item['recordStatus'],
77-
'sort': item['sort']
78-
}
79-
yield RecommendItem(**recommend)
80-
81-
# # WHO 文章
82-
article = self.get_dict(scripts, '#fetchWHOArticle')
83-
yield WHOArticleItem(
84-
title=article['title'], linkUrl=article['linkUrl'],
85-
imgUrl=article['imgUrl'])
69+
recommend = {}
70+
for key in ('title', 'linkUrl', 'imgUrl', 'countryType',
71+
'contentType', 'recordStatus', 'sort'):
72+
recommend[key] = item.get(key)
73+
yield items.RecommendItem(**recommend)
74+
75+
# WHO 文章
76+
item = self.get_dict(scripts, '#fetchWHOArticle')
77+
article = {}
78+
for key in ('title', 'linkUrl', 'imgUrl'):
79+
article[key] = item.get(key)
80+
yield items.WHOArticleItem(**article)
8681

8782
# wiki
8883
wiki_result = self.get_dict(scripts, '#getWikiList')
@@ -91,7 +86,7 @@ def parse(self, response):
9186
wiki = {}
9287
for key in ('title', 'linkUrl', 'imgUrl', 'description'):
9388
wiki[key] = item.get(key)
94-
yield WikiItem(**wiki)
89+
yield items.WikiItem(**wiki)
9590

9691
# 购物指南
9792
guides = self.get_list(scripts, '#fetchGoodsGuide')
@@ -100,7 +95,7 @@ def parse(self, response):
10095
for key in ('categoryName', 'title', 'recordStatus',
10196
'contentImgUrls'):
10297
guide[key] = item.get(key)
103-
yield GoodsGuideItem(**guide)
98+
yield items.GoodsGuideItem(**guide)
10499

105100
# 辟谣与防护
106101
rumors = self.get_list(scripts, '#getIndexRumorList')
@@ -109,15 +104,7 @@ def parse(self, response):
109104
for key in ('title', 'mainSummary', 'summary', 'body',
110105
'sourceUrl', 'score', 'rumorType'):
111106
rumor[key] = item.get(key)
112-
yield RumorItem(**rumor)
113-
114-
def get_list(self, scripts, data_id):
115-
ret = scripts.css(data_id).re(r'(\[.+\])')
116-
return json.loads(ret[0])
117-
118-
def get_dict(self, scripts, data_id):
119-
ret = scripts.css(data_id).re(r'\=\s*(\{.+\})\}catch\(e\)\{\}')
120-
return json.loads(ret[0])
107+
yield items.RumorItem(**rumor)
121108

122109
def get_statistics(self, scripts, data_id):
123110
data = self.get_dict(scripts, data_id)
@@ -127,29 +114,27 @@ def get_statistics(self, scripts, data_id):
127114
'currentConfirmedCount', 'curedCount', 'confirmedCount',
128115
'seriousCount', 'suspectedCount', 'deadCount'):
129116
item[key] = statistics.get(key, 0)
130-
item['countryType'] = StatisticsItem.django_model.GLOBAL
131-
yield StatisticsItem(**item)
132-
117+
item['countryType'] = items.StatisticsItem.django_model.GLOBAL
118+
yield items.StatisticsItem(**item)
133119

134120
statistics = data['foreignStatistics']
135121
item = {}
136122
for key in (
137123
'currentConfirmedCount', 'curedCount', 'confirmedCount',
138124
'seriousCount', 'suspectedCount', 'deadCount'):
139125
item[key] = statistics.get(key, 0)
140-
item['countryType'] = StatisticsItem.django_model.INTERNATIONAL
141-
yield StatisticsItem(**item)
142-
126+
item['countryType'] \
127+
= items.StatisticsItem.django_model.INTERNATIONAL
128+
yield items.StatisticsItem(**item)
143129

144130
statistics = data
145131
item = {}
146132
for key in (
147133
'currentConfirmedCount', 'curedCount', 'confirmedCount',
148134
'seriousCount', 'suspectedCount', 'deadCount'):
149135
item[key] = statistics.get(key, 0)
150-
item['countryType'] = StatisticsItem.django_model.DOMESTIC
151-
yield StatisticsItem(**item)
152-
136+
item['countryType'] = items.StatisticsItem.django_model.DOMESTIC
137+
yield items.StatisticsItem(**item)
153138

154139
# Remark and Note
155140
remarks = []
@@ -169,11 +154,18 @@ def get_statistics(self, scripts, data_id):
169154
'notes': notes,
170155
'generalRemark': data.get('generalRemark')
171156
}
172-
yield NoticeItem(**item)
173-
157+
yield items.NoticeItem(**item)
174158

175-
self.crawler.createTime \
176-
= make_aware(datetime.fromtimestamp(data['createTime'] / 1000.0))
177-
self.crawler.modifyTime \
178-
= make_aware(datetime.fromtimestamp(data['modifyTime'] / 1000.0))
159+
self.crawler.createTime = make_aware(
160+
datetime.fromtimestamp(data['createTime'] / 1000.0))
161+
self.crawler.modifyTime = make_aware(
162+
datetime.fromtimestamp(data['modifyTime'] / 1000.0))
179163
self.crawler.save()
164+
165+
def get_list(self, scripts, data_id):
166+
ret = scripts.css(data_id).re(r'(\[.+\])')
167+
return json.loads(ret[0])
168+
169+
def get_dict(self, scripts, data_id):
170+
ret = scripts.css(data_id).re(r'\=\s*(\{.+\})\}catch\(e\)\{\}')
171+
return json.loads(ret[0])

0 commit comments

Comments
 (0)