1+ # -*- coding: utf-8 -*-
2+ # @Author: zhanglei3
3+ # @Date: 2020-04-08 09:08:13
4+ # @Last Modified by: leafcoder
5+ # @Last Modified time: 2020-05-30 19:02:49
6+
"""Data source for COVID-19 statistics of the U.S. states."""
8+
9+ import json
10+ import scrapy
11+ import logging
12+ from scrapy .selector import Selector
13+
14+ from .. import items
15+
16+ from django .core .cache import cache
17+ from django .utils .timezone import datetime , make_aware
18+ from django .utils .translation import ugettext_lazy as _
19+
# Module-scoped logger: records carry this module's dotted name instead of
# being attributed to the root logger, and still propagate to root handlers.
logger = logging.getLogger(__name__)
21+
# State names registered for i18n.
#
# Keys are the API state names with all whitespace removed, i.e. the same
# form the spider stores as ``stateName`` ("New York" -> "NewYork").  Values
# are the lazily translated names.  Every name is deliberately spelled out as
# a literal inside ``_()`` -- presumably so that Django's message extraction
# can discover it; do not collapse this table into a loop or comprehension.
STATES = {
    "Alabama": _("Alabama"),
    "Alaska": _("Alaska"),
    "AmericanSamoa": _("AmericanSamoa"),
    "Arizona": _("Arizona"),
    "Arkansas": _("Arkansas"),
    "California": _("California"),
    "Colorado": _("Colorado"),
    "Connecticut": _("Connecticut"),
    "Delaware": _("Delaware"),
    "DistrictOfColumbia": _("DistrictOfColumbia"),
    "Florida": _("Florida"),
    "Georgia": _("Georgia"),
    "Guam": _("Guam"),
    "Hawaii": _("Hawaii"),
    "Idaho": _("Idaho"),
    "Illinois": _("Illinois"),
    "Indiana": _("Indiana"),
    "Iowa": _("Iowa"),
    "Kansas": _("Kansas"),
    "Kentucky": _("Kentucky"),
    "Louisiana": _("Louisiana"),
    "Maine": _("Maine"),
    "Maryland": _("Maryland"),
    "Massachusetts": _("Massachusetts"),
    "Michigan": _("Michigan"),
    "Minnesota": _("Minnesota"),
    "Mississippi": _("Mississippi"),
    "Missouri": _("Missouri"),
    "Montana": _("Montana"),
    "Nebraska": _("Nebraska"),
    "Nevada": _("Nevada"),
    "NewHampshire": _("NewHampshire"),
    "NewJersey": _("NewJersey"),
    "NewMexico": _("NewMexico"),
    "NewYork": _("NewYork"),
    "NorthCarolina": _("NorthCarolina"),
    "NorthDakota": _("NorthDakota"),
    "NorthernMarianaIslands": _("NorthernMarianaIslands"),
    "Ohio": _("Ohio"),
    "Oklahoma": _("Oklahoma"),
    "Oregon": _("Oregon"),
    "Pennsylvania": _("Pennsylvania"),
    "PuertoRico": _("PuertoRico"),
    "RhodeIsland": _("RhodeIsland"),
    "SouthCarolina": _("SouthCarolina"),
    "SouthDakota": _("SouthDakota"),
    "Tennessee": _("Tennessee"),
    "Texas": _("Texas"),
    "USVirginIslands": _("USVirginIslands"),
    "Utah": _("Utah"),
    "Vermont": _("Vermont"),
    "Virginia": _("Virginia"),
    "Washington": _("Washington"),
    "WestVirginia": _("WestVirginia"),
    "Wisconsin": _("Wisconsin"),
    "Wyoming": _("Wyoming"),
}
81+
class CovidTrackingSpider(scrapy.Spider):
    """Spider that collects per-state COVID-19 data for the USA.

    Data source: https://covidtracking.com/api

    The crawl is a chain of three steps:

    1. ``states/info.json`` -> :meth:`parse_info` creates one base record
       per state.
    2. ``states/current.json`` -> :meth:`parse_states_current` merges the
       current figures into each record and requests that state's history.
    3. ``states/<state>/daily.json`` -> :meth:`parse_state_daily` attaches
       the history and yields the finished ``items.StateItem``.
    """

    name = "covidtracking"
    allowed_domains = ["covidtracking.com"]
    country_short_code = 'USA'

    # Endpoints requested by this spider, kept in one place.
    # Not crawled here: states/daily.json and us/daily.json.
    states_info_url = 'https://covidtracking.com/api/v1/states/info.json'
    states_current_url = (
        'https://covidtracking.com/api/v1/states/current.json')
    state_daily_url = 'https://covidtracking.com/api/v1/states/%s/daily.json'

    def __init__(self, *args, **kwargs):
        """Initialise the spider with an empty per-instance state cache."""
        super(CovidTrackingSpider, self).__init__(*args, **kwargs)
        # Maps state code -> record being assembled.  Created per instance
        # because a class-level dict would be shared (and mutated) by every
        # spider instance living in the same process.
        self.states = {}

    def start_requests(self):
        """Start the crawl by requesting the state metadata.

        Yields:
            scrapy.Request: request for ``states/info.json``, handled by
            :meth:`parse_info`.
        """
        yield scrapy.Request(self.states_info_url, self.parse_info)

    def parse_info(self, response):
        """Create the base record of every state listed in ``info.json``.

        Args:
            response: response whose body is a JSON list of objects that
                carry at least the keys ``state`` and ``name``.

        Yields:
            scrapy.Request: request for ``states/current.json``, handled by
            :meth:`parse_states_current`.
        """
        country_short_code = self.country_short_code
        for info in json.loads(response.text):
            state = info['state']
            self.states[state] = {
                'state': state,
                'countryShortCode': country_short_code,
                # All whitespace is removed: "New York" -> "NewYork".
                'stateName': ''.join(info['name'].split()),
            }
        yield scrapy.Request(
            self.states_current_url, self.parse_states_current)

    def parse_states_current(self, response):
        """Merge the current figures into the records and request history.

        A state that has no record from :meth:`parse_info` is skipped with a
        warning instead of raising ``KeyError``, which would abort this
        callback and drop every state that follows it in the response.

        Args:
            response: response whose body is a JSON list of objects that
                carry at least the key ``state``.

        Yields:
            scrapy.Request: one request per known state for its
            ``daily.json``, handled by :meth:`parse_state_daily`; the
            record travels in ``meta['state_item']``.
        """
        country_short_code = self.country_short_code
        for current in json.loads(response.text):
            state = current['state']
            state_item = self.states.get(state)
            if state_item is None:
                self.logger.warning(
                    'Skipping state %r: not present in states/info.json',
                    state)
                continue
            state_item.update(current)
            # Dropped before the item is built -- presumably because
            # StateItem declares no such fields; verify against items.py.
            state_item.pop('grade', None)
            state_item.pop('total', None)
            state_item['countryShortCode'] = country_short_code
            yield scrapy.Request(
                self.state_daily_url % state,
                self.parse_state_daily,
                meta={'state_item': state_item})

    def parse_state_daily(self, response):
        """Attach the state's history and emit the finished item.

        Args:
            response: response whose body is a JSON list; its
                ``meta['state_item']`` holds the record to complete.

        Yields:
            items.StateItem: the record, with ``dailyData`` set to the
            JSON-encoded history.
        """
        state_item = response.meta['state_item']
        daily = json.loads(response.text)
        # Stored in the reverse of the API's order (presumably turning
        # newest-first into oldest-first -- confirm against the API).
        state_item['dailyData'] = json.dumps(daily[::-1])
        yield items.StateItem(**state_item)
0 commit comments