[douyutv] Improve extraction and update tests

The JSON API sometimes returns HTML pages with errors
This commit is contained in:
Yen Chi Hsuan 2016-04-24 23:52:17 +08:00
parent 51762e1a31
commit aa9dc24f5a
No known key found for this signature in database
GPG Key ID: 3FDDD575826C5C30
1 changed file with 24 additions and 9 deletions

View File

@ -18,7 +18,7 @@ class DouyuTVIE(InfoExtractor):
'display_id': 'iseven', 'display_id': 'iseven',
'ext': 'flv', 'ext': 'flv',
'title': 're:^清晨醒脑T-ara根本停不下来 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^清晨醒脑T-ara根本停不下来 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:f34981259a03e980a3c6404190a3ed61', 'description': 're:.*m7show@163\.com.*',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'uploader': '7师傅', 'uploader': '7师傅',
'uploader_id': '431925', 'uploader_id': '431925',
@ -43,7 +43,7 @@ class DouyuTVIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Romm not found', 'skip': 'Room not found',
}, { }, {
'url': 'http://www.douyutv.com/17732', 'url': 'http://www.douyutv.com/17732',
'info_dict': { 'info_dict': {
@ -51,7 +51,7 @@ class DouyuTVIE(InfoExtractor):
'display_id': '17732', 'display_id': '17732',
'ext': 'flv', 'ext': 'flv',
'title': 're:^清晨醒脑T-ara根本停不下来 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^清晨醒脑T-ara根本停不下来 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:f34981259a03e980a3c6404190a3ed61', 'description': 're:.*m7show@163\.com.*',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'uploader': '7师傅', 'uploader': '7师傅',
'uploader_id': '431925', 'uploader_id': '431925',
@ -75,13 +75,28 @@ class DouyuTVIE(InfoExtractor):
room_id = self._html_search_regex( room_id = self._html_search_regex(
r'"room_id"\s*:\s*(\d+),', page, 'room id') r'"room_id"\s*:\s*(\d+),', page, 'room id')
prefix = 'room/%s?aid=android&client_sys=android&time=%d' % ( config = None
room_id, int(time.time())) # Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache"
# Retry with different parameters - same parameters cause same errors
for i in range(5):
prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
room_id, int(time.time()))
auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest() config_page = self._download_webpage(
config = self._download_json( 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth), video_id)
video_id) try:
config = self._parse_json(config_page, video_id, fatal=False)
except ExtractorError:
# Wait some time before retrying to get a different time() value
self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. '
'Waiting for %(timeout)s seconds before retrying')
continue
else:
break
if config is None:
raise ExtractorError('Unable to fetch API result')
data = config['data'] data = config['data']