X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fiqiyi.py;h=9046705a54aac3eb37bed44792ac38b41239563e;hb=ed8648a322f8ec55957db0d4a959868807c38ebb;hp=840cc9a4d8cc69bf083492004b9858848a9bf390;hpb=c4ee87022bd18863fc3f22f80064453e272d956f;p=youtube-dl.git diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 840cc9a4d..9046705a5 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -3,23 +3,32 @@ from __future__ import unicode_literals import hashlib import math +import os import random -import re import time import uuid -import zlib from .common import InfoExtractor -from ..compat import compat_urllib_parse -from ..utils import ExtractorError +from ..compat import ( + compat_parse_qs, + compat_urllib_parse, + compat_urllib_parse_urlparse, +) +from ..utils import ( + ExtractorError, + sanitized_Request, + urlencode_postdata, + url_basename, +) class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' + IE_DESC = '爱奇艺' - _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html' + _VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html' - _TEST = { + _TESTS = [{ 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', 'md5': '2cb594dc2781e6c941a110d8f358118b', 'info_dict': { @@ -27,9 +36,144 @@ class IqiyiIE(InfoExtractor): 'title': '美国德州空中惊现奇异云团 酷似UFO', 'ext': 'f4v', } - } + }, { + 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb', + 'title': '名侦探柯南第752集', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, 
+ }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }], + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html', + 'only_matching': True, + }, { + 'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html', + 'only_matching': True, + }, { + 'url': 'http://yule.iqiyi.com/pcb.html', + 'only_matching': True, + }, { + # VIP-only video. The first 2 parts (6 minutes) are available without login + # MD5 sums omitted as values are different on Travis CI and my machine + 'url': 'http://www.iqiyi.com/v_19rrny4w8w.html', + 'info_dict': { + 'id': 'f3cf468b39dddb30d676f89a91200dc1', + 'title': '泰坦尼克号', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'f3cf468b39dddb30d676f89a91200dc1_part1', + 'ext': 'f4v', + 'title': '泰坦尼克号', + }, + }, { + 'info_dict': { + 'id': 'f3cf468b39dddb30d676f89a91200dc1_part2', + 'ext': 'f4v', + 'title': '泰坦尼克号', + }, + }], + 'expected_warnings': ['Needs a VIP account for full video'], + }] + + _FORMATS_MAP = [ + ('1', 'h6'), + ('2', 'h5'), + ('3', 'h4'), + ('4', 'h3'), + ('5', 'h2'), + ('10', 'h1'), + ] + + @staticmethod + def md5_text(text): + return hashlib.md5(text.encode('utf-8')).hexdigest() + + def _authenticate_vip_video(self, api_video_url, video_id, tvid, _uuid, do_report_warning): + auth_params = { + # version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as + 'version': '2.0', + 'platform': 'b6c13e26323c537d', + 'aid': tvid, + 'tvid': tvid, + 'uid': '', + 'deviceId': _uuid, + 'playType': 'main', # XXX: always main? 
+ 'filename': os.path.splitext(url_basename(api_video_url))[0], + } - def construct_video_urls(self, data, video_id, _uuid): + qd_items = compat_parse_qs(compat_urllib_parse_urlparse(api_video_url).query) + for key, val in qd_items.items(): + auth_params[key] = val[0] + + auth_req = sanitized_Request( + 'http://api.vip.iqiyi.com/services/ckn.action', + urlencode_postdata(auth_params)) + # iQiyi server throws HTTP 405 error without the following header + auth_req.add_header('Content-Type', 'application/x-www-form-urlencoded') + auth_result = self._download_json( + auth_req, video_id, + note='Downloading video authentication JSON', + errnote='Unable to download video authentication JSON') + if auth_result['code'] == 'Q00506': # requires a VIP account + if do_report_warning: + self.report_warning('Needs a VIP account for full video') + return False + + return auth_result + + def construct_video_urls(self, data, video_id, _uuid, tvid): def do_xor(x, y): a = y % 3 if a == 1: @@ -55,9 +199,10 @@ class IqiyiIE(InfoExtractor): note='Download path key of segment %d for format %s' % (segment_index + 1, format_id) )['t'] t = str(int(math.floor(int(tm) / (600.0)))) - return hashlib.md5((t + mg + x).encode('utf8')).hexdigest() + return self.md5_text(t + mg + x) video_urls_dict = {} + need_vip_warning_report = True for format_item in data['vp']['tkl'][0]['vs']: if 0 < int(format_item['bid']) <= 10: format_id = self.get_format(format_item['bid']) @@ -76,11 +221,13 @@ class IqiyiIE(InfoExtractor): vl = segment['l'] if not vl.startswith('/'): vl = get_encode_code(vl) - key = get_path_key( - vl.split('/')[-1].split('.')[0], format_id, segment_index) + is_vip_video = '/vip/' in vl filesize = segment['b'] base_url = data['vp']['du'].split('/') - base_url.insert(-1, key) + if not is_vip_video: + key = get_path_key( + vl.split('/')[-1].split('.')[0], format_id, segment_index) + base_url.insert(-1, key) base_url = '/'.join(base_url) param = { 'su': _uuid, @@ -91,8 +238,23 @@ class 
IqiyiIE(InfoExtractor): 'ct': '', 'tn': str(int(time.time())) } - api_video_url = base_url + vl + '?' + \ - compat_urllib_parse.urlencode(param) + api_video_url = base_url + vl + if is_vip_video: + api_video_url = api_video_url.replace('.f4v', '.hml') + auth_result = self._authenticate_vip_video( + api_video_url, video_id, tvid, _uuid, need_vip_warning_report) + if auth_result is False: + need_vip_warning_report = False + break + param.update({ + 't': auth_result['data']['t'], + # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as + 'cid': 'afbe8fd3d73448c9', + 'vid': video_id, + 'QY00001': auth_result['data']['u'], + }) + api_video_url += '?' if '?' not in api_video_url else '&' + api_video_url += compat_urllib_parse.urlencode(param) js = self._download_json( api_video_url, video_id, note='Download video info of segment %d for format %s' % (segment_index + 1, format_id)) @@ -104,44 +266,29 @@ class IqiyiIE(InfoExtractor): return video_urls_dict def get_format(self, bid): - _dict = { - '1': 'h6', - '2': 'h5', - '3': 'h4', - '4': 'h3', - '5': 'h2', - '10': 'h1' - } - return _dict.get(str(bid), None) + matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)] + return matched_format_ids[0] if len(matched_format_ids) else None def get_bid(self, format_id): - _dict = { - 'h6': '1', - 'h5': '2', - 'h4': '3', - 'h3': '4', - 'h2': '5', - 'h1': '10', - 'best': 'best' - } - return _dict.get(format_id, None) + matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id] + return matched_bids[0] if len(matched_bids) else None def get_raw_data(self, tvid, video_id, enc_key, _uuid): tm = str(int(time.time())) + tail = tm + tvid param = { 'key': 'fvip', - 'src': hashlib.md5(b'youtube-dl').hexdigest(), + 'src': self.md5_text('youtube-dl'), 'tvId': tvid, 'vid': video_id, 'vinfo': 1, 'tm': tm, - 'enc': hashlib.md5( - (enc_key + tm + tvid).encode('utf8')).hexdigest(), + 'enc': self.md5_text(enc_key + 
tail), 'qyid': _uuid, 'tn': random.random(), 'um': 0, - 'authkey': hashlib.md5( - (tm + tvid).encode('utf8')).hexdigest() + 'authkey': self.md5_text(self.md5_text('') + tail), + 'k_tag': 1, } api_url = 'http://cache.video.qiyi.com/vms' + '?' + \ @@ -150,12 +297,9 @@ class IqiyiIE(InfoExtractor): return raw_data def get_enc_key(self, swf_url, video_id): - req = self._request_webpage( - swf_url, video_id, note='download swf content') - cn = req.read() - cn = zlib.decompress(cn[8:]) - pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\$&vv') - enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8') + # TODO: automatic key extraction + # last update at 2016-01-22 for Zombie::bite + enc_key = '6ab6d0280511493ba85594779759d4ed' return enc_key def _real_extract(self, url): @@ -166,7 +310,7 @@ class IqiyiIE(InfoExtractor): video_id = self._search_regex( r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') swf_url = self._search_regex( - r'(http://.+?MainPlayer.+?\.swf)', webpage, 'swf player URL') + r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL') _uuid = uuid.uuid4().hex enc_key = self.get_enc_key(swf_url, video_id) @@ -176,16 +320,13 @@ class IqiyiIE(InfoExtractor): if raw_data['code'] != 'A000000': raise ExtractorError('Unable to load data. Error code: ' + raw_data['code']) - if not raw_data['data']['vp']['tkl']: - raise ExtractorError('No support iQiqy VIP video') - data = raw_data['data'] title = data['vi']['vn'] # generate video_urls_dict video_urls_dict = self.construct_video_urls( - data, video_id, _uuid) + data, video_id, _uuid, tvid) # construct info entries = []