2 from __future__ import unicode_literals
7 from .common import InfoExtractor
# Information extractor for Youku watch pages (v.youku.com/v_show/id_<id>) and
# embedded-player URLs (player.youku.com/player.php/sid/<id>); the captured id
# is the leading run of ASCII letters and digits.
18 class YoukuIE(InfoExtractor):
23 http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
25 (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
29 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
30 'md5': '5f3af4192eabacc4501508d54a8cabd7',
# Entry ids carry a `_partN` suffix (see the id format built in _real_extract).
32 'id': 'XMTc1ODE5Njcy_part1',
33 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
37 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
38 'only_matching': True,
# The id pattern stops at the first non-alphanumeric character, so the
# `_ev_1` suffix of this URL is not part of the extracted id.
40 'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
42 'id': 'XODgxNjg1Mzk2',
# Likewise the trailing `==` is excluded from the extracted id.
47 'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
49 'id': 'XMTI1OTczNDM5Mg',
53 'skip': 'Available in China only',
55 'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
56 'note': 'Video protected with password',
58 'id': 'XNjA1NzA2Njgw',
59 'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
# Password for the protected video; _real_extract reads the `videopassword`
# option from the downloader params.
63 'videopassword': '100600',
# Build the download URLs of every segment of every stream in `data`.
# Reads data['security'] ('encrypt_string', 'ip') and data['stream'] (each with
# 'stream_type', 'stream_fileid' and 'segs'). Each URL is a
# k.youku.com/player/getFlvPath/ address carrying the stream's '/st/' value,
# the per-segment file id and a urlencoded query string that includes the
# computed 'ep' value. Returns a dict mapping stream type -> list of URLs.
67 def construct_video_urls(self, data):
# Table-permutation step keyed by s1: walk the table `ls`, accumulating `t`
# modulo 256 and swapping entries.
# NOTE(review): together with the XOR step below this looks like RC4 — confirm.
73 t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256
74 ls[i], ls[t] = ls[t], ls[i]
# Second step: for every character of s2, swap two table entries and XOR the
# character code with a table value selected from them.
77 for i in range(len(s2)):
80 ls[x], ls[y] = ls[y], ls[x]
81 s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
# The base64-decoded security string is combined with the fixed key
# b'becaf9be'; the ASCII result is split on '_'.
# NOTE(review): presumably the pieces are the `sid` and `token` used by
# generate_ep below — confirm.
85 b'becaf9be', base64.b64decode(data['security']['encrypt_string'].encode('ascii'))
86 ).decode('ascii').split('_')
89 oip = data['security']['ip']
# NOTE(review): `\:` is not a recognised escape sequence in a non-raw string
# literal. Python keeps the backslash but newer versions emit a warning for
# it; spell it `\\:` if the backslash is intended to be in the alphabet.
93 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890')
# Index the file id of each stream by its stream type.
# NOTE(review): the local name `format` shadows the builtin of the same name.
96 for stream in data['stream']:
97 format = stream.get('stream_type')
98 fileid = stream['stream_fileid']
99 fileid_dict[format] = fileid
# Return the file id of segment `n` (a decimal string) of stream type `format`.
101 def get_fileid(format, n):
# Segment number rendered as upper-case hexadecimal, without the '0x' prefix.
102 number = hex(int(str(n), 10))[2:].upper()
104 number = '0' + number
105 streamfileids = fileid_dict[format]
# Splice the hex number in place of the characters at offsets 8 and 9 of the
# stream's file id.
106 fileid = streamfileids[0:8] + number + streamfileids[10:]
# Compute the 'ep' query value for segment `n` of stream type `format`.
110 def generate_ep(format, n):
111 fileid = get_fileid(format, n)
# Input of the transformation is the ASCII bytes of 'sid_fileid_token'.
114 ('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
# The transformed bytes `ep_t` are base64-encoded to text.
116 ep = base64.b64encode(ep_t).decode('ascii')
119 # generate video_urls
121 for stream in data['stream']:
122 format = stream.get('stream_type')
124 for dt in stream['segs']:
125 #n = str(int(dt['size']))
# Position of this segment within the stream, as a decimal string.
# NOTE(review): list.index() rescans the list on every iteration and returns
# the first element equal to `dt`, so two equal seg dicts would get the same
# number; enumerate() would give the position directly — confirm intent.
126 n = str(stream['segs'].index(dt))
129 'hd': self.get_hd(format),
131 #'ts': dt['total_milliseconds_video'],
137 'ep': generate_ep(format, n)
140 'http://k.youku.com/player/getFlvPath/' + \
143 '/st/' + self.parse_ext_l(format) + \
144 '/fileid/' + get_fileid(format, n) + '?' + \
145 compat_urllib_parse.urlencode(param)
146 video_urls.append(video_url)
# All segment URLs of this stream, keyed by its stream type.
147 video_urls_dict[format] = video_urls
149 return video_urls_dict
# Return the value sent as the 'hd' request parameter for stream type `fm`
# (see construct_video_urls). The value comes from a direct
# `hd_id_dict[fm]` lookup, so a stream type missing from that mapping fails
# the lookup (KeyError if it is a plain dict).
151 def get_hd(self, fm):
163 return hd_id_dict[fm]
# Per its call sites, turns stream type `fm` into the string used both after
# '/st/' in the segment URL and as the 'ext' of the resulting format.
165 def parse_ext_l(self, fm):
# Per its call site, turns stream type `fm` into the 'format_id' reported for
# that format.
180 def get_format_name(self, fm):
# Extract a multi-part video: take the video id from `url`, download the JSON
# metadata from play.youku.com, raise ExtractorError when the server reports an
# error, then build one entry per part ('<video_id>_part<N>') with one format
# per stream, as part of a 'multi_video' result.
195 def _real_extract(self, url):
196 video_id = self._match_id(url)
# Helper: download JSON from `req_url` (progress message `note`) and return
# its 'data' member.
198 def retrieve_data(req_url, note):
204 self._set_cookie('youku.com','xreferrer','http://www.youku.com')
205 req = sanitized_Request(req_url,headers=headers)
# When the `cn_verification_proxy` option is set, pass it along in the
# 'Ytdl-request-proxy' request header.
207 cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
208 if cn_verification_proxy:
209 req.add_header('Ytdl-request-proxy', cn_verification_proxy)
211 raw_data = self._download_json(req, video_id, note=note)
# NOTE(review): `js` is not referenced by any other line visible here —
# confirm it is unused and drop the serialisation if so.
212 js = json.dumps(raw_data)
214 return raw_data['data']
217 video_password = self._downloader.params.get('videopassword', None)
220 basic_data_url = "http://play.youku.com/play/get.json?vid=%s&ct=12" % video_id
# NOTE(review): the password is interpolated into the query string as-is; no
# URL-escaping is visible here, so characters such as '&' or '#' would corrupt
# the request — confirm.
222 basic_data_url += '&pwd=%s' % video_password
224 data = retrieve_data(
226 'Downloading JSON metadata 1')
228 error = data.get('error')
230 error_note = error.get('note')
# This specific server note (a copyright restriction message) is reported as
# the expected "available in China only" error.
231 if error_note is not None and '因版权原因无法观看此视频' in error_note:
232 raise ExtractorError(
233 'Youku said: Sorry, this video is available in China only', expected=True)
# NOTE(review): '%i' raises TypeError if the error has no 'code' (None).
235 msg = 'Youku server reported error %i' % error.get('code')
# NOTE(review): `error` was just dereferenced on the line above, so this test
# is always true; the value that may be None is `error_note` (see the check
# above), in which case the concatenation below raises TypeError. The
# condition was most likely meant to be `error_note is not None`.
236 if error is not None:
237 msg += ': ' + error_note
238 raise ExtractorError(msg)
241 title = data['video']['title']
244 # generate video_urls_dict
245 video_urls_dict = self.construct_video_urls(data)
# One entry per part, ids numbered from 1.
249 'id': '%s_part%d' % (video_id, i + 1),
252 # some formats are not available for all parts, we have to detect
# The number of entries is the largest segment count over all streams.
254 } for i in range(max(len(v.get('segs')) for v in data['stream']))]
255 for stream in data['stream']:
256 fm = stream.get('stream_type')
257 video_urls = video_urls_dict[fm]
# zip() stops at the shortest input, so a stream with fewer segments only
# contributes formats to the first entries.
258 for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
259 entry['formats'].append({
261 'format_id': self.get_format_name(fm),
262 'ext': self.parse_ext_l(fm),
263 'filesize': int(seg['size']),
267 '_type': 'multi_video',