X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fmit.py;h=76b717fe5dbac08b8b103a1e44192a6fbf6d2a55;hb=1052d2bfeca49900df67d15886eea31f671cdc8c;hp=d09d03e36e8be030b889ba9fd4912f5df128a5c2;hpb=591078babff1d783bed872c5b441dc570d354448;p=youtube-dl.git

Patch summary (TechTVMITIE._real_extract in youtube_dl/extractor/mit.py):
  * Only the video page is downloaded (raw_page); the separate embed-page
    download is removed and all regex searches now run against raw_page.
  * A comment-stripped copy of the page (clean_page) is used for the
    'edit-title' / 'edit-description' element lookups.
  * The result dict carries a 'formats' list built from every entry of the
    parsed 'bitrates' JSON instead of a single 'url'/'ext' pair taken from
    the highest-bitrate entry.

NOTE(review): in the scraped copy of this patch the pattern passed to
re.compile() on the clean_page line was an empty string (the HTML-comment
literal was presumably swallowed when the page was rendered). It is restored
below as '<!--.*?-->' -- confirm against the upstream commit. Leading
whitespace of continuation lines was reconstructed; line counts match the
hunk header (-25,28 +25,35).

diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py
index d09d03e36..76b717fe5 100644
--- a/youtube_dl/extractor/mit.py
+++ b/youtube_dl/extractor/mit.py
@@ -25,28 +25,35 @@ class TechTVMITIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        webpage = self._download_webpage(
+        raw_page = self._download_webpage(
             'http://techtv.mit.edu/videos/%s' % video_id, video_id)
-        embed_page = self._download_webpage(
-            'http://techtv.mit.edu/embeds/%s/' % video_id, video_id,
-            note=u'Downloading embed page')
+        clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)
 
         base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
-            embed_page, u'base url')
-        formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page,
+            raw_page, u'base url')
+        formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
             u'video formats')
-        formats = json.loads(formats_json)
-        formats = sorted(formats, key=lambda f: f['bitrate'])
+        formats_mit = json.loads(formats_json)
+        formats = [
+            {
+                'format_id': f['label'],
+                'url': base_url + f['url'].partition(':')[2],
+                'ext': f['url'].partition(':')[0],
+                'format': f['label'],
+                'width': f['width'],
+                'vbr': f['bitrate'],
+            }
+            for f in formats_mit
+        ]
 
-        title = get_element_by_id('edit-title', webpage)
-        description = clean_html(get_element_by_id('edit-description', webpage))
+        title = get_element_by_id('edit-title', clean_page)
+        description = clean_html(get_element_by_id('edit-description', clean_page))
         thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
-            embed_page, u'thumbnail', flags=re.DOTALL)
+            raw_page, u'thumbnail', flags=re.DOTALL)
 
         return {'id': video_id,
                 'title': title,
-                'url': base_url + formats[-1]['url'].replace('mp4:', ''),
-                'ext': 'mp4',
+                'formats': formats,
                 'description': description,
                 'thumbnail': thumbnail,
                 }