Merge branch 'ted_subtitles'

author Ismaël Mejía <iemejia@gmail.com>
Sat, 2 Nov 2013 18:50:45 +0000 (19:50 +0100)

committer Ismaël Mejía <iemejia@gmail.com>
Sat, 2 Nov 2013 18:50:45 +0000 (19:50 +0100)

diff --combined youtube_dl/extractor/dailymotion.py

index 355b4ed0a028540c81a486ac4f389d50cbf48987,3aef82bcf402e0a8795a71cc6210596f811e954d..e87690f9d288103ea222e1c216786b42e89364de
--- 1/youtube_dl/extractor/dailymotion.py
--- 2/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@@ -21,7 -21,6 +21,7 @@@ class DailymotionBaseInfoExtractor(Info
           """Build a request with the family filter disabled"""
           request = compat_urllib_request.Request(url)
           request.add_header('Cookie', 'family_filter=off')
+ +        request.add_header('Cookie', 'family_filter=off; ff=off')
           return request
   
   class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
@@@ -29,15 -28,6 +29,15 @@@
   
       _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
       IE_NAME = u'dailymotion'
+ +
+ +    _FORMATS = [
+ +        (u'stream_h264_ld_url', u'ld'),
+ +        (u'stream_h264_url', u'standard'),
+ +        (u'stream_h264_hq_url', u'hq'),
+ +        (u'stream_h264_hd_url', u'hd'),
+ +        (u'stream_h264_hd1080_url', u'hd1080'),
+ +    ]
+ +
       _TESTS = [
           {
               u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
@@@ -62,18 -52,6 +62,18 @@@
               },
               u'skip': u'VEVO is only available in some countries',
           },
+ +        # age-restricted video
+ +        {
+ +            u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
+ +            u'file': u'xyh2zz.mp4',
+ +            u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
+ +            u'info_dict': {
+ +                u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
+ +                u'uploader': 'HotWaves1012',
+ +                u'age_limit': 18,
+ +            }
+ +
+ +        }
       ]
   
       def _real_extract(self, url):
@@@ -82,6 -60,7 +82,6 @@@
   
           video_id = mobj.group(1).split('_')[0].split('?')[0]
   
- -        video_extension = 'mp4'
           url = 'http://www.dailymotion.com/video/%s' % video_id
   
           # Retrieve video webpage to extract further information
@@@ -103,8 -82,7 +103,8 @@@
           video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
                                                # Looking for official user
                                                r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
- -                                            webpage, 'video uploader')
+ +                                            webpage, 'video uploader', fatal=False)
+ +        age_limit = self._rta_search(webpage)
   
           video_upload_date = None
           mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
@@@ -121,43 -99,37 +121,43 @@@
               msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
               raise ExtractorError(msg, expected=True)
   
- -        # TODO: support choosing qualities
- -
- -        for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
- -                    'stream_h264_hq_url','stream_h264_url',
- -                    'stream_h264_ld_url']:
- -            if info.get(key):#key in info and info[key]:
- -                max_quality = key
- -                self.to_screen(u'Using %s' % key)
- -                break
- -        else:
+ +        formats = []
+ +        for (key, format_id) in self._FORMATS:
+ +            video_url = info.get(key)
+ +            if video_url is not None:
+ +                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
+ +                if m_size is not None:
+ +                    width, height = m_size.group(1), m_size.group(2)
+ +                else:
+ +                    width, height = None, None
+ +                formats.append({
+ +                    'url': video_url,
+ +                    'ext': 'mp4',
+ +                    'format_id': format_id,
+ +                    'width': width,
+ +                    'height': height,
+ +                })
+ +        if not formats:
               raise ExtractorError(u'Unable to extract video URL')
- -        video_url = info[max_quality]
   
           # subtitles
-         video_subtitles = self.extract_subtitles(video_id)
+         video_subtitles = self.extract_subtitles(video_id, webpage)
           if self._downloader.params.get('listsubtitles', False):
-             self._list_available_subtitles(video_id)
+             self._list_available_subtitles(video_id, webpage)
               return
   
- -        return [{
+ +        return {
               'id':       video_id,
- -            'url':      video_url,
+ +            'formats': formats,
               'uploader': video_uploader,
               'upload_date':  video_upload_date,
               'title':    self._og_search_title(webpage),
- -            'ext':      video_extension,
               'subtitles':    video_subtitles,
- -            'thumbnail': info['thumbnail_url']
- -        }]
+ +            'thumbnail': info['thumbnail_url'],
+ +            'age_limit': age_limit,
+ +        }
   
-     def _get_available_subtitles(self, video_id):
+     def _get_available_subtitles(self, video_id, webpage):
           try:
               sub_list = self._download_webpage(
                   'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
diff --combined youtube_dl/extractor/youtube.py

index dc601de520bae7f83b5e64aa87a1f7e77161385c,d7c9b38f9da7054a43827a5e44572986717714a7..9053f3ead8ee81b490fdc9982359465e29d965db
--- 1/youtube_dl/extractor/youtube.py
--- 2/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@@ -74,8 -74,14 +74,8 @@@ class YoutubeBaseInfoExtractor(InfoExtr
               self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
               return False
   
- -        galx = None
- -        dsh = None
- -        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
- -        if match:
- -          galx = match.group(1)
- -        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
- -        if match:
- -          dsh = match.group(1)
+ +        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
+ +                                  login_page, u'Login GALX parameter')
   
           # Log in
           login_form_strs = {
@@@ -89,6 -95,7 +89,6 @@@
                   u'checkConnection': u'',
                   u'checkedDomains': u'youtube',
                   u'dnConn': u'',
- -                u'dsh': dsh,
                   u'pstMsg': u'0',
                   u'rmShown': u'1',
                   u'secTok': u'',
@@@ -229,13 -236,11 +229,13 @@@ class YoutubeIE(YoutubeBaseInfoExtracto
           '136': 'mp4',
           '137': 'mp4',
           '138': 'mp4',
- -        '139': 'mp4',
- -        '140': 'mp4',
- -        '141': 'mp4',
           '160': 'mp4',
   
+ +        # Dash mp4 audio
+ +        '139': 'm4a',
+ +        '140': 'm4a',
+ +        '141': 'm4a',
+ +
           # Dash webm
           '171': 'webm',
           '172': 'webm',
@@@ -341,7 -346,7 +341,7 @@@
           },
           {
               u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
- -            u"file":  u"1ltcDfZMA3U.flv",
+ +            u"file":  u"1ltcDfZMA3U.mp4",
               u"note": u"Test VEVO video (#897)",
               u"info_dict": {
                   u"upload_date": u"20070518",
@@@ -1094,7 -1099,7 +1094,7 @@@
           else:
               raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
   
-     def _get_available_subtitles(self, video_id):
+     def _get_available_subtitles(self, video_id, webpage):
           try:
               sub_list = self._download_webpage(
                   'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
@@@ -1111,7 -1116,7 +1111,7 @@@
                   'lang': lang,
                   'v': video_id,
                   'fmt': self._downloader.params.get('subtitlesformat'),
- -                'name': l[0],
+ +                'name': l[0].encode('utf-8'),
               })
               url = u'http://www.youtube.com/api/timedtext?' + params
               sub_lang_list[lang] = url
@@@ -1145,7 -1150,7 +1145,7 @@@
               list_page = self._download_webpage(list_url, video_id)
               caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
               original_lang_node = caption_list.find('track')
- -            if original_lang_node.attrib.get('kind') != 'asr' :
+ +            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                   self._downloader.report_warning(u'Video doesn\'t have automatic captions')
                   return {}
               original_lang = original_lang_node.attrib['lang_code']
@@@ -1398,29 -1403,32 +1398,29 @@@
               # this signatures are encrypted
               if 'url_encoded_fmt_stream_map' not in args:
                   raise ValueError(u'No stream_map present')  # caught below
- -            m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
+ +            re_signature = re.compile(r'[&,]s=')
+ +            m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
               if m_s is not None:
                   self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                   video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
- -            m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
+ +            m_s = re_signature.search(args.get('adaptive_fmts', u''))
               if m_s is not None:
- -                if 'url_encoded_fmt_stream_map' in video_info:
- -                    video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
- -                else:
- -                    video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
- -            elif 'adaptive_fmts' in video_info:
- -                if 'url_encoded_fmt_stream_map' in video_info:
- -                    video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
+ +                if 'adaptive_fmts' in video_info:
+ +                    video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
                   else:
- -                    video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
+ +                    video_info['adaptive_fmts'] = [args['adaptive_fmts']]
           except ValueError:
               pass
   
           if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
               self.report_rtmp_download()
               video_url_list = [(None, video_info['conn'][0])]
- -        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
- -            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
+ +        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
+ +            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
+ +            if 'rtmpe%3Dyes' in encoded_url_map:
                   raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
               url_map = {}
- -            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
+ +            for url_data_str in encoded_url_map.split(','):
                   url_data = compat_parse_qs(url_data_str)
                   if 'itag' in url_data and 'url' in url_data:
                       url = url_data['url'][0]
@@@ -1473,13 -1481,13 +1473,13 @@@
               raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
   
           results = []
- -        for format_param, video_real_url in video_url_list:
+ +        for itag, video_real_url in video_url_list:
               # Extension
- -            video_extension = self._video_extensions.get(format_param, 'flv')
+ +            video_extension = self._video_extensions.get(itag, 'flv')
   
- -            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
- -                                              self._video_dimensions.get(format_param, '???'),
- -                                              ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
+ +            video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
+ +                                              self._video_dimensions.get(itag, '???'),
+ +                                              ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
   
               results.append({
                   'id':       video_id,
@@@ -1490,7 -1498,6 +1490,7 @@@
                   'title':    video_title,
                   'ext':      video_extension,
                   'format':   video_format,
+ +                'format_id': itag,
                   'thumbnail':    video_thumbnail,
                   'description':  video_description,
                   'player_url':   player_url,
author	Ismaël Mejía <iemejia@gmail.com>
	Sat, 2 Nov 2013 18:50:45 +0000 (19:50 +0100)
committer	Ismaël Mejía <iemejia@gmail.com>
	Sat, 2 Nov 2013 18:50:45 +0000 (19:50 +0100)
		1	2
youtube_dl/extractor/dailymotion.py	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/extractor/youtube.py	patch \|	diff1 \|	diff2 \|	blob \| history