gitweb @ CieloNegro.org - youtube-dl.git/commitdiff
Merge branch 'ted_subtitles'
author Ismaël Mejía <iemejia@gmail.com>
Sat, 2 Nov 2013 18:50:45 +0000 (19:50 +0100)
committer Ismaël Mejía <iemejia@gmail.com>
Sat, 2 Nov 2013 18:50:45 +0000 (19:50 +0100)
1  2 
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/youtube.py

index 355b4ed0a028540c81a486ac4f389d50cbf48987,3aef82bcf402e0a8795a71cc6210596f811e954d..e87690f9d288103ea222e1c216786b42e89364de
@@@ -21,7 -21,6 +21,7 @@@ class DailymotionBaseInfoExtractor(Info
          """Build a request with the family filter disabled"""
          request = compat_urllib_request.Request(url)
          request.add_header('Cookie', 'family_filter=off')
 +        request.add_header('Cookie', 'ff=off')
          return request
  
  class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
  
      _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
      IE_NAME = u'dailymotion'
 +
 +    _FORMATS = [
 +        (u'stream_h264_ld_url', u'ld'),
 +        (u'stream_h264_url', u'standard'),
 +        (u'stream_h264_hq_url', u'hq'),
 +        (u'stream_h264_hd_url', u'hd'),
 +        (u'stream_h264_hd1080_url', u'hd180'),
 +    ]
 +
      _TESTS = [
          {
              u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
              },
              u'skip': u'VEVO is only available in some countries',
          },
 +        # age-restricted video
 +        {
 +            u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
 +            u'file': u'xyh2zz.mp4',
 +            u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
 +            u'info_dict': {
 +                u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
 +                u'uploader': 'HotWaves1012',
 +                u'age_limit': 18,
 +            }
 +
 +        }
      ]
  
      def _real_extract(self, url):
@@@ -82,6 -60,7 +82,6 @@@
  
          video_id = mobj.group(1).split('_')[0].split('?')[0]
  
 -        video_extension = 'mp4'
          url = 'http://www.dailymotion.com/video/%s' % video_id
  
          # Retrieve video webpage to extract further information
          video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
                                               # Looking for official user
                                               r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
 -                                            webpage, 'video uploader')
 +                                            webpage, 'video uploader', fatal=False)
 +        age_limit = self._rta_search(webpage)
  
          video_upload_date = None
          mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
              msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
              raise ExtractorError(msg, expected=True)
  
 -        # TODO: support choosing qualities
 -
 -        for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
 -                    'stream_h264_hq_url','stream_h264_url',
 -                    'stream_h264_ld_url']:
 -            if info.get(key):#key in info and info[key]:
 -                max_quality = key
 -                self.to_screen(u'Using %s' % key)
 -                break
 -        else:
 +        formats = []
 +        for (key, format_id) in self._FORMATS:
 +            video_url = info.get(key)
 +            if video_url is not None:
 +                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
 +                if m_size is not None:
 +                    width, height = m_size.group(1), m_size.group(2)
 +                else:
 +                    width, height = None, None
 +                formats.append({
 +                    'url': video_url,
 +                    'ext': 'mp4',
 +                    'format_id': format_id,
 +                    'width': width,
 +                    'height': height,
 +                })
 +        if not formats:
              raise ExtractorError(u'Unable to extract video URL')
 -        video_url = info[max_quality]
  
          # subtitles
-         video_subtitles = self.extract_subtitles(video_id)
+         video_subtitles = self.extract_subtitles(video_id, webpage)
          if self._downloader.params.get('listsubtitles', False):
-             self._list_available_subtitles(video_id)
+             self._list_available_subtitles(video_id, webpage)
              return
  
 -        return [{
 +        return {
              'id':       video_id,
 -            'url':      video_url,
 +            'formats': formats,
              'uploader': video_uploader,
              'upload_date':  video_upload_date,
              'title':    self._og_search_title(webpage),
 -            'ext':      video_extension,
              'subtitles':    video_subtitles,
 -            'thumbnail': info['thumbnail_url']
 -        }]
 +            'thumbnail': info['thumbnail_url'],
 +            'age_limit': age_limit,
 +        }
  
-     def _get_available_subtitles(self, video_id):
+     def _get_available_subtitles(self, video_id, webpage):
          try:
              sub_list = self._download_webpage(
                  'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
index dc601de520bae7f83b5e64aa87a1f7e77161385c,d7c9b38f9da7054a43827a5e44572986717714a7..9053f3ead8ee81b490fdc9982359465e29d965db
@@@ -74,8 -74,14 +74,8 @@@ class YoutubeBaseInfoExtractor(InfoExtr
              self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
              return False
  
 -        galx = None
 -        dsh = None
 -        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
 -        if match:
 -          galx = match.group(1)
 -        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
 -        if match:
 -          dsh = match.group(1)
 +        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
 +                                  login_page, u'Login GALX parameter')
  
          # Log in
          login_form_strs = {
@@@ -89,6 -95,7 +89,6 @@@
                  u'checkConnection': u'',
                  u'checkedDomains': u'youtube',
                  u'dnConn': u'',
 -                u'dsh': dsh,
                  u'pstMsg': u'0',
                  u'rmShown': u'1',
                  u'secTok': u'',
@@@ -229,13 -236,11 +229,13 @@@ class YoutubeIE(YoutubeBaseInfoExtracto
          '136': 'mp4',
          '137': 'mp4',
          '138': 'mp4',
 -        '139': 'mp4',
 -        '140': 'mp4',
 -        '141': 'mp4',
          '160': 'mp4',
  
 +        # Dash mp4 audio
 +        '139': 'm4a',
 +        '140': 'm4a',
 +        '141': 'm4a',
 +
          # Dash webm
          '171': 'webm',
          '172': 'webm',
          },
          {
              u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
 -            u"file":  u"1ltcDfZMA3U.flv",
 +            u"file":  u"1ltcDfZMA3U.mp4",
              u"note": u"Test VEVO video (#897)",
              u"info_dict": {
                  u"upload_date": u"20070518",
          else:
              raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
  
-     def _get_available_subtitles(self, video_id):
+     def _get_available_subtitles(self, video_id, webpage):
          try:
              sub_list = self._download_webpage(
                  'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                  'lang': lang,
                  'v': video_id,
                  'fmt': self._downloader.params.get('subtitlesformat'),
 -                'name': l[0],
 +                'name': l[0].encode('utf-8'),
              })
              url = u'http://www.youtube.com/api/timedtext?' + params
              sub_lang_list[lang] = url
              list_page = self._download_webpage(list_url, video_id)
              caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
              original_lang_node = caption_list.find('track')
 -            if original_lang_node.attrib.get('kind') != 'asr' :
 +            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                  self._downloader.report_warning(u'Video doesn\'t have automatic captions')
                  return {}
              original_lang = original_lang_node.attrib['lang_code']
              # this signatures are encrypted
              if 'url_encoded_fmt_stream_map' not in args:
                  raise ValueError(u'No stream_map present')  # caught below
 -            m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
 +            re_signature = re.compile(r'[&,]s=')
 +            m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
              if m_s is not None:
                  self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                  video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
 -            m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
 +            m_s = re_signature.search(args.get('adaptive_fmts', u''))
              if m_s is not None:
 -                if 'url_encoded_fmt_stream_map' in video_info:
 -                    video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
 -                else:
 -                    video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
 -            elif 'adaptive_fmts' in video_info:
 -                if 'url_encoded_fmt_stream_map' in video_info:
 -                    video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
 +                if 'adaptive_fmts' in video_info:
 +                    video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
                  else:
 -                    video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
 +                    video_info['adaptive_fmts'] = [args['adaptive_fmts']]
          except ValueError:
              pass
  
          if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
              self.report_rtmp_download()
              video_url_list = [(None, video_info['conn'][0])]
 -        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
 -            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
 +        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
 +            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
 +            if 'rtmpe%3Dyes' in encoded_url_map:
                  raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
              url_map = {}
 -            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
 +            for url_data_str in encoded_url_map.split(','):
                  url_data = compat_parse_qs(url_data_str)
                  if 'itag' in url_data and 'url' in url_data:
                      url = url_data['url'][0]
              raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
  
          results = []
 -        for format_param, video_real_url in video_url_list:
 +        for itag, video_real_url in video_url_list:
              # Extension
 -            video_extension = self._video_extensions.get(format_param, 'flv')
 +            video_extension = self._video_extensions.get(itag, 'flv')
  
 -            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
 -                                              self._video_dimensions.get(format_param, '???'),
 -                                              ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
 +            video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
 +                                              self._video_dimensions.get(itag, '???'),
 +                                              ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
  
              results.append({
                  'id':       video_id,
                  'title':    video_title,
                  'ext':      video_extension,
                  'format':   video_format,
 +                'format_id': itag,
                  'thumbnail':    video_thumbnail,
                  'description':  video_description,
                  'player_url':   player_url,