PEP8 applied

[youtube-dl.git] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 19f81412555c83c8b92d80c48ae46387bb32ab2b..98cac7c17c01ccce217cd09cb7b72437913d756a 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -33,6 +33,7 @@ from ..utils import (
      uppercase_escape,
  )
  
+
  class YoutubeBaseInfoExtractor(InfoExtractor):
      """Provide base functions for Youtube extractors"""
      _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
@@ -99,7 +100,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
  
          # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
          # chokes on unicode
-        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
          login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
  
          req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
@@ -149,7 +150,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                  'service': 'youtube',
                  'hl': 'en_US',
              }
-            tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in tfa_form_strs.items())
+            tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
              tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
  
              tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
@@ -274,6 +275,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
          '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
          '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
+        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
+        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
  
          # Dash mp4 audio
          '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
@@ -304,9 +308,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
          '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
  
-        # Dash mov
-        '298': {'ext': 'mov', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
-        '299': {'ext': 'mov', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
+        # Dash webm audio with opus inside
+        '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
+        '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
+        '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
  
          # RTMP (unnamed)
          '_rtmp': {'protocol': 'rtmp'},
@@ -402,6 +407,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'format': '141',
              },
          },
+        # Controversy video
+        {
+            'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
+            'info_dict': {
+                'id': 'T4XJQO3qol8',
+                'ext': 'mp4',
+                'upload_date': '20100909',
+                'uploader': 'The Amazing Atheist',
+                'uploader_id': 'TheAmazingAtheist',
+                'title': 'Burning Everyone\'s Koran',
+                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
+            }
+        }
      ]
  
      def __init__(self, *args, **kwargs):
@@ -511,7 +529,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
  
      def _parse_sig_js(self, jscode):
          funcname = self._search_regex(
-            r'signature=([$a-zA-Z]+)', jscode,
+            r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode,
               'Initial JS player signature function name')
  
          jsi = JSInterpreter(jscode)
@@ -601,7 +619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              list_url = caption_url + '&' + list_params
              caption_list = self._download_xml(list_url, video_id)
              original_lang_node = caption_list.find('track')
-            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
+            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr':
                  self._downloader.report_warning('Video doesn\'t have automatic captions')
                  return {}
              original_lang = original_lang_node.attrib['lang_code']
@@ -634,6 +652,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
  
      def _extract_from_m3u8(self, manifest_url, video_id):
          url_map = {}
+
          def _get_urls(_manifest):
              lines = _manifest.split('\n')
              urls = filter(lambda l: l and not l.startswith('#'),
@@ -662,7 +681,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          video_id = self.extract_id(url)
  
          # Get video webpage
-        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
+        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
          pref_cookies = [
              c for c in self._downloader.cookiejar
              if c.domain == '.youtube.com' and c.name == 'PREF']
@@ -685,7 +704,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          # Get video info
          self.report_video_info_webpage_download(video_id)
          if re.search(r'player-age-gate-content">', video_webpage) is not None:
-            self.report_age_confirmation()
              age_gate = True
              # We simulate access to the video from www.youtube.com/v/{video_id},
              # which can be viewed without logging in to YouTube
@@ -693,12 +711,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'video_id': video_id,
                  'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                  'sts': self._search_regex(
-                    r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
+                    r'"sts"\s*:\s*(\d+)', video_webpage, 'sts', default=''),
              })
              video_info_url = proto + '://www.youtube.com/get_video_info?' + data
-            video_info_webpage = self._download_webpage(video_info_url, video_id,
-                                    note=False,
-                                    errnote='unable to download video info webpage')
+            video_info_webpage = self._download_webpage(
+                video_info_url, video_id,
+                note='Refetching age-gated info webpage',
+                errnote='unable to download video info webpage')
              video_info = compat_parse_qs(video_info_webpage)
          else:
              age_gate = False
@@ -833,7 +852,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          # annotations
          video_annotations = None
          if self._downloader.params.get('writeannotations', False):
-                video_annotations = self._extract_annotations(video_id)
+            video_annotations = self._extract_annotations(video_id)
  
          # Decide which formats to download
          try:
@@ -883,7 +902,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'player_url': player_url,
              }]
          elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
-            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
+            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
              if 'rtmpe%3Dyes' in encoded_url_map:
                  raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
              url_map = {}
@@ -957,6 +976,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                      dash_manifest_url = video_info.get('dashmpd')[0]
                  else:
                      dash_manifest_url = ytplayer_config['args']['dashmpd']
+
                  def decrypt_sig(mobj):
                      s = mobj.group(1)
                      dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
@@ -992,7 +1012,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                          existing_format.update(f)
  
              except (ExtractorError, KeyError) as e:
-                self.report_warning('Skipping DASH manifest: %s' % e, video_id)
+                self.report_warning('Skipping DASH manifest: %r' % e, video_id)
  
          self._sort_formats(formats)
  
@@ -1016,6 +1036,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              'formats':      formats,
          }
  
+
  class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
      IE_DESC = 'YouTube.com playlists'
      _VALID_URL = r"""(?x)(?:
@@ -1029,7 +1050,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                          )
                          (
                              (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
-                            # Top tracks, they can also include dots 
+                            # Top tracks, they can also include dots
                              |(?:MC)[\w\.]*
                          )
                          .*
@@ -1044,6 +1065,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
          'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
          'info_dict': {
              'title': 'ytdl test PL',
+            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
          },
          'playlist_count': 3,
      }, {
@@ -1212,7 +1234,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
                  <span[^>]*>.*?%s.*?</span>''' % re.escape(query),
              channel_page, 'list')
          url = compat_urlparse.urljoin('https://www.youtube.com/', link)
-        
+
          video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
          ids = []
          # sometimes the webpage doesn't contain the videos
@@ -1280,7 +1302,7 @@ class YoutubeChannelIE(InfoExtractor):
  
                  ids_in_page = self.extract_videos_from_page(page['content_html'])
                  video_ids.extend(ids_in_page)
-    
+
                  if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                      break
  
@@ -1315,8 +1337,10 @@ class YoutubeUserIE(InfoExtractor):
          # Don't return True if the URL can be extracted by another YouTube
          # extractor: this regex is too permissive and would match it as well.
          other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
-        if any(ie.suitable(url) for ie in other_ies): return False
-        else: return super(YoutubeUserIE, cls).suitable(url)
+        if any(ie.suitable(url) for ie in other_ies):
+            return False
+        else:
+            return super(YoutubeUserIE, cls).suitable(url)
  
      def _real_extract(self, url):
          # Extract username
@@ -1539,12 +1563,14 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
              paging = mobj.group('paging')
          return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
  
+
  class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
      IE_DESC = 'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
      _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
      _FEED_NAME = 'recommended'
      _PLAYLIST_TITLE = 'Youtube Recommended videos'
  
+
  class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
      IE_DESC = 'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
      _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
@@ -1552,6 +1578,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
      _PLAYLIST_TITLE = 'Youtube Watch Later'
      _PERSONAL_FEED = True
  
+
  class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
      IE_DESC = 'Youtube watch history, "ythistory" keyword (requires authentication)'
      _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
@@ -1559,6 +1586,7 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
      _PERSONAL_FEED = True
      _PLAYLIST_TITLE = 'Youtube Watch History'
  
+
  class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
      IE_NAME = 'youtube:favorites'
      IE_DESC = 'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'