Fix W504 and disable W503 (closes #20863)

[youtube-dl.git] / youtube_dl / extractor / peertube.py
diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py

index b086f6f5a806047731da5ccdf08ece752738271b..e03c3d1d3d61ec2fd981776fba2775464b9658d1 100644 (file)
--- a/youtube_dl/extractor/peertube.py
+++ b/youtube_dl/extractor/peertube.py
@@ -1,6 +1,8 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
  from ..compat import compat_str
  from ..utils import (
@@ -8,14 +10,13 @@ from ..utils import (
      parse_resolution,
      try_get,
      unified_timestamp,
+    url_or_none,
      urljoin,
  )
  
  
  class PeerTubeIE(InfoExtractor):
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:
+    _INSTANCES_RE = r'''(?:
                              # Taken from https://instances.joinpeertube.org/instances
                              tube\.openalgeria\.org|
                              peertube\.pointsecu\.fr|
@@ -115,9 +116,15 @@ class PeerTubeIE(InfoExtractor):
                              peertube2\.cpy\.re|
                              videos\.tcit\.fr|
                              peertube\.cpy\.re
-                        )
-                        /videos/watch/(?P<id>[^/?#&]+)
-                    '''
+                        )'''
+    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+    _VALID_URL = r'''(?x)
+                    (?:
+                        peertube:(?P<host>[^:]+):|
+                        https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
+                    )
+                    (?P<id>%s)
+                    ''' % (_INSTANCES_RE, _UUID_RE)
      _TESTS = [{
          'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
          'md5': '80f24ff364cc9d333529506a263e7feb',
@@ -147,13 +154,46 @@ class PeerTubeIE(InfoExtractor):
          # nsfw
          'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
          'only_matching': True,
+    }, {
+        'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
+        'only_matching': True,
+    }, {
+        'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
+        'only_matching': True,
+    }, {
+        'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
+        'only_matching': True,
      }]
  
+    @staticmethod
+    def _extract_peertube_url(webpage, source_url):  # Return 'peertube:<host>:<id>' if source_url is a /videos/watch/<uuid> page whose HTML looks like PeerTube; otherwise falls through and returns None.
+        mobj = re.match(  # re.match only anchors at the start of source_url
+            r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)'
+            % PeerTubeIE._UUID_RE, source_url)
+        if mobj and any(p in webpage for p in (  # host is unrestricted ([^/]+), so also require one of these marker strings in the page
+                '<title>PeerTube<',
+                'There will be other non JS-based clients to access PeerTube',
+                '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
+            return 'peertube:%s:%s' % mobj.group('host', 'id')  # group() with two names yields a (host, id) tuple for the format string
+
+    @staticmethod
+    def _extract_urls(webpage, source_url):  # Return a list of PeerTube URLs for this page: iframe embed URLs if any, else at most one 'peertube:' URL; may be empty.
+        entries = re.findall(  # yields plain URL strings as long as 'url' is the only capturing group (assumes _INSTANCES_RE has none - confirm)
+            r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''  # (?x) because _INSTANCES_RE is written in verbose form; matched URLs may be protocol-relative
+            % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
+        if not entries:  # no embed iframe matched: fall back to checking whether the page itself is a PeerTube watch page
+            peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
+            if peertube_url:
+                entries = [peertube_url]
+        return entries
+
      def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host') or mobj.group('host_2')
+        video_id = mobj.group('id')
  
          video = self._download_json(
-            urljoin(url, '/api/v1/videos/%s' % video_id), video_id)
+            'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
  
          title = video['name']
  
@@ -161,8 +201,8 @@ class PeerTubeIE(InfoExtractor):
          for file_ in video['files']:
              if not isinstance(file_, dict):
                  continue
-            file_url = file_.get('fileUrl')
-            if not file_url or not isinstance(file_url, compat_str):
+            file_url = url_or_none(file_.get('fileUrl'))
+            if not file_url:
                  continue
              file_size = int_or_none(file_.get('size'))
              format_id = try_get(