# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
parse_resolution,
try_get,
unified_timestamp,
+ url_or_none,
urljoin,
)
class PeerTubeIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://
- (?:
+ _INSTANCES_RE = r'''(?:
# Taken from https://instances.joinpeertube.org/instances
tube\.openalgeria\.org|
peertube\.pointsecu\.fr|
peertube2\.cpy\.re|
videos\.tcit\.fr|
peertube\.cpy\.re
- )
- /videos/watch/(?P<id>[^/?#&]+)
- '''
+ )'''
+ _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+ _VALID_URL = r'''(?x)
+ (?:
+ peertube:(?P<host>[^:]+):|
+ https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
+ )
+ (?P<id>%s)
+ ''' % (_INSTANCES_RE, _UUID_RE)
_TESTS = [{
'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
'md5': '80f24ff364cc9d333529506a263e7feb',
# nsfw
'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
'only_matching': True,
+ }, {
+ 'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
+ 'only_matching': True,
+ }, {
+ 'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
+ 'only_matching': True,
}]
+ @staticmethod
+ def _extract_peertube_url(webpage, source_url):
+ mobj = re.match(
+ r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)'
+ % PeerTubeIE._UUID_RE, source_url)
+ if mobj and any(p in webpage for p in (
+ '<title>PeerTube<',
+ 'There will be other non JS-based clients to access PeerTube',
+ '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
+ return 'peertube:%s:%s' % mobj.group('host', 'id')
+
+ @staticmethod
+ def _extract_urls(webpage, source_url):
+ entries = re.findall(
+ r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
+ % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
+ if not entries:
+ peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
+ if peertube_url:
+ entries = [peertube_url]
+ return entries
+
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host') or mobj.group('host_2')
+ video_id = mobj.group('id')
video = self._download_json(
- urljoin(url, '/api/v1/videos/%s' % video_id), video_id)
+ 'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
title = video['name']
for file_ in video['files']:
if not isinstance(file_, dict):
continue
- file_url = file_.get('fileUrl')
- if not file_url or not isinstance(file_url, compat_str):
+ file_url = url_or_none(file_.get('fileUrl'))
+ if not file_url:
continue
file_size = int_or_none(file_.get('size'))
format_id = try_get(