def test_soundcloud_not_matching_sets(self):
self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])
+ def test_tumblr(self):
+ self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
+ self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
+
if __name__ == '__main__':
unittest.main()
ImdbListIE,
KhanAcademyIE,
EveryonesMixtapeIE,
+ RutubeChannelIE,
)
def test_imdb_list(self):
dl = FakeYDL()
ie = ImdbListIE(dl)
- result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U')
+ result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0')
self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'sMjedvGDd8U')
- self.assertEqual(result['title'], 'Animated and Family Films')
- self.assertTrue(len(result['entries']) >= 48)
+ self.assertEqual(result['id'], 'JFs9NWw6XI0')
+ self.assertEqual(result['title'], 'March 23, 2012 Releases')
+ self.assertEqual(len(result['entries']), 7)
def test_khanacademy_topic(self):
dl = FakeYDL()
self.assertEqual(result['id'], 'm7m0jJAbMQi')
self.assertEqual(result['title'], 'Driving')
self.assertEqual(len(result['entries']), 24)
+
+ def test_rutube_channel(self):
+ dl = FakeYDL()
+ ie = RutubeChannelIE(dl)
+ result = ie.extract('http://rutube.ru/tags/video/1409')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], '1409')
+ self.assertTrue(len(result['entries']) >= 34)
if __name__ == '__main__':
from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE
-from .rutube import RutubeIE
+from .rutube import (
+ RutubeIE,
+ RutubeChannelIE,
+ RutubeMovieIE,
+ RutubePersonIE,
+)
from .servingsys import ServingSysIE
from .sina import SinaIE
from .slashdot import SlashdotIE
+# coding: utf-8
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
from ..utils import (
+ determine_ext,
ExtractorError,
)
+
class ARDIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
- _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
- _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
+ _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+
_TEST = {
- u'url': u'http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640',
- u'file': u'14077640.mp4',
- u'md5': u'6ca8824255460c787376353f9e20bbd8',
- u'info_dict': {
- u"title": u"11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden"
+ 'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786',
+ 'file': '19288786.mp4',
+ 'md5': '515bf47ce209fb3f5a61b7aad364634c',
+ 'info_dict': {
+ 'title': 'Edward Snowden im Interview - Held oder Verräter?',
+ 'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.',
+ 'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
},
- u'skip': u'Requires rtmpdump'
+ 'skip': 'Blocked outside of Germany',
}
def _real_extract(self, url):
else:
video_id = m.group('video_id')
- # determine title and media streams from webpage
- html = self._download_webpage(url, video_id)
- title = re.search(self._TITLE, html).group('title')
- streams = [mo.groupdict() for mo in re.finditer(self._MEDIA_STREAM, html)]
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title')
+ description = self._html_search_meta(
+ 'dcterms.abstract', webpage, 'description')
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ streams = [
+ mo.groupdict()
+ for mo in re.finditer(
+ r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)]
if not streams:
- assert '"fsk"' in html
- raise ExtractorError(u'This video is only available after 8:00 pm')
-
- # choose default media type and highest quality for now
- stream = max([s for s in streams if int(s["media_type"]) == 0],
- key=lambda s: int(s["quality"]))
-
- # there's two possibilities: RTMP stream or HTTP download
- info = {'id': video_id, 'title': title, 'ext': 'mp4'}
- if stream['rtmp_url']:
- self.to_screen(u'RTMP download detected')
- assert stream['video_url'].startswith('mp4:')
- info["url"] = stream["rtmp_url"]
- info["play_path"] = stream['video_url']
- else:
- assert stream["video_url"].endswith('.mp4')
- info["url"] = stream["video_url"]
- return [info]
+ if '"fsk"' in webpage:
+ raise ExtractorError('This video is only available after 20:00')
+
+ formats = []
+ for s in streams:
+ format = {
+ 'quality': int(s['quality']),
+ }
+ if s.get('rtmp_url'):
+ format['protocol'] = 'rtmp'
+ format['url'] = s['rtmp_url']
+ format['playpath'] = s['video_url']
+ else:
+ format['url'] = s['video_url']
+
+ quality_name = self._search_regex(
+ r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'],
+ 'quality name', default='NA')
+ format['format_id'] = '%s-%s-%s-%s' % (
+ determine_ext(format['url']), quality_name, s['media_type'],
+ s['quality'])
+
+ formats.append(format)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ }
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, name)
- ooyala_code = self._search_regex(r'<source src="http://player.ooyala.com/player/[^/]+/([^".]+)', webpage, u'ooyala url')
- return OoyalaIE._build_url_result(ooyala_code)
+ ooyala_url = self._twitter_search_player(webpage)
+ return self.url_result(ooyala_url, OoyalaIE.ie_key())
class BrightcoveIE(InfoExtractor):
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
- _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
_TESTS = [
{
'description': 'md5:363109c02998fee92ec02211bd8000df',
'uploader': 'National Ballet of Canada',
},
- },
+ }
]
@classmethod
"""Try to extract the brightcove url from the wepbage, returns None
if it can't be found
"""
+
+ url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
+ if url_m:
+ return url_m.group(1)
+
m_brightcove = re.search(
r'''(?sx)<object
(?:
return self._extract_video_info(video_info)
def _get_playlist_info(self, player_key):
- playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
- player_key, 'Downloading playlist information')
+ info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
+ playlist_info = self._download_webpage(
+ info_url, player_key, 'Downloading playlist information')
json_data = json.loads(playlist_info)
if 'videoList' not in json_data:
-# encoding: utf-8
+from __future__ import unicode_literals
import re
The type of provided URL (video or playlist) is determined according to
meta Search.PageType from web page HTML rather than URL itself, as it is
- not always possible to do.
+ not always possible to do.
'''
- IE_DESC = u'Channel 9'
- IE_NAME = u'channel9'
+ IE_DESC = 'Channel 9'
+ IE_NAME = 'channel9'
_VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
_TESTS = [
{
- u'url': u'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
- u'file': u'Events_TechEd_Australia_2013_KOS002.mp4',
- u'md5': u'bbd75296ba47916b754e73c3a4bbdf10',
- u'info_dict': {
- u'title': u'Developer Kick-Off Session: Stuff We Love',
- u'description': u'md5:c08d72240b7c87fcecafe2692f80e35f',
- u'duration': 4576,
- u'thumbnail': u'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
- u'session_code': u'KOS002',
- u'session_day': u'Day 1',
- u'session_room': u'Arena 1A',
- u'session_speakers': [ u'Ed Blankenship', u'Andrew Coates', u'Brady Gaster', u'Patrick Klug', u'Mads Kristensen' ],
+ 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
+ 'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
+ 'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
+ 'info_dict': {
+ 'title': 'Developer Kick-Off Session: Stuff We Love',
+ 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
+ 'duration': 4576,
+ 'thumbnail': 'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
+ 'session_code': 'KOS002',
+ 'session_day': 'Day 1',
+ 'session_room': 'Arena 1A',
+ 'session_speakers': [ 'Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen' ],
},
},
{
- u'url': u'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
- u'file': u'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
- u'md5': u'b43ee4529d111bc37ba7ee4f34813e68',
- u'info_dict': {
- u'title': u'Self-service BI with Power BI - nuclear testing',
- u'description': u'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
- u'duration': 1540,
- u'thumbnail': u'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
- u'authors': [ u'Mike Wilmot' ],
+ 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
+ 'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
+ 'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
+ 'info_dict': {
+ 'title': 'Self-service BI with Power BI - nuclear testing',
+ 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
+ 'duration': 1540,
+ 'thumbnail': 'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
+ 'authors': [ 'Mike Wilmot' ],
},
}
]
return 0
units = m.group('units')
try:
- exponent = [u'B', u'KB', u'MB', u'GB', u'TB', u'PB', u'EB', u'ZB', u'YB'].index(units.upper())
+ exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper())
except ValueError:
return 0
size = float(m.group('size'))
'url': x.group('url'),
'format_id': x.group('quality'),
'format_note': x.group('note'),
- 'format': u'%s (%s)' % (x.group('quality'), x.group('note')),
+ 'format': '%s (%s)' % (x.group('quality'), x.group('note')),
'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
'preference': self._known_formats.index(x.group('quality')),
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
return formats
def _extract_title(self, html):
- title = self._html_search_meta(u'title', html, u'title')
+ title = self._html_search_meta('title', html, 'title')
if title is None:
title = self._og_search_title(html)
- TITLE_SUFFIX = u' (Channel 9)'
+ TITLE_SUFFIX = ' (Channel 9)'
if title is not None and title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)]
return title
m = re.search(DESCRIPTION_REGEX, html)
if m is not None:
return m.group('description')
- return self._html_search_meta(u'description', html, u'description')
+ return self._html_search_meta('description', html, 'description')
def _extract_duration(self, html):
m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
# Nothing to download
if len(formats) == 0 and slides is None and zip_ is None:
- self._downloader.report_warning(u'None of recording, slides or zip are available for %s' % content_path)
+ self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path)
return
# Extract meta
return contents
def _extract_list(self, content_path):
- rss = self._download_xml(self._RSS_URL % content_path, content_path, u'Downloading RSS')
+ rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
entries = [self.url_result(session_url.text, 'Channel9')
for session_url in rss.findall('./channel/item/link')]
title_text = rss.find('./channel/title').text
mobj = re.match(self._VALID_URL, url)
content_path = mobj.group('contentpath')
- webpage = self._download_webpage(url, content_path, u'Downloading web page')
+ webpage = self._download_webpage(url, content_path, 'Downloading web page')
page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage)
if page_type_m is None:
- raise ExtractorError(u'Search.PageType not found, don\'t know how to process this page', expected=True)
+ raise ExtractorError('Search.PageType not found, don\'t know how to process this page', expected=True)
page_type = page_type_m.group('pagetype')
if page_type == 'List': # List page, may contain list of 'item'-like objects
elif page_type == 'Session': # Event session page, may contain downloadable content
return self._extract_session(webpage, content_path)
else:
- raise ExtractorError(u'Unexpected Search.PageType %s' % page_type, expected=True)
\ No newline at end of file
+ raise ExtractorError('Unexpected Search.PageType %s' % page_type, expected=True)
\ No newline at end of file
class ComedyCentralIE(MTVServicesInfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
+ _VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
(video-clips|episodes|cc-studios|video-collections)
/(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
@staticmethod
def _transform_rtmp_url(rtmp_video_url):
- m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
+ m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
if not m:
raise ExtractorError('Cannot transform RTMP url')
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
}
return RATING_TABLE.get(rating.lower(), None)
+ def _twitter_search_player(self, html):
+ return self._html_search_meta('twitter:player', html,
+ 'twitter card player')
+
def _sort_formats(self, formats):
if not formats:
raise ExtractorError(u'No video formats found')
# encoding: utf-8
+from __future__ import unicode_literals
+
import re, base64, zlib
from hashlib import sha1
from math import pow, sqrt, floor
)
class CrunchyrollIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)?(?:www\.)?(?P<url>crunchyroll\.com/[^/]*/[^/?&]*?(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
- u'url': u'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
- u'file': u'645513.flv',
- #u'md5': u'b1639fd6ddfaa43788c85f6d1dddd412',
- u'info_dict': {
- u'title': u'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
- u'description': u'md5:2d17137920c64f2f49981a7797d275ef',
- u'thumbnail': u'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
- u'uploader': u'Yomiuri Telecasting Corporation (YTV)',
- u'upload_date': u'20131013',
+ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
+ 'file': '645513.flv',
+ #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
+ 'info_dict': {
+ 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
+ 'description': 'md5:2d17137920c64f2f49981a7797d275ef',
+ 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
+ 'uploader': 'Yomiuri Telecasting Corporation (YTV)',
+ 'upload_date': '20131013',
},
- u'params': {
+ 'params': {
# rtmp
- u'skip_download': True,
+ 'skip_download': True,
},
}]
_FORMAT_IDS = {
- u'360': (u'60', u'106'),
- u'480': (u'61', u'106'),
- u'720': (u'62', u'106'),
- u'1080': (u'80', u'108'),
+ '360': ('60', '106'),
+ '480': ('61', '106'),
+ '720': ('62', '106'),
+ '1080': ('80', '108'),
}
def _decrypt_subtitles(self, data, iv, id):
num3 = key ^ num1
num4 = num3 ^ (num3 >> 3) ^ num2
prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
- shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode(u'ascii')).digest())
+ shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
# Extend 160 Bit hash to 256 Bit
return shaHash + [0] * 12
def _convert_subtitles_to_srt(self, subtitles):
i=1
- output = u''
+ output = ''
for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
- start = start.replace(u'.', u',')
- end = end.replace(u'.', u',')
+ start = start.replace('.', ',')
+ end = end.replace('.', ',')
text = clean_html(text)
- text = text.replace(u'\\N', u'\n')
+ text = text.replace('\\N', '\n')
if not text:
continue
- output += u'%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
+ output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
i+=1
return output
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('video_id')
+
+ if mobj.group('prefix') == 'm':
+ mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
+ webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
+ else:
+ webpage_url = 'http://www.' + mobj.group('url')
- webpage_url = u'http://www.' + mobj.group('url')
- video_id = mobj.group(u'video_id')
- webpage = self._download_webpage(webpage_url, video_id)
- note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, u'trailer-notice', default=u'')
+ webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
+ note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
if note_m:
raise ExtractorError(note_m)
- video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, u'video_title', flags=re.DOTALL)
- video_title = re.sub(r' {2,}', u' ', video_title)
- video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, u'video_description', default=u'')
+ video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
+ video_title = re.sub(r' {2,}', ' ', video_title)
+ video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
if not video_description:
video_description = None
- video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, u'video_upload_date', fatal=False, flags=re.DOTALL)
+ video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
if video_upload_date:
video_upload_date = unified_strdate(video_upload_date)
- video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, u'video_uploader', fatal=False, flags=re.DOTALL)
+ video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
- playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, u'playerdata_url'))
+ playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
- playerdata_req.data = compat_urllib_parse.urlencode({u'current_page': webpage_url})
- playerdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
- playerdata = self._download_webpage(playerdata_req, video_id, note=u'Downloading media info')
+ playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
+ playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
- stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, u'stream_id')
- video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, u'thumbnail', fatal=False)
+ stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
+ video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
formats = []
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
- video_format = fmt+u'p'
- streamdata_req = compat_urllib_request.Request(u'http://www.crunchyroll.com/xml/')
+ video_format = fmt+'p'
+ streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
# urlencode doesn't work!
- streamdata_req.data = u'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+u'&media%5Fid='+stream_id+u'&video%5Fformat='+stream_format
- streamdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
- streamdata_req.add_header(u'Content-Length', str(len(streamdata_req.data)))
- streamdata = self._download_webpage(streamdata_req, video_id, note=u'Downloading media info for '+video_format)
- video_url = self._search_regex(r'<host>([^<]+)', streamdata, u'video_url')
- video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, u'video_play_path')
+ streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
+ streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
+ streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format)
+ video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url')
+ video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path')
formats.append({
- u'url': video_url,
- u'play_path': video_play_path,
- u'ext': 'flv',
- u'format': video_format,
- u'format_id': video_format,
+ 'url': video_url,
+ 'play_path': video_play_path,
+ 'ext': 'flv',
+ 'format': video_format,
+ 'format_id': video_format,
})
subtitles = {}
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
- sub_page = self._download_webpage(u'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
- video_id, note=u'Downloading subtitles for '+sub_name)
- id = self._search_regex(r'id=\'([0-9]+)', sub_page, u'subtitle_id', fatal=False)
- iv = self._search_regex(r'<iv>([^<]+)', sub_page, u'subtitle_iv', fatal=False)
- data = self._search_regex(r'<data>([^<]+)', sub_page, u'subtitle_data', fatal=False)
+ sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
+ video_id, note='Downloading subtitles for '+sub_name)
+ id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
+ iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
+ data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
if not id or not iv or not data:
continue
id = int(id)
iv = base64.b64decode(iv)
data = base64.b64decode(data)
- subtitle = self._decrypt_subtitles(data, iv, id).decode(u'utf-8')
- lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, u'subtitle_lang_code', fatal=False)
+ subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+ lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
return {
- u'id': video_id,
- u'title': video_title,
- u'description': video_description,
- u'thumbnail': video_thumbnail,
- u'uploader': video_uploader,
- u'upload_date': video_upload_date,
- u'subtitles': subtitles,
- u'formats': formats,
+ 'id': video_id,
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
+ 'upload_date': video_upload_date,
+ 'subtitles': subtitles,
+ 'formats': formats,
}
# encoding: utf-8
+
+from __future__ import unicode_literals
+
import re
import json
class PluzzIE(FranceTVBaseInfoExtractor):
- IE_NAME = u'pluzz.francetv.fr'
+ IE_NAME = 'pluzz.francetv.fr'
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
# Can't use tests, videos expire in 7 days
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
- IE_NAME = u'francetvinfo.fr'
+ IE_NAME = 'francetvinfo.fr'
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
_TEST = {
- u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
- u'file': u'84981923.mp4',
- u'info_dict': {
- u'title': u'Soir 3',
+ 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
+ 'file': '84981923.mp4',
+ 'info_dict': {
+ 'title': 'Soir 3',
},
- u'params': {
- u'skip_download': True,
+ 'params': {
+ 'skip_download': True,
},
}
mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
- video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
+ video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id')
return self._extract_video(video_id)
class FranceTVIE(FranceTVBaseInfoExtractor):
- IE_NAME = u'francetv'
- IE_DESC = u'France 2, 3, 4, 5 and Ô'
+ IE_NAME = 'francetv'
+ IE_DESC = 'France 2, 3, 4, 5 and Ô'
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
(?:
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
_TESTS = [
# france2
{
- u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
- u'file': u'75540104.mp4',
- u'info_dict': {
- u'title': u'13h15, le samedi...',
- u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
+ 'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
+ 'file': '75540104.mp4',
+ 'info_dict': {
+ 'title': '13h15, le samedi...',
+ 'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d',
},
- u'params': {
+ 'params': {
# m3u8 download
- u'skip_download': True,
+ 'skip_download': True,
},
},
# france3
{
- u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
- u'info_dict': {
- u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
- u'ext': u'flv',
- u'title': u'Le scandale du prix des médicaments',
- u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
+ 'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
+ 'info_dict': {
+ 'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
+ 'ext': 'flv',
+ 'title': 'Le scandale du prix des médicaments',
+ 'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
},
- u'params': {
+ 'params': {
# rtmp download
- u'skip_download': True,
+ 'skip_download': True,
},
},
# france4
{
- u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
- u'info_dict': {
- u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
- u'ext': u'flv',
- u'title': u'Hero Corp Making of - Extrait 1',
- u'description': u'md5:c87d54871b1790679aec1197e73d650a',
+ 'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
+ 'info_dict': {
+ 'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
+ 'ext': 'flv',
+ 'title': 'Hero Corp Making of - Extrait 1',
+ 'description': 'md5:c87d54871b1790679aec1197e73d650a',
},
- u'params': {
+ 'params': {
# rtmp download
- u'skip_download': True,
+ 'skip_download': True,
},
},
# france5
{
- u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
- u'info_dict': {
- u'id': u'92837968',
- u'ext': u'mp4',
- u'title': u'C à dire ?!',
- u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
+ 'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
+ 'info_dict': {
+ 'id': '92837968',
+ 'ext': 'mp4',
+ 'title': 'C à dire ?!',
+ 'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
},
- u'params': {
+ 'params': {
# m3u8 download
- u'skip_download': True,
+ 'skip_download': True,
},
},
# franceo
{
- u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
- u'info_dict': {
- u'id': u'92327925',
- u'ext': u'mp4',
- u'title': u'Infô-Afrique',
- u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
+ 'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
+ 'info_dict': {
+ 'id': '92327925',
+ 'ext': 'mp4',
+ 'title': 'Infô-Afrique',
+ 'description': 'md5:ebf346da789428841bee0fd2a935ea55',
},
- u'params': {
+ 'params': {
# m3u8 download
- u'skip_download': True,
+ 'skip_download': True,
},
- u'skip': u'The id changes frequently',
+ 'skip': 'The id changes frequently',
},
]
'\.fr/\?id-video=([^"/&]+)'),
(r'<a class="video" id="ftv_player_(.+?)"'),
]
- video_id = self._html_search_regex(id_res, webpage, u'video ID')
+ video_id = self._html_search_regex(id_res, webpage, 'video ID')
else:
video_id = mobj.group('id')
return self._extract_video(video_id)
class GenerationQuoiIE(InfoExtractor):
- IE_NAME = u'france2.fr:generation-quoi'
+ IE_NAME = 'france2.fr:generation-quoi'
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
_TEST = {
- u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous',
- u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4',
- u'info_dict': {
- u'title': u'Génération Quoi - Garde à Vous',
- u'uploader': u'Génération Quoi',
+ 'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
+ 'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
+ 'info_dict': {
+ 'title': 'Génération Quoi - Garde à Vous',
+ 'uploader': 'Génération Quoi',
},
- u'params': {
+ 'params': {
# It uses Dailymotion
- u'skip_download': True,
+ 'skip_download': True,
},
}
class CultureboxIE(FranceTVBaseInfoExtractor):
- IE_NAME = u'culturebox.francetvinfo.fr'
+ IE_NAME = 'culturebox.francetvinfo.fr'
_VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TEST = {
- u'url': u'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
- u'info_dict': {
- u'id': u'EV_6785',
- u'ext': u'mp4',
- u'title': u'Einstein on the beach au Théâtre du Châtelet',
- u'description': u'md5:9ce2888b1efefc617b5e58b3f6200eeb',
+ 'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
+ 'info_dict': {
+ 'id': 'EV_6785',
+ 'ext': 'mp4',
+ 'title': 'Einstein on the beach au Théâtre du Châtelet',
+ 'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb',
},
- u'params': {
+ 'params': {
# m3u8 download
- u'skip_download': True,
+ 'skip_download': True,
},
}
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, name)
- video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, u'video id')
+ video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id')
return self._extract_video(video_id)
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
class FunnyOrDieIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
_TEST = {
- u'url': u'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
- u'file': u'0732f586d7.mp4',
- u'md5': u'f647e9e90064b53b6e046e75d0241fbd',
- u'info_dict': {
- u"description": u"Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.",
- u"title": u"Heart-Shaped Box: Literal Video Version"
- }
+ 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
+ 'file': '0732f586d7.mp4',
+ 'md5': 'f647e9e90064b53b6e046e75d0241fbd',
+ 'info_dict': {
+ 'description': ('Lyrics changed to match the video. Spoken cameo '
+ 'by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a '
+ 'concept by Dustin McLean (DustFilms.com). Performed, edited, '
+ 'and written by David A. Scott.'),
+ 'title': 'Heart-Shaped Box: Literal Video Version',
+ },
}
def _real_extract(self, url):
video_url = self._search_regex(
[r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
- webpage, u'video URL', flags=re.DOTALL)
+ webpage, 'video URL', flags=re.DOTALL)
- info = {
+ return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
}
- return [info]
'title': 'R\u00e9gis plante sa Jeep',
}
},
- # embedded vimeo video
- {
- 'add_ie': ['Vimeo'],
- 'url': 'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
- 'file': '22444065.mp4',
- 'md5': '2903896e23df39722c33f015af0666e2',
- 'info_dict': {
- 'title': 'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011',
- 'uploader_id': 'skillsmatter',
- 'uploader': 'Skills Matter',
- }
- },
# bandcamp page with custom domain
{
'add_ie': ['Bandcamp'],
'skip_download': True,
},
},
+ {
+ # https://github.com/rg3/youtube-dl/issues/2253
+ 'url': 'http://bcove.me/i6nfkrc3',
+ 'file': '3101154703001.mp4',
+ 'md5': '0ba9446db037002366bab3b3eb30c88c',
+ 'info_dict': {
+ 'title': 'Still no power',
+ 'uploader': 'thestar.com',
+ 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
+ },
+ 'add_ie': ['Brightcove'],
+ },
# Direct link to a video
{
'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
# Look for embedded (iframe) Vimeo player
mobj = re.search(
- r'<iframe[^>]+?src="((?:https?:)?//player.vimeo.com/video/.+?)"', webpage)
+ r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
if mobj:
player_url = unescapeHTML(mobj.group(1))
surl = smuggle_url(player_url, {'Referer': url})
# Look for embedded (swf embed) Vimeo player
mobj = re.search(
- r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage)
+ r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
if mobj:
return self.url_result(mobj.group(1), 'Vimeo')
return self.url_result(mobj.group(1), 'Aparat')
# Look for MPORA videos
- mobj = re.search(r'<iframe .*?src="(http://mpora\.com/videos/[^"]+)"', webpage)
+ mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora')
# Look for embedded Huffington Post player
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'HuffPost')
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
# Look for gorilla-vid style embedding
- mobj = re.search(r'(?s)jw_plugins.*?file:\s*["\'](.*?)["\']', webpage)
+ mobj = re.search(r'(?s)(?:jw_plugins|JWPlayerOptions).*?file\s*:\s*["\'](.*?)["\']', webpage)
if mobj is None:
# Broaden the search a little bit
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
list_id = mobj.group('id')
webpage = self._download_webpage(url, list_id)
- list_code = self._search_regex(
- r'(?s)<div\s+class="list\sdetail">(.*?)class="see-more"',
- webpage, 'list code')
entries = [
self.url_result('http://www.imdb.com' + m, 'Imdb')
- for m in re.findall(r'href="(/video/imdb/vi[^"]+)"', webpage)]
+ for m in re.findall(r'href="(/video/imdb/vi[^"]+)"\s+data-type="playlist"', webpage)]
list_title = self._html_search_regex(
r'<h1 class="header">(.*?)</h1>', webpage, 'list title')
+from __future__ import unicode_literals
+
import base64
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
-
- ExtractorError,
)
class InfoQIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
_TEST = {
- u"name": u"InfoQ",
- u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
- u"file": u"12-jan-pythonthings.mp4",
- u"info_dict": {
- u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
- u"title": u"A Few of My Favorite [Python] Things"
+ "name": "InfoQ",
+ "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
+ "file": "12-jan-pythonthings.mp4",
+ "info_dict": {
+ "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
+ "title": "A Few of My Favorite [Python] Things",
+ },
+ "params": {
+ "skip_download": True,
},
- u"params": {
- u"skip_download": True
- }
}
def _real_extract(self, url):
self.report_extraction(url)
# Extract video URL
- mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video url')
- real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
+ encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
+ real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
# Extract title
video_title = self._search_regex(r'contentTitle = "(.*?)";',
- webpage, u'title')
+ webpage, 'title')
# Extract description
video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
- webpage, u'description', fatal=False)
+ webpage, 'description', fatal=False)
video_filename = video_url.split('/')[-1]
video_id, extension = video_filename.split('.')
- info = {
+ return {
'id': video_id,
'url': video_url,
- 'uploader': None,
- 'upload_date': None,
'title': video_title,
'ext': extension, # Extension is always(?) mp4, but seems to be flv
- 'thumbnail': None,
'description': video_description,
}
-
- return [info]
\ No newline at end of file
# encoding: utf-8
+from __future__ import unicode_literals
import re
import json
class IviIE(InfoExtractor):
- IE_DESC = u'ivi.ru'
- IE_NAME = u'ivi'
+ IE_DESC = 'ivi.ru'
+ IE_NAME = 'ivi'
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
_TESTS = [
# Single movie
{
- u'url': u'http://www.ivi.ru/watch/53141',
- u'file': u'53141.mp4',
- u'md5': u'6ff5be2254e796ed346251d117196cf4',
- u'info_dict': {
- u'title': u'Иван Васильевич меняет профессию',
- u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346',
- u'duration': 5498,
- u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
+ 'url': 'http://www.ivi.ru/watch/53141',
+ 'file': '53141.mp4',
+ 'md5': '6ff5be2254e796ed346251d117196cf4',
+ 'info_dict': {
+ 'title': 'Иван Васильевич меняет профессию',
+ 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
+ 'duration': 5498,
+ 'thumbnail': 'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
},
- u'skip': u'Only works from Russia',
+ 'skip': 'Only works from Russia',
},
# Serial's serie
{
- u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
- u'file': u'74791.mp4',
- u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9',
- u'info_dict': {
- u'title': u'Дежурный ангел - 1 серия',
- u'duration': 2490,
- u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
+ 'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
+ 'file': '74791.mp4',
+ 'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
+ 'info_dict': {
+ 'title': 'Дежурный ангел - 1 серия',
+ 'duration': 2490,
+ 'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
},
- u'skip': u'Only works from Russia',
+ 'skip': 'Only works from Russia',
}
]
-
+
# Sorted by quality
_known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
return m.group('description') if m is not None else None
def _extract_comment_count(self, html):
- m = re.search(u'(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
+    m = re.search(r'(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
return int(m.group('commentcount')) if m is not None else 0
def _real_extract(self, url):
api_url = 'http://api.digitalaccess.ru/api/json/'
- data = {u'method': u'da.content.get',
- u'params': [video_id, {u'site': u's183',
- u'referrer': u'http://www.ivi.ru/watch/%s' % video_id,
- u'contentid': video_id
- }
- ]
+ data = {'method': 'da.content.get',
+ 'params': [video_id, {'site': 's183',
+ 'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
+ 'contentid': video_id
+ }
+ ]
}
request = compat_urllib_request.Request(api_url, json.dumps(data))
- video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON')
+ video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON')
video_json = json.loads(video_json_page)
- if u'error' in video_json:
- error = video_json[u'error']
- if error[u'origin'] == u'NoRedisValidData':
- raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
- raise ExtractorError(u'Unable to download video %s: %s' % (video_id, error[u'message']), expected=True)
+ if 'error' in video_json:
+ error = video_json['error']
+ if error['origin'] == 'NoRedisValidData':
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+ raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True)
- result = video_json[u'result']
+ result = video_json['result']
formats = [{
- 'url': x[u'url'],
- 'format_id': x[u'content_format'],
- 'preference': self._known_formats.index(x[u'content_format']),
- } for x in result[u'files'] if x[u'content_format'] in self._known_formats]
+ 'url': x['url'],
+ 'format_id': x['content_format'],
+ 'preference': self._known_formats.index(x['content_format']),
+ } for x in result['files'] if x['content_format'] in self._known_formats]
self._sort_formats(formats)
if not formats:
- raise ExtractorError(u'No media links available for %s' % video_id)
+ raise ExtractorError('No media links available for %s' % video_id)
- duration = result[u'duration']
- compilation = result[u'compilation']
- title = result[u'title']
+ duration = result['duration']
+ compilation = result['compilation']
+ title = result['title']
title = '%s - %s' % (compilation, title) if compilation is not None else title
- previews = result[u'preview']
+ previews = result['preview']
previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
- thumbnail = previews[-1][u'url'] if len(previews) > 0 else None
+ thumbnail = previews[-1]['url'] if len(previews) > 0 else None
- video_page = self._download_webpage(url, video_id, u'Downloading video page')
+ video_page = self._download_webpage(url, video_id, 'Downloading video page')
description = self._extract_description(video_page)
comment_count = self._extract_comment_count(video_page)
class IviCompilationIE(InfoExtractor):
- IE_DESC = u'ivi.ru compilations'
- IE_NAME = u'ivi:compilation'
+ IE_DESC = 'ivi.ru compilations'
+ IE_NAME = 'ivi:compilation'
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
def _extract_entries(self, html, compilation_id):
season_id = mobj.group('seasonid')
if season_id is not None: # Season link
- season_page = self._download_webpage(url, compilation_id, u'Downloading season %s web page' % season_id)
+ season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id)
playlist_id = '%s/season%s' % (compilation_id, season_id)
- playlist_title = self._html_search_meta(u'title', season_page, u'title')
+ playlist_title = self._html_search_meta('title', season_page, 'title')
entries = self._extract_entries(season_page, compilation_id)
else: # Compilation link
- compilation_page = self._download_webpage(url, compilation_id, u'Downloading compilation web page')
+ compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
playlist_id = compilation_id
- playlist_title = self._html_search_meta(u'title', compilation_page, u'title')
+ playlist_title = self._html_search_meta('title', compilation_page, 'title')
seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
if len(seasons) == 0: # No seasons in this compilation
entries = self._extract_entries(compilation_page, compilation_id)
else:
entries = []
for season_id in seasons:
- season_page = self._download_webpage('http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
- compilation_id, u'Downloading season %s web page' % season_id)
+ season_page = self._download_webpage(
+ 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
+ compilation_id, 'Downloading season %s web page' % season_id)
entries.extend(self._extract_entries(season_page, compilation_id))
return self.playlist_result(entries, playlist_id, playlist_title)
\ No newline at end of file
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
class KeekIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
- IE_NAME = u'keek'
+ IE_NAME = 'keek'
_TEST = {
- u'url': u'https://www.keek.com/ytdl/keeks/NODfbab',
- u'file': u'NODfbab.mp4',
- u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
- u'info_dict': {
- u"uploader": u"ytdl",
- u"title": u"test chars: \"'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
- }
+ 'url': 'https://www.keek.com/ytdl/keeks/NODfbab',
+ 'file': 'NODfbab.mp4',
+ 'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
+ 'info_dict': {
+ 'uploader': 'ytdl',
+ 'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .',
+ },
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('videoID')
- video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
- thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
+ video_url = 'http://cdn.keek.com/keek/video/%s' % video_id
+ thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
webpage = self._download_webpage(url, video_id)
- video_title = self._og_search_title(webpage)
-
- uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
- webpage, u'uploader', fatal=False)
-
- info = {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'mp4',
- 'title': video_title,
- 'thumbnail': thumbnail,
- 'uploader': uploader
+ uploader = self._html_search_regex(
+ r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
+ webpage, 'uploader', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': self._og_search_title(webpage),
+ 'thumbnail': thumbnail,
+ 'uploader': uploader
}
- return [info]
'title': 'IL DIVO',
'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
'duration': 6254,
- }
+ },
+ 'skip': 'Blocked in the US',
}
def _real_extract(self, url):
+from __future__ import unicode_literals
+
+import json
import re
from .common import InfoExtractor
class LiveLeakIE(InfoExtractor):
-
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
- IE_NAME = u'liveleak'
- _TEST = {
- u'url': u'http://www.liveleak.com/view?i=757_1364311680',
- u'file': u'757_1364311680.mp4',
- u'md5': u'0813c2430bea7a46bf13acf3406992f4',
- u'info_dict': {
- u"description": u"extremely bad day for this guy..!",
- u"uploader": u"ljfriel2",
- u"title": u"Most unlucky car accident"
+ _TESTS = [{
+ 'url': 'http://www.liveleak.com/view?i=757_1364311680',
+ 'file': '757_1364311680.mp4',
+ 'md5': '0813c2430bea7a46bf13acf3406992f4',
+ 'info_dict': {
+ 'description': 'extremely bad day for this guy..!',
+ 'uploader': 'ljfriel2',
+ 'title': 'Most unlucky car accident'
}
- }
+ },
+ {
+ 'url': 'http://www.liveleak.com/view?i=f93_1390833151',
+ 'file': 'f93_1390833151.mp4',
+ 'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
+ 'info_dict': {
+ 'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
+ 'uploader': 'ARD_Stinkt',
+ 'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
+ }
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('video_id')
-
webpage = self._download_webpage(url, video_id)
+ sources_raw = self._search_regex(
+ r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
+ if sources_raw is None:
+ sources_raw = '[{ %s}]' % (
+ self._search_regex(r'(file: ".*?"),', webpage, 'video URL'))
- video_url = self._search_regex(r'file: "(.*?)",',
- webpage, u'video URL')
+ sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
+ sources = json.loads(sources_json)
- video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
+ formats = [{
+ 'format_note': s.get('label'),
+ 'url': s['file'],
+ } for s in sources]
+ self._sort_formats(formats)
+ video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
video_description = self._og_search_description(webpage)
+ video_uploader = self._html_search_regex(
+ r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
- video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
- webpage, u'uploader', fatal=False)
-
- info = {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'mp4',
+ return {
+ 'id': video_id,
'title': video_title,
'description': video_description,
- 'uploader': video_uploader
+ 'uploader': video_uploader,
+ 'formats': formats,
}
-
- return [info]
'info_dict': {
"title": "Bien dur",
"age_limit": 18,
- }
+ },
+ 'skip': 'This video has been deleted.'
}
def _real_extract(self, url):
+from __future__ import unicode_literals
import os.path
from .common import InfoExtractor
class MySpassIE(InfoExtractor):
_VALID_URL = r'http://www\.myspass\.de/.*'
_TEST = {
- u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
- u'file': u'11741.mp4',
- u'md5': u'0b49f4844a068f8b33f4b7c88405862b',
- u'info_dict': {
- u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
- u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
- }
+ 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
+ 'file': '11741.mp4',
+ 'md5': '0b49f4844a068f8b33f4b7c88405862b',
+ 'info_dict': {
+ "description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
+ "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2",
+ },
}
def _real_extract(self, url):
# extract values from metadata
url_flv_el = metadata.find('url_flv')
if url_flv_el is None:
- raise ExtractorError(u'Unable to extract download url')
+ raise ExtractorError('Unable to extract download url')
video_url = url_flv_el.text
- extension = os.path.splitext(video_url)[1][1:]
title_el = metadata.find('title')
if title_el is None:
- raise ExtractorError(u'Unable to extract title')
+ raise ExtractorError('Unable to extract title')
title = title_el.text
format_id_el = metadata.find('format_id')
if format_id_el is None:
thumbnail = imagePreview_el.text
else:
thumbnail = None
- info = {
+
+ return {
'id': video_id,
'url': video_url,
'title': title,
- 'ext': extension,
'format': format,
'thumbnail': thumbnail,
- 'description': description
+ 'description': description,
}
- return [info]
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
-)
class NBAIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
_TEST = {
- u'url': u'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
- u'file': u'0021200253-okc-bkn-recap.nba.mp4',
- u'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
- u'info_dict': {
- u"description": u"Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.",
- u"title": u"Thunder vs. Nets"
- }
+ 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
+    'file': '0021200253-okc-bkn-recap.nba.mp4',
+    'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
+ 'info_dict': {
+ 'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
+ 'title': 'Thunder vs. Nets',
+ },
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
-
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
- video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
+ video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
shortened_video_id = video_id.rpartition('/')[2]
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
- # It isn't there in the HTML it returns to us
- # uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
-
description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
- info = {
+ return {
'id': shortened_video_id,
'url': video_url,
'ext': 'mp4',
'title': title,
- # 'uploader_date': uploader_date,
'description': description,
}
- return [info]
+from __future__ import unicode_literals
+
import json
import re
_VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
_TEST = {
- u"url": u"http://9gag.tv/v/1912",
- u"file": u"1912.mp4",
- u"info_dict": {
- u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
- u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
+ "url": "http://9gag.tv/v/1912",
+ "file": "1912.mp4",
+ "info_dict": {
+ "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
+ "title": "\"People Are Awesome 2013\" Is Absolutely Awesome"
},
- u'add_ie': [u'Youtube']
+ 'add_ie': ['Youtube']
}
def _real_extract(self, url):
webpage = self._download_webpage(url, video_id)
data_json = self._html_search_regex(r'''(?x)
<div\s*id="tv-video"\s*data-video-source="youtube"\s*
- data-video-meta="([^"]+)"''', webpage, u'video metadata')
+ data-video-meta="([^"]+)"''', webpage, 'video metadata')
data = json.loads(data_json)
from ..utils import unescapeHTML
class OoyalaIE(InfoExtractor):
- _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
+ _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
_TEST = {
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
class PornHdIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
_TEST = {
- u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
- u'file': u'1962.flv',
- u'md5': u'35272469887dca97abd30abecc6cdf75',
- u'info_dict': {
- u"title": u"sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
- u"age_limit": 18,
+ 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
+ 'file': '1962.flv',
+ 'md5': '35272469887dca97abd30abecc6cdf75',
+ 'info_dict': {
+ "title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
+ "age_limit": 18,
}
}
webpage = self._download_webpage(url, video_id)
- video_url = self._html_search_regex(
- r'&hd=(http.+?)&', webpage, u'video URL')
- video_url = compat_urllib_parse.unquote(video_url)
+ next_url = self._html_search_regex(
+ r'&hd=(http.+?)&', webpage, 'video URL')
+ next_url = compat_urllib_parse.unquote(next_url)
+
+ video_url = self._download_webpage(
+ next_url, video_id, note='Retrieving video URL',
+ errnote='Could not retrieve video URL')
age_limit = 18
return {
+# encoding: utf-8
+from __future__ import unicode_literals
+
import json
import re
class RBMARadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
_TEST = {
- u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
- u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3',
- u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95',
- u'info_dict': {
- u"uploader_id": u"ford-lopatin",
- u"location": u"Spain",
- u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
- u"uploader": u"Ford & Lopatin",
- u"title": u"Live at Primavera Sound 2011"
- }
+ 'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
+ 'file': 'ford-lopatin-live-at-primavera-sound-2011.mp3',
+ 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
+ 'info_dict': {
+ "uploader_id": "ford-lopatin",
+ "location": "Spain",
+ "description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
+ "uploader": "Ford & Lopatin",
+ "title": "Live at Primavera Sound 2011",
+ },
}
def _real_extract(self, url):
webpage = self._download_webpage(url, video_id)
json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
- webpage, u'json data', flags=re.MULTILINE)
+ webpage, 'json data', flags=re.MULTILINE)
try:
data = json.loads(json_data)
except ValueError as e:
- raise ExtractorError(u'Invalid JSON: ' + str(e))
+ raise ExtractorError('Invalid JSON: ' + str(e))
video_url = data['akamai_url'] + '&cbr=256'
url_parts = compat_urllib_parse_urlparse(video_url)
- video_ext = url_parts.path.rpartition('.')[2]
- info = {
- 'id': video_id,
- 'url': video_url,
- 'ext': video_ext,
- 'title': data['title'],
- 'description': data.get('teaser_text'),
- 'location': data.get('country_of_origin'),
- 'uploader': data.get('host', {}).get('name'),
- 'uploader_id': data.get('host', {}).get('slug'),
- 'thumbnail': data.get('image', {}).get('large_url_2x'),
- 'duration': data.get('duration'),
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': data['title'],
+ 'description': data.get('teaser_text'),
+ 'location': data.get('country_of_origin'),
+ 'uploader': data.get('host', {}).get('name'),
+ 'uploader_id': data.get('host', {}).get('slug'),
+ 'thumbnail': data.get('image', {}).get('large_url_2x'),
+ 'duration': data.get('duration'),
}
- return [info]
# encoding: utf-8
+from __future__ import unicode_literals
+
import re
import json
+import itertools
from .common import InfoExtractor
from ..utils import (
- compat_urlparse,
compat_str,
+ unified_strdate,
ExtractorError,
)
class RutubeIE(InfoExtractor):
- _VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'
+ IE_NAME = 'rutube'
+ IE_DESC = 'Rutube videos'
+ _VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'
_TEST = {
- u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
- u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4',
- u'info_dict': {
- u'title': u'Раненный кенгуру забежал в аптеку',
- u'uploader': u'NTDRussian',
- u'uploader_id': u'29790',
+ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
+ 'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
+ 'info_dict': {
+ 'title': 'Раненный кенгуру забежал в аптеку',
+ 'description': 'http://www.ntdtv.ru ',
+ 'duration': 80,
+ 'uploader': 'NTDRussian',
+ 'uploader_id': '29790',
+ 'upload_date': '20131016',
},
- u'params': {
+ 'params': {
# It requires ffmpeg (m3u8 download)
- u'skip_download': True,
+ 'skip_download': True,
},
}
- def _get_api_response(self, short_id, subpath):
- api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id)
- response_json = self._download_webpage(api_url, short_id,
- u'Downloading %s json' % subpath)
- return json.loads(response_json)
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- long_id = mobj.group('long_id')
- webpage = self._download_webpage(url, long_id)
- og_video = self._og_search_video_url(webpage)
- short_id = compat_urlparse.urlparse(og_video).path[1:]
- options = self._get_api_response(short_id, 'options')
- trackinfo = self._get_api_response(short_id, 'trackinfo')
+ video_id = mobj.group('id')
+
+ api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
+ video_id, 'Downloading video JSON')
+ video = json.loads(api_response)
+
+ api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
+ video_id, 'Downloading trackinfo JSON')
+ trackinfo = json.loads(api_response)
+
# Some videos don't have the author field
author = trackinfo.get('author') or {}
m3u8_url = trackinfo['video_balancer'].get('m3u8')
if m3u8_url is None:
- raise ExtractorError(u'Couldn\'t find m3u8 manifest url')
+ raise ExtractorError('Couldn\'t find m3u8 manifest url')
return {
- 'id': trackinfo['id'],
- 'title': trackinfo['title'],
+ 'id': video['id'],
+ 'title': video['title'],
+ 'description': video['description'],
+ 'duration': video['duration'],
+ 'view_count': video['hits'],
'url': m3u8_url,
'ext': 'mp4',
- 'thumbnail': options['thumbnail_url'],
+ 'thumbnail': video['thumbnail_url'],
'uploader': author.get('name'),
'uploader_id': compat_str(author['id']) if author else None,
+ 'upload_date': unified_strdate(video['created_ts']),
+ 'age_limit': 18 if video['is_adult'] else 0,
}
+
+
+class RutubeChannelIE(InfoExtractor):
+ IE_NAME = 'rutube:channel'
+ IE_DESC = 'Rutube channels'
+ _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
+
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
+
+ def _extract_videos(self, channel_id, channel_title=None):
+ entries = []
+ for pagenum in itertools.count(1):
+ api_response = self._download_webpage(
+ self._PAGE_TEMPLATE % (channel_id, pagenum),
+ channel_id, 'Downloading page %s' % pagenum)
+ page = json.loads(api_response)
+ results = page['results']
+ if not results:
+ break
+ entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
+ if not page['has_next']:
+ break
+ return self.playlist_result(entries, channel_id, channel_title)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ channel_id = mobj.group('id')
+ return self._extract_videos(channel_id)
+
+
+class RutubeMovieIE(RutubeChannelIE):
+ IE_NAME = 'rutube:movie'
+ IE_DESC = 'Rutube movies'
+ _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+
+ _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ movie_id = mobj.group('id')
+ api_response = self._download_webpage(
+ self._MOVIE_TEMPLATE % movie_id, movie_id,
+ 'Downloading movie JSON')
+ movie = json.loads(api_response)
+ movie_name = movie['name']
+ return self._extract_videos(movie_id, movie_name)
+
+
+class RutubePersonIE(RutubeChannelIE):
+ IE_NAME = 'rutube:person'
+ IE_DESC = 'Rutube person videos'
+ _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
+
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
# encoding: utf-8
+from __future__ import unicode_literals
import os.path
import re
class SmotriIE(InfoExtractor):
- IE_DESC = u'Smotri.com'
- IE_NAME = u'smotri'
+ IE_DESC = 'Smotri.com'
+ IE_NAME = 'smotri'
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
_TESTS = [
# real video id 2610366
{
- u'url': u'http://smotri.com/video/view/?id=v261036632ab',
- u'file': u'v261036632ab.mp4',
- u'md5': u'2a7b08249e6f5636557579c368040eb9',
- u'info_dict': {
- u'title': u'катастрофа с камер видеонаблюдения',
- u'uploader': u'rbc2008',
- u'uploader_id': u'rbc08',
- u'upload_date': u'20131118',
- u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
- u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
+ 'url': 'http://smotri.com/video/view/?id=v261036632ab',
+ 'file': 'v261036632ab.mp4',
+ 'md5': '2a7b08249e6f5636557579c368040eb9',
+ 'info_dict': {
+ 'title': 'катастрофа с камер видеонаблюдения',
+ 'uploader': 'rbc2008',
+ 'uploader_id': 'rbc08',
+ 'upload_date': '20131118',
+ 'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
+ 'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
},
},
# real video id 57591
{
- u'url': u'http://smotri.com/video/view/?id=v57591cb20',
- u'file': u'v57591cb20.flv',
- u'md5': u'830266dfc21f077eac5afd1883091bcd',
- u'info_dict': {
- u'title': u'test',
- u'uploader': u'Support Photofile@photofile',
- u'uploader_id': u'support-photofile',
- u'upload_date': u'20070704',
- u'description': u'test, видео test',
- u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
+ 'url': 'http://smotri.com/video/view/?id=v57591cb20',
+ 'file': 'v57591cb20.flv',
+ 'md5': '830266dfc21f077eac5afd1883091bcd',
+ 'info_dict': {
+ 'title': 'test',
+ 'uploader': 'Support Photofile@photofile',
+ 'uploader_id': 'support-photofile',
+ 'upload_date': '20070704',
+ 'description': 'test, видео test',
+ 'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
},
},
# video-password
{
- u'url': u'http://smotri.com/video/view/?id=v1390466a13c',
- u'file': u'v1390466a13c.mp4',
- u'md5': u'f6331cef33cad65a0815ee482a54440b',
- u'info_dict': {
- u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
- u'uploader': u'timoxa40',
- u'uploader_id': u'timoxa40',
- u'upload_date': u'20100404',
- u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
- u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
+ 'url': 'http://smotri.com/video/view/?id=v1390466a13c',
+ 'file': 'v1390466a13c.mp4',
+ 'md5': 'f6331cef33cad65a0815ee482a54440b',
+ 'info_dict': {
+ 'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
+ 'uploader': 'timoxa40',
+ 'uploader_id': 'timoxa40',
+ 'upload_date': '20100404',
+ 'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
+ 'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
},
- u'params': {
- u'videopassword': u'qwerty',
+ 'params': {
+ 'videopassword': 'qwerty',
},
},
# age limit + video-password
{
- u'url': u'http://smotri.com/video/view/?id=v15408898bcf',
- u'file': u'v15408898bcf.flv',
- u'md5': u'91e909c9f0521adf5ee86fbe073aad70',
- u'info_dict': {
- u'title': u'этот ролик не покажут по ТВ',
- u'uploader': u'zzxxx',
- u'uploader_id': u'ueggb',
- u'upload_date': u'20101001',
- u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
- u'age_limit': 18,
- u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
+ 'url': 'http://smotri.com/video/view/?id=v15408898bcf',
+ 'file': 'v15408898bcf.flv',
+ 'md5': '91e909c9f0521adf5ee86fbe073aad70',
+ 'info_dict': {
+ 'title': 'этот ролик не покажут по ТВ',
+ 'uploader': 'zzxxx',
+ 'uploader_id': 'ueggb',
+ 'upload_date': '20101001',
+ 'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
+ 'age_limit': 18,
+ 'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
},
- u'params': {
- u'videopassword': u'333'
+ 'params': {
+ 'videopassword': '333'
}
}
]
-
+
_SUCCESS = 0
_PASSWORD_NOT_VERIFIED = 1
_PASSWORD_DETECTED = 2
# Download video JSON data
video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
- video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON')
+ video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON')
video_json = json.loads(video_json_page)
-
+
status = video_json['status']
if status == self._VIDEO_NOT_FOUND:
- raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
- elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+ elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
# video-password set
video_password = self._downloader.params.get('videopassword', None)
if not video_password:
- raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True)
+ raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
- video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)')
+ video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)')
video_json = json.loads(video_json_page)
status = video_json['status']
if status == self._PASSWORD_NOT_VERIFIED:
- raise ExtractorError(u'Video password is invalid', expected=True)
-
+ raise ExtractorError('Video password is invalid', expected=True)
+
if status != self._SUCCESS:
- raise ExtractorError(u'Unexpected status value %s' % status)
-
+ raise ExtractorError('Unexpected status value %s' % status)
+
# Extract the URL of the video
video_url = video_json['file_data']
-
+
# Video JSON does not provide enough meta data
# We will extract some from the video web page instead
video_page_url = 'http://' + mobj.group('url')
- video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
+ video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
# Warning if video is unavailable
warning = self._html_search_regex(
r'<div class="videoUnModer">(.*?)</div>', video_page,
- u'warning message', default=None)
+ 'warning message', default=None)
if warning is not None:
self._downloader.report_warning(
- u'Video %s may not be available; smotri said: %s ' %
+ 'Video %s may not be available; smotri said: %s ' %
(video_id, warning))
# Adult content
- if re.search(u'EroConfirmText">', video_page) is not None:
+ if re.search('EroConfirmText">', video_page) is not None:
self.report_age_confirmation()
confirm_string = self._html_search_regex(
r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
- video_page, u'confirm string')
+ video_page, 'confirm string')
confirm_url = video_page_url + '&confirm=%s' % confirm_string
- video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)')
+ video_page = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
adult_content = True
else:
adult_content = False
-
+
# Extract the rest of meta data
- video_title = self._search_meta(u'name', video_page, u'title')
+ video_title = self._search_meta('name', video_page, 'title')
if not video_title:
video_title = os.path.splitext(url_basename(video_url))[0]
- video_description = self._search_meta(u'description', video_page)
- END_TEXT = u' на сайте Smotri.com'
+ video_description = self._search_meta('description', video_page)
+ END_TEXT = ' на сайте Smotri.com'
if video_description and video_description.endswith(END_TEXT):
video_description = video_description[:-len(END_TEXT)]
- START_TEXT = u'Смотреть онлайн ролик '
+ START_TEXT = 'Смотреть онлайн ролик '
if video_description and video_description.startswith(START_TEXT):
video_description = video_description[len(START_TEXT):]
- video_thumbnail = self._search_meta(u'thumbnail', video_page)
+ video_thumbnail = self._search_meta('thumbnail', video_page)
- upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
+ upload_date_str = self._search_meta('uploadDate', video_page, 'upload date')
if upload_date_str:
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
video_upload_date = (
)
else:
video_upload_date = None
-
- duration_str = self._search_meta(u'duration', video_page)
+
+ duration_str = self._search_meta('duration', video_page)
if duration_str:
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
video_duration = (
)
else:
video_duration = None
-
+
video_uploader = self._html_search_regex(
- u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
- video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
-
+ '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
+ video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
+
video_uploader_id = self._html_search_regex(
- u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
- video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
-
+ '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
+ video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
+
video_view_count = self._html_search_regex(
- u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
- video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
-
+ 'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
+ video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
+
return {
'id': video_id,
'url': video_url,
class SmotriCommunityIE(InfoExtractor):
- IE_DESC = u'Smotri.com community videos'
- IE_NAME = u'smotri:community'
+ IE_DESC = 'Smotri.com community videos'
+ IE_NAME = 'smotri:community'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
def _real_extract(self, url):
community_id = mobj.group('communityid')
url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
- rss = self._download_xml(url, community_id, u'Downloading community RSS')
+ rss = self._download_xml(url, community_id, 'Downloading community RSS')
entries = [self.url_result(video_url.text, 'Smotri')
for video_url in rss.findall('./channel/item/link')]
description_text = rss.find('./channel/description').text
community_title = self._html_search_regex(
- u'^Видео сообщества "([^"]+)"$', description_text, u'community title')
+ '^Видео сообщества "([^"]+)"$', description_text, 'community title')
return self.playlist_result(entries, community_id, community_title)
class SmotriUserIE(InfoExtractor):
- IE_DESC = u'Smotri.com user videos'
- IE_NAME = u'smotri:user'
+ IE_DESC = 'Smotri.com user videos'
+ IE_NAME = 'smotri:user'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
def _real_extract(self, url):
user_id = mobj.group('userid')
url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
- rss = self._download_xml(url, user_id, u'Downloading user RSS')
+ rss = self._download_xml(url, user_id, 'Downloading user RSS')
entries = [self.url_result(video_url.text, 'Smotri')
for video_url in rss.findall('./channel/item/link')]
description_text = rss.find('./channel/description').text
user_nickname = self._html_search_regex(
- u'^Видео режиссера (.*)$', description_text,
- u'user nickname')
+ '^Видео режиссера (.*)$', description_text,
+ 'user nickname')
return self.playlist_result(entries, user_id, user_nickname)
class SmotriBroadcastIE(InfoExtractor):
- IE_DESC = u'Smotri.com broadcasts'
- IE_NAME = u'smotri:broadcast'
+ IE_DESC = 'Smotri.com broadcasts'
+ IE_NAME = 'smotri:broadcast'
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'
def _real_extract(self, url):
broadcast_id = mobj.group('broadcastid')
broadcast_url = 'http://' + mobj.group('url')
- broadcast_page = self._download_webpage(broadcast_url, broadcast_id, u'Downloading broadcast page')
+ broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
- if re.search(u'>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
- raise ExtractorError(u'Broadcast %s does not exist' % broadcast_id, expected=True)
+ if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
+ raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)
# Adult content
- if re.search(u'EroConfirmText">', broadcast_page) is not None:
+ if re.search('EroConfirmText">', broadcast_page) is not None:
(username, password) = self._get_login_info()
if username is None:
- raise ExtractorError(u'Erotic broadcasts allowed only for registered users, '
- u'use --username and --password options to provide account credentials.', expected=True)
-
- # Log in
- login_form_strs = {
- u'login-hint53': '1',
- u'confirm_erotic': '1',
- u'login': username,
- u'password': password,
+ raise ExtractorError('Erotic broadcasts allowed only for registered users, '
+ 'use --username and --password options to provide account credentials.', expected=True)
+
+ login_form = {
+ 'login-hint53': '1',
+ 'confirm_erotic': '1',
+ 'login': username,
+ 'password': password,
}
- # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
- # chokes on unicode
- login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
- login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
- login_url = broadcast_url + '/?no_redirect=1'
- request = compat_urllib_request.Request(login_url, login_data)
+
+ request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- broadcast_page = self._download_webpage(
- request, broadcast_id, note=u'Logging in and confirming age')
+ broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
- if re.search(u'>Неверный логин или пароль<', broadcast_page) is not None:
- raise ExtractorError(u'Unable to log in: bad username or password', expected=True)
+ if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
+ raise ExtractorError('Unable to log in: bad username or password', expected=True)
adult_content = True
else:
adult_content = False
ticket = self._html_search_regex(
- u'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
- broadcast_page, u'broadcast ticket')
+ r"window\.broadcast_control\.addFlashVar\('file', '([^']+)'\);",
+ broadcast_page, 'broadcast ticket')
url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
if broadcast_password:
url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
- broadcast_json_page = self._download_webpage(url, broadcast_id, u'Downloading broadcast JSON')
+ broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')
try:
broadcast_json = json.loads(broadcast_json_page)
protected_broadcast = broadcast_json['_pass_protected'] == 1
if protected_broadcast and not broadcast_password:
- raise ExtractorError(u'This broadcast is protected by a password, use the --video-password option', expected=True)
+ raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)
broadcast_offline = broadcast_json['is_play'] == 0
if broadcast_offline:
- raise ExtractorError(u'Broadcast %s is offline' % broadcast_id, expected=True)
+ raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
rtmp_url = broadcast_json['_server']
if not rtmp_url.startswith('rtmp://'):
- raise ExtractorError(u'Unexpected broadcast rtmp URL')
+ raise ExtractorError('Unexpected broadcast rtmp URL')
broadcast_playpath = broadcast_json['_streamName']
broadcast_thumbnail = broadcast_json['_imgURL']
rtmp_conn = 'S:%s' % uuid.uuid4().hex
except KeyError:
if protected_broadcast:
- raise ExtractorError(u'Bad broadcast password', expected=True)
- raise ExtractorError(u'Unexpected broadcast JSON')
+ raise ExtractorError('Bad broadcast password', expected=True)
+ raise ExtractorError('Unexpected broadcast JSON')
return {
'id': broadcast_id,
class TumblrIE(InfoExtractor):
- _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
+ _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d+)($|/)'
_TEST = {
'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
'file': '54196191430.mp4',
+from __future__ import unicode_literals
import base64
import re
compat_parse_qs,
)
+
class TutvIE(InfoExtractor):
- _VALID_URL=r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
+ _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
_TEST = {
- u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
- u'file': u'2742556.flv',
- u'md5': u'5eb766671f69b82e528dc1e7769c5cb2',
- u'info_dict': {
- u"title": u"Noah en pabellon cuahutemoc"
- }
+ 'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
+ 'file': '2742556.flv',
+ 'md5': '5eb766671f69b82e528dc1e7769c5cb2',
+ 'info_dict': {
+ 'title': 'Noah en pabellon cuahutemoc',
+ },
}
def _real_extract(self, url):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID')
+ internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
- data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
- data_content = self._download_webpage(data_url, video_id, note=u'Downloading video info')
+ data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
+ data_content = self._download_webpage(data_url, video_id, note='Downloading video info')
data = compat_parse_qs(data_content)
video_url = base64.b64decode(data['kpt'][0]).decode('utf-8')
- ext = video_url.partition(u'?')[0].rpartition(u'.')[2]
- info = {
+ return {
'id': internal_id,
'url': video_url,
- 'ext': ext,
'title': self._og_search_title(webpage),
}
- return [info]
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
class YouJizzIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
_TEST = {
- u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
- u'file': u'2189178.flv',
- u'md5': u'07e15fa469ba384c7693fd246905547c',
- u'info_dict': {
- u"title": u"Zeichentrick 1",
- u"age_limit": 18,
+ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
+ 'file': '2189178.flv',
+ 'md5': '07e15fa469ba384c7693fd246905547c',
+ 'info_dict': {
+ 'title': 'Zeichentrick 1',
+ 'age_limit': 18,
}
}
# Get the video title
video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
- webpage, u'title').strip()
+ webpage, 'title').strip()
# Get the embed page
result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
if result is None:
- raise ExtractorError(u'ERROR: unable to extract embed page')
+ raise ExtractorError('ERROR: unable to extract embed page')
embed_page_url = result.group(0).strip()
video_id = result.group('videoid')
if m_playlist is not None:
playlist_url = m_playlist.group('playlist')
playlist_page = self._download_webpage(playlist_url, video_id,
- u'Downloading playlist page')
+ 'Downloading playlist page')
m_levels = list(re.finditer(r'<level bitrate="(\d+?)" file="(.*?)"', playlist_page))
if len(m_levels) == 0:
- raise ExtractorError(u'Unable to extract video url')
+ raise ExtractorError('Unable to extract video url')
videos = [(int(m.group(1)), m.group(2)) for m in m_levels]
(_, video_url) = sorted(videos)[0]
video_url = video_url.replace('%252F', '%2F')
else:
video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
- webpage, u'video URL')
-
- info = {'id': video_id,
- 'url': video_url,
- 'title': video_title,
- 'ext': 'flv',
- 'format': 'flv',
- 'player_url': embed_page_url,
- 'age_limit': age_limit}
+ webpage, 'video URL')
- return [info]
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': video_title,
+ 'ext': 'flv',
+ 'format': 'flv',
+ 'player_url': embed_page_url,
+ 'age_limit': age_limit,
+ }
'_type': 'url',
'url': video_id,
'ie_key': 'Youtube',
- 'id': 'video_id',
+ 'id': video_id,
'title': title,
}
url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)
-__version__ = '2014.01.27.1'
+__version__ = '2014.01.30.1'