Matthew Rayfield
t0mm0
Tithen-Firion
+Zack Fernandes
+cryptonaut
+Adrian Kretz
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
clean:
- rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part
+ rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json
cleanall: clean
rm -f youtube-dl youtube-dl.exe
this is not possible instead of searching.
--ignore-config Do not read configuration files. When given
in the global configuration file /etc
- /youtube-dl.conf: do not read the user
- configuration in ~/.config/youtube-dl.conf
- (%APPDATA%/youtube-dl/config.txt on
- Windows)
+ /youtube-dl.conf: Do not read the user
+ configuration in ~/.config/youtube-
+ dl/config (%APPDATA%/youtube-dl/config.txt
+ on Windows)
--flat-playlist Do not extract the videos of a playlist,
only list them.
intlist_to_bytes,
args_to_str,
parse_filesize,
+ version_tuple,
)
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
+ self.assertEqual(
+ unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
+ '20141126')
def test_find_xpath_attr(self):
testxml = '''<root>
self.assertEqual(parse_duration('0s'), 0)
self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
self.assertEqual(parse_duration('T30M38S'), 1838)
+ self.assertEqual(parse_duration('5 s'), 5)
+ self.assertEqual(parse_duration('3 min'), 180)
+ self.assertEqual(parse_duration('2.5 hours'), 9000)
def test_fix_xml_ampersands(self):
self.assertEqual(
self.assertEqual(parse_filesize('2 MiB'), 2097152)
self.assertEqual(parse_filesize('5 GB'), 5000000000)
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
+ self.assertEqual(parse_filesize('1,24 KB'), 1240)
+
+ def test_version_tuple(self):
+ self.assertEqual(version_tuple('1'), (1,))
+ self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
+ self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style
if __name__ == '__main__':
unittest.main()
import datetime
import errno
import io
+import itertools
import json
import locale
import os
ie_result['url'], ie_key=ie_result.get('ie_key'),
extra_info=extra_info, download=False, process=False)
- def make_result(embedded_info):
- new_result = ie_result.copy()
- for f in ('_type', 'url', 'ext', 'player_url', 'formats',
- 'entries', 'ie_key', 'duration',
- 'subtitles', 'annotations', 'format',
- 'thumbnail', 'thumbnails'):
- if f in new_result:
- del new_result[f]
- if f in embedded_info:
- new_result[f] = embedded_info[f]
- return new_result
- new_result = make_result(info)
+ new_result = ie_result.copy()
+ for f in ('_type', 'id', 'url', 'ext', 'player_url', 'formats',
+ 'entries', 'ie_key', 'duration',
+ 'subtitles', 'annotations', 'format',
+ 'thumbnail', 'thumbnails'):
+ if f in new_result:
+ del new_result[f]
+ if f in info:
+ new_result[f] = info[f]
assert new_result.get('_type') != 'url_transparent'
- if new_result.get('_type') == 'compat_list':
- new_result['entries'] = [
- make_result(e) for e in new_result['entries']]
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
if playlistend == -1:
playlistend = None
- if isinstance(ie_result['entries'], list):
- n_all_entries = len(ie_result['entries'])
- entries = ie_result['entries'][playliststart:playlistend]
+ ie_entries = ie_result['entries']
+ if isinstance(ie_entries, list):
+ n_all_entries = len(ie_entries)
+ entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
- else:
- assert isinstance(ie_result['entries'], PagedList)
- entries = ie_result['entries'].getslice(
+ elif isinstance(ie_entries, PagedList):
+ entries = ie_entries.getslice(
playliststart, playlistend)
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
+ else: # iterable
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
+ n_entries = len(entries)
+ self.to_screen(
+ "[%s] playlist %s: Downloading %d videos" %
+ (ie_result['extractor'], playlist, n_entries))
for i, entry in enumerate(entries, 1):
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
if self.params.get('forceid', False):
self.to_stdout(info_dict['id'])
if self.params.get('forceurl', False):
- # For RTMP URLs, also include the playpath
- self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+ if info_dict.get('requested_formats') is not None:
+ for f in info_dict['requested_formats']:
+ self.to_stdout(f['url'] + f.get('play_path', ''))
+ else:
+ # For RTMP URLs, also include the playpath
+ self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
self.to_stdout(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
from __future__ import unicode_literals
+import ctypes
import getpass
import optparse
import os
+import platform
import re
import subprocess
import sys
userhome = compat_getenv('HOME')
elif 'USERPROFILE' in os.environ:
userhome = compat_getenv('USERPROFILE')
- elif not 'HOMEPATH' in os.environ:
+ elif 'HOMEPATH' not in os.environ:
return path
else:
try:
optparse.OptionGroup.add_option = _compat_add_option
+if platform.python_implementation() == 'PyPy':
+ # PyPy expects byte strings as Windows function names
+ # https://github.com/rg3/youtube-dl/pull/4392
+ def compat_WINFUNCTYPE(*args, **kwargs):
+ real = ctypes.WINFUNCTYPE(*args, **kwargs)
+
+ def resf(tpl, *args, **kwargs):
+ funcname, dll = tpl
+ return real((str(funcname), dll), *args, **kwargs)
+
+ return resf
+else:
+ def compat_WINFUNCTYPE(*args, **kwargs):
+ return ctypes.WINFUNCTYPE(*args, **kwargs)
+
+
__all__ = [
'compat_HTTPError',
'compat_chr',
'compat_urllib_request',
'compat_urlparse',
'compat_urlretrieve',
+ 'compat_WINFUNCTYPE',
'compat_xml_parse_error',
'shlex_quote',
'subprocess_check_output',
import re
import subprocess
+from ..postprocessor.ffmpeg import FFmpegPostProcessor
from .common import FileDownloader
from ..utils import (
compat_urlparse,
return False
cmd = [program] + args
+ ffpp = FFmpegPostProcessor(downloader=self)
+ ffpp.check_version()
+
retval = subprocess.call(cmd)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
)
from .audiomack import AudiomackIE
from .auengine import AUEngineIE
+from .azubu import AzubuIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .beeg import BeegIE
from .behindkink import BehindKinkIE
+from .bet import BetIE
from .bild import BildIE
from .bilibili import BiliBiliIE
from .blinkx import BlinkxIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
-from .cinemassacre import CinemassacreIE
+from .cinchcast import CinchcastIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE
from .flickr import FlickrIE
from .folketinget import FolketingetIE
from .fourtube import FourTubeIE
+from .foxgay import FoxgayIE
+from .foxnews import FoxNewsIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
from .francetv import (
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mgoon import MgoonIE
+from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mitele import MiTeleIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
+from .myvidster import MyVidsterIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import (
from .prosiebensat1 import ProSiebenSat1IE
from .pyvideo import PyvideoIE
from .quickvid import QuickVidIE
+from .radiode import RadioDeIE
from .radiofrance import RadioFranceIE
from .rai import RaiIE
from .rbmaradio import RBMARadioIE
from .sbs import SBSIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
+from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
from .servingsys import ServingSysIE
from .sexu import SexuIE
from .sexykarma import SexyKarmaIE
YoutubeUserIE,
YoutubeWatchLaterIE,
)
-from .zdf import ZDFIE
+from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import (
ZingMp3SongIE,
ZingMp3AlbumIE,
from __future__ import unicode_literals
import re
+import json
from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+)
class AdultSwimIE(InfoExtractor):
- _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
- _TEST = {
- 'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
+ _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
+
+ _TESTS = [{
+ 'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
'playlist': [
{
- 'md5': '4da359ec73b58df4575cd01a610ba5dc',
- 'info_dict': {
- 'id': '8a250ba1450996e901453d7f02ca02f5',
- 'ext': 'flv',
- 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
- 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
- 'uploader': 'Rick and Morty',
- 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
- }
- },
- {
- 'md5': 'ffbdf55af9331c509d95350bd0cc1819',
+ 'md5': '247572debc75c7652f253c8daa51a14d',
'info_dict': {
- 'id': '8a250ba1450996e901453d7f4bd102f6',
+ 'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
'ext': 'flv',
- 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
- 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
- 'uploader': 'Rick and Morty',
- 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
- }
+ 'title': 'Rick and Morty - Pilot Part 1',
+ 'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
+ },
},
{
- 'md5': 'b92409635540304280b4b6c36bd14a0a',
+ 'md5': '77b0e037a4b20ec6b98671c4c379f48d',
'info_dict': {
- 'id': '8a250ba1450996e901453d7fa73c02f7',
+ 'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
'ext': 'flv',
- 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
- 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
- 'uploader': 'Rick and Morty',
- 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
- }
+ 'title': 'Rick and Morty - Pilot Part 4',
+ 'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
+ },
},
+ ],
+ 'info_dict': {
+ 'title': 'Rick and Morty - Pilot',
+ 'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
+ }
+ }, {
+ 'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
+ 'playlist': [
{
- 'md5': 'e8818891d60e47b29cd89d7b0278156d',
+ 'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
'info_dict': {
- 'id': '8a250ba1450996e901453d7fc8ba02f8',
+ 'id': '-t8CamQlQ2aYZ49ItZCFog-0',
'ext': 'flv',
- 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
- 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
- 'uploader': 'Rick and Morty',
- 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
- }
+ 'title': 'American Dad - Putting Francine Out of Business',
+ 'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
+ },
}
- ]
- }
-
- _video_extensions = {
- '3500': 'flv',
- '640': 'mp4',
- '150': 'mp4',
- 'ipad': 'm3u8',
- 'iphone': 'm3u8'
- }
- _video_dimensions = {
- '3500': (1280, 720),
- '640': (480, 270),
- '150': (320, 180)
- }
+ ],
+ 'info_dict': {
+ 'title': 'American Dad - Putting Francine Out of Business',
+ 'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
+ },
+ }]
+
+ @staticmethod
+ def find_video_info(collection, slug):
+ for video in collection.get('videos'):
+ if video.get('slug') == slug:
+ return video
+
+ @staticmethod
+ def find_collection_by_linkURL(collections, linkURL):
+ for collection in collections:
+ if collection.get('linkURL') == linkURL:
+ return collection
+
+ @staticmethod
+ def find_collection_containing_video(collections, slug):
+ for collection in collections:
+ for video in collection.get('videos'):
+ if video.get('slug') == slug:
+ return collection, video
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- video_path = mobj.group('path')
-
- webpage = self._download_webpage(url, video_path)
- episode_id = self._html_search_regex(
- r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>',
- webpage, 'episode_id')
- title = self._og_search_title(webpage)
-
- index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
- idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')
-
- episode_el = idoc.find('.//episode')
- show_title = episode_el.attrib.get('collectionTitle')
- episode_title = episode_el.attrib.get('title')
- thumbnail = episode_el.attrib.get('thumbnailUrl')
- description = episode_el.find('./description').text.strip()
+ show_path = mobj.group('show_path')
+ episode_path = mobj.group('episode_path')
+ is_playlist = True if mobj.group('is_playlist') else False
+
+ webpage = self._download_webpage(url, episode_path)
+
+ # Extract the value of `bootstrappedData` from the Javascript in the page.
+ bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
+
+ try:
+ bootstrappedData = json.loads(bootstrappedDataJS)
+ except ValueError as ve:
+ errmsg = '%s: Failed to parse JSON ' % episode_path
+ raise ExtractorError(errmsg, cause=ve)
+
+ # Downloading videos from a /videos/playlists/ URL needs to be handled differently.
+ # NOTE: We are only downloading one video (the current one) not the playlist
+ if is_playlist:
+ collections = bootstrappedData['playlists']['collections']
+ collection = self.find_collection_by_linkURL(collections, show_path)
+ video_info = self.find_video_info(collection, episode_path)
+
+ show_title = video_info['showTitle']
+ segment_ids = [video_info['videoPlaybackID']]
+ else:
+ collections = bootstrappedData['show']['collections']
+ collection, video_info = self.find_collection_containing_video(collections, episode_path)
+
+ show = bootstrappedData['show']
+ show_title = show['title']
+ segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
+
+ episode_id = video_info['id']
+ episode_title = video_info['title']
+ episode_description = video_info['description']
+ episode_duration = video_info.get('duration')
entries = []
- segment_els = episode_el.findall('./segments/segment')
+ for part_num, segment_id in enumerate(segment_ids):
+ segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id
- for part_num, segment_el in enumerate(segment_els):
- segment_id = segment_el.attrib.get('id')
- segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
- thumbnail = segment_el.attrib.get('thumbnailUrl')
- duration = segment_el.attrib.get('duration')
+ segment_title = '%s - %s' % (show_title, episode_title)
+ if len(segment_ids) > 1:
+ segment_title += ' Part %d' % (part_num + 1)
- segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
idoc = self._download_xml(
segment_url, segment_title,
'Downloading segment information', 'Unable to download segment information')
+ segment_duration = idoc.find('.//trt').text.strip()
+
formats = []
file_els = idoc.findall('.//files/file')
for file_el in file_els:
bitrate = file_el.attrib.get('bitrate')
- type = file_el.attrib.get('type')
- width, height = self._video_dimensions.get(bitrate, (None, None))
+ ftype = file_el.attrib.get('type')
+
formats.append({
- 'format_id': '%s-%s' % (bitrate, type),
- 'url': file_el.text,
- 'ext': self._video_extensions.get(bitrate, 'mp4'),
+ 'format_id': '%s_%s' % (bitrate, ftype),
+ 'url': file_el.text.strip(),
# The bitrate may not be a number (for example: 'iphone')
'tbr': int(bitrate) if bitrate.isdigit() else None,
- 'height': height,
- 'width': width
+ 'quality': 1 if ftype == 'hd' else -1
})
self._sort_formats(formats)
'id': segment_id,
'title': segment_title,
'formats': formats,
- 'uploader': show_title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'description': description
+ 'duration': segment_duration,
+ 'description': episode_description
})
return {
'_type': 'playlist',
'id': episode_id,
- 'display_id': video_path,
+ 'display_id': episode_path,
'entries': entries,
- 'title': '%s %s' % (show_title, episode_title),
- 'description': description,
- 'thumbnail': thumbnail
+ 'title': '%s - %s' % (show_title, episode_title),
+ 'description': episode_description,
+ 'duration': episode_duration
}
{
'add_ie': ['Soundcloud'],
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
- 'file': '172419696.mp3',
- 'info_dict':
- {
+ 'info_dict': {
+ 'id': '172419696',
+ 'ext': 'mp3',
'description': 'md5:1fc3272ed7a635cce5be1568c2822997',
'title': 'Young Thug ft Lil Wayne - Take Kare',
- 'uploader':'Young Thug World',
- 'upload_date':'20141016',
+ 'uploader': 'Young Thug World',
+ 'upload_date': '20141016',
}
},
]
--- /dev/null
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class AzubuIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
+ 'md5': 'a88b42fcf844f29ad6035054bd9ecaf4',
+ 'info_dict': {
+ 'id': '15575',
+ 'ext': 'mp4',
+ 'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
+ 'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
+ 'thumbnail': 're:^https?://.*\.jpe?g',
+ 'timestamp': 1417523507.334,
+ 'upload_date': '20141202',
+ 'duration': 9988.7,
+ 'uploader': 'GSL',
+ 'uploader_id': 414310,
+ 'view_count': int,
+ },
+ },
+ {
+ 'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-',
+ 'md5': 'b72a871fe1d9f70bd7673769cdb3b925',
+ 'info_dict': {
+ 'id': '9344',
+ 'ext': 'mp4',
+ 'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
+ 'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
+ 'thumbnail': 're:^https?://.*\.jpe?g',
+ 'timestamp': 1410530893.320,
+ 'upload_date': '20140912',
+ 'duration': 172.385,
+ 'uploader': 'FnaticTV',
+ 'uploader_id': 272749,
+ 'view_count': int,
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = self._download_json(
+ 'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
+
+ title = data['title'].strip()
+ description = data['description']
+ thumbnail = data['thumbnail']
+ view_count = data['view_count']
+ uploader = data['user']['username']
+ uploader_id = data['user']['id']
+
+ stream_params = json.loads(data['stream_params'])
+
+ timestamp = float_or_none(stream_params['creationDate'], 1000)
+ duration = float_or_none(stream_params['length'], 1000)
+
+ renditions = stream_params.get('renditions') or []
+ video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
+ if video:
+ renditions.append(video)
+
+ formats = [{
+ 'url': fmt['url'],
+ 'width': fmt['frameWidth'],
+ 'height': fmt['frameHeight'],
+ 'vbr': float_or_none(fmt['encodingRate'], 1000),
+ 'filesize': fmt['size'],
+ 'vcodec': fmt['videoCodec'],
+ 'container': fmt['videoContainer'],
+ } for fmt in renditions if fmt['url']]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
from __future__ import unicode_literals
-import re
import xml.etree.ElementTree
from .subtitles import SubtitlesInfoExtractor
class BehindKinkIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
_TEST = {
- 'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
- 'md5': '41ad01222b8442089a55528fec43ec01',
+ 'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
+ 'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
'info_dict': {
- 'id': '36370',
+ 'id': '37127',
'ext': 'mp4',
- 'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
- 'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
- 'upload_date': '20140814',
- 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
+ 'title': 'What are you passionate about – Marley Blaze',
+ 'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
+ 'upload_date': '20141205',
+ 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
- year = mobj.group('year')
- month = mobj.group('month')
- day = mobj.group('day')
- upload_date = year + month + day
webpage = self._download_webpage(url, display_id)
video_url = self._search_regex(
- r"'file':\s*'([^']+)'",
- webpage, 'URL base')
-
- video_id = url_basename(video_url)
- video_id = video_id.split('_')[0]
+ r'<source src="([^"]+)"', webpage, 'video URL')
+ video_id = url_basename(video_url).split('_')[0]
+ upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')
return {
'id': video_id,
+ 'display_id': display_id,
'url': video_url,
- 'ext': 'mp4',
'title': self._og_search_title(webpage),
- 'display_id': display_id,
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
'upload_date': upload_date,
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_parse,
+ xpath_text,
+ xpath_with_ns,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class BetIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
+ _TESTS = [
+ {
+ 'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
+ 'info_dict': {
+ 'id': '417cd61c-c793-4e8e-b006-e445ecc45add',
+ 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
+ 'ext': 'flv',
+ 'title': 'BET News Presents: A Conversation With President Obama',
+ 'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
+ 'duration': 1534,
+ 'timestamp': 1418075340,
+ 'upload_date': '20141208',
+ 'uploader': 'admin',
+ 'thumbnail': 're:(?i)^https?://.*\.jpg$',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
+ 'info_dict': {
+ 'id': '4160e53b-ad41-43b1-980f-8d85f63121f4',
+ 'display_id': 'justice-for-ferguson-a-community-reacts',
+ 'ext': 'flv',
+ 'title': 'Justice for Ferguson: A Community Reacts',
+ 'description': 'A BET News special.',
+ 'duration': 1696,
+ 'timestamp': 1416942360,
+ 'upload_date': '20141125',
+ 'uploader': 'admin',
+ 'thumbnail': 're:(?i)^https?://.*\.jpg$',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ media_url = compat_urllib_parse.unquote(self._search_regex(
+ [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
+ webpage, 'media URL'))
+
+ mrss = self._download_xml(media_url, display_id)
+
+ item = mrss.find('./channel/item')
+
+ NS_MAP = {
+ 'dc': 'http://purl.org/dc/elements/1.1/',
+ 'media': 'http://search.yahoo.com/mrss/',
+ 'ka': 'http://kickapps.com/karss',
+ }
+
+ title = xpath_text(item, './title', 'title')
+ description = xpath_text(
+ item, './description', 'description', fatal=False)
+
+ video_id = xpath_text(item, './guid', 'video id', fatal=False)
+
+ timestamp = parse_iso8601(xpath_text(
+ item, xpath_with_ns('./dc:date', NS_MAP),
+ 'upload date', fatal=False))
+ uploader = xpath_text(
+ item, xpath_with_ns('./dc:creator', NS_MAP),
+ 'uploader', fatal=False)
+
+ media_content = item.find(
+ xpath_with_ns('./media:content', NS_MAP))
+ duration = int_or_none(media_content.get('duration'))
+ smil_url = media_content.get('url')
+
+ thumbnail = media_content.find(
+ xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
+
+ formats = self._extract_smil_formats(smil_url, display_id)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'duration': duration,
+ 'formats': formats,
+ }
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
-from ..utils import (
+
+from ..compat import (
+ compat_str,
compat_urllib_request,
- unescapeHTML,
- parse_iso8601,
compat_urlparse,
+)
+from ..utils import (
clean_html,
- compat_str,
+ int_or_none,
+ parse_iso8601,
+ unescapeHTML,
)
'uploader': 'NostalgiaCritic',
'uploader_id': '246467',
}
- }
+ },
+ {
+ # https://github.com/rg3/youtube-dl/pull/4404
+ 'note': 'Audio only',
+ 'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
+ 'md5': '76c0a56f24e769ceaab21fbb6416a351',
+ 'info_dict': {
+ 'id': '7103299',
+ 'ext': 'flv',
+ 'title': 'Weekly Manga Recap: Kingdom',
+ 'description': 'And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?',
+ 'timestamp': 1417660321,
+ 'upload_date': '20141204',
+ 'uploader': 'The Rollo T',
+ 'uploader_id': '407429',
+ 'duration': 7251,
+ 'vcodec': 'none',
+ }
+ },
]
def _real_extract(self, url):
'url': real_url,
'format_id': role,
'format_note': media_type,
- 'vcodec': media_content.get(blip('vcodec')),
+ 'vcodec': media_content.get(blip('vcodec')) or 'none',
'acodec': media_content.get(blip('acodec')),
'filesize': media_content.get('filesize'),
- 'width': int(media_content.get('width')),
- 'height': int(media_content.get('height')),
+ 'width': int_or_none(media_content.get('width')),
+ 'height': int_or_none(media_content.get('height')),
})
self._sort_formats(formats)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ unified_strdate,
+ xpath_text,
+)
+
+
+class CinchcastIE(InfoExtractor):
+ _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)'
+ _TEST = {
+ # Actual test is run in generic, look for undergroundwellness
+ 'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ doc = self._download_xml(
+ 'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id,
+ video_id)
+
+ item = doc.find('.//item')
+ title = xpath_text(item, './title', fatal=True)
+ date_str = xpath_text(
+ item, './{http://developer.longtailvideo.com/trac/}date')
+ upload_date = unified_strdate(date_str, day_first=False)
+ # duration is present but wrong
+ formats = []
+ formats.append({
+ 'format_id': 'main',
+ 'url': item.find(
+ './{http://search.yahoo.com/mrss/}content').attrib['url'],
+ })
+ backup_url = xpath_text(
+ item, './{http://developer.longtailvideo.com/trac/}backupContent')
+ if backup_url:
+ formats.append({
+ 'preference': 2, # seems to be more reliable
+ 'format_id': 'backup',
+ 'url': backup_url,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
The following fields are optional:
+ alt_title: A secondary title of the video.
display_id: An alternative identifier for the video, not necessarily
unique, but available before title. Typically, id is
something like "4234987", title "Dancing naked mole rats",
* "resolution" (optional, string "{width}x{height}",
deprecated)
thumbnail: Full URL to a video thumbnail image.
- description: One-line video description.
+ description: Full video description.
uploader: Full name of the video uploader.
timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD).
_type "playlist" indicates multiple videos.
- There must be a key "entries", which is a list or a PagedList object, each
- element of which is a valid dictionary under this specfication.
+ There must be a key "entries", which is a list, an iterable, or a PagedList
+ object, each element of which is a valid dictionary by this specification.
Additionally, playlists can have "title" and "id" attributes with the same
semantics as videos (see above).
_type "url" indicates that the video must be extracted from another
location, possibly by a different extractor. Its only required key is:
"url" - the next URL to extract.
-
- Additionally, it may have properties believed to be identical to the
- resolved entity, for example "title" if the title of the referred video is
+ The key "ie_key" can be set to the class name (minus the trailing "IE",
+ e.g. "Youtube") if the extractor class is known in advance.
+ Additionally, the dictionary may have any properties of the resolved entity
+ known in advance, for example "title" if the title of the referred video is
known ahead of time.
return video_info
@staticmethod
- def playlist_result(entries, playlist_id=None, playlist_title=None):
+ def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
"""Returns a playlist"""
video_info = {'_type': 'playlist',
'entries': entries}
video_info['id'] = playlist_id
if playlist_title:
video_info['title'] = playlist_title
+ if playlist_description:
+ video_info['description'] = playlist_description
return video_info
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
self._sort_formats(formats)
return formats
# TODO: improve extraction (sources resolved to plain 'http' are detected
# below, but no format is emitted for them yet)
def _extract_smil_formats(self, smil_url, video_id):
    """Download a SMIL document and turn its <video> entries into formats.

    smil_url is the address of the SMIL file; video_id is handed through to
    the download helpers.  Only the <video> elements found under
    ./body/switch are inspected, and entries without a "src" attribute are
    skipped.  Returns the list of format dicts after it has been passed to
    self._sort_formats().
    """
    smil = self._download_xml(
        smil_url, video_id, 'Downloading SMIL file',
        'Unable to download SMIL file')

    # find() yields None when there is no <head><meta>; compare against None
    # explicitly because an element without children is falsy.
    meta = smil.find('./head/meta')
    base = meta.get('base') if meta is not None else None

    formats = []
    rtmp_count = 0
    for video in smil.findall('./body/switch/video'):
        src = video.get('src')
        if not src:
            continue

        # Both attribute spellings are accepted; the value is scaled down by
        # 1000 (presumably bit/s -> kbit/s -- confirm against int_or_none).
        bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
        width = int_or_none(video.get('width'))
        height = int_or_none(video.get('height'))

        # Without an explicit protocol, fall back to the scheme of the base URL.
        proto = video.get('proto')
        if not proto and base:
            if base.startswith('rtmp'):
                proto = 'rtmp'
            elif base.startswith('http'):
                proto = 'http'

        ext = video.get('ext')
        if proto == 'm3u8':
            formats.extend(self._extract_m3u8_formats(src, video_id, ext))
        elif proto == 'rtmp':
            rtmp_count += 1
            streamer = video.get('streamer') or base
            formats.append({
                'url': streamer,
                'play_path': src,
                'ext': 'flv',
                # Use a running counter when no bitrate is available.
                'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
                'tbr': bitrate,
                'width': width,
                'height': height,
            })
    self._sort_formats(formats)

    return formats
+
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()
return res
def _set_cookie(self, domain, name, value, expire_time=None):
- cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None,
+ cookie = compat_cookiejar.Cookie(
+ 0, name, value, None, None, domain, None,
None, '/', True, False, expire_time, '', None, None, None)
self._downloader.cookiejar.set_cookie(cookie)
compat_urllib_request,
)
from ..utils import (
- urlencode_postdata,
ExtractorError,
+ int_or_none,
limit_length,
+ urlencode_postdata,
)
'info_dict': {
'id': '637842556329505',
'ext': 'mp4',
- 'duration': 38,
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
}
}, {
self._login()
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
webpage = self._download_webpage(url, video_id)
'id': video_id,
'title': video_title,
'url': video_url,
- 'duration': int(video_data['video_duration']),
- 'thumbnail': video_data['thumbnail_src'],
+ 'duration': int_or_none(video_data.get('video_duration')),
+ 'thumbnail': video_data.get('thumbnail_src'),
}
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
class FoxgayIE(InfoExtractor):
    """Extractor for foxgay.com video pages.

    The watch page supplies the title and description; the media and
    thumbnail URLs are read from an iframe page that is downloaded
    separately.
    """
    _VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
    _TEST = {
        'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
        'md5': '80d72beab5d04e1655a56ad37afe6841',
        'info_dict': {
            'id': '2582',
            'ext': 'mp4',
            'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a',
            'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf',
            'age_limit': 18,
            'thumbnail': r're:https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at url."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The title is a mandatory field of the result, so a failed match
        # must abort here instead of handing None to the caller.
        title = self._html_search_regex(
            r'<title>(?P<title>.*?)</title>',
            webpage, 'title')
        description = self._html_search_regex(
            r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>',
            webpage, 'description', fatal=False)

        # Find the URL for the iFrame which contains the actual video.
        iframe_url = self._html_search_regex(
            r'iframe src="(?P<frame>.*?)"', webpage, 'video frame')
        iframe = self._download_webpage(iframe_url, video_id)

        # The iframe page assigns the media and thumbnail URLs to
        # v_path / t_path; only the media URL is required.
        video_url = self._html_search_regex(
            r"v_path = '(?P<vid>http://.*?)'", iframe, 'url')
        thumb_url = self._html_search_regex(
            r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumb_url,
            'age_limit': 18,
        }
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ int_or_none,
+)
+
+
class FoxNewsIE(InfoExtractor):
    """Extractor for video.foxnews.com, driven by the site's JSON video feed.

    Regular watch URLs and video-embed.html?video_id=... URLs are both
    matched; the numeric id taken from the URL selects the feed document.
    """
    _VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
            'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
            'info_dict': {
                'id': '3937480',
                'ext': 'flv',
                'title': 'Frozen in Time',
                'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
                'duration': 265,
                'timestamp': 1304411491,
                'upload_date': '20110503',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
            'md5': '5846c64a1ea05ec78175421b8323e2df',
            'info_dict': {
                'id': '3922535568001',
                'ext': 'mp4',
                'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
                'description': "Congressman discusses the president's executive action",
                'duration': 292,
                'timestamp': 1417662047,
                'upload_date': '20141204',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the info dict (formats plus metadata) for the video at url."""
        video_id = self._match_id(url)

        video = self._download_json(
            'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)

        item = video['channel']['item']
        # Only the title is required; the remaining metadata is best-effort,
        # so a feed entry lacking it does not abort the extraction.
        title = item['title']
        description = item.get('description')
        date = item.get('dc-date')
        timestamp = parse_iso8601(date) if date else None

        media_group = item['media-group']
        duration = None
        formats = []
        for media in media_group['media-content']:
            attributes = media['@attributes']
            video_url = attributes['url']
            if video_url.endswith('.f4m'):
                formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
            elif video_url.endswith('.m3u8'):
                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
            elif not video_url.endswith('.smil'):
                # Direct media file; .smil entries are deliberately ignored.
                duration = int_or_none(attributes.get('duration'))
                formats.append({
                    'url': video_url,
                    'format_id': media['media-category']['@attributes']['label'],
                    'preference': 1,
                    'vbr': int_or_none(attributes.get('bitrate')),
                    'filesize': int_or_none(attributes.get('fileSize'))
                })
        self._sort_formats(formats)

        # The thumbnail node (or its URL) may be absent from the feed.
        media_thumbnail = (media_group.get('media-thumbnail') or {}).get('@attributes') or {}
        thumbnails = []
        if media_thumbnail.get('url'):
            thumbnails.append({
                'url': media_thumbnail['url'],
                'width': int_or_none(media_thumbnail.get('width')),
                'height': int_or_none(media_thumbnail.get('height')),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }
'expected_warnings': [
'URL could be a direct video link, returning it as such.'
]
- }
-
+ },
+ # Cinchcast embed
+ {
+ 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
+ 'info_dict': {
+ 'id': '7141703',
+ 'ext': 'mp3',
+ 'upload_date': '20141126',
+ 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
+ }
+ },
]
def report_following_redirect(self, new_url):
if mobj is not None:
return self.url_result(mobj.group('url'), 'SBS')
+ # Look for embedded Cinchcast player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Cinchcast')
+
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
webpage)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_filesize,
+)
+
+
class MinhatecaIE(InfoExtractor):
    """Extractor for files hosted on minhateca.com.br.

    The file page provides a request-verification token and the displayed
    metadata; the download URL itself is obtained by POSTing the token and
    the file id to the site's License/Download action.
    """
    _VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
    _TEST = {
        'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
        'info_dict': {
            'id': '125848331',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
            'thumbnail': r're:^https?://.*\.jpg$',
            'filesize_approx': 1530000,
            'duration': 9,
            'view_count': int,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the file page at url."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        token = self._html_search_regex(
            r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
            webpage, 'request token')
        token_data = [
            ('fileId', video_id),
            ('__RequestVerificationToken', token),
        ]
        # urlencode() produces text; the request body has to be bytes on
        # Python 3, and the percent-encoded result is plain ASCII.
        post_data = compat_urllib_parse.urlencode(token_data).encode('ascii')
        req = compat_urllib_request.Request(
            'http://minhateca.com.br/action/License/Download',
            data=post_data)
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        data = self._download_json(
            req, video_id, note='Downloading metadata')

        video_url = data['redirectUrl']
        title_str = self._html_search_regex(
            r'<h1.*?>(.*?)</h1>', webpage, 'title')
        # The heading is expected to look like "<title>.<ext>"; when it has
        # no dot, keep the whole heading as title and leave ext unset.
        title, dot, ext = title_str.rpartition('.')
        if not dot:
            title, ext = title_str, None
        filesize_approx = parse_filesize(self._html_search_regex(
            r'<p class="fileSize">(.*?)</p>',
            webpage, 'file size approximation', fatal=False))
        # The class name is matched as "fileLeng" followed by h/t in either order.
        duration = parse_duration(self._html_search_regex(
            r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
            webpage, 'duration', fatal=False))
        view_count = int_or_none(self._html_search_regex(
            r'<p class="downloadsCounter">([0-9]+)</p>',
            webpage, 'view count', fatal=False))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': ext,
            'filesize_approx': filesize_approx,
            'duration': duration,
            'view_count': view_count,
            'thumbnail': self._og_search_thumbnail(webpage),
        }
raise ExtractorError('Unable to extract track url')
PREFIX = (
- r'<div class="cloudcast-play-button-container[^"]*?"'
+ r'<span class="play-button[^"]*?"'
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
title = self._html_search_regex(
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
self.report_warning(
'%s: No downloadable song on this page' % video_id)
return
+
def search_data(name):
return self._search_regex(
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
class MyVidsterIE(InfoExtractor):
    """Extractor for myvidster.com pages.

    The page only links to the real video; extraction is delegated by
    returning a URL result that points at that link.
    """
    _VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'

    _TEST = {
        'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making',
        'md5': '95296d0231c1363222c3441af62dc4ca',
        'info_dict': {
            'id': '3685814',
            'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
            'upload_date': '20141027',
            'uploader_id': 'utkualp',
            'ext': 'mp4',
            'age_limit': 18,
        },
        'add_ie': ['XHamster'],
    }

    def _real_extract(self, url):
        """Return a URL result for the video linked from the page at url."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Stop at the closing quote of the href value: a greedy ".*" would
        # run on to the last '">' on the same line.
        real_url = self._html_search_regex(
            r'rel="videolink" href="(?P<real_url>[^"]+)">',
            webpage, 'real video url')
        return self.url_result(real_url)
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
remove_end,
class NBAIE(InfoExtractor):
- _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
- _TEST = {
+ _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
+ _TESTS = [{
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
'info_dict': {
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'duration': 181,
},
- }
+ }, {
+ 'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
description = self._og_search_description(webpage)
duration = parse_duration(
- self._html_search_meta('duration', webpage, 'duration', fatal=False))
+ self._html_search_meta('duration', webpage, 'duration'))
return {
'id': shortened_video_id,
import re
import json
+import os
from .common import InfoExtractor
from ..compat import (
initial_video_url = info['publishPoint']
if info['formats'] == '1':
parsed_url = compat_urllib_parse_urlparse(initial_video_url)
- path = parsed_url.path.replace('.', '_sd.', 1)
+ filename, ext = os.path.splitext(parsed_url.path)
+ path = '%s_sd%s' % (filename, ext)
data = compat_urllib_parse.urlencode({
'type': 'fvod',
'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
'rtmp_conn': 'B:1',
'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
'page_url': 'http://www.ntv.ru',
- 'flash_ver': 'LNX 11,2,202,341',
+ 'flash_version': 'LNX 11,2,202,341',
'rtmp_live': True,
'ext': 'flv',
'filesize': int(size.text),
int_or_none,
js_to_json,
qualities,
- determine_ext,
)
thumbnail = self._search_regex(
r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
- quality = qualities(['SD', 'HD'])
- formats = [{
- 'url': source['file'],
- 'format_id': '%s-%s' % (source['label'], determine_ext(source['file'])),
- 'quality': quality(source['label']),
- } for source in json.loads(js_to_json(self._search_regex(
- r"(?s)'sources'\s*:\s*(\[.+?\])", webpage, 'sources')))]
+ quality = qualities(['sd', 'hd'])
+ sources = json.loads(js_to_json(self._search_regex(
+ r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
+ formats = []
+ for container, s in sources.items():
+ for qname, video_url in s.items():
+ formats.append({
+ 'url': video_url,
+ 'container': container,
+ 'format_id': '%s-%s' % (container, qname),
+ 'quality': quality(qname),
+ })
self._sort_formats(formats)
return {
'ext': 'mp4',
'title': 'Im Interview: Kai Wiesinger',
'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
- 'upload_date': '20140225',
+ 'upload_date': '20140203',
'duration': 522.56,
},
'params': {
'ext': 'mp4',
'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
'description': 'md5:2669cde3febe9bce13904f701e774eb6',
- 'upload_date': '20140225',
+ 'upload_date': '20141014',
'duration': 2410.44,
},
'params': {
'skip_download': True,
},
},
+ {
+ 'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
+ 'info_dict': {
+ 'id': '439664',
+ 'title': 'Episode 8 - Ganze Folge - Playlist',
+ 'description': 'md5:63b8963e71f481782aeea877658dec84',
+ },
+ 'playlist_count': 2,
+ },
]
_CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
r'clip[iI]d=(\d+)',
+ r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
]
_TITLE_REGEXES = [
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
]
+ _PAGE_TYPE_REGEXES = [
+ r'<meta name="page_type" content="([^"]+)">',
+ r"'itemType'\s*:\s*'([^']*)'",
+ ]
+ _PLAYLIST_ID_REGEXES = [
+ r'content[iI]d=(\d+)',
+ r"'itemId'\s*:\s*'([^']*)'",
+ ]
+ _PLAYLIST_CLIP_REGEXES = [
+ r'(?s)data-qvt=.+?<a href="([^"]+)"',
+ ]
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
+ def _extract_clip(self, url, webpage):
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
access_token = 'testclient'
'duration': duration,
'formats': formats,
}
+
def _extract_playlist(self, url, webpage):
    """Build a playlist result from a playlist page.

    The playlist id is taken from the page first.  Each pattern in
    _PLAYLIST_CLIP_REGEXES is then tried in turn; the first one that finds
    clip paths determines the entries, which are URL results made of the
    scheme-and-host part of url followed by the clip path.  Nothing is
    returned (None) when no pattern matches.
    """
    playlist_id = self._html_search_regex(
        self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')

    for clip_regex in self._PLAYLIST_CLIP_REGEXES:
        clip_paths = re.findall(clip_regex, webpage)
        if not clip_paths:
            continue

        playlist_title = self._html_search_regex(
            self._TITLE_REGEXES, webpage, 'title')
        playlist_description = self._html_search_regex(
            self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)

        # Everything up to (not including) the first slash after "//".
        site_root = re.match('(.+?//.+?)/', url).group(1)
        entries = []
        for clip_path in clip_paths:
            entries.append(
                self.url_result(site_root + clip_path, 'ProSiebenSat1'))

        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)
+
def _real_extract(self, url):
    """Download the page and dispatch on its page type.

    The page type is looked up with _PAGE_TYPE_REGEXES (defaulting to
    'clip') and lower-cased.  'clip' pages go to _extract_clip, 'playlist'
    pages to _extract_playlist; any other type yields None.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    detected_type = self._search_regex(
        self._PAGE_TYPE_REGEXES, webpage,
        'page type', default='clip')
    page_type = detected_type.lower()

    handlers = {
        'clip': self._extract_clip,
        'playlist': self._extract_playlist,
    }
    handler = handlers.get(page_type)
    if handler is None:
        return None
    return handler(url, webpage)
--- /dev/null
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+
+
class RadioDeIE(InfoExtractor):
    """Extractor for live radio station pages on radio.de and sibling domains.

    The station is identified by the host-name prefix of the URL; the stream
    data is read from a JSON object embedded in the page's _getBroadcast
    function.
    """
    IE_NAME = 'radio.de'
    _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
    _TEST = {
        'url': 'http://ndr2.radio.de/',
        'md5': '3b4cdd011bc59174596b6145cda474a4',
        'info_dict': {
            'id': 'ndr2',
            'ext': 'mp3',
            'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'md5:591c49c702db1a33751625ebfb67f273',
            'thumbnail': 're:^https?://.*\.png',
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Return the live-stream info dict for the station page at url."""
        radio_id = self._match_id(url)
        page = self._download_webpage(url, radio_id)

        # The object literal returned by _getBroadcast() is parsed as JSON.
        broadcast_json = self._search_regex(
            r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
            page, 'broadcast')
        station = json.loads(broadcast_json)

        title = self._live_title(station['name'])
        # Prefer the first key of each pair, fall back to the second.
        description = station.get('description') or station.get('shortDescription')
        thumbnail = station.get('picture4Url') or station.get('picture4TransUrl')

        formats = []
        for stream in station['streamUrls']:
            stream_url = stream['streamUrl']
            content_format = stream['streamContentFormat']
            formats.append({
                'url': stream_url,
                'ext': content_format.lower(),
                'acodec': content_format,
                'abr': stream['bitRate'],
                'asr': stream['sampleRate']
            })
        self._sort_formats(formats)

        info = {
            'id': radio_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'is_live': True,
            'formats': formats,
        }
        return info
from .common import InfoExtractor
from ..utils import (
- ExtractorError,
int_or_none,
+ unified_strdate,
)
-class CinemassacreIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
- _TESTS = [
- {
- 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
- 'md5': 'fde81fbafaee331785f58cd6c0d46190',
- 'info_dict': {
- 'id': '19911',
- 'ext': 'mp4',
- 'upload_date': '20121110',
- 'title': '“Angry Video Game Nerd: The Movie” – Trailer',
- 'description': 'md5:fb87405fcb42a331742a0dce2708560b',
- },
- },
- {
- 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
- 'md5': 'd72f10cd39eac4215048f62ab477a511',
- 'info_dict': {
- 'id': '521be8ef82b16',
- 'ext': 'mp4',
- 'upload_date': '20131002',
- 'title': 'The Mummy’s Hand (1940)',
- },
- }
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(url, display_id)
- video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
- mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage)
- if not mobj:
- raise ExtractorError('Can\'t extract embed url and video id')
- playerdata_url = mobj.group('embed_url')
- video_id = mobj.group('video_id')
- full_video_id = mobj.group('full_video_id')
+class ScreenwaveMediaIE(InfoExtractor):
+ _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
- video_title = self._html_search_regex(
- r'<title>(?P<title>.+?)\|', webpage, 'title')
- video_description = self._html_search_regex(
- r'<div class="entry-content">(?P<description>.+?)</div>',
- webpage, 'description', flags=re.DOTALL, fatal=False)
- video_thumbnail = self._og_search_thumbnail(webpage)
+ _TESTS = [{
+ 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
+ 'only_matching': True,
+ }]
- playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
+ vidtitle = self._search_regex(
+ r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
vidurl = self._search_regex(
- r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
+ r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')
videolist_url = None
if mobj:
videoserver = mobj.group('videoserver')
mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
- vidid = mobj.group('vidid') if mobj else full_video_id
+ vidid = mobj.group('vidid') if mobj else video_id
videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
else:
mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
file_ = src.partition(':')[-1]
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
- bitrate = int_or_none(video.get('system-bitrate'))
+ bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
format = {
'url': baseurl + file_,
'format_id': src.rpartition('.')[0].rpartition('_')[-1],
}
if width or height:
format.update({
- 'tbr': bitrate // 1000 if bitrate else None,
+ 'tbr': bitrate,
'width': width,
'height': height,
})
else:
format.update({
- 'abr': bitrate // 1000 if bitrate else None,
+ 'abr': bitrate,
'vcodec': 'none',
})
formats.append(format)
- self._sort_formats(formats)
else:
formats = [{
'url': vidurl,
}]
+ self._sort_formats(formats)
return {
'id': video_id,
- 'title': video_title,
+ 'title': vidtitle,
'formats': formats,
+ }
+
+
class CinemassacreIE(InfoExtractor):
    """Extractor for cinemassacre.com article pages.

    The article only contributes metadata (title, description, thumbnail and
    the upload date encoded in the URL path).  The video itself is resolved
    from the embedded player.screenwavemedia.com URL, which is returned as a
    'url_transparent' result.
    """
    # Raw literal: the pattern contains regex escapes such as "\.".
    _VALID_URL = r'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
    _TESTS = [
        {
            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
            'md5': 'fde81fbafaee331785f58cd6c0d46190',
            'info_dict': {
                'id': 'Cinemassacre-19911',
                'ext': 'mp4',
                'upload_date': '20121110',
                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
            },
        },
        {
            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
            'md5': 'd72f10cd39eac4215048f62ab477a511',
            'info_dict': {
                'id': 'Cinemassacre-521be8ef82b16',
                'ext': 'mp4',
                'upload_date': '20131002',
                'title': 'The Mummy’s Hand (1940)',
            },
        }
    ]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the embedded player."""
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        # YYYYMMDD, assembled from the date components of the URL path.
        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')

        webpage = self._download_webpage(url, display_id)

        playerdata_url = self._search_regex(
            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
            webpage, 'player data URL')
        # The page title is cut at the first "|".
        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?)\|', webpage, 'title')
        video_description = self._html_search_regex(
            r'<div class="entry-content">(?P<description>.+?)</div>',
            webpage, 'description', flags=re.DOTALL, fatal=False)
        video_thumbnail = self._og_search_thumbnail(webpage)

        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'title': video_title,
            'description': video_description,
            'upload_date': video_date,
            'thumbnail': video_thumbnail,
            'url': playerdata_url,
        }
+
+
class TeamFourIE(InfoExtractor):
    """Extractor for teamfourstar.com video pages.

    Metadata is scraped from the page; the media is resolved from the
    embedded player.screenwavemedia.com URL, handed back as a
    'url_transparent' result.
    """
    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
    _TEST = {
        'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
        'info_dict': {
            'id': 'TeamFourStar-5292a02f20bfa',
            'ext': 'mp4',
            'upload_date': '20130401',
            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
            'title': 'A Moment With TFS Episode 4',
        }
    }

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the embedded player."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        # Address of the embedded player; required.
        player_url = self._search_regex(
            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
            page, 'player data URL')

        title = self._html_search_regex(
            r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
            page, 'title')

        # Date and description are optional on the page.
        date_text = self._html_search_regex(
            r'<div class="heroheadingdate">(?P<date>.+?)</div>',
            page, 'date', fatal=False)
        upload_date = unified_strdate(date_text)
        description = self._html_search_regex(
            r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
            page, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(page)

        info = {
            '_type': 'url_transparent',
            'display_id': display_id,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'thumbnail': thumbnail,
            'url': player_url,
        }
        return info
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
- raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)
+ raise ExtractorError(
+ 'Broadcast %s does not exist' % broadcast_id, expected=True)
# Adult content
if re.search('EroConfirmText">', broadcast_page) is not None:
(username, password) = self._get_login_info()
if username is None:
- raise ExtractorError('Erotic broadcasts allowed only for registered users, '
- 'use --username and --password options to provide account credentials.', expected=True)
+ raise ExtractorError(
+ 'Erotic broadcasts allowed only for registered users, '
+ 'use --username and --password options to provide account credentials.',
+ expected=True)
login_form = {
'login-hint53': '1',
'password': password,
}
- request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
+ request = compat_urllib_request.Request(
+ broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
+ broadcast_page = self._download_webpage(
+ request, broadcast_id, 'Logging in and confirming age')
if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
raise ExtractorError('Unable to log in: bad username or password', expected=True)
adult_content = False
ticket = self._html_search_regex(
- 'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
+ r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
broadcast_page, 'broadcast ticket')
url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
if broadcast_password:
url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
- broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')
+ broadcast_json_page = self._download_webpage(
+ url, broadcast_id, 'Downloading broadcast JSON')
try:
broadcast_json = json.loads(broadcast_json_page)
protected_broadcast = broadcast_json['_pass_protected'] == 1
if protected_broadcast and not broadcast_password:
- raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)
+ raise ExtractorError(
+ 'This broadcast is protected by a password, use the --video-password option',
+ expected=True)
broadcast_offline = broadcast_json['is_play'] == 0
if broadcast_offline:
raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
rtmp_url = broadcast_json['_server']
- if not rtmp_url.startswith('rtmp://'):
+ mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
+ if not mobj:
raise ExtractorError('Unexpected broadcast rtmp URL')
broadcast_playpath = broadcast_json['_streamName']
+ broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
broadcast_thumbnail = broadcast_json['_imgURL']
- broadcast_title = broadcast_json['title']
+ broadcast_title = self._live_title(broadcast_json['title'])
broadcast_description = broadcast_json['description']
broadcaster_nick = broadcast_json['nick']
broadcaster_login = broadcast_json['login']
'age_limit': 18 if adult_content else 0,
'ext': 'flv',
'play_path': broadcast_playpath,
+ 'player_url': 'http://pics.smotri.com/broadcast_play.swf',
+ 'app': broadcast_app,
'rtmp_live': True,
- 'rtmp_conn': rtmp_conn
+ 'rtmp_conn': rtmp_conn,
+ 'is_live': True,
}
import re
from .common import InfoExtractor
+from ..utils import parse_filesize
class TagesschauIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/ts|video/video)(?P<id>-?[0-9]+)\.html'
_TESTS = [{
'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
'description': 'md5:69da3c61275b426426d711bde96463ab',
'thumbnail': 're:^http:.*\.jpg$',
},
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
+ 'md5': '3c54c1f6243d279b706bde660ceec633',
+ 'info_dict': {
+ 'id': '5727',
+ 'ext': 'mp4',
+ 'description': 'md5:695c01bfd98b7e313c501386327aea59',
+ 'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
+ 'thumbnail': 're:^http:.*\.jpg$',
+ }
}]
_FORMATS = {
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- if video_id.startswith('-'):
- display_id = video_id.strip('-')
- else:
- display_id = video_id
-
+ video_id = self._match_id(url)
+ display_id = video_id.lstrip('-')
webpage = self._download_webpage(url, display_id)
- playerpage = self._download_webpage(
- 'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
- display_id, 'Downloading player page')
-
- medias = re.findall(
- r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
- playerpage)
+ player_url = self._html_search_meta(
+ 'twitter:player', webpage, 'player URL', default=None)
+ if player_url:
+ playerpage = self._download_webpage(
+ player_url, display_id, 'Downloading player page')
- formats = []
- for url, ext, res in medias:
- f = {
- 'format_id': res + '_' + ext,
- 'url': url,
- 'ext': ext,
- }
- f.update(self._FORMATS.get(res, {}))
- formats.append(f)
+ medias = re.findall(
+ r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
+ playerpage)
+ formats = []
+ for url, ext, res in medias:
+ f = {
+ 'format_id': res + '_' + ext,
+ 'url': url,
+ 'ext': ext,
+ }
+ f.update(self._FORMATS.get(res, {}))
+ formats.append(f)
+ thumbnail_fn = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+ title = self._og_search_title(webpage).strip()
+ description = self._og_search_description(webpage).strip()
+ else:
+ download_text = self._search_regex(
+ r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
+ webpage, 'download links')
+ links = re.finditer(
+ r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
+ download_text)
+ formats = []
+ for l in links:
+ format_id = self._search_regex(
+ r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
+ format = {
+ 'format_id': format_id,
+ 'url': l.group('url'),
+ 'format_name': l.group('name'),
+ }
+ m = re.match(
+ r'''(?x)
+ Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
+ (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
+ (?P<vbr>[0-9]+)kbps&\#10;
+ Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
+ Größe:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
+ l.group('title'))
+ if m:
+ format.update({
+ 'format_note': m.group('audio_desc'),
+ 'vcodec': m.group('vcodec'),
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ 'abr': int(m.group('abr')),
+ 'vbr': int(m.group('vbr')),
+ 'filesize_approx': parse_filesize(m.group('filesize_approx')),
+ })
+ formats.append(format)
+ thumbnail_fn = self._search_regex(
+ r'(?s)<img alt="Sendungsbild".*?src="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+ description = self._html_search_regex(
+ r'(?s)<p class="teasertext">(.*?)</p>',
+ webpage, 'description', fatal=False)
+ title = self._html_search_regex(
+ r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
self._sort_formats(formats)
-
- thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+ thumbnail = 'http://www.tagesschau.de' + thumbnail_fn
return {
'id': display_id,
- 'title': self._og_search_title(webpage).strip(),
- 'thumbnail': 'http://www.tagesschau.de' + thumbnail,
+ 'title': title,
+ 'thumbnail': thumbnail,
'formats': formats,
- 'description': self._og_search_description(webpage).strip(),
+ 'description': description,
}
# encoding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
float_or_none,
- str_to_int,
+ parse_age_limit,
)
class TvigleIE(InfoExtractor):
IE_NAME = 'tvigle'
IE_DESC = 'Интернет-телевидение Tvigle.ru'
- _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$'
+ _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'
_TESTS = [
{
- 'url': 'http://www.tvigle.ru/video/brat/',
- 'md5': 'ff4344a4894b0524441fb6f8218dc716',
+ 'url': 'http://www.tvigle.ru/video/sokrat/',
+ 'md5': '36514aed3657d4f70b4b2cef8eb520cd',
'info_dict': {
- 'id': '5118490',
- 'display_id': 'brat',
- 'ext': 'mp4',
- 'title': 'Ð\91рат',
- 'description': 'md5:d16ac7c0b47052ea51fddb92c4e413eb',
- 'duration': 5722.6,
- 'age_limit': 16,
+ 'id': '1848932',
+ 'display_id': 'sokrat',
+ 'ext': 'flv',
+ 'title': 'Сократ',
+ 'description': 'md5:a05bd01be310074d5833efc6743be95e',
+ 'duration': 6586,
+ 'age_limit': 0,
},
},
{
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('display_id')
+ display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = item['title']
description = item['description']
thumbnail = item['thumbnail']
- duration = float_or_none(item['durationMilliseconds'], 1000)
- age_limit = str_to_int(item['ageRestrictions'])
+ duration = float_or_none(item.get('durationMilliseconds'), 1000)
+ age_limit = parse_age_limit(item.get('ageRestrictions'))
formats = []
for vcodec, fmts in item['videos'].items():
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- ExtractorError,
parse_iso8601,
qualities,
)
'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
if video['is_geo_blocked']:
- raise ExtractorError(
- 'This content is not available in your country due to copyright reasons', expected=True)
+ self.report_warning(
+ 'This content might not be available in your country due to copyright reasons')
streams = self._download_json(
'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
+# coding: utf-8
from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from ..utils import (
+ compat_urllib_parse,
+ compat_urllib_request,
ExtractorError,
parse_iso8601,
)
"""
_PAGE_LIMIT = 100
_API_BASE = 'https://api.twitch.tv'
+ _LOGIN_URL = 'https://secure.twitch.tv/user/login'
_TESTS = [{
'url': 'http://www.twitch.tv/riotgames/b/577357806',
'info_dict': {
'view_count': info['views'],
}
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ authenticity_token = self._search_regex(
+ r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
+ login_page, 'authenticity token')
+
+ login_form = {
+ 'utf8': '✓'.encode('utf-8'),
+ 'authenticity_token': authenticity_token,
+ 'redirect_on_login': '',
+ 'embed_form': 'false',
+ 'mp_source_action': '',
+ 'follow': '',
+ 'user[login]': username,
+ 'user[password]': password,
+ }
+
+ request = compat_urllib_request.Request(
+ self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+ request.add_header('Referer', self._LOGIN_URL)
+ response = self._download_webpage(
+ request, None, 'Logging in as %s' % username)
+
+ m = re.search(
+ r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
+ if m:
+ raise ExtractorError(
+ 'Unable to login: %s' % m.group('msg').strip(), expected=True)
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj.group('chapterid'):
if 'returnUrl' not in response:
raise ExtractorError('Unable to log in')
-
-
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- lecture_id = mobj.group('id')
+ lecture_id = self._match_id(url)
lecture = self._download_json(
'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id,
'id': 'b9KOOWX7HUx',
'ext': 'mp4',
'title': 'Chicken.',
+ 'alt_title': 'Vine by Jack Dorsey',
'description': 'Chicken.',
'upload_date': '20130519',
'uploader': 'Jack Dorsey',
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
data = json.loads(self._html_search_regex(
r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
- formats = [
- {
- 'url': data['videoLowURL'],
- 'ext': 'mp4',
- 'format_id': 'low',
- },
- {
- 'url': data['videoUrl'],
- 'ext': 'mp4',
- 'format_id': 'standard',
- }
- ]
+ formats = [{
+ 'url': data['videoLowURL'],
+ 'ext': 'mp4',
+ 'format_id': 'low',
+ }, {
+ 'url': data['videoUrl'],
+ 'ext': 'mp4',
+ 'format_id': 'standard',
+ }]
return {
'id': video_id,
'title': self._og_search_title(webpage),
+ 'alt_title': self._og_search_description(webpage),
'description': data['description'],
'thumbnail': data['thumbnailUrl'],
'upload_date': unified_strdate(data['created']),
class VineUserIE(InfoExtractor):
IE_NAME = 'vine:user'
- _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
+ _VALID_URL = r'(?:https?://)?vine\.co/(?P<u>u/)?(?P<user>[^/]+)/?(\?.*)?$'
_VINE_BASE_URL = "https://vine.co/"
- _TEST = {
- 'url': 'https://vine.co/Visa',
- 'info_dict': {
- 'id': 'Visa',
+ _TESTS = [
+ {
+ 'url': 'https://vine.co/Visa',
+ 'info_dict': {
+ 'id': 'Visa',
+ },
+ 'playlist_mincount': 46,
},
- 'playlist_mincount': 46,
- }
+ {
+ 'url': 'https://vine.co/u/941705360593584128',
+ 'only_matching': True,
+ },
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user = mobj.group('user')
+ u = mobj.group('u')
- profile_url = "%sapi/users/profiles/vanity/%s" % (
- self._VINE_BASE_URL, user)
+ profile_url = "%sapi/users/profiles/%s%s" % (
+ self._VINE_BASE_URL, 'vanity/' if not u else '', user)
profile_data = self._download_json(
profile_url, user, note='Downloading user profile data')
user_id = profile_data['data']['userId']
timeline_data = []
for pagenum in itertools.count(1):
- timeline_url = "%sapi/timelines/users/%s?page=%s" % (
+ timeline_url = "%sapi/timelines/users/%s?page=%s&size=100" % (
self._VINE_BASE_URL, user_id, pagenum)
timeline_page = self._download_json(
timeline_url, user, note='Downloading page %d' % pagenum)
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..compat import (
compat_chr,
'tbr': 320,
'filesize_approx': 5900000,
'view_count': int,
+ 'description': 'md5:03238c5b663810bc79cf42ef3c03e371',
}
}
view_count = int_or_none(self._html_search_regex(
r'<div class="quality.*?► ([0-9]+)',
webpage, 'view count', fatal=False))
+ description = self._html_search_regex(
+ r'(?s)<div id="song_texts">(.*?)</div><br',
+ webpage, 'song lyrics', fatal=False)
+ if description:
+ description = re.sub(' *\r *', '\n', description)
enc_token = self._html_search_regex(
r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
'filesize_approx': filesize_approx,
'tbr': tbr,
'view_count': view_count,
+ 'description': description,
}
age_limit = self._rta_search(webpage)
# Get JSON parameters
- json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
+ json_params = self._search_regex(
+ r'var currentVideo = new Video\((.*)\)[,;]',
+ webpage, 'JSON parameters')
try:
params = json.loads(json_params)
except:
from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
-from ..utils import (
+from ..compat import (
compat_chr,
compat_parse_qs,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
compat_str,
-
+)
+from ..utils import (
clean_html,
- get_element_by_id,
- get_element_by_attribute,
ExtractorError,
+ get_element_by_attribute,
+ get_element_by_id,
int_or_none,
OnDemandPagedList,
+ orderedSet,
unescapeHTML,
unified_strdate,
- orderedSet,
uppercase_escape,
)
_LOGIN_REQUIRED = False
def _set_language(self):
- self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en',
+ self._set_cookie(
+ '.youtube.com', 'PREF', 'f1=50000000&hl=en',
# YouTube sets the expire time to about two months
- expire_time=time.time() + 60*24*3600)
+ expire_time=time.time() + 2 * 30 * 24 * 3600)
def _login(self):
"""
'upload_date': '20140605',
},
},
+ # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
+ {
+ 'url': '__2ABJjxzNo',
+ 'info_dict': {
+ 'id': '__2ABJjxzNo',
+ 'ext': 'mp4',
+ 'upload_date': '20100430',
+ 'uploader_id': 'deadmau5',
+ 'description': 'md5:12c56784b8032162bb936a5f76d55360',
+ 'uploader': 'deadmau5',
+ 'title': 'Deadmau5 - Some Chords (HD)',
+ },
+ 'expected_warnings': [
+ 'DASH manifest missing',
+ ]
+ },
+ # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
+ {
+ 'url': 'lqQg6PlCWgI',
+ 'info_dict': {
+ 'id': 'lqQg6PlCWgI',
+ 'ext': 'mp4',
+ 'upload_date': '20120731',
+ 'uploader_id': 'olympic',
+ 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+ 'uploader': 'Olympics',
+ 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ }
+ },
]
def __init__(self, *args, **kwargs):
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
+ def _parse_dash_manifest(
+ self, video_id, dash_manifest_url, player_url, age_gate):
+ def decrypt_sig(mobj):
+ s = mobj.group(1)
+ dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
+ return '/signature/%s' % dec_s
+ dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
+ dash_doc = self._download_xml(
+ dash_manifest_url, video_id,
+ note='Downloading DASH manifest',
+ errnote='Could not download DASH manifest')
+
+ formats = []
+ for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+ url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+ if url_el is None:
+ continue
+ format_id = r.attrib['id']
+ video_url = url_el.text
+ filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+ f = {
+ 'format_id': format_id,
+ 'url': video_url,
+ 'width': int_or_none(r.attrib.get('width')),
+ 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+ 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+ 'filesize': filesize,
+ 'fps': int_or_none(r.attrib.get('frameRate')),
+ }
+ try:
+ existing_format = next(
+ fo for fo in formats
+ if fo['format_id'] == format_id)
+ except StopIteration:
+ f.update(self._formats.get(format_id, {}))
+ formats.append(f)
+ else:
+ existing_format.update(f)
+ return formats
+
def _real_extract(self, url):
proto = (
'http' if self._downloader.params.get('prefer_insecure', False)
# We fallback to the get_video_info pages (used by the embed page)
self.report_video_info_webpage_download(video_id)
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
- video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
- % (video_id, el_type))
- video_info_webpage = self._download_webpage(video_info_url,
+ video_info_url = (
+ '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+ % (proto, video_id, el_type))
+ video_info_webpage = self._download_webpage(
+ video_info_url,
video_id, note=False,
errnote='unable to download video info webpage')
video_info = compat_parse_qs(video_info_webpage)
m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
- video_webpage, 'categories', fatal=False)
+ video_webpage, 'categories', default=None)
if m_cat_container:
category = self._html_search_regex(
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
'url': video_info['conn'][0],
'player_url': player_url,
}]
- elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
+ elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
# Look for the DASH manifest
if self._downloader.params.get('youtube_include_dash_manifest', True):
- try:
- # The DASH manifest used needs to be the one from the original video_webpage.
- # The one found in get_video_info seems to be using different signatures.
- # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
- # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
- # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
- dash_manifest_url = video_info.get('dashmpd')[0]
-
- def decrypt_sig(mobj):
- s = mobj.group(1)
- dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
- return '/signature/%s' % dec_s
- dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
- dash_doc = self._download_xml(
- dash_manifest_url, video_id,
- note='Downloading DASH manifest',
- errnote='Could not download DASH manifest')
- for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
- url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
- if url_el is None:
- continue
- format_id = r.attrib['id']
- video_url = url_el.text
- filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
- f = {
- 'format_id': format_id,
- 'url': video_url,
- 'width': int_or_none(r.attrib.get('width')),
- 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
- 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
- 'filesize': filesize,
- 'fps': int_or_none(r.attrib.get('frameRate')),
- }
- try:
- existing_format = next(
- fo for fo in formats
- if fo['format_id'] == format_id)
- except StopIteration:
- f.update(self._formats.get(format_id, {}))
- formats.append(f)
- else:
- existing_format.update(f)
-
- except (ExtractorError, KeyError) as e:
- self.report_warning('Skipping DASH manifest: %r' % e, video_id)
+ dash_mpd = video_info.get('dashmpd')
+ if dash_mpd:
+ dash_manifest_url = dash_mpd[0]
+ try:
+ dash_formats = self._parse_dash_manifest(
+ video_id, dash_manifest_url, player_url, age_gate)
+ except (ExtractorError, KeyError) as e:
+ self.report_warning(
+ 'Skipping DASH manifest: %r' % e, video_id)
+ else:
+ formats.extend(dash_formats)
self._sort_formats(formats)
class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
- _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
+ _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
IE_NAME = 'youtube:channel'
return ids_in_page
def _real_extract(self, url):
- # Extract channel id
- mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
+ channel_id = self._match_id(url)
- # Download channel page
- channel_id = mobj.group(1)
video_ids = []
url = 'https://www.youtube.com/channel/%s/videos' % channel_id
channel_page = self._download_webpage(url, channel_id)
# The videos are contained in a single page
# the ajax pages can't be used, they are empty
video_ids = self.extract_videos_from_page(channel_page)
- else:
- # Download all channel pages using the json-based channel_ajax query
+ entries = [
+ self.url_result(video_id, 'Youtube', video_id=video_id)
+ for video_id in video_ids]
+ return self.playlist_result(entries, channel_id)
+
+ def _entries():
for pagenum in itertools.count(1):
url = self._MORE_PAGES_URL % (pagenum, channel_id)
page = self._download_json(
transform_source=uppercase_escape)
ids_in_page = self.extract_videos_from_page(page['content_html'])
- video_ids.extend(ids_in_page)
+ for video_id in ids_in_page:
+ yield self.url_result(
+ video_id, 'Youtube', video_id=video_id)
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
break
- self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
-
- url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
- for video_id in video_ids]
- return self.playlist_result(url_entries, channel_id)
+ return self.playlist_result(_entries(), channel_id)
class YoutubeUserIE(InfoExtractor):
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
- _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+ _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
_GDATA_PAGE_SIZE = 50
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
return super(YoutubeUserIE, cls).suitable(url)
def _real_extract(self, url):
- # Extract username
- mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
-
- username = mobj.group(1)
+ username = self._match_id(url)
# Download video ids using YouTube Data API. Result size per
# query is limited (currently to 50 videos) so we need to query
# coding: utf-8
from __future__ import unicode_literals
+import functools
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
+ OnDemandPagedList,
)
class ZDFIE(InfoExtractor):
- _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
+ _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
_TEST = {
'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
def _real_extract(self, url):
video_id = self._match_id(url)
-
xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
return extract_from_xml_url(self, video_id, xml_url)
+
+
+class ZDFChannelIE(InfoExtractor):
+ _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
+ 'info_dict': {
+ 'id': '1586442',
+ },
+ 'playlist_count': 4,
+ }
+ _PAGE_SIZE = 50
+
+ def _fetch_page(self, channel_id, page):
+ offset = page * self._PAGE_SIZE
+ xml_url = (
+ 'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
+ % (offset, self._PAGE_SIZE, channel_id))
+ doc = self._download_xml(
+ xml_url, channel_id,
+ note='Downloading channel info',
+ errnote='Failed to download channel info')
+
+ title = doc.find('.//information/title').text
+ description = doc.find('.//information/detail').text
+ for asset in doc.findall('.//teasers/teaser'):
+ a_type = asset.find('./type').text
+ a_id = asset.find('./details/assetId').text
+ if a_type not in ('video', 'topic'):
+ continue
+ yield {
+ '_type': 'url',
+ 'playlist_title': title,
+ 'playlist_description': description,
+ 'url': 'zdf:%s:%s' % (a_type, a_id),
+ }
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)
+
+ return {
+ '_type': 'playlist',
+ 'id': channel_id,
+ 'entries': entries,
+ }
general.add_option(
'--ignore-config',
action='store_true',
- help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+ help='Do not read configuration files. '
+ 'When given in the global configuration file /etc/youtube-dl.conf: '
+ 'Do not read the user configuration in ~/.config/youtube-dl/config '
+ '(%APPDATA%/youtube-dl/config.txt on Windows)')
general.add_option(
'--flat-playlist',
action='store_const', dest='extract_flat', const='in_playlist',
def run(self, information):
cmd = self.exec_cmd
- if not '{}' in cmd:
+ if '{}' not in cmd:
cmd += ' {}'
cmd = cmd.replace('{}', shlex_quote(information['filepath']))
if not self._executable:
raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
- REQUIRED_VERSION = '1.0'
+ required_version = '10-0' if self._uses_avconv() else '1.0'
if is_outdated_version(
- self._versions[self._executable], REQUIRED_VERSION):
+ self._versions[self._executable], required_version):
warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
- self._executable, self._executable, REQUIRED_VERSION)
+ self._executable, self._executable, required_version)
if self._downloader:
self._downloader.report_warning(warning)
to_screen(compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
return
- if not 'signature' in versions_info:
+ if 'signature' not in versions_info:
to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
return
signature = versions_info['signature']
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
+ compat_WINFUNCTYPE,
shlex_quote,
)
xpath = xpath.encode('ascii')
n = node.find(xpath)
- if n is None:
+ if n is None or n.text is None:
if fatal:
name = xpath if name is None else name
raise ExtractorError('Could not find XML element %s' % name)
return calendar.timegm(dt.timetuple())
-def unified_strdate(date_str):
+def unified_strdate(date_str, day_first=True):
"""Return a string with the date in the format YYYYMMDD"""
if date_str is None:
return None
-
upload_date = None
# Replace commas
date_str = date_str.replace(',', ' ')
# %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+ # Remove AM/PM + timezone
+ date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str)
+
format_expressions = [
'%d %B %Y',
'%d %b %Y',
'%d/%m/%Y',
'%d/%m/%y',
'%Y/%m/%d %H:%M:%S',
- '%d/%m/%Y %H:%M:%S',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
'%d.%m.%Y %H:%M',
'%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
]
+ if day_first:
+ format_expressions.extend([
+ '%d/%m/%Y %H:%M:%S',
+ ])
+ else:
+ format_expressions.extend([
+ '%m/%d/%Y %H:%M:%S',
+ ])
for expression in format_expressions:
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
Return a datetime object from a string in the format YYYYMMDD or
(now|today)[+-][0-9](day|week|month|year)(s)?"""
today = datetime.date.today()
- if date_str == 'now'or date_str == 'today':
+ if date_str in ('now', 'today'):
return today
+ if date_str == 'yesterday':
+ return today - datetime.timedelta(days=1)
match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
if match is not None:
sign = match.group('sign')
if fileno not in WIN_OUTPUT_IDS:
return False
- GetStdHandle = ctypes.WINFUNCTYPE(
+ GetStdHandle = compat_WINFUNCTYPE(
ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
("GetStdHandle", ctypes.windll.kernel32))
h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
- WriteConsoleW = ctypes.WINFUNCTYPE(
+ WriteConsoleW = compat_WINFUNCTYPE(
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
written = ctypes.wintypes.DWORD(0)
- GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
+ GetFileType = compat_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
FILE_TYPE_CHAR = 0x0002
FILE_TYPE_REMOTE = 0x8000
- GetConsoleMode = ctypes.WINFUNCTYPE(
+ GetConsoleMode = compat_WINFUNCTYPE(
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
ctypes.POINTER(ctypes.wintypes.DWORD))(
("GetConsoleMode", ctypes.windll.kernel32))
def unsmuggle_url(smug_url, default=None):
- if not '#__youtubedl_smuggle' in smug_url:
+ if '#__youtubedl_smuggle' not in smug_url:
return smug_url, default
url, _, sdata = smug_url.rpartition('#')
jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
}
units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
- m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
+ m = re.match(
+ r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
if not m:
return None
- return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')])
+ num_str = m.group('num').replace(',', '.')
+ mult = _UNIT_TABLE[m.group('unit')]
+ return int(float(num_str) * mult)
def get_term_width():
m = re.match(
r'''(?ix)T?
+ (?:
+ (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
+ (?P<only_hours>[0-9.]+)\s*(?:hours?)|
+
(?:
(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
)?
- (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s)
+ (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
+ )$''', s)
if not m:
return None
- res = int(m.group('secs'))
+ res = 0
+ if m.group('only_mins'):
+ return float_or_none(m.group('only_mins'), invscale=60)
+ if m.group('only_hours'):
+ return float_or_none(m.group('only_hours'), invscale=60 * 60)
+ if m.group('secs'):
+ res += int(m.group('secs'))
if m.group('mins'):
res += int(m.group('mins')) * 60
- if m.group('hours'):
- res += int(m.group('hours')) * 60 * 60
+ if m.group('hours'):
+ res += int(m.group('hours')) * 60 * 60
if m.group('ms'):
res += float(m.group('ms'))
return res
def version_tuple(v):
- return [int(e) for e in v.split('.')]
+ return tuple(int(e) for e in re.split(r'[-.]', v))
def is_outdated_version(version, limit, assume_new=True):
from __future__ import unicode_literals
-__version__ = '2014.12.03'
+__version__ = '2014.12.12.1'