From: Sergey M․ Date: Sat, 11 Apr 2015 13:34:06 +0000 (+0600) Subject: Merge branch 'crooksandliars' of https://github.com/fstirlitz/youtube-dl into fstirli... X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=af14ded75e10653b4713c23f8c428c6cd88610ad;hp=6e53c91608d1c43a9fe1614f13a15db74e877a91;p=youtube-dl.git Merge branch 'crooksandliars' of https://github.com/fstirlitz/youtube-dl into fstirlitz-crooksandliars --- diff --git a/AUTHORS b/AUTHORS index 512469f4c..9c65dc1d4 100644 --- a/AUTHORS +++ b/AUTHORS @@ -117,3 +117,8 @@ Alexander Mamay Devin J. Pohly Eduardo Ferro Aldama Jeff Buchbinder +Amish Bhadeshia +Joram Schrijver +Will W. +Mohammad Teimori Pabandi +Roman Le Négrate diff --git a/Makefile b/Makefile index c6c76274f..fdb1abb60 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas clean: rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe - find -name "*.pyc" -delete + find . -name "*.pyc" -delete PREFIX ?= /usr/local BINDIR ?= $(PREFIX)/bin diff --git a/README.md b/README.md index 4f9fc8174..caa1478d9 100644 --- a/README.md +++ b/README.md @@ -45,21 +45,21 @@ which means you can modify it, redistribute it or use it however you like. youtube-dl [OPTIONS] URL [URL...] # OPTIONS - -h, --help print this help text and exit - --version print program version and exit - -U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed) - -i, --ignore-errors continue on download errors, for example to skip unavailable videos in a playlist + -h, --help Print this help text and exit + --version Print program version and exit + -U, --update Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed) + -i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist --abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs - --dump-user-agent display the current browser identification + --dump-user-agent Display the current browser identification --list-extractors List all supported extractors and the URLs they would handle --extractor-descriptions Output descriptions of all supported extractors - --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". + --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching. --ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows) --flat-playlist Do not extract the videos of a playlist, only list them. - --no-color Do not emit color codes in output. + --no-color Do not emit color codes in output ## Network Options: --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection @@ -71,70 +71,70 @@ which means you can modify it, redistribute it or use it however you like. not present) is used for the actual downloading. (experimental) ## Video Selection: - --playlist-start NUMBER playlist video to start at (default is 1) - --playlist-end NUMBER playlist video to end at (default is last) - --playlist-items ITEM_SPEC playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" + --playlist-start NUMBER Playlist video to start at (default is 1) + --playlist-end NUMBER Playlist video to end at (default is last) + --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13. - --match-title REGEX download only matching titles (regex or caseless sub-string) - --reject-title REGEX skip download for matching titles (regex or caseless sub-string) + --match-title REGEX Download only matching titles (regex or caseless sub-string) + --reject-title REGEX Skip download for matching titles (regex or caseless sub-string) --max-downloads NUMBER Abort after downloading NUMBER files --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m) --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m) - --date DATE download only videos uploaded in this date - --datebefore DATE download only videos uploaded on or before this date (i.e. inclusive) - --dateafter DATE download only videos uploaded on or after this date (i.e. inclusive) + --date DATE Download only videos uploaded in this date + --datebefore DATE Download only videos uploaded on or before this date (i.e. inclusive) + --dateafter DATE Download only videos uploaded on or after this date (i.e. inclusive) --min-views COUNT Do not download any videos with less than COUNT views --max-views COUNT Do not download any videos with more than COUNT views - --match-filter FILTER (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, + --match-filter FILTER Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count 10M]"). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, @@ -194,44 +194,44 @@ which means you can modify it, redistribute it or use it however you like. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f + (requires ffmpeg or avconv), for example -f bestvideo+bestaudio. - --all-formats download all available video formats - --prefer-free-formats prefer free video formats unless a specific one is requested - --max-quality FORMAT highest quality format to download - -F, --list-formats list all available formats + --all-formats Download all available video formats + --prefer-free-formats Prefer free video formats unless a specific one is requested + --max-quality FORMAT Highest quality format to download + -F, --list-formats List all available formats --youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no merge is required ## Subtitle Options: - --write-sub write subtitle file - --write-auto-sub write automatic subtitle file (youtube only) - --all-subs downloads all the available subtitles of the video - --list-subs lists all available subtitles for the video - --sub-format FORMAT subtitle format, accepts formats preference, for example: "ass/srt/best" - --sub-lang LANGS languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt' + --write-sub Write subtitle file + --write-auto-sub Write automatic subtitle file (YouTube only) + --all-subs Download all the available subtitles of the video + --list-subs List all available subtitles for the video + --sub-format FORMAT Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best" + --sub-lang LANGS Languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt' ## Authentication Options: - -u, --username USERNAME login with this account ID - -p, --password PASSWORD account password. If this option is left out, youtube-dl will ask interactively. - -2, --twofactor TWOFACTOR two-factor auth code - -n, --netrc use .netrc authentication data - --video-password PASSWORD video password (vimeo, smotri) + -u, --username USERNAME Login with this account ID + -p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively. + -2, --twofactor TWOFACTOR Two-factor auth code + -n, --netrc Use .netrc authentication data + --video-password PASSWORD Video password (vimeo, smotri) ## Post-processing Options: - -x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe) - --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default - --audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K - (default 5) + -x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe) + --audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default + --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default + 5) --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv) - -k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default - --no-post-overwrites do not overwrite post-processed files; the post-processed files are overwritten by default - --embed-subs embed subtitles in the video (only for mp4 videos) - --embed-thumbnail embed thumbnail in the audio as cover art - --add-metadata write metadata to the video file - --metadata-from-title FORMAT parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed + -k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default + --no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default + --embed-subs Embed subtitles in the video (only for mp4 videos) + --embed-thumbnail Embed thumbnail in the audio as cover art + --add-metadata Write metadata to the video file + --metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise" - --xattrs write metadata to the video file's xattrs (using dublin core and xdg standards) + --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards) --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default; fix file if we can, warn otherwise) --prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default) diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py index 6a5bd9eda..7a219ebe9 100644 --- a/devscripts/check-porn.py +++ b/devscripts/check-porn.py @@ -28,7 +28,7 @@ for test in get_testcases(): if METHOD == 'EURISTIC': try: webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read() - except: + except Exception: print('\nFail: {0}'.format(test['name'])) continue diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py new file mode 100644 index 000000000..2e389fc8e --- /dev/null +++ b/devscripts/generate_aes_testdata.py @@ -0,0 +1,42 @@ +from __future__ import unicode_literals + +import codecs +import subprocess + +import os +import sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.utils import intlist_to_bytes +from youtube_dl.aes import aes_encrypt, key_expansion + +secret_msg = b'Secret message goes here' + + +def hex_str(int_list): + return codecs.encode(intlist_to_bytes(int_list), 'hex') + + +def openssl_encode(algo, key, iv): + cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)] + prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + out, _ = prog.communicate(secret_msg) + return out + +iv = key = [0x20, 0x15] + 14 * [0] + +r = openssl_encode('aes-128-cbc', key, iv) +print('aes_cbc_decrypt') +print(repr(r)) + +password = key +new_key = aes_encrypt(password, key_expansion(password)) +r = openssl_encode('aes-128-ctr', new_key, iv) +print('aes_decrypt_text 16') +print(repr(r)) + +password = key + 16 * [0] +new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16) +r = openssl_encode('aes-256-ctr', new_key, iv) +print('aes_decrypt_text 32') +print(repr(r)) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 72b365305..c85a39918 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -2,6 +2,8 @@ - **1tv**: Первый канал - **1up.com** - **220.ro** + - **22tracks:genre** + - **22tracks:track** - **24video** - **3sat** - **4tube** @@ -109,6 +111,7 @@ - **DctpTv** - **DeezerPlaylist** - **defense.gouv.fr** + - **DHM**: Filmarchiv - Deutsches Historisches Museum - **Discovery** - **divxstage**: DivxStage - **Dotsub** @@ -118,6 +121,7 @@ - **DrTuber** - **DRTV** - **Dump** + - **Dumpert** - **dvtv**: http://video.aktualne.cz/ - **EaglePlatform** - **EbaumsWorld** @@ -162,6 +166,7 @@ - **Gamekings** - **GameOne** - **gameone:playlist** + - **Gamersyde** - **GameSpot** - **GameStar** - **Gametrailers** @@ -231,6 +236,7 @@ - **Letv** - **LetvPlaylist** - **LetvTv** + - **Libsyn** - **lifenews**: LIFE | NEWS - **LiveLeak** - **livestream** @@ -250,6 +256,7 @@ - **Mgoon** - **Minhateca** - **MinistryGrid** + - **miomio.tv** - **mitele.es** - **mixcloud** - **MLB** @@ -283,6 +290,8 @@ - **NBA** - **NBC** - **NBCNews** + - **NBCSports** + - **NBCSportsVPlayer** - **ndr**: NDR.de - Mediathek - **NDTV** - **NerdCubedFeed** @@ -310,6 +319,7 @@ - **npo.nl:radio** - **npo.nl:radio:fragment** - **NRK** + - **NRKPlaylist** - **NRKTV** - **ntv.ru** - **Nuvid** @@ -342,6 +352,7 @@ - **PornHub** - **PornHubPlaylist** - **Pornotube** + - **PornoVoisines** - **PornoXO** - **PrimeShareTV** - **PromptFile** @@ -353,6 +364,7 @@ - **radio.de** - **radiobremen** - **radiofrance** + - **RadioJavan** - **Rai** - **RBMARadio** - **RedTube** @@ -378,6 +390,8 @@ - **rutube:movie**: Rutube movies - **rutube:person**: Rutube person videos - **RUTV**: RUTV.RU + - **safari**: safaribooksonline.com online video + - **safari:course**: safaribooksonline.com online courses - **Sandia**: Sandia National Laboratories - **Sapo**: SAPO Vídeos - **savefrom.net** @@ -411,6 +425,7 @@ - **southpark.cc.com** - **southpark.de** - **Space** + - **SpankBang** - **Spankwire** - **Spiegel** - **Spiegel:Article**: Articles on spiegel.de @@ -490,14 +505,17 @@ - **Ubu** - **udemy** - **udemy:course** + - **UDNEmbed** - **Ultimedia** - **Unistra** - **Urort**: NRK P3 Urørt - **ustream** - **ustream:channel** + - **Varzesh3** - **Vbox7** - **VeeHD** - **Veoh** + - **Vessel** - **Vesti**: Вести.Ru - **Vevo** - **VGTV** @@ -586,7 +604,7 @@ - **youtube:show**: YouTube.com (multi-season) shows - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) - - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) + - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) - **Zapiks** - **ZDF** - **ZDFChannel** diff --git a/test/test_aes.py b/test/test_aes.py new file mode 100644 index 000000000..4dc7de7b5 --- /dev/null +++ b/test/test_aes.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text +from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes +import base64 + +# the encrypted data can be generate with 'devscripts/generate_aes_testdata.py' + + +class TestAES(unittest.TestCase): + def setUp(self): + self.key = self.iv = [0x20, 0x15] + 14 * [0] + self.secret_msg = b'Secret message goes here' + + def test_encrypt(self): + msg = b'message' + key = list(range(16)) + encrypted = aes_encrypt(bytes_to_intlist(msg), key) + decrypted = intlist_to_bytes(aes_decrypt(encrypted, key)) + self.assertEqual(decrypted, msg) + + def test_cbc_decrypt(self): + data = bytes_to_intlist( + b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd" + ) + decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv)) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + + def test_decrypt_text(self): + password = intlist_to_bytes(self.key).decode('utf-8') + encrypted = base64.b64encode( + intlist_to_bytes(self.iv[:8]) + + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' + ) + decrypted = (aes_decrypt_text(encrypted, password, 16)) + self.assertEqual(decrypted, self.secret_msg) + + password = intlist_to_bytes(self.key).decode('utf-8') + encrypted = base64.b64encode( + intlist_to_bytes(self.iv[:8]) + + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' + ) + decrypted = (aes_decrypt_text(encrypted, password, 32)) + self.assertEqual(decrypted, self.secret_msg) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 6ae168b7f..a9db42b30 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -59,7 +59,7 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user']) def test_youtube_feeds(self): - self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later']) + self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater']) self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions']) self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites']) diff --git a/test/test_execution.py b/test/test_execution.py index 60df187de..620db080e 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -1,4 +1,6 @@ #!/usr/bin/env python +# coding: utf-8 + from __future__ import unicode_literals import unittest @@ -6,6 +8,9 @@ import unittest import sys import os import subprocess +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.utils import encodeArgument rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -27,5 +32,12 @@ class TestExecution(unittest.TestCase): def test_main_exec(self): subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + def test_cmdline_umlauts(self): + p = subprocess.Popen( + [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'], + cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) + _, stderr = p.communicate() + self.assertFalse(stderr) + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 3431ad24e..2e3a6480c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -24,6 +24,7 @@ from youtube_dl.utils import ( encodeFilename, escape_rfc3986, escape_url, + ExtractorError, find_xpath_attr, fix_xml_ampersands, InAdvancePagedList, @@ -54,6 +55,7 @@ from youtube_dl.utils import ( urlencode_postdata, version_tuple, xpath_with_ns, + xpath_text, render_table, match_str, ) @@ -198,6 +200,8 @@ class TestUtil(unittest.TestCase): def test_unescape_html(self): self.assertEqual(unescapeHTML('%20;'), '%20;') + self.assertEqual(unescapeHTML('/'), '/') + self.assertEqual(unescapeHTML('/'), '/') self.assertEqual( unescapeHTML('é'), 'é') @@ -223,6 +227,7 @@ class TestUtil(unittest.TestCase): self.assertEqual( unified_strdate('2/2/2015 6:47:40 PM', day_first=False), '20150202') + self.assertEqual(unified_strdate('25-09-2014'), '20140925') def test_find_xpath_attr(self): testxml = ''' @@ -250,6 +255,17 @@ class TestUtil(unittest.TestCase): self.assertEqual(find('media:song/media:author').text, 'The Author') self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3') + def test_xpath_text(self): + testxml = ''' +
+

Foo

+
+
''' + doc = xml.etree.ElementTree.fromstring(testxml) + self.assertEqual(xpath_text(doc, 'div/p'), 'Foo') + self.assertTrue(xpath_text(doc, 'div/bar') is None) + self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True) + def test_smuggle_url(self): data = {"ö": "ö", "abc": [3]} url = 'https://foo.bar/baz?x=y#a' @@ -455,6 +471,12 @@ class TestUtil(unittest.TestCase): self.assertEqual(d['x'], 1) self.assertEqual(d['y'], 'a') + on = js_to_json('["abc", "def",]') + self.assertEqual(json.loads(on), ['abc', 'def']) + + on = js_to_json('{"abc": "def",}') + self.assertEqual(json.loads(on), {'abc': 'def'}) + def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5a83bc956..640b8c99d 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -328,9 +328,6 @@ class YoutubeDL(object): 'Parameter outtmpl is bytes, but should be a unicode string. ' 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.') - if '%(stitle)s' in self.params.get('outtmpl', ''): - self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') - self._setup_opener() if auto_init: @@ -1218,9 +1215,6 @@ class YoutubeDL(object): if len(info_dict['title']) > 200: info_dict['title'] = info_dict['title'][:197] + '...' - # Keep for backwards compatibility - info_dict['stitle'] = info_dict['title'] - if 'format' not in info_dict: info_dict['format'] = info_dict['ext'] @@ -1707,10 +1701,10 @@ class YoutubeDL(object): out = out.decode().strip() if re.match('[0-9a-f]+', out): self._write_string('[debug] Git HEAD: ' + out + '\n') - except: + except Exception: try: sys.exc_clear() - except: + except Exception: pass self._write_string('[debug] Python version %s - %s\n' % ( platform.python_version(), platform_name())) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 852b2fc3d..1c8b411b7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -189,10 +189,6 @@ def _real_main(argv=None): if opts.allsubtitles and not opts.writeautomaticsub: opts.writesubtitles = True - if sys.version_info < (3,): - # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) - if opts.outtmpl is not None: - opts.outtmpl = opts.outtmpl.decode(preferredencoding()) outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index b2bf149ef..973bcd320 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -389,7 +389,7 @@ else: stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = sp.communicate() lines, columns = map(int, out.split()) - except: + except Exception: pass return _terminal_size(columns, lines) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 8ed5c19a6..a0fc5ead0 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -204,7 +204,7 @@ class FileDownloader(object): return try: os.utime(filename, (time.time(), filetime)) - except: + except Exception: pass return filetime @@ -318,7 +318,7 @@ class FileDownloader(object): ) continuedl_and_exists = ( - self.params.get('continuedl', False) and + self.params.get('continuedl', True) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False) ) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 4047d7167..d136bebd1 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -49,7 +49,7 @@ class HttpFD(FileDownloader): open_mode = 'wb' if resume_len != 0: - if self.params.get('continuedl', False): + if self.params.get('continuedl', True): self.report_resuming_byte(resume_len) request.add_header('Range', 'bytes=%d-' % resume_len) open_mode = 'ab' diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 89e98ae61..ddf5724ae 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -105,7 +105,7 @@ class RtmpFD(FileDownloader): protocol = info_dict.get('rtmp_protocol', None) real_time = info_dict.get('rtmp_real_time', False) no_resume = info_dict.get('no_resume', False) - continue_dl = info_dict.get('continuedl', False) + continue_dl = info_dict.get('continuedl', True) self.report_destination(filename) tmpfilename = self.temp_name(filename) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index dc272af82..894aa5b43 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -107,6 +107,7 @@ from .dbtv import DBTVIE from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .dfb import DFBIE +from .dhm import DHMIE from .dotsub import DotsubIE from .douyutv import DouyuTVIE from .dreisat import DreiSatIE @@ -115,6 +116,7 @@ from .drtuber import DrTuberIE from .drtv import DRTVIE from .dvtv import DVTVIE from .dump import DumpIE +from .dumpert import DumpertIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE from .divxstage import DivxStageIE @@ -176,6 +178,7 @@ from .gameone import ( GameOneIE, GameOnePlaylistIE, ) +from .gamersyde import GamersydeIE from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gametrailers import GametrailersIE @@ -251,6 +254,7 @@ from .letv import ( LetvTvIE, LetvPlaylistIE ) +from .libsyn import LibsynIE from .lifenews import LifeNewsIE from .liveleak import LiveLeakIE from .livestream import ( @@ -274,6 +278,7 @@ from .metacritic import MetacriticIE from .mgoon import MgoonIE from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE +from .miomio import MioMioIE from .mit import TechTVMITIE, MITIE, OCWMITIE from .mitele import MiTeleIE from .mixcloud import MixcloudIE @@ -309,6 +314,8 @@ from .nba import NBAIE from .nbc import ( NBCIE, NBCNewsIE, + NBCSportsIE, + NBCSportsVPlayerIE, ) from .ndr import NDRIE from .ndtv import NDTVIE @@ -347,6 +354,7 @@ from .npo import ( ) from .nrk import ( NRKIE, + NRKPlaylistIE, NRKTVIE, ) from .ntvde import NTVDeIE @@ -381,6 +389,7 @@ from .pornhub import ( PornHubPlaylistIE, ) from .pornotube import PornotubeIE +from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE from .primesharetv import PrimeShareTVIE from .promptfile import PromptFileIE @@ -390,6 +399,7 @@ from .pyvideo import PyvideoIE from .quickvid import QuickVidIE from .r7 import R7IE from .radiode import RadioDeIE +from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE from .rai import RaiIE @@ -419,6 +429,10 @@ from .rutube import ( ) from .rutv import RUTVIE from .sandia import SandiaIE +from .safari import ( + SafariIE, + SafariCourseIE, +) from .sapo import SapoIE from .savefrom import SaveFromIE from .sbs import SBSIE @@ -458,6 +472,7 @@ from .southpark import ( SouthparkDeIE, ) from .space import SpaceIE +from .spankbang import SpankBangIE from .spankwire import SpankwireIE from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE @@ -525,6 +540,10 @@ from .tvp import TvpIE, TvpSeriesIE from .tvplay import TVPlayIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE +from .twentytwotracks import ( + TwentyTwoTracksIE, + TwentyTwoTracksGenreIE +) from .twitch import ( TwitchVideoIE, TwitchChapterIE, @@ -539,13 +558,16 @@ from .udemy import ( UdemyIE, UdemyCourseIE ) +from .udn import UDNEmbedIE from .ultimedia import UltimediaIE from .unistra import UnistraIE from .urort import UrortIE from .ustream import UstreamIE, UstreamChannelIE +from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE from .veoh import VeohIE +from .vessel import VesselIE from .vesti import VestiIE from .vevo import VevoIE from .vgtv import VGTVIE diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 203936e54..e3e6d2113 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -11,12 +11,13 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + qualities, ) class AddAnimeIE(InfoExtractor): - _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P[\w_]+)(?:.*)' - _TEST = { + _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P[\w_]+)' + _TESTS = [{ 'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', 'md5': '72954ea10bc979ab5e2eb288b21425a0', 'info_dict': { @@ -25,7 +26,10 @@ class AddAnimeIE(InfoExtractor): 'description': 'One Piece 606', 'title': 'One Piece 606', } - } + }, { + 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -63,8 +67,10 @@ class AddAnimeIE(InfoExtractor): note='Confirming after redirect') webpage = self._download_webpage(url, video_id) + FORMATS = ('normal', 'hq') + quality = qualities(FORMATS) formats = [] - for format_id in ('normal', 'hq'): + for format_id in FORMATS: rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) video_url = self._search_regex(rex, webpage, 'video file URLx', fatal=False) @@ -73,6 +79,7 @@ class AddAnimeIE(InfoExtractor): formats.append({ 'format_id': format_id, 'url': video_url, + 'quality': quality(format_id), }) self._sort_formats(formats) video_title = self._og_search_title(webpage) diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py index 8442019ea..a117502bc 100644 --- a/youtube_dl/extractor/aftonbladet.py +++ b/youtube_dl/extractor/aftonbladet.py @@ -2,10 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import int_or_none class AftonbladetIE(InfoExtractor): - _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?Particle[0-9]+)\.ab(?:$|[?#])' + _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?Particle[0-9]+)\.ab(?:$|[?#])' _TEST = { 'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab', 'info_dict': { @@ -43,9 +44,9 @@ class AftonbladetIE(InfoExtractor): formats.append({ 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']), 'ext': 'mp4', - 'width': fmt['width'], - 'height': fmt['height'], - 'tbr': fmt['bitrate'], + 'width': int_or_none(fmt.get('width')), + 'height': int_or_none(fmt.get('height')), + 'tbr': int_or_none(fmt.get('bitrate')), 'protocol': 'http', }) self._sort_formats(formats) @@ -54,9 +55,9 @@ class AftonbladetIE(InfoExtractor): 'id': video_id, 'title': internal_meta_json['title'], 'formats': formats, - 'thumbnail': internal_meta_json['imageUrl'], - 'description': internal_meta_json['shortPreamble'], - 'timestamp': internal_meta_json['timePublished'], - 'duration': internal_meta_json['duration'], - 'view_count': internal_meta_json['views'], + 'thumbnail': internal_meta_json.get('imageUrl'), + 'description': internal_meta_json.get('shortPreamble'), + 'timestamp': int_or_none(internal_meta_json.get('timePublished')), + 'duration': int_or_none(internal_meta_json.get('duration')), + 'view_count': int_or_none(internal_meta_json.get('views')), } diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 8c7ba4b91..b632ce967 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -172,6 +172,7 @@ class BlipTVIE(InfoExtractor): 'width': int_or_none(media_content.get('width')), 'height': int_or_none(media_content.get('height')), }) + self._check_formats(formats, video_id) self._sort_formats(formats) subtitles = self.extract_subtitles(video_id, subtitles_urls) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 4a88ccd13..0dca29b71 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -6,32 +6,39 @@ from .common import InfoExtractor class BloombergIE(InfoExtractor): - _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P.+?)\.html' + _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P[^/?#]+)' _TEST = { - 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', + 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2', # The md5 checksum changes 'info_dict': { 'id': 'qurhIVlJSB6hzkVi229d8g', 'ext': 'flv', 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', - 'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88', + 'description': 'md5:a8ba0302912d03d246979735c17d2761', }, } def _real_extract(self, url): name = self._match_id(url) webpage = self._download_webpage(url, name) - - f4m_url = self._search_regex( - r'.+?/(?P[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))''' + (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))''' _TESTS = [{ 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', @@ -45,6 +45,12 @@ class CNNIE(InfoExtractor): 'description': 'md5:e7223a503315c9f150acac52e76de086', 'upload_date': '20141222', } + }, { + 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', + 'only_matching': True, + }, { + 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e5245ec3f..530c449c1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -822,7 +822,7 @@ class InfoExtractor(object): (media_el.attrib.get('href') or media_el.attrib.get('url'))) tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ - 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), + 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), 'url': manifest_url, 'ext': 'flv', 'tbr': tbr, diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index e64b88fbc..6ded723c9 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -23,7 +23,6 @@ from ..utils import ( ) from ..aes import ( aes_cbc_decrypt, - inc, ) @@ -102,13 +101,6 @@ class CrunchyrollIE(InfoExtractor): key = obfuscate_key(id) - class Counter: - __value = iv - - def next_value(self): - temp = self.__value - self.__value = inc(self.__value) - return temp decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv)) return zlib.decompress(decrypted_data) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 4f67c3aac..7615ecd4b 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -25,8 +25,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor): def _build_request(url): """Build a request with the family filter disabled""" request = compat_urllib_request.Request(url) - request.add_header('Cookie', 'family_filter=off') - request.add_header('Cookie', 'ff=off') + request.add_header('Cookie', 'family_filter=off; ff=off') return request @@ -112,8 +111,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id - embed_page = self._download_webpage(embed_url, video_id, - 'Downloading embed page') + embed_request = self._build_request(embed_url) + embed_page = self._download_webpage( + embed_request, video_id, 'Downloading embed page') info = self._search_regex(r'var info = ({.*?}),$', embed_page, 'video info', flags=re.MULTILINE) info = json.loads(info) @@ -224,7 +224,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): class DailymotionUserIE(DailymotionPlaylistIE): IE_NAME = 'dailymotion:user' - _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)' _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' _TESTS = [{ 'url': 'https://www.dailymotion.com/user/nqtv', diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py new file mode 100644 index 000000000..3ed1f1663 --- /dev/null +++ b/youtube_dl/extractor/dhm.py @@ -0,0 +1,73 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + xpath_text, + parse_duration, +) + + +class DHMIE(InfoExtractor): + IE_DESC = 'Filmarchiv - Deutsches Historisches Museum' + _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)' + + _TESTS = [{ + 'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/', + 'md5': '11c475f670209bf6acca0b2b7ef51827', + 'info_dict': { + 'id': 'the-marshallplan-at-work-in-west-germany', + 'ext': 'flv', + 'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE', + 'description': 'md5:1fabd480c153f97b07add61c44407c82', + 'duration': 660, + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, { + 'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/', + 'md5': '09890226332476a3e3f6f2cb74734aa5', + 'info_dict': { + 'id': 'rolle-1', + 'ext': 'flv', + 'title': 'ROLLE 1', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + playlist_url = self._search_regex( + r"file\s*:\s*'([^']+)'", webpage, 'playlist url') + + playlist = self._download_xml(playlist_url, video_id) + + track = playlist.find( + './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track') + + video_url = xpath_text( + track, './{http://xspf.org/ns/0/}location', + 'video url', fatal=True) + thumbnail = xpath_text( + track, './{http://xspf.org/ns/0/}image', + 'thumbnail') + + title = self._search_regex( + [r'dc:title="([^"]+)"', r'<title> »([^<]+)'], + webpage, 'title').strip() + description = self._html_search_regex( + r'

Description:(.+?)

', + webpage, 'description', default=None) + duration = parse_duration(self._search_regex( + r'Length\s*\s*:\s*([^<]+)', + webpage, 'duration', default=None)) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'duration': duration, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index d7956e6e4..479430c51 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -1,19 +1,23 @@ # coding: utf-8 from __future__ import unicode_literals +import hashlib +import time from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import (ExtractorError, unescapeHTML) +from ..compat import (compat_str, compat_basestring) class DouyuTVIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P[A-Za-z0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.douyutv.com/iseven', 'info_dict': { - 'id': 'iseven', + 'id': '17732', + 'display_id': 'iseven', 'ext': 'flv', 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'md5:9e525642c25a0a24302869937cf69d17', + 'description': 'md5:c93d6692dde6fe33809a46edcbecca44', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': '7师傅', 'uploader_id': '431925', @@ -22,22 +26,52 @@ class DouyuTVIE(InfoExtractor): 'params': { 'skip_download': True, } - } + }, { + 'url': 'http://www.douyutv.com/85982', + 'info_dict': { + 'id': '85982', + 'display_id': '85982', + 'ext': 'flv', + 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'douyu小漠', + 'uploader_id': '3769985', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + }] def _real_extract(self, url): video_id = self._match_id(url) + if video_id.isdigit(): + room_id = video_id + else: + page = self._download_webpage(url, video_id) + room_id = self._html_search_regex( + r'"room_id"\s*:\s*(\d+),', page, 'room id') + + prefix = 'room/%s?aid=android&client_sys=android&time=%d' % ( + room_id, int(time.time())) + + auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest() config = self._download_json( - 'http://www.douyutv.com/api/client/room/%s' % video_id, video_id) + 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth), + video_id) data = config['data'] error_code = config.get('error', 0) - show_status = data.get('show_status') if error_code is not 0: - raise ExtractorError( - 'Server reported error %i' % error_code, expected=True) + error_desc = 'Server reported error %i' % error_code + if isinstance(data, (compat_str, compat_basestring)): + error_desc += ': ' + data + raise ExtractorError(error_desc, expected=True) + show_status = data.get('show_status') # 1 = live, 2 = offline if show_status == '2': raise ExtractorError( @@ -46,7 +80,7 @@ class DouyuTVIE(InfoExtractor): base_url = data['rtmp_url'] live_path = data['rtmp_live'] - title = self._live_title(data['room_name']) + title = self._live_title(unescapeHTML(data['room_name'])) description = data.get('show_details') thumbnail = data.get('room_src') @@ -66,7 +100,8 @@ class DouyuTVIE(InfoExtractor): self._sort_formats(formats) return { - 'id': video_id, + 'id': room_id, + 'display_id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 69ca75423..05bb22ddf 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -3,22 +3,25 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + ExtractorError, + unified_strdate, +) class DreiSatIE(InfoExtractor): IE_NAME = '3sat' _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P[0-9]+)$' _TEST = { - 'url': 'http://www.3sat.de/mediathek/index.php?obj=36983', - 'md5': '9dcfe344732808dbfcc901537973c922', + 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', + 'md5': 'be37228896d30a88f315b638900a026e', 'info_dict': { - 'id': '36983', + 'id': '45918', 'ext': 'mp4', - 'title': 'Kaffeeland Schweiz', - 'description': 'md5:cc4424b18b75ae9948b13929a0814033', + 'title': 'Waidmannsheil', + 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', 'uploader': '3sat', - 'upload_date': '20130622' + 'upload_date': '20140913' } } @@ -28,6 +31,15 @@ class DreiSatIE(InfoExtractor): details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id details_doc = self._download_xml(details_url, video_id, 'Downloading video details') + status_code = details_doc.find('./status/statuscode') + if status_code is not None and status_code.text != 'ok': + code = status_code.text + if code == 'notVisibleAnymore': + message = 'Video %s is not available' % video_id + else: + message = '%s returned error: %s' % (self.IE_NAME, code) + raise ExtractorError(message, expected=True) + thumbnail_els = details_doc.findall('.//teaserimage') thumbnails = [{ 'width': int(te.attrib['key'].partition('x')[0]), diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 8257e35a4..f25ab319e 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor, ExtractorError @@ -8,16 +9,16 @@ class DRTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' _TEST = { - 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8', - 'md5': '4a7e1dd65cdb2643500a3f753c942f25', + 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', + 'md5': 'dc515a9ab50577fa14cc4e4b0265168f', 'info_dict': { - 'id': 'partiets-mand-7-8', + 'id': 'panisk-paske-5', 'ext': 'mp4', - 'title': 'Partiets mand (7:8)', - 'description': 'md5:a684b90a8f9336cd4aab94b7647d7862', - 'timestamp': 1403047940, - 'upload_date': '20140617', - 'duration': 1299.040, + 'title': 'Panisk Påske (5)', + 'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c', + 'timestamp': 1426984612, + 'upload_date': '20150322', + 'duration': 1455, }, } @@ -26,6 +27,10 @@ class DRTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + if '>Programmet er ikke længere tilgængeligt' in webpage: + raise ExtractorError( + 'Video %s is not available' % video_id, expected=True) + video_id = self._search_regex( r'data-(?:material-identifier|episode-slug)="([^"]+)"', webpage, 'video id') diff --git a/youtube_dl/extractor/dump.py b/youtube_dl/extractor/dump.py index 6b651778a..ff78d4fd2 100644 --- a/youtube_dl/extractor/dump.py +++ b/youtube_dl/extractor/dump.py @@ -28,12 +28,12 @@ class DumpIE(InfoExtractor): video_url = self._search_regex( r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL') - thumb = self._og_search_thumbnail(webpage) - title = self._search_regex(r'([^"]+)', webpage, 'title') + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) return { 'id': video_id, 'title': title, 'url': video_url, - 'thumbnail': thumb, + 'thumbnail': thumbnail, } diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py new file mode 100644 index 000000000..9c594b757 --- /dev/null +++ b/youtube_dl/extractor/dumpert.py @@ -0,0 +1,60 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 + +from .common import InfoExtractor +from ..compat import compat_urllib_request +from ..utils import qualities + + +class DumpertIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P[0-9]+/[0-9a-zA-Z]+)' + _TEST = { + 'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/', + 'md5': '1b9318d7d5054e7dcb9dc7654f21d643', + 'info_dict': { + 'id': '6646981/951bc60f', + 'ext': 'mp4', + 'title': 'Ik heb nieuws voor je', + 'description': 'Niet schrikken hoor', + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'nsfw=1') + webpage = self._download_webpage(req, video_id) + + files_base64 = self._search_regex( + r'data-files="([^"]+)"', webpage, 'data files') + + files = self._parse_json( + base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'), + video_id) + + quality = qualities(['flv', 'mobile', 'tablet', '720p']) + + formats = [{ + 'url': video_url, + 'format_id': format_id, + 'quality': quality(format_id), + } for format_id, video_url in files.items() if format_id != 'still'] + self._sort_formats(formats) + + title = self._html_search_meta( + 'title', webpage) or self._og_search_title(webpage) + description = self._html_search_meta( + 'description', webpage) or self._og_search_description(webpage) + thumbnail = files.get('still') or self._og_search_thumbnail(webpage) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats + } diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 7173371ee..688dfc2f7 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -45,6 +45,7 @@ class EaglePlatformIE(InfoExtractor): 'duration': 216, 'view_count': int, }, + 'skip': 'Georestricted', }] def _handle_error(self, response): diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index fc92ff825..5154bbd7f 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -13,15 +13,15 @@ from ..utils import ( class EllenTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P[a-z0-9_-]+)' _TESTS = [{ - 'url': 'http://www.ellentv.com/videos/0-7jqrsr18/', - 'md5': 'e4af06f3bf0d5f471921a18db5764642', + 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/', + 'md5': '8e3c576bf2e9bfff4d76565f56f94c9c', 'info_dict': { - 'id': '0-7jqrsr18', + 'id': '0-ipq1gsai', 'ext': 'mp4', - 'title': 'What\'s Wrong with These Photos? A Whole Lot', - 'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6', - 'timestamp': 1406876400, - 'upload_date': '20140801', + 'title': 'Fast Fingers of Fate', + 'description': 'md5:686114ced0a032926935e9015ee794ac', + 'timestamp': 1428033600, + 'upload_date': '20150403', } }, { 'url': 'http://ellentube.com/videos/0-dvzmabd5/', @@ -40,14 +40,15 @@ class EllenTVIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_url = self._html_search_meta('VideoURL', webpage, 'url') + + video_url = self._html_search_meta('VideoURL', webpage, 'url', fatal=True) title = self._og_search_title(webpage, default=None) or self._search_regex( r'pageName\s*=\s*"([^"]+)"', webpage, 'title') description = self._html_search_meta( 'description', webpage, 'description') or self._og_search_description(webpage) timestamp = parse_iso8601(self._search_regex( r'