gitweb @ CieloNegro.org - youtube-dl.git/commitdiff
Merge remote-tracking branch 'Tithen-Firion/hsw-update'
author Philipp Hagemeister <phihag@phihag.de>
Fri, 12 Dec 2014 03:10:55 +0000 (04:10 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Fri, 12 Dec 2014 03:10:55 +0000 (04:10 +0100)
49 files changed:
AUTHORS
Makefile
README.md
test/test_utils.py
youtube_dl/YoutubeDL.py
youtube_dl/compat.py
youtube_dl/downloader/hls.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/adultswim.py
youtube_dl/extractor/audiomack.py
youtube_dl/extractor/azubu.py [new file with mode: 0644]
youtube_dl/extractor/bbccouk.py
youtube_dl/extractor/behindkink.py
youtube_dl/extractor/bet.py [new file with mode: 0644]
youtube_dl/extractor/bliptv.py
youtube_dl/extractor/cinchcast.py [new file with mode: 0644]
youtube_dl/extractor/common.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/foxgay.py [new file with mode: 0644]
youtube_dl/extractor/foxnews.py [new file with mode: 0644]
youtube_dl/extractor/generic.py
youtube_dl/extractor/minhateca.py [new file with mode: 0644]
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/myspace.py
youtube_dl/extractor/myvidster.py [new file with mode: 0644]
youtube_dl/extractor/nba.py
youtube_dl/extractor/nhl.py
youtube_dl/extractor/ntv.py
youtube_dl/extractor/pornhd.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/radiode.py [new file with mode: 0644]
youtube_dl/extractor/screenwavemedia.py [moved from youtube_dl/extractor/cinemassacre.py with 50% similarity]
youtube_dl/extractor/smotri.py
youtube_dl/extractor/tagesschau.py
youtube_dl/extractor/tvigle.py
youtube_dl/extractor/tvplay.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/vine.py
youtube_dl/extractor/xminus.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zdf.py
youtube_dl/options.py
youtube_dl/postprocessor/execafterdownload.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/update.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
index bd2e967e32d653104a77a730f5f4ad667b29b61e..bfa00f91ba6e3cf9fc1762ad81a56f11501d2390 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -89,3 +89,6 @@ Oskar Jauch
 Matthew Rayfield
 t0mm0
 Tithen-Firion
+Zack Fernandes
+cryptonaut
+Adrian Kretz
diff --git a/Makefile b/Makefile
index 3e1debc7e1a9184b82a5bc6528b0f8dbdfd7e6f4..b846331b931e13878813254384970a9671eab55e 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
 
 clean:
-       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part
+       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json
 
 cleanall: clean
        rm -f youtube-dl youtube-dl.exe
diff --git a/README.md b/README.md
index d6e7ff902c86021db324cb3b9d06d441b8673cad..18be078d768b9afe76df42d8079bdbf50af62911 100644 (file)
--- a/README.md
+++ b/README.md
@@ -65,10 +65,10 @@ which means you can modify it, redistribute it or use it however you like.
                                      this is not possible instead of searching.
     --ignore-config                  Do not read configuration files. When given
                                      in the global configuration file /etc
-                                     /youtube-dl.conf: do not read the user
-                                     configuration in ~/.config/youtube-dl.conf
-                                     (%APPDATA%/youtube-dl/config.txt on
-                                     Windows)
+                                     /youtube-dl.conf: Do not read the user
+                                     configuration in ~/.config/youtube-
+                                     dl/config (%APPDATA%/youtube-dl/config.txt
+                                     on Windows)
     --flat-playlist                  Do not extract the videos of a playlist,
                                      only list them.
 
index baa3a215657026245bf93960c53374bc3abdd61b..d42df6d96d92f7cde133e2bddfc769793920c37d 100644 (file)
@@ -48,6 +48,7 @@ from youtube_dl.utils import (
     intlist_to_bytes,
     args_to_str,
     parse_filesize,
+    version_tuple,
 )
 
 
@@ -143,6 +144,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
         self.assertEqual(unified_strdate('1968-12-10'), '19681210')
         self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
+        self.assertEqual(
+            unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
+            '20141126')
 
     def test_find_xpath_attr(self):
         testxml = '''<root>
@@ -220,6 +224,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('0s'), 0)
         self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
         self.assertEqual(parse_duration('T30M38S'), 1838)
+        self.assertEqual(parse_duration('5 s'), 5)
+        self.assertEqual(parse_duration('3 min'), 180)
+        self.assertEqual(parse_duration('2.5 hours'), 9000)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(
@@ -376,6 +383,12 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_filesize('2 MiB'), 2097152)
         self.assertEqual(parse_filesize('5 GB'), 5000000000)
         self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
+        self.assertEqual(parse_filesize('1,24 KB'), 1240)
+
+    def test_version_tuple(self):
+        self.assertEqual(version_tuple('1'), (1,))
+        self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
+        self.assertEqual(version_tuple('10.1-6'), (10, 1, 6))  # avconv style
 
 if __name__ == '__main__':
     unittest.main()
index f89ac4e1de513330467e3569a17ef624f2abc736..31531855e8e43294c155c752cda3e0c397e0921a 100755 (executable)
@@ -7,6 +7,7 @@ import collections
 import datetime
 import errno
 import io
+import itertools
 import json
 import locale
 import os
@@ -621,23 +622,17 @@ class YoutubeDL(object):
                 ie_result['url'], ie_key=ie_result.get('ie_key'),
                 extra_info=extra_info, download=False, process=False)
 
-            def make_result(embedded_info):
-                new_result = ie_result.copy()
-                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
-                          'entries', 'ie_key', 'duration',
-                          'subtitles', 'annotations', 'format',
-                          'thumbnail', 'thumbnails'):
-                    if f in new_result:
-                        del new_result[f]
-                    if f in embedded_info:
-                        new_result[f] = embedded_info[f]
-                return new_result
-            new_result = make_result(info)
+            new_result = ie_result.copy()
+            for f in ('_type', 'id', 'url', 'ext', 'player_url', 'formats',
+                      'entries', 'ie_key', 'duration',
+                      'subtitles', 'annotations', 'format',
+                      'thumbnail', 'thumbnails'):
+                if f in new_result:
+                    del new_result[f]
+                if f in info:
+                    new_result[f] = info[f]
 
             assert new_result.get('_type') != 'url_transparent'
-            if new_result.get('_type') == 'compat_list':
-                new_result['entries'] = [
-                    make_result(e) for e in new_result['entries']]
 
             return self.process_ie_result(
                 new_result, download=download, extra_info=extra_info)
@@ -654,21 +649,28 @@ class YoutubeDL(object):
             if playlistend == -1:
                 playlistend = None
 
-            if isinstance(ie_result['entries'], list):
-                n_all_entries = len(ie_result['entries'])
-                entries = ie_result['entries'][playliststart:playlistend]
+            ie_entries = ie_result['entries']
+            if isinstance(ie_entries, list):
+                n_all_entries = len(ie_entries)
+                entries = ie_entries[playliststart:playlistend]
                 n_entries = len(entries)
                 self.to_screen(
                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
-            else:
-                assert isinstance(ie_result['entries'], PagedList)
-                entries = ie_result['entries'].getslice(
+            elif isinstance(ie_entries, PagedList):
+                entries = ie_entries.getslice(
                     playliststart, playlistend)
                 n_entries = len(entries)
                 self.to_screen(
                     "[%s] playlist %s: Downloading %d videos" %
                     (ie_result['extractor'], playlist, n_entries))
+            else:  # iterable
+                entries = list(itertools.islice(
+                    ie_entries, playliststart, playlistend))
+                n_entries = len(entries)
+                self.to_screen(
+                    "[%s] playlist %s: Downloading %d videos" %
+                    (ie_result['extractor'], playlist, n_entries))
 
             for i, entry in enumerate(entries, 1):
                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
@@ -934,8 +936,12 @@ class YoutubeDL(object):
         if self.params.get('forceid', False):
             self.to_stdout(info_dict['id'])
         if self.params.get('forceurl', False):
-            # For RTMP URLs, also include the playpath
-            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+            if info_dict.get('requested_formats') is not None:
+                for f in info_dict['requested_formats']:
+                    self.to_stdout(f['url'] + f.get('play_path', ''))
+            else:
+                # For RTMP URLs, also include the playpath
+                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
             self.to_stdout(info_dict['thumbnail'])
         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
index 27596687d0d2c354990e6112027e054865c1c79c..cd46693b34f4746284e74b36af3bddd7ba8155ee 100644 (file)
@@ -1,8 +1,10 @@
 from __future__ import unicode_literals
 
+import ctypes
 import getpass
 import optparse
 import os
+import platform
 import re
 import subprocess
 import sys
@@ -247,7 +249,7 @@ else:
                 userhome = compat_getenv('HOME')
             elif 'USERPROFILE' in os.environ:
                 userhome = compat_getenv('USERPROFILE')
-            elif not 'HOMEPATH' in os.environ:
+            elif 'HOMEPATH' not in os.environ:
                 return path
             else:
                 try:
@@ -326,6 +328,22 @@ def workaround_optparse_bug9161():
         optparse.OptionGroup.add_option = _compat_add_option
 
 
+if platform.python_implementation() == 'PyPy':
+    # PyPy expects byte strings as Windows function names
+    # https://github.com/rg3/youtube-dl/pull/4392
+    def compat_WINFUNCTYPE(*args, **kwargs):
+        real = ctypes.WINFUNCTYPE(*args, **kwargs)
+
+        def resf(tpl, *args, **kwargs):
+            funcname, dll = tpl
+            return real((str(funcname), dll), *args, **kwargs)
+
+        return resf
+else:
+    def compat_WINFUNCTYPE(*args, **kwargs):
+        return ctypes.WINFUNCTYPE(*args, **kwargs)
+
+
 __all__ = [
     'compat_HTTPError',
     'compat_chr',
@@ -349,6 +367,7 @@ __all__ = [
     'compat_urllib_request',
     'compat_urlparse',
     'compat_urlretrieve',
+    'compat_WINFUNCTYPE',
     'compat_xml_parse_error',
     'shlex_quote',
     'subprocess_check_output',
index 954beffd50e51db43ce203931d31ad2fbeee95dc..ad26cfa4085bbb028c7252aa9db2a8de3f7bd1e4 100644 (file)
@@ -4,6 +4,7 @@ import os
 import re
 import subprocess
 
+from ..postprocessor.ffmpeg import FFmpegPostProcessor
 from .common import FileDownloader
 from ..utils import (
     compat_urlparse,
@@ -32,6 +33,9 @@ class HlsFD(FileDownloader):
             return False
         cmd = [program] + args
 
+        ffpp = FFmpegPostProcessor(downloader=self)
+        ffpp.check_version()
+
         retval = subprocess.call(cmd)
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
index 8b513ffd1d903bd7a3fc1c4d8b63c461ca0a81cc..746ee69e4bbe5feca9aa9b86445bbda44a832918 100644 (file)
@@ -24,11 +24,13 @@ from .arte import (
 )
 from .audiomack import AudiomackIE
 from .auengine import AUEngineIE
+from .azubu import AzubuIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
+from .bet import BetIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
@@ -49,7 +51,7 @@ from .cbsnews import CBSNewsIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
-from .cinemassacre import CinemassacreIE
+from .cinchcast import CinchcastIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
@@ -121,6 +123,8 @@ from .fktv import (
 from .flickr import FlickrIE
 from .folketinget import FolketingetIE
 from .fourtube import FourTubeIE
+from .foxgay import FoxgayIE
+from .foxnews import FoxNewsIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
@@ -216,6 +220,7 @@ from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
+from .minhateca import MinhatecaIE
 from .ministrygrid import MinistryGridIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mitele import MiTeleIE
@@ -245,6 +250,7 @@ from .muzu import MuzuTVIE
 from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
+from .myvidster import MyVidsterIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import (
@@ -302,6 +308,7 @@ from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .pyvideo import PyvideoIE
 from .quickvid import QuickVidIE
+from .radiode import RadioDeIE
 from .radiofrance import RadioFranceIE
 from .rai import RaiIE
 from .rbmaradio import RBMARadioIE
@@ -329,6 +336,7 @@ from .savefrom import SaveFromIE
 from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
+from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
 from .servingsys import ServingSysIE
 from .sexu import SexuIE
 from .sexykarma import SexyKarmaIE
@@ -519,7 +527,7 @@ from .youtube import (
     YoutubeUserIE,
     YoutubeWatchLaterIE,
 )
-from .zdf import ZDFIE
+from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import (
     ZingMp3SongIE,
     ZingMp3AlbumIE,
index 0d05cbb4b16b470aaa1a82d318c07f323ccd1bf7..39e4ca296f97a8fe20e65ab7160f9931fee89a67 100644 (file)
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+)
 
 
 class AdultSwimIE(InfoExtractor):
-    _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
-    _TEST = {
-        'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
+    _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
+
+    _TESTS = [{
+        'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
         'playlist': [
             {
-                'md5': '4da359ec73b58df4575cd01a610ba5dc',
-                'info_dict': {
-                    'id': '8a250ba1450996e901453d7f02ca02f5',
-                    'ext': 'flv',
-                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
-                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
-                    'uploader': 'Rick and Morty',
-                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
-                }
-            },
-            {
-                'md5': 'ffbdf55af9331c509d95350bd0cc1819',
+                'md5': '247572debc75c7652f253c8daa51a14d',
                 'info_dict': {
-                    'id': '8a250ba1450996e901453d7f4bd102f6',
+                    'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
                     'ext': 'flv',
-                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
-                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
-                    'uploader': 'Rick and Morty',
-                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
-                }
+                    'title': 'Rick and Morty - Pilot Part 1',
+                    'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
+                },
             },
             {
-                'md5': 'b92409635540304280b4b6c36bd14a0a',
+                'md5': '77b0e037a4b20ec6b98671c4c379f48d',
                 'info_dict': {
-                    'id': '8a250ba1450996e901453d7fa73c02f7',
+                    'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
                     'ext': 'flv',
-                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
-                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
-                    'uploader': 'Rick and Morty',
-                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
-                }
+                    'title': 'Rick and Morty - Pilot Part 4',
+                    'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
+                },
             },
+        ],
+        'info_dict': {
+            'title': 'Rick and Morty - Pilot',
+            'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
+        }
+    }, {
+        'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
+        'playlist': [
             {
-                'md5': 'e8818891d60e47b29cd89d7b0278156d',
+                'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
                 'info_dict': {
-                    'id': '8a250ba1450996e901453d7fc8ba02f8',
+                    'id': '-t8CamQlQ2aYZ49ItZCFog-0',
                     'ext': 'flv',
-                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
-                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
-                    'uploader': 'Rick and Morty',
-                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
-                }
+                    'title': 'American Dad - Putting Francine Out of Business',
+                    'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
+                },
             }
-        ]
-    }
-
-    _video_extensions = {
-        '3500': 'flv',
-        '640': 'mp4',
-        '150': 'mp4',
-        'ipad': 'm3u8',
-        'iphone': 'm3u8'
-    }
-    _video_dimensions = {
-        '3500': (1280, 720),
-        '640': (480, 270),
-        '150': (320, 180)
-    }
+        ],
+        'info_dict': {
+            'title': 'American Dad - Putting Francine Out of Business',
+            'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
+        },
+    }]
+
+    @staticmethod
+    def find_video_info(collection, slug):
+        for video in collection.get('videos'):
+            if video.get('slug') == slug:
+                return video
+
+    @staticmethod
+    def find_collection_by_linkURL(collections, linkURL):
+        for collection in collections:
+            if collection.get('linkURL') == linkURL:
+                return collection
+
+    @staticmethod
+    def find_collection_containing_video(collections, slug):
+        for collection in collections:
+            for video in collection.get('videos'):
+                if video.get('slug') == slug:
+                    return collection, video
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_path = mobj.group('path')
-
-        webpage = self._download_webpage(url, video_path)
-        episode_id = self._html_search_regex(
-            r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>',
-            webpage, 'episode_id')
-        title = self._og_search_title(webpage)
-
-        index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
-        idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')
-
-        episode_el = idoc.find('.//episode')
-        show_title = episode_el.attrib.get('collectionTitle')
-        episode_title = episode_el.attrib.get('title')
-        thumbnail = episode_el.attrib.get('thumbnailUrl')
-        description = episode_el.find('./description').text.strip()
+        show_path = mobj.group('show_path')
+        episode_path = mobj.group('episode_path')
+        is_playlist = True if mobj.group('is_playlist') else False
+
+        webpage = self._download_webpage(url, episode_path)
+
+        # Extract the value of `bootstrappedData` from the Javascript in the page.
+        bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
+
+        try:
+            bootstrappedData = json.loads(bootstrappedDataJS)
+        except ValueError as ve:
+            errmsg = '%s: Failed to parse JSON ' % episode_path
+            raise ExtractorError(errmsg, cause=ve)
+
+        # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
+        # NOTE: We are only downloading one video (the current one) not the playlist
+        if is_playlist:
+            collections = bootstrappedData['playlists']['collections']
+            collection = self.find_collection_by_linkURL(collections, show_path)
+            video_info = self.find_video_info(collection, episode_path)
+
+            show_title = video_info['showTitle']
+            segment_ids = [video_info['videoPlaybackID']]
+        else:
+            collections = bootstrappedData['show']['collections']
+            collection, video_info = self.find_collection_containing_video(collections, episode_path)
+
+            show = bootstrappedData['show']
+            show_title = show['title']
+            segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
+
+        episode_id = video_info['id']
+        episode_title = video_info['title']
+        episode_description = video_info['description']
+        episode_duration = video_info.get('duration')
 
         entries = []
-        segment_els = episode_el.findall('./segments/segment')
+        for part_num, segment_id in enumerate(segment_ids):
+            segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id
 
-        for part_num, segment_el in enumerate(segment_els):
-            segment_id = segment_el.attrib.get('id')
-            segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
-            thumbnail = segment_el.attrib.get('thumbnailUrl')
-            duration = segment_el.attrib.get('duration')
+            segment_title = '%s - %s' % (show_title, episode_title)
+            if len(segment_ids) > 1:
+                segment_title += ' Part %d' % (part_num + 1)
 
-            segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
             idoc = self._download_xml(
                 segment_url, segment_title,
                 'Downloading segment information', 'Unable to download segment information')
 
+            segment_duration = idoc.find('.//trt').text.strip()
+
             formats = []
             file_els = idoc.findall('.//files/file')
 
             for file_el in file_els:
                 bitrate = file_el.attrib.get('bitrate')
-                type = file_el.attrib.get('type')
-                width, height = self._video_dimensions.get(bitrate, (None, None))
+                ftype = file_el.attrib.get('type')
+
                 formats.append({
-                    'format_id': '%s-%s' % (bitrate, type),
-                    'url': file_el.text,
-                    'ext': self._video_extensions.get(bitrate, 'mp4'),
+                    'format_id': '%s_%s' % (bitrate, ftype),
+                    'url': file_el.text.strip(),
                     # The bitrate may not be a number (for example: 'iphone')
                     'tbr': int(bitrate) if bitrate.isdigit() else None,
-                    'height': height,
-                    'width': width
+                    'quality': 1 if ftype == 'hd' else -1
                 })
 
             self._sort_formats(formats)
@@ -127,18 +151,16 @@ class AdultSwimIE(InfoExtractor):
                 'id': segment_id,
                 'title': segment_title,
                 'formats': formats,
-                'uploader': show_title,
-                'thumbnail': thumbnail,
-                'duration': duration,
-                'description': description
+                'duration': segment_duration,
+                'description': episode_description
             })
 
         return {
             '_type': 'playlist',
             'id': episode_id,
-            'display_id': video_path,
+            'display_id': episode_path,
             'entries': entries,
-            'title': '%s %s' % (show_title, episode_title),
-            'description': description,
-            'thumbnail': thumbnail
+            'title': '%s %s' % (show_title, episode_title),
+            'description': episode_description,
+            'duration': episode_duration
         }
index 04386f7f780ca2c455088a3b224727d042cbd001..622b209899ec3dac2432ed3c7a1dadcf14537a5e 100644 (file)
@@ -26,13 +26,13 @@ class AudiomackIE(InfoExtractor):
         {
             'add_ie': ['Soundcloud'],
             'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
-            'file': '172419696.mp3',
-            'info_dict':
-            {
+            'info_dict': {
+                'id': '172419696',
+                'ext': 'mp3',
                 'description': 'md5:1fc3272ed7a635cce5be1568c2822997',
                 'title': 'Young Thug ft Lil Wayne - Take Kare',
-                'uploader':'Young Thug World',
-                'upload_date':'20141016',
+                'uploader': 'Young Thug World',
+                'upload_date': '20141016',
             }
         },
     ]
diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py
new file mode 100644 (file)
index 0000000..0961d33
--- /dev/null
@@ -0,0 +1,93 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class AzubuIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
+            'md5': 'a88b42fcf844f29ad6035054bd9ecaf4',
+            'info_dict': {
+                'id': '15575',
+                'ext': 'mp4',
+                'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
+                'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
+                'thumbnail': 're:^https?://.*\.jpe?g',
+                'timestamp': 1417523507.334,
+                'upload_date': '20141202',
+                'duration': 9988.7,
+                'uploader': 'GSL',
+                'uploader_id': 414310,
+                'view_count': int,
+            },
+        },
+        {
+            'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-',
+            'md5': 'b72a871fe1d9f70bd7673769cdb3b925',
+            'info_dict': {
+                'id': '9344',
+                'ext': 'mp4',
+                'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
+                'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
+                'thumbnail': 're:^https?://.*\.jpe?g',
+                'timestamp': 1410530893.320,
+                'upload_date': '20140912',
+                'duration': 172.385,
+                'uploader': 'FnaticTV',
+                'uploader_id': 272749,
+                'view_count': int,
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        data = self._download_json(
+            'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
+
+        title = data['title'].strip()
+        description = data['description']
+        thumbnail = data['thumbnail']
+        view_count = data['view_count']
+        uploader = data['user']['username']
+        uploader_id = data['user']['id']
+
+        stream_params = json.loads(data['stream_params'])
+
+        timestamp = float_or_none(stream_params['creationDate'], 1000)
+        duration = float_or_none(stream_params['length'], 1000)
+
+        renditions = stream_params.get('renditions') or []
+        video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
+        if video:
+            renditions.append(video)
+
+        formats = [{
+            'url': fmt['url'],
+            'width': fmt['frameWidth'],
+            'height': fmt['frameHeight'],
+            'vbr': float_or_none(fmt['encodingRate'], 1000),
+            'filesize': fmt['size'],
+            'vcodec': fmt['videoCodec'],
+            'container': fmt['videoContainer'],
+        } for fmt in renditions if fmt['url']]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'view_count': view_count,
+            'formats': formats,
+        }
index beb6cfc8ae88a3c40ac2b4ee7f6b0ae2c6ddfc87..01c02d360cd7255b14aa7aa8259de52e44701884 100644 (file)
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import re
 import xml.etree.ElementTree
 
 from .subtitles import SubtitlesInfoExtractor
index 31fdc0dcc0614babf4ff3b48186566904cfcc57a..1bdc25812b6afb4cf133007f2d12b89fd56b353f 100644 (file)
@@ -10,15 +10,15 @@ from ..utils import url_basename
 class BehindKinkIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
     _TEST = {
-        'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
-        'md5': '41ad01222b8442089a55528fec43ec01',
+        'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
+        'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
         'info_dict': {
-            'id': '36370',
+            'id': '37127',
             'ext': 'mp4',
-            'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
-            'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
-            'upload_date': '20140814',
-            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
+            'title': 'What are you passionate about – Marley Blaze',
+            'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
+            'upload_date': '20141205',
+            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
             'age_limit': 18,
         }
     }
@@ -26,26 +26,19 @@ class BehindKinkIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         display_id = mobj.group('id')
-        year = mobj.group('year')
-        month = mobj.group('month')
-        day = mobj.group('day')
-        upload_date = year + month + day
 
         webpage = self._download_webpage(url, display_id)
 
         video_url = self._search_regex(
-            r"'file':\s*'([^']+)'",
-            webpage, 'URL base')
-
-        video_id = url_basename(video_url)
-        video_id = video_id.split('_')[0]
+            r'<source src="([^"]+)"', webpage, 'video URL')
+        video_id = url_basename(video_url).split('_')[0]
+        upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'url': video_url,
-            'ext': 'mp4',
             'title': self._og_search_title(webpage),
-            'display_id': display_id,
             'thumbnail': self._og_search_thumbnail(webpage),
             'description': self._og_search_description(webpage),
             'upload_date': upload_date,
diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py
new file mode 100644 (file)
index 0000000..c1fc433
--- /dev/null
@@ -0,0 +1,108 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    xpath_text,
+    xpath_with_ns,
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class BetIE(InfoExtractor):
+    # Extractor for bet.com pages whose URL ends in '.html'; the last path
+    # component (without the extension) is captured as the id group.
+    _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
+    _TESTS = [
+        {
+            'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
+            'info_dict': {
+                'id': '417cd61c-c793-4e8e-b006-e445ecc45add',
+                'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
+                'ext': 'flv',
+                'title': 'BET News Presents: A Conversation With President Obama',
+                'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
+                'duration': 1534,
+                'timestamp': 1418075340,
+                'upload_date': '20141208',
+                'uploader': 'admin',
+                'thumbnail': 're:(?i)^https?://.*\.jpg$',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
+            'info_dict': {
+                'id': '4160e53b-ad41-43b1-980f-8d85f63121f4',
+                'display_id': 'justice-for-ferguson-a-community-reacts',
+                'ext': 'flv',
+                'title': 'Justice for Ferguson: A Community Reacts',
+                'description': 'A BET News special.',
+                'duration': 1696,
+                'timestamp': 1416942360,
+                'upload_date': '20141125',
+                'uploader': 'admin',
+                'thumbnail': 're:(?i)^https?://.*\.jpg$',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        }
+    ]
+
+    def _real_extract(self, url):
+        # The URL slug only serves as display_id; the reported 'id' is taken
+        # from the feed's <guid> further down.
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        # The page carries the feed URL under one of two JavaScript
+        # spellings; the matched value is URL-unquoted before use.
+        media_url = compat_urllib_parse.unquote(self._search_regex(
+            [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
+            webpage, 'media URL'))
+
+        mrss = self._download_xml(media_url, display_id)
+
+        # NOTE(review): the results of the find() calls below are used
+        # without a None check; if an element is missing and find() returns
+        # None, the following attribute access raises AttributeError.
+        item = mrss.find('./channel/item')
+
+        # Prefix -> namespace URI table handed to xpath_with_ns for the
+        # namespaced elements of the feed.
+        NS_MAP = {
+            'dc': 'http://purl.org/dc/elements/1.1/',
+            'media': 'http://search.yahoo.com/mrss/',
+            'ka': 'http://kickapps.com/karss',
+        }
+
+        # The title lookup is the only one not marked fatal=False.
+        title = xpath_text(item, './title', 'title')
+        description = xpath_text(
+            item, './description', 'description', fatal=False)
+
+        video_id = xpath_text(item, './guid', 'video id', fatal=False)
+
+        timestamp = parse_iso8601(xpath_text(
+            item, xpath_with_ns('./dc:date', NS_MAP),
+            'upload date', fatal=False))
+        uploader = xpath_text(
+            item, xpath_with_ns('./dc:creator', NS_MAP),
+            'uploader', fatal=False)
+
+        # <media:content> supplies the duration and the SMIL URL as
+        # attributes, and the thumbnail as a child element.
+        media_content = item.find(
+            xpath_with_ns('./media:content', NS_MAP))
+        duration = int_or_none(media_content.get('duration'))
+        smil_url = media_content.get('url')
+
+        thumbnail = media_content.find(
+            xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
+
+        formats = self._extract_smil_formats(smil_url, display_id)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'duration': duration,
+            'formats': formats,
+        }
index da47f27bdd6702d3927f3fde72fc0ebe064df53a..14b814120be3b8215a28fc00a95f87bd22e0c062 100644 (file)
@@ -4,13 +4,17 @@ import re
 
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
-from ..utils import (
+
+from ..compat import (
+    compat_str,
     compat_urllib_request,
-    unescapeHTML,
-    parse_iso8601,
     compat_urlparse,
+)
+from ..utils import (
     clean_html,
-    compat_str,
+    int_or_none,
+    parse_iso8601,
+    unescapeHTML,
 )
 
 
@@ -78,7 +82,25 @@ class BlipTVIE(SubtitlesInfoExtractor):
                 'uploader': 'NostalgiaCritic',
                 'uploader_id': '246467',
             }
-        }
+        },
+        {
+            # https://github.com/rg3/youtube-dl/pull/4404
+            'note': 'Audio only',
+            'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
+            'md5': '76c0a56f24e769ceaab21fbb6416a351',
+            'info_dict': {
+                'id': '7103299',
+                'ext': 'flv',
+                'title': 'Weekly Manga Recap: Kingdom',
+                'description': 'And then Shin breaks the enemy line, and he&apos;s all like HWAH! And then he slices a guy and it&apos;s all like FWASHING! And... it&apos;s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
+                'timestamp': 1417660321,
+                'upload_date': '20141204',
+                'uploader': 'The Rollo T',
+                'uploader_id': '407429',
+                'duration': 7251,
+                'vcodec': 'none',
+            }
+        },
     ]
 
     def _real_extract(self, url):
@@ -145,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor):
                     'url': real_url,
                     'format_id': role,
                     'format_note': media_type,
-                    'vcodec': media_content.get(blip('vcodec')),
+                    'vcodec': media_content.get(blip('vcodec')) or 'none',
                     'acodec': media_content.get(blip('acodec')),
                     'filesize': media_content.get('filesize'),
-                    'width': int(media_content.get('width')),
-                    'height': int(media_content.get('height')),
+                    'width': int_or_none(media_content.get('width')),
+                    'height': int_or_none(media_content.get('height')),
                 })
         self._sort_formats(formats)
 
diff --git a/youtube_dl/extractor/cinchcast.py b/youtube_dl/extractor/cinchcast.py
new file mode 100644 (file)
index 0000000..0c9a24b
--- /dev/null
@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    unified_strdate,
+    xpath_text,
+)
+
+
+class CinchcastIE(InfoExtractor):
+    # Matches Cinchcast player URLs and captures the numeric assetId query
+    # value as the video id.
+    _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)'
+    _TEST = {
+        # Actual test is run in generic, look for undergroundwellness
+        # (the URL below keeps its ampersands in HTML-escaped '&#038;' form)
+        'url': 'http://player.cinchcast.com/?platformId=1&#038;assetType=single&#038;assetId=7141703',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        # Metadata is read from the blogtalkradio.com MRSS endpoint for the
+        # asset, not from the player page itself.
+        doc = self._download_xml(
+            'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id,
+            video_id)
+
+        # First <item> anywhere in the document.
+        item = doc.find('.//item')
+        title = xpath_text(item, './title', fatal=True)
+        date_str = xpath_text(
+            item, './{http://developer.longtailvideo.com/trac/}date')
+        upload_date = unified_strdate(date_str, day_first=False)
+        # duration is present but wrong
+        formats = []
+        # The 'main' format is always added; its URL comes from the 'url'
+        # attribute of the media:content element.
+        formats.append({
+            'format_id': 'main',
+            'url': item.find(
+                './{http://search.yahoo.com/mrss/}content').attrib['url'],
+        })
+        # The 'backup' format is added only when the feed supplies a
+        # backupContent value.
+        backup_url = xpath_text(
+            item, './{http://developer.longtailvideo.com/trac/}backupContent')
+        if backup_url:
+            formats.append({
+                'preference': 2,  # seems to be more reliable
+                'format_id': 'backup',
+                'url': backup_url,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'upload_date': upload_date,
+            'formats': formats,
+        }
index 7cbd846f60c22daf8343b242f9ef017bb3dcc60b..d302fe45fdea0bc7556fdbda4f321d64d86c2c7c 100644 (file)
@@ -118,6 +118,7 @@ class InfoExtractor(object):
 
     The following fields are optional:
 
+    alt_title:      A secondary title of the video.
     display_id      An alternative identifier for the video, not necessarily
                     unique, but available before title. Typically, id is
                     something like "4234987", title "Dancing naked mole rats",
@@ -129,7 +130,7 @@ class InfoExtractor(object):
                         * "resolution" (optional, string "{width}x{height"},
                                         deprecated)
     thumbnail:      Full URL to a video thumbnail image.
-    description:    One-line video description.
+    description:    Full video description.
     uploader:       Full name of the video uploader.
     timestamp:      UNIX timestamp of the moment the video became available.
     upload_date:    Video upload date (YYYYMMDD).
@@ -158,8 +159,8 @@ class InfoExtractor(object):
 
 
     _type "playlist" indicates multiple videos.
-    There must be a key "entries", which is a list or a PagedList object, each
-    element of which is a valid dictionary under this specfication.
+    There must be a key "entries", which is a list, an iterable, or a PagedList
+    object, each element of which is a valid dictionary by this specification.
 
     Additionally, playlists can have "title" and "id" attributes with the same
     semantics as videos (see above).
@@ -174,9 +175,10 @@ class InfoExtractor(object):
     _type "url" indicates that the video must be extracted from another
     location, possibly by a different extractor. Its only required key is:
     "url" - the next URL to extract.
-
-    Additionally, it may have properties believed to be identical to the
-    resolved entity, for example "title" if the title of the referred video is
+    The key "ie_key" can be set to the class name (minus the trailing "IE",
+    e.g. "Youtube") if the extractor class is known in advance.
+    Additionally, the dictionary may have any properties of the resolved entity
+    known in advance, for example "title" if the title of the referred video is
     known ahead of time.
 
 
@@ -443,7 +445,7 @@ class InfoExtractor(object):
         return video_info
 
     @staticmethod
-    def playlist_result(entries, playlist_id=None, playlist_title=None):
+    def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
         """Returns a playlist"""
         video_info = {'_type': 'playlist',
                       'entries': entries}
@@ -451,6 +453,8 @@ class InfoExtractor(object):
             video_info['id'] = playlist_id
         if playlist_title:
             video_info['title'] = playlist_title
+        if playlist_description:
+            video_info['description'] = playlist_description
         return video_info
 
     def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
@@ -794,6 +798,49 @@ class InfoExtractor(object):
         self._sort_formats(formats)
         return formats
 
+    # TODO: improve extraction
+    def _extract_smil_formats(self, smil_url, video_id):
+        # Downloads the SMIL document at smil_url and returns a list of
+        # format dicts built from its ./body/switch/video elements.
+        smil = self._download_xml(
+            smil_url, video_id, 'Downloading SMIL file',
+            'Unable to download SMIL file')
+
+        # NOTE(review): if the document has no ./head/meta element and find()
+        # returns None, the .get() call raises AttributeError.
+        base = smil.find('./head/meta').get('base')
+
+        formats = []
+        # Counts rtmp entries; used for format_id when no bitrate is known.
+        rtmp_count = 0
+        for video in smil.findall('./body/switch/video'):
+            src = video.get('src')
+            if not src:
+                continue
+            # Both attribute spellings are accepted for the bitrate.
+            # NOTE(review): 1000 is passed as int_or_none's second argument -
+            # presumably a bits-to-kilobits scale; confirm against the helper.
+            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+            width = int_or_none(video.get('width'))
+            height = int_or_none(video.get('height'))
+            # Without an explicit 'proto' attribute the protocol is inferred
+            # from the prefix of the document-level base URL.
+            proto = video.get('proto')
+            if not proto:
+                if base:
+                    if base.startswith('rtmp'):
+                        proto = 'rtmp'
+                    elif base.startswith('http'):
+                        proto = 'http'
+            ext = video.get('ext')
+            # NOTE(review): only 'm3u8' and 'rtmp' yield formats; an entry
+            # whose proto is 'http' (or remains unset) is dropped silently.
+            if proto == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(src, video_id, ext))
+            elif proto == 'rtmp':
+                rtmp_count += 1
+                # A per-video 'streamer' attribute overrides the base URL;
+                # src is passed separately as the play path.
+                streamer = video.get('streamer') or base
+                formats.append({
+                    'url': streamer,
+                    'play_path': src,
+                    'ext': 'flv',
+                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+                    'tbr': bitrate,
+                    'width': width,
+                    'height': height,
+                })
+        self._sort_formats(formats)
+
+        return formats
+
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()
@@ -823,7 +870,8 @@ class InfoExtractor(object):
         return res
 
     def _set_cookie(self, domain, name, value, expire_time=None):
-        cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None,
+        cookie = compat_cookiejar.Cookie(
+            0, name, value, None, None, domain, None,
             None, '/', True, False, expire_time, '', None, None, None)
         self._downloader.cookiejar.set_cookie(cookie)
 
index 2139f68aa3cb16facdc45b5fd9e014621e1c6674..1ad4e77a8a334dc0bfec62a0fb4752676e2e1435 100644 (file)
@@ -13,9 +13,10 @@ from ..compat import (
     compat_urllib_request,
 )
 from ..utils import (
-    urlencode_postdata,
     ExtractorError,
+    int_or_none,
     limit_length,
+    urlencode_postdata,
 )
 
 
@@ -36,7 +37,6 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '637842556329505',
             'ext': 'mp4',
-            'duration': 38,
             'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
         }
     }, {
@@ -107,9 +107,7 @@ class FacebookIE(InfoExtractor):
         self._login()
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
         webpage = self._download_webpage(url, video_id)
 
@@ -149,6 +147,6 @@ class FacebookIE(InfoExtractor):
             'id': video_id,
             'title': video_title,
             'url': video_url,
-            'duration': int(video_data['video_duration']),
-            'thumbnail': video_data['thumbnail_src'],
+            'duration': int_or_none(video_data.get('video_duration')),
+            'thumbnail': video_data.get('thumbnail_src'),
         }
diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dl/extractor/foxgay.py
new file mode 100644 (file)
index 0000000..08b8ea3
--- /dev/null
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class FoxgayIE(InfoExtractor):
+    # Matches foxgay.com video pages; the id is the run of digits right
+    # before '.shtml', optionally preceded by a hyphen-terminated slug.
+    _VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
+    _TEST = {
+        'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
+        'md5': '80d72beab5d04e1655a56ad37afe6841',
+        'info_dict': {
+            'id': '2582',
+            'ext': 'mp4',
+            'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a',
+            'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf',
+            'age_limit': 18,
+            'thumbnail': 're:https?://.*\.jpg$',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # Title and description come from the outer page; both lookups are
+        # marked fatal=False.
+        title = self._html_search_regex(
+            r'<title>(?P<title>.*?)</title>',
+            webpage, 'title', fatal=False)
+        description = self._html_search_regex(
+            r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>',
+            webpage, 'description', fatal=False)
+
+        # Find the URL for the iFrame which contains the actual video.
+        # The first 'iframe src="..."' occurrence is downloaded as a second
+        # page.
+        iframe = self._download_webpage(
+            self._html_search_regex(r'iframe src="(?P<frame>.*?)"', webpage, 'video frame'),
+            video_id)
+        # The iframe page assigns the media and thumbnail URLs to v_path and
+        # t_path; only 'http://' values match.
+        video_url = self._html_search_regex(
+            r"v_path = '(?P<vid>http://.*?)'", iframe, 'url')
+        thumb_url = self._html_search_regex(
+            r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'description': description,
+            'thumbnail': thumb_url,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py
new file mode 100644 (file)
index 0000000..917f76b
--- /dev/null
@@ -0,0 +1,94 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    int_or_none,
+)
+
+
+class FoxNewsIE(InfoExtractor):
+    # Matches video.foxnews.com '/v/<digits>' pages as well as the
+    # 'video-embed.html?video_id=<digits>' embed form.
+    _VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
+            'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
+            'info_dict': {
+                'id': '3937480',
+                'ext': 'flv',
+                'title': 'Frozen in Time',
+                'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
+                'duration': 265,
+                'timestamp': 1304411491,
+                'upload_date': '20110503',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
+            'md5': '5846c64a1ea05ec78175421b8323e2df',
+            'info_dict': {
+                'id': '3922535568001',
+                'ext': 'mp4',
+                'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
+                'description': "Congressman discusses the president's executive action",
+                'duration': 292,
+                'timestamp': 1417662047,
+                'upload_date': '20141204',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        # All metadata comes from the per-video feed, fetched as JSON.
+        video = self._download_json(
+            'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)
+
+        item = video['channel']['item']
+        title = item['title']
+        description = item['description']
+        timestamp = parse_iso8601(item['dc-date'])
+
+        media_group = item['media-group']
+        duration = None
+        formats = []
+        # Each media-content entry is dispatched on its URL suffix:
+        # .f4m and .m3u8 manifests are expanded, .smil entries are ignored,
+        # and anything else is treated as a directly downloadable file.
+        for media in media_group['media-content']:
+            attributes = media['@attributes']
+            video_url = attributes['url']
+            if video_url.endswith('.f4m'):
+                formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
+            elif video_url.endswith('.m3u8'):
+                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
+            elif not video_url.endswith('.smil'):
+                # NOTE(review): duration is only read from direct-URL entries
+                # and is overwritten by each one, so the last such entry wins.
+                duration = int_or_none(attributes.get('duration'))
+                formats.append({
+                    'url': video_url,
+                    'format_id': media['media-category']['@attributes']['label'],
+                    'preference': 1,
+                    'vbr': int_or_none(attributes.get('bitrate')),
+                    'filesize': int_or_none(attributes.get('fileSize'))
+                })
+        self._sort_formats(formats)
+
+        # A single thumbnail entry is emitted unless the attribute mapping is
+        # empty; the 'media-thumbnail' key itself is required.
+        media_thumbnail = media_group['media-thumbnail']['@attributes']
+        thumbnails = [{
+            'url': media_thumbnail['url'],
+            'width': int_or_none(media_thumbnail.get('width')),
+            'height': int_or_none(media_thumbnail.get('height')),
+        }] if media_thumbnail else []
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }
index 328301de396e5dd289b139808754ef20e1af652b..2b4d8c62f5696fce9c144bac7c1867e552d3e625 100644 (file)
@@ -467,8 +467,17 @@ class GenericIE(InfoExtractor):
             'expected_warnings': [
                 'URL could be a direct video link, returning it as such.'
             ]
-        }
-
+        },
+        # Cinchcast embed
+        {
+            'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
+            'info_dict': {
+                'id': '7141703',
+                'ext': 'mp3',
+                'upload_date': '20141126',
+                'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
+            }
+        },
     ]
 
     def report_following_redirect(self, new_url):
@@ -962,6 +971,13 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'SBS')
 
+        # Look for embedded Cinchcast player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Cinchcast')
+
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
             webpage)
diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py
new file mode 100644 (file)
index 0000000..14934b7
--- /dev/null
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_filesize,
+)
+
+
+class MinhatecaIE(InfoExtractor):
+    # Matches minhateca.com.br file pages; the id is the number between the
+    # last comma of the path and the following dot.
+    _VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
+    _TEST = {
+        'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
+        'info_dict': {
+            'id': '125848331',
+            'ext': 'mp4',
+            'title': 'youtube-dl test video',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'filesize_approx': 1530000,
+            'duration': 9,
+            'view_count': int,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # The download endpoint is called with the verification token that
+        # the page embeds in a hidden form input.
+        token = self._html_search_regex(
+            r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
+            webpage, 'request token')
+        token_data = [
+            ('fileId', video_id),
+            ('__RequestVerificationToken', token),
+        ]
+        # NOTE(review): the urlencode() result is passed as the request body
+        # as-is; presumably this is text, which Python 3's urllib rejects for
+        # POST data (bytes expected) - confirm whether .encode() is needed.
+        req = compat_urllib_request.Request(
+            'http://minhateca.com.br/action/License/Download',
+            data=compat_urllib_parse.urlencode(token_data))
+        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        data = self._download_json(
+            req, video_id, note='Downloading metadata')
+
+        video_url = data['redirectUrl']
+        # The <h1> text is split at its last dot into title and extension.
+        # NOTE(review): if the heading contains no dot, rpartition() leaves
+        # title empty and puts the whole heading into ext.
+        title_str = self._html_search_regex(
+            r'<h1.*?>(.*?)</h1>', webpage, 'title')
+        title, _, ext = title_str.rpartition('.')
+        # The remaining fields are scraped with fatal=False lookups.
+        filesize_approx = parse_filesize(self._html_search_regex(
+            r'<p class="fileSize">(.*?)</p>',
+            webpage, 'file size approximation', fatal=False))
+        # The character classes accept both the 'fileLength' and the
+        # 'fileLenght' spelling of the CSS class.
+        duration = parse_duration(self._html_search_regex(
+            r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
+            webpage, 'duration', fatal=False))
+        # The page's downloads counter is reported as the view count.
+        view_count = int_or_none(self._html_search_regex(
+            r'<p class="downloadsCounter">([0-9]+)</p>',
+            webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'ext': ext,
+            'filesize_approx': filesize_approx,
+            'duration': duration,
+            'view_count': view_count,
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
index bb8937c4d53d33df6b560aff7d56df80740bf1cc..55cc33a3e4b94014f9d4642eff1d1a3d6dc26d05 100644 (file)
@@ -70,7 +70,7 @@ class MixcloudIE(InfoExtractor):
             raise ExtractorError('Unable to extract track url')
 
         PREFIX = (
-            r'<div class="cloudcast-play-button-container[^"]*?"'
+            r'<span class="play-button[^"]*?"'
             r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
         title = self._html_search_regex(
             PREFIX + r'm-title="([^"]+)"', webpage, 'title')
index e626146705cfcc961ebcffad93c889b4a4f90e62..83414a2325586d7319c06247fa037c42bb2b199a 100644 (file)
@@ -88,6 +88,7 @@ class MySpaceIE(InfoExtractor):
                 self.report_warning(
                     '%s: No downloadable song on this page' % video_id)
                 return
+
             def search_data(name):
                 return self._search_regex(
                     r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dl/extractor/myvidster.py
new file mode 100644 (file)
index 0000000..a94ab83
--- /dev/null
@@ -0,0 +1,29 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MyVidsterIE(InfoExtractor):
+    # Matches myvidster.com '/video/<digits>/' pages. The extractor does not
+    # produce formats itself; it hands the linked URL on via url_result().
+    _VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'
+
+    _TEST = {
+        'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making',
+        'md5': '95296d0231c1363222c3441af62dc4ca',
+        'info_dict': {
+            'id': '3685814',
+            'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
+            'upload_date': '20141027',
+            'uploader_id': 'utkualp',
+            'ext': 'mp4',
+            'age_limit': 18,
+        },
+        'add_ie': ['XHamster'],
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # The target is the href of the rel="videolink" anchor.
+        # NOTE(review): '.*' is greedy, so the capture runs to the last '">'
+        # on the same line; fine only if the anchor is the last such tag
+        # there - confirm against real page markup.
+        return self.url_result(self._html_search_regex(
+            r'rel="videolink" href="(?P<real_url>.*)">',
+            webpage, 'real video url'))
index f69fe0925ee4d5d68699f09d84a568ce34f787ec..862b706bf96719aa071f1f89c73f2a4ef45a20b1 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     remove_end,
@@ -10,8 +8,8 @@ from ..utils import (
 
 
 class NBAIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
-    _TEST = {
+    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
+    _TESTS = [{
         'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
         'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
         'info_dict': {
@@ -21,12 +19,13 @@ class NBAIE(InfoExtractor):
             'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
             'duration': 181,
         },
-    }
+    }, {
+        'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
@@ -37,7 +36,7 @@ class NBAIE(InfoExtractor):
 
         description = self._og_search_description(webpage)
         duration = parse_duration(
-            self._html_search_meta('duration', webpage, 'duration', fatal=False))
+            self._html_search_meta('duration', webpage, 'duration'))
 
         return {
             'id': shortened_video_id,
index 0244368e954a37715ca8b6b750f88059f999376e..b2f40344f59d75caf94028167dcf5db7ce0f83fd 100644 (file)
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 import re
 import json
+import os
 
 from .common import InfoExtractor
 from ..compat import (
@@ -26,7 +27,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
         initial_video_url = info['publishPoint']
         if info['formats'] == '1':
             parsed_url = compat_urllib_parse_urlparse(initial_video_url)
-            path = parsed_url.path.replace('.', '_sd.', 1)
+            filename, ext = os.path.splitext(parsed_url.path)
+            path = '%s_sd%s' % (filename, ext)
             data = compat_urllib_parse.urlencode({
                 'type': 'fvod',
                 'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
index 13c8d79cd8ac6346dbe4e9810bc8bf0b20825dcc..ee740cd9c0fe71a48b79aee00c40ea610e81ea99 100644 (file)
@@ -130,7 +130,7 @@ class NTVIE(InfoExtractor):
                 'rtmp_conn': 'B:1',
                 'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
                 'page_url': 'http://www.ntv.ru',
-                'flash_ver': 'LNX 11,2,202,341',
+                'flash_version': 'LNX 11,2,202,341',
                 'rtmp_live': True,
                 'ext': 'flv',
                 'filesize': int(size.text),
index bac484c67dbb01bbafa319c117b7c6d152b7dd5d..954dfccb75954d50a9a46bc14bdb1d0dcbd5588c 100644 (file)
@@ -8,7 +8,6 @@ from ..utils import (
     int_or_none,
     js_to_json,
     qualities,
-    determine_ext,
 )
 
 
@@ -45,13 +44,18 @@ class PornHdIE(InfoExtractor):
         thumbnail = self._search_regex(
             r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
 
-        quality = qualities(['SD', 'HD'])
-        formats = [{
-            'url': source['file'],
-            'format_id': '%s-%s' % (source['label'], determine_ext(source['file'])),
-            'quality': quality(source['label']),
-        } for source in json.loads(js_to_json(self._search_regex(
-            r"(?s)'sources'\s*:\s*(\[.+?\])", webpage, 'sources')))]
+        quality = qualities(['sd', 'hd'])
+        sources = json.loads(js_to_json(self._search_regex(
+            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
+        formats = []
+        for container, s in sources.items():
+            for qname, video_url in s.items():
+                formats.append({
+                    'url': video_url,
+                    'container': container,
+                    'format_id': '%s-%s' % (container, qname),
+                    'quality': quality(qname),
+                })
         self._sort_formats(formats)
 
         return {
index 32d747ede0188a7347637aa1ee8075161ec5c1f8..1262793c820f335dfe940eafb224495af168495e 100644 (file)
@@ -85,7 +85,7 @@ class ProSiebenSat1IE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Im Interview: Kai Wiesinger',
                 'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
-                'upload_date': '20140225',
+                'upload_date': '20140203',
                 'duration': 522.56,
             },
             'params': {
@@ -100,7 +100,7 @@ class ProSiebenSat1IE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
                 'description': 'md5:2669cde3febe9bce13904f701e774eb6',
-                'upload_date': '20140225',
+                'upload_date': '20141014',
                 'duration': 2410.44,
             },
             'params': {
@@ -152,12 +152,22 @@ class ProSiebenSat1IE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
+            'info_dict': {
+                'id': '439664',
+                'title': 'Episode 8 - Ganze Folge - Playlist',
+                'description': 'md5:63b8963e71f481782aeea877658dec84',
+            },
+            'playlist_count': 2,
+        },
     ]
 
     _CLIPID_REGEXES = [
         r'"clip_id"\s*:\s+"(\d+)"',
         r'clipid: "(\d+)"',
         r'clip[iI]d=(\d+)',
+        r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
     ]
     _TITLE_REGEXES = [
         r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
@@ -178,11 +188,19 @@ class ProSiebenSat1IE(InfoExtractor):
         r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
         r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
     ]
+    _PAGE_TYPE_REGEXES = [
+        r'<meta name="page_type" content="([^"]+)">',
+        r"'itemType'\s*:\s*'([^']*)'",
+    ]
+    _PLAYLIST_ID_REGEXES = [
+        r'content[iI]d=(\d+)',
+        r"'itemId'\s*:\s*'([^']*)'",
+    ]
+    _PLAYLIST_CLIP_REGEXES = [
+        r'(?s)data-qvt=.+?<a href="([^"]+)"',
+    ]
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
+    def _extract_clip(self, url, webpage):
         clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
 
         access_token = 'testclient'
@@ -281,3 +299,31 @@ class ProSiebenSat1IE(InfoExtractor):
             'duration': duration,
             'formats': formats,
         }
+
+    def _extract_playlist(self, url, webpage):
+        playlist_id = self._html_search_regex(
+            self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
+        for regex in self._PLAYLIST_CLIP_REGEXES:
+            playlist_clips = re.findall(regex, webpage)
+            if playlist_clips:
+                title = self._html_search_regex(
+                    self._TITLE_REGEXES, webpage, 'title')
+                description = self._html_search_regex(
+                    self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
+                entries = [
+                    self.url_result(
+                        re.match('(.+?//.+?)/', url).group(1) + clip_path,
+                        'ProSiebenSat1')
+                    for clip_path in playlist_clips]
+                return self.playlist_result(entries, playlist_id, title, description)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        page_type = self._search_regex(
+            self._PAGE_TYPE_REGEXES, webpage,
+            'page type', default='clip').lower()
+        if page_type == 'clip':
+            return self._extract_clip(url, webpage)
+        elif page_type == 'playlist':
+            return self._extract_playlist(url, webpage)
diff --git a/youtube_dl/extractor/radiode.py b/youtube_dl/extractor/radiode.py
new file mode 100644 (file)
index 0000000..f95bc94
--- /dev/null
@@ -0,0 +1,55 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+
+
+class RadioDeIE(InfoExtractor):
+    IE_NAME = 'radio.de'
+    _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
+    _TEST = {
+        'url': 'http://ndr2.radio.de/',
+        'md5': '3b4cdd011bc59174596b6145cda474a4',
+        'info_dict': {
+            'id': 'ndr2',
+            'ext': 'mp3',
+            'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'md5:591c49c702db1a33751625ebfb67f273',
+            'thumbnail': 're:^https?://.*\.png',
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        radio_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, radio_id)
+
+        broadcast = json.loads(self._search_regex(
+            r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
+            webpage, 'broadcast'))
+
+        title = self._live_title(broadcast['name'])
+        description = broadcast.get('description') or broadcast.get('shortDescription')
+        thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
+
+        formats = [{
+            'url': stream['streamUrl'],
+            'ext': stream['streamContentFormat'].lower(),
+            'acodec': stream['streamContentFormat'],
+            'abr': stream['bitRate'],
+            'asr': stream['sampleRate']
+        } for stream in broadcast['streamUrls']]
+        self._sort_formats(formats)
+
+        return {
+            'id': radio_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'is_live': True,
+            'formats': formats,
+        }
similarity index 50%
rename from youtube_dl/extractor/cinemassacre.py
rename to youtube_dl/extractor/screenwavemedia.py
index b7fa73c3bfc8f8c290d899662ce0cb102ac86670..6c9fdb7c1aceb35efc166c9207fd503603040b9b 100644 (file)
@@ -5,61 +5,27 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
     int_or_none,
+    unified_strdate,
 )
 
 
-class CinemassacreIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
-    _TESTS = [
-        {
-            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
-            'md5': 'fde81fbafaee331785f58cd6c0d46190',
-            'info_dict': {
-                'id': '19911',
-                'ext': 'mp4',
-                'upload_date': '20121110',
-                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
-                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
-            },
-        },
-        {
-            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
-            'md5': 'd72f10cd39eac4215048f62ab477a511',
-            'info_dict': {
-                'id': '521be8ef82b16',
-                'ext': 'mp4',
-                'upload_date': '20131002',
-                'title': 'The Mummy’s Hand (1940)',
-            },
-        }
-    ]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-
-        webpage = self._download_webpage(url, display_id)
-        video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage)
-        if not mobj:
-            raise ExtractorError('Can\'t extract embed url and video id')
-        playerdata_url = mobj.group('embed_url')
-        video_id = mobj.group('video_id')
-        full_video_id = mobj.group('full_video_id')
+class ScreenwaveMediaIE(InfoExtractor):
+    _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
 
-        video_title = self._html_search_regex(
-            r'<title>(?P<title>.+?)\|', webpage, 'title')
-        video_description = self._html_search_regex(
-            r'<div class="entry-content">(?P<description>.+?)</div>',
-            webpage, 'description', flags=re.DOTALL, fatal=False)
-        video_thumbnail = self._og_search_thumbnail(webpage)
+    _TESTS = [{
+        'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
+        'only_matching': True,
+    }]
 
-        playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
 
+        vidtitle = self._search_regex(
+            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
         vidurl = self._search_regex(
-            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
+            r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')
 
         videolist_url = None
 
@@ -67,7 +33,7 @@ class CinemassacreIE(InfoExtractor):
         if mobj:
             videoserver = mobj.group('videoserver')
             mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
-            vidid = mobj.group('vidid') if mobj else full_video_id
+            vidid = mobj.group('vidid') if mobj else video_id
             videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
         else:
             mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
@@ -85,34 +51,128 @@ class CinemassacreIE(InfoExtractor):
                 file_ = src.partition(':')[-1]
                 width = int_or_none(video.get('width'))
                 height = int_or_none(video.get('height'))
-                bitrate = int_or_none(video.get('system-bitrate'))
+                bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
                 format = {
                     'url': baseurl + file_,
                     'format_id': src.rpartition('.')[0].rpartition('_')[-1],
                 }
                 if width or height:
                     format.update({
-                        'tbr': bitrate // 1000 if bitrate else None,
+                        'tbr': bitrate,
                         'width': width,
                         'height': height,
                     })
                 else:
                     format.update({
-                        'abr': bitrate // 1000 if bitrate else None,
+                        'abr': bitrate,
                         'vcodec': 'none',
                     })
                 formats.append(format)
-            self._sort_formats(formats)
         else:
             formats = [{
                 'url': vidurl,
             }]
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': video_title,
+            'title': vidtitle,
             'formats': formats,
+        }
+
+
+class CinemassacreIE(InfoExtractor):
+    _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
+    _TESTS = [
+        {
+            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
+            'md5': 'fde81fbafaee331785f58cd6c0d46190',
+            'info_dict': {
+                'id': 'Cinemassacre-19911',
+                'ext': 'mp4',
+                'upload_date': '20121110',
+                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
+                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
+            },
+        },
+        {
+            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
+            'md5': 'd72f10cd39eac4215048f62ab477a511',
+            'info_dict': {
+                'id': 'Cinemassacre-521be8ef82b16',
+                'ext': 'mp4',
+                'upload_date': '20131002',
+                'title': 'The Mummy’s Hand (1940)',
+            },
+        }
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
+
+        webpage = self._download_webpage(url, display_id)
+
+        playerdata_url = self._search_regex(
+            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+            webpage, 'player data URL')
+        video_title = self._html_search_regex(
+            r'<title>(?P<title>.+?)\|', webpage, 'title')
+        video_description = self._html_search_regex(
+            r'<div class="entry-content">(?P<description>.+?)</div>',
+            webpage, 'description', flags=re.DOTALL, fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'title': video_title,
+            'description': video_description,
+            'upload_date': video_date,
+            'thumbnail': video_thumbnail,
+            'url': playerdata_url,
+        }
+
+
+class TeamFourIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
+    _TEST = {
+        'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
+        'info_dict': {
+            'id': 'TeamFourStar-5292a02f20bfa',
+            'ext': 'mp4',
+            'upload_date': '20130401',
+            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
+            'title': 'A Moment With TFS Episode 4',
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        playerdata_url = self._search_regex(
+            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+            webpage, 'player data URL')
+
+        video_title = self._html_search_regex(
+            r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
+            webpage, 'title')
+        video_date = unified_strdate(self._html_search_regex(
+            r'<div class="heroheadingdate">(?P<date>.+?)</div>',
+            webpage, 'date', fatal=False))
+        video_description = self._html_search_regex(
+            r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
+            webpage, 'description', fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'title': video_title,
             'description': video_description,
             'upload_date': video_date,
             'thumbnail': video_thumbnail,
+            'url': playerdata_url,
         }
index 0751efc6111c96ca1c089c66183429f9bde6147c..646af3cc9c9686b7d09fdc87b305c0b7c6c0f8ce 100644 (file)
@@ -274,15 +274,18 @@ class SmotriBroadcastIE(InfoExtractor):
         broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
 
         if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
-            raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)
+            raise ExtractorError(
+                'Broadcast %s does not exist' % broadcast_id, expected=True)
 
         # Adult content
         if re.search('EroConfirmText">', broadcast_page) is not None:
 
             (username, password) = self._get_login_info()
             if username is None:
-                raise ExtractorError('Erotic broadcasts allowed only for registered users, '
-                                     'use --username and --password options to provide account credentials.', expected=True)
+                raise ExtractorError(
+                    'Erotic broadcasts allowed only for registered users, '
+                    'use --username and --password options to provide account credentials.',
+                    expected=True)
 
             login_form = {
                 'login-hint53': '1',
@@ -291,9 +294,11 @@ class SmotriBroadcastIE(InfoExtractor):
                 'password': password,
             }
 
-            request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
+            request = compat_urllib_request.Request(
+                broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
             request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
+            broadcast_page = self._download_webpage(
+                request, broadcast_id, 'Logging in and confirming age')
 
             if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
                 raise ExtractorError('Unable to log in: bad username or password', expected=True)
@@ -303,7 +308,7 @@ class SmotriBroadcastIE(InfoExtractor):
             adult_content = False
 
         ticket = self._html_search_regex(
-            'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
+            r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
             broadcast_page, 'broadcast ticket')
 
         url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
@@ -312,26 +317,31 @@ class SmotriBroadcastIE(InfoExtractor):
         if broadcast_password:
             url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
 
-        broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')
+        broadcast_json_page = self._download_webpage(
+            url, broadcast_id, 'Downloading broadcast JSON')
 
         try:
             broadcast_json = json.loads(broadcast_json_page)
 
             protected_broadcast = broadcast_json['_pass_protected'] == 1
             if protected_broadcast and not broadcast_password:
-                raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)
+                raise ExtractorError(
+                    'This broadcast is protected by a password, use the --video-password option',
+                    expected=True)
 
             broadcast_offline = broadcast_json['is_play'] == 0
             if broadcast_offline:
                 raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
 
             rtmp_url = broadcast_json['_server']
-            if not rtmp_url.startswith('rtmp://'):
+            mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
+            if not mobj:
                 raise ExtractorError('Unexpected broadcast rtmp URL')
 
             broadcast_playpath = broadcast_json['_streamName']
+            broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
             broadcast_thumbnail = broadcast_json['_imgURL']
-            broadcast_title = broadcast_json['title']
+            broadcast_title = self._live_title(broadcast_json['title'])
             broadcast_description = broadcast_json['description']
             broadcaster_nick = broadcast_json['nick']
             broadcaster_login = broadcast_json['login']
@@ -352,6 +362,9 @@ class SmotriBroadcastIE(InfoExtractor):
             'age_limit': 18 if adult_content else 0,
             'ext': 'flv',
             'play_path': broadcast_playpath,
+            'player_url': 'http://pics.smotri.com/broadcast_play.swf',
+            'app': broadcast_app,
             'rtmp_live': True,
-            'rtmp_conn': rtmp_conn
+            'rtmp_conn': rtmp_conn,
+            'is_live': True,
         }
index b870474515ba61ee33641c86554d53d68a6bf46d..bfe07b02417a2a44f23a09c10c25d48ec18b5535 100644 (file)
@@ -4,10 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import parse_filesize
 
 
 class TagesschauIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/ts|video/video)(?P<id>-?[0-9]+)\.html'
 
     _TESTS = [{
         'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
@@ -19,6 +20,16 @@ class TagesschauIE(InfoExtractor):
             'description': 'md5:69da3c61275b426426d711bde96463ab',
             'thumbnail': 're:^http:.*\.jpg$',
         },
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
+        'md5': '3c54c1f6243d279b706bde660ceec633',
+        'info_dict': {
+            'id': '5727',
+            'ext': 'mp4',
+            'description': 'md5:695c01bfd98b7e313c501386327aea59',
+            'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
+            'thumbnail': 're:^http:.*\.jpg$',
+        }
     }]
 
     _FORMATS = {
@@ -28,42 +39,82 @@ class TagesschauIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        if video_id.startswith('-'):
-            display_id = video_id.strip('-')
-        else:
-            display_id = video_id
-
+        video_id = self._match_id(url)
+        display_id = video_id.lstrip('-')
         webpage = self._download_webpage(url, display_id)
 
-        playerpage = self._download_webpage(
-            'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
-            display_id, 'Downloading player page')
-
-        medias = re.findall(
-            r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
-            playerpage)
+        player_url = self._html_search_meta(
+            'twitter:player', webpage, 'player URL', default=None)
+        if player_url:
+            playerpage = self._download_webpage(
+                player_url, display_id, 'Downloading player page')
 
-        formats = []
-        for url, ext, res in medias:
-            f = {
-                'format_id': res + '_' + ext,
-                'url': url,
-                'ext': ext,
-            }
-            f.update(self._FORMATS.get(res, {}))
-            formats.append(f)
+            medias = re.findall(
+                r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
+                playerpage)
+            formats = []
+            for url, ext, res in medias:
+                f = {
+                    'format_id': res + '_' + ext,
+                    'url': url,
+                    'ext': ext,
+                }
+                f.update(self._FORMATS.get(res, {}))
+                formats.append(f)
+            thumbnail_fn = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+            title = self._og_search_title(webpage).strip()
+            description = self._og_search_description(webpage).strip()
+        else:
+            download_text = self._search_regex(
+                r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
+                webpage, 'download links')
+            links = re.finditer(
+                r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
+                download_text)
+            formats = []
+            for l in links:
+                format_id = self._search_regex(
+                    r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
+                format = {
+                    'format_id': format_id,
+                    'url': l.group('url'),
+                    'format_name': l.group('name'),
+                }
+                m = re.match(
+                    r'''(?x)
+                        Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
+                        (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
+                        (?P<vbr>[0-9]+)kbps&\#10;
+                        Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
+                        Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
+                    l.group('title'))
+                if m:
+                    format.update({
+                        'format_note': m.group('audio_desc'),
+                        'vcodec': m.group('vcodec'),
+                        'width': int(m.group('width')),
+                        'height': int(m.group('height')),
+                        'abr': int(m.group('abr')),
+                        'vbr': int(m.group('vbr')),
+                        'filesize_approx': parse_filesize(m.group('filesize_approx')),
+                    })
+                formats.append(format)
+            thumbnail_fn = self._search_regex(
+                r'(?s)<img alt="Sendungsbild".*?src="([^"]+)"',
+                webpage, 'thumbnail', fatal=False)
+            description = self._html_search_regex(
+                r'(?s)<p class="teasertext">(.*?)</p>',
+                webpage, 'description', fatal=False)
+            title = self._html_search_regex(
+                r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
 
         self._sort_formats(formats)
-
-        thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+        thumbnail = 'http://www.tagesschau.de' + thumbnail_fn
 
         return {
             'id': display_id,
-            'title': self._og_search_title(webpage).strip(),
-            'thumbnail': 'http://www.tagesschau.de' + thumbnail,
+            'title': title,
+            'thumbnail': thumbnail,
             'formats': formats,
-            'description': self._og_search_description(webpage).strip(),
+            'description': description,
         }
index d81d1d1a67cef49d4f612f08bfb5b7b7002b51fd..ba65996dc01646e019cfd5820aa36c1934365d9b 100644 (file)
@@ -1,32 +1,30 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     float_or_none,
-    str_to_int,
+    parse_age_limit,
 )
 
 
 class TvigleIE(InfoExtractor):
     IE_NAME = 'tvigle'
     IE_DESC = 'Интернет-телевидение Tvigle.ru'
-    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$'
+    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'
 
     _TESTS = [
         {
-            'url': 'http://www.tvigle.ru/video/brat/',
-            'md5': 'ff4344a4894b0524441fb6f8218dc716',
+            'url': 'http://www.tvigle.ru/video/sokrat/',
+            'md5': '36514aed3657d4f70b4b2cef8eb520cd',
             'info_dict': {
-                'id': '5118490',
-                'display_id': 'brat',
-                'ext': 'mp4',
-                'title': 'Брат',
-                'description': 'md5:d16ac7c0b47052ea51fddb92c4e413eb',
-                'duration': 5722.6,
-                'age_limit': 16,
+                'id': '1848932',
+                'display_id': 'sokrat',
+                'ext': 'flv',
+                'title': 'Сократ',
+                'description': 'md5:a05bd01be310074d5833efc6743be95e',
+                'duration': 6586,
+                'age_limit': 0,
             },
         },
         {
@@ -44,8 +42,7 @@ class TvigleIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
+        display_id = self._match_id(url)
 
         webpage = self._download_webpage(url, display_id)
 
@@ -60,8 +57,8 @@ class TvigleIE(InfoExtractor):
         title = item['title']
         description = item['description']
         thumbnail = item['thumbnail']
-        duration = float_or_none(item['durationMilliseconds'], 1000)
-        age_limit = str_to_int(item['ageRestrictions'])
+        duration = float_or_none(item.get('durationMilliseconds'), 1000)
+        age_limit = parse_age_limit(item.get('ageRestrictions'))
 
         formats = []
         for vcodec, fmts in item['videos'].items():
index eb94737546fb725e992249d356588417d90ad81a..9a53a3c74143d72a14842ea70ce4063a8d28a30c 100644 (file)
@@ -6,7 +6,6 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
-    ExtractorError,
     parse_iso8601,
     qualities,
 )
@@ -182,8 +181,8 @@ class TVPlayIE(InfoExtractor):
             'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
 
         if video['is_geo_blocked']:
-            raise ExtractorError(
-                'This content is not available in your country due to copyright reasons', expected=True)
+            self.report_warning(
+                'This content might not be available in your country due to copyright reasons')
 
         streams = self._download_json(
             'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
index 36aa1ad6ec578859d90c947dee9c39213dcfda59..397d167e89cef763d977b657f2998ae6a363dec3 100644 (file)
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import itertools
@@ -5,6 +6,8 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_request,
     ExtractorError,
     parse_iso8601,
 )
@@ -24,6 +27,7 @@ class TwitchIE(InfoExtractor):
         """
     _PAGE_LIMIT = 100
     _API_BASE = 'https://api.twitch.tv'
+    _LOGIN_URL = 'https://secure.twitch.tv/user/login'
     _TESTS = [{
         'url': 'http://www.twitch.tv/riotgames/b/577357806',
         'info_dict': {
@@ -109,6 +113,44 @@ class TwitchIE(InfoExtractor):
             'view_count': info['views'],
         }
 
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        authenticity_token = self._search_regex(
+            r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
+            login_page, 'authenticity token')
+
+        login_form = {
+            'utf8': '✓'.encode('utf-8'),
+            'authenticity_token': authenticity_token,
+            'redirect_on_login': '',
+            'embed_form': 'false',
+            'mp_source_action': '',
+            'follow': '',
+            'user[login]': username,
+            'user[password]': password,
+        }
+
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        request.add_header('Referer', self._LOGIN_URL)
+        response = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        m = re.search(
+            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
+        if m:
+            raise ExtractorError(
+                'Unable to login: %s' % m.group('msg').strip(), expected=True)
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         if mobj.group('chapterid'):
index 0e4d386a8ba32387f6f9025e633efb4c1ee59700..5271611ac9f883af6a63e371b919c68398adaca0 100644 (file)
@@ -97,11 +97,8 @@ class UdemyIE(InfoExtractor):
         if 'returnUrl' not in response:
             raise ExtractorError('Unable to log in')
 
-
-
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        lecture_id = mobj.group('id')
+        lecture_id = self._match_id(url)
 
         lecture = self._download_json(
             'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id,
index 42995226e584b0ce4e0d207b6b702157ec6f4030..0b58fe0fe0b5188e9c9865e56ce064e94dbc45e5 100644 (file)
@@ -17,6 +17,7 @@ class VineIE(InfoExtractor):
             'id': 'b9KOOWX7HUx',
             'ext': 'mp4',
             'title': 'Chicken.',
+            'alt_title': 'Vine by Jack Dorsey',
             'description': 'Chicken.',
             'upload_date': '20130519',
             'uploader': 'Jack Dorsey',
@@ -25,30 +26,26 @@ class VineIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
 
         data = json.loads(self._html_search_regex(
             r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
 
-        formats = [
-            {
-                'url': data['videoLowURL'],
-                'ext': 'mp4',
-                'format_id': 'low',
-            },
-            {
-                'url': data['videoUrl'],
-                'ext': 'mp4',
-                'format_id': 'standard',
-            }
-        ]
+        formats = [{
+            'url': data['videoLowURL'],
+            'ext': 'mp4',
+            'format_id': 'low',
+        }, {
+            'url': data['videoUrl'],
+            'ext': 'mp4',
+            'format_id': 'standard',
+        }]
 
         return {
             'id': video_id,
             'title': self._og_search_title(webpage),
+            'alt_title': self._og_search_description(webpage),
             'description': data['description'],
             'thumbnail': data['thumbnailUrl'],
             'upload_date': unified_strdate(data['created']),
@@ -63,29 +60,36 @@ class VineIE(InfoExtractor):
 
 class VineUserIE(InfoExtractor):
     IE_NAME = 'vine:user'
-    _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
+    _VALID_URL = r'(?:https?://)?vine\.co/(?P<u>u/)?(?P<user>[^/]+)/?(\?.*)?$'
     _VINE_BASE_URL = "https://vine.co/"
-    _TEST = {
-        'url': 'https://vine.co/Visa',
-        'info_dict': {
-            'id': 'Visa',
+    _TESTS = [
+        {
+            'url': 'https://vine.co/Visa',
+            'info_dict': {
+                'id': 'Visa',
+            },
+            'playlist_mincount': 46,
         },
-        'playlist_mincount': 46,
-    }
+        {
+            'url': 'https://vine.co/u/941705360593584128',
+            'only_matching': True,
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         user = mobj.group('user')
+        u = mobj.group('u')
 
-        profile_url = "%sapi/users/profiles/vanity/%s" % (
-            self._VINE_BASE_URL, user)
+        profile_url = "%sapi/users/profiles/%s%s" % (
+            self._VINE_BASE_URL, 'vanity/' if not u else '', user)
         profile_data = self._download_json(
             profile_url, user, note='Downloading user profile data')
 
         user_id = profile_data['data']['userId']
         timeline_data = []
         for pagenum in itertools.count(1):
-            timeline_url = "%sapi/timelines/users/%s?page=%s" % (
+            timeline_url = "%sapi/timelines/users/%s?page=%s&size=100" % (
                 self._VINE_BASE_URL, user_id, pagenum)
             timeline_page = self._download_json(
                 timeline_url, user, note='Downloading page %d' % pagenum)
index f7e2e8ac9594ef45a1d329c6359e447796b70f4b..8c6241aedf7249343a725ab705968d0af963294a 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_chr,
@@ -25,6 +27,7 @@ class XMinusIE(InfoExtractor):
             'tbr': 320,
             'filesize_approx': 5900000,
             'view_count': int,
+            'description': 'md5:03238c5b663810bc79cf42ef3c03e371',
         }
     }
 
@@ -48,6 +51,11 @@ class XMinusIE(InfoExtractor):
         view_count = int_or_none(self._html_search_regex(
             r'<div class="quality.*?► ([0-9]+)',
             webpage, 'view count', fatal=False))
+        description = self._html_search_regex(
+            r'(?s)<div id="song_texts">(.*?)</div><br',
+            webpage, 'song lyrics', fatal=False)
+        if description:
+            description = re.sub(' *\r *', '\n', description)
 
         enc_token = self._html_search_regex(
             r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
@@ -64,4 +72,5 @@ class XMinusIE(InfoExtractor):
             'filesize_approx': filesize_approx,
             'tbr': tbr,
             'view_count': view_count,
+            'description': description,
         }
index 8123928be982dbaccb98638ea0b915007ebc7066..d9c06a2ee6d934391560b6ca7db4ebecf40ff1d0 100644 (file)
@@ -45,7 +45,9 @@ class YouPornIE(InfoExtractor):
         age_limit = self._rta_search(webpage)
 
         # Get JSON parameters
-        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
+        json_params = self._search_regex(
+            r'var currentVideo = new Video\((.*)\)[,;]',
+            webpage, 'JSON parameters')
         try:
             params = json.loads(json_params)
         except:
index 1cba40387a0aae4a8f3913c78ddacc0f73b2e367..7b6179a2abd1261b787f1b19486ab04af31feddf 100644 (file)
@@ -14,23 +14,24 @@ from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
-from ..utils import (
+from ..compat import (
     compat_chr,
     compat_parse_qs,
     compat_urllib_parse,
     compat_urllib_request,
     compat_urlparse,
     compat_str,
-
+)
+from ..utils import (
     clean_html,
-    get_element_by_id,
-    get_element_by_attribute,
     ExtractorError,
+    get_element_by_attribute,
+    get_element_by_id,
     int_or_none,
     OnDemandPagedList,
+    orderedSet,
     unescapeHTML,
     unified_strdate,
-    orderedSet,
     uppercase_escape,
 )
 
@@ -44,9 +45,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     _LOGIN_REQUIRED = False
 
     def _set_language(self):
-        self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en',
+        self._set_cookie(
+            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
             # YouTube sets the expire time to about two months
-            expire_time=time.time() + 60*24*3600)
+            expire_time=time.time() + 2 * 30 * 24 * 3600)
 
     def _login(self):
         """
@@ -416,6 +418,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'upload_date': '20140605',
             },
         },
+        # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
+        {
+            'url': '__2ABJjxzNo',
+            'info_dict': {
+                'id': '__2ABJjxzNo',
+                'ext': 'mp4',
+                'upload_date': '20100430',
+                'uploader_id': 'deadmau5',
+                'description': 'md5:12c56784b8032162bb936a5f76d55360',
+                'uploader': 'deadmau5',
+                'title': 'Deadmau5 - Some Chords (HD)',
+            },
+            'expected_warnings': [
+                'DASH manifest missing',
+            ]
+        },
+        # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
+        {
+            'url': 'lqQg6PlCWgI',
+            'info_dict': {
+                'id': 'lqQg6PlCWgI',
+                'ext': 'mp4',
+                'upload_date': '20120731',
+                'uploader_id': 'olympic',
+                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+                'uploader': 'Olympics',
+                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
+            },
+            'params': {
+                'skip_download': 'requires avconv',
+            }
+        },
     ]
 
     def __init__(self, *args, **kwargs):
@@ -665,6 +699,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
 
+    def _parse_dash_manifest(
+            self, video_id, dash_manifest_url, player_url, age_gate):
+        def decrypt_sig(mobj):
+            s = mobj.group(1)
+            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
+            return '/signature/%s' % dec_s
+        dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
+        dash_doc = self._download_xml(
+            dash_manifest_url, video_id,
+            note='Downloading DASH manifest',
+            errnote='Could not download DASH manifest')
+
+        formats = []
+        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+            if url_el is None:
+                continue
+            format_id = r.attrib['id']
+            video_url = url_el.text
+            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+            f = {
+                'format_id': format_id,
+                'url': video_url,
+                'width': int_or_none(r.attrib.get('width')),
+                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+                'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+                'filesize': filesize,
+                'fps': int_or_none(r.attrib.get('frameRate')),
+            }
+            try:
+                existing_format = next(
+                    fo for fo in formats
+                    if fo['format_id'] == format_id)
+            except StopIteration:
+                f.update(self._formats.get(format_id, {}))
+                formats.append(f)
+            else:
+                existing_format.update(f)
+        return formats
+
     def _real_extract(self, url):
         proto = (
             'http' if self._downloader.params.get('prefer_insecure', False)
@@ -722,9 +796,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 # We fallback to the get_video_info pages (used by the embed page)
                 self.report_video_info_webpage_download(video_id)
                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-                    video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
-                        % (video_id, el_type))
-                    video_info_webpage = self._download_webpage(video_info_url,
+                    video_info_url = (
+                        '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+                        % (proto, video_id, el_type))
+                    video_info_webpage = self._download_webpage(
+                        video_info_url,
                         video_id, note=False,
                         errnote='unable to download video info webpage')
                     video_info = compat_parse_qs(video_info_webpage)
@@ -797,7 +873,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         m_cat_container = self._search_regex(
             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
-            video_webpage, 'categories', fatal=False)
+            video_webpage, 'categories', default=None)
         if m_cat_container:
             category = self._html_search_regex(
                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
@@ -875,7 +951,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'url': video_info['conn'][0],
                 'player_url': player_url,
             }]
-        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
+        elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
             if 'rtmpe%3Dyes' in encoded_url_map:
                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
@@ -940,51 +1016,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # Look for the DASH manifest
         if self._downloader.params.get('youtube_include_dash_manifest', True):
-            try:
-                # The DASH manifest used needs to be the one from the original video_webpage.
-                # The one found in get_video_info seems to be using different signatures.
-                # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
-                # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
-                # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
-                dash_manifest_url = video_info.get('dashmpd')[0]
-
-                def decrypt_sig(mobj):
-                    s = mobj.group(1)
-                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
-                    return '/signature/%s' % dec_s
-                dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
-                dash_doc = self._download_xml(
-                    dash_manifest_url, video_id,
-                    note='Downloading DASH manifest',
-                    errnote='Could not download DASH manifest')
-                for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
-                    url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
-                    if url_el is None:
-                        continue
-                    format_id = r.attrib['id']
-                    video_url = url_el.text
-                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
-                    f = {
-                        'format_id': format_id,
-                        'url': video_url,
-                        'width': int_or_none(r.attrib.get('width')),
-                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
-                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
-                        'filesize': filesize,
-                        'fps': int_or_none(r.attrib.get('frameRate')),
-                    }
-                    try:
-                        existing_format = next(
-                            fo for fo in formats
-                            if fo['format_id'] == format_id)
-                    except StopIteration:
-                        f.update(self._formats.get(format_id, {}))
-                        formats.append(f)
-                    else:
-                        existing_format.update(f)
-
-            except (ExtractorError, KeyError) as e:
-                self.report_warning('Skipping DASH manifest: %r' % e, video_id)
+            dash_mpd = video_info.get('dashmpd')
+            if dash_mpd:
+                dash_manifest_url = dash_mpd[0]
+                try:
+                    dash_formats = self._parse_dash_manifest(
+                        video_id, dash_manifest_url, player_url, age_gate)
+                except (ExtractorError, KeyError) as e:
+                    self.report_warning(
+                        'Skipping DASH manifest: %r' % e, video_id)
+                else:
+                    formats.extend(dash_formats)
 
         self._sort_formats(formats)
 
@@ -1226,7 +1268,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
 
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = 'YouTube.com channels'
-    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
+    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
     _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
     IE_NAME = 'youtube:channel'
@@ -1244,13 +1286,8 @@ class YoutubeChannelIE(InfoExtractor):
         return ids_in_page
 
     def _real_extract(self, url):
-        # Extract channel id
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
+        channel_id = self._match_id(url)
 
-        # Download channel page
-        channel_id = mobj.group(1)
         video_ids = []
         url = 'https://www.youtube.com/channel/%s/videos' % channel_id
         channel_page = self._download_webpage(url, channel_id)
@@ -1264,8 +1301,12 @@ class YoutubeChannelIE(InfoExtractor):
             # The videos are contained in a single page
             # the ajax pages can't be used, they are empty
             video_ids = self.extract_videos_from_page(channel_page)
-        else:
-            # Download all channel pages using the json-based channel_ajax query
+            entries = [
+                self.url_result(video_id, 'Youtube', video_id=video_id)
+                for video_id in video_ids]
+            return self.playlist_result(entries, channel_id)
+
+        def _entries():
             for pagenum in itertools.count(1):
                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
                 page = self._download_json(
@@ -1273,21 +1314,19 @@ class YoutubeChannelIE(InfoExtractor):
                     transform_source=uppercase_escape)
 
                 ids_in_page = self.extract_videos_from_page(page['content_html'])
-                video_ids.extend(ids_in_page)
+                for video_id in ids_in_page:
+                    yield self.url_result(
+                        video_id, 'Youtube', video_id=video_id)
 
                 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                     break
 
-        self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
-
-        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
-                       for video_id in video_ids]
-        return self.playlist_result(url_entries, channel_id)
+        return self.playlist_result(_entries(), channel_id)
 
 
 class YoutubeUserIE(InfoExtractor):
     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
@@ -1315,12 +1354,7 @@ class YoutubeUserIE(InfoExtractor):
             return super(YoutubeUserIE, cls).suitable(url)
 
     def _real_extract(self, url):
-        # Extract username
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
-
-        username = mobj.group(1)
+        username = self._match_id(url)
 
         # Download video ids using YouTube Data API. Result size per
         # query is limited (currently to 50 videos) so we need to query
index 9ff00e26c4235e0eaace73b219b945ea17cee175..74c76a9a0446482c303f3b4182f3ef2bd4942c0d 100644 (file)
@@ -1,12 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import functools
 import re
 
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     unified_strdate,
+    OnDemandPagedList,
 )
 
 
@@ -87,7 +89,7 @@ def extract_from_xml_url(ie, video_id, xml_url):
 
 
 class ZDFIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
+    _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
 
     _TEST = {
         'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
@@ -106,6 +108,52 @@ class ZDFIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
         return extract_from_xml_url(self, video_id, xml_url)
+
+
+class ZDFChannelIE(InfoExtractor):
+    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
+        'info_dict': {
+            'id': '1586442',
+        },
+        'playlist_count': 4,
+    }
+    _PAGE_SIZE = 50
+
+    def _fetch_page(self, channel_id, page):
+        offset = page * self._PAGE_SIZE
+        xml_url = (
+            'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
+            % (offset, self._PAGE_SIZE, channel_id))
+        doc = self._download_xml(
+            xml_url, channel_id,
+            note='Downloading channel info',
+            errnote='Failed to download channel info')
+
+        title = doc.find('.//information/title').text
+        description = doc.find('.//information/detail').text
+        for asset in doc.findall('.//teasers/teaser'):
+            a_type = asset.find('./type').text
+            a_id = asset.find('./details/assetId').text
+            if a_type not in ('video', 'topic'):
+                continue
+            yield {
+                '_type': 'url',
+                'playlist_title': title,
+                'playlist_description': description,
+                'url': 'zdf:%s:%s' % (a_type, a_id),
+            }
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        entries = OnDemandPagedList(
+            functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)
+
+        return {
+            '_type': 'playlist',
+            'id': channel_id,
+            'entries': entries,
+        }
index 2e8c715084616a565472fb8700a8d995cb8c6d04..041ca83d88612c2aa9f56b8fce1b9349b2d96ab8 100644 (file)
@@ -163,7 +163,10 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '--ignore-config',
         action='store_true',
-        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+        help='Do not read configuration files. '
+        'When given in the global configuration file /etc/youtube-dl.conf: '
+        'Do not read the user configuration in ~/.config/youtube-dl/config '
+        '(%APPDATA%/youtube-dl/config.txt on Windows)')
     general.add_option(
         '--flat-playlist',
         action='store_const', dest='extract_flat', const='in_playlist',
index 09db43611a7c288e77c2cacaf96f266c541b2bbc..75c0f7bbe86ef8e19f41fd61e1bbd58678474d8a 100644 (file)
@@ -14,7 +14,7 @@ class ExecAfterDownloadPP(PostProcessor):
 
     def run(self, information):
         cmd = self.exec_cmd
-        if not '{}' in cmd:
+        if '{}' not in cmd:
             cmd += ' {}'
 
         cmd = cmd.replace('{}', shlex_quote(information['filepath']))
index 9303b8378b8065d84ed2d064ab76aacb463ef6bf..965ded4c1590eb3cccfebcfc3fd460f5a83d1c90 100644 (file)
@@ -37,11 +37,11 @@ class FFmpegPostProcessor(PostProcessor):
         if not self._executable:
             raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
 
-        REQUIRED_VERSION = '1.0'
+        required_version = '10-0' if self._uses_avconv() else '1.0'
         if is_outdated_version(
-                self._versions[self._executable], REQUIRED_VERSION):
+                self._versions[self._executable], required_version):
             warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
-                self._executable, self._executable, REQUIRED_VERSION)
+                self._executable, self._executable, required_version)
             if self._downloader:
                 self._downloader.report_warning(warning)
 
index 4c07a558e7ad2f2db422ed2a0124df49efc6b09c..2d2703368d8c2974e665985a10d1b6dcace8b235 100644 (file)
@@ -79,7 +79,7 @@ def update_self(to_screen, verbose):
             to_screen(compat_str(traceback.format_exc()))
         to_screen('ERROR: can\'t obtain versions info. Please try again later.')
         return
-    if not 'signature' in versions_info:
+    if 'signature' not in versions_info:
         to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
         return
     signature = versions_info['signature']
index 4d3cbac74aaebdbe0b314690b7dea07e2e2371e2..bbe554a657b6d3801ac70d2fbbce41125dca5bbd 100644 (file)
@@ -41,6 +41,7 @@ from .compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urlparse,
+    compat_WINFUNCTYPE,
     shlex_quote,
 )
 
@@ -166,7 +167,7 @@ def xpath_text(node, xpath, name=None, fatal=False):
         xpath = xpath.encode('ascii')
 
     n = node.find(xpath)
-    if n is None:
+    if n is None or n.text is None:
         if fatal:
             name = xpath if name is None else name
             raise ExtractorError('Could not find XML element %s' % name)
@@ -644,17 +645,19 @@ def parse_iso8601(date_str, delimiter='T'):
     return calendar.timegm(dt.timetuple())
 
 
-def unified_strdate(date_str):
+def unified_strdate(date_str, day_first=True):
     """Return a string with the date in the format YYYYMMDD"""
 
     if date_str is None:
         return None
-
     upload_date = None
     # Replace commas
     date_str = date_str.replace(',', ' ')
     # %z (UTC offset) is only supported in python>=3.2
     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+    # Remove AM/PM + timezone
+    date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str)
+
     format_expressions = [
         '%d %B %Y',
         '%d %b %Y',
@@ -669,7 +672,6 @@ def unified_strdate(date_str):
         '%d/%m/%Y',
         '%d/%m/%y',
         '%Y/%m/%d %H:%M:%S',
-        '%d/%m/%Y %H:%M:%S',
         '%Y-%m-%d %H:%M:%S',
         '%Y-%m-%d %H:%M:%S.%f',
         '%d.%m.%Y %H:%M',
@@ -681,6 +683,14 @@ def unified_strdate(date_str):
         '%Y-%m-%dT%H:%M:%S.%f',
         '%Y-%m-%dT%H:%M',
     ]
+    if day_first:
+        format_expressions.extend([
+            '%d/%m/%Y %H:%M:%S',
+        ])
+    else:
+        format_expressions.extend([
+            '%m/%d/%Y %H:%M:%S',
+        ])
     for expression in format_expressions:
         try:
             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -712,8 +722,10 @@ def date_from_str(date_str):
     Return a datetime object from a string in the format YYYYMMDD or
     (now|today)[+-][0-9](day|week|month|year)(s)?"""
     today = datetime.date.today()
-    if date_str == 'now'or date_str == 'today':
+    if date_str in ('now', 'today'):
         return today
+    if date_str == 'yesterday':
+        return today - datetime.timedelta(days=1)
     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
     if match is not None:
         sign = match.group('sign')
@@ -806,21 +818,21 @@ def _windows_write_string(s, out):
     if fileno not in WIN_OUTPUT_IDS:
         return False
 
-    GetStdHandle = ctypes.WINFUNCTYPE(
+    GetStdHandle = compat_WINFUNCTYPE(
         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
         ("GetStdHandle", ctypes.windll.kernel32))
     h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
 
-    WriteConsoleW = ctypes.WINFUNCTYPE(
+    WriteConsoleW = compat_WINFUNCTYPE(
         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
         ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
     written = ctypes.wintypes.DWORD(0)
 
-    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
+    GetFileType = compat_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
     FILE_TYPE_CHAR = 0x0002
     FILE_TYPE_REMOTE = 0x8000
-    GetConsoleMode = ctypes.WINFUNCTYPE(
+    GetConsoleMode = compat_WINFUNCTYPE(
         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
         ctypes.POINTER(ctypes.wintypes.DWORD))(
         ("GetConsoleMode", ctypes.windll.kernel32))
@@ -1024,7 +1036,7 @@ def smuggle_url(url, data):
 
 
 def unsmuggle_url(smug_url, default=None):
-    if not '#__youtubedl_smuggle' in smug_url:
+    if '#__youtubedl_smuggle' not in smug_url:
         return smug_url, default
     url, _, sdata = smug_url.rpartition('#')
     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
@@ -1090,11 +1102,14 @@ def parse_filesize(s):
     }
 
     units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
-    m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
+    m = re.match(
+        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
     if not m:
         return None
 
-    return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')])
+    num_str = m.group('num').replace(',', '.')
+    mult = _UNIT_TABLE[m.group('unit')]
+    return int(float(num_str) * mult)
 
 
 def get_term_width():
@@ -1203,18 +1218,29 @@ def parse_duration(s):
 
     m = re.match(
         r'''(?ix)T?
+        (?:
+            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
+            (?P<only_hours>[0-9.]+)\s*(?:hours?)|
+
             (?:
                 (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
                 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
             )?
-            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s)
+            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
+        )$''', s)
     if not m:
         return None
-    res = int(m.group('secs'))
+    res = 0
+    if m.group('only_mins'):
+        return float_or_none(m.group('only_mins'), invscale=60)
+    if m.group('only_hours'):
+        return float_or_none(m.group('only_hours'), invscale=60 * 60)
+    if m.group('secs'):
+        res += int(m.group('secs'))
     if m.group('mins'):
         res += int(m.group('mins')) * 60
-        if m.group('hours'):
-            res += int(m.group('hours')) * 60 * 60
+    if m.group('hours'):
+        res += int(m.group('hours')) * 60 * 60
     if m.group('ms'):
         res += float(m.group('ms'))
     return res
@@ -1488,7 +1514,7 @@ def limit_length(s, length):
 
 
 def version_tuple(v):
-    return [int(e) for e in v.split('.')]
+    return tuple(int(e) for e in re.split(r'[-.]', v))
 
 
 def is_outdated_version(version, limit, assume_new=True):
index 61902a8cc9e7487ce5c31ed00d626e9aa953ab2a..7289ea4d2ee7f00307382a1927c07e96f1e68c2d 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2014.12.03'
+__version__ = '2014.12.12.1'