Merge remote-tracking branch 'fstirlitz/master'

author Philipp Hagemeister <phihag@phihag.de>

Thu, 11 Dec 2014 16:11:25 +0000 (17:11 +0100)

committer Philipp Hagemeister <phihag@phihag.de>

Thu, 11 Dec 2014 16:11:25 +0000 (17:11 +0100)
author Philipp Hagemeister <phihag@phihag.de>
Thu, 11 Dec 2014 16:11:25 +0000 (17:11 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 11 Dec 2014 16:11:25 +0000 (17:11 +0100)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 56dc3d461796f7c8897f20875ca8ac7c0301c3dd..248367039854ed99c32fdc51fe76d5bce02f7509 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -942,8 +942,12 @@ class YoutubeDL(object):
          if self.params.get('forceid', False):
              self.to_stdout(info_dict['id'])
          if self.params.get('forceurl', False):
-            # For RTMP URLs, also include the playpath
-            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+            if info_dict.get('requested_formats') is not None:
+                for f in info_dict['requested_formats']:
+                    self.to_stdout(f['url'] + f.get('play_path', ''))
+            else:
+                # For RTMP URLs, also include the playpath
+                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
          if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
              self.to_stdout(info_dict['thumbnail'])
          if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py

index 27596687d0d2c354990e6112027e054865c1c79c..f4a85443eda5086c004040183d1779a7059c0c2e 100644 (file)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -247,7 +247,7 @@ else:
                  userhome = compat_getenv('HOME')
              elif 'USERPROFILE' in os.environ:
                  userhome = compat_getenv('USERPROFILE')
-            elif not 'HOMEPATH' in os.environ:
+            elif 'HOMEPATH' not in os.environ:
                  return path
              else:
                  try:
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index c0dcdaf02167e365c442fae5a6ebed4984ce1b42..6b7660ab1585b998f6f8d807db6e8a0147b1526b 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -30,6 +30,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
  from .bbccouk import BBCCoUkIE
  from .beeg import BeegIE
  from .behindkink import BehindKinkIE
+from .bet import BetIE
  from .bild import BildIE
  from .bilibili import BiliBiliIE
  from .blinkx import BlinkxIE
@@ -525,7 +526,7 @@ from .youtube import (
      YoutubeUserIE,
      YoutubeWatchLaterIE,
  )
-from .zdf import ZDFIE
+from .zdf import ZDFIE, ZDFChannelIE
  from .zingmp3 import (
      ZingMp3SongIE,
      ZingMp3AlbumIE,
diff --git a/youtube_dl/extractor/behindkink.py b/youtube_dl/extractor/behindkink.py

index 31fdc0dcc0614babf4ff3b48186566904cfcc57a..1bdc25812b6afb4cf133007f2d12b89fd56b353f 100644 (file)
--- a/youtube_dl/extractor/behindkink.py
+++ b/youtube_dl/extractor/behindkink.py
@@ -10,15 +10,15 @@ from ..utils import url_basename
  class BehindKinkIE(InfoExtractor):
      _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
      _TEST = {
-        'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
-        'md5': '41ad01222b8442089a55528fec43ec01',
+        'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
+        'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
          'info_dict': {
-            'id': '36370',
+            'id': '37127',
              'ext': 'mp4',
-            'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
-            'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
-            'upload_date': '20140814',
-            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
+            'title': 'What are you passionate about – Marley Blaze',
+            'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
+            'upload_date': '20141205',
+            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
              'age_limit': 18,
          }
      }
@@ -26,26 +26,19 @@ class BehindKinkIE(InfoExtractor):
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          display_id = mobj.group('id')
-        year = mobj.group('year')
-        month = mobj.group('month')
-        day = mobj.group('day')
-        upload_date = year + month + day
  
          webpage = self._download_webpage(url, display_id)
  
          video_url = self._search_regex(
-            r"'file':\s*'([^']+)'",
-            webpage, 'URL base')
-
-        video_id = url_basename(video_url)
-        video_id = video_id.split('_')[0]
+            r'<source src="([^"]+)"', webpage, 'video URL')
+        video_id = url_basename(video_url).split('_')[0]
+        upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')
  
          return {
              'id': video_id,
+            'display_id': display_id,
              'url': video_url,
-            'ext': 'mp4',
              'title': self._og_search_title(webpage),
-            'display_id': display_id,
              'thumbnail': self._og_search_thumbnail(webpage),
              'description': self._og_search_description(webpage),
              'upload_date': upload_date,
diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py

new file mode 100644 (file)

index 0000000..c1fc433
--- /dev/null
+++ b/youtube_dl/extractor/bet.py
@@ -0,0 +1,108 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    xpath_text,
+    xpath_with_ns,
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class BetIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
+    _TESTS = [
+        {
+            'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
+            'info_dict': {
+                'id': '417cd61c-c793-4e8e-b006-e445ecc45add',
+                'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
+                'ext': 'flv',
+                'title': 'BET News Presents: A Conversation With President Obama',
+                'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
+                'duration': 1534,
+                'timestamp': 1418075340,
+                'upload_date': '20141208',
+                'uploader': 'admin',
+                'thumbnail': 're:(?i)^https?://.*\.jpg$',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
+            'info_dict': {
+                'id': '4160e53b-ad41-43b1-980f-8d85f63121f4',
+                'display_id': 'justice-for-ferguson-a-community-reacts',
+                'ext': 'flv',
+                'title': 'Justice for Ferguson: A Community Reacts',
+                'description': 'A BET News special.',
+                'duration': 1696,
+                'timestamp': 1416942360,
+                'upload_date': '20141125',
+                'uploader': 'admin',
+                'thumbnail': 're:(?i)^https?://.*\.jpg$',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        }
+    ]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        media_url = compat_urllib_parse.unquote(self._search_regex(
+            [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
+            webpage, 'media URL'))
+
+        mrss = self._download_xml(media_url, display_id)
+
+        item = mrss.find('./channel/item')
+
+        NS_MAP = {
+            'dc': 'http://purl.org/dc/elements/1.1/',
+            'media': 'http://search.yahoo.com/mrss/',
+            'ka': 'http://kickapps.com/karss',
+        }
+
+        title = xpath_text(item, './title', 'title')
+        description = xpath_text(
+            item, './description', 'description', fatal=False)
+
+        video_id = xpath_text(item, './guid', 'video id', fatal=False)
+
+        timestamp = parse_iso8601(xpath_text(
+            item, xpath_with_ns('./dc:date', NS_MAP),
+            'upload date', fatal=False))
+        uploader = xpath_text(
+            item, xpath_with_ns('./dc:creator', NS_MAP),
+            'uploader', fatal=False)
+
+        media_content = item.find(
+            xpath_with_ns('./media:content', NS_MAP))
+        duration = int_or_none(media_content.get('duration'))
+        smil_url = media_content.get('url')
+
+        thumbnail = media_content.find(
+            xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
+
+        formats = self._extract_smil_formats(smil_url, display_id)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'duration': duration,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py

index da47f27bdd6702d3927f3fde72fc0ebe064df53a..14b814120be3b8215a28fc00a95f87bd22e0c062 100644 (file)
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -4,13 +4,17 @@ import re
  
  from .common import InfoExtractor
  from .subtitles import SubtitlesInfoExtractor
-from ..utils import (
+
+from ..compat import (
+    compat_str,
      compat_urllib_request,
-    unescapeHTML,
-    parse_iso8601,
      compat_urlparse,
+)
+from ..utils import (
      clean_html,
-    compat_str,
+    int_or_none,
+    parse_iso8601,
+    unescapeHTML,
  )
  
  
@@ -78,7 +82,25 @@ class BlipTVIE(SubtitlesInfoExtractor):
                  'uploader': 'NostalgiaCritic',
                  'uploader_id': '246467',
              }
-        }
+        },
+        {
+            # https://github.com/rg3/youtube-dl/pull/4404
+            'note': 'Audio only',
+            'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
+            'md5': '76c0a56f24e769ceaab21fbb6416a351',
+            'info_dict': {
+                'id': '7103299',
+                'ext': 'flv',
+                'title': 'Weekly Manga Recap: Kingdom',
+                'description': 'And then Shin breaks the enemy line, and he&apos;s all like HWAH! And then he slices a guy and it&apos;s all like FWASHING! And... it&apos;s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
+                'timestamp': 1417660321,
+                'upload_date': '20141204',
+                'uploader': 'The Rollo T',
+                'uploader_id': '407429',
+                'duration': 7251,
+                'vcodec': 'none',
+            }
+        },
      ]
  
      def _real_extract(self, url):
@@ -145,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor):
                      'url': real_url,
                      'format_id': role,
                      'format_note': media_type,
-                    'vcodec': media_content.get(blip('vcodec')),
+                    'vcodec': media_content.get(blip('vcodec')) or 'none',
                      'acodec': media_content.get(blip('acodec')),
                      'filesize': media_content.get('filesize'),
-                    'width': int(media_content.get('width')),
-                    'height': int(media_content.get('height')),
+                    'width': int_or_none(media_content.get('width')),
+                    'height': int_or_none(media_content.get('height')),
                  })
          self._sort_formats(formats)
  
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 2faaf6226600182abba37efc2eccd67be6e5ef03..2277ec6ab1ddf3a3e345045104e9870488bc39d4 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -174,9 +174,10 @@ class InfoExtractor(object):
      _type "url" indicates that the video must be extracted from another
      location, possibly by a different extractor. Its only required key is:
      "url" - the next URL to extract.
-
-    Additionally, it may have properties believed to be identical to the
-    resolved entity, for example "title" if the title of the referred video is
+    The key "ie_key" can be set to the class name (minus the trailing "IE",
+    e.g. "Youtube") if the extractor class is known in advance.
+    Additionally, the dictionary may have any properties of the resolved entity
+    known in advance, for example "title" if the title of the referred video is
      known ahead of time.
  
  
@@ -792,6 +793,49 @@ class InfoExtractor(object):
          self._sort_formats(formats)
          return formats
  
+    # TODO: improve extraction
+    def _extract_smil_formats(self, smil_url, video_id):
+        smil = self._download_xml(
+            smil_url, video_id, 'Downloading SMIL file',
+            'Unable to download SMIL file')
+
+        base = smil.find('./head/meta').get('base')
+
+        formats = []
+        rtmp_count = 0
+        for video in smil.findall('./body/switch/video'):
+            src = video.get('src')
+            if not src:
+                continue
+            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+            width = int_or_none(video.get('width'))
+            height = int_or_none(video.get('height'))
+            proto = video.get('proto')
+            if not proto:
+                if base:
+                    if base.startswith('rtmp'):
+                        proto = 'rtmp'
+                    elif base.startswith('http'):
+                        proto = 'http'
+            ext = video.get('ext')
+            if proto == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(src, video_id, ext))
+            elif proto == 'rtmp':
+                rtmp_count += 1
+                streamer = video.get('streamer') or base
+                formats.append({
+                    'url': streamer,
+                    'play_path': src,
+                    'ext': 'flv',
+                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+                    'tbr': bitrate,
+                    'width': width,
+                    'height': height,
+                })
+        self._sort_formats(formats)
+
+        return formats
+
      def _live_title(self, name):
          """ Generate the title for a live video """
          now = datetime.datetime.now()
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py

index 2139f68aa3cb16facdc45b5fd9e014621e1c6674..1ad4e77a8a334dc0bfec62a0fb4752676e2e1435 100644 (file)
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -13,9 +13,10 @@ from ..compat import (
      compat_urllib_request,
  )
  from ..utils import (
-    urlencode_postdata,
      ExtractorError,
+    int_or_none,
      limit_length,
+    urlencode_postdata,
  )
  
  
@@ -36,7 +37,6 @@ class FacebookIE(InfoExtractor):
          'info_dict': {
              'id': '637842556329505',
              'ext': 'mp4',
-            'duration': 38,
              'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
          }
      }, {
@@ -107,9 +107,7 @@ class FacebookIE(InfoExtractor):
          self._login()
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
          url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
          webpage = self._download_webpage(url, video_id)
  
@@ -149,6 +147,6 @@ class FacebookIE(InfoExtractor):
              'id': video_id,
              'title': video_title,
              'url': video_url,
-            'duration': int(video_data['video_duration']),
-            'thumbnail': video_data['thumbnail_src'],
+            'duration': int_or_none(video_data.get('video_duration')),
+            'thumbnail': video_data.get('thumbnail_src'),
          }
diff --git a/youtube_dl/extractor/ntv.py b/youtube_dl/extractor/ntv.py

index 13c8d79cd8ac6346dbe4e9810bc8bf0b20825dcc..ee740cd9c0fe71a48b79aee00c40ea610e81ea99 100644 (file)
--- a/youtube_dl/extractor/ntv.py
+++ b/youtube_dl/extractor/ntv.py
@@ -130,7 +130,7 @@ class NTVIE(InfoExtractor):
                  'rtmp_conn': 'B:1',
                  'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
                  'page_url': 'http://www.ntv.ru',
-                'flash_ver': 'LNX 11,2,202,341',
+                'flash_version': 'LNX 11,2,202,341',
                  'rtmp_live': True,
                  'ext': 'flv',
                  'filesize': int(size.text),
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py

index 0751efc6111c96ca1c089c66183429f9bde6147c..646af3cc9c9686b7d09fdc87b305c0b7c6c0f8ce 100644 (file)
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -274,15 +274,18 @@ class SmotriBroadcastIE(InfoExtractor):
          broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
  
          if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
-            raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)
+            raise ExtractorError(
+                'Broadcast %s does not exist' % broadcast_id, expected=True)
  
          # Adult content
          if re.search('EroConfirmText">', broadcast_page) is not None:
  
              (username, password) = self._get_login_info()
              if username is None:
-                raise ExtractorError('Erotic broadcasts allowed only for registered users, '
-                                     'use --username and --password options to provide account credentials.', expected=True)
+                raise ExtractorError(
+                    'Erotic broadcasts allowed only for registered users, '
+                    'use --username and --password options to provide account credentials.',
+                    expected=True)
  
              login_form = {
                  'login-hint53': '1',
@@ -291,9 +294,11 @@ class SmotriBroadcastIE(InfoExtractor):
                  'password': password,
              }
  
-            request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
+            request = compat_urllib_request.Request(
+                broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
              request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
+            broadcast_page = self._download_webpage(
+                request, broadcast_id, 'Logging in and confirming age')
  
              if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
                  raise ExtractorError('Unable to log in: bad username or password', expected=True)
@@ -303,7 +308,7 @@ class SmotriBroadcastIE(InfoExtractor):
              adult_content = False
  
          ticket = self._html_search_regex(
-            'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
+            r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
              broadcast_page, 'broadcast ticket')
  
          url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
@@ -312,26 +317,31 @@ class SmotriBroadcastIE(InfoExtractor):
          if broadcast_password:
              url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
  
-        broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')
+        broadcast_json_page = self._download_webpage(
+            url, broadcast_id, 'Downloading broadcast JSON')
  
          try:
              broadcast_json = json.loads(broadcast_json_page)
  
              protected_broadcast = broadcast_json['_pass_protected'] == 1
              if protected_broadcast and not broadcast_password:
-                raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)
+                raise ExtractorError(
+                    'This broadcast is protected by a password, use the --video-password option',
+                    expected=True)
  
              broadcast_offline = broadcast_json['is_play'] == 0
              if broadcast_offline:
                  raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
  
              rtmp_url = broadcast_json['_server']
-            if not rtmp_url.startswith('rtmp://'):
+            mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
+            if not mobj:
                  raise ExtractorError('Unexpected broadcast rtmp URL')
  
              broadcast_playpath = broadcast_json['_streamName']
+            broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
              broadcast_thumbnail = broadcast_json['_imgURL']
-            broadcast_title = broadcast_json['title']
+            broadcast_title = self._live_title(broadcast_json['title'])
              broadcast_description = broadcast_json['description']
              broadcaster_nick = broadcast_json['nick']
              broadcaster_login = broadcast_json['login']
@@ -352,6 +362,9 @@ class SmotriBroadcastIE(InfoExtractor):
              'age_limit': 18 if adult_content else 0,
              'ext': 'flv',
              'play_path': broadcast_playpath,
+            'player_url': 'http://pics.smotri.com/broadcast_play.swf',
+            'app': broadcast_app,
              'rtmp_live': True,
-            'rtmp_conn': rtmp_conn
+            'rtmp_conn': rtmp_conn,
+            'is_live': True,
          }
diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py

index d81d1d1a67cef49d4f612f08bfb5b7b7002b51fd..ba65996dc01646e019cfd5820aa36c1934365d9b 100644 (file)
--- a/youtube_dl/extractor/tvigle.py
+++ b/youtube_dl/extractor/tvigle.py
@@ -1,32 +1,30 @@
  # encoding: utf-8
  from __future__ import unicode_literals
  
-import re
-
  from .common import InfoExtractor
  from ..utils import (
      float_or_none,
-    str_to_int,
+    parse_age_limit,
  )
  
  
  class TvigleIE(InfoExtractor):
      IE_NAME = 'tvigle'
      IE_DESC = 'Интернет-телевидение Tvigle.ru'
-    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$'
+    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'
  
      _TESTS = [
          {
-            'url': 'http://www.tvigle.ru/video/brat/',
-            'md5': 'ff4344a4894b0524441fb6f8218dc716',
+            'url': 'http://www.tvigle.ru/video/sokrat/',
+            'md5': '36514aed3657d4f70b4b2cef8eb520cd',
              'info_dict': {
-                'id': '5118490',
-                'display_id': 'brat',
-                'ext': 'mp4',
-                'title': 'Брат',
-                'description': 'md5:d16ac7c0b47052ea51fddb92c4e413eb',
-                'duration': 5722.6,
-                'age_limit': 16,
+                'id': '1848932',
+                'display_id': 'sokrat',
+                'ext': 'flv',
+                'title': 'Сократ',
+                'description': 'md5:a05bd01be310074d5833efc6743be95e',
+                'duration': 6586,
+                'age_limit': 0,
              },
          },
          {
@@ -44,8 +42,7 @@ class TvigleIE(InfoExtractor):
      ]
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
+        display_id = self._match_id(url)
  
          webpage = self._download_webpage(url, display_id)
  
@@ -60,8 +57,8 @@ class TvigleIE(InfoExtractor):
          title = item['title']
          description = item['description']
          thumbnail = item['thumbnail']
-        duration = float_or_none(item['durationMilliseconds'], 1000)
-        age_limit = str_to_int(item['ageRestrictions'])
+        duration = float_or_none(item.get('durationMilliseconds'), 1000)
+        age_limit = parse_age_limit(item.get('ageRestrictions'))
  
          formats = []
          for vcodec, fmts in item['videos'].items():
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py

index eb94737546fb725e992249d356588417d90ad81a..0157392cce30984f62024f1c9588cfed5b257cd2 100644 (file)
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -182,8 +182,8 @@ class TVPlayIE(InfoExtractor):
              'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
  
          if video['is_geo_blocked']:
-            raise ExtractorError(
-                'This content is not available in your country due to copyright reasons', expected=True)
+            self.report_warning(
+                'This content might not be available in your country due to copyright reasons')
  
          streams = self._download_json(
              'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 8b6e591a4379115b47a4d36cc2c5d5495e5fcd03..7b6179a2abd1261b787f1b19486ab04af31feddf 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -14,23 +14,24 @@ from .common import InfoExtractor, SearchInfoExtractor
  from .subtitles import SubtitlesInfoExtractor
  from ..jsinterp import JSInterpreter
  from ..swfinterp import SWFInterpreter
-from ..utils import (
+from ..compat import (
      compat_chr,
      compat_parse_qs,
      compat_urllib_parse,
      compat_urllib_request,
      compat_urlparse,
      compat_str,
-
+)
+from ..utils import (
      clean_html,
-    get_element_by_id,
-    get_element_by_attribute,
      ExtractorError,
+    get_element_by_attribute,
+    get_element_by_id,
      int_or_none,
      OnDemandPagedList,
+    orderedSet,
      unescapeHTML,
      unified_strdate,
-    orderedSet,
      uppercase_escape,
  )
  
@@ -417,6 +418,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'upload_date': '20140605',
              },
          },
+        # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
+        {
+            'url': '__2ABJjxzNo',
+            'info_dict': {
+                'id': '__2ABJjxzNo',
+                'ext': 'mp4',
+                'upload_date': '20100430',
+                'uploader_id': 'deadmau5',
+                'description': 'md5:12c56784b8032162bb936a5f76d55360',
+                'uploader': 'deadmau5',
+                'title': 'Deadmau5 - Some Chords (HD)',
+            },
+            'expected_warnings': [
+                'DASH manifest missing',
+            ]
+        },
+        # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
+        {
+            'url': 'lqQg6PlCWgI',
+            'info_dict': {
+                'id': 'lqQg6PlCWgI',
+                'ext': 'mp4',
+                'upload_date': '20120731',
+                'uploader_id': 'olympic',
+                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+                'uploader': 'Olympics',
+                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
+            },
+            'params': {
+                'skip_download': 'requires avconv',
+            }
+        },
      ]
  
      def __init__(self, *args, **kwargs):
@@ -666,6 +699,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
          return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
  
+    def _parse_dash_manifest(
+            self, video_id, dash_manifest_url, player_url, age_gate):
+        def decrypt_sig(mobj):
+            s = mobj.group(1)
+            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
+            return '/signature/%s' % dec_s
+        dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
+        dash_doc = self._download_xml(
+            dash_manifest_url, video_id,
+            note='Downloading DASH manifest',
+            errnote='Could not download DASH manifest')
+
+        formats = []
+        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+            if url_el is None:
+                continue
+            format_id = r.attrib['id']
+            video_url = url_el.text
+            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+            f = {
+                'format_id': format_id,
+                'url': video_url,
+                'width': int_or_none(r.attrib.get('width')),
+                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+                'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+                'filesize': filesize,
+                'fps': int_or_none(r.attrib.get('frameRate')),
+            }
+            try:
+                existing_format = next(
+                    fo for fo in formats
+                    if fo['format_id'] == format_id)
+            except StopIteration:
+                f.update(self._formats.get(format_id, {}))
+                formats.append(f)
+            else:
+                existing_format.update(f)
+        return formats
+
      def _real_extract(self, url):
          proto = (
              'http' if self._downloader.params.get('prefer_insecure', False)
@@ -800,7 +873,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
  
          m_cat_container = self._search_regex(
              r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
-            video_webpage, 'categories', fatal=False)
+            video_webpage, 'categories', default=None)
          if m_cat_container:
              category = self._html_search_regex(
                  r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
@@ -878,7 +951,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'url': video_info['conn'][0],
                  'player_url': player_url,
              }]
-        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
+        elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
              encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
              if 'rtmpe%3Dyes' in encoded_url_map:
                  raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
@@ -943,51 +1016,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
  
          # Look for the DASH manifest
          if self._downloader.params.get('youtube_include_dash_manifest', True):
-            try:
-                # The DASH manifest used needs to be the one from the original video_webpage.
-                # The one found in get_video_info seems to be using different signatures.
-                # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
-                # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
-                # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
-                dash_manifest_url = video_info.get('dashmpd')[0]
-
-                def decrypt_sig(mobj):
-                    s = mobj.group(1)
-                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
-                    return '/signature/%s' % dec_s
-                dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
-                dash_doc = self._download_xml(
-                    dash_manifest_url, video_id,
-                    note='Downloading DASH manifest',
-                    errnote='Could not download DASH manifest')
-                for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
-                    url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
-                    if url_el is None:
-                        continue
-                    format_id = r.attrib['id']
-                    video_url = url_el.text
-                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
-                    f = {
-                        'format_id': format_id,
-                        'url': video_url,
-                        'width': int_or_none(r.attrib.get('width')),
-                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
-                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
-                        'filesize': filesize,
-                        'fps': int_or_none(r.attrib.get('frameRate')),
-                    }
-                    try:
-                        existing_format = next(
-                            fo for fo in formats
-                            if fo['format_id'] == format_id)
-                    except StopIteration:
-                        f.update(self._formats.get(format_id, {}))
-                        formats.append(f)
-                    else:
-                        existing_format.update(f)
-
-            except (ExtractorError, KeyError) as e:
-                self.report_warning('Skipping DASH manifest: %r' % e, video_id)
+            dash_mpd = video_info.get('dashmpd')
+            if dash_mpd:
+                dash_manifest_url = dash_mpd[0]
+                try:
+                    dash_formats = self._parse_dash_manifest(
+                        video_id, dash_manifest_url, player_url, age_gate)
+                except (ExtractorError, KeyError) as e:
+                    self.report_warning(
+                        'Skipping DASH manifest: %r' % e, video_id)
+                else:
+                    formats.extend(dash_formats)
  
          self._sort_formats(formats)
  
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py

index 9ff00e26c4235e0eaace73b219b945ea17cee175..74c76a9a0446482c303f3b4182f3ef2bd4942c0d 100644 (file)
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -1,12 +1,14 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
+import functools
  import re
  
  from .common import InfoExtractor
  from ..utils import (
      int_or_none,
      unified_strdate,
+    OnDemandPagedList,
  )
  
  
@@ -87,7 +89,7 @@ def extract_from_xml_url(ie, video_id, xml_url):
  
  
  class ZDFIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
+    _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
  
      _TEST = {
          'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
@@ -106,6 +108,52 @@ class ZDFIE(InfoExtractor):
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
-
          xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
          return extract_from_xml_url(self, video_id, xml_url)
+
+
+class ZDFChannelIE(InfoExtractor):
+    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
+        'info_dict': {
+            'id': '1586442',
+        },
+        'playlist_count': 4,
+    }
+    _PAGE_SIZE = 50
+
+    def _fetch_page(self, channel_id, page):
+        offset = page * self._PAGE_SIZE
+        xml_url = (
+            'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
+            % (offset, self._PAGE_SIZE, channel_id))
+        doc = self._download_xml(
+            xml_url, channel_id,
+            note='Downloading channel info',
+            errnote='Failed to download channel info')
+
+        title = doc.find('.//information/title').text
+        description = doc.find('.//information/detail').text
+        for asset in doc.findall('.//teasers/teaser'):
+            a_type = asset.find('./type').text
+            a_id = asset.find('./details/assetId').text
+            if a_type not in ('video', 'topic'):
+                continue
+            yield {
+                '_type': 'url',
+                'playlist_title': title,
+                'playlist_description': description,
+                'url': 'zdf:%s:%s' % (a_type, a_id),
+            }
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        entries = OnDemandPagedList(
+            functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)
+
+        return {
+            '_type': 'playlist',
+            'id': channel_id,
+            'entries': entries,
+        }
diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py

index 09db43611a7c288e77c2cacaf96f266c541b2bbc..75c0f7bbe86ef8e19f41fd61e1bbd58678474d8a 100644 (file)
--- a/youtube_dl/postprocessor/execafterdownload.py
+++ b/youtube_dl/postprocessor/execafterdownload.py
@@ -14,7 +14,7 @@ class ExecAfterDownloadPP(PostProcessor):
  
      def run(self, information):
          cmd = self.exec_cmd
-        if not '{}' in cmd:
+        if '{}' not in cmd:
              cmd += ' {}'
  
          cmd = cmd.replace('{}', shlex_quote(information['filepath']))
diff --git a/youtube_dl/update.py b/youtube_dl/update.py

index 4c07a558e7ad2f2db422ed2a0124df49efc6b09c..2d2703368d8c2974e665985a10d1b6dcace8b235 100644 (file)
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -79,7 +79,7 @@ def update_self(to_screen, verbose):
              to_screen(compat_str(traceback.format_exc()))
          to_screen('ERROR: can\'t obtain versions info. Please try again later.')
          return
-    if not 'signature' in versions_info:
+    if 'signature' not in versions_info:
          to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
          return
      signature = versions_info['signature']
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 2e70cc791bca5eaf5b2c1067741b9e25334a3377..75f9594e6c8abcecfe128af1be2e73575d6eecc1 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -712,8 +712,10 @@ def date_from_str(date_str):
      Return a datetime object from a string in the format YYYYMMDD or
      (now|today)[+-][0-9](day|week|month|year)(s)?"""
      today = datetime.date.today()
-    if date_str == 'now'or date_str == 'today':
+    if date_str in ('now', 'today'):
          return today
+    if date_str == 'yesterday':
+        return today - datetime.timedelta(days=1)
      match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
      if match is not None:
          sign = match.group('sign')
@@ -1024,7 +1026,7 @@ def smuggle_url(url, data):
  
  
  def unsmuggle_url(smug_url, default=None):
-    if not '#__youtubedl_smuggle' in smug_url:
+    if '#__youtubedl_smuggle' not in smug_url:
          return smug_url, default
      url, _, sdata = smug_url.rpartition('#')
      jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 3ac5255578f25cabce2f7d56dae2e21775904cf3..b2a285bb29119ccb976eab749471514fb7c65e6a 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
  from __future__ import unicode_literals
  
-__version__ = '2014.12.06.1'
+__version__ = '2014.12.11'
author	Philipp Hagemeister <phihag@phihag.de>
	Thu, 11 Dec 2014 16:11:25 +0000 (17:11 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Thu, 11 Dec 2014 16:11:25 +0000 (17:11 +0100)
youtube_dl/YoutubeDL.py		patch \| blob \| history
youtube_dl/compat.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/behindkink.py		patch \| blob \| history
youtube_dl/extractor/bet.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/bliptv.py		patch \| blob \| history
youtube_dl/extractor/common.py		patch \| blob \| history
youtube_dl/extractor/facebook.py		patch \| blob \| history
youtube_dl/extractor/ntv.py		patch \| blob \| history
youtube_dl/extractor/smotri.py		patch \| blob \| history
youtube_dl/extractor/tvigle.py		patch \| blob \| history
youtube_dl/extractor/tvplay.py		patch \| blob \| history
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/extractor/zdf.py		patch \| blob \| history
youtube_dl/postprocessor/execafterdownload.py		patch \| blob \| history
youtube_dl/update.py		patch \| blob \| history
youtube_dl/utils.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history