Merge pull request #4615 from dwemthy/https_xhamster

author Sergey M. <dstftw@gmail.com>

Fri, 2 Jan 2015 15:09:28 +0000 (21:09 +0600)

committer Sergey M. <dstftw@gmail.com>

Fri, 2 Jan 2015 15:09:28 +0000 (21:09 +0600)
author Sergey M. <dstftw@gmail.com>
Fri, 2 Jan 2015 15:09:28 +0000 (21:09 +0600)
committer Sergey M. <dstftw@gmail.com>
Fri, 2 Jan 2015 15:09:28 +0000 (21:09 +0600)
diff --git a/AUTHORS b/AUTHORS

index 29ce9e3e4792d45674da49e1a585ed231ea2a3a8..37306ac83db31932ee44b3405d83763c2a7d0b10 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -97,3 +97,4 @@ Petr Kutalek
  Will Glynn
  Max Reimann
  Cédric Luthi
+Thijs Vermeir
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 9848ff611c1ddc9326dc8afab240182471eb5f41..9ccd1b32e8e4fe8d3880516ed7917897c5b471b6 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -474,6 +474,7 @@ from .videott import VideoTtIE
  from .videoweed import VideoWeedIE
  from .vidme import VidmeIE
  from .vidzi import VidziIE
+from .vier import VierIE, VierVideosIE
  from .vimeo import (
      VimeoIE,
      VimeoAlbumIE,
diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py

index 53aea0ec9046ef985cf9dfc5fc239c33b3ad8dcb..73fe66b01e063581f6738c0d3e43107b030510ee 100644 (file)
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
  class BBCCoUkIE(SubtitlesInfoExtractor):
      IE_NAME = 'bbc.co.uk'
      IE_DESC = 'BBC iPlayer'
-    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/(?:episode|playlist))/(?P<id>[\da-z]{8})'
  
      _TESTS = [
          {
@@ -84,6 +84,9 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
                  # rtmp download
                  'skip_download': True,
              }
+        }, {
+            'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
+            'only_matching': True,
          }
      ]
  
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 40b2791c77351a8625894f709187b3ccfb8e1939..493afb57d89a2eade1707b09f6c5ed2ea21b680c 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -181,6 +181,14 @@ class GenericIE(InfoExtractor):
                  'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
              },
          },
+        # BBC iPlayer embeds
+        {
+            'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
+            'info_dict': {
+                'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
+            },
+            'playlist_mincount': 18,
+        },
          # RUTV embed
          {
              'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
@@ -699,9 +707,9 @@ class GenericIE(InfoExtractor):
              r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
  
          # Helper method
-        def _playlist_from_matches(matches, getter, ie=None):
+        def _playlist_from_matches(matches, getter=None, ie=None):
              urlrs = orderedSet(
-                self.url_result(self._proto_relative_url(getter(m)), ie)
+                self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
                  for m in matches)
              return self.playlist_result(
                  urlrs, playlist_id=video_id, playlist_title=video_title)
@@ -905,6 +913,11 @@ class GenericIE(InfoExtractor):
              return _playlist_from_matches(
                  matches, getter=unescapeHTML, ie='FunnyOrDie')
  
+        # Look for BBC iPlayer embed
+        matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
+        if matches:
+            return _playlist_from_matches(matches, ie='BBCCoUk')
+
          # Look for embedded RUTV player
          rutv_url = RUTVIE._extract_url(webpage)
          if rutv_url:
diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py

new file mode 100644 (file)

index 0000000..0d9fb09
--- /dev/null
+++ b/youtube_dl/extractor/vier.py
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class VierIE(InfoExtractor):
+    IE_NAME = 'vier'
+    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
+    _TESTS = [{
+        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
+        'info_dict': {
+            'id': '16129',
+            'display_id': 'het-wordt-warm-de-moestuin',
+            'ext': 'mp4',
+            'title': 'Het wordt warm in De Moestuin',
+            'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.vier.be/video/v3/embed/16129',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        embed_id = mobj.group('embed_id')
+        display_id = mobj.group('display_id') or embed_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'"nid"\s*:\s*"(\d+)"', webpage, 'video id')
+        application = self._search_regex(
+            r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod')
+        filename = self._search_regex(
+            r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename')
+
+        playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
+        formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
+
+        title = self._og_search_title(webpage, default=display_id)
+        description = self._og_search_description(webpage, default=None)
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
+
+
+class VierVideosIE(InfoExtractor):
+    IE_NAME = 'vier:videos'
+    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+))?'
+    _TESTS = [{
+        'url': 'http://www.vier.be/demoestuin/videos',
+        'info_dict': {
+            'id': 'demoestuin',
+        },
+        'playlist_mincount': 153,
+    }, {
+        'url': 'http://www.vier.be/demoestuin/videos?page=6',
+        'info_dict': {
+            'id': 'demoestuin-page6',
+        },
+        'playlist_mincount': 20,
+    }, {
+        'url': 'http://www.vier.be/demoestuin/videos?page=7',
+        'info_dict': {
+            'id': 'demoestuin-page7',
+        },
+        'playlist_mincount': 13,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        program = mobj.group('program')
+
+        webpage = self._download_webpage(url, program)
+
+        page_id = mobj.group('page')
+        if page_id:
+            page_id = int(page_id)
+            start_page = page_id
+            last_page = start_page + 1
+            playlist_id = '%s-page%d' % (program, page_id)
+        else:
+            start_page = 0
+            last_page = int(self._search_regex(
+                r'videos\?page=(\d+)">laatste</a>',
+                webpage, 'last page', default=0)) + 1
+            playlist_id = program
+
+        entries = []
+        for current_page_id in range(start_page, last_page):
+            current_page = self._download_webpage(
+                'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
+                program,
+                'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
+            page_entries = [
+                self.url_result('http://www.vier.be' + video_url, 'Vier')
+                for video_url in re.findall(
+                    r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
+            entries.extend(page_entries)
+
+        return self.playlist_result(entries, playlist_id)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 89707ceef8bdf326c13c544b2690bd75781fbbbb..58b5021dcffe15be4be0ec7aa58149308ce232ee 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
  from __future__ import unicode_literals
  
-__version__ = '2015.01.01'
+__version__ = '2015.01.02'
author	Sergey M. <dstftw@gmail.com>
	Fri, 2 Jan 2015 15:09:28 +0000 (21:09 +0600)
committer	Sergey M. <dstftw@gmail.com>
	Fri, 2 Jan 2015 15:09:28 +0000 (21:09 +0600)
AUTHORS		patch | blob | history
youtube_dl/extractor/__init__.py		patch | blob | history
youtube_dl/extractor/bbccouk.py		patch | blob | history
youtube_dl/extractor/generic.py		patch | blob | history
youtube_dl/extractor/vier.py	[new file with mode: 0644]	patch | blob
youtube_dl/version.py		patch | blob | history