From: Sergey M. <dstftw@gmail.com>
Date: Mon, 29 Dec 2014 14:21:39 +0000 (+0600)
Subject: Merge pull request #4543 from akretz/cnn_fix
X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=974739aab5935ddf19c005ea084c83a45c855348;hp=9532d723715f1d1a08c126ba8af6940113b88d3b;p=youtube-dl.git

Merge pull request #4543 from akretz/cnn_fix

[cnn] Add support for articles with videos (fixes #4541)
---

diff --git a/AUTHORS b/AUTHORS
index 6ea958fce..bb4d8b4d1 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -95,3 +95,4 @@ Adrian Kretz
 Mathias Rav
 Petr Kutalek
 Will Glynn
+Max Reimann
diff --git a/test/helper.py b/test/helper.py
index 8a820526a..96d58b7c1 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -99,7 +99,7 @@ def gettestcases(include_onlymatching=False):
 md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
 
 
-def expect_info_dict(self, expected_dict, got_dict):
+def expect_info_dict(self, got_dict, expected_dict):
     for info_field, expected in expected_dict.items():
         if isinstance(expected, compat_str) and expected.startswith('re:'):
             got = got_dict.get(info_field)
diff --git a/test/test_download.py b/test/test_download.py
index a009aa475..412f3dbce 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -155,7 +155,7 @@ def generator(test_case):
             if is_playlist:
                 self.assertEqual(res_dict['_type'], 'playlist')
                 self.assertTrue('entries' in res_dict)
-                expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
+                expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
 
             if 'playlist_mincount' in test_case:
                 assertGreaterEqual(
@@ -204,7 +204,7 @@ def generator(test_case):
                 with io.open(info_json_fn, encoding='utf-8') as infof:
                     info_dict = json.load(infof)
 
-                expect_info_dict(self, tc.get('info_dict', {}), info_dict)
+                expect_info_dict(self, info_dict, tc.get('info_dict', {}))
         finally:
             try_rm_tcs_files()
             if is_playlist and res_dict is not None and res_dict.get('entries'):
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index ba12e3263..ab0f76862 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -7,6 +7,7 @@ from .adobetv import AdobeTVIE
 from .adultswim import AdultSwimIE
 from .aftonbladet import AftonbladetIE
 from .aljazeera import AlJazeeraIE
+from .alphaporno import AlphaPornoIE
 from .anitube import AnitubeIE
 from .anysex import AnySexIE
 from .aol import AolIE
@@ -109,6 +110,7 @@ from .elpais import ElPaisIE
 from .empflix import EMPFlixIE
 from .engadget import EngadgetIE
 from .eporner import EpornerIE
+from .eroprofile import EroProfileIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
@@ -407,6 +409,7 @@ from .ted import TEDIE
 from .telebruxelles import TeleBruxellesIE
 from .telecinco import TelecincoIE
 from .telemb import TeleMBIE
+from .teletask import TeleTaskIE
 from .tenplay import TenPlayIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
diff --git a/youtube_dl/extractor/alphaporno.py b/youtube_dl/extractor/alphaporno.py
new file mode 100644
index 000000000..c34719d1f
--- /dev/null
+++ b/youtube_dl/extractor/alphaporno.py
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    parse_duration,
+    parse_filesize,
+    int_or_none,
+)
+
+
+class AlphaPornoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
+        'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
+        'info_dict': {
+            'id': '258807',
+            'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
+            'ext': 'mp4',
+            'title': 'Sensual striptease porn with Samantha Alexandra',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'timestamp': 1418694611,
+            'upload_date': '20141216',
+            'duration': 387,
+            'filesize_approx': 54120000,
+            'tbr': 1145,
+            'categories': list,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None)
+
+        video_url = self._search_regex(
+            r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
+        ext = self._html_search_meta(
+            'encodingFormat', webpage, 'ext', default='.mp4')[1:]
+
+        title = self._search_regex(
+            [r'<meta content="([^"]+)" itemprop="description">',
+             r'class="title" itemprop="name">([^<]+)<'],
+            webpage, 'title')
+        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
+        timestamp = parse_iso8601(self._html_search_meta(
+            'uploadDate', webpage, 'upload date'))
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration'))
+        filesize_approx = parse_filesize(self._html_search_meta(
+            'contentSize', webpage, 'file size'))
+        bitrate = int_or_none(self._html_search_meta(
+            'bitrate', webpage, 'bitrate'))
+        categories = self._html_search_meta(
+            'keywords', webpage, 'categories', default='').split(',')
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'ext': ext,
+            'title': title,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'filesize_approx': filesize_approx,
+            'tbr': bitrate,
+            'categories': categories,
+            'age_limit': age_limit,
+        }
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py
index 34ce8429b..9fc35a42b 100644
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -1,42 +1,48 @@
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    unified_strdate,
-)
+from ..utils import unified_strdate
 
 
 class ArchiveOrgIE(InfoExtractor):
     IE_NAME = 'archive.org'
     IE_DESC = 'archive.org videos'
-    _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
-    _TEST = {
-        "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
-        'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
+    _VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
+    _TESTS = [{
+        'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
         'md5': '8af1d4cf447933ed3c7f4871162602db',
         'info_dict': {
-            "title": "1968 Demo - FJCC Conference Presentation Reel #1",
-            "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
-            "upload_date": "19681210",
-            "uploader": "SRI International"
+            'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
+            'ext': 'ogv',
+            'title': '1968 Demo - FJCC Conference Presentation Reel #1',
+            'description': 'md5:1780b464abaca9991d8968c877bb53ed',
+            'upload_date': '19681210',
+            'uploader': 'SRI International'
+        }
+    }, {
+        'url': 'https://archive.org/details/Cops1922',
+        'md5': '18f2a19e6d89af8425671da1cf3d4e04',
+        'info_dict': {
+            'id': 'Cops1922',
+            'ext': 'ogv',
+            'title': 'Buster Keaton\'s "Cops" (1922)',
+            'description': 'md5:70f72ee70882f713d4578725461ffcc3',
         }
-    }
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         json_url = url + ('?' if '?' in url else '&') + 'output=json'
-        json_data = self._download_webpage(json_url, video_id)
-        data = json.loads(json_data)
+        data = self._download_json(json_url, video_id)
+
+        def get_optional(data_dict, field):
+            return data_dict['metadata'].get(field, [None])[0]
 
-        title = data['metadata']['title'][0]
-        description = data['metadata']['description'][0]
-        uploader = data['metadata']['creator'][0]
-        upload_date = unified_strdate(data['metadata']['date'][0])
+        title = get_optional(data, 'title')
+        description = get_optional(data, 'description')
+        uploader = get_optional(data, 'creator')
+        upload_date = unified_strdate(get_optional(data, 'date'))
 
         formats = [
             {
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 219631b9b..929dd3cc5 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -37,7 +37,7 @@ class ArteTvIE(InfoExtractor):
             config_xml_url, video_id, note='Downloading configuration')
 
         formats = [{
-            'forma_id': q.attrib['quality'],
+            'format_id': q.attrib['quality'],
             # The playpath starts at 'mp4:', if we don't manually
             # split the url, rtmpdump will incorrectly parse them
             'url': q.text.split('mp4:', 1)[0],
@@ -133,7 +133,7 @@ class ArteTVPlus7IE(InfoExtractor):
                 'width': int_or_none(f.get('width')),
                 'height': int_or_none(f.get('height')),
                 'tbr': int_or_none(f.get('bitrate')),
-                'quality': qfunc(f['quality']),
+                'quality': qfunc(f.get('quality')),
                 'source_preference': source_pref,
             }
 
diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py
index 2d2f742ae..f690dc803 100644
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -71,7 +71,20 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
                 'skip_download': True,
             },
             'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
-        },
+        }, {
+            'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
+            'info_dict': {
+                'id': 'b04v209v',
+                'ext': 'flv',
+                'title': 'Pete Tong, The Essential New Tune Special',
+                'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
+                'duration': 10800,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            }
+        }
     ]
 
     def _extract_asx_playlist(self, connection, programme_id):
@@ -203,6 +216,59 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
 
         return formats, subtitles
 
+    def _download_playlist(self, playlist_id):
+        try:
+            playlist = self._download_json(
+                'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
+                playlist_id, 'Downloading playlist JSON')
+
+            version = playlist.get('defaultAvailableVersion')
+            if version:
+                smp_config = version['smpConfig']
+                title = smp_config['title']
+                description = smp_config['summary']
+                for item in smp_config['items']:
+                    kind = item['kind']
+                    if kind != 'programme' and kind != 'radioProgramme':
+                        continue
+                    programme_id = item.get('vpid')
+                    duration = int(item.get('duration'))
+                    formats, subtitles = self._download_media_selector(programme_id)
+                return programme_id, title, description, duration, formats, subtitles
+        except ExtractorError as ee:
+            if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+                raise
+
+        # fallback to legacy playlist
+        playlist = self._download_xml(
+                'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
+                playlist_id, 'Downloading legacy playlist XML')
+
+        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
+        if no_items is not None:
+            reason = no_items.get('reason')
+            if reason == 'preAvailability':
+                msg = 'Episode %s is not yet available' % playlist_id
+            elif reason == 'postAvailability':
+                msg = 'Episode %s is no longer available' % playlist_id
+            elif reason == 'noMedia':
+                msg = 'Episode %s is not currently available' % playlist_id
+            else:
+                msg = 'Episode %s is not available: %s' % (playlist_id, reason)
+            raise ExtractorError(msg, expected=True)
+
+        for item in self._extract_items(playlist):
+            kind = item.get('kind')
+            if kind != 'programme' and kind != 'radioProgramme':
+                continue
+            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
+            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+            programme_id = item.get('identifier')
+            duration = int(item.get('duration'))
+            formats, subtitles = self._download_media_selector(programme_id)
+
+        return programme_id, title, description, duration, formats, subtitles
+
     def _real_extract(self, url):
         group_id = self._match_id(url)
 
@@ -219,32 +285,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             duration = player['duration']
             formats, subtitles = self._download_media_selector(programme_id)
         else:
-            playlist = self._download_xml(
-                'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
-                group_id, 'Downloading playlist XML')
-
-            no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
-            if no_items is not None:
-                reason = no_items.get('reason')
-                if reason == 'preAvailability':
-                    msg = 'Episode %s is not yet available' % group_id
-                elif reason == 'postAvailability':
-                    msg = 'Episode %s is no longer available' % group_id
-                elif reason == 'noMedia':
-                    msg = 'Episode %s is not currently available' % group_id
-                else:
-                    msg = 'Episode %s is not available: %s' % (group_id, reason)
-                raise ExtractorError(msg, expected=True)
-
-            for item in self._extract_items(playlist):
-                kind = item.get('kind')
-                if kind != 'programme' and kind != 'radioProgramme':
-                    continue
-                title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
-                description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
-                programme_id = item.get('identifier')
-                duration = int(item.get('duration'))
-                formats, subtitles = self._download_media_selector(programme_id)
+            programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
 
         if self._downloader.params.get('listsubtitles', False):
             self._list_available_subtitles(programme_id, subtitles)
diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py
new file mode 100644
index 000000000..79e2fbd39
--- /dev/null
+++ b/youtube_dl/extractor/eroprofile.py
@@ -0,0 +1,45 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class EroProfileIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
+        'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
+        'info_dict': {
+            'id': '3733775',
+            'display_id': 'sexy-babe-softcore',
+            'ext': 'm4v',
+            'title': 'sexy babe softcore',
+            'thumbnail': 're:https?://.*\.jpg',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
+            webpage, 'video id', default=None)
+
+        video_url = self._search_regex(
+            r'<source src="([^"]+)', webpage, 'video url')
+        title = self._html_search_regex(
+            r'Title:</th><td>([^<]+)</td>', webpage, 'title')
+        thumbnail = self._search_regex(
+            r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
+            webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py
index 3022f539d..75f180928 100644
--- a/youtube_dl/extractor/gameone.py
+++ b/youtube_dl/extractor/gameone.py
@@ -6,7 +6,9 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     xpath_with_ns,
-    parse_iso8601
+    parse_iso8601,
+    float_or_none,
+    int_or_none,
 )
 
 NAMESPACE_MAP = {
@@ -21,21 +23,38 @@ RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
 
 class GameOneIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://www.gameone.de/tv/288',
-        'md5': '136656b7fb4c9cb4a8e2d500651c499b',
-        'info_dict': {
-            'id': '288',
-            'ext': 'mp4',
-            'title': 'Game One - Folge 288',
-            'duration': 1238,
-            'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
-            'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, SchÃ¶ner Trants Nerdquiz Folge 2 Runde 1',
-            'age_limit': 16,
-            'upload_date': '20140513',
-            'timestamp': 1399980122,
+    _TESTS = [
+        {
+            'url': 'http://www.gameone.de/tv/288',
+            'md5': '136656b7fb4c9cb4a8e2d500651c499b',
+            'info_dict': {
+                'id': '288',
+                'ext': 'mp4',
+                'title': 'Game One - Folge 288',
+                'duration': 1238,
+                'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
+                'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, SchÃ¶ner Trants Nerdquiz Folge 2 Runde 1',
+                'age_limit': 16,
+                'upload_date': '20140513',
+                'timestamp': 1399980122,
+            }
+        },
+        {
+            'url': 'http://gameone.de/tv/220',
+            'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
+            'info_dict': {
+                'id': '220',
+                'ext': 'mp4',
+                'upload_date': '20120918',
+                'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
+                'timestamp': 1347971451,
+                'title': 'Game One - Folge 220',
+                'duration': 896.62,
+                'age_limit': 16,
+            }
         }
-    }
+
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -66,13 +85,13 @@ class GameOneIE(InfoExtractor):
             video_id,
             'Downloading media:content')
         rendition_items = content.findall('.//rendition')
-        duration = int(rendition_items[0].get('duration'))
+        duration = float_or_none(rendition_items[0].get('duration'))
         formats = [
             {
                 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
-                'width': int(r.get('width')),
-                'height': int(r.get('height')),
-                'tbr': int(r.get('bitrate')),
+                'width': int_or_none(r.get('width')),
+                'height': int_or_none(r.get('height')),
+                'tbr': int_or_none(r.get('bitrate')),
             }
             for r in rendition_items
         ]
@@ -105,7 +124,8 @@ class GameOnePlaylistIE(InfoExtractor):
         webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
         max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
         entries = [
-            self.url_result('http://www.gameone.de/tv/%d' % video_id, 'GameOne')
+            self.url_result('http://www.gameone.de/tv/%d' %
+                            video_id, 'GameOne')
             for video_id in range(max_id, 0, -1)]
 
         return {
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py
index d031fe401..baef3daa0 100644
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -69,6 +69,7 @@ class SmotriIE(InfoExtractor):
             'params': {
                 'videopassword': 'qwerty',
             },
+            'skip': 'Video is not approved by moderator',
         },
         # age limit + video-password
         {
@@ -86,7 +87,8 @@ class SmotriIE(InfoExtractor):
             },
             'params': {
                 'videopassword': '333'
-            }
+            },
+            'skip': 'Video is not approved by moderator',
         },
         # swf player
         {
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index 07f514a46..c04791997 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -1,11 +1,10 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from .common import compat_str
 
 
 class SohuIE(InfoExtractor):
@@ -29,60 +28,73 @@ class SohuIE(InfoExtractor):
                 base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
             else:
                 base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
-            data_url = base_data_url + str(vid_id)
-            data_json = self._download_webpage(
-                data_url, video_id,
-                note='Downloading JSON data for ' + str(vid_id))
-            return json.loads(data_json)
+
+            return self._download_json(
+                base_data_url + vid_id, video_id,
+                'Downloading JSON data for %s' % vid_id)
 
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         mytv = mobj.group('mytv') is not None
 
         webpage = self._download_webpage(url, video_id)
-        raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
-                                            webpage, 'video title')
+        raw_title = self._html_search_regex(
+            r'(?s)<title>(.+?)</title>',
+            webpage, 'video title')
         title = raw_title.partition('-')[0].strip()
 
-        vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
-                                      'video path')
-        data = _fetch_data(vid, mytv)
-
-        QUALITIES = ('ori', 'super', 'high', 'nor')
-        vid_ids = [data['data'][q + 'Vid']
-                   for q in QUALITIES
-                   if data['data'][q + 'Vid'] != 0]
-        if not vid_ids:
-            raise ExtractorError('No formats available for this video')
+        vid = self._html_search_regex(
+            r'var vid ?= ?["\'](\d+)["\']',
+            webpage, 'video path')
+        vid_data = _fetch_data(vid, mytv)
 
-        # For now, we just pick the highest available quality
-        vid_id = vid_ids[-1]
+        formats_json = {}
+        for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
+            vid_id = vid_data['data'].get('%sVid' % format_id)
+            if not vid_id:
+                continue
+            vid_id = compat_str(vid_id)
+            formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv)
 
-        format_data = data if vid == vid_id else _fetch_data(vid_id, mytv)
-        part_count = format_data['data']['totalBlocks']
-        allot = format_data['allot']
-        prot = format_data['prot']
-        clipsURL = format_data['data']['clipsURL']
-        su = format_data['data']['su']
+        part_count = vid_data['data']['totalBlocks']
 
         playlist = []
         for i in range(part_count):
-            part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
-                        (allot, prot, clipsURL[i], su[i]))
-            part_str = self._download_webpage(
-                part_url, video_id,
-                note='Downloading part %d of %d' % (i + 1, part_count))
-
-            part_info = part_str.split('|')
-            video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
-
-            video_info = {
-                'id': '%s_part%02d' % (video_id, i + 1),
+            formats = []
+            for format_id, format_data in formats_json.items():
+                allot = format_data['allot']
+                prot = format_data['prot']
+
+                data = format_data['data']
+                clips_url = data['clipsURL']
+                su = data['su']
+
+                part_str = self._download_webpage(
+                    'http://%s/?prot=%s&file=%s&new=%s' %
+                    (allot, prot, clips_url[i], su[i]),
+                    video_id,
+                    'Downloading %s video URL part %d of %d'
+                    % (format_id, i + 1, part_count))
+
+                part_info = part_str.split('|')
+                video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+
+                formats.append({
+                    'url': video_url,
+                    'format_id': format_id,
+                    'filesize': data['clipsBytes'][i],
+                    'width': data['width'],
+                    'height': data['height'],
+                    'fps': data['fps'],
+                })
+            self._sort_formats(formats)
+
+            playlist.append({
+                'id': '%s_part%d' % (video_id, i + 1),
                 'title': title,
-                'url': video_url,
-                'ext': 'mp4',
-            }
-            playlist.append(video_info)
+                'duration': vid_data['data']['clipsDuration'][i],
+                'formats': formats,
+            })
 
         if len(playlist) == 1:
             info = playlist[0]
diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py
index 2f57f5b7c..1a57aebf1 100644
--- a/youtube_dl/extractor/sportdeutschland.py
+++ b/youtube_dl/extractor/sportdeutschland.py
@@ -60,9 +60,10 @@ class SportDeutschlandIE(InfoExtractor):
 
         categories = list(data.get('section', {}).get('tags', {}).values())
         asset = data['asset']
+        assets_info = self._download_json(asset['url'], video_id)
 
         formats = []
-        smil_url = asset['video']
+        smil_url = assets_info['video']
         if '.smil' in smil_url:
             m3u8_url = smil_url.replace('.smil', '.m3u8')
             formats.extend(
diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py
index 263f09b46..8a333f1d2 100644
--- a/youtube_dl/extractor/sunporno.py
+++ b/youtube_dl/extractor/sunporno.py
@@ -28,23 +28,27 @@ class SunPornoIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
-        description = self._html_search_meta('description', webpage, 'description')
+        title = self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title')
+        description = self._html_search_meta(
+            'description', webpage, 'description')
         thumbnail = self._html_search_regex(
             r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
 
         duration = parse_duration(self._search_regex(
-            r'Duration:\s*(\d+:\d+)\s*<', webpage, 'duration', fatal=False))
+            r'itemprop="duration">\s*(\d+:\d+)\s*<',
+            webpage, 'duration', fatal=False))
 
         view_count = int_or_none(self._html_search_regex(
-            r'class="views">\s*(\d+)\s*<', webpage, 'view count', fatal=False))
+            r'class="views">\s*(\d+)\s*<',
+            webpage, 'view count', fatal=False))
         comment_count = int_or_none(self._html_search_regex(
-            r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False))
+            r'(\d+)</b> Comments?',
+            webpage, 'comment count', fatal=False))
 
         formats = []
         quality = qualities(['mp4', 'flv'])
diff --git a/youtube_dl/extractor/teletask.py b/youtube_dl/extractor/teletask.py
new file mode 100644
index 000000000..e54145105
--- /dev/null
+++ b/youtube_dl/extractor/teletask.py
@@ -0,0 +1,53 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class TeleTaskIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.tele-task.de/archive/video/html5/26168/',
+        'info_dict': {
+            'title': 'Duplicate Detection',
+        },
+        'playlist': [{
+            'md5': '290ef69fb2792e481169c3958dbfbd57',
+            'info_dict': {
+                'id': '26168-speaker',
+                'ext': 'mp4',
+                'title': 'Duplicate Detection',
+                'upload_date': '20141218',
+            }
+        }, {
+            'md5': 'e1e7218c5f0e4790015a437fcf6c71b4',
+            'info_dict': {
+                'id': '26168-slides',
+                'ext': 'mp4',
+                'title': 'Duplicate Detection',
+                'upload_date': '20141218',
+            }
+        }]
+    }
+
+    def _real_extract(self, url):
+        lecture_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, lecture_id)
+
+        title = self._html_search_regex(
+            r'itemprop="name">([^<]+)</a>', webpage, 'title')
+        upload_date = unified_strdate(self._html_search_regex(
+            r'Date:</td><td>([^<]+)</td>', webpage, 'date', fatal=False))
+
+        entries = [{
+            'id': '%s-%s' % (lecture_id, format_id),
+            'url': video_url,
+            'title': title,
+            'upload_date': upload_date,
+        } for format_id, video_url in re.findall(
+            r'<video class="([^"]+)"[^>]*>\s*<source src="([^"]+)"', webpage)]
+
+        return self.playlist_result(entries, lecture_id, title)