gitweb @ CieloNegro.org - youtube-dl.git/commitdiff
Merge branch 'izlesene' of https://github.com/naglis/youtube-dl into naglis-izlesene
author Sergey M․ <dstftw@gmail.com>
Fri, 1 Aug 2014 11:16:47 +0000 (18:16 +0700)
committer Sergey M․ <dstftw@gmail.com>
Fri, 1 Aug 2014 11:16:47 +0000 (18:16 +0700)
19 files changed:
README.md
test/test_playlists.py
youtube_dl/__init__.py
youtube_dl/downloader/f4m.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/blinkx.py
youtube_dl/extractor/bloomberg.py
youtube_dl/extractor/br.py
youtube_dl/extractor/common.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/gamestar.py [new file with mode: 0644]
youtube_dl/extractor/generic.py
youtube_dl/extractor/godtube.py [new file with mode: 0644]
youtube_dl/extractor/streamcloud.py
youtube_dl/extractor/swrmediathek.py
youtube_dl/extractor/vevo.py
youtube_dl/extractor/vidme.py [new file with mode: 0644]
youtube_dl/version.py

index af880ccc24444d91d41bee4e95e9f4dec259947e..a42dfb8567ffb86e926756aec59cbee94cee29bf 100644 (file)
--- a/README.md
+++ b/README.md
@@ -46,15 +46,15 @@ which means you can modify it, redistribute it or use it however you like.
                                      an empty string (--proxy "") for direct
                                      connection
     --socket-timeout None            Time to wait before giving up, in seconds
-    --bidi-workaround                Work around terminals that lack
-                                     bidirectional text support. Requires bidiv
-                                     or fribidi executable in PATH
     --default-search PREFIX          Use this prefix for unqualified URLs. For
                                      example "gvsearch2:" downloads two videos
                                      from google videos for  youtube-dl "large
                                      apple". Use the value "auto" to let
-                                     youtube-dl guess. The default value "error"
-                                     just throws an error.
+                                     youtube-dl guess ("auto_warning" to emit a
+                                     warning when guessing). "error" just throws
+                                     an error. The default value "fixup_error"
+                                     repairs broken URLs, but emits an error if
+                                     this is not possible instead of searching.
     --ignore-config                  Do not read configuration files. When given
                                      in the global configuration file /etc
                                      /youtube-dl.conf: do not read the user
@@ -213,6 +213,9 @@ which means you can modify it, redistribute it or use it however you like.
     --add-header FIELD:VALUE         specify a custom HTTP header and its value,
                                      separated by a colon ':'. You can use this
                                      option multiple times
+    --bidi-workaround                Work around terminals that lack
+                                     bidirectional text support. Requires bidiv
+                                     or fribidi executable in PATH
 
 ## Video Format Options:
     -f, --format FORMAT              video format code, specify the order of
index c221c47b99daabc668607f09744d078e726b09ff..4f188345bf2b9bd7fee5d886cbcfec3ba15dae6e 100644 (file)
@@ -193,10 +193,10 @@ class TestPlaylists(unittest.TestCase):
     def test_bandcamp_album(self):
         dl = FakeYDL()
         ie = BandcampAlbumIE(dl)
-        result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
+        result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Nightmare Night EP')
-        assertGreaterEqual(self, len(result['entries']), 4)
+        self.assertEqual(result['title'], 'Hierophany of the Open Grave')
+        assertGreaterEqual(self, len(result['entries']), 9)
         
     def test_smotri_community(self):
         dl = FakeYDL()
index 6ff0be00f0014ffe2b82cf18e382d5fb5158d643..429630ce5c61289140b6b0188bba32cbf7a153a0 100644 (file)
@@ -252,13 +252,10 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '--socket-timeout', dest='socket_timeout',
         type=float, default=None, help=u'Time to wait before giving up, in seconds')
-    general.add_option(
-        '--bidi-workaround', dest='bidi_workaround', action='store_true',
-        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
     general.add_option(
         '--default-search',
         dest='default_search', metavar='PREFIX',
-        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
+        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
     general.add_option(
         '--ignore-config',
         action='store_true',
@@ -386,6 +383,9 @@ def parseOpts(overrideArguments=None):
         dest='headers', action='append',
         help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
     )
+    workarounds.add_option(
+        '--bidi-workaround', dest='bidi_workaround', action='store_true',
+        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
 
     verbosity.add_option('-q', '--quiet',
             action='store_true', dest='quiet', help='activates quiet mode', default=False)
@@ -709,7 +709,7 @@ def _real_main(argv=None):
         date = DateRange.day(opts.date)
     else:
         date = DateRange(opts.dateafter, opts.datebefore)
-    if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
+    if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
         parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
 
     # Do not download videos when there are audio-only formats
index e6be6ae6c878c9ede7cd2cf3b6be663e22bb8be1..71353f607daead364acbdad83b18b79e61a5bffa 100644 (file)
@@ -220,6 +220,7 @@ class F4mFD(FileDownloader):
 
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
+        requested_bitrate = info_dict.get('tbr')
         self.to_screen('[download] Downloading f4m manifest')
         manifest = self.ydl.urlopen(man_url).read()
         self.report_destination(filename)
@@ -233,8 +234,14 @@ class F4mFD(FileDownloader):
 
         doc = etree.fromstring(manifest)
         formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
-        formats = sorted(formats, key=lambda f: f[0])
-        rate, media = formats[-1]
+        if requested_bitrate is None:
+            # get the best format
+            formats = sorted(formats, key=lambda f: f[0])
+            rate, media = formats[-1]
+        else:
+            rate, media = list(filter(
+                lambda f: int(f[0]) == requested_bitrate, formats))[0]
+
         base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
         bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
         metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
index 2bf8bc5e86987f13fcf764e40e8b5878cd30e9ca..f6c0ee7951d11c73f0f1c10e7119aecf4fd0b95d 100644 (file)
@@ -112,9 +112,11 @@ from .funnyordie import FunnyOrDieIE
 from .gamekings import GamekingsIE
 from .gameone import GameOneIE
 from .gamespot import GameSpotIE
+from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
+from .godtube import GodTubeIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
@@ -346,6 +348,7 @@ from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .videott import VideoTtIE
 from .videoweed import VideoWeedIE
+from .vidme import VidmeIE
 from .vimeo import (
     VimeoIE,
     VimeoChannelIE,
index 30a85c8c1c8d1b3a10a40ac55a577e3402cb487a..957bdefcbec7666473530815df9e3eb5dc88e096 100644 (file)
@@ -8,6 +8,8 @@ from ..utils import (
     determine_ext,
     ExtractorError,
     qualities,
+    compat_urllib_parse_urlparse,
+    compat_urllib_parse,
 )
 
 
@@ -44,6 +46,9 @@ class ARDIE(InfoExtractor):
         else:
             video_id = m.group('video_id')
 
+        urlp = compat_urllib_parse_urlparse(url)
+        url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
+
         webpage = self._download_webpage(url, video_id)
 
         title = self._html_search_regex(
index 7d558e262ecea44df6b025f0db716b82d975b314..3e461e715e141b1ff4a294eb01b7657d16f05d4b 100644 (file)
@@ -52,7 +52,7 @@ class BlinkxIE(InfoExtractor):
                     'height': int(m['h']),
                 })
             elif m['type'] == 'original':
-                duration = m['d']
+                duration = float(m['d'])
             elif m['type'] == 'youtube':
                 yt_id = m['link']
                 self.to_screen('Youtube video detected: %s' % yt_id)
index 25fb79e146b18f50962ba506d01560fbd845dbf2..c51a97ce4327cff934216927948587131dedfa80 100644 (file)
@@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
-        'md5': '7bf08858ff7c203c870e8a6190e221e5',
+        # The md5 checksum changes
         'info_dict': {
             'id': 'qurhIVlJSB6hzkVi229d8g',
             'ext': 'flv',
@@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
         return {
             'id': name.split('-')[-1],
             'title': title,
-            'url': f4m_url,
-            'ext': 'flv',
+            'formats': self._extract_f4m_formats(f4m_url, name),
             'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
         }
index f7f2f713a59446a68f806b7e62140bfcbc808313..86f0c2861e35f296f594a4ac45bbfe74b799d9e0 100644 (file)
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
+    parse_duration,
 )
 
 
@@ -22,8 +23,9 @@ class BRIE(InfoExtractor):
             'info_dict': {
                 'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
                 'ext': 'mp4',
-                'title': 'Am 1. und 2. August in Oberammergau',
-                'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021',
+                'title': 'Wenn das Traditions-Theater wackelt',
+                'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
+                'duration': 34,
             }
         },
         {
@@ -34,6 +36,7 @@ class BRIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Über den Pass',
                 'description': 'Die Eroberung der Alpen: Über den Pass',
+                'duration': 2588,
             }
         },
         {
@@ -44,6 +47,7 @@ class BRIE(InfoExtractor):
                 'ext': 'aac',
                 'title': '"Keine neuen Schulden im nächsten Jahr"',
                 'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
+                'duration': 64,
             }
         },
         {
@@ -54,6 +58,7 @@ class BRIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Umweltbewusster Häuslebauer',
                 'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
+                'duration': 116,
             }
         },
         {
@@ -64,6 +69,7 @@ class BRIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Folge 1 - Metaphysik',
                 'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
+                'duration': 893,
                 'uploader': 'Eva Maria Steimle',
                 'upload_date': '20140117',
             }
@@ -84,6 +90,7 @@ class BRIE(InfoExtractor):
             media = {
                 'id': xml_media.get('externalId'),
                 'title': xml_media.find('title').text,
+                'duration': parse_duration(xml_media.find('duration').text),
                 'formats': self._extract_formats(xml_media.find('assets')),
                 'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
                 'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
index 52c00186e458fd131e28eb178ea09df8e9d9ce0d..342bfb8b3b53bcb76951613002090be8737bbe29 100644 (file)
@@ -18,6 +18,7 @@ from ..utils import (
     clean_html,
     compiled_regex_type,
     ExtractorError,
+    int_or_none,
     RegexNotFoundError,
     sanitize_filename,
     unescapeHTML,
@@ -590,6 +591,24 @@ class InfoExtractor(object):
         self.to_screen(msg)
         time.sleep(timeout)
 
+    def _extract_f4m_formats(self, manifest_url, video_id):
+        manifest = self._download_xml(
+            manifest_url, video_id, 'Downloading f4m manifest',
+            'Unable to download f4m manifest')
+
+        formats = []
+        for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
+            formats.append({
+                'url': manifest_url,
+                'ext': 'flv',
+                'tbr': int_or_none(media_el.attrib.get('bitrate')),
+                'width': int_or_none(media_el.attrib.get('width')),
+                'height': int_or_none(media_el.attrib.get('height')),
+            })
+        self._sort_formats(formats)
+
+        return formats
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
index 1fbe6d1759b8900160b7bc94b0a2396406acc016..1b0e8e5d59dc23d52d7fb15d7e46e0b1383a7435 100644 (file)
@@ -19,17 +19,35 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
             + video_id, video_id, 'Downloading XML config')
 
         manifest_url = info.find('videos/video/url').text
-        video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
-        video_url = video_url.replace('/z/', '/i/')
+        manifest_url = manifest_url.replace('/z/', '/i/')
+        
+        if manifest_url.startswith('rtmp'):
+            formats = [{'url': manifest_url, 'ext': 'flv'}]
+        else:
+            formats = []
+            available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats')
+            for index, format_descr in enumerate(available_formats.split(',')):
+                format_info = {
+                    'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index),
+                    'ext': 'mp4',
+                }
+                m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
+                if m_resolution is not None:
+                    format_info.update({
+                        'width': int(m_resolution.group('width')),
+                        'height': int(m_resolution.group('height')),
+                    })
+                formats.append(format_info)
+
         thumbnail_path = info.find('image').text
 
-        return {'id': video_id,
-                'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
-                'url': video_url,
-                'title': info.find('titre').text,
-                'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
-                'description': info.find('synopsis').text,
-                }
+        return {
+            'id': video_id,
+            'title': info.find('titre').text,
+            'formats': formats,
+            'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
+            'description': info.find('synopsis').text,
+        }
 
 
 class PluzzIE(FranceTVBaseInfoExtractor):
diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py
new file mode 100644 (file)
index 0000000..50f8fc7
--- /dev/null
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    str_to_int,
+    unified_strdate,
+)
+
+
+class GameStarIE(InfoExtractor):
+    _VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
+    _TEST = {
+        'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
+        'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
+        'info_dict': {
+            'id': '76110',
+            'ext': 'mp4',
+            'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
+            'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.',
+            'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg',
+            'upload_date': '20140728',
+            'duration': 17
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        og_title = self._og_search_title(webpage)
+        title = og_title.replace(' - Video bei GameStar.de', '').strip()
+
+        url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
+
+        description = self._og_search_description(webpage).strip()
+
+        thumbnail = self._proto_relative_url(
+            self._og_search_thumbnail(webpage), scheme='http:')
+
+        upload_date = unified_strdate(self._html_search_regex(
+            r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+)&nbsp;&nbsp;',
+            webpage, 'upload_date', fatal=False))
+
+        duration = parse_duration(self._html_search_regex(
+            r'&nbsp;&nbsp;Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration',
+            fatal=False))
+
+        view_count = str_to_int(self._html_search_regex(
+            r'&nbsp;&nbsp;Zuschauer: ([0-9\.]+)&nbsp;&nbsp;', webpage,
+            'view_count', fatal=False))
+
+        comment_count = int_or_none(self._html_search_regex(
+            r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count',
+            fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': url,
+            'ext': 'mp4',
+            'thumbnail': thumbnail,
+            'description': description,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
+            'comment_count': comment_count
+        }
index 9db27f9aa32730460af728f690ec131014667185..bcb0765940df39656be9f78c1ab144976adb5e5e 100644 (file)
@@ -383,13 +383,13 @@ class GenericIE(InfoExtractor):
         if not parsed_url.scheme:
             default_search = self._downloader.params.get('default_search')
             if default_search is None:
-                default_search = 'error'
+                default_search = 'fixup_error'
 
-            if default_search in ('auto', 'auto_warning'):
+            if default_search in ('auto', 'auto_warning', 'fixup_error'):
                 if '/' in url:
                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                     return self.url_result('http://' + url)
-                else:
+                elif default_search != 'fixup_error':
                     if default_search == 'auto_warning':
                         if re.match(r'^(?:url|URL)$', url):
                             raise ExtractorError(
@@ -399,7 +399,8 @@ class GenericIE(InfoExtractor):
                             self._downloader.report_warning(
                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
                     return self.url_result('ytsearch:' + url)
-            elif default_search == 'error':
+
+            if default_search in ('error', 'fixup_error'):
                 raise ExtractorError(
                     ('%r is not a valid URL. '
                      'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
diff --git a/youtube_dl/extractor/godtube.py b/youtube_dl/extractor/godtube.py
new file mode 100644 (file)
index 0000000..73bd6d8
--- /dev/null
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class GodTubeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'
+    _TESTS = [
+        {
+            'url': 'https://www.godtube.com/watch/?v=0C0CNNNU',
+            'md5': '77108c1e4ab58f48031101a1a2119789',
+            'info_dict': {
+                'id': '0C0CNNNU',
+                'ext': 'mp4',
+                'title': 'Woman at the well.',
+                'duration': 159,
+                'timestamp': 1205712000,
+                'uploader': 'beverlybmusic',
+                'upload_date': '20080317',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        config = self._download_xml(
+            'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
+            video_id, 'Downloading player config XML')
+
+        video_url = config.find('.//file').text
+        uploader = config.find('.//author').text
+        timestamp = parse_iso8601(config.find('.//date').text)
+        duration = parse_duration(config.find('.//duration').text)
+        thumbnail = config.find('.//image').text
+
+        media = self._download_xml(
+            'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML')
+
+        title = media.find('.//title').text
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'duration': duration,
+        }
index 9faf3a5e3f677ae8b00454c492f6ef2bf129d329..172def221e1277298dc355a2cfdbea3ae4f9fdce 100644 (file)
@@ -1,4 +1,6 @@
 # coding: utf-8
+from __future__ import unicode_literals
+
 import re
 import time
 
@@ -10,18 +12,18 @@ from ..utils import (
 
 
 class StreamcloudIE(InfoExtractor):
-    IE_NAME = u'streamcloud.eu'
+    IE_NAME = 'streamcloud.eu'
     _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
 
     _TEST = {
-        u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
-        u'file': u'skp9j99s4bpz.mp4',
-        u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
-        u'info_dict': {
-            u'title': u'youtube-dl test video  \'/\\ ä ↭',
-            u'duration': 9,
+        'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
+        'md5': '6bea4c7fa5daaacc2a946b7146286686',
+        'info_dict': {
+            'id': 'skp9j99s4bpz',
+            'ext': 'mp4',
+            'title': 'youtube-dl test video  \'/\\ ä ↭',
         },
-        u'skip': u'Only available from the EU'
+        'skip': 'Only available from the EU'
     }
 
     def _real_extract(self, url):
@@ -46,21 +48,17 @@ class StreamcloudIE(InfoExtractor):
         req = compat_urllib_request.Request(url, post, headers)
 
         webpage = self._download_webpage(
-            req, video_id, note=u'Downloading video page ...')
+            req, video_id, note='Downloading video page ...')
         title = self._html_search_regex(
-            r'<h1[^>]*>([^<]+)<', webpage, u'title')
+            r'<h1[^>]*>([^<]+)<', webpage, 'title')
         video_url = self._search_regex(
-            r'file:\s*"([^"]+)"', webpage, u'video URL')
-        duration_str = self._search_regex(
-            r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
-        duration = None if duration_str is None else int(duration_str)
+            r'file:\s*"([^"]+)"', webpage, 'video URL')
         thumbnail = self._search_regex(
-            r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
+            r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)
 
         return {
             'id': video_id,
             'title': title,
             'url': video_url,
-            'duration': duration,
             'thumbnail': thumbnail,
         }
index 6c688c5202804986b6a0d6d154cb986e18789073..5d9d703673265ca4a53a54f28e34494d570cb206 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import parse_duration
 
 
 class SWRMediathekIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/(?:content/)?player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
 
     _TESTS = [{
         'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
@@ -52,6 +52,20 @@ class SWRMediathekIE(InfoExtractor):
             'uploader': 'SWR 2',
             'uploader_id': '284670',
         }
+    }, {
+        'url': 'http://swrmediathek.de/content/player.htm?show=52dc7e00-15c5-11e4-84bc-0026b975f2e6',
+        'md5': '881531487d0633080a8cc88d31ef896f',
+        'info_dict': {
+            'id': '52dc7e00-15c5-11e4-84bc-0026b975f2e6',
+            'ext': 'mp4',
+            'title': 'Familienspaß am Bodensee',
+            'description': 'md5:0b591225a32cfde7be1629ed49fe4315',
+            'thumbnail': 're:http://.*\.jpg',
+            'duration': 1784,
+            'upload_date': '20140727',
+            'uploader': 'SWR Fernsehen BW',
+            'uploader_id': '281130',
+        }
     }]
 
     def _real_extract(self, url):
index eada13ce920b9f4e892f952242ef87bfac504600..d2ffd1b6ba893f2cb2cc50f00a3131a835dba97d 100644 (file)
@@ -177,6 +177,7 @@ class VevoIE(InfoExtractor):
             self._downloader.report_warning(
                 'Cannot download SMIL information, falling back to JSON ..')
 
+        self._sort_formats(formats)
         timestamp_ms = int(self._search_regex(
             r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
 
diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py
new file mode 100644 (file)
index 0000000..5c89824
--- /dev/null
@@ -0,0 +1,68 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    str_to_int,
+)
+
+
+class VidmeIE(InfoExtractor):
+    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
+    _TEST = {
+        'url': 'https://vid.me/QNB',
+        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
+        'info_dict': {
+            'id': 'QNB',
+            'ext': 'mp4',
+            'title': 'Fishing for piranha - the easy way',
+            'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
+            'duration': 119.92,
+            'timestamp': 1406313244,
+            'upload_date': '20140725',
+            'thumbnail': 're:^https?://.*\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage, default='')
+        thumbnail = self._og_search_thumbnail(webpage)
+        timestamp = int_or_none(self._og_search_property('updated_time', webpage, fatal=False))
+        width = int_or_none(self._og_search_property('video:width', webpage, fatal=False))
+        height = int_or_none(self._og_search_property('video:height', webpage, fatal=False))
+        duration = float_or_none(self._html_search_regex(
+            r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
+        view_count = str_to_int(self._html_search_regex(
+            r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._html_search_regex(
+            r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
+            webpage, 'like count', fatal=False))
+        comment_count = str_to_int(self._html_search_regex(
+            r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
+            webpage, 'comment count', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'width': width,
+            'height': height,
+            'duration': duration,
+            'view_count': view_count,
+            'like_count': like_count,
+            'comment_count': comment_count,
+        }
index e77494595dd2212b3940347d408b61ea300fcf34..6e7d56cf75bfcc0f0d74b1ac917dab02112be51b 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.07.25.1'
+__version__ = '2014.07.30'