gitweb @ CieloNegro.org - youtube-dl.git/commitdiff
Merge remote-tracking branch 'yan12125/IE_Letv'
author Philipp Hagemeister <phihag@phihag.de>
Thu, 26 Feb 2015 00:26:55 +0000 (01:26 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 26 Feb 2015 00:26:55 +0000 (01:26 +0100)
15 files changed:
AUTHORS
README.md
test/test_utils.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/airmozilla.py [new file with mode: 0644]
youtube_dl/extractor/eporner.py
youtube_dl/extractor/escapist.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/rtlnow.py
youtube_dl/extractor/telecinco.py
youtube_dl/options.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
index bdd2a15dcf910938857ed3d3fb1161c3fd72b72e..4674a5af3cf129b33c716f88f323038477110c05 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -112,3 +112,4 @@ Frans de Jonge
 Robin de Rooij
 Ryan Schmidt
 Leslie P. Polzer
+Duncan Keall
index 699401b49b64f75b2a821f2b1abf86a7f050accb..2c53e22115eb7caaab770e876b606c004e527aca 100644 (file)
--- a/README.md
+++ b/README.md
@@ -139,6 +139,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      dislike_count <? 50 & description" .
     --no-playlist                    If the URL refers to a video and a
                                      playlist, download only the video.
+    --yes-playlist                   If the URL refers to a video and a
+                                     playlist, download the playlist.
     --age-limit YEARS                download only videos suitable for the given
                                      age
     --download-archive FILE          Download only videos not listed in the
index 2f8996d7bb5088e4c38470974b41e268fb260377..3fba8ae11c3b516d86d82051bb179ac5e15b0a91 100644 (file)
@@ -246,6 +246,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('2.5 hours'), 9000)
         self.assertEqual(parse_duration('02:03:04'), 7384)
         self.assertEqual(parse_duration('01:02:03:04'), 93784)
+        self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(
index 7b7d41adf1ec8b6f06789c633f5e8167901f3034..ddb9d6670e9cacbe7cb2f4ff89e7ab0d9a3bd93a 100644 (file)
@@ -8,6 +8,7 @@ from .adobetv import AdobeTVIE
 from .adultswim import AdultSwimIE
 from .aftenposten import AftenpostenIE
 from .aftonbladet import AftonbladetIE
+from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
 from .anitube import AnitubeIE
diff --git a/youtube_dl/extractor/airmozilla.py b/youtube_dl/extractor/airmozilla.py
new file mode 100644 (file)
index 0000000..611ad1e
--- /dev/null
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class AirMozillaIE(InfoExtractor):
+    _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
+    _TEST = {
+        'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
+        'md5': '2e3e7486ba5d180e829d453875b9b8bf',
+        'info_dict': {
+            'id': '6x4q2w',
+            'ext': 'mp4',
+            'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
+            'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
+            'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
+            'timestamp': 1422487800,
+            'upload_date': '20150128',
+            'location': 'SFO Commons',
+            'duration': 3780,
+            'view_count': int,
+            'categories': ['Main'],
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
+
+        embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
+        jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
+        metadata = self._parse_json(jwconfig, video_id)
+
+        formats = [{
+            'url': source['file'],
+            'ext': source['type'],
+            'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
+            'format': source['label'],
+            'height': int(source['label'].rstrip('p')),
+        } for source in metadata['playlist'][0]['sources']]
+        self._sort_formats(formats)
+
+        view_count = int_or_none(self._html_search_regex(
+            r'Views since archived: ([0-9]+)',
+            webpage, 'view count', fatal=False))
+        timestamp = parse_iso8601(self._html_search_regex(
+            r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
+        duration = parse_duration(self._search_regex(
+            r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
+            webpage, 'duration', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'formats': formats,
+            'url': self._og_search_url(webpage),
+            'display_id': display_id,
+            'thumbnail': metadata['playlist'][0].get('image'),
+            'description': self._og_search_description(webpage),
+            'timestamp': timestamp,
+            'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
+            'duration': duration,
+            'view_count': view_count,
+            'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
+        }
index 4de8d4bc5c9107ddc361a8351ea4a63d3da40783..e006921ec3f8d2a0aff0e6bb0595148469b1c256 100644 (file)
@@ -35,10 +35,7 @@ class EpornerIE(InfoExtractor):
         title = self._html_search_regex(
             r'<title>(.*?) - EPORNER', webpage, 'title')
 
-        redirect_code = self._html_search_regex(
-            r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
-            webpage, 'redirect_code')
-        redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
+        redirect_url = 'http://www.eporner.com/config5/%s' % video_id
         player_code = self._download_webpage(
             redirect_url, display_id, note='Downloading player config')
 
@@ -69,5 +66,5 @@ class EpornerIE(InfoExtractor):
             'duration': duration,
             'view_count': view_count,
             'formats': formats,
-            'age_limit': self._rta_search(webpage),
+            'age_limit': 18,
         }
index 51ffec7ee381ec74b78442a054825592ff128446..b45c1dbd07650d9717408591c7b20077bf62475e 100644 (file)
@@ -44,14 +44,15 @@ class EscapistIE(InfoExtractor):
         config_url = compat_urllib_parse.unquote(self._html_search_regex(
             r'''(?x)
             (?:
-                <param\s+name="flashvars"\s+value="config=|
+                <param\s+name="flashvars".*?\s+value="config=|
                 flashvars=&quot;config=
             )
-            ([^"&]+)
+            (https?://[^"&]+)
             ''',
             webpage, 'config URL'))
 
         formats = []
+        ad_formats = []
 
         def _add_format(name, cfgurl, quality):
             config = self._download_json(
@@ -61,14 +62,19 @@ class EscapistIE(InfoExtractor):
                 transform_source=js_to_json)
 
             playlist = config['playlist']
-            video_url = next(
-                p['url'] for p in playlist
-                if p.get('eventCategory') == 'Video')
-            formats.append({
-                'url': video_url,
-                'format_id': name,
-                'quality': quality,
-            })
+            for p in playlist:
+                if p.get('eventCategory') == 'Video':
+                    ar = formats
+                elif p.get('eventCategory') == 'Video Postroll':
+                    ar = ad_formats
+                else:
+                    continue
+
+                ar.append({
+                    'url': p['url'],
+                    'format_id': name,
+                    'quality': quality,
+                })
 
         _add_format('normal', config_url, quality=0)
         hq_url = (config_url +
@@ -77,10 +83,9 @@ class EscapistIE(InfoExtractor):
             _add_format('hq', hq_url, quality=1)
         except ExtractorError:
             pass  # That's fine, we'll just use normal quality
-
         self._sort_formats(formats)
 
-        return {
+        res = {
             'id': video_id,
             'formats': formats,
             'uploader': uploader,
@@ -89,3 +94,19 @@ class EscapistIE(InfoExtractor):
             'thumbnail': self._og_search_thumbnail(webpage),
             'description': description,
         }
+
+        if self._downloader.params.get('include_ads') and ad_formats:
+            self._sort_formats(ad_formats)
+            ad_res = {
+                'id': '%s-ad' % video_id,
+                'title': '%s (Postroll)' % title,
+                'formats': ad_formats,
+            }
+            return {
+                '_type': 'playlist',
+                'entries': [res, ad_res],
+                'title': title,
+                'id': video_id,
+            }
+
+        return res
index 875e1bf05ff274a41f46518c48e990954b7e12e5..3aff57e30302d3c33ce5e468f9df642cda0f6ff8 100644 (file)
@@ -1208,7 +1208,9 @@ class GenericIE(InfoExtractor):
             return entries[0]
         else:
             for num, e in enumerate(entries, start=1):
-                e['title'] = '%s (%d)' % (e['title'], num)
+                # 'url' results don't have a title
+                if e.get('title') is not None:
+                    e['title'] = '%s (%d)' % (e['title'], num)
             return {
                 '_type': 'playlist',
                 'entries': entries,
index 2567583235617e52b6420419863dbc8d319c8201..d8897eb90d526b7b7d2e5a5ace5bec84ebb40031 100644 (file)
@@ -18,7 +18,7 @@ class MiTeleIE(InfoExtractor):
     IE_NAME = 'mitele.es'
     _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
         'md5': '6a75fe9d0d3275bead0cb683c616fddb',
         'info_dict': {
@@ -29,7 +29,7 @@ class MiTeleIE(InfoExtractor):
             'display_id': 'programa-144',
             'duration': 2913,
         },
-    }
+    }]
 
     def _real_extract(self, url):
         episode = self._match_id(url)
index 46f493cfca6d6926d9ba6c8b1cf73aa73fe15476..1e4cfa2e7c8c5e3ae05c7d5fbc11242a334a5322 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     float_or_none,
@@ -158,7 +159,9 @@ class NRKTVIE(InfoExtractor):
     def _get_subtitles(self, subtitlesurl, video_id, baseurl):
         url = "%s%s" % (baseurl, subtitlesurl)
         self._debug_print('%s: Subtitle url: %s' % (video_id, url))
-        captions = self._download_xml(url, video_id, 'Downloading subtitles')
+        captions = self._download_xml(
+            url, video_id, 'Downloading subtitles',
+            transform_source=lambda s: s.replace(r'<br />', '\r\n'))
         lang = captions.get('lang', 'no')
         ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
         srt = ''
@@ -167,8 +170,7 @@ class NRKTVIE(InfoExtractor):
             duration = parse_duration(p.get('dur'))
             starttime = self._seconds2str(begin)
             endtime = self._seconds2str(begin + duration)
-            text = '\n'.join(p.itertext())
-            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
+            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
         return {lang: [
             {'ext': 'ttml', 'url': url},
             {'ext': 'srt', 'data': srt},
index fd93cc66f5e1d377341cc016e67825b0f7f2782d..785a8045e09d65f31405fbd15106dbeb684afcc9 100644 (file)
@@ -146,7 +146,7 @@ class RTLnowIE(InfoExtractor):
                 mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
                 if mobj:
                     fmt = {
-                        'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'),
+                        'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'),
                         'play_path': 'mp4:' + mobj.group('play_path'),
                         'page_url': url,
                         'player_url': video_page_url + 'includes/vodplayer.swf',
index be3f72df7c11043346b015528ae905913a3d05df..251a686804b6f26915c3fa25d9f6b2cc1f98ed4b 100644 (file)
@@ -6,9 +6,9 @@ from .mitele import MiTeleIE
 
 class TelecincoIE(MiTeleIE):
     IE_NAME = 'telecinco.es'
-    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'
+    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
         'info_dict': {
             'id': 'MDSVID20141015_0058',
@@ -16,4 +16,7 @@ class TelecincoIE(MiTeleIE):
             'title': 'Con Martín Berasategui, hacer un bacalao al ...',
             'duration': 662,
         },
-    }
+    }, {
+        'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
+        'only_matching': True,
+    }]
index 5c2d153b13b9e060e4f5b4d1d9991d10a72413c4..886ce96132d4ec6b67b3bf3114425463f6687597 100644 (file)
@@ -272,6 +272,10 @@ def parseOpts(overrideArguments=None):
         '--no-playlist',
         action='store_true', dest='noplaylist', default=False,
         help='If the URL refers to a video and a playlist, download only the video.')
+    selection.add_option(
+        '--yes-playlist',
+        action='store_false', dest='noplaylist', default=False,
+        help='If the URL refers to a video and a playlist, download the playlist.')
     selection.add_option(
         '--age-limit',
         metavar='YEARS', dest='age_limit', default=None, type=int,
index e2631dccda7a13679da2280b1e8d38301b7ffe47..1f3bfef7d562e3fb0e63db16e644b86819eaaa5c 100644 (file)
@@ -54,7 +54,7 @@ from .compat import (
 compiled_regex_type = type(re.compile(''))
 
 std_headers = {
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     'Accept-Encoding': 'gzip, deflate',
@@ -1290,6 +1290,7 @@ def parse_duration(s):
             (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
             (?P<only_hours>[0-9.]+)\s*(?:hours?)|
 
+            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
             (?:
                 (?:
                     (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
@@ -1308,10 +1309,14 @@ def parse_duration(s):
         return float_or_none(m.group('only_hours'), invscale=60 * 60)
     if m.group('secs'):
         res += int(m.group('secs'))
+    if m.group('mins_reversed'):
+        res += int(m.group('mins_reversed')) * 60
     if m.group('mins'):
         res += int(m.group('mins')) * 60
     if m.group('hours'):
         res += int(m.group('hours')) * 60 * 60
+    if m.group('hours_reversed'):
+        res += int(m.group('hours_reversed')) * 60 * 60
     if m.group('days'):
         res += int(m.group('days')) * 24 * 60 * 60
     if m.group('ms'):
index d23c6ae3d7c622f285aa7bb0e432b082fde29472..0cbf66ed1e12c1e9f5d801932957e2db85590374 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.02.24.2'
+__version__ = '2015.02.26'