gitweb @ CieloNegro.org - youtube-dl.git/commitdiff
Merge remote-tracking branch 'naglis/nosvideo'
author Philipp Hagemeister <phihag@phihag.de>
Mon, 1 Sep 2014 22:14:58 +0000 (00:14 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Mon, 1 Sep 2014 22:14:58 +0000 (00:14 +0200)
15 files changed:
test/test_all_urls.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/anysex.py
youtube_dl/extractor/beeg.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/drtuber.py [new file with mode: 0644]
youtube_dl/extractor/eporner.py
youtube_dl/extractor/hornbunny.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/sunporno.py
youtube_dl/extractor/youtube.py
youtube_dl/utils.py
youtube_dl/version.py

index 5f1092939e1acb8492a2568bfca874e81964525e..84b05da39e1e28d0df4d65acb6248aa77d7b6b65 100644 (file)
@@ -143,32 +143,6 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
         self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
 
-    def test_ComedyCentralShows(self):
-        self.assertMatch(
-            'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
-            ['ComedyCentralShows'])
-
     def test_yahoo_https(self):
         # https://github.com/rg3/youtube-dl/issues/2701
         self.assertMatch(
index b1569505369a3669f46040d29ea1cd0277130460..bf616e3b60c6f6a012898e69f950fd4521a85101 100644 (file)
@@ -876,7 +876,7 @@ def _real_main(argv=None):
                 ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
             else:
                 if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
-                    ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir')
+                    ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir' % opts.cachedir)
                     retcode = 141
                 else:
                     ydl.to_screen(
index dede0cde58d7d7663090e25359f9e70dfc19c06b..6c7668fe27c8196a3363d9fc11d6b25add806228 100644 (file)
@@ -70,6 +70,7 @@ from .daum import DaumIE
 from .dfb import DFBIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
+from .drtuber import DrTuberIE
 from .drtv import DRTVIE
 from .dump import DumpIE
 from .defense import DefenseGouvFrIE
index adeacba01b3bd77a501076f80757898ac090a3e1..bc64423a3f69cd3407701e7b02df333a4f7e628c 100644 (file)
@@ -21,6 +21,7 @@ class AnySexIE(InfoExtractor):
             'description': 'md5:de9e418178e2931c10b62966474e1383',
             'categories': ['Erotic'],
             'duration': 270,
+            'age_limit': 18,
         }
     }
 
@@ -43,7 +44,6 @@ class AnySexIE(InfoExtractor):
 
         duration = parse_duration(self._search_regex(
             r'<b>Duration:</b> (\d+:\d+)', webpage, 'duration', fatal=False))
-
         view_count = int_or_none(self._html_search_regex(
             r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False))
 
@@ -57,4 +57,5 @@ class AnySexIE(InfoExtractor):
             'categories': categories,
             'duration': duration,
             'view_count': view_count,
+            'age_limit': 18,
         }
index c2692cfdcc0d7d6a94620a290fb4e7f365953891..d7301fe18ccf7f2324e3cb233ac93102ed076ba6 100644 (file)
@@ -17,6 +17,7 @@ class BeegIE(InfoExtractor):
             'description': 'md5:6db3c6177972822aaba18652ff59c773',
             'categories': list,  # NSFW
             'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
         }
     }
 
@@ -41,7 +42,9 @@ class BeegIE(InfoExtractor):
 
         categories_str = self._html_search_regex(
             r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
-        categories = categories_str.split(',')
+        categories = (
+            None if categories_str is None
+            else categories_str.split(','))
 
         return {
             'id': video_id,
@@ -50,4 +53,5 @@ class BeegIE(InfoExtractor):
             'description': description,
             'thumbnail': thumbnail,
             'categories': categories,
+            'age_limit': 18,
         }
index c81ce5a96f03b539d2f5e98975218fcdd0ed861d..035046120152f264278b4edc4bd5b11e0183da98 100644 (file)
@@ -43,14 +43,14 @@ class ComedyCentralShowsIE(InfoExtractor):
                           (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
                          ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
                           (?P<clip>
-                              (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+))
+                              (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
                               |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                               |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
                           )|
                           (?P<interview>
                               extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
                      (?:[?#].*|$)'''
-    _TEST = {
+    _TESTS = [{
         'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
         'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
         'info_dict': {
@@ -61,7 +61,34 @@ class ComedyCentralShowsIE(InfoExtractor):
             'uploader': 'thedailyshow',
             'title': 'thedailyshow kristen-stewart part 1',
         }
-    }
+    }, {
+        'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
+        'only_matching': True,
+    }, {
+        'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
+        'only_matching': True,
+    }, {
+        'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
+        'only_matching': True,
+    }, {
+        'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
+        'only_matching': True,
+    }, {
+        'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
+        'only_matching': True,
+    }, {
+        'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
+        'only_matching': True,
+    }, {
+        'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
+        'only_matching': True,
+    }, {
+        'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
+        'only_matching': True,
+    }, {
+        'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
+        'only_matching': True,
+    }]
 
     _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
 
@@ -185,6 +212,9 @@ class ComedyCentralShowsIE(InfoExtractor):
                     'ext': self._video_extensions.get(format, 'mp4'),
                     'height': h,
                     'width': w,
+
+                    'format_note': 'HTTP 400 at the moment (patches welcome!)',
+                    'preference': -100,
                 })
                 formats.append({
                     'format_id': 'rtmp-%s' % format,
index 8453321c5995a7e752c8653d5f3d5a36fd1e7ee0..929dd1e97efd70e5699dc333d222fe7a97a8de9a 100644 (file)
@@ -677,9 +677,12 @@ class InfoExtractor(object):
                 }
                 codecs = last_info.get('CODECS')
                 if codecs:
-                    video, audio = codecs.split(',')
-                    f['vcodec'] = video.partition('.')[0]
-                    f['acodec'] = audio.partition('.')[0]
+                    # TODO: it looks like the video codec does not necessarily always come first
+                    va_codecs = codecs.split(',')
+                    if va_codecs[0]:
+                        f['vcodec'] = va_codecs[0].partition('.')[0]
+                    if len(va_codecs) > 1 and va_codecs[1]:
+                        f['acodec'] = va_codecs[1].partition('.')[0]
                 resolution = last_info.get('RESOLUTION')
                 if resolution:
                     width_str, height_str = resolution.split('x')
diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py
new file mode 100644 (file)
index 0000000..9a13925
--- /dev/null
@@ -0,0 +1,50 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class DrTuberIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<title_dash>[\w-]+)'
+    _TEST = {
+        'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
+        'md5': '93e680cf2536ad0dfb7e74d94a89facd',
+        'info_dict': {
+            'id': '1740434',
+            'ext': 'mp4',
+            'title': 'Hot Perky Blonde Naked Golf',
+            'categories': list,  # NSFW
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r'<source src="([^"]+)"', webpage, 'video URL')
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
+
+        thumbnail = self._html_search_regex(
+            r'poster="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+
+        cats_str = self._html_search_regex(
+            r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
+        categories = None if cats_str is None else cats_str.split(' ')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'age_limit': self._rta_search(webpage),
+        }
index 4c2c074cbcf0c13a147ab1e0856bb0e8f6efc1d9..6926fcda327e848809dc791feeea7ac9bb99da9d 100644 (file)
@@ -21,6 +21,7 @@ class EpornerIE(InfoExtractor):
             'title': 'Infamous Tiffany Teen Strip Tease Video',
             'duration': 194,
             'view_count': int,
+            'age_limit': 18,
         }
     }
 
@@ -35,9 +36,10 @@ class EpornerIE(InfoExtractor):
             r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
             webpage, 'redirect_code')
         redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
-        webpage2 = self._download_webpage(redirect_url, video_id)
+        player_code = self._download_webpage(
+            redirect_url, video_id, note='Downloading player config')
         video_url = self._html_search_regex(
-            r'file: "(.*?)",', webpage2, 'video_url')
+            r'file: "(.*?)",', player_code, 'video_url')
 
         duration = parse_duration(self._search_regex(
             r'class="mbtim">([0-9:]+)</div>', webpage, 'duration',
@@ -52,4 +54,5 @@ class EpornerIE(InfoExtractor):
             'title': title,
             'duration': duration,
             'view_count': view_count,
+            'age_limit': self._rta_search(webpage),
         }
index a42fba0cbaf548247d9fbb77365a211fc4a1ca30..7e7714438ce9099e4d7f8d6efe9f1204dc6f2690 100644 (file)
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    parse_duration,
+)
+
 
 class HornBunnyIE(InfoExtractor):
     _VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
@@ -15,7 +19,8 @@ class HornBunnyIE(InfoExtractor):
             'id': '5227',
             'ext': 'flv',
             'title': 'panty slut jerk off instruction',
-            'duration': 550
+            'duration': 550,
+            'age_limit': 18,
         }
     }
 
@@ -23,16 +28,22 @@ class HornBunnyIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
-        webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'class="title">(.*?)</h2>', webpage, 'title')
-        redirect_url = self._html_search_regex(r'pg&settings=(.*?)\|0"\);', webpage, 'title')
+        webpage = self._download_webpage(
+            url, video_id, note='Downloading initial webpage')
+        title = self._html_search_regex(
+            r'class="title">(.*?)</h2>', webpage, 'title')
+        redirect_url = self._html_search_regex(
+            r'pg&settings=(.*?)\|0"\);', webpage, 'title')
         webpage2 = self._download_webpage(redirect_url, video_id)
-        video_url = self._html_search_regex(r'flvMask:(.*?);', webpage2, 'video_url')
+        video_url = self._html_search_regex(
+            r'flvMask:(.*?);', webpage2, 'video_url')
         
-        mobj = re.search(r'<strong>Runtime:</strong> (?P<minutes>\d+):(?P<seconds>\d+)</div>', webpage)
-        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
-
-        view_count = self._html_search_regex(r'<strong>Views:</strong>  (\d+)</div>', webpage, 'view count', fatal=False)
+        duration = parse_duration(self._search_regex(
+            r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
+            webpage, 'duration', fatal=False))
+        view_count = int_or_none(self._search_regex(
+            r'<strong>Views:</strong>\s*(\d+)</div>',
+            webpage, 'view count', fatal=False))
 
         return {
             'id': video_id,
@@ -40,5 +51,6 @@ class HornBunnyIE(InfoExtractor):
             'title': title,
             'ext': 'flv',
             'duration': duration,
-            'view_count': int_or_none(view_count),
+            'view_count': view_count,
+            'age_limit': 18,
         }
index 12e85a716fec900cf01d72157ab4159bc69ae8f8..902d6294498dec327699f51a1bc309f4557b3f87 100644 (file)
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     unified_strdate,
+    qualities,
 )
 
 
@@ -17,7 +18,7 @@ class NPOIE(InfoExtractor):
         'md5': '4b3f9c429157ec4775f2c9cb7b911016',
         'info_dict': {
             'id': 'VPWON_1220719',
-            'ext': 'mp4',
+            'ext': 'm4v',
             'title': 'Nieuwsuur',
             'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
             'upload_date': '20140622',
@@ -39,24 +40,32 @@ class NPOIE(InfoExtractor):
             video_id,
             note='Downloading token'
         )
-        token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token')
-        streams_info = self._download_json(
-            'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token),
-            video_id
-        )
+        token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
 
-        stream_info = self._download_json(
-            streams_info['streams'][0] + '&type=json',
-            video_id,
-            'Downloading stream info'
-        )
+        formats = []
+        quality = qualities(['adaptive', 'h264_sb', 'h264_bb', 'h264_std'])
+        for format_id in metadata['pubopties']:
+            streams_info = self._download_json(
+                'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token),
+                video_id, 'Downloading %s streams info' % format_id)
+            stream_info = self._download_json(
+                streams_info['streams'][0] + '&type=json',
+                video_id, 'Downloading %s stream info' % format_id)
+            if format_id == 'adaptive':
+                formats.extend(self._extract_m3u8_formats(stream_info['url'], video_id))
+            else:
+                formats.append({
+                    'url': stream_info['url'],
+                    'format_id': format_id,
+                    'quality': quality(format_id),
+                })
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': metadata['titel'],
-            'ext': 'mp4',
-            'url': stream_info['url'],
             'description': metadata['info'],
             'thumbnail': metadata['images'][-1]['url'],
             'upload_date': unified_strdate(metadata['gidsdatum']),
+            'formats': formats,
         }
index c7a46eb718ea6beecd1b8b1998334b20bdd43943..7de3c9dd5014586a118ab3eb55365897d160945d 100644 (file)
@@ -23,6 +23,7 @@ class SunPornoIE(InfoExtractor):
             'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 302,
+            'age_limit': 18,
         }
     }
 
@@ -65,4 +66,5 @@ class SunPornoIE(InfoExtractor):
             'view_count': view_count,
             'comment_count': comment_count,
             'formats': formats,
+            'age_limit': 18,
         }
index 78f3b7e7bddb1b085c96ae41696837178c2f42aa..08a04737c3497eab4c43786a4f3e7e0a45417131 100644 (file)
@@ -1055,21 +1055,26 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         self._login()
 
     def _ids_to_results(self, ids):
-        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
-                       for vid_id in ids]
+        return [
+            self.url_result(vid_id, 'Youtube', video_id=vid_id)
+            for vid_id in ids]
 
     def _extract_mix(self, playlist_id):
         # The mixes are generated from a a single video
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
-        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
+        webpage = self._download_webpage(
+            url, playlist_id, u'Downloading Youtube mix')
         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
-        title_span = (search_title('playlist-title') or
-            search_title('title long-title') or search_title('title'))
+        title_span = (
+            search_title('playlist-title') or
+            search_title('title long-title') or
+            search_title('title'))
         title = clean_html(title_span)
-        video_re = r'''(?x)data-video-username=".*?".*?
-                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
-        ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
+        ids = orderedSet(re.findall(
+            r'''(?xs)data-video-username=".*?".*?
+                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
+            webpage))
         url_results = self._ids_to_results(ids)
 
         return self.playlist_result(url_results, playlist_id, title)
@@ -1162,6 +1167,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
             msg = u'Downloading Youtube mix'
             if i > 0:
                 msg += ', retry #%d' % i
+
             webpage = self._download_webpage(url, title, msg)
             ids = orderedSet(re.findall(video_re, webpage))
             if ids:
index 8a36e619ae7246da1f18a4d5fd6cee7b364b81b7..6fe05723484303839947357124fc9d9ebfadcf4e 100644 (file)
@@ -1141,10 +1141,10 @@ else:
     import fcntl
 
     def _lock_file(f, exclusive):
-        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
 
     def _unlock_file(f):
-        fcntl.lockf(f, fcntl.LOCK_UN)
+        fcntl.flock(f, fcntl.LOCK_UN)
 
 
 class locked_file(object):
index 59f6b473653e0a959218b6ec414777145e50ef29..c9005afe152c7d23af315163f0b0ce583bbf7b40 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.09.01.1'
+__version__ = '2014.09.01.2'