X-Git-Url: https://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2FInfoExtractors.py;h=cea30dad81fa4224a848732159aa19684c7d5dbc;hb=33d94a6c999ae784be7529aaaea42adadeab0c27;hp=cfaef29045d95d45fbf7a8baf0b70874e881d0d7;hpb=4cc391461ae5cb9f2626172426a10a146458aa96;p=youtube-dl.git
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index cfaef2904..cea30dad8 100644
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -213,9 +213,9 @@ class YoutubeIE(InfoExtractor):
return srt
def _print_formats(self, formats):
- print 'Available formats:'
+ print('Available formats:')
for x in formats:
- print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))
+ print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
def _real_initialize(self):
if self._downloader is None:
@@ -238,7 +238,7 @@ class YoutubeIE(InfoExtractor):
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err:
- self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
+ self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
return
# Set language
@@ -247,7 +247,7 @@ class YoutubeIE(InfoExtractor):
self.report_lang()
urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
+ self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err))
return
# No authentication to be performed
@@ -270,7 +270,7 @@ class YoutubeIE(InfoExtractor):
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
return
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
+ self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
return
# Confirm age
@@ -283,7 +283,7 @@ class YoutubeIE(InfoExtractor):
self.report_age_confirmation()
age_results = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
return
def _real_extract(self, url):
@@ -305,7 +305,7 @@ class YoutubeIE(InfoExtractor):
try:
video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
# Attempt to extract SWF player URL
@@ -327,7 +327,7 @@ class YoutubeIE(InfoExtractor):
if 'token' in video_info:
break
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
return
if 'token' not in video_info:
if 'reason' in video_info:
@@ -390,7 +390,7 @@ class YoutubeIE(InfoExtractor):
try:
srt_list = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+ raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
if not srt_lang_list:
@@ -407,7 +407,7 @@ class YoutubeIE(InfoExtractor):
try:
srt_xml = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+ raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
if not srt_xml:
raise Trouble(u'WARNING: unable to download video subtitles')
video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
@@ -526,7 +526,7 @@ class MetacafeIE(InfoExtractor):
self.report_disclaimer()
disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err))
return
# Confirm age
@@ -539,7 +539,7 @@ class MetacafeIE(InfoExtractor):
self.report_age_confirmation()
disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
return
def _real_extract(self, url):
@@ -563,7 +563,7 @@ class MetacafeIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader and title from webpage
@@ -656,7 +656,7 @@ class DailymotionIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader and title from webpage
@@ -754,7 +754,7 @@ class GoogleIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader, and title from webpage
@@ -793,7 +793,7 @@ class GoogleIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
mobj = re.search(r'
', webpage)
if mobj is None:
@@ -849,7 +849,7 @@ class PhotobucketIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader, and title from webpage
@@ -919,7 +919,7 @@ class YahooIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
@@ -943,7 +943,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract uploader and title from webpage
@@ -1001,7 +1001,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract media URL from playlist XML
@@ -1030,7 +1030,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
- _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
+ _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)'
IE_NAME = u'vimeo'
def __init__(self, downloader=None):
@@ -1059,7 +1059,7 @@ class VimeoIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Now we begin extracting as much information as we can from what we
@@ -1100,21 +1100,32 @@ class VimeoIE(InfoExtractor):
timestamp = config['request']['timestamp']
# Vimeo specific: extract video codec and quality information
+ # First consider quality, then codecs, then take everything
# TODO bind to format param
codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
- for codec in codecs:
- if codec[0] in config["video"]["files"]:
- video_codec = codec[0]
- video_extension = codec[1]
- if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd'
- else: quality = 'sd'
+ files = { 'hd': [], 'sd': [], 'other': []}
+ for codec_name, codec_extension in codecs:
+ if codec_name in config["video"]["files"]:
+ if 'hd' in config["video"]["files"][codec_name]:
+ files['hd'].append((codec_name, codec_extension, 'hd'))
+ elif 'sd' in config["video"]["files"][codec_name]:
+ files['sd'].append((codec_name, codec_extension, 'sd'))
+ else:
+ files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
+
+ for quality in ('hd', 'sd', 'other'):
+ if len(files[quality]) > 0:
+ video_quality = files[quality][0][2]
+ video_codec = files[quality][0][0]
+ video_extension = files[quality][0][1]
+ self._downloader.to_screen(u'[vimeo] %s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality))
break
else:
self._downloader.trouble(u'ERROR: no known codec found')
return
video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
- %(video_id, sig, timestamp, quality, video_codec.upper())
+ %(video_id, sig, timestamp, video_quality, video_codec.upper())
return [{
'id': video_id,
@@ -1129,6 +1140,143 @@ class VimeoIE(InfoExtractor):
}]
+class ArteTvIE(InfoExtractor):
+ """arte.tv information extractor."""
+
+ _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
+ _LIVE_URL = r'index-[0-9]+\.html$'
+
+ IE_NAME = u'arte.tv'
+
+ def __init__(self, downloader=None):
+ InfoExtractor.__init__(self, downloader)
+
+ def report_download_webpage(self, video_id):
+ """Report webpage download."""
+ self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)
+
+ def report_extraction(self, video_id):
+ """Report information extraction."""
+ self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)
+
+ def fetch_webpage(self, url):
+ self._downloader.increment_downloads()
+ request = urllib2.Request(url)
+ try:
+ self.report_download_webpage(url)
+ webpage = urllib2.urlopen(request).read()
+ except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ return
+ except ValueError, err:
+ self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+ return
+ return webpage
+
+ def grep_webpage(self, url, regex, regexFlags, matchTuples):
+ page = self.fetch_webpage(url)
+ mobj = re.search(regex, page, regexFlags)
+ info = {}
+
+ if mobj is None:
+ self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+ return
+
+ for (i, key, err) in matchTuples:
+ if mobj.group(i) is None:
+ self._downloader.trouble(err)
+ return
+ else:
+ info[key] = mobj.group(i)
+
+ return info
+
+ def extractLiveStream(self, url):
+ video_lang = url.split('/')[-4]
+ info = self.grep_webpage(
+ url,
+ r'src="(.*?/videothek_js.*?\.js)',
+ 0,
+ [
+ (1, 'url', u'ERROR: Invalid URL: %s' % url)
+ ]
+ )
+ http_host = url.split('/')[2]
+ next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url')))
+ info = self.grep_webpage(
+ next_url,
+ r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
+ '(http://.*?\.swf).*?' +
+ '(rtmp://.*?)\'',
+ re.DOTALL,
+ [
+ (1, 'path', u'ERROR: could not extract video path: %s' % url),
+ (2, 'player', u'ERROR: could not extract video player: %s' % url),
+ (3, 'url', u'ERROR: could not extract video url: %s' % url)
+ ]
+ )
+ video_url = u'%s/%s' % (info.get('url'), info.get('path'))
+
+ def extractPlus7Stream(self, url):
+ video_lang = url.split('/')[-3]
+ info = self.grep_webpage(
+ url,
+ r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
+ 0,
+ [
+ (1, 'url', u'ERROR: Invalid URL: %s' % url)
+ ]
+ )
+ next_url = urllib.unquote(info.get('url'))
+ info = self.grep_webpage(
+ next_url,
+ r'