X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube-dl;h=c526071e5c6f110a5222cec60a236d5865e0c7d0;hb=c8619e01637ae33ff6ed2a770a6222d792cf0771;hp=ba760da477625c1def3168182b0781f595867fbc;hpb=f995f7127c42b1f912bfbfd9f35b22267c9bf3e7;p=youtube-dl.git diff --git a/youtube-dl b/youtube-dl index ba760da47..c526071e5 100755 --- a/youtube-dl +++ b/youtube-dl @@ -239,6 +239,48 @@ class FileDownloader(object): """Report download finished.""" self.to_stdout(u'') + def process_info(self, info_dict): + """Process a single dictionary returned by an InfoExtractor.""" + # Forced printings + if self.params.get('forcetitle', False): + print info_dict['title'] + if self.params.get('forceurl', False): + print info_dict['url'] + + # Do nothing else if in simulate mode + if self.params.get('simulate', False): + return 0 + + try: + filename = self.params['outtmpl'] % info_dict + self.report_destination(filename) + except (ValueError, KeyError), err: + return self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) + if self.params['nooverwrites'] and os.path.exists(filename): + self.to_stderr('WARNING: file exists: %s; skipping' % filename) + return 0 + try: + self.pmkdir(filename) + except (OSError, IOError), err: + return self.trouble('ERROR: unable to create directories: %s' % str(err)) + try: + outstream = open(filename, 'wb') + except (OSError, IOError), err: + return self.trouble('ERROR: unable to open for writing: %s' % str(err)) + try: + self._do_download(outstream, info_dict['url']) + outstream.close() + except (OSError, IOError), err: + return self.trouble('ERROR: unable to write video data: %s' % str(err)) + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + return self.trouble('ERROR: unable to download video data: %s' % str(err)) + try: + self.post_process(filename, info_dict) + except (PostProcessingError), err: + return self.trouble('ERROR: postprocessing: %s' % str(err)) + + return 0 + def download(self, url_list): """Download a given list of URLs.""" retcode = 0 @@ -248,64 +290,36 @@ class FileDownloader(object): for url in url_list: suitable_found = False for ie in self._ies: + # Go to next InfoExtractor if not suitable if not ie.suitable(url): continue + # Suitable InfoExtractor found suitable_found = True + + # Extract information from URL all_results = ie.extract(url) results = [x for x in all_results if x is not None] + + # See if there were problems extracting any information if len(results) != len(all_results): retcode = self.trouble() + # Two results could go to the same file if len(results) > 1 and self.fixed_template(): raise SameFileError(self.params['outtmpl']) + # Process each result for result in results: - # Forced printings - if self.params.get('forcetitle', False): - print result['title'] - if self.params.get('forceurl', False): - print result['url'] - - # Do nothing else if in simulate mode - if self.params.get('simulate', False): - continue - - try: - filename = self.params['outtmpl'] % result - self.report_destination(filename) - except (ValueError, KeyError), err: - retcode = self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) - continue - if self.params['nooverwrites'] and os.path.exists(filename): - self.to_stderr('WARNING: file exists: %s; skipping' % filename) - continue - try: - self.pmkdir(filename) - except (OSError, IOError), err: - retcode = self.trouble('ERROR: unable to create directories: %s' % str(err)) - continue - try: - outstream = open(filename, 'wb') - except (OSError, IOError), err: - retcode = self.trouble('ERROR: unable to open for writing: %s' % str(err)) - continue - try: - self._do_download(outstream, result['url']) - outstream.close() - except (OSError, IOError), err: - retcode = self.trouble('ERROR: unable to write video data: %s' % str(err)) - continue - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - retcode = self.trouble('ERROR: unable to download video data: %s' % str(err)) - continue - try: - self.post_process(filename, result) - except (PostProcessingError), err: - retcode = self.trouble('ERROR: postprocessing: %s' % str(err)) - continue + result = self.process_info(result) + # Do not overwrite an error code with a success code + if result != 0: + retcode = result + + # Suitable InfoExtractor had been found; go to next URL break + if not suitable_found: retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % url) @@ -435,6 +449,29 @@ class YoutubeIE(InfoExtractor): def suitable(url): return (re.match(YoutubeIE._VALID_URL, url) is not None) + @staticmethod + def htmlentity_transform(matchobj): + """Transforms an HTML entity to a Unicode character.""" + entity = matchobj.group(1) + + # Known non-numeric HTML entity + if entity in htmlentitydefs.name2codepoint: + return unichr(htmlentitydefs.name2codepoint[entity]) + + # Unicode character + mobj = re.match(ur'(?u)#(x?\d+)', entity) + if mobj is not None: + numstr = mobj.group(1) + if numstr.startswith(u'x'): + base = 16 + numstr = u'0%s' % numstr + else: + base = 10 + return unichr(long(numstr, base)) + + # Unknown entity in name, return its literal representation + return (u'&%s;' % entity) + def report_lang(self): """Report attempt to set language.""" self.to_stdout(u'[youtube] Setting language') @@ -458,7 +495,7 @@ class YoutubeIE(InfoExtractor): def report_video_url(self, video_id, video_real_url): """Report extracted video URL.""" self.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url)) - + def _real_initialize(self): if self._downloader is None: return @@ -585,7 +622,7 @@ class YoutubeIE(InfoExtractor): self.to_stderr(u'ERROR: unable to extract video title') return [None] video_title = mobj.group(1).decode('utf-8') - video_title = re.sub(ur'(?u)&(.+?);', lambda x: unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title) + video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) video_title = video_title.replace(os.sep, u'%') # simplified title @@ -728,6 +765,7 @@ class YoutubeSearchIE(InfoExtractor): _VIDEO_INDICATOR = r'href="/watch\?v=.+?"' _MORE_PAGES_INDICATOR = r'>Next' _youtube_ie = None + _max_youtube_results = 1000 def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) @@ -755,13 +793,16 @@ class YoutubeSearchIE(InfoExtractor): if prefix == '': return self._download_n_results(query, 1) elif prefix == 'all': - return self._download_n_results(query, -1) + return self._download_n_results(query, self._max_youtube_results) else: try: n = int(prefix) if n <= 0: self.to_stderr(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return [None] + elif n > self._max_youtube_results: + self.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) + n = self._max_youtube_results return self._download_n_results(query, n) except ValueError: # parsing prefix as int fails return self._download_n_results(query, 1)