X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube-dl;h=cedbf5977667d2100fe27184a222f00908be8f4c;hb=3aaf887e9890ca48858751b175d998efc35acb02;hp=7c01f4fdb4d6e10ed15465609236035b4c861588;hpb=c6b311c5248ec53bfa9267f274ed690783cbc3f1;p=youtube-dl.git

diff --git a/youtube-dl b/youtube-dl
index 7c01f4fdb..cedbf5977 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -18,8 +18,8 @@ import time
 import urllib
 import urllib2
 
-std_headers = {	
-	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5',
+std_headers = {
+	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8',
 	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
 	'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
 	'Accept-Language': 'en-us,en;q=0.5',
@@ -95,11 +95,13 @@ class FileDownloader(object):
 	params = None
 	_ies = []
 	_pps = []
+	_download_retcode = None
 
 	def __init__(self, params):
 		"""Create a FileDownloader object with the given options."""
 		self._ies = []
 		self._pps = []
+		self._download_retcode = 0
 		self.params = params
 	
 	@staticmethod
@@ -203,15 +205,13 @@ class FileDownloader(object):
 
 		Depending on if the downloader has been configured to ignore
 		download errors or not, this method may throw an exception or
-		not when errors are found, after printing the message. If it
-		doesn't raise, it returns an error code suitable to be returned
-		later as a program exit code to indicate error.
+		not when errors are found, after printing the message.
 		"""
 		if message is not None:
 			self.to_stderr(message)
 		if not self.params.get('ignoreerrors', False):
 			raise DownloadError(message)
-		return 1
+		self._download_retcode = 1
 
 	def slow_down(self, start_time, byte_counter):
 		"""Sleep if the download speed is over the rate limit."""
@@ -239,77 +239,91 @@ class FileDownloader(object):
 		"""Report download finished."""
 		self.to_stdout(u'')
 
+	def process_info(self, info_dict):
+		"""Process a single dictionary returned by an InfoExtractor: forced printings, download, postprocessing."""
+		# Forced printings
+		if self.params.get('forcetitle', False):
+			print info_dict['title']
+		if self.params.get('forceurl', False):
+			print info_dict['url']
+
+		# Do nothing else if in simulate mode
+		if self.params.get('simulate', False):
+			return
+
+		try:
+			filename = self.params['outtmpl'] % info_dict
+			self.report_destination(filename)
+		except (ValueError, KeyError), err:
+			self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
+			return # trouble() returns when 'ignoreerrors' is set; filename may be unbound here
+		if self.params['nooverwrites'] and os.path.exists(filename):
+			self.to_stderr('WARNING: file exists: %s; skipping' % filename)
+			return
+		try:
+			self.pmkdir(filename)
+		except (OSError, IOError), err:
+			self.trouble('ERROR: unable to create directories: %s' % str(err))
+			return
+		try:
+			outstream = open(filename, 'wb')
+		except (OSError, IOError), err:
+			self.trouble('ERROR: unable to open for writing: %s' % str(err))
+			return
+		try:
+			try:
+				self._do_download(outstream, info_dict['url'])
+			finally: # close the output file even if the download fails
+				outstream.close()
+		except (OSError, IOError), err:
+			self.trouble('ERROR: unable to write video data: %s' % str(err))
+			return
+		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+			self.trouble('ERROR: unable to download video data: %s' % str(err))
+			return
+		try:
+			self.post_process(filename, info_dict)
+		except (PostProcessingError), err:
+			self.trouble('ERROR: postprocessing: %s' % str(err))
+
 	def download(self, url_list):
 		"""Download a given list of URLs."""
-		retcode = 0
 		if len(url_list) > 1 and self.fixed_template():
 			raise SameFileError(self.params['outtmpl'])
 
 		for url in url_list:
 			suitable_found = False
 			for ie in self._ies:
+				# Go to next InfoExtractor if not suitable
 				if not ie.suitable(url):
 					continue
+
 				# Suitable InfoExtractor found
 				suitable_found = True
+
+				# Extract information from URL
 				all_results = ie.extract(url)
 				results = [x for x in all_results if x is not None]
+
+				# See if there were problems extracting any information
 				if len(results) != len(all_results):
-					retcode = self.trouble()
+					self.trouble()
 
+				# Two results could go to the same file
 				if len(results) > 1 and self.fixed_template():
 					raise SameFileError(self.params['outtmpl'])
 
+				# Process each result
 				for result in results:
-					# Forced printings
-					if self.params.get('forcetitle', False):
-						print result['title']
-					if self.params.get('forceurl', False):
-						print result['url']
-						
-					# Do nothing else if in simulate mode
-					if self.params.get('simulate', False):
-						continue
-
-					try:
-						filename = self.params['outtmpl'] % result
-						self.report_destination(filename)
-					except (ValueError, KeyError), err:
-						retcode = self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
-						continue
-					if self.params['nooverwrites'] and os.path.exists(filename):
-						self.to_stderr('WARNING: file exists: %s; skipping' % filename)
-						continue
-					try:
-						self.pmkdir(filename)
-					except (OSError, IOError), err:
-						retcode = self.trouble('ERROR: unable to create directories: %s' % str(err))
-						continue
-					try:
-						outstream = open(filename, 'wb')
-					except (OSError, IOError), err:
-						retcode = self.trouble('ERROR: unable to open for writing: %s' % str(err))
-						continue
-					try:
-						self._do_download(outstream, result['url'])
-						outstream.close()
-					except (OSError, IOError), err:
-						retcode = self.trouble('ERROR: unable to write video data: %s' % str(err))
-						continue
-					except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-						retcode = self.trouble('ERROR: unable to download video data: %s' % str(err))
-						continue
-					try:
-						self.post_process(filename, result)
-					except (PostProcessingError), err:
-						retcode = self.trouble('ERROR: postprocessing: %s' % str(err))
-						continue
+					self.process_info(result)
 
+				# Suitable InfoExtractor had been found; go to next URL
 				break
+
 			if not suitable_found:
-				retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
+				self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
 
-		return retcode
+		return self._download_retcode
 
 	def post_process(self, filename, ie_info):
 		"""Run the postprocessing chain on the given file."""
@@ -405,15 +419,6 @@ class InfoExtractor(object):
 		"""Sets the downloader for this IE."""
 		self._downloader = downloader
 	
-	def to_stdout(self, message):
-		"""Print message to stdout if downloader is not in quiet mode."""
-		if self._downloader is None or not self._downloader.params.get('quiet', False):
-			print message
-	
-	def to_stderr(self, message):
-		"""Print message to stderr."""
-		print >>sys.stderr, message
-
 	def _real_initialize(self):
 		"""Real initialization process. Redefine in subclasses."""
 		pass
@@ -435,30 +440,53 @@ class YoutubeIE(InfoExtractor):
 	def suitable(url):
 		return (re.match(YoutubeIE._VALID_URL, url) is not None)
 
+	@staticmethod
+	def htmlentity_transform(matchobj):
+		"""Transforms an HTML entity to a Unicode character (re.sub callback; group 1 is the entity body)."""
+		entity = matchobj.group(1)
+
+		# Known non-numeric HTML entity
+		if entity in htmlentitydefs.name2codepoint:
+			return unichr(htmlentitydefs.name2codepoint[entity])
+
+		# Numeric character reference: decimal (#NNN) or hexadecimal (#xHHH), matched in full
+		mobj = re.match(ur'(?u)#(x[0-9a-fA-F]+|\d+)$', entity)
+		if mobj is not None:
+			numstr = mobj.group(1)
+			if numstr.startswith(u'x'):
+				base = 16
+				numstr = u'0%s' % numstr
+			else:
+				base = 10
+			return unichr(long(numstr, base))
+
+		# Unknown entity in name, return its literal representation
+		return (u'&%s;' % entity)
+
 	def report_lang(self):
 		"""Report attempt to set language."""
-		self.to_stdout(u'[youtube] Setting language')
+		self._downloader.to_stdout(u'[youtube] Setting language')
 
 	def report_login(self):
 		"""Report attempt to log in."""
-		self.to_stdout(u'[youtube] Logging in')
+		self._downloader.to_stdout(u'[youtube] Logging in')
 	
 	def report_age_confirmation(self):
 		"""Report attempt to confirm age."""
-		self.to_stdout(u'[youtube] Confirming age')
+		self._downloader.to_stdout(u'[youtube] Confirming age')
 	
 	def report_webpage_download(self, video_id):
 		"""Report attempt to download webpage."""
-		self.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
+		self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
 	
 	def report_information_extraction(self, video_id):
 		"""Report attempt to extract video information."""
-		self.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
+		self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
 	
 	def report_video_url(self, video_id, video_real_url):
 		"""Report extracted video URL."""
-		self.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
-
+		self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
+	
 	def _real_initialize(self):
 		if self._downloader is None:
 			return
@@ -480,7 +508,7 @@ class YoutubeIE(InfoExtractor):
 				else:
 					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 			except (IOError, netrc.NetrcParseError), err:
-				self.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
+				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
 				return
 
 		# Set language
@@ -489,7 +517,7 @@ class YoutubeIE(InfoExtractor):
 			self.report_lang()
 			urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'WARNING: unable to set language: %s' % str(err))
+			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
 			return
 
 		# No authentication to be performed
@@ -509,10 +537,10 @@ class YoutubeIE(InfoExtractor):
 			self.report_login()
 			login_results = urllib2.urlopen(request).read()
 			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
-				self.to_stderr(u'WARNING: unable to log in: bad username or password')
+				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 				return
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'WARNING: unable to log in: %s' % str(err))
+			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
 			return
 	
 		# Confirm age
@@ -525,14 +553,14 @@ class YoutubeIE(InfoExtractor):
 			self.report_age_confirmation()
 			age_results = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err))
+			self._downloader.to_stderr(u'ERROR: unable to confirm age: %s' % str(err))
 			return
 
 	def _real_extract(self, url):
 		# Extract video id from URL
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
-			self.to_stderr(u'ERROR: invalid URL: %s' % url)
+			self._downloader.to_stderr(u'ERROR: invalid URL: %s' % url)
 			return [None]
 		video_id = mobj.group(2)
 
@@ -558,16 +586,16 @@ class YoutubeIE(InfoExtractor):
 			self.report_webpage_download(video_id)
 			video_webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'ERROR: unable to download video webpage: %s' % str(err))
+			self._downloader.to_stderr(u'ERROR: unable to download video webpage: %s' % str(err))
 			return [None]
 		self.report_information_extraction(video_id)
 		
 		# "t" param
 		mobj = re.search(r', "t": "([^"]+)"', video_webpage)
 		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract "t" parameter')
+			self._downloader.to_stderr(u'ERROR: unable to extract "t" parameter')
 			return [None]
-		video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1))
+		video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
 		if format_param is not None:
 			video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
 		self.report_video_url(video_id, video_real_url)
@@ -575,24 +603,24 @@ class YoutubeIE(InfoExtractor):
 		# uploader
 		mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
 		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract uploader nickname')
+			self._downloader.to_stderr(u'ERROR: unable to extract uploader nickname')
 			return [None]
 		video_uploader = mobj.group(1)
 
 		# title
 		mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
 		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract video title')
+			self._downloader.to_stderr(u'ERROR: unable to extract video title')
 			return [None]
 		video_title = mobj.group(1).decode('utf-8')
-		video_title = re.sub(ur'(?u)&(.+?);', lambda x: unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title)
+		video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
 		video_title = video_title.replace(os.sep, u'%')
 
 		# simplified title
 		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
 		simple_title = simple_title.strip(ur'_')
 
-		# Return information
+		# Process video information
 		return [{
 			'id':		video_id.decode('utf-8'),
 			'url':		video_real_url.decode('utf-8'),
@@ -619,19 +647,19 @@ class MetacafeIE(InfoExtractor):
 
 	def report_disclaimer(self):
 		"""Report disclaimer retrieval."""
-		self.to_stdout(u'[metacafe] Retrieving disclaimer')
+		self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
 
 	def report_age_confirmation(self):
 		"""Report attempt to confirm age."""
-		self.to_stdout(u'[metacafe] Confirming age')
+		self._downloader.to_stdout(u'[metacafe] Confirming age')
 	
 	def report_download_webpage(self, video_id):
 		"""Report webpage download."""
-		self.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
+		self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
 	
 	def report_extraction(self, video_id):
 		"""Report information extraction."""
-		self.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
+		self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
 
 	def _real_initialize(self):
 		# Retrieve disclaimer
@@ -640,7 +668,7 @@ class MetacafeIE(InfoExtractor):
 			self.report_disclaimer()
 			disclaimer = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
+			self._downloader.to_stderr(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
 			return
 
 		# Confirm age
@@ -653,14 +681,14 @@ class MetacafeIE(InfoExtractor):
 			self.report_age_confirmation()
 			disclaimer = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err))
+			self._downloader.to_stderr(u'ERROR: unable to confirm age: %s' % str(err))
 			return
 	
 	def _real_extract(self, url):
 		# Extract id and simplified title from URL
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
-			self.to_stderr(u'ERROR: invalid URL: %s' % url)
+			self._downloader.to_stderr(u'ERROR: invalid URL: %s' % url)
 			return [None]
 
 		video_id = mobj.group(1)
@@ -679,20 +707,20 @@ class MetacafeIE(InfoExtractor):
 			self.report_download_webpage(video_id)
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self.to_stderr(u'ERROR: unable retrieve video webpage: %s' % str(err))
+			self._downloader.to_stderr(u'ERROR: unable retrieve video webpage: %s' % str(err))
 			return [None]
 
 		# Extract URL, uploader and title from webpage
 		self.report_extraction(video_id)
 		mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage)
 		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract media URL')
+			self._downloader.to_stderr(u'ERROR: unable to extract media URL')
 			return [None]
 		mediaURL = mobj.group(1).replace('\\', '')
 
 		mobj = re.search(r'(?m)"gdaKey":"(.*?)"', webpage)
 		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract gdaKey')
+			self._downloader.to_stderr(u'ERROR: unable to extract gdaKey')
 			return [None]
 		gdaKey = mobj.group(1)
 
@@ -700,13 +728,13 @@ class MetacafeIE(InfoExtractor):
 
 		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
 		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract title')
+			self._downloader.to_stderr(u'ERROR: unable to extract title')
 			return [None]
 		video_title = mobj.group(1).decode('utf-8')
 
 		mobj = re.search(r'(?m)<li id="ChnlUsr">.*?Submitter:<br />(.*?)</li>', webpage)
 		if mobj is None:
-			self.to_stderr(u'ERROR: unable to extract uploader nickname')
+			self._downloader.to_stderr(u'ERROR: unable to extract uploader nickname')
 			return [None]
 		video_uploader = re.sub(r'<.*?>', '', mobj.group(1))
 
@@ -728,8 +756,9 @@ class YoutubeSearchIE(InfoExtractor):
 	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
 	_MORE_PAGES_INDICATOR = r'>Next</a>'
 	_youtube_ie = None
+	_max_youtube_results = 1000
 
-	def __init__(self, youtube_ie, downloader=None): 
+	def __init__(self, youtube_ie, downloader=None):
 		InfoExtractor.__init__(self, downloader)
 		self._youtube_ie = youtube_ie
 	
@@ -739,7 +768,7 @@ class YoutubeSearchIE(InfoExtractor):
 
 	def report_download_page(self, query, pagenum):
 		"""Report attempt to download playlist page with given number."""
-		self.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
+		self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
 
 	def _real_initialize(self):
 		self._youtube_ie.initialize()
@@ -747,21 +776,24 @@ class YoutubeSearchIE(InfoExtractor):
 	def _real_extract(self, query):
 		mobj = re.match(self._VALID_QUERY, query)
 		if mobj is None:
-			self.to_stderr(u'ERROR: invalid search query "%s"' % query)
+			self._downloader.to_stderr(u'ERROR: invalid search query "%s"' % query)
 			return [None]
 
 		prefix, query = query.split(':')
 		prefix = prefix[8:]
-		if prefix == '': 
+		if prefix == '':
 			return self._download_n_results(query, 1)
-		elif prefix == 'all': 
-			return self._download_n_results(query, -1)
-		else: 
+		elif prefix == 'all':
+			return self._download_n_results(query, self._max_youtube_results)
+		else:
 			try:
 				n = int(prefix)
 				if n <= 0:
-					self.to_stderr(u'ERROR: invalid download number %s for query "%s"' % (n, query))
+					self._downloader.to_stderr(u'ERROR: invalid download number %s for query "%s"' % (n, query))
 					return [None]
+				elif n > self._max_youtube_results:
+					self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
+					n = self._max_youtube_results
 				return self._download_n_results(query, n)
 			except ValueError: # parsing prefix as int fails
 				return self._download_n_results(query, 1)
@@ -780,7 +812,7 @@ class YoutubeSearchIE(InfoExtractor):
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err))
+				self._downloader.to_stderr(u'ERROR: unable to download webpage: %s' % str(err))
 				return [None]
 
 			# Extract video identifiers
@@ -823,7 +855,7 @@ class YoutubePlaylistIE(InfoExtractor):
 
 	def report_download_page(self, playlist_id, pagenum):
 		"""Report attempt to download playlist page with given number."""
-		self.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
+		self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
 
 	def _real_initialize(self):
 		self._youtube_ie.initialize()
@@ -832,7 +864,7 @@ class YoutubePlaylistIE(InfoExtractor):
 		# Extract playlist id
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
-			self.to_stderr(u'ERROR: invalid url: %s' % url)
+			self._downloader.to_stderr(u'ERROR: invalid url: %s' % url)
 			return [None]
 
 		# Download playlist pages
@@ -846,7 +878,7 @@ class YoutubePlaylistIE(InfoExtractor):
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err))
+				self._downloader.to_stderr(u'ERROR: unable to download webpage: %s' % str(err))
 				return [None]
 
 			# Extract video identifiers
@@ -887,15 +919,6 @@ class PostProcessor(object):
 	def __init__(self, downloader=None):
 		self._downloader = downloader
 
-	def to_stdout(self, message):
-		"""Print message to stdout if downloader is not in quiet mode."""
-		if self._downloader is None or not self._downloader.params.get('quiet', False):
-			print message
-	
-	def to_stderr(self, message):
-		"""Print message to stderr."""
-		print >>sys.stderr, message
-
 	def set_downloader(self, downloader):
 		"""Sets the downloader for this PP."""
 		self._downloader = downloader
@@ -935,7 +958,7 @@ if __name__ == '__main__':
 		# Parse command line
 		parser = optparse.OptionParser(
 				usage='Usage: %prog [options] url...',
-				version='2009.03.28',
+				version='INTERNAL',
 				conflict_handler='resolve',
 				)
 		parser.add_option('-h', '--help',
@@ -1013,7 +1036,7 @@ if __name__ == '__main__':
 		youtube_search_ie = YoutubeSearchIE(youtube_ie)
 
 		# File downloader
-		charset = locale.getdefaultlocale()[1]
+		charset = locale.getpreferredencoding()
 		if charset is None:
 			charset = 'ascii'
 		fd = FileDownloader({