X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2FInfoExtractors.py;h=0fc39163ee2a74131ed84442c68f388a72fad0f1;hb=f36cd076850faf4b2859a168fcb740dfccb9eed6;hp=c9c563599ea1782b313fbb69dcd60a998fbe4583;hpb=d77c3dfd027e9af4d44fc7109fac0012451268c2;p=youtube-dl.git

diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index c9c563599..0fc39163e 100644
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -12,29 +12,15 @@ import time
 import urllib
 import urllib2
 import email.utils
+import xml.etree.ElementTree
+from urlparse import parse_qs
 
 try:
 	import cStringIO as StringIO
 except ImportError:
 	import StringIO
 
-# parse_qs was moved from the cgi module to the urlparse module recently.
-try:
-	from urlparse import parse_qs
-except ImportError:
-	from cgi import parse_qs
-
-try:
-	import lxml.etree
-except ImportError:
-	pass # Handled below
-
-try:
-	import xml.etree.ElementTree
-except ImportError: # Python<2.5: Not officially supported, but let it slip
-	warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')
-
-from Utils import *
+from utils import *
 
 
 class InfoExtractor(object):
@@ -53,7 +39,6 @@ class InfoExtractor(object):
 	url:		Final video URL.
 	uploader:	Nickname of the video uploader.
 	title:		Literal title.
-	stitle:		Simplified title.
 	ext:		Video filename extension.
 	format:		Video format.
 	player_url:	SWF Player URL (may be None).
@@ -117,8 +102,8 @@ class YoutubeIE(InfoExtractor):
 	_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 	_NETRC_MACHINE = 'youtube'
 	# Listed in order of quality
-	_available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
-	_available_formats_prefer_free = ['38', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
+	_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
+	_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
 	_video_extensions = {
 		'13': '3gp',
 		'17': 'mp4',
@@ -129,6 +114,7 @@ class YoutubeIE(InfoExtractor):
 		'43': 'webm',
 		'44': 'webm',
 		'45': 'webm',
+		'46': 'webm',
 	}
 	_video_dimensions = {
 		'5': '240x400',
@@ -144,6 +130,7 @@ class YoutubeIE(InfoExtractor):
 		'43': '360x640',
 		'44': '480x854',
 		'45': '720x1280',
+		'46': '1080x1920',
 	}	
 	IE_NAME = u'youtube'
 
@@ -193,8 +180,8 @@ class YoutubeIE(InfoExtractor):
 			end = start + float(dur)
 			start = "%02i:%02i:%02i,%03i" %(start/(60*60), start/60%60, start%60, start%1*1000)
 			end = "%02i:%02i:%02i,%03i" %(end/(60*60), end/60%60, end%60, end%1*1000)
-			caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption)
-			caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption) # double cycle, inentional
+			caption = unescapeHTML(caption)
+			caption = unescapeHTML(caption) # double cycle, intentional
 			srt += str(n) + '\n'
 			srt += start + ' --> ' + end + '\n'
 			srt += caption + '\n\n'
@@ -339,10 +326,6 @@ class YoutubeIE(InfoExtractor):
 			return
 		video_title = urllib.unquote_plus(video_info['title'][0])
 		video_title = video_title.decode('utf-8')
-		video_title = sanitize_title(video_title)
-
-		# simplified title
-		simple_title = simplify_title(video_title)
 
 		# thumbnail image
 		if 'thumbnail_url' not in video_info:
@@ -364,49 +347,39 @@ class YoutubeIE(InfoExtractor):
 					pass
 
 		# description
-		try:
-			lxml.etree
-		except NameError:
-			video_description = u'No description available.'
-			mobj = re.search(r'<meta name="description" content="(.*?)">', video_webpage)
-			if mobj is not None:
-				video_description = mobj.group(1).decode('utf-8')
-		else:
-			html_parser = lxml.etree.HTMLParser(encoding='utf-8')
-			vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
-			video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
-			# TODO use another parser
+		video_description = get_element_by_id("eow-description", video_webpage.decode('utf8'))
+		if video_description: video_description = clean_html(video_description)
+		else: video_description = ''
 			
 		# closed captions
 		video_subtitles = None
 		if self._downloader.params.get('writesubtitles', False):
-			self.report_video_subtitles_download(video_id)
-			request = urllib2.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
 			try:
-				srt_list = urllib2.urlopen(request).read()
-			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
-			else:
+				self.report_video_subtitles_download(video_id)
+				request = urllib2.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
+				try:
+					srt_list = urllib2.urlopen(request).read()
+				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+					raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
 				srt_lang_list = re.findall(r'lang_code="([\w\-]+)"', srt_list)
-				if srt_lang_list:
-					if self._downloader.params.get('subtitleslang', False):
-						srt_lang = self._downloader.params.get('subtitleslang')
-					elif 'en' in srt_lang_list:
-						srt_lang = 'en'
-					else:
-						srt_lang = srt_lang_list[0]
-					if not srt_lang in srt_lang_list:
-						self._downloader.trouble(u'WARNING: no closed captions found in the specified language')
-					else:
-						request = urllib2.Request('http://video.google.com/timedtext?hl=en&lang=%s&v=%s' % (srt_lang, video_id))
-						try:
-							srt_xml = urllib2.urlopen(request).read()
-						except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-							self._downloader.trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
-						else:
-							video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
+				if not srt_lang_list:
+					raise Trouble(u'WARNING: video has no closed captions')
+				if self._downloader.params.get('subtitleslang', False):
+					srt_lang = self._downloader.params.get('subtitleslang')
+				elif 'en' in srt_lang_list:
+					srt_lang = 'en'
 				else:
-					self._downloader.trouble(u'WARNING: video has no closed captions')
+					srt_lang = srt_lang_list[0]
+				if not srt_lang in srt_lang_list:
+					raise Trouble(u'WARNING: no closed captions found in the specified language')
+				request = urllib2.Request('http://video.google.com/timedtext?hl=en&lang=%s&v=%s' % (srt_lang, video_id))
+				try:
+					srt_xml = urllib2.urlopen(request).read()
+				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+					raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+				video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
+			except Trouble as trouble:
+				self._downloader.trouble(trouble[0])
 
 		# token
 		video_token = urllib.unquote_plus(video_info['token'][0])
@@ -458,31 +431,25 @@ class YoutubeIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
 			return
 
+		results = []
 		for format_param, video_real_url in video_url_list:
-			# At this point we have a new video
-			self._downloader.increment_downloads()
-
 			# Extension
 			video_extension = self._video_extensions.get(format_param, 'flv')
 
-			try:
-				# Process video information
-				self._downloader.process_info({
-					'id':		video_id.decode('utf-8'),
-					'url':		video_real_url.decode('utf-8'),
-					'uploader':	video_uploader.decode('utf-8'),
-					'upload_date':	upload_date,
-					'title':	video_title,
-					'stitle':	simple_title,
-					'ext':		video_extension.decode('utf-8'),
-					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
-					'thumbnail':	video_thumbnail.decode('utf-8'),
-					'description':	video_description,
-					'player_url':	player_url,
-					'subtitles':	video_subtitles
-				})
-			except UnavailableVideoError, err:
-				self._downloader.trouble(u'\nERROR: unable to download video')
+			results.append({
+				'id':		video_id.decode('utf-8'),
+				'url':		video_real_url.decode('utf-8'),
+				'uploader':	video_uploader.decode('utf-8'),
+				'upload_date':	upload_date,
+				'title':	video_title,
+				'ext':		video_extension.decode('utf-8'),
+				'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
+				'thumbnail':	video_thumbnail.decode('utf-8'),
+				'description':	video_description,
+				'player_url':	player_url,
+				'subtitles':	video_subtitles
+			})
+		return results
 
 
 class MetacafeIE(InfoExtractor):
@@ -491,12 +458,10 @@ class MetacafeIE(InfoExtractor):
 	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
 	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
 	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
-	_youtube_ie = None
 	IE_NAME = u'metacafe'
 
-	def __init__(self, youtube_ie, downloader=None):
+	def __init__(self, downloader=None):
 		InfoExtractor.__init__(self, downloader)
-		self._youtube_ie = youtube_ie
 
 	def report_disclaimer(self):
 		"""Report disclaimer retrieval."""
@@ -549,14 +514,9 @@ class MetacafeIE(InfoExtractor):
 		# Check if video comes from YouTube
 		mobj2 = re.match(r'^yt-(.*)$', video_id)
 		if mobj2 is not None:
-			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
+			self._downloader.download(['http://www.youtube.com/watch?v=%s' % mobj2.group(1)])
 			return
 
-		# At this point we have a new video
-		self._downloader.increment_downloads()
-
-		simple_title = mobj.group(2).decode('utf-8')
-
 		# Retrieve video webpage to extract further information
 		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
 		try:
@@ -602,7 +562,6 @@ class MetacafeIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: unable to extract title')
 			return
 		video_title = mobj.group(1).decode('utf-8')
-		video_title = sanitize_title(video_title)
 
 		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
 		if mobj is None:
@@ -610,21 +569,16 @@ class MetacafeIE(InfoExtractor):
 			return
 		video_uploader = mobj.group(1)
 
-		try:
-			# Process video information
-			self._downloader.process_info({
-				'id':		video_id.decode('utf-8'),
-				'url':		video_url.decode('utf-8'),
-				'uploader':	video_uploader.decode('utf-8'),
-				'upload_date':	u'NA',
-				'title':	video_title,
-				'stitle':	simple_title,
-				'ext':		video_extension.decode('utf-8'),
-				'format':	u'NA',
-				'player_url':	None,
-			})
-		except UnavailableVideoError:
-			self._downloader.trouble(u'\nERROR: unable to download video')
+		return [{
+			'id':		video_id.decode('utf-8'),
+			'url':		video_url.decode('utf-8'),
+			'uploader':	video_uploader.decode('utf-8'),
+			'upload_date':	u'NA',
+			'title':	video_title,
+			'ext':		video_extension.decode('utf-8'),
+			'format':	u'NA',
+			'player_url':	None,
+		}]
 
 
 class DailymotionIE(InfoExtractor):
@@ -651,8 +605,6 @@ class DailymotionIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 			return
 
-		# At this point we have a new video
-		self._downloader.increment_downloads()
 		video_id = mobj.group(1)
 
 		video_extension = 'flv'
@@ -689,8 +641,6 @@ class DailymotionIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: unable to extract title')
 			return
 		video_title = unescapeHTML(mobj.group('title').decode('utf-8'))
-		video_title = sanitize_title(video_title)
-		simple_title = simplify_title(video_title)
 
 		mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
 		if mobj is None:
@@ -698,21 +648,16 @@ class DailymotionIE(InfoExtractor):
 			return
 		video_uploader = mobj.group(1)
 
-		try:
-			# Process video information
-			self._downloader.process_info({
-				'id':		video_id.decode('utf-8'),
-				'url':		video_url.decode('utf-8'),
-				'uploader':	video_uploader.decode('utf-8'),
-				'upload_date':	u'NA',
-				'title':	video_title,
-				'stitle':	simple_title,
-				'ext':		video_extension.decode('utf-8'),
-				'format':	u'NA',
-				'player_url':	None,
-			})
-		except UnavailableVideoError:
-			self._downloader.trouble(u'\nERROR: unable to download video')
+		return [{
+			'id':		video_id.decode('utf-8'),
+			'url':		video_url.decode('utf-8'),
+			'uploader':	video_uploader.decode('utf-8'),
+			'upload_date':	u'NA',
+			'title':	video_title,
+			'ext':		video_extension.decode('utf-8'),
+			'format':	u'NA',
+			'player_url':	None,
+		}]
 
 
 class GoogleIE(InfoExtractor):
@@ -739,8 +684,6 @@ class GoogleIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
 			return
 
-		# At this point we have a new video
-		self._downloader.increment_downloads()
 		video_id = mobj.group(1)
 
 		video_extension = 'mp4'
@@ -774,8 +717,6 @@ class GoogleIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: unable to extract title')
 			return
 		video_title = mobj.group(1).decode('utf-8')
-		video_title = sanitize_title(video_title)
-		simple_title = simplify_title(video_title)
 
 		# Extract video description
 		mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
@@ -802,21 +743,16 @@ class GoogleIE(InfoExtractor):
 		else:	# we need something to pass to process_info
 			video_thumbnail = ''
 
-		try:
-			# Process video information
-			self._downloader.process_info({
-				'id':		video_id.decode('utf-8'),
-				'url':		video_url.decode('utf-8'),
-				'uploader':	u'NA',
-				'upload_date':	u'NA',
-				'title':	video_title,
-				'stitle':	simple_title,
-				'ext':		video_extension.decode('utf-8'),
-				'format':	u'NA',
-				'player_url':	None,
-			})
-		except UnavailableVideoError:
-			self._downloader.trouble(u'\nERROR: unable to download video')
+		return [{
+			'id':		video_id.decode('utf-8'),
+			'url':		video_url.decode('utf-8'),
+			'uploader':	u'NA',
+			'upload_date':	u'NA',
+			'title':	video_title,
+			'ext':		video_extension.decode('utf-8'),
+			'format':	u'NA',
+			'player_url':	None,
+		}]
 
 
 class PhotobucketIE(InfoExtractor):
@@ -843,8 +779,6 @@ class PhotobucketIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
 			return
 
-		# At this point we have a new video
-		self._downloader.increment_downloads()
 		video_id = mobj.group(1)
 
 		video_extension = 'flv'
@@ -873,26 +807,19 @@ class PhotobucketIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: unable to extract title')
 			return
 		video_title = mobj.group(1).decode('utf-8')
-		video_title = sanitize_title(video_title)
-		simple_title = simplify_title(video_title)
 
 		video_uploader = mobj.group(2).decode('utf-8')
 
-		try:
-			# Process video information
-			self._downloader.process_info({
-				'id':		video_id.decode('utf-8'),
-				'url':		video_url.decode('utf-8'),
-				'uploader':	video_uploader,
-				'upload_date':	u'NA',
-				'title':	video_title,
-				'stitle':	simple_title,
-				'ext':		video_extension.decode('utf-8'),
-				'format':	u'NA',
-				'player_url':	None,
-			})
-		except UnavailableVideoError:
-			self._downloader.trouble(u'\nERROR: unable to download video')
+		return [{
+			'id':		video_id.decode('utf-8'),
+			'url':		video_url.decode('utf-8'),
+			'uploader':	video_uploader,
+			'upload_date':	u'NA',
+			'title':	video_title,
+			'ext':		video_extension.decode('utf-8'),
+			'format':	u'NA',
+			'player_url':	None,
+		}]
 
 
 class YahooIE(InfoExtractor):
@@ -922,8 +849,6 @@ class YahooIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
 			return
 
-		# At this point we have a new video
-		self._downloader.increment_downloads()
 		video_id = mobj.group(2)
 		video_extension = 'flv'
 
@@ -968,7 +893,6 @@ class YahooIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: unable to extract video title')
 			return
 		video_title = mobj.group(1).decode('utf-8')
-		simple_title = simplify_title(video_title)
 
 		mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
 		if mobj is None:
@@ -1026,25 +950,20 @@ class YahooIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: Unable to extract media URL')
 			return
 		video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
-		video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
-
-		try:
-			# Process video information
-			self._downloader.process_info({
-				'id':		video_id.decode('utf-8'),
-				'url':		video_url,
-				'uploader':	video_uploader,
-				'upload_date':	u'NA',
-				'title':	video_title,
-				'stitle':	simple_title,
-				'ext':		video_extension.decode('utf-8'),
-				'thumbnail':	video_thumbnail.decode('utf-8'),
-				'description':	video_description,
-				'thumbnail':	video_thumbnail,
-				'player_url':	None,
-			})
-		except UnavailableVideoError:
-			self._downloader.trouble(u'\nERROR: unable to download video')
+		video_url = unescapeHTML(video_url)
+
+		return [{
+			'id':		video_id.decode('utf-8'),
+			'url':		video_url,
+			'uploader':	video_uploader,
+			'upload_date':	u'NA',
+			'title':	video_title,
+			'ext':		video_extension.decode('utf-8'),
+			'thumbnail':	video_thumbnail.decode('utf-8'),
+			'description':	video_description,
+			'thumbnail':	video_thumbnail,
+			'player_url':	None,
+		}]
 
 
 class VimeoIE(InfoExtractor):
@@ -1072,8 +991,6 @@ class VimeoIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
 			return
 
-		# At this point we have a new video
-		self._downloader.increment_downloads()
 		video_id = mobj.group(1)
 
 		# Retrieve video webpage to extract further information
@@ -1100,7 +1017,6 @@ class VimeoIE(InfoExtractor):
 		
 		# Extract title
 		video_title = config["video"]["title"]
-		simple_title = simplify_title(video_title)
 
 		# Extract uploader
 		video_uploader = config["video"]["owner"]["name"]
@@ -1109,18 +1025,9 @@ class VimeoIE(InfoExtractor):
 		video_thumbnail = config["video"]["thumbnail"]
 
 		# Extract video description
-		try:
-			lxml.etree
-		except NameError:
-			video_description = u'No description available.'
-			mobj = re.search(r'<meta name="description" content="(.*?)" />', webpage, re.MULTILINE)
-			if mobj is not None:
-				video_description = mobj.group(1)
-		else:
-			html_parser = lxml.etree.HTMLParser()
-			vwebpage_doc = lxml.etree.parse(StringIO.StringIO(webpage), html_parser)
-			video_description = u''.join(vwebpage_doc.xpath('id("description")//text()')).strip()
-			# TODO use another parser
+		video_description = get_element_by_id("description", webpage.decode('utf8'))
+		if video_description: video_description = clean_html(video_description)
+		else: video_description = ''
 
 		# Extract upload date
 		video_upload_date = u'NA'
@@ -1149,22 +1056,17 @@ class VimeoIE(InfoExtractor):
 		video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
 					%(video_id, sig, timestamp, quality, video_codec.upper())
 
-		try:
-			# Process video information
-			self._downloader.process_info({
-				'id':		video_id,
-				'url':		video_url,
-				'uploader':	video_uploader,
-				'upload_date':	video_upload_date,
-				'title':	video_title,
-				'stitle':	simple_title,
-				'ext':		video_extension,
-				'thumbnail':	video_thumbnail,
-				'description':	video_description,
-				'player_url':	None,
-			})
-		except UnavailableVideoError:
-			self._downloader.trouble(u'ERROR: unable to download video')
+		return [{
+			'id':		video_id,
+			'url':		video_url,
+			'uploader':	video_uploader,
+			'upload_date':	video_upload_date,
+			'title':	video_title,
+			'ext':		video_extension,
+			'thumbnail':	video_thumbnail,
+			'description':	video_description,
+			'player_url':	None,
+		}]
 
 
 class GenericIE(InfoExtractor):
@@ -1202,16 +1104,16 @@ class GenericIE(InfoExtractor):
 			"""
 			def redirect_request(self, req, fp, code, msg, headers, newurl): 
 				if code in (301, 302, 303, 307):
-				    newurl = newurl.replace(' ', '%20') 
-				    newheaders = dict((k,v) for k,v in req.headers.items()
-				                      if k.lower() not in ("content-length", "content-type"))
-				    return HeadRequest(newurl, 
-				                       headers=newheaders,
-				                       origin_req_host=req.get_origin_req_host(), 
-				                       unverifiable=True) 
+					newurl = newurl.replace(' ', '%20') 
+					newheaders = dict((k,v) for k,v in req.headers.items()
+									  if k.lower() not in ("content-length", "content-type"))
+					return HeadRequest(newurl, 
+									   headers=newheaders,
+									   origin_req_host=req.get_origin_req_host(), 
+									   unverifiable=True) 
 				else: 
-				    raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp) 
-				    
+					raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp) 
+
 		class HTTPMethodFallback(urllib2.BaseHandler):
 			"""
 			Fallback to GET if HEAD is not allowed (405 HTTP error)
@@ -1221,17 +1123,17 @@ class GenericIE(InfoExtractor):
 				fp.close()
 
 				newheaders = dict((k,v) for k,v in req.headers.items()
-				                  if k.lower() not in ("content-length", "content-type"))
+								  if k.lower() not in ("content-length", "content-type"))
 				return self.parent.open(urllib2.Request(req.get_full_url(), 
-				                                 headers=newheaders, 
-				                                 origin_req_host=req.get_origin_req_host(), 
-				                                 unverifiable=True))
+												 headers=newheaders, 
+												 origin_req_host=req.get_origin_req_host(), 
+												 unverifiable=True))
 
 		# Build our opener
 		opener = urllib2.OpenerDirector() 
 		for handler in [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler,
-				        HTTPMethodFallback, HEADRedirectHandler,
-				        urllib2.HTTPErrorProcessor, urllib2.HTTPSHandler]:
+						HTTPMethodFallback, HEADRedirectHandler,
+						urllib2.HTTPErrorProcessor, urllib2.HTTPSHandler]:
 			opener.add_handler(handler())
 
 		response = opener.open(HeadRequest(url))
@@ -1245,9 +1147,6 @@ class GenericIE(InfoExtractor):
 
 	def _real_extract(self, url):
 		if self._test_redirect(url): return
-		
-		# At this point we have a new video
-		self._downloader.increment_downloads()
 
 		video_id = url.split('/')[-1]
 		request = urllib2.Request(url)
@@ -1297,8 +1196,6 @@ class GenericIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: unable to extract title')
 			return
 		video_title = mobj.group(1).decode('utf-8')
-		video_title = sanitize_title(video_title)
-		simple_title = simplify_title(video_title)
 
 		# video uploader is domain name
 		mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
@@ -1307,43 +1204,33 @@ class GenericIE(InfoExtractor):
 			return
 		video_uploader = mobj.group(1).decode('utf-8')
 
-		try:
-			# Process video information
-			self._downloader.process_info({
-				'id':		video_id.decode('utf-8'),
-				'url':		video_url.decode('utf-8'),
-				'uploader':	video_uploader,
-				'upload_date':	u'NA',
-				'title':	video_title,
-				'stitle':	simple_title,
-				'ext':		video_extension.decode('utf-8'),
-				'format':	u'NA',
-				'player_url':	None,
-			})
-		except UnavailableVideoError, err:
-			self._downloader.trouble(u'\nERROR: unable to download video')
+		return [{
+			'id':		video_id.decode('utf-8'),
+			'url':		video_url.decode('utf-8'),
+			'uploader':	video_uploader,
+			'upload_date':	u'NA',
+			'title':	video_title,
+			'ext':		video_extension.decode('utf-8'),
+			'format':	u'NA',
+			'player_url':	None,
+		}]
 
 
 class YoutubeSearchIE(InfoExtractor):
 	"""Information Extractor for YouTube search queries."""
 	_VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
 	_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
-	_youtube_ie = None
 	_max_youtube_results = 1000
 	IE_NAME = u'youtube:search'
 
-	def __init__(self, youtube_ie, downloader=None):
+	def __init__(self, downloader=None):
 		InfoExtractor.__init__(self, downloader)
-		self._youtube_ie = youtube_ie
 
 	def report_download_page(self, query, pagenum):
 		"""Report attempt to download playlist page with given number."""
 		query = query.decode(preferredencoding())
 		self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
 
-	def _real_initialize(self):
-		self._youtube_ie.initialize()
-
 	def _real_extract(self, query):
 		mobj = re.match(self._VALID_URL, query)
 		if mobj is None:
@@ -1401,7 +1288,7 @@ class YoutubeSearchIE(InfoExtractor):
 		if len(video_ids) > n:
 			video_ids = video_ids[:n]
 		for id in video_ids:
-			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
+			self._downloader.download(['http://www.youtube.com/watch?v=%s' % id])
 		return
 
 
@@ -1411,22 +1298,17 @@ class GoogleSearchIE(InfoExtractor):
 	_TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
 	_VIDEO_INDICATOR = r'<a href="http://video\.google\.com/videoplay\?docid=([^"\&]+)'
 	_MORE_PAGES_INDICATOR = r'class="pn" id="pnnext"'
-	_google_ie = None
 	_max_google_results = 1000
 	IE_NAME = u'video.google:search'
 
-	def __init__(self, google_ie, downloader=None):
+	def __init__(self, downloader=None):
 		InfoExtractor.__init__(self, downloader)
-		self._google_ie = google_ie
 
 	def report_download_page(self, query, pagenum):
 		"""Report attempt to download playlist page with given number."""
 		query = query.decode(preferredencoding())
 		self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
 
-	def _real_initialize(self):
-		self._google_ie.initialize()
-
 	def _real_extract(self, query):
 		mobj = re.match(self._VALID_URL, query)
 		if mobj is None:
@@ -1481,12 +1363,12 @@ class GoogleSearchIE(InfoExtractor):
 					if len(video_ids) == n:
 						# Specified n videos reached
 						for id in video_ids:
-							self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
+							self._downloader.download(['http://video.google.com/videoplay?docid=%s' % id])
 						return
 
 			if re.search(self._MORE_PAGES_INDICATOR, page) is None:
 				for id in video_ids:
-					self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
+					self._downloader.download(['http://video.google.com/videoplay?docid=%s' % id])
 				return
 
 			pagenum = pagenum + 1
@@ -1498,22 +1380,17 @@ class YahooSearchIE(InfoExtractor):
 	_TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
 	_VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
 	_MORE_PAGES_INDICATOR = r'\s*Next'
-	_yahoo_ie = None
 	_max_yahoo_results = 1000
 	IE_NAME = u'video.yahoo:search'
 
-	def __init__(self, yahoo_ie, downloader=None):
+	def __init__(self, downloader=None):
 		InfoExtractor.__init__(self, downloader)
-		self._yahoo_ie = yahoo_ie
 
 	def report_download_page(self, query, pagenum):
 		"""Report attempt to download playlist page with given number."""
 		query = query.decode(preferredencoding())
 		self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
 
-	def _real_initialize(self):
-		self._yahoo_ie.initialize()
-
 	def _real_extract(self, query):
 		mobj = re.match(self._VALID_URL, query)
 		if mobj is None:
@@ -1570,12 +1447,12 @@ class YahooSearchIE(InfoExtractor):
 					if len(video_ids) == n:
 						# Specified n videos reached
 						for id in video_ids:
-							self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
+							self._downloader.download(['http://video.yahoo.com/watch/%s' % id])
 						return
 
 			if re.search(self._MORE_PAGES_INDICATOR, page) is None:
 				for id in video_ids:
-					self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
+					self._downloader.download(['http://video.yahoo.com/watch/%s' % id])
 				return
 
 			pagenum = pagenum + 1
@@ -1588,20 +1465,15 @@ class YoutubePlaylistIE(InfoExtractor):
 	_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
 	_VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&amp;list=PL%s&'
 	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
-	_youtube_ie = None
 	IE_NAME = u'youtube:playlist'
 
-	def __init__(self, youtube_ie, downloader=None):
+	def __init__(self, downloader=None):
 		InfoExtractor.__init__(self, downloader)
-		self._youtube_ie = youtube_ie
 
 	def report_download_page(self, playlist_id, pagenum):
 		"""Report attempt to download playlist page with given number."""
 		self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
 
-	def _real_initialize(self):
-		self._youtube_ie.initialize()
-
 	def _real_extract(self, url):
 		# Extract playlist id
 		mobj = re.match(self._VALID_URL, url)
@@ -1611,7 +1483,7 @@ class YoutubePlaylistIE(InfoExtractor):
 
 		# Single video case
 		if mobj.group(3) is not None:
-			self._youtube_ie.extract(mobj.group(3))
+			self._downloader.download([mobj.group(3)])
 			return
 
 		# Download playlist pages
@@ -1655,7 +1527,7 @@ class YoutubePlaylistIE(InfoExtractor):
 			video_ids = video_ids[playliststart:playlistend]
 
 		for id in video_ids:
-			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
+			self._downloader.download(['http://www.youtube.com/watch?v=%s' % id])
 		return
 
 
@@ -1667,21 +1539,16 @@ class YoutubeUserIE(InfoExtractor):
 	_GDATA_PAGE_SIZE = 50
 	_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
 	_VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
-	_youtube_ie = None
 	IE_NAME = u'youtube:user'
 
-	def __init__(self, youtube_ie, downloader=None):
+	def __init__(self, downloader=None):
 		InfoExtractor.__init__(self, downloader)
-		self._youtube_ie = youtube_ie
 
 	def report_download_page(self, username, start_index):
 		"""Report attempt to download user page."""
 		self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
 				(username, start_index, start_index + self._GDATA_PAGE_SIZE))
 
-	def _real_initialize(self):
-		self._youtube_ie.initialize()
-
 	def _real_extract(self, url):
 		# Extract username
 		mobj = re.match(self._VALID_URL, url)
@@ -1744,7 +1611,7 @@ class YoutubeUserIE(InfoExtractor):
 				(username, all_ids_count, len(video_ids)))
 
 		for video_id in video_ids:
-			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
+			self._downloader.download(['http://www.youtube.com/watch?v=%s' % video_id])
 
 
 class DepositFilesIE(InfoExtractor):
@@ -1765,9 +1632,6 @@ class DepositFilesIE(InfoExtractor):
 		self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
 
 	def _real_extract(self, url):
-		# At this point we have a new file
-		self._downloader.increment_downloads()
-
 		file_id = url.split('/')[-1]
 		# Rebuild url in english locale
 		url = 'http://depositfiles.com/en/files/' + file_id
@@ -1804,21 +1668,16 @@ class DepositFilesIE(InfoExtractor):
 			return
 		file_title = mobj.group(1).decode('utf-8')
 
-		try:
-			# Process file information
-			self._downloader.process_info({
-				'id':		file_id.decode('utf-8'),
-				'url':		file_url.decode('utf-8'),
-				'uploader':	u'NA',
-				'upload_date':	u'NA',
-				'title':	file_title,
-				'stitle':	file_title,
-				'ext':		file_extension.decode('utf-8'),
-				'format':	u'NA',
-				'player_url':	None,
-			})
-		except UnavailableVideoError, err:
-			self._downloader.trouble(u'ERROR: unable to download file')
+		return [{
+			'id':		file_id.decode('utf-8'),
+			'url':		file_url.decode('utf-8'),
+			'uploader':	u'NA',
+			'upload_date':	u'NA',
+			'title':	file_title,
+			'ext':		file_extension.decode('utf-8'),
+			'format':	u'NA',
+			'player_url':	None,
+		}]
 
 
 class FacebookIE(InfoExtractor):
@@ -1959,9 +1818,6 @@ class FacebookIE(InfoExtractor):
 			return
 		video_title = video_info['title']
 		video_title = video_title.decode('utf-8')
-		video_title = sanitize_title(video_title)
-
-		simple_title = simplify_title(video_title)
 
 		# thumbnail image
 		if 'thumbnail' not in video_info:
@@ -2011,31 +1867,24 @@ class FacebookIE(InfoExtractor):
 					return
 				video_url_list = [(req_format, url_map[req_format])] # Specific format
 
+		results = []
 		for format_param, video_real_url in video_url_list:
-
-			# At this point we have a new video
-			self._downloader.increment_downloads()
-
 			# Extension
 			video_extension = self._video_extensions.get(format_param, 'mp4')
 
-			try:
-				# Process video information
-				self._downloader.process_info({
-					'id':		video_id.decode('utf-8'),
-					'url':		video_real_url.decode('utf-8'),
-					'uploader':	video_uploader.decode('utf-8'),
-					'upload_date':	upload_date,
-					'title':	video_title,
-					'stitle':	simple_title,
-					'ext':		video_extension.decode('utf-8'),
-					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
-					'thumbnail':	video_thumbnail.decode('utf-8'),
-					'description':	video_description.decode('utf-8'),
-					'player_url':	None,
-				})
-			except UnavailableVideoError, err:
-				self._downloader.trouble(u'\nERROR: unable to download video')
+			results.append({
+				'id':		video_id.decode('utf-8'),
+				'url':		video_real_url.decode('utf-8'),
+				'uploader':	video_uploader.decode('utf-8'),
+				'upload_date':	upload_date,
+				'title':	video_title,
+				'ext':		video_extension.decode('utf-8'),
+				'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
+				'thumbnail':	video_thumbnail.decode('utf-8'),
+				'description':	video_description.decode('utf-8'),
+				'player_url':	None,
+			})
+		return results
 
 class BlipTVIE(InfoExtractor):
 	"""Information extractor for blip.tv"""
@@ -2078,7 +1927,6 @@ class BlipTVIE(InfoExtractor):
 					'id': title,
 					'url': url,
 					'title': title,
-					'stitle': simplify_title(title),
 					'ext': ext,
 					'urlhandle': urlh
 				}
@@ -2098,21 +1946,20 @@ class BlipTVIE(InfoExtractor):
 					data = json_data['Post']
 				else:
 					data = json_data
-	
+
 				upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
 				video_url = data['media']['url']
 				umobj = re.match(self._URL_EXT, video_url)
 				if umobj is None:
 					raise ValueError('Can not determine filename extension')
 				ext = umobj.group(1)
-	
+
 				info = {
 					'id': data['item_id'],
 					'url': video_url,
 					'uploader': data['display_name'],
 					'upload_date': upload_date,
 					'title': data['title'],
-					'stitle': simplify_title(data['title']),
 					'ext': ext,
 					'format': data['media']['mimeType'],
 					'thumbnail': data['thumbnailUrl'],
@@ -2123,12 +1970,7 @@ class BlipTVIE(InfoExtractor):
 				self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
 				return
 
-		self._downloader.increment_downloads()
-
-		try:
-			self._downloader.process_info(info)
-		except UnavailableVideoError, err:
-			self._downloader.trouble(u'\nERROR: unable to download video')
+		return [info]
 
 
 class MyVideoIE(InfoExtractor):
@@ -2179,24 +2021,17 @@ class MyVideoIE(InfoExtractor):
 			return
 
 		video_title = mobj.group(1)
-		video_title = sanitize_title(video_title)
 
-		simple_title = simplify_title(video_title)
-
-		try:
-			self._downloader.process_info({
-				'id':		video_id,
-				'url':		video_url,
-				'uploader':	u'NA',
-				'upload_date':  u'NA',
-				'title':	video_title,
-				'stitle':	simple_title,
-				'ext':		u'flv',
-				'format':	u'NA',
-				'player_url':	None,
-			})
-		except UnavailableVideoError:
-			self._downloader.trouble(u'\nERROR: Unable to download video')
+		return [{
+			'id':		video_id,
+			'url':		video_url,
+			'uploader':	u'NA',
+			'upload_date':  u'NA',
+			'title':	video_title,
+			'ext':		u'flv',
+			'format':	u'NA',
+			'player_url':	None,
+		}]
 
 class ComedyCentralIE(InfoExtractor):
 	"""Information extractor for The Daily Show and Colbert Report """
@@ -2206,7 +2041,7 @@ class ComedyCentralIE(InfoExtractor):
 
 	def report_extraction(self, episode_id):
 		self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
-	
+
 	def report_config_download(self, episode_id):
 		self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
 
@@ -2278,6 +2113,8 @@ class ComedyCentralIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
 			return
 
+		results = []
+
 		idoc = xml.etree.ElementTree.fromstring(indexXml)
 		itemEls = idoc.findall('.//item')
 		for itemEl in itemEls:
@@ -2310,8 +2147,6 @@ class ComedyCentralIE(InfoExtractor):
 			# For now, just pick the highest bitrate
 			format,video_url = turls[-1]
 
-			self._downloader.increment_downloads()
-
 			effTitle = showId + u'-' + epTitle
 			info = {
 				'id': shortMediaId,
@@ -2319,7 +2154,6 @@ class ComedyCentralIE(InfoExtractor):
 				'uploader': showId,
 				'upload_date': officialDate,
 				'title': effTitle,
-				'stitle': simplify_title(effTitle),
 				'ext': 'mp4',
 				'format': format,
 				'thumbnail': None,
@@ -2327,11 +2161,9 @@ class ComedyCentralIE(InfoExtractor):
 				'player_url': playerUrl
 			}
 
-			try:
-				self._downloader.process_info(info)
-			except UnavailableVideoError, err:
-				self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
-				continue
+			results.append(info)
+			
+		return results
 
 
 class EscapistIE(InfoExtractor):
@@ -2347,8 +2179,6 @@ class EscapistIE(InfoExtractor):
 		self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
 
 	def _real_extract(self, url):
-		htmlParser = HTMLParser.HTMLParser()
-
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
@@ -2358,17 +2188,18 @@ class EscapistIE(InfoExtractor):
 
 		self.report_extraction(showName)
 		try:
-			webPage = urllib2.urlopen(url).read()
+			webPageBytes = urllib2.urlopen(url).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
 			return
 
+		webPage = webPageBytes.decode('utf-8')
 		descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
-		description = htmlParser.unescape(descMatch.group(1))
+		description = unescapeHTML(descMatch.group(1))
 		imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
-		imgUrl = htmlParser.unescape(imgMatch.group(1))
+		imgUrl = unescapeHTML(imgMatch.group(1))
 		playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
-		playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
+		playerUrl = unescapeHTML(playerUrlMatch.group(1))
 		configUrlMatch = re.search('config=(.*)$', playerUrl)
 		configUrl = urllib2.unquote(configUrlMatch.group(1))
 
@@ -2391,14 +2222,12 @@ class EscapistIE(InfoExtractor):
 		playlist = config['playlist']
 		videoUrl = playlist[1]['url']
 
-		self._downloader.increment_downloads()
 		info = {
 			'id': videoId,
 			'url': videoUrl,
 			'uploader': showName,
 			'upload_date': None,
 			'title': showName,
-			'stitle': simplify_title(showName),
 			'ext': 'flv',
 			'format': 'flv',
 			'thumbnail': imgUrl,
@@ -2406,10 +2235,7 @@ class EscapistIE(InfoExtractor):
 			'player_url': playerUrl,
 		}
 
-		try:
-			self._downloader.process_info(info)
-		except UnavailableVideoError, err:
-			self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
+		return [info]
 
 
 class CollegeHumorIE(InfoExtractor):
@@ -2427,8 +2253,6 @@ class CollegeHumorIE(InfoExtractor):
 		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
 
 	def _real_extract(self, url):
-		htmlParser = HTMLParser.HTMLParser()
-
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
@@ -2467,7 +2291,6 @@ class CollegeHumorIE(InfoExtractor):
 			videoNode = mdoc.findall('./video')[0]
 			info['description'] = videoNode.findall('./description')[0].text
 			info['title'] = videoNode.findall('./caption')[0].text
-			info['stitle'] = simplify_title(info['title'])
 			info['url'] = videoNode.findall('./file')[0].text
 			info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
 			info['ext'] = info['url'].rpartition('.')[2]
@@ -2476,12 +2299,7 @@ class CollegeHumorIE(InfoExtractor):
 			self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
 			return
 
-		self._downloader.increment_downloads()
-
-		try:
-			self._downloader.process_info(info)
-		except UnavailableVideoError, err:
-			self._downloader.trouble(u'\nERROR: unable to download video')
+		return [info]
 
 
 class XVideosIE(InfoExtractor):
@@ -2499,8 +2317,6 @@ class XVideosIE(InfoExtractor):
 		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
 
 	def _real_extract(self, url):
-		htmlParser = HTMLParser.HTMLParser()
-
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
@@ -2542,16 +2358,12 @@ class XVideosIE(InfoExtractor):
 			return
 		video_thumbnail = mobj.group(1).decode('utf-8')
 
-
-
-		self._downloader.increment_downloads()
 		info = {
 			'id': video_id,
 			'url': video_url,
 			'uploader': None,
 			'upload_date': None,
 			'title': video_title,
-			'stitle': simplify_title(video_title),
 			'ext': 'flv',
 			'format': 'flv',
 			'thumbnail': video_thumbnail,
@@ -2559,10 +2371,7 @@ class XVideosIE(InfoExtractor):
 			'player_url': None,
 		}
 
-		try:
-			self._downloader.process_info(info)
-		except UnavailableVideoError, err:
-			self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
+		return [info]
 
 
 class SoundcloudIE(InfoExtractor):
@@ -2589,8 +2398,6 @@ class SoundcloudIE(InfoExtractor):
 		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
 
 	def _real_extract(self, url):
-		htmlParser = HTMLParser.HTMLParser()
-
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
@@ -2600,7 +2407,7 @@ class SoundcloudIE(InfoExtractor):
 		uploader = mobj.group(1).decode('utf-8')
 		# extract simple title (uploader + slug of song title)
 		slug_title =  mobj.group(2).decode('utf-8')
-		simple_title = uploader + '-' + slug_title
+		simple_title = uploader + u'-' + slug_title
 
 		self.report_webpage('%s/%s' % (uploader, slug_title))
 
@@ -2622,7 +2429,9 @@ class SoundcloudIE(InfoExtractor):
 		# extract unsimplified title
 		mobj = re.search('"title":"(.*?)",', webpage)
 		if mobj:
-			title = mobj.group(1)
+			title = mobj.group(1).decode('utf-8')
+		else:
+			title = simple_title
 
 		# construct media url (with uid/token)
 		mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
@@ -2646,21 +2455,17 @@ class SoundcloudIE(InfoExtractor):
 		# for soundcloud, a request to a cross domain is required for cookies
 		request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
 
-		try:
-			self._downloader.process_info({
-				'id':		video_id.decode('utf-8'),
-				'url':		mediaURL,
-				'uploader':	uploader.decode('utf-8'),
-				'upload_date':  upload_date,
-				'title':	simple_title.decode('utf-8'),
-				'stitle':	simple_title.decode('utf-8'),
-				'ext':		u'mp3',
-				'format':	u'NA',
-				'player_url':	None,
-				'description': description.decode('utf-8')
-			})
-		except UnavailableVideoError:
-			self._downloader.trouble(u'\nERROR: unable to download video')
+		return [{
+			'id':		video_id.decode('utf-8'),
+			'url':		mediaURL,
+			'uploader':	uploader.decode('utf-8'),
+			'upload_date':  upload_date,
+			'title':	title,
+			'ext':		u'mp3',
+			'format':	u'NA',
+			'player_url':	None,
+			'description': description.decode('utf-8')
+		}]
 
 
 class InfoQIE(InfoExtractor):
@@ -2678,8 +2483,6 @@ class InfoQIE(InfoExtractor):
 		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
 
 	def _real_extract(self, url):
-		htmlParser = HTMLParser.HTMLParser()
-
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
@@ -2721,14 +2524,12 @@ class InfoQIE(InfoExtractor):
 		video_filename = video_url.split('/')[-1]
 		video_id, extension = video_filename.split('.')
 
-		self._downloader.increment_downloads()
 		info = {
 			'id': video_id,
 			'url': video_url,
 			'uploader': None,
 			'upload_date': None,
 			'title': video_title,
-			'stitle': simplify_title(video_title),
 			'ext': extension,
 			'format': extension, # Extension is always(?) mp4, but seems to be flv
 			'thumbnail': None,
@@ -2736,10 +2537,7 @@ class InfoQIE(InfoExtractor):
 			'player_url': None,
 		}
 
-		try:
-			self._downloader.process_info(info)
-		except UnavailableVideoError, err:
-			self._downloader.trouble(u'\nERROR: unable to download ' + video_url)
+		return [info]
 
 class MixcloudIE(InfoExtractor):
 	"""Information extractor for www.mixcloud.com"""
@@ -2768,7 +2566,6 @@ class MixcloudIE(InfoExtractor):
 			url_list = jsonData[fmt][bitrate]
 		except TypeError: # we have no bitrate info.
 			url_list = jsonData[fmt]
-				
 		return url_list
 
 	def check_urls(self, url_list):
@@ -2842,25 +2639,18 @@ class MixcloudIE(InfoExtractor):
 			file_url = self.check_urls(url_list)
 			format_param = req_format
 
-		# We have audio
-		self._downloader.increment_downloads()
-		try:
-			# Process file information
-			self._downloader.process_info({
-				'id': file_id.decode('utf-8'),
-				'url': file_url.decode('utf-8'),
-				'uploader':	uploader.decode('utf-8'),
-				'upload_date': u'NA',
-				'title': json_data['name'],
-				'stitle': simplify_title(json_data['name']),
-				'ext': file_url.split('.')[-1].decode('utf-8'),
-				'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
-				'thumbnail': json_data['thumbnail_url'],
-				'description': json_data['description'],
-				'player_url': player_url.decode('utf-8'),
-			})
-		except UnavailableVideoError, err:
-			self._downloader.trouble(u'ERROR: unable to download file')
+		return [{
+			'id': file_id.decode('utf-8'),
+			'url': file_url.decode('utf-8'),
+			'uploader':	uploader.decode('utf-8'),
+			'upload_date': u'NA',
+			'title': json_data['name'],
+			'ext': file_url.split('.')[-1].decode('utf-8'),
+			'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
+			'thumbnail': json_data['thumbnail_url'],
+			'description': json_data['description'],
+			'player_url': player_url.decode('utf-8'),
+		}]
 
 class StanfordOpenClassroomIE(InfoExtractor):
 	"""Information extractor for Stanford's Open ClassRoom"""
@@ -2886,9 +2676,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
 			course = mobj.group('course')
 			video = mobj.group('video')
 			info = {
-				'id': simplify_title(course + '_' + video),
+				'id': course + '_' + video,
 			}
-	
+
 			self.report_extraction(info['id'])
 			baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
 			xmlUrl = baseUrl + video + '.xml'
@@ -2904,20 +2694,13 @@ class StanfordOpenClassroomIE(InfoExtractor):
 			except IndexError:
 				self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
 				return
-			info['stitle'] = simplify_title(info['title'])
 			info['ext'] = info['url'].rpartition('.')[2]
 			info['format'] = info['ext']
-			self._downloader.increment_downloads()
-			try:
-				self._downloader.process_info(info)
-			except UnavailableVideoError, err:
-				self._downloader.trouble(u'\nERROR: unable to download video')
+			return [info]
 		elif mobj.group('course'): # A course page
-			unescapeHTML = HTMLParser.HTMLParser().unescape
-
 			course = mobj.group('course')
 			info = {
-				'id': simplify_title(course),
+				'id': course,
 				'type': 'playlist',
 			}
 
@@ -2933,7 +2716,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
 				info['title'] = unescapeHTML(m.group(1))
 			else:
 				info['title'] = info['id']
-			info['stitle'] = simplify_title(info['title'])
 
 			m = re.search('<description>([^<]+)</description>', coursepage)
 			if m:
@@ -2946,13 +2728,13 @@ class StanfordOpenClassroomIE(InfoExtractor):
 					'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
 				}
 					for vpage in links]
-
+			results = []
 			for entry in info['list']:
 				assert entry['type'] == 'reference'
-				self.extract(entry['url'])
+				results += self.extract(entry['url'])
+			return results
+			
 		else: # Root page
-			unescapeHTML = HTMLParser.HTMLParser().unescape
-
 			info = {
 				'id': 'Stanford OpenClassroom',
 				'type': 'playlist',
@@ -2967,7 +2749,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
 				return
 
 			info['title'] = info['id']
-			info['stitle'] = simplify_title(info['title'])
 
 			links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
 			info['list'] = [
@@ -2977,9 +2758,11 @@ class StanfordOpenClassroomIE(InfoExtractor):
 				}
 					for cpage in links]
 
+			results = []
 			for entry in info['list']:
 				assert entry['type'] == 'reference'
-				self.extract(entry['url'])
+				results += self.extract(entry['url'])
+			return results
 
 class MTVIE(InfoExtractor):
 	"""Information extractor for MTV.com"""
@@ -3059,18 +2842,13 @@ class MTVIE(InfoExtractor):
 			self._downloader.trouble('Invalid rendition field.')
 			return
 
-		self._downloader.increment_downloads()
 		info = {
 			'id': video_id,
 			'url': video_url,
 			'uploader': performer,
 			'title': video_title,
-			'stitle': simplify_title(video_title),
 			'ext': ext,
 			'format': format,
 		}
 
-		try:
-			self._downloader.process_info(info)
-		except UnavailableVideoError, err:
-			self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
+		return [info]