Allow empty titles because they do appear in some videos (fixes issue #53)

[youtube-dl.git] / youtube-dl
diff --git a/youtube-dl b/youtube-dl

index 472a8e1ccc677318be39d328c945be37b04a8435..f57343ea80f17cfa7523f4be91a5fc5f57c02278 100755 (executable)
--- a/youtube-dl
+++ b/youtube-dl
@@ -33,15 +33,15 @@ def preferredencoding():
         Returns the best encoding scheme for the system, based on
         locale.getpreferredencoding() and some further tweaks.
         """
-       try:
-               pref = locale.getpreferredencoding()
-               # Mac OSX systems have this problem sometimes
-               if pref == '':
-                       return 'UTF-8'
-               return pref
-       except:
-               sys.stderr.write('WARNING: problem obtaining preferred encoding. Falling back to UTF-8.\n')
-               return 'UTF-8'
+       def yield_preferredencoding():
+               try:
+                       pref = locale.getpreferredencoding()
+                       u'TEST'.encode(pref)
+               except:
+                       pref = 'UTF-8'
+               while True:
+                       yield pref
+       return yield_preferredencoding().next()
  
  class DownloadError(Exception):
         """Download Error exception.
@@ -221,11 +221,13 @@ class FileDownloader(object):
  
         @staticmethod
         def verify_url(url):
-               """Verify a URL is valid and data could be downloaded."""
+               """Verify a URL is valid and data could be downloaded. Return real data URL."""
                 request = urllib2.Request(url, None, std_headers)
                 data = urllib2.urlopen(request)
                 data.read(1)
+               url = data.geturl()
                 data.close()
+               return url
  
         def add_info_extractor(self, ie):
                 """Add an InfoExtractor object to the end of the list."""
@@ -307,7 +309,7 @@ class FileDownloader(object):
                 # Do nothing else if in simulate mode
                 if self.params.get('simulate', False):
                         try:
-                               self.verify_url(info_dict['url'])
+                               info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
                         except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
                                 raise UnavailableFormatError
  
@@ -325,7 +327,7 @@ class FileDownloader(object):
                         filename = self.params['outtmpl'] % template_dict
                 except (ValueError, KeyError), err:
                         self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
-               if self.params['nooverwrites'] and os.path.exists(filename):
+               if self.params.get('nooverwrites', False) and os.path.exists(filename):
                         self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
                         return
  
@@ -336,7 +338,7 @@ class FileDownloader(object):
                         return
  
                 try:
-                       success = self._do_download(filename, info_dict['url'])
+                       success = self._do_download(filename, info_dict['url'].encode('utf-8'))
                 except (OSError, IOError), err:
                         raise UnavailableFormatError
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -400,7 +402,7 @@ class FileDownloader(object):
                         resume_len = os.path.getsize(filename)
                 else:
                         resume_len = 0
-               if self.params['continuedl'] and resume_len != 0:
+               if self.params.get('continuedl', False) and resume_len != 0:
                         self.report_resuming_byte(resume_len)
                         request.add_header('Range','bytes=%d-' % resume_len)
  
@@ -583,10 +585,6 @@ class YoutubeIE(InfoExtractor):
                 """Report attempt to extract video information."""
                 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
         
-       def report_video_url(self, video_id, video_real_url):
-               """Report extracted video URL."""
-               self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
-       
         def report_unavailable_format(self, video_id, format):
                 """Report extracted video URL."""
                 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
@@ -712,7 +710,6 @@ class YoutubeIE(InfoExtractor):
                         video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
                         if format_param is not None:
                                 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
-                       self.report_video_url(video_id, video_real_url)
  
                         # uploader
                         mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage)
@@ -722,7 +719,7 @@ class YoutubeIE(InfoExtractor):
                         video_uploader = urllib.unquote(mobj.group(1))
  
                         # title
-                       mobj = re.search(r'(?m)&title=([^&]+)(?:&|$)', video_info_webpage)
+                       mobj = re.search(r'(?m)&title=([^&]*)(?:&|$)', video_info_webpage)
                         if mobj is None:
                                 self._downloader.trouble(u'ERROR: unable to extract video title')
                                 return