Update and correct (format,extension) table for YouTube

[youtube-dl.git] / youtube-dl
diff --git a/youtube-dl b/youtube-dl

index 0ec7adb1da10cebc467941a19d6ae1bddea6a240..9fe58a3547e37858ecc3c02482b7bf2c4654be79 100755 (executable)
--- a/youtube-dl
+++ b/youtube-dl
@@ -190,7 +190,7 @@ class FileDownloader(object):
         def to_stdout(self, message, skip_eol=False):
                 """Print message to stdout if not in quiet mode."""
                 if not self.params.get('quiet', False):
-                       print u'%s%s' % (message, [u'\n', u''][skip_eol]),
+                       print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(locale.getpreferredencoding()),
                         sys.stdout.flush()
         
         def to_stderr(self, message):
@@ -244,9 +244,9 @@ class FileDownloader(object):
                 """Process a single dictionary returned by an InfoExtractor."""
                 # Forced printings
                 if self.params.get('forcetitle', False):
-                       print info_dict['title']
+                       print info_dict['title'].encode(locale.getpreferredencoding())
                 if self.params.get('forceurl', False):
-                       print info_dict['url']
+                       print info_dict['url'].encode(locale.getpreferredencoding())
                         
                 # Do nothing else if in simulate mode
                 if self.params.get('simulate', False):
@@ -561,7 +561,8 @@ class YoutubeIE(InfoExtractor):
  
                 # Extension
                 video_extension = {
-                       '17': '3gp',
+                       '13': '3gp',
+                       '17': 'mp4',
                         '18': 'mp4',
                         '22': 'mp4',
                 }.get(format_param, 'flv')
@@ -624,6 +625,7 @@ class MetacafeIE(InfoExtractor):
  
         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
+       _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
         _youtube_ie = None
  
         def __init__(self, youtube_ie, downloader=None):
@@ -665,7 +667,7 @@ class MetacafeIE(InfoExtractor):
                         'filters': '0',
                         'submit': "Continue - I'm over 18",
                         }
-               request = urllib2.Request('http://www.metacafe.com/', urllib.urlencode(disclaimer_form), std_headers)
+               request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
                 try:
                         self.report_age_confirmation()
                         disclaimer = urllib2.urlopen(request).read()
@@ -702,13 +704,13 @@ class MetacafeIE(InfoExtractor):
  
                 # Extract URL, uploader and title from webpage
                 self.report_extraction(video_id)
-               mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage)
+               mobj = re.search(r'(?m)&mediaURL=(http.*?\.flv)', webpage)
                 if mobj is None:
                         self._downloader.trouble(u'ERROR: unable to extract media URL')
                         return
-               mediaURL = mobj.group(1).replace('\\', '')
+               mediaURL = urllib.unquote(mobj.group(1))
  
-               mobj = re.search(r'(?m)"gdaKey":"(.*?)"', webpage)
+               mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
                 if mobj is None:
                         self._downloader.trouble(u'ERROR: unable to extract gdaKey')
                         return
@@ -722,11 +724,11 @@ class MetacafeIE(InfoExtractor):
                         return
                 video_title = mobj.group(1).decode('utf-8')
  
-               mobj = re.search(r'(?m)<li id="ChnlUsr">.*?Submitter:<br />(.*?)</li>', webpage)
+               mobj = re.search(r'(?ms)<li id="ChnlUsr">.*?Submitter:.*?<a .*?>(.*?)<', webpage)
                 if mobj is None:
                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                         return
-               video_uploader = re.sub(r'<.*?>', '', mobj.group(1))
+               video_uploader = mobj.group(1)
  
                 # Process video information
                 self._downloader.process_info({
@@ -918,7 +920,7 @@ class PostProcessor(object):
                 """Run the PostProcessor.
  
                 The "information" argument is a dictionary like the ones
-               returned by InfoExtractors. The only difference is that this
+               composed by InfoExtractors. The only difference is that this
                 one has an extra field called "filepath" that points to the
                 downloaded file.
  
@@ -996,7 +998,9 @@ if __name__ == '__main__':
                 batchurls = []
                 if opts.batchfile is not None:
                         try:
-                               batchurls = [line.strip() for line in open(opts.batchfile, 'r')]
+                               batchurls = open(opts.batchfile, 'r').readlines()
+                               batchurls = [x.strip() for x in batchurls]
+                               batchurls = [x for x in batchurls if len(x) > 0]
                         except IOError:
                                 sys.exit(u'ERROR: batch file could not be read')
                 all_urls = batchurls + args
@@ -1027,9 +1031,6 @@ if __name__ == '__main__':
                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
  
                 # File downloader
-               charset = locale.getpreferredencoding()
-               if charset is None:
-                       charset = 'ascii'
                 fd = FileDownloader({
                         'usenetrc': opts.usenetrc,
                         'username': opts.username,
@@ -1039,7 +1040,7 @@ if __name__ == '__main__':
                         'forcetitle': opts.gettitle,
                         'simulate': (opts.simulate or opts.geturl or opts.gettitle),
                         'format': opts.format,
-                       'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(charset))
+                       'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                                 or u'%(id)s.%(ext)s'),