Fix recognition of http://www.youtube.com/course?list=PL41FDABC6AA085E78&category...

[youtube-dl.git] / youtube-dl
diff --git a/youtube-dl b/youtube-dl

index 02c31e4dba9443aeffbd944fc444a280869f8f1d..8560059f1c3553c95adbedf7643561880e77ce05 100755 (executable)
--- a/youtube-dl
+++ b/youtube-dl
@@ -15,7 +15,7 @@ __author__  = (
         )
  
  __license__ = 'Public Domain'
-__version__ = '2011.09.18c'
+__version__ = '2011.09.30'
  
  UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
  
@@ -766,7 +766,8 @@ class FileDownloader(object):
                         try:
                                 infof = open(infofn, 'wb')
                                 try:
-                                       json.dump(info_dict, infof)
+                                       json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
+                                       json.dump(json_info_dict, infof)
                                 finally:
                                         infof.close()
                         except (OSError, IOError):
@@ -905,6 +906,8 @@ class FileDownloader(object):
                 while count <= retries:
                         # Establish connection
                         try:
+                               data = info_dict.get('urlhandle')  # handle the extractor may already have opened (e.g. BlipTV direct download)
+                               if data is None or count != 0:  # nothing to reuse, or this is a retry: open a fresh connection instead of clobbering the handle
-                               data = urllib2.urlopen(request)
+                                       data = urllib2.urlopen(request)
                                 break
                         except (urllib2.HTTPError, ), err:
@@ -1100,6 +1103,21 @@ class YoutubeIE(InfoExtractor):
                 '44': 'webm',
                 '45': 'webm',
         }
+       _video_dimensions = {  # format code -> size string printed by _print_formats; values look like HEIGHTxWIDTH (TODO confirm), '???' where no size is listed
+               '5': '240x400',
+               '6': '???',
+               '13': '???',
+               '17': '144x176',
+               '18': '360x640',
+               '22': '720x1280',
+               '34': '360x640',
+               '35': '480x854',
+               '37': '1080x1920',
+               '38': '3072x4096',
+               '43': '360x640',
+               '44': '480x854',
+               '45': '720x1280',
+       }       
         IE_NAME = u'youtube'
  
         def report_lang(self):
@@ -1134,6 +1152,11 @@ class YoutubeIE(InfoExtractor):
                 """Indicate the download will use the RTMP protocol."""
                 self._downloader.to_screen(u'[youtube] RTMP download detected')
  
+       def _print_formats(self, formats):  # print one line per format code: code, file extension, dimensions
+               print 'Available formats:'
+               for x in formats:
+                       print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))  # codes missing from the tables fall back to 'flv' / '???'
+
         def _real_initialize(self):
                 if self._downloader is None:
                         return
@@ -1327,6 +1350,9 @@ class YoutubeIE(InfoExtractor):
                         if len(existing_formats) == 0:
                                 self._downloader.trouble(u'ERROR: no known formats available for video')
                                 return
+                       if self._downloader.params.get('listformats', None):
+                               self._print_formats(existing_formats)
+                               return
                         if req_format is None or req_format == 'best':
                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
                         elif req_format == 'worst':
@@ -2432,7 +2458,7 @@ class YahooSearchIE(InfoExtractor):
  class YoutubePlaylistIE(InfoExtractor):
         """Information Extractor for YouTube playlists."""
  
-       _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
+       _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
@@ -2506,7 +2532,7 @@ class YoutubePlaylistIE(InfoExtractor):
  class YoutubeUserIE(InfoExtractor):
         """Information Extractor for YouTube users."""
  
-       _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
+       _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
         _GDATA_PAGE_SIZE = 50
         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
@@ -2594,7 +2620,7 @@ class YoutubeUserIE(InfoExtractor):
  class DepositFilesIE(InfoExtractor):
         """Information extractor for depositfiles.com"""
  
-       _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
+       _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
         IE_NAME = u'DepositFiles'
  
         def __init__(self, downloader=None):
@@ -2671,7 +2697,7 @@ class DepositFilesIE(InfoExtractor):
  class FacebookIE(InfoExtractor):
         """Information Extractor for Facebook"""
  
-       _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
+       _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/video/video\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
         _NETRC_MACHINE = 'facebook'
         _available_formats = ['highqual', 'lowqual']
@@ -2895,7 +2921,11 @@ class BlipTVIE(InfoExtractor):
  
         def report_extraction(self, file_id):
                 """Report information extraction."""
-               self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id)
+               self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))  # prefix now comes from IE_NAME instead of the hard-coded 'blip.tv'
+
+       def report_direct_download(self, title):
+               """Report that a direct download was detected."""
+               self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title))
  
         def _simplify_title(self, title):
                 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
@@ -2915,43 +2945,64 @@ class BlipTVIE(InfoExtractor):
                 json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
                 request = urllib2.Request(json_url)
                 self.report_extraction(mobj.group(1))
+               info = None
                 try:
-                       json_code = urllib2.urlopen(request).read()
+                       urlh = urllib2.urlopen(request)
+                       if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
+                               basename = url.split('/')[-1]
+                               title,ext = os.path.splitext(basename)
+                               ext = ext.replace('.', '')
+                               self.report_direct_download(title)
+                               info = {
+                                       'id': title,
+                                       'url': url,
+                                       'title': title,
+                                       'stitle': self._simplify_title(title),
+                                       'ext': ext,
+                                       'urlhandle': urlh
+                               }
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                         self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
                         return
-               try:
-                       json_data = json.loads(json_code)
-                       if 'Post' in json_data:
-                               data = json_data['Post']
-                       else:
-                               data = json_data
-
-                       upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
-                       video_url = data['media']['url']
-                       umobj = re.match(self._URL_EXT, video_url)
-                       if umobj is None:
-                               raise ValueError('Can not determine filename extension')
-                       ext = umobj.group(1)
+               if info is None: # Regular URL
+                       try:
+                               json_code = urlh.read()
+                       except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                               self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
+                               return
  
-                       self._downloader.increment_downloads()
+                       try:
+                               json_data = json.loads(json_code)
+                               if 'Post' in json_data:
+                                       data = json_data['Post']
+                               else:
+                                       data = json_data
+       
+                               upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
+                               video_url = data['media']['url']
+                               umobj = re.match(self._URL_EXT, video_url)
+                               if umobj is None:
+                                       raise ValueError('Can not determine filename extension')
+                               ext = umobj.group(1)
+       
+                               info = {
+                                       'id': data['item_id'],
+                                       'url': video_url,
+                                       'uploader': data['display_name'],
+                                       'upload_date': upload_date,
+                                       'title': data['title'],
+                                       'stitle': self._simplify_title(data['title']),
+                                       'ext': ext,
+                                       'format': data['media']['mimeType'],
+                                       'thumbnail': data['thumbnailUrl'],
+                                       'description': data['description'],
+                                       'player_url': data['embedUrl']
+                               }
+                       except (ValueError,KeyError), err:
+                               self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
+                               return
  
-                       info = {
-                               'id': data['item_id'],
-                               'url': video_url,
-                               'uploader': data['display_name'],
-                               'upload_date': upload_date,
-                               'title': data['title'],
-                               'stitle': self._simplify_title(data['title']),
-                               'ext': ext,
-                               'format': data['media']['mimeType'],
-                               'thumbnail': data['thumbnailUrl'],
-                               'description': data['description'],
-                               'player_url': data['embedUrl']
-                       }
-               except (ValueError,KeyError), err:
-                       self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
-                       return
+               self._downloader.increment_downloads()
  
                 try:
                         self._downloader.process_info(info)
@@ -3017,7 +3068,6 @@ class MyVideoIE(InfoExtractor):
                 video_title = sanitize_title(video_title)
  
                 try:
-                       print(video_url)
                         self._downloader.process_info({
                                 'id':           video_id,
                                 'url':          video_url,
@@ -3176,7 +3226,7 @@ class ComedyCentralIE(InfoExtractor):
  class EscapistIE(InfoExtractor):
         """Information extractor for The Escapist """
  
-       _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
+       _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
         IE_NAME = u'escapist'
  
         def report_extraction(self, showName):
@@ -3351,12 +3401,14 @@ class FFmpegExtractAudioPP(PostProcessor):
  
                 more_opts = []
                 if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
-                       if filecodec == 'aac' or filecodec == 'mp3':
+                       if filecodec in ['aac', 'mp3', 'vorbis']:
                                 # Lossless if possible
                                 acodec = 'copy'
                                 extension = filecodec
                                 if filecodec == 'aac':
                                         more_opts = ['-f', 'adts']
+                               if filecodec == 'vorbis':
+                                       extension = 'ogg'
                         else:
                                 # MP3 otherwise.
                                 acodec = 'libmp3lame'
@@ -3366,13 +3418,15 @@ class FFmpegExtractAudioPP(PostProcessor):
                                         more_opts += ['-ab', self._preferredquality]
                 else:
                         # We convert the audio (lossy)
-                       acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
+                       acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'vorbis': 'libvorbis'}[self._preferredcodec]
                         extension = self._preferredcodec
                         more_opts = []
                         if self._preferredquality is not None:
                                 more_opts += ['-ab', self._preferredquality]
                         if self._preferredcodec == 'aac':
                                 more_opts += ['-f', 'adts']
+                       if self._preferredcodec == 'vorbis':
+                               extension = 'ogg'
  
                 (prefix, ext) = os.path.splitext(path)
                 new_path = prefix + '.' + extension
@@ -3533,6 +3587,8 @@ def parseOpts():
                         action='store_const', dest='format', help='download all available video formats', const='all')
         video_format.add_option('--max-quality',
                         action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
+       video_format.add_option('-F', '--list-formats',
+                       action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
  
  
         verbosity.add_option('-q', '--quiet',
@@ -3572,7 +3628,7 @@ def parseOpts():
                         action='store_true', dest='autonumber',
                         help='number downloaded files starting from 00000', default=False)
         filesystem.add_option('-o', '--output',
-                       dest='outtmpl', metavar='TEMPLATE', help='output filename template')
+                       dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, and %% for a literal percent')
         filesystem.add_option('-a', '--batch-file',
                         dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
         filesystem.add_option('-w', '--no-overwrites',
@@ -3600,7 +3656,7 @@ def parseOpts():
         postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
                         help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
         postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
-                       help='"best", "aac" or "mp3"; best by default')
+                       help='"best", "aac", "vorbis" or "mp3"; best by default')
         postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
                         help='ffmpeg audio bitrate specification, 128k by default')
         postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
@@ -3734,7 +3790,7 @@ def main():
         except (TypeError, ValueError), err:
                 parser.error(u'invalid playlist end number specified')
         if opts.extractaudio:
-               if opts.audioformat not in ['best', 'aac', 'mp3']:
+               if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis']:
                         parser.error(u'invalid audio format specified')
  
         # File downloader
@@ -3753,6 +3809,7 @@ def main():
                 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
                 'format': opts.format,
                 'format_limit': opts.format_limit,
+               'listformats': opts.listformats,
                 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
                         or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
                         or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')