gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube_dl/InfoExtractors.py
default info_dict['format'] to info_dict['ext'] and make the YT one more verbose
[youtube-dl.git] / youtube_dl / InfoExtractors.py
index 64383fea41ba0a14e132f5200e7675b048528bb4..44b2472c2677334b935f9e0723d53d43cd915a5e 100644 (file)
@@ -29,33 +29,34 @@ class InfoExtractor(object):
        """Information Extractor class.
 
        Information extractors are the classes that, given a URL, extract
-       information from the video (or videos) the URL refers to. This
-       information includes the real video URL, the video title and simplified
-       title, author and others. The information is stored in a dictionary
-       which is then passed to the FileDownloader. The FileDownloader
-       processes this information possibly downloading the video to the file
-       system, among other possible outcomes. The dictionaries must include
-       the following fields:
-
-       id:             Video identifier.
-       url:            Final video URL.
-       uploader:       Nickname of the video uploader.
-       title:          Literal title.
-       ext:            Video filename extension.
-       format:         Video format.
-       player_url:     SWF Player URL (may be None).
-
-       The following fields are optional. Their primary purpose is to allow
-       youtube-dl to serve as the backend for a video search function, such
-       as the one in youtube2mp3.  They are only used when their respective
-       forced printing functions are called:
-
-       thumbnail:      Full URL to a video thumbnail image.
-       description:    One-line video description.
+       information about the video (or videos) the URL refers to. This
+       information includes the real video URL, the video title, author and
+       others. The information is stored in a dictionary which is then 
+       passed to the FileDownloader. The FileDownloader processes this
+       information possibly downloading the video to the file system, among
+       other possible outcomes.
+
+       The dictionaries must include the following fields:
+
+       id:         Video identifier.
+       url:        Final video URL.
+       uploader:   Nickname of the video uploader.
+       title:      Video title, unescaped.
+       ext:        Video filename extension.
+       player_url: SWF Player URL (may be None).
+
+       The following fields are optional:
+
+       format:         The video format, defaults to ext. Used by --get-format
+       thumbnail:      Full URL to a video thumbnail image.
+       description:    One-line video description.
 
        Subclasses of this one should re-define the _real_initialize() and
        _real_extract() methods and define a _VALID_URL regexp.
        Probably, they should also be added to the list of extractors.
+
+       _real_extract() must return a *list* of information dictionaries as
+       described above.
        """
 
        _ready = False
@@ -475,6 +476,9 @@ class YoutubeIE(InfoExtractor):
                        # Extension
                        video_extension = self._video_extensions.get(format_param, 'flv')
 
+                       video_format = '{} - {}'.format(format_param.decode('utf-8') if format_param else video_extension.decode('utf-8'),
+                                                           self._video_dimensions.get(format_param, '???'))
+
                        results.append({
                                'id':           video_id.decode('utf-8'),
                                'url':          video_real_url.decode('utf-8'),
@@ -482,7 +486,7 @@ class YoutubeIE(InfoExtractor):
                                'upload_date':  upload_date,
                                'title':        video_title,
                                'ext':          video_extension.decode('utf-8'),
-                               'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
+                               'format':       video_format,
                                'thumbnail':    video_thumbnail.decode('utf-8'),
                                'description':  video_description,
                                'player_url':   player_url,
@@ -616,7 +620,6 @@ class MetacafeIE(InfoExtractor):
                        'upload_date':  u'NA',
                        'title':        video_title,
                        'ext':          video_extension.decode('utf-8'),
-                       'format':       u'NA',
                        'player_url':   None,
                }]
 
@@ -715,7 +718,6 @@ class DailymotionIE(InfoExtractor):
                        'upload_date':  video_upload_date,
                        'title':        video_title,
                        'ext':          video_extension.decode('utf-8'),
-                       'format':       u'NA',
                        'player_url':   None,
                }]
 
@@ -810,7 +812,6 @@ class GoogleIE(InfoExtractor):
                        'upload_date':  u'NA',
                        'title':        video_title,
                        'ext':          video_extension.decode('utf-8'),
-                       'format':       u'NA',
                        'player_url':   None,
                }]
 
@@ -877,7 +878,6 @@ class PhotobucketIE(InfoExtractor):
                        'upload_date':  u'NA',
                        'title':        video_title,
                        'ext':          video_extension.decode('utf-8'),
-                       'format':       u'NA',
                        'player_url':   None,
                }]
 
@@ -1282,7 +1282,6 @@ class GenericIE(InfoExtractor):
                        'upload_date':  u'NA',
                        'title':        video_title,
                        'ext':          video_extension.decode('utf-8'),
-                       'format':       u'NA',
                        'player_url':   None,
                }]
 
@@ -1888,7 +1887,6 @@ class DepositFilesIE(InfoExtractor):
                        'upload_date':  u'NA',
                        'title':        file_title,
                        'ext':          file_extension.decode('utf-8'),
-                       'format':       u'NA',
                        'player_url':   None,
                }]
 
@@ -2243,7 +2241,6 @@ class MyVideoIE(InfoExtractor):
                        'upload_date':  u'NA',
                        'title':        video_title,
                        'ext':          u'flv',
-                       'format':       u'NA',
                        'player_url':   None,
                }]
 
@@ -2291,7 +2288,6 @@ class ComedyCentralIE(InfoExtractor):
                        print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
 
 
-
        def _real_extract(self, url):
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
@@ -2332,10 +2328,19 @@ class ComedyCentralIE(InfoExtractor):
                        epTitle = mobj.group('episode')
 
                mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
+
                if len(mMovieParams) == 0:
-                       self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
-                       return
+                       # The Colbert Report embeds the information without
+                       # a URL prefix, so extract the alternate reference
+                       # and then add the URL prefix manually.
 
+                       altMovieParams = re.findall('data-mgid="([^"]*episode.*?:.*?)"', html)
+                       if len(altMovieParams) == 0:
+                               self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
+                               return
+                       else:
+                               mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
+               
                playerUrl_raw = mMovieParams[0][0]
                self.report_player_url(epTitle)
                try:
@@ -2386,8 +2391,8 @@ class ComedyCentralIE(InfoExtractor):
                                continue
                        
                        if self._downloader.params.get('listformats', None):
-                           self._print_formats([i[0] for i in turls])
-                           return
+                               self._print_formats([i[0] for i in turls])
+                               return
 
                        # For now, just pick the highest bitrate
                        format,video_url = turls[-1]
@@ -2397,20 +2402,17 @@ class ComedyCentralIE(InfoExtractor):
 
                        # Select format if we can find one
                        for f,v in turls:
-                           if f == req_format:
-                             format, video_url = f, v
-                             break
-
-                       # Patch to download from alternative CDN, which does not 
-                        # break on current RTMPDump builds
-            
+                               if f == req_format:
+                                       format, video_url = f, v
+                                       break
 
+                       # Patch to download from alternative CDN, which does not
+                       # break on current RTMPDump builds
                        broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
                        better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
-            
+
                        if video_url.startswith(broken_cdn):
-                            video_url = video_url.replace(broken_cdn, better_cdn)
-                    
+                               video_url = video_url.replace(broken_cdn, better_cdn)
 
                        effTitle = showId + u'-' + epTitle
                        info = {
@@ -2496,7 +2498,6 @@ class EscapistIE(InfoExtractor):
                        'upload_date': None,
                        'title': showName,
                        'ext': 'flv',
-                       'format': 'flv',
                        'thumbnail': imgUrl,
                        'description': description,
                        'player_url': playerUrl,
@@ -2561,7 +2562,6 @@ class CollegeHumorIE(InfoExtractor):
                        info['url'] = videoNode.findall('./file')[0].text
                        info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
                        info['ext'] = info['url'].rpartition('.')[2]
-                       info['format'] = info['ext']
                except IndexError:
                        self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
                        return
@@ -2632,7 +2632,6 @@ class XVideosIE(InfoExtractor):
                        'upload_date': None,
                        'title': video_title,
                        'ext': 'flv',
-                       'format': 'flv',
                        'thumbnail': video_thumbnail,
                        'description': None,
                        'player_url': None,
@@ -2729,7 +2728,6 @@ class SoundcloudIE(InfoExtractor):
                        'upload_date':  upload_date,
                        'title':        title,
                        'ext':          u'mp3',
-                       'format':       u'NA',
                        'player_url':   None,
                        'description': description.decode('utf-8')
                }]
@@ -2797,8 +2795,7 @@ class InfoQIE(InfoExtractor):
                        'uploader': None,
                        'upload_date': None,
                        'title': video_title,
-                       'ext': extension,
-                       'format': extension, # Extension is always(?) mp4, but seems to be flv
+                       'ext': extension, # Extension is always(?) mp4, but seems to be flv
                        'thumbnail': None,
                        'description': video_description,
                        'player_url': None,
@@ -2962,7 +2959,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
                                self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
                                return
                        info['ext'] = info['url'].rpartition('.')[2]
-                       info['format'] = info['ext']
                        return [info]
                elif mobj.group('course'): # A course page
                        course = mobj.group('course')
@@ -3236,7 +3232,6 @@ class YoukuIE(InfoExtractor):
                                'uploader': None,
                                'title': video_title,
                                'ext': ext,
-                               'format': u'NA'
                        }
                        files_info.append(info)
 
@@ -3300,7 +3295,6 @@ class XNXXIE(InfoExtractor):
                                'upload_date': None,
                                'title': video_title,
                                'ext': 'flv',
-                               'format': 'flv',
                                'thumbnail': video_thumbnail,
                                'description': None,
                                'player_url': None}
@@ -3427,6 +3421,5 @@ class GooglePlusIE(InfoExtractor):
                        'upload_date':  upload_date.decode('utf-8'),
                        'title':        video_title.decode('utf-8'),
                        'ext':          video_extension.decode('utf-8'),
-                       'format':       u'NA',
                        'player_url':   None,
                }]