X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=3213abacfc95f9e9dced77c2b8d9d5eac8e9c335;hb=da8fb85859964d9a1d21a0328eb9044e19499d9c;hp=11b31db88422229b37c85a96ed1df3746867bf5a;hpb=57c7411f4615cda595f06fa461719b4336d808f4;p=youtube-dl.git

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 11b31db88..3213abacf 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,11 +1,12 @@
 import base64
 import hashlib
 import json
+import netrc
 import os
 import re
 import socket
 import sys
-import netrc
+import time
 import xml.etree.ElementTree
 
 from ..utils import (
@@ -68,6 +69,7 @@ class InfoExtractor(object):
                     * vcodec     Name of the video codec in use
                     * container  Name of the container format
                     * filesize   The number of bytes, if known in advance
+                    * filesize_approx  An estimate for the number of bytes
                     * player_url SWF Player URL (used for rtmpdump).
                     * protocol   The protocol that will be used for the actual
                                  download, lower-case.
@@ -92,8 +94,12 @@ class InfoExtractor(object):
                     unique, but available before title. Typically, id is
                     something like "4234987", title "Dancing naked mole rats",
                     and display_id "dancing-naked-mole-rats"
-    thumbnails:     A list of dictionaries (with the entries "resolution" and
-                    "url") for the varying thumbnails
+    thumbnails:     A list of dictionaries, with the following entries:
+                        * "url"
+                        * "width" (optional, int)
+                        * "height" (optional, int)
+                        * "resolution" (optional, string "{width}x{height"},
+                                        deprecated)
     thumbnail:      Full URL to a video thumbnail image.
     description:    One-line video description.
     uploader:       Full name of the video uploader.
@@ -113,6 +119,8 @@ class InfoExtractor(object):
     webpage_url:    The url to the video webpage, if given to youtube-dl it
                     should allow to get the same result again. (It will be set
                     by YoutubeDL if it's missing)
+    categories:     A list of categories that the video falls in, for example
+                    ["Sports", "Berlin"]
 
     Unless mentioned otherwise, the fields should be Unicode strings.
 
@@ -242,7 +250,7 @@ class InfoExtractor(object):
                 url = url_or_request.get_full_url()
             except AttributeError:
                 url = url_or_request
-            basen = video_id + '_' + url
+            basen = '%s_%s' % (video_id, url)
             if len(basen) > 240:
                 h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
                 basen = basen[:240 - len(h)] + h
@@ -453,14 +461,17 @@ class InfoExtractor(object):
         if secure: regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
 
-    def _html_search_meta(self, name, html, display_name=None, fatal=False):
+    def _og_search_url(self, html, **kargs):
+        return self._og_search_property('url', html, **kargs)
+
+    def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
         if display_name is None:
             display_name = name
         return self._html_search_regex(
             r'''(?ix)<meta
                     (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
-            html, display_name, fatal=fatal)
+            html, display_name, fatal=fatal, **kwargs)
 
     def _dc_search_uploader(self, html):
         return self._html_search_meta('dc.creator', html, 'uploader')
@@ -545,6 +556,7 @@ class InfoExtractor(object):
                 f.get('abr') if f.get('abr') is not None else -1,
                 audio_ext_preference,
                 f.get('filesize') if f.get('filesize') is not None else -1,
+                f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
                 f.get('format_id'),
             )
         formats.sort(key=_formats_key)
@@ -566,6 +578,13 @@ class InfoExtractor(object):
         else:
             return url
 
+    def _sleep(self, timeout, video_id, msg_template=None):
+        if msg_template is None:
+            msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
+        msg = msg_template % {'video_id': video_id, 'timeout': timeout}
+        self.to_screen(msg)
+        time.sleep(timeout)
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
@@ -609,4 +628,3 @@ class SearchInfoExtractor(InfoExtractor):
     @property
     def SEARCH_KEY(self):
         return self._SEARCH_KEY
-