X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=69d5f687cbcfc913c1ee8ae3d8bc0a530b7202f0;hb=bcc069a937ca51b85e57fee61eff2f45f44816ac;hp=26dd9882f80ad1b2feea0f9ed6bf45718b4311af;hpb=212a5e28bae61f764e8e802e403a15cbe62f0dc6;p=youtube-dl.git

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 26dd9882f..69d5f687c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,11 +1,12 @@
 import base64
 import hashlib
 import json
+import netrc
 import os
 import re
 import socket
 import sys
-import netrc
+import time
 import xml.etree.ElementTree
 
 from ..utils import (
@@ -17,6 +18,7 @@ from ..utils import (
     clean_html,
     compiled_regex_type,
     ExtractorError,
+    int_or_none,
     RegexNotFoundError,
     sanitize_filename,
     unescapeHTML,
@@ -68,6 +70,7 @@ class InfoExtractor(object):
                     * vcodec     Name of the video codec in use
                     * container  Name of the container format
                     * filesize   The number of bytes, if known in advance
+                    * filesize_approx  An estimate for the number of bytes
                     * player_url SWF Player URL (used for rtmpdump).
                     * protocol   The protocol that will be used for the actual
                                  download, lower-case.
@@ -81,6 +84,12 @@ class InfoExtractor(object):
                                  format, irrespective of the file format.
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
+                    * http_referer  HTTP Referer header value to set.
+                    * http_method  HTTP method to use for the download.
+                    * http_headers  A dictionary of additional HTTP headers
+                                 to add to the request.
+                    * http_post_data  Additional data to send with a POST
+                                 request.
     url:            Final video URL.
     ext:            Video filename extension.
     format:         The video format, defaults to ext (used for --get-format)
@@ -92,8 +101,12 @@ class InfoExtractor(object):
                     unique, but available before title. Typically, id is
                     something like "4234987", title "Dancing naked mole rats",
                     and display_id "dancing-naked-mole-rats"
-    thumbnails:     A list of dictionaries (with the entries "resolution" and
-                    "url") for the varying thumbnails
+    thumbnails:     A list of dictionaries, with the following entries:
+                        * "url"
+                        * "width" (optional, int)
+                        * "height" (optional, int)
+                        * "resolution" (optional, string "{width}x{height"},
+                                        deprecated)
     thumbnail:      Full URL to a video thumbnail image.
     description:    One-line video description.
     uploader:       Full name of the video uploader.
@@ -295,8 +308,12 @@ class InfoExtractor(object):
     def _download_json(self, url_or_request, video_id,
                        note=u'Downloading JSON metadata',
                        errnote=u'Unable to download JSON metadata',
-                       transform_source=None):
-        json_string = self._download_webpage(url_or_request, video_id, note, errnote)
+                       transform_source=None,
+                       fatal=True):
+        json_string = self._download_webpage(
+            url_or_request, video_id, note, errnote, fatal=fatal)
+        if (not fatal) and json_string is False:
+            return None
         if transform_source:
             json_string = transform_source(json_string)
         try:
@@ -343,16 +360,6 @@ class InfoExtractor(object):
     @staticmethod
     def playlist_result(entries, playlist_id=None, playlist_title=None):
         """Returns a playlist"""
-        # Ensure we don't have any duplicates in the playlist
-        seen = set()
-        new_list = []
-        for url in entries:
-            theurl = tuple(url.items())
-            if theurl not in seen:
-             seen.add(theurl)
-             new_list.append(url)
-             entries = new_list
-
         video_info = {'_type': 'playlist',
                       'entries': entries}
         if playlist_id:
@@ -373,7 +380,8 @@ class InfoExtractor(object):
         else:
             for p in pattern:
                 mobj = re.search(p, string, flags)
-                if mobj: break
+                if mobj:
+                    break
 
         if os.name != 'nt' and sys.stderr.isatty():
             _name = u'\033[0;34m%s\033[0m' % name
@@ -432,6 +440,22 @@ class InfoExtractor(object):
         
         return (username, password)
 
+    def _get_tfa_info(self):
+        """
+        Get the two-factor authentication info
+        TODO - asking the user will be required for sms/phone verify
+        currently just uses the command line option
+        If there's no info available, return None
+        """
+        if self._downloader is None:
+            return None
+        downloader_params = self._downloader.params
+
+        if downloader_params.get('twofactor', None) is not None:
+            return downloader_params['twofactor']
+
+        return None
+
     # Helper functions for extracting OpenGraph info
     @staticmethod
     def _og_regexes(prop):
@@ -461,18 +485,22 @@ class InfoExtractor(object):
         return self._og_search_property('title', html, **kargs)
 
     def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
-        regexes = self._og_regexes('video')
-        if secure: regexes = self._og_regexes('video:secure_url') + regexes
+        regexes = self._og_regexes('video') + self._og_regexes('video:url')
+        if secure:
+            regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
 
-    def _html_search_meta(self, name, html, display_name=None, fatal=False):
+    def _og_search_url(self, html, **kargs):
+        return self._og_search_property('url', html, **kargs)
+
+    def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
         if display_name is None:
             display_name = name
         return self._html_search_regex(
             r'''(?ix)<meta
-                    (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
+                    (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
-            html, display_name, fatal=fatal)
+            html, display_name, fatal=fatal, **kwargs)
 
     def _dc_search_uploader(self, html):
         return self._html_search_meta('dc.creator', html, 'uploader')
@@ -557,6 +585,7 @@ class InfoExtractor(object):
                 f.get('abr') if f.get('abr') is not None else -1,
                 audio_ext_preference,
                 f.get('filesize') if f.get('filesize') is not None else -1,
+                f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
                 f.get('format_id'),
             )
         formats.sort(key=_formats_key)
@@ -578,6 +607,35 @@ class InfoExtractor(object):
         else:
             return url
 
+    def _sleep(self, timeout, video_id, msg_template=None):
+        if msg_template is None:
+            msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
+        msg = msg_template % {'video_id': video_id, 'timeout': timeout}
+        self.to_screen(msg)
+        time.sleep(timeout)
+
+    def _extract_f4m_formats(self, manifest_url, video_id):
+        manifest = self._download_xml(
+            manifest_url, video_id, 'Downloading f4m manifest',
+            'Unable to download f4m manifest')
+
+        formats = []
+        media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
+        for i, media_el in enumerate(media_nodes):
+            tbr = int_or_none(media_el.attrib.get('bitrate'))
+            format_id = 'f4m-%d' % (i if tbr is None else tbr)
+            formats.append({
+                'format_id': format_id,
+                'url': manifest_url,
+                'ext': 'flv',
+                'tbr': tbr,
+                'width': int_or_none(media_el.attrib.get('width')),
+                'height': int_or_none(media_el.attrib.get('height')),
+            })
+        self._sort_formats(formats)
+
+        return formats
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
@@ -621,4 +679,3 @@ class SearchInfoExtractor(InfoExtractor):
     @property
     def SEARCH_KEY(self):
         return self._SEARCH_KEY
-