Merge pull request #8876 from remitamine/html5_media

author Yen Chi Hsuan <yan12125@gmail.com>

Sun, 10 Jul 2016 15:40:45 +0000 (23:40 +0800)

committer GitHub <noreply@github.com>

Sun, 10 Jul 2016 15:40:45 +0000 (23:40 +0800)
author Yen Chi Hsuan <yan12125@gmail.com>
Sun, 10 Jul 2016 15:40:45 +0000 (23:40 +0800)
committer GitHub <noreply@github.com>
Sun, 10 Jul 2016 15:40:45 +0000 (23:40 +0800)
diff --combined test/test_utils.py

index afd273a6533b68915c3b9cbcd547591882f9b37a,d84eb438f23cb555eecc278f80caf4a1ca71a262..2273b5a1072259bf6b9e60cca85360f3bfac1fe6
--- 1/test/test_utils.py
--- 2/test/test_utils.py
+++ b/test/test_utils.py
@@@ -33,7 -33,6 +33,7 @@@ from youtube_dl.utils import 
       ExtractorError,
       find_xpath_attr,
       fix_xml_ampersands,
+ +    get_element_by_class,
       InAdvancePagedList,
       intlist_to_bytes,
       is_html,
@@@ -61,13 -60,11 +61,13 @@@
       timeconvert,
       unescapeHTML,
       unified_strdate,
+ +    unified_timestamp,
       unsmuggle_url,
       uppercase_escape,
       lowercase_escape,
       url_basename,
       urlencode_postdata,
+ +    urshift,
       update_url_query,
       version_tuple,
       xpath_with_ns,
@@@ -81,6 -78,7 +81,7 @@@
       cli_option,
       cli_valueless_option,
       cli_bool_option,
+     parse_codecs,
   )
   from youtube_dl.compat import (
       compat_chr,
@@@ -286,28 -284,8 +287,28 @@@ class TestUtil(unittest.TestCase)
               '20150202')
           self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
           self.assertEqual(unified_strdate('25-09-2014'), '20140925')
+ +        self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
           self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
   
+ +    def test_unified_timestamps(self):
+ +        self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
+ +        self.assertEqual(unified_timestamp('8/7/2009'), 1247011200)
+ +        self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200)
+ +        self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598)
+ +        self.assertEqual(unified_timestamp('1968 12 10'), -33436800)
+ +        self.assertEqual(unified_timestamp('1968-12-10'), -33436800)
+ +        self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200)
+ +        self.assertEqual(
+ +            unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False),
+ +            1417001400)
+ +        self.assertEqual(
+ +            unified_timestamp('2/2/2015 6:47:40 PM', day_first=False),
+ +            1422902860)
+ +        self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900)
+ +        self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
+ +        self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
+ +        self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
+ +
       def test_determine_ext(self):
           self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
           self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
@@@ -406,12 -384,6 +407,12 @@@
           self.assertEqual(res_url, url)
           self.assertEqual(res_data, None)
   
+ +        smug_url = smuggle_url(url, {'a': 'b'})
+ +        smug_smug_url = smuggle_url(smug_url, {'c': 'd'})
+ +        res_url, res_data = unsmuggle_url(smug_smug_url)
+ +        self.assertEqual(res_url, url)
+ +        self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
+ +
       def test_shell_quote(self):
           args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
           self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
@@@ -608,6 -580,29 +609,29 @@@
               limit_length('foo bar baz asd', 12).startswith('foo bar'))
           self.assertTrue('...' in limit_length('foo bar baz asd', 12))
   
+     def test_parse_codecs(self):
+         self.assertEqual(parse_codecs(''), {})
+         self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
+             'vcodec': 'avc1.77.30',
+             'acodec': 'mp4a.40.2',
+         })
+         self.assertEqual(parse_codecs('mp4a.40.2'), {
+             'vcodec': 'none',
+             'acodec': 'mp4a.40.2',
+         })
+         self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), {
+             'vcodec': 'avc1.42001e',
+             'acodec': 'mp4a.40.5',
+         })
+         self.assertEqual(parse_codecs('avc3.640028'), {
+             'vcodec': 'avc3.640028',
+             'acodec': 'none',
+         })
+         self.assertEqual(parse_codecs(', h264,,newcodec,aac'), {
+             'vcodec': 'h264',
+             'acodec': 'aac',
+         })
+ 
       def test_escape_rfc3986(self):
           reserved = "!*'();:@&=+$,/?#[]"
           unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
@@@ -988,17 -983,5 +1012,17 @@@ The first lin
           self.assertRaises(ValueError, encode_base_n, 0, 70)
           self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
   
+ +    def test_urshift(self):
+ +        self.assertEqual(urshift(3, 1), 1)
+ +        self.assertEqual(urshift(-3, 1), 2147483646)
+ +
+ +    def test_get_element_by_class(self):
+ +        html = '''
+ +            <span class="foo bar">nice</span>
+ +        '''
+ +
+ +        self.assertEqual(get_element_by_class('foo', html), 'nice')
+ +        self.assertEqual(get_element_by_class('no-such-class', html), None)
+ +
   if __name__ == '__main__':
       unittest.main()
diff --combined youtube_dl/extractor/common.py

index 816baa424e2a9b8efc5f9ce0c27ff320cca77e74,661889593018091e656d78734e725ac95c3eb670..df546da2736c441428e941f845853f0205ce107a
--- 1/youtube_dl/extractor/common.py
--- 2/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@@ -44,7 -44,6 +44,7 @@@ from ..utils import 
       sanitized_Request,
       unescapeHTML,
       unified_strdate,
+ +    unified_timestamp,
       url_basename,
       xpath_element,
       xpath_text,
@@@ -55,6 -54,8 +55,8 @@@
       update_Request,
       update_url_query,
       parse_m3u8_attributes,
+     extract_attributes,
+     parse_codecs,
   )
   
   
@@@ -162,7 -163,6 +164,7 @@@ class InfoExtractor(object)
                           * "height" (optional, int)
                           * "resolution" (optional, string "{width}x{height"},
                                           deprecated)
+ +                        * "filesize" (optional, int)
       thumbnail:      Full URL to a video thumbnail image.
       description:    Full video description.
       uploader:       Full name of the video uploader.
@@@ -751,12 -751,10 +753,12 @@@
           return self._og_search_property('url', html, **kargs)
   
       def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
+ +        if not isinstance(name, (list, tuple)):
+ +            name = [name]
           if display_name is None:
- -            display_name = name
+ +            display_name = name[0]
           return self._html_search_regex(
- -            self._meta_regex(name),
+ +            [self._meta_regex(n) for n in name],
               html, display_name, fatal=fatal, group='content', **kwargs)
   
       def _dc_search_uploader(self, html):
@@@ -805,17 -803,15 +807,17 @@@
           return self._html_search_meta('twitter:player', html,
                                         'twitter card player')
   
- -    def _search_json_ld(self, html, video_id, **kwargs):
+ +    def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
           json_ld = self._search_regex(
               r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
               html, 'JSON-LD', group='json_ld', **kwargs)
           if not json_ld:
               return {}
- -        return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True))
+ +        return self._json_ld(
+ +            json_ld, video_id, fatal=kwargs.get('fatal', True),
+ +            expected_type=expected_type)
   
- -    def _json_ld(self, json_ld, video_id, fatal=True):
+ +    def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
           if isinstance(json_ld, compat_str):
               json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
           if not json_ld:
@@@ -823,8 -819,6 +825,8 @@@
           info = {}
           if json_ld.get('@context') == 'http://schema.org':
               item_type = json_ld.get('@type')
+ +            if expected_type is not None and expected_type != item_type:
+ +                return info
               if item_type == 'TVEpisode':
                   info.update({
                       'episode': unescapeHTML(json_ld.get('name')),
@@@ -843,19 -837,6 +845,19 @@@
                       'title': unescapeHTML(json_ld.get('headline')),
                       'description': unescapeHTML(json_ld.get('articleBody')),
                   })
+ +            elif item_type == 'VideoObject':
+ +                info.update({
+ +                    'url': json_ld.get('contentUrl'),
+ +                    'title': unescapeHTML(json_ld.get('name')),
+ +                    'description': unescapeHTML(json_ld.get('description')),
+ +                    'thumbnail': json_ld.get('thumbnailUrl'),
+ +                    'duration': parse_duration(json_ld.get('duration')),
+ +                    'timestamp': unified_timestamp(json_ld.get('uploadDate')),
+ +                    'filesize': float_or_none(json_ld.get('contentSize')),
+ +                    'tbr': int_or_none(json_ld.get('bitrate')),
+ +                    'width': int_or_none(json_ld.get('width')),
+ +                    'height': int_or_none(json_ld.get('height')),
+ +                })
           return dict((k, v) for k, v in info.items() if v is not None)
   
       @staticmethod
@@@ -897,11 -878,7 +899,11 @@@
                   f['ext'] = determine_ext(f['url'])
   
               if isinstance(field_preference, (list, tuple)):
- -                return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference)
+ +                return tuple(
+ +                    f.get(field)
+ +                    if f.get(field) is not None
+ +                    else ('' if field == 'format_id' else -1)
+ +                    for field in field_preference)
   
               preference = f.get('preference')
               if preference is None:
@@@ -1635,6 -1612,62 +1637,62 @@@
                           self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
           return formats
   
+     def _parse_html5_media_entries(self, base_url, webpage):
+         def absolute_url(video_url):
+             return compat_urlparse.urljoin(base_url, video_url)
+ 
+         def parse_content_type(content_type):
+             if not content_type:
+                 return {}
+             ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
+             if ctr:
+                 mimetype, codecs = ctr.groups()
+                 f = parse_codecs(codecs)
+                 f['ext'] = mimetype2ext(mimetype)
+                 return f
+             return {}
+ 
+         entries = []
+         for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
+             media_info = {
+                 'formats': [],
+                 'subtitles': {},
+             }
+             media_attributes = extract_attributes(media_tag)
+             src = media_attributes.get('src')
+             if src:
+                 media_info['formats'].append({
+                     'url': absolute_url(src),
+                     'vcodec': 'none' if media_type == 'audio' else None,
+                 })
+             media_info['thumbnail'] = media_attributes.get('poster')
+             if media_content:
+                 for source_tag in re.findall(r'<source[^>]+>', media_content):
+                     source_attributes = extract_attributes(source_tag)
+                     src = source_attributes.get('src')
+                     if not src:
+                         continue
+                     f = parse_content_type(source_attributes.get('type'))
+                     f.update({
+                         'url': absolute_url(src),
+                         'vcodec': 'none' if media_type == 'audio' else None,
+                     })
+                     media_info['formats'].append(f)
+                 for track_tag in re.findall(r'<track[^>]+>', media_content):
+                     track_attributes = extract_attributes(track_tag)
+                     kind = track_attributes.get('kind')
+                     if not kind or kind == 'subtitles':
+                         src = track_attributes.get('src')
+                         if not src:
+                             continue
+                         lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
+                         media_info['subtitles'].setdefault(lang, []).append({
+                             'url': absolute_url(src),
+                         })
+             if media_info['formats']:
+                 entries.append(media_info)
+         return entries
+ 
       def _live_title(self, name):
           """ Generate the title for a live video """
           now = datetime.datetime.now()
@@@ -1748,13 -1781,6 +1806,13 @@@
       def _mark_watched(self, *args, **kwargs):
           raise NotImplementedError('This method must be implemented by subclasses')
   
+ +    def geo_verification_headers(self):
+ +        headers = {}
+ +        geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
+ +        if geo_verification_proxy:
+ +            headers['Ytdl-request-proxy'] = geo_verification_proxy
+ +        return headers
+ +
   
   class SearchInfoExtractor(InfoExtractor):
       """
diff --combined youtube_dl/utils.py

index 3498697b60d70c45d4041f80f59945aefeb9e035,fe175e82c38f218d098f22ab1f9cc592965c45c3..4c1d0d526d5745622b3a5257e998d5611a6ad40b
--- 1/youtube_dl/utils.py
--- 2/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@@ -110,49 -110,6 +110,49 @@@ ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆ
                           itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
                                           'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
   
+ +DATE_FORMATS = (
+ +    '%d %B %Y',
+ +    '%d %b %Y',
+ +    '%B %d %Y',
+ +    '%b %d %Y',
+ +    '%b %dst %Y %I:%M',
+ +    '%b %dnd %Y %I:%M',
+ +    '%b %dth %Y %I:%M',
+ +    '%Y %m %d',
+ +    '%Y-%m-%d',
+ +    '%Y/%m/%d',
+ +    '%Y/%m/%d %H:%M:%S',
+ +    '%Y-%m-%d %H:%M:%S',
+ +    '%Y-%m-%d %H:%M:%S.%f',
+ +    '%d.%m.%Y %H:%M',
+ +    '%d.%m.%Y %H.%M',
+ +    '%Y-%m-%dT%H:%M:%SZ',
+ +    '%Y-%m-%dT%H:%M:%S.%fZ',
+ +    '%Y-%m-%dT%H:%M:%S.%f0Z',
+ +    '%Y-%m-%dT%H:%M:%S',
+ +    '%Y-%m-%dT%H:%M:%S.%f',
+ +    '%Y-%m-%dT%H:%M',
+ +)
+ +
+ +DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
+ +DATE_FORMATS_DAY_FIRST.extend([
+ +    '%d-%m-%Y',
+ +    '%d.%m.%Y',
+ +    '%d.%m.%y',
+ +    '%d/%m/%Y',
+ +    '%d/%m/%y',
+ +    '%d/%m/%Y %H:%M:%S',
+ +])
+ +
+ +DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
+ +DATE_FORMATS_MONTH_FIRST.extend([
+ +    '%m-%d-%Y',
+ +    '%m.%d.%Y',
+ +    '%m/%d/%Y',
+ +    '%m/%d/%y',
+ +    '%m/%d/%Y %H:%M:%S',
+ +])
+ +
   
   def preferredencoding():
       """Get preferred encoding.
@@@ -310,17 -267,9 +310,17 @@@ def get_element_by_id(id, html)
       return get_element_by_attribute('id', id, html)
   
   
- -def get_element_by_attribute(attribute, value, html):
+ +def get_element_by_class(class_name, html):
+ +    return get_element_by_attribute(
+ +        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
+ +        html, escape_value=False)
+ +
+ +
+ +def get_element_by_attribute(attribute, value, html, escape_value=True):
       """Return the content of the tag with the specified attribute in the passed HTML document"""
   
+ +    value = re.escape(value) if escape_value else value
+ +
       m = re.search(r'''(?xs)
           <([a-zA-Z0-9:._-]+)
            (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
@@@ -329,7 -278,7 +329,7 @@@
           \s*>
           (?P<content>.*?)
           </\1>
- -    ''' % (re.escape(attribute), re.escape(value)), html)
+ +    ''' % (re.escape(attribute), value), html)
   
       if not m:
           return None
@@@ -1026,24 -975,6 +1026,24 @@@ class YoutubeDLCookieProcessor(compat_u
       https_response = http_response
   
   
+ +def extract_timezone(date_str):
+ +    m = re.search(
+ +        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
+ +        date_str)
+ +    if not m:
+ +        timezone = datetime.timedelta()
+ +    else:
+ +        date_str = date_str[:-len(m.group('tz'))]
+ +        if not m.group('sign'):
+ +            timezone = datetime.timedelta()
+ +        else:
+ +            sign = 1 if m.group('sign') == '+' else -1
+ +            timezone = datetime.timedelta(
+ +                hours=sign * int(m.group('hours')),
+ +                minutes=sign * int(m.group('minutes')))
+ +    return timezone, date_str
+ +
+ +
   def parse_iso8601(date_str, delimiter='T', timezone=None):
       """ Return a UNIX timestamp from the given date """
   
@@@ -1053,8 -984,20 +1053,8 @@@
       date_str = re.sub(r'\.[0-9]+', '', date_str)
   
       if timezone is None:
- -        m = re.search(
- -            r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
- -            date_str)
- -        if not m:
- -            timezone = datetime.timedelta()
- -        else:
- -            date_str = date_str[:-len(m.group(0))]
- -            if not m.group('sign'):
- -                timezone = datetime.timedelta()
- -            else:
- -                sign = 1 if m.group('sign') == '+' else -1
- -                timezone = datetime.timedelta(
- -                    hours=sign * int(m.group('hours')),
- -                    minutes=sign * int(m.group('minutes')))
+ +        timezone, date_str = extract_timezone(date_str)
+ +
       try:
           date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
           dt = datetime.datetime.strptime(date_str, date_format) - timezone
@@@ -1063,10 -1006,6 +1063,10 @@@
           pass
   
   
+ +def date_formats(day_first=True):
+ +    return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
+ +
+ +
   def unified_strdate(date_str, day_first=True):
       """Return a string with the date in the format YYYYMMDD"""
   
@@@ -1075,11 -1014,53 +1075,11 @@@
       upload_date = None
       # Replace commas
       date_str = date_str.replace(',', ' ')
- -    # %z (UTC offset) is only supported in python>=3.2
- -    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
- -        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
       # Remove AM/PM + timezone
       date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+ +    _, date_str = extract_timezone(date_str)
   
- -    format_expressions = [
- -        '%d %B %Y',
- -        '%d %b %Y',
- -        '%B %d %Y',
- -        '%b %d %Y',
- -        '%b %dst %Y %I:%M',
- -        '%b %dnd %Y %I:%M',
- -        '%b %dth %Y %I:%M',
- -        '%Y %m %d',
- -        '%Y-%m-%d',
- -        '%Y/%m/%d',
- -        '%Y/%m/%d %H:%M:%S',
- -        '%Y-%m-%d %H:%M:%S',
- -        '%Y-%m-%d %H:%M:%S.%f',
- -        '%d.%m.%Y %H:%M',
- -        '%d.%m.%Y %H.%M',
- -        '%Y-%m-%dT%H:%M:%SZ',
- -        '%Y-%m-%dT%H:%M:%S.%fZ',
- -        '%Y-%m-%dT%H:%M:%S.%f0Z',
- -        '%Y-%m-%dT%H:%M:%S',
- -        '%Y-%m-%dT%H:%M:%S.%f',
- -        '%Y-%m-%dT%H:%M',
- -    ]
- -    if day_first:
- -        format_expressions.extend([
- -            '%d-%m-%Y',
- -            '%d.%m.%Y',
- -            '%d.%m.%y',
- -            '%d/%m/%Y',
- -            '%d/%m/%y',
- -            '%d/%m/%Y %H:%M:%S',
- -        ])
- -    else:
- -        format_expressions.extend([
- -            '%m-%d-%Y',
- -            '%m.%d.%Y',
- -            '%m/%d/%Y',
- -            '%m/%d/%y',
- -            '%m/%d/%Y %H:%M:%S',
- -        ])
- -    for expression in format_expressions:
+ +    for expression in date_formats(day_first):
           try:
               upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
           except ValueError:
@@@ -1095,29 -1076,6 +1095,29 @@@
           return compat_str(upload_date)
   
   
+ +def unified_timestamp(date_str, day_first=True):
+ +    if date_str is None:
+ +        return None
+ +
+ +    date_str = date_str.replace(',', ' ')
+ +
+ +    pm_delta = datetime.timedelta(hours=12 if re.search(r'(?i)PM', date_str) else 0)
+ +    timezone, date_str = extract_timezone(date_str)
+ +
+ +    # Remove AM/PM + timezone
+ +    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+ +
+ +    for expression in date_formats(day_first):
+ +        try:
+ +            dt = datetime.datetime.strptime(date_str, expression) - timezone + pm_delta
+ +            return calendar.timegm(dt.timetuple())
+ +        except ValueError:
+ +            pass
+ +    timetuple = email.utils.parsedate_tz(date_str)
+ +    if timetuple:
+ +        return calendar.timegm(timetuple.timetuple())
+ +
+ +
   def determine_ext(url, default_ext='unknown_video'):
       if url is None:
           return default_ext
@@@ -1452,8 -1410,6 +1452,8 @@@ def shell_quote(args)
   def smuggle_url(url, data):
       """ Pass additional data in a URL for internal use. """
   
+ +    url, idata = unsmuggle_url(url, {})
+ +    data.update(idata)
       sdata = compat_urllib_parse_urlencode(
           {'__youtubedl_smuggle': json.dumps(data)})
       return url + '#' + sdata
@@@ -1635,11 -1591,6 +1635,11 @@@ class HEADRequest(compat_urllib_request
           return 'HEAD'
   
   
+ +class PUTRequest(compat_urllib_request.Request):
+ +    def get_method(self):
+ +        return 'PUT'
+ +
+ +
   def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
       if get_attr:
           if v is not None:
@@@ -1675,10 -1626,6 +1675,10 @@@ def float_or_none(v, scale=1, invscale=
           return default
   
   
+ +def strip_or_none(v):
+ +    return None if v is None else v.strip()
+ +
+ +
   def parse_duration(s):
       if not isinstance(s, compat_basestring):
           return None
@@@ -1935,13 -1882,7 +1935,13 @@@ def update_Request(req, url=None, data=
       req_headers.update(headers)
       req_data = data or req.data
       req_url = update_url_query(url or req.get_full_url(), query)
- -    req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
+ +    req_get_method = req.get_method()
+ +    if req_get_method == 'HEAD':
+ +        req_type = HEADRequest
+ +    elif req_get_method == 'PUT':
+ +        req_type = PUTRequest
+ +    else:
+ +        req_type = compat_urllib_request.Request
       new_req = req_type(
           req_url, data=req_data, headers=req_headers,
           origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
@@@ -2105,7 -2046,6 +2105,7 @@@ def mimetype2ext(mt)
           return ext
   
       _, _, res = mt.rpartition('/')
+ +    res = res.lower()
   
       return {
           '3gpp': '3gp',
@@@ -2117,15 -2057,45 +2117,51 @@@
           'x-flv': 'flv',
           'x-mp4-fragmented': 'mp4',
           'x-ms-wmv': 'wmv',
+ +        'mpegurl': 'm3u8',
+ +        'x-mpegurl': 'm3u8',
+ +        'vnd.apple.mpegurl': 'm3u8',
+ +        'dash+xml': 'mpd',
+ +        'f4m': 'f4m',
+ +        'f4m+xml': 'f4m',
       }.get(res, res)
   
   
+ def parse_codecs(codecs_str):
+     # http://tools.ietf.org/html/rfc6381
+     if not codecs_str:
+         return {}
+     splited_codecs = list(filter(None, map(
+         lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
+     vcodec, acodec = None, None
+     for full_codec in splited_codecs:
+         codec = full_codec.split('.')[0]
+         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
+             if not vcodec:
+                 vcodec = full_codec
+         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac'):
+             if not acodec:
+                 acodec = full_codec
+         else:
+             write_string('WARNING: Unknown codec %s' % full_codec, sys.stderr)
+     if not vcodec and not acodec:
+         if len(splited_codecs) == 2:
+             return {
+                 'vcodec': vcodec,
+                 'acodec': acodec,
+             }
+         elif len(splited_codecs) == 1:
+             return {
+                 'vcodec': 'none',
+                 'acodec': vcodec,
+             }
+     else:
+         return {
+             'vcodec': vcodec or 'none',
+             'acodec': acodec or 'none',
+         }
+     return {}
+ 
+ 
   def urlhandle_detect_ext(url_handle):
       getheader = url_handle.headers.get
   
@@@ -2927,7 -2897,3 +2963,7 @@@ def parse_m3u8_attributes(attrib)
               val = val[1:-1]
           info[key] = val
       return info
+ +
+ +
+ +def urshift(val, n):
+ +    return val >> n if val >= 0 else (val + 0x100000000) >> n
author	Yen Chi Hsuan <yan12125@gmail.com>
	Sun, 10 Jul 2016 15:40:45 +0000 (23:40 +0800)
committer	GitHub <noreply@github.com>
	Sun, 10 Jul 2016 15:40:45 +0000 (23:40 +0800)
		1	2
test/test_utils.py	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/extractor/common.py	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/utils.py	patch \|	diff1 \|	diff2 \|	blob \| history