Merge remote-tracking branch 'upstream/master'

2013-09-03 12:22:29 -07:00 · 2013-09-03 12:22:29 -07:00 · c3dd69eab4
parent 85f03346eb c8dbccde30
commit c3dd69eab4
8 changed files with 131 additions and 26 deletions
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@ -0,0 +1,33 @@
 #!/usr/bin/env python3
 import sys
 import os
 import textwrap
 # We must be able to import youtube_dl
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 import youtube_dl
 def main():
    """Regenerate supportedsites.html from its template.

    Reads ``supportedsites.html.in`` from the current working directory,
    replaces the ``@SITES@`` placeholder with one ``<li>`` entry per
    extractor returned by ``youtube_dl.gen_extractors()`` (sorted by
    ``IE_NAME``), and writes the result to ``supportedsites.html``.
    """
    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
        template = tmplf.read()

    ie_htmls = []
    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
        # Not every extractor defines IE_DESC.  Only the attribute lookup is
        # guarded so that an unrelated error while formatting is not hidden.
        try:
            ie_desc = ie.IE_DESC
        except AttributeError:
            pass
        else:
            ie_html += ': {}'.format(ie_desc)
        if not ie.working():
            ie_html += ' (Currently broken)'
        ie_htmls.append('<li>{}</li>'.format(ie_html))

    # Each list item goes on its own line, indented by one tab.
    sites_html = textwrap.indent('\n'.join(ie_htmls), '\t')
    template = template.replace('@SITES@', sites_html)

    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
        sitesf.write(template)
 # Only regenerate the page when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == '__main__':
    main()
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@ -85,6 +85,7 @@ ROOT=$(pwd)
    "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
    "$ROOT/devscripts/gh-pages/generate-download.py"
    "$ROOT/devscripts/gh-pages/update-copyright.py"
    "$ROOT/devscripts/gh-pages/update-sites.py"
    git add *.html *.html.in update
    git commit -m "release $version"
    git show HEAD
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -89,6 +89,7 @@ from .tutv import TutvIE
 from .unistra import UnistraIE
 from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .videofyme import VideofyMeIE
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@ -55,7 +55,8 @@ class DailymotionIE(InfoExtractor):
        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id,
                                            u'Downloading embed page')
-        info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
+        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
            'video info', flags=re.MULTILINE)
        info = json.loads(info)
        # TODO: support choosing qualities
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@ -122,7 +122,7 @@ class MetacafeIE(InfoExtractor):
        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
        description = self._og_search_description(webpage)
        video_uploader = self._html_search_regex(
-                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
+                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                webpage, u'uploader nickname', fatal=False)
        return {
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@ -14,19 +14,6 @@ from ..utils import (
 class ORFIE(InfoExtractor):
    _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter',
        u'file': u'6566957.flv',
        u'info_dict': {
            u'title': u'Wetter',
            u'description': u'Christa Kummer, Marcus Wadsak und Kollegen  präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at',
        },
        u'params': {
            # It uses rtmp
            u'skip_download': True,
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dl/extractor/veehd.py
@ -0,0 +1,56 @@
 import re
 import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
    get_element_by_id,
    clean_html,
 )
 class VeeHDIE(InfoExtractor):
    """Information extractor for veehd.com video pages."""

    # The dot is escaped so that only the literal host "veehd.com" matches.
    _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://veehd.com/video/4686958',
        u'file': u'4686958.mp4',
        u'info_dict': {
            u'title': u'Time Lapse View from Space ( ISS)',
            u'uploader_id': u'spotted',
            u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video at ``url``.

        Downloads the video page, follows the player iframe it references,
        and reads the media URL from the JSON ``config`` embedded in the
        player page.  The player path, the config and the title are
        mandatory; uploader, thumbnail and description are looked up with
        ``fatal=False`` so a missing one does not abort the extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # The page assigns the player iframe's src from JavaScript.
        player_path = self._search_regex(r'\$\("#playeriframe"\)\.attr\({src : "(.+?)"',
            webpage, u'player path')
        player_url = compat_urlparse.urljoin(url, player_path)
        player_page = self._download_webpage(player_url, video_id,
            u'Downloading player page')
        config_json = self._search_regex(r'value=\'config=({.+?})\'',
            player_page, u'config json')
        config = json.loads(config_json)

        # The clip URL is stored percent-encoded inside the config.
        video_url = compat_urlparse.unquote(config['clip']['url'])

        # Keep the text before the last '|'.  When there is no '|',
        # rpartition() returns an empty head, so use the whole text instead
        # of ending up with an empty title.
        # NOTE(review): get_element_by_id presumably returns None when the
        # element is absent, which would raise AttributeError here - confirm.
        video_name = get_element_by_id('videoName', webpage)
        name_head, separator, _ = video_name.rpartition('|')
        title = clean_html(name_head if separator else video_name)

        # Optional metadata: do not fail the whole extraction over it.
        uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
            webpage, u'uploader', fatal=False)
        thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
            webpage, u'thumbnail', fatal=False)
        description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
            webpage, u'description', fatal=False, flags=re.DOTALL)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'mp4',
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
            'description': description,
        }
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -44,6 +44,16 @@ class VimeoIE(InfoExtractor):
                u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
            },
        },
        {
            u'url': u'http://player.vimeo.com/video/54469442',
            u'file': u'54469442.mp4',
            u'md5': u'619b811a4417aa4abe78dc653becf511',
            u'note': u'Videos that embed the url in the player page',
            u'info_dict': {
                u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
                u'uploader': u'The BLN & Business of Software',
            },
        },
    ]
    def _login(self):
@ -112,7 +122,8 @@ class VimeoIE(InfoExtractor):
        # Extract the config JSON
        try:
-            config = webpage.split(' = {config:')[1].split(',assets:')[0]
+            config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
                webpage, u'info section', flags=re.DOTALL)
            config = json.loads(config)
        except:
            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
@ -132,12 +143,22 @@ class VimeoIE(InfoExtractor):
        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
        # Extract video thumbnail
-        video_thumbnail = config["video"]["thumbnail"]
+        video_thumbnail = config["video"].get("thumbnail")
        if video_thumbnail is None:
            _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
        # Extract video description
        video_description = None
        try:
            video_description = get_element_by_attribute("itemprop", "description", webpage)
            if video_description: video_description = clean_html(video_description)
-        else: video_description = u''
+        except AssertionError as err:
            # On some pages like (http://player.vimeo.com/video/54469442) the
            # html tags are not closed, python 2.6 cannot handle it
            if err.args[0] == 'we should not get here!':
                pass
            else:
                raise
        # Extract upload date
        video_upload_date = None
@ -154,14 +175,15 @@ class VimeoIE(InfoExtractor):
        # TODO bind to format param
        codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
        files = { 'hd': [], 'sd': [], 'other': []}
        config_files = config["video"].get("files") or config["request"].get("files")
        for codec_name, codec_extension in codecs:
-            if codec_name in config["video"]["files"]:
+            if codec_name in config_files:
-                if 'hd' in config["video"]["files"][codec_name]:
+                if 'hd' in config_files[codec_name]:
                    files['hd'].append((codec_name, codec_extension, 'hd'))
-                elif 'sd' in config["video"]["files"][codec_name]:
+                elif 'sd' in config_files[codec_name]:
                    files['sd'].append((codec_name, codec_extension, 'sd'))
                else:
-                    files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
+                    files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))
        for quality in ('hd', 'sd', 'other'):
            if len(files[quality]) > 0:
@ -173,6 +195,10 @@ class VimeoIE(InfoExtractor):
        else:
            raise ExtractorError(u'No known codec found')
        video_url = None
        if isinstance(config_files[video_codec], dict):
            video_url = config_files[video_codec][video_quality].get("url")
        if video_url is None:
            video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
                        %(video_id, sig, timestamp, video_quality, video_codec.upper())