haruhi-dl/youtube_dl/InfoExtractors.py

import base64
import datetime
import itertools
import netrc
import os
import re
import socket
import time
import email.utils
import xml.etree.ElementTree
import random
import math
import operator
import hashlib
import binascii
import urllib

from .utils import *
from .extractor.common import InfoExtractor, SearchInfoExtractor

from .extractor.ard import ARDIE
from .extractor.arte import ArteTvIE
from .extractor.bandcamp import BandcampIE
from .extractor.bliptv import BlipTVIE, BlipTVUserIE
from .extractor.comedycentral import ComedyCentralIE
from .extractor.collegehumor import CollegeHumorIE
from .extractor.dailymotion import DailymotionIE
from .extractor.depositfiles import DepositFilesIE
from .extractor.eighttracks import EightTracksIE
from .extractor.escapist import EscapistIE
from .extractor.facebook import FacebookIE
from .extractor.flickr import FlickrIE
from .extractor.funnyordie import FunnyOrDieIE
from .extractor.gametrailers import GametrailersIE
from .extractor.generic import GenericIE
from .extractor.googleplus import GooglePlusIE
from .extractor.googlesearch import GoogleSearchIE
from .extractor.howcast import HowcastIE
from .extractor.hypem import HypemIE
from .extractor.ina import InaIE
from .extractor.infoq import InfoQIE
from .extractor.justintv import JustinTVIE
from .extractor.keek import KeekIE
from .extractor.liveleak import LiveLeakIE
from .extractor.metacafe import MetacafeIE
from .extractor.mixcloud import MixcloudIE
from .extractor.mtv import MTVIE
from .extractor.myspass import MySpassIE
from .extractor.myvideo import MyVideoIE
from .extractor.nba import NBAIE
from .extractor.statigram import StatigramIE
from .extractor.photobucket import PhotobucketIE
from .extractor.pornotube import PornotubeIE
from .extractor.rbmaradio import RBMARadioIE
from .extractor.redtube import RedTubeIE
from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
from .extractor.spiegel import SpiegelIE
from .extractor.stanfordoc import StanfordOpenClassroomIE
from .extractor.steam import SteamIE
from .extractor.ted import TEDIE
from .extractor.tumblr import TumblrIE
from .extractor.ustream import UstreamIE
from .extractor.vbox7 import Vbox7IE
from .extractor.vimeo import VimeoIE
from .extractor.vine import VineIE
from .extractor.worldstarhiphop import WorldStarHipHopIE
from .extractor.xnxx import XNXXIE
from .extractor.xvideos import XVideosIE
from .extractor.yahoo import YahooIE, YahooSearchIE
from .extractor.youjizz import YouJizzIE
from .extractor.youku import YoukuIE
from .extractor.youporn import YouPornIE
from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
from .extractor.zdf import ZDFIE


class TeamcocoIE(InfoExtractor):
    """Information Extractor for teamcoco.com video pages"""
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for the video at url.

        Raises ExtractorError if url does not match _VALID_URL.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # The slug taken from the URL accompanies the page download; the
        # numeric video id is read from the page markup afterwards.
        page = self._download_webpage(url, url_match.group('url_title'))
        video_id = self._html_search_regex(
            r'<article class="video" data-id="(\d+?)"',
            page, u'video id')

        self.report_extraction(video_id)

        # Title is looked up with the default arguments; thumbnail and
        # description are looked up with fatal=False.
        title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            page, u'title')
        thumbnail_url = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)"',
            page, u'thumbnail', fatal=False)
        description = self._html_search_regex(
            r'<meta property="og:description" content="(.*?)"',
            page, u'description', fatal=False)

        # A second document, addressed by the numeric id, carries the
        # <file type="high"> element from which the media URL is taken.
        xml_data = self._download_webpage(
            'http://teamcoco.com/cvp/2.0/%s.xml' % video_id,
            video_id, 'Downloading data webpage')
        media_url = self._html_search_regex(
            r'<file type="high".*?>(.*?)</file>',
            xml_data, u'video URL')

        info = {
            'id':          video_id,
            'url':         media_url,
            'ext':         'mp4',
            'title':       title,
            'thumbnail':   thumbnail_url,
            'description': description,
        }
        return [info]

class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
    # The dot after "www" is escaped so that only a literal "www." prefix is
    # accepted (an unescaped dot would match any character).
    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for the video at url.

        Raises ExtractorError if url does not match _VALID_URL or if the
        media URL cannot be found in the downloaded page.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with the extractor's own error type instead of an
            # AttributeError on mobj.group() below.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        # The page is requested through a URL rebuilt from the numeric id only.
        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
        webpage = self._download_webpage(mrss_url, video_id)

        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract media URL')
        server = mobj.group('server')
        media_file = mobj.group('file')
        if not server:
            # With an empty 'srv' value, 'file' is used on its own after
            # URL-unquoting.
            video_url = compat_urllib_parse.unquote(media_file)
        else:
            video_url = server + '/key=' + media_file
        # The extension is whatever follows the last dot of the media URL.
        video_extension = video_url.split('.')[-1]

        video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
            webpage, u'title')

        # NOTE: no description is extracted; it could not be seen anywhere in
        # the site UI.

        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
        if mobj:
            # Concatenate the date parts into a YYYYMMDD string.
            video_upload_date = mobj.group('upload_date_Y') + mobj.group('upload_date_m') + mobj.group('upload_date_d')
        else:
            # A missing upload date is not fatal: warn and carry on.
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract upload date')

        video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
            webpage, u'uploader id', default=u'anonymous')

        video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id':       video_id,
            'url':      video_url,
            'ext':      video_extension,
            'title':    video_title,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'thumbnail': video_thumbnail,
        }]


def gen_extractors():
    """ Instantiate every supported extractor and return the instances as a list.
    The position in the list is significant: the first extractor that matches
    a URL is the one that handles it.
    """
    # Extractor classes, listed in matching priority order.
    ordered_classes = (
        YoutubePlaylistIE,
        YoutubeChannelIE,
        YoutubeUserIE,
        YoutubeSearchIE,
        YoutubeIE,
        MetacafeIE,
        DailymotionIE,
        GoogleSearchIE,
        PhotobucketIE,
        YahooIE,
        YahooSearchIE,
        DepositFilesIE,
        FacebookIE,
        BlipTVIE,
        BlipTVUserIE,
        VimeoIE,
        MyVideoIE,
        ComedyCentralIE,
        EscapistIE,
        CollegeHumorIE,
        XVideosIE,
        SoundcloudSetIE,
        SoundcloudIE,
        InfoQIE,
        MixcloudIE,
        StanfordOpenClassroomIE,
        MTVIE,
        YoukuIE,
        XNXXIE,
        YouJizzIE,
        PornotubeIE,
        YouPornIE,
        GooglePlusIE,
        ArteTvIE,
        NBAIE,
        WorldStarHipHopIE,
        JustinTVIE,
        FunnyOrDieIE,
        SteamIE,
        UstreamIE,
        RBMARadioIE,
        EightTracksIE,
        KeekIE,
        TEDIE,
        MySpassIE,
        SpiegelIE,
        LiveLeakIE,
        ARDIE,
        ZDFIE,
        TumblrIE,
        BandcampIE,
        RedTubeIE,
        InaIE,
        HowcastIE,
        VineIE,
        FlickrIE,
        TeamcocoIE,
        XHamsterIE,
        HypemIE,
        Vbox7IE,
        GametrailersIE,
        StatigramIE,
        GenericIE,
    )
    return [extractor_class() for extractor_class in ordered_classes]

def get_info_extractor(ie_name):
    """Look up and return the info extractor class named ie_name plus 'IE'.

    The lookup is done in this module's global namespace, so a KeyError is
    raised when no such name exists there.
    """
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]
Fix infoQ in Python3 2013-01-01 21:07:37 +01:00			`import base64`
Split code as a package, compiled into an executable zip 2012-03-25 03:07:37 +02:00			`import datetime`
8tracks IE (Closes #652) 2013-01-27 03:01:23 +01:00			`import itertools`
Split code as a package, compiled into an executable zip 2012-03-25 03:07:37 +02:00			`import netrc`
			`import os`
			`import re`
			`import socket`
			`import time`
			`import email.utils`
dropped the support for Python 2.5 let's elaborate the decision: Python 2.5 is a 6 years old release and "under the current release policy, no security issues in Python 2.5 will be fixed anymore" (!!); also, it doesn't support the new zipfile distribution format. 2012-05-01 17:01:51 +02:00			`import xml.etree.ElementTree`
add youku support 2012-08-08 20:04:02 +02:00			`import random`
			`import math`
Switch YTPlaylistIE to API (relevant: #586); fixes #651; fixes #673; fixes #661 2013-02-26 10:39:26 +01:00			`import operator`
MyVideoIE: add rtmp support 2013-05-15 23:38:44 +02:00			`import hashlib`
			`import binascii`
			`import urllib`
Split code as a package, compiled into an executable zip 2012-03-25 03:07:37 +02:00
Use relative imports 2012-11-28 03:34:40 +01:00			`from .utils import *`
Fix generic class move (add all files) 2013-06-23 19:57:38 +02:00			`from .extractor.common import InfoExtractor, SearchInfoExtractor`
Move ARD, Arte, ZDF into their own files 2013-06-23 20:24:07 +02:00
			`from .extractor.ard import ARDIE`
			`from .extractor.arte import ArteTvIE`
[Bandcamp] move into own file 2013-06-23 22:24:58 +02:00			`from .extractor.bandcamp import BandcampIE`
Move blip.tv extractors into their own file 2013-06-23 20:44:48 +02:00			`from .extractor.bliptv import BlipTVIE, BlipTVUserIE`
Move comedycentral into its own file 2013-06-23 20:50:22 +02:00			`from .extractor.comedycentral import ComedyCentralIE`
Move Collegehumor IE into its own file 2013-06-23 21:10:21 +02:00			`from .extractor.collegehumor import CollegeHumorIE`
Move DailyMotion into its own file 2013-06-23 20:09:47 +02:00			`from .extractor.dailymotion import DailymotionIE`
Move DepositFiles into its own IE 2013-06-23 21:06:20 +02:00			`from .extractor.depositfiles import DepositFilesIE`
[8tracks] Move into own file 2013-06-23 22:15:50 +02:00			`from .extractor.eighttracks import EightTracksIE`
Move Escapist into its own file 2013-06-23 21:08:17 +02:00			`from .extractor.escapist import EscapistIE`
Add facebook import 2013-06-23 21:00:34 +02:00			`from .extractor.facebook import FacebookIE`
[flickr] Move into own file 2013-06-23 22:31:12 +02:00			`from .extractor.flickr import FlickrIE`
Move FunnyOrDie into its own file 2013-06-23 22:05:23 +02:00			`from .extractor.funnyordie import FunnyOrDieIE`
Move gametrailers IE into its own file 2013-06-23 20:29:46 +02:00			`from .extractor.gametrailers import GametrailersIE`
Move GenericIE into its own file 2013-06-23 20:31:45 +02:00			`from .extractor.generic import GenericIE`
Move G+ IE into its own file, and move google search into a more descriptive module 2013-06-23 20:55:15 +02:00			`from .extractor.googleplus import GooglePlusIE`
			`from .extractor.googlesearch import GoogleSearchIE`
[howcast] Move into own file 2013-06-23 22:30:16 +02:00			`from .extractor.howcast import HowcastIE`
[hypem] Move into own file 2013-06-23 22:29:27 +02:00			`from .extractor.hypem import HypemIE`
[ina] Move into own file 2013-06-23 22:28:19 +02:00			`from .extractor.ina import InaIE`
Move infoq into its own file 2013-06-23 21:14:19 +02:00			`from .extractor.infoq import InfoQIE`
[justin.tv] move into own file 2013-06-23 22:07:27 +02:00			`from .extractor.justintv import JustinTVIE`
[keek] move into own file 2013-06-23 22:16:41 +02:00			`from .extractor.keek import KeekIE`
[LiveLeak] move into own file 2013-06-23 22:23:19 +02:00			`from .extractor.liveleak import LiveLeakIE`
Move Metacafe and Statigram into their own files, and remove absolute import 2013-06-23 20:07:51 +02:00			`from .extractor.metacafe import MetacafeIE`
Move MixCloud into its own file 2013-06-23 21:59:15 +02:00			`from .extractor.mixcloud import MixcloudIE`
Move MTV IE into its own file 2013-06-23 21:27:38 +02:00			`from .extractor.mtv import MTVIE`
[myspass] Move into own file and default to mp4 ext 2013-06-23 22:20:45 +02:00			`from .extractor.myspass import MySpassIE`
Move MyVideo into its own file 2013-06-23 20:48:32 +02:00			`from .extractor.myvideo import MyVideoIE`
Move NBA IE into its own file 2013-06-23 21:18:00 +02:00			`from .extractor.nba import NBAIE`
Move Metacafe and Statigram into their own files, and remove absolute import 2013-06-23 20:07:51 +02:00			`from .extractor.statigram import StatigramIE`
Move Photobucket into its own file 2013-06-23 20:12:18 +02:00			`from .extractor.photobucket import PhotobucketIE`
[pornotube] move into own file 2013-06-23 22:13:32 +02:00			`from .extractor.pornotube import PornotubeIE`
[RBMARadio] move into own file 2013-06-23 22:09:32 +02:00			`from .extractor.rbmaradio import RBMARadioIE`
[redtube] move into own file 2013-06-23 22:27:16 +02:00			`from .extractor.redtube import RedTubeIE`
Move Soundcloud into its own file 2013-06-23 20:57:44 +02:00			`from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE`
[Spiegel] move into own file 2013-06-23 22:22:08 +02:00			`from .extractor.spiegel import SpiegelIE`
Move StanfordOC IE into its own file 2013-06-23 21:16:32 +02:00			`from .extractor.stanfordoc import StanfordOpenClassroomIE`
Move Steam IE into its own file 2013-06-23 22:02:56 +02:00			`from .extractor.steam import SteamIE`
Move TED IE into its own file 2013-06-23 21:55:53 +02:00			`from .extractor.ted import TEDIE`
[Tumblr] move into own file 2013-06-23 22:24:07 +02:00			`from .extractor.tumblr import TumblrIE`
[ustream] move into its own file 2013-06-23 22:08:28 +02:00			`from .extractor.ustream import UstreamIE`
[VBox7] move into own file 2013-06-23 22:25:46 +02:00			`from .extractor.vbox7 import Vbox7IE`
Move Vimeo into its own file 2013-06-23 20:18:21 +02:00			`from .extractor.vimeo import VimeoIE`
[Vine] move into own file 2013-06-23 22:26:30 +02:00			`from .extractor.vine import VineIE`
Move WorldStarHipHop into its own file 2013-06-23 22:04:08 +02:00			`from .extractor.worldstarhiphop import WorldStarHipHopIE`
Move Steam IE into its own file 2013-06-23 22:02:56 +02:00			`from .extractor.xnxx import XNXXIE`
Move XVideos IE into its own file (and simplify it a bit) 2013-06-23 21:11:47 +02:00			`from .extractor.xvideos import XVideosIE`
Move YahooSearchIE to youtube_dl.extractor.yahoo 2013-06-23 20:41:54 +02:00			`from .extractor.yahoo import YahooIE, YahooSearchIE`
[youjizz] move into own file 2013-06-23 22:14:22 +02:00			`from .extractor.youjizz import YouJizzIE`
Move Youku IE into its own file 2013-06-23 22:01:02 +02:00			`from .extractor.youku import YoukuIE`
[YouPorn] move into own file 2013-06-23 22:12:14 +02:00			`from .extractor.youporn import YouPornIE`
Move YoutubeSearchIE to the other youtube IEs 2013-06-23 20:28:15 +02:00			`from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE`
Move ARD, Arte, ZDF into their own files 2013-06-23 20:24:07 +02:00			`from .extractor.zdf import ZDFIE`
Add base class SearchInfoExtractor for search queries IEs 2013-05-11 23:04:56 +02:00
_download_webpage helper function 2013-01-01 20:43:43 +01:00
Split code as a package, compiled into an executable zip 2012-03-25 03:07:37 +02:00


added InfoExtractor for XNXX 2012-08-16 01:54:03 +02:00
add youku support 2012-08-08 20:04:02 +02:00
Merge pull request #398 from tempname/master 2012-08-19 18:39:43 +02:00

Merge PR #422 from 'kevinamadeus/master' Add InfoExtractor for Google Plus video (with fixes) 2012-10-09 10:48:49 +02:00

NBA IE (Closes #590) 2012-12-13 21:27:57 +01:00
Preliminary support for twitch.tv and justin.tv 2012-12-16 09:50:41 +01:00
FunnyOrDie IE (Fixes #599) 2012-12-20 21:28:27 +01:00
TweetReel IE 2012-12-27 01:38:41 +01:00
Make ustream IE more robust 2013-01-12 13:49:14 +01:00
Move gen_extractors to InfoExtractors 2013-01-01 19:37:07 +01:00
Support for WorldStarHipHop.com 2013-03-07 06:09:55 +01:00
Move gen_extractors to InfoExtractors 2013-01-01 19:37:07 +01:00
Added extractors for 3 porn sites 2013-01-05 21:42:35 +01:00

Switch YTPlaylistIE to API (relevant: #586); fixes #651; fixes #673; fixes #661 2013-02-26 10:39:26 +01:00
Added extractors for 3 porn sites 2013-01-05 21:42:35 +01:00



Add KeekIE() 2013-02-08 08:25:55 +01:00

added new InfoExtractor for myspass.de 2013-02-16 13:46:13 +01:00
Spiegel IE 2013-03-12 01:08:54 +01:00
Update InfoExtractors.py 2013-03-26 21:37:08 +01:00
add ZDFIE and _download_with_mplayer(mms://,rtsp://) 2013-05-23 21:42:03 +02:00

TumblrIE I haven't found many videos to test, so it may not work for all. 2013-04-22 21:07:49 +02:00
Add BandcampIE (closes #568) 2013-05-01 15:55:46 +02:00
Spiegel IE 2013-03-12 01:08:54 +01:00
Add support for Howcast.com - closes #835 2013-05-18 19:17:19 +02:00
Add support for Vine - closes #845 2013-05-20 00:25:26 +02:00
add support for Flickr videos - closes #261 2013-05-20 23:18:40 +02:00
Add TeamcocoIE (closes #212) 2013-05-21 14:37:32 +02:00			`class TeamcocoIE(InfoExtractor):`
			`_VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'`

			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`if mobj is None:`
			`raise ExtractorError(u'Invalid URL: %s' % url)`
			`url_title = mobj.group('url_title')`
			`webpage = self._download_webpage(url, url_title)`

_html_search_regex with clean_html superpowers 2013-06-09 11:57:13 +02:00			`video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',`
Implement search_regex from #847 2013-06-06 13:27:27 +02:00			`webpage, u'video id')`
Add TeamcocoIE (closes #212) 2013-05-21 14:37:32 +02:00
			`self.report_extraction(video_id)`

_html_search_regex with clean_html superpowers 2013-06-09 11:57:13 +02:00			`video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',`
Implement search_regex from #847 2013-06-06 13:27:27 +02:00			`webpage, u'title')`
Add TeamcocoIE (closes #212) 2013-05-21 14:37:32 +02:00
_html_search_regex with clean_html superpowers 2013-06-09 11:57:13 +02:00			`thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',`
Implement search_regex from #847 2013-06-06 13:27:27 +02:00			`webpage, u'thumbnail', fatal=False)`
Add TeamcocoIE (closes #212) 2013-05-21 14:37:32 +02:00
_html_search_regex with clean_html superpowers 2013-06-09 11:57:13 +02:00			`video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',`
Implement search_regex from #847 2013-06-06 13:27:27 +02:00			`webpage, u'description', fatal=False)`
Add TeamcocoIE (closes #212) 2013-05-21 14:37:32 +02:00
			`data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id`
			`data = self._download_webpage(data_url, video_id, 'Downloading data webpage')`
Implement search_regex from #847 2013-06-06 13:27:27 +02:00
_html_search_regex with clean_html superpowers 2013-06-09 11:57:13 +02:00			`video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',`
Implement search_regex from #847 2013-06-06 13:27:27 +02:00			`data, u'video URL')`
Add TeamcocoIE (closes #212) 2013-05-21 14:37:32 +02:00
			`return [{`
			`'id': video_id,`
			`'url': video_url,`
			`'ext': 'mp4',`
			`'title': video_title,`
			`'thumbnail': thumbnail,`
Implement search_regex from #847 2013-06-06 13:27:27 +02:00			`'description': video_description,`
Add TeamcocoIE (closes #212) 2013-05-21 14:37:32 +02:00			`}]`
use search_regex in new IEs 2013-06-07 11:46:03 +02:00
Added support for xhamster in infoextractors 2013-06-04 14:30:54 +02:00			`class XHamsterIE(InfoExtractor):`
			`"""Information Extractor for xHamster"""`
			`_VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'`

			`def _real_extract(self,url):`
			`mobj = re.match(self._VALID_URL, url)`

			`video_id = mobj.group('id')`
use search_regex in new IEs 2013-06-07 11:46:03 +02:00			`mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id`
Added support for xhamster in infoextractors 2013-06-04 14:30:54 +02:00			`webpage = self._download_webpage(mrss_url, video_id)`
use search_regex in new IEs 2013-06-07 11:46:03 +02:00
Added support for xhamster in infoextractors 2013-06-04 14:30:54 +02:00			`mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)`
			`if mobj is None:`
			`raise ExtractorError(u'Unable to extract media URL')`
			`if len(mobj.group('server')) == 0:`
			`video_url = compat_urllib_parse.unquote(mobj.group('file'))`
			`else:`
			`video_url = mobj.group('server')+'/key='+mobj.group('file')`
			`video_extension = video_url.split('.')[-1]`

_html_search_regex with clean_html superpowers 2013-06-09 11:57:13 +02:00			`video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',`
use search_regex in new IEs 2013-06-07 11:46:03 +02:00			`webpage, u'title')`
Added support for xhamster in infoextractors 2013-06-04 14:30:54 +02:00
XHamster: Can't see the description anywhere in the UI 2013-06-07 12:10:02 +02:00			`# Can't see the description anywhere in the UI`
_html_search_regex with clean_html superpowers 2013-06-09 11:57:13 +02:00			`# video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',`
XHamster: Can't see the description anywhere in the UI 2013-06-07 12:10:02 +02:00			`# webpage, u'description', fatal=False)`
			`# if video_description: video_description = unescapeHTML(video_description)`
Added support for xhamster in infoextractors 2013-06-04 14:30:54 +02:00
			`mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)`
use search_regex in new IEs 2013-06-07 11:46:03 +02:00			`if mobj:`
			`video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')`
Added support for xhamster in infoextractors 2013-06-04 14:30:54 +02:00			`else:`
use search_regex in new IEs 2013-06-07 11:46:03 +02:00			`video_upload_date = None`
			`self._downloader.report_warning(u'Unable to extract upload date')`
Added support for xhamster in infoextractors 2013-06-04 14:30:54 +02:00
test: extend the reach of info_dict checking * print the info_dict in a format suitable to easy adding to tests.json during tests if un-tested fields are detected * make it possible to put the crc32 in tests.json if the field is too long * complete the "info_dict" fields in existing tests * fixed the bugs catched doing this 2013-06-09 14:21:42 +02:00			`video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',`
use search_regex in new IEs 2013-06-07 11:46:03 +02:00			`webpage, u'uploader id', default=u'anonymous')`

			`video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',`
			`webpage, u'thumbnail', fatal=False)`
Added support for xhamster in infoextractors 2013-06-04 14:30:54 +02:00
			`return [{`
			`'id': video_id,`
			`'url': video_url,`
			`'ext': video_extension,`
			`'title': video_title,`
XHamster: Can't see the description anywhere in the UI 2013-06-07 12:10:02 +02:00			`# 'description': video_description,`
Added support for xhamster in infoextractors 2013-06-04 14:30:54 +02:00			`'upload_date': video_upload_date,`
			`'uploader_id': video_uploader_id,`
			`'thumbnail': video_thumbnail`
			`}]`
add support for Flickr videos - closes #261 2013-05-20 23:18:40 +02:00
added HypemIE rebased, closes PR #871 2013-06-05 16:16:53 +02:00


Add GametrailersIE 2013-06-16 20:34:45 +02:00
Move gen_extractors to InfoExtractors 2013-01-01 19:37:07 +01:00			`def gen_extractors():`
			`""" Return a list of an instance of every supported extractor.`
			`The order does matter; the first extractor matched is the one handling the URL.`
			`"""`
			`return [`
			`YoutubePlaylistIE(),`
			`YoutubeChannelIE(),`
			`YoutubeUserIE(),`
			`YoutubeSearchIE(),`
			`YoutubeIE(),`
			`MetacafeIE(),`
			`DailymotionIE(),`
			`GoogleSearchIE(),`
			`PhotobucketIE(),`
			`YahooIE(),`
			`YahooSearchIE(),`
			`DepositFilesIE(),`
			`FacebookIE(),`
			`BlipTVIE(),`
BlipTV: accept urls in the format http://a.blip.tv/api.swf#{id} (closes #857) Tweak the regex so that BlipTV can be before BlipTVUser. 2013-05-28 15:12:39 +02:00			`BlipTVUserIE(),`
Move gen_extractors to InfoExtractors 2013-01-01 19:37:07 +01:00			`VimeoIE(),`
			`MyVideoIE(),`
			`ComedyCentralIE(),`
			`EscapistIE(),`
			`CollegeHumorIE(),`
			`XVideosIE(),`
SoundcloudSetIE info extractor for soundcloud sets 2013-03-24 02:24:07 +01:00			`SoundcloudSetIE(),`
Move gen_extractors to InfoExtractors 2013-01-01 19:37:07 +01:00			`SoundcloudIE(),`
			`InfoQIE(),`
			`MixcloudIE(),`
			`StanfordOpenClassroomIE(),`
			`MTVIE(),`
			`YoukuIE(),`
			`XNXXIE(),`
oops - didn't remove some reminders 2013-01-06 21:52:33 +01:00			`YouJizzIE(),`
			`PornotubeIE(),`
			`YouPornIE(),`
Move gen_extractors to InfoExtractors 2013-01-01 19:37:07 +01:00			`GooglePlusIE(),`
			`ArteTvIE(),`
			`NBAIE(),`
Support for WorldStarHipHop.com 2013-03-07 06:09:55 +01:00			`WorldStarHipHopIE(),`
Move gen_extractors to InfoExtractors 2013-01-01 19:37:07 +01:00			`JustinTVIE(),`
			`FunnyOrDieIE(),`
			`SteamIE(),`
			`UstreamIE(),`
RBMA IE (Closes #630) 2013-01-12 17:58:39 +01:00			`RBMARadioIE(),`
8tracks IE (Closes #652) 2013-01-27 03:01:23 +01:00			`EightTracksIE(),`
Add KeekIE() 2013-02-08 08:25:55 +01:00			`KeekIE(),`
Basic support for TED 2013-02-17 17:13:06 +01:00			`TEDIE(),`
Add tests to MySpass 2013-02-18 18:45:09 +01:00			`MySpassIE(),`
Spiegel IE 2013-03-12 01:08:54 +01:00			`SpiegelIE(),`
Rebased, fixed and extended LiveLeak.com support close #757 - close #761 2013-03-29 15:13:24 +01:00			`LiveLeakIE(),`
added ARD InfoExtractor (german state television) 2013-04-07 15:23:48 +02:00			`ARDIE(),`
add ZDFIE and _download_with_mplayer(mms://,rtsp://) 2013-05-23 21:42:03 +02:00			`ZDFIE(),`
TumblrIE I haven't found many videos to test, so it may not work for all. 2013-04-22 21:07:49 +02:00			`TumblrIE(),`
Add BandcampIE (closes #568) 2013-05-01 15:55:46 +02:00			`BandcampIE(),`
Simplify RedTube 2013-05-03 20:07:35 +02:00			`RedTubeIE(),`
Clean up InaIE (Closes #823) 2013-05-05 20:57:19 +02:00			`InaIE(),`
Add support for Howcast.com - closes #835 2013-05-18 19:17:19 +02:00			`HowcastIE(),`
Add support for Vine - closes #845 2013-05-20 00:25:26 +02:00			`VineIE(),`
add support for Flickr videos - closes #261 2013-05-20 23:18:40 +02:00			`FlickrIE(),`
Add TeamcocoIE (closes #212) 2013-05-21 14:37:32 +02:00			`TeamcocoIE(),`
Added support for xhamster in infoextractors 2013-06-04 14:30:54 +02:00			`XHamsterIE(),`
added HypemIE rebased, closes PR #871 2013-06-05 16:16:53 +02:00			`HypemIE(),`
Added Vbox7 Infoextractor 2013-06-08 09:44:38 +02:00			`Vbox7IE(),`
Add GametrailersIE 2013-06-16 20:34:45 +02:00			`GametrailersIE(),`
Improve Statigr.am IE 2013-06-23 18:58:53 +02:00			`StatigramIE(),`
Move gen_extractors to InfoExtractors 2013-01-01 19:37:07 +01:00			`GenericIE()`
			`]`
Create a function in InfoExtractors that returns the InfoExtractor class with the given name 2013-04-20 12:42:57 +02:00
			`def get_info_extractor(ie_name):`
			`"""Returns the info extractor class with the given ie_name"""`
			`return globals()[ie_name+'IE']`