haruhi-dl/youtube_dl/extractor/cliphunter.py

89 lines
2.8 KiB
Python
Raw Normal View History

2014-01-27 07:55:30 +01:00
from __future__ import unicode_literals
2014-01-25 01:46:52 +01:00
from .common import InfoExtractor
from ..utils import int_or_none
2014-02-02 12:03:36 +01:00
2014-01-25 01:46:52 +01:00
_translation_table = {
2014-01-27 12:39:39 +01:00
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
'y': 'l', 'z': 'i',
'$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
}
2014-01-25 01:46:52 +01:00
def _decode(s):
return ''.join(_translation_table.get(c, c) for c in s)
2014-01-25 01:46:52 +01:00
class CliphunterIE(InfoExtractor):
    """Extractor for cliphunter.com watch pages.

    Handles URLs of the form
    ``http(s)://[www.]cliphunter.com/w/<numeric id>/<slug>``.
    """

    IE_NAME = 'cliphunter'
    _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/
        (?P<id>[0-9]+)/
        (?P<seo>.+?)(?:$|[#\?])
    '''
    _TESTS = [{
        'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
        'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
        'info_dict': {
            'id': '1012420',
            'ext': 'flv',
            'title': 'Fun Jynx Maze solo',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
        },
        'skip': 'Video gone',
    }, {
        'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz',
        'md5': '55a723c67bfc6da6b0cfa00d55da8a27',
        'info_dict': {
            'id': '2019449',
            'ext': 'mp4',
            'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the page, reads the title from the ``mediaTitle``
        assignment and the available files from the ``gexoFiles`` JavaScript
        object, and returns a dict with ``id``, ``title``, ``formats``,
        ``age_limit`` and ``thumbnail``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video_title = self._search_regex(
            r'mediaTitle = "([^"]+)"', webpage, 'title')

        gexo_files = self._parse_json(
            self._search_regex(
                r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files'),
            video_id)

        formats = []
        for file_key, f in gexo_files.items():
            # Skip entries that are not JSON objects rather than failing on
            # the .get() calls below.
            if not isinstance(f, dict):
                continue
            video_url = f.get('url')
            if not video_url:
                continue
            fmt = f.get('fmt')
            height = f.get('h')
            # Prefer a descriptive "<fmt>_<height>p" id; fall back to the
            # key of the entry in gexoFiles when either part is missing.
            format_id = '%s_%sp' % (fmt, height) if fmt and height else file_key
            formats.append({
                # The page stores the URL in substituted form; _decode maps
                # it back through the module's translation table.
                'url': _decode(video_url),
                'format_id': format_id,
                'width': int_or_none(f.get('w')),
                'height': int_or_none(height),
                'tbr': int_or_none(f.get('br')),
            })
        self._sort_formats(formats)

        # The thumbnail is optional: a missing match must not abort extraction.
        thumbnail = self._search_regex(
            r"var\s+mov_thumb\s*=\s*'([^']+)';",
            webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            # NOTE(review): _rta_search is inherited from InfoExtractor;
            # presumably it derives the age limit from an RTA label in the
            # page - confirm against the base class.
            'age_limit': self._rta_search(webpage),
            'thumbnail': thumbnail,
        }