Merge branch 'sexykarma' of https://github.com/CkuT/youtube-dl into CkuT-sexykarma

This commit is contained in:
Sergey M 2014-10-19 00:06:53 +07:00
commit 77c3c5c5ed
2 changed files with 78 additions and 0 deletions

View file

@ -315,6 +315,7 @@ from .sbs import SBSIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .servingsys import ServingSysIE
from .sexykarma import SexyKarmaIE
from .shared import SharedIE
from .sharesix import ShareSixIE
from .sina import SinaIE

View file

@ -0,0 +1,77 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
import re
import datetime
class SexyKarmaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?sexykarma\.com/gonewild/video/.+\-(?P<id>[a-zA-Z0-9\-]+)(.html)'
_TESTS = [{
'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
'info_dict': {
'id': 'yHI70cOyIHt',
'ext': 'mp4',
'title': 'Taking a quick pee.',
'uploader_id': 'wildginger7',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': int,
'view_count': int,
'upload_date': '20141007',
}
}, {
'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
'md5': 'dd216c68d29b49b12842b9babe762a5d',
'info_dict': {
'id': '8Id6EZPbuHf',
'ext': 'mp4',
'title': 'pot_pixie tribute',
'uploader_id': 'banffite',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': int,
'view_count': int,
'upload_date': '20141013',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h2 class="he2"><span>(.*?)</span>', webpage, 'title')
uploader_id = self._html_search_regex(r'class="aupa">\n*(.*?)</a>', webpage, 'uploader')
url = self._html_search_regex(r'<p><a href="(.*?)" ?\n*target="_blank"><font color', webpage, 'url')
thumbnail = self._html_search_regex(r'<div id="player" style="z-index:1;"> <span id="edge"></span> <span id="container"><img[\n ]*src="(.+?)"', webpage, 'thumbnail')
str_duration = self._html_search_regex(r'<tr>[\n\s]*<td>Time: </td>[\n\s]*<td align="right"><span>(.+)\n*', webpage, 'duration')
duration = self._to_seconds(str_duration)
str_views = self._html_search_regex(r'<tr>[\n\s]*<td>Views: </td>[\n\s]*<td align="right"><span>(.+)</span>', webpage, 'view_count')
view_count = int(str_views)
# print view_count
date = self._html_search_regex(r'class="aup">Added: <strong>(.*?)</strong>', webpage, 'date')
d = datetime.datetime.strptime(date, '%B %d, %Y')
upload_date = d.strftime('%Y%m%d')
categories = re.findall(r'http://www.sexykarma.com/gonewild/search/video/(?:.+?)"><span>(.*?)</span>', webpage)
return {
'id': video_id,
'title': title,
'uploader_id': uploader_id,
'url': url,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,
'upload_date': upload_date,
'categories': categories,
}
def _to_seconds(self, timestr):
seconds= 0
for part in timestr.split(':'):
seconds= seconds*60 + int(part)
return seconds