[clip.rs] fix extraction

This commit is contained in:
Laura Liberda 2021-02-11 14:08:07 +01:00
parent b55552ad1a
commit 4cfa7883a3
2 changed files with 49 additions and 8 deletions

View file

@@ -2,14 +2,16 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .pulsembed import PulseVideoIE
from ..utils import (
ExtractorError,
)
from .pulsembed import PulseVideoIE, PulsEmbedIE
class ClipRsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
_TESTS = [{
'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
'md5': 'c412d57815ba07b56f9edc7b5d6a14e5',
'url': 'https://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
'info_dict': {
'id': '1488842.1399140381',
'ext': 'mp4',
@@ -19,16 +21,42 @@ class ClipRsIE(InfoExtractor):
'timestamp': 1459850243,
'upload_date': '20160405',
}
}, {
'url': 'https://www.clip.rs/u-novom-sadu-se-sinoc-desio-jedan-zimski-neum-svi-su-zaboravili-na-koronu-uhvatili-se-u-kolo-i-nastao-je-hit-video/15686',
'info_dict': {
'id': '2210721.1689293351',
'ext': 'mp4',
'title': 'U Novom Sadu se sinoć desio jedan zimski Neum: Svi su zaboravili na koronu, uhvatili se u kolo i nastao je HIT VIDEO',
'description': 'md5:b1d7d6c0b029b922f06a2a08c9761852',
'timestamp': 1609405068,
'upload_date': '20201231',
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
info_dict = {}
return {
mvp_id = PulseVideoIE._search_mvp_id(webpage, default=None)
if mvp_id:
info_dict.update({
'url': 'pulsevideo:%s' % PulseVideoIE._search_mvp_id(webpage),
'ie_key': PulseVideoIE.ie_key(),
})
else:
entries = PulsEmbedIE._extract_entries(webpage)
if not entries:
raise ExtractorError('Video ID not found on webpage')
if len(entries) > 1:
raise ExtractorError('More than 1 PulsEmbed')
info_dict.update(entries[0])
info_dict.update({
'_type': 'url_transparent',
'url': 'pulsevideo:%s' % PulseVideoIE._search_mvp_id(webpage),
'ie_key': PulseVideoIE.ie_key(),
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'display_id': display_id,
}
})
return info_dict

View file

@@ -43,6 +43,10 @@ class PulseVideoIE(InfoExtractor):
r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage)
if mvp:
return mvp.group(1)
mvp = re.search(
r'\sid=(["\']?)mvp:(\d+\.\d+)\1', webpage)
if mvp:
return mvp.group(2)
if default != NO_DEFAULT:
return default
raise ExtractorError('Could not extract mvp')
@@ -222,7 +226,16 @@ class PulsEmbedIE(InfoExtractor):
'url': smuggle_url('pulsembed:%s' % embed.group('id'), {'referer': url}),
'ie_key': 'PulsEmbed',
})
return entries
ids = []
def dedupe(entry):
if entry['url'] not in ids:
ids.append(entry['url'])
return True
return False
return list(filter(dedupe, entries))
def _real_extract(self, url):
video_id = self._match_id(url)