diff --git a/haruhi_dl/extractor/embetty.py b/haruhi_dl/extractor/embetty.py new file mode 100644 index 000000000..4e3882006 --- /dev/null +++ b/haruhi_dl/extractor/embetty.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, +) + + +class EmbettyIE(InfoExtractor): + @staticmethod + def _extract_entries(webpage, **kw): + results = [] + + twitter_matches = re.finditer(r']*\bstatus="(\d{18})"[^>]*>', webpage) + for match in twitter_matches: + results.append({ + '_type': 'url', + 'url': 'https://twitter.com/i/web/status/%s' % match.group(1), + 'ie_key': 'Twitter', + }) + + video_matches = re.findall(r']+>', webpage) + for match in video_matches: + attr = extract_attributes(match) + if any(key not in attr for key in ('type', 'video-id')): + continue # invalid + service = attr['type'].lower() + if service == 'youtube': + results.append({ + '_type': 'url', + 'url': 'https://www.youtube.com/watch?v=%s&t=%s' % (attr['video-id'], attr.get('start-at', '0')), + 'ie_key': 'Youtube', + }) + elif service == 'vimeo': + results.append({ + '_type': 'url', + 'url': 'https://vimeo.com/%s#t=%s' % (attr['video-id'], attr.get('start-at', '0')), + 'ie_key': 'Vimeo', + }) + elif service == 'facebook': + results.append({ + '_type': 'url', + 'url': 'https://www.facebook.com/video.php?v=%s' % (attr['video-id']), + 'ie_key': 'Facebook', + }) + + return results + + @staticmethod + def _extract_urls(webpage, **kw): + return [embed['url'] for embed in EmbettyIE._extract_entries(webpage, **kw)]