From fd6ca382628afbc4a229a15cd26552e226ac4536 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Sat, 2 Jul 2016 21:33:23 +0800
Subject: [PATCH] [facebook] Improve Facebook embedded detection

Related to #9938. Another example comes from 9834872bf63b4e03b66c5e3b8f306556e735d8c5.
---
 youtube_dl/extractor/facebook.py | 15 +++++++++++++++
 youtube_dl/extractor/generic.py  | 26 ++++++++++++++++++++++----
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 9b87b37ae..6eaa22d89 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -129,6 +129,21 @@ class FacebookIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
+        if mobj is not None:
+            return mobj.group('url')
+
+        # Facebook API embed
+        # see https://developers.facebook.com/docs/plugins/embedded-video-player
+        mobj = re.search(r'''(?x)<div[^>]+
+                class=(?P<q1>[\'"])[^\'"]*\bfb-video\b[^\'"]*(?P=q1)[^>]+
+                data-href=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)''', webpage)
+        if mobj is not None:
+            return mobj.group('url')
+
     def _login(self):
         (useremail, password) = self._get_login_info()
         if useremail is None:
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 9315b9e21..7212e0edd 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -66,6 +66,7 @@ from .theplatform import ThePlatformIE
 from .vessel import VesselIE
 from .kaltura import KalturaIE
 from .eagleplatform import EaglePlatformIE
+from .facebook import FacebookIE
 
 
 class GenericIE(InfoExtractor):
@@ -1260,6 +1261,24 @@ class GenericIE(InfoExtractor):
                 'uploader': 'TheAtlantic',
             },
             'add_ie': ['BrightcoveLegacy'],
+        },
+        # Facebook