From f41bd43f2083f7447d66be81736177c7d9fd9fbf Mon Sep 17 00:00:00 2001
From: thecodingbagel
Date: Thu, 29 Jun 2017 17:22:09 -0400
Subject: [PATCH] [Schooltube] Add new extractor

---
Review notes (ignored by git-am):
 * _VALID_URL: restored the named group (?P<id>...), accept https, escape
   the dots in the host name, drop the no-op trailing [\S\s]*.
 * _real_extract: the player setup lookup is fatal again, so a page without
   it raises an extractor error instead of passing None to _parse_json.
 * _TEST: 'description' is now a 're:' expectation because the literal
   contained a raw line break between its two sentences.
 * The blob id on the schooltube.py "index" line predates these changes.

 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/schooltube.py | 51 ++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 youtube_dl/extractor/schooltube.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index a263c88b3..66d2588a5 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -886,6 +886,7 @@ from .safari import (
 from .sapo import SapoIE
 from .savefrom import SaveFromIE
 from .sbs import SBSIE
+from .schooltube import SchooltubeIE
 from .screencast import ScreencastIE
 from .screencastomatic import ScreencastOMaticIE
 from .scrippsnetworks import ScrippsNetworksWatchIE
diff --git a/youtube_dl/extractor/schooltube.py b/youtube_dl/extractor/schooltube.py
new file mode 100644
index 000000000..db7ebe8fb
--- /dev/null
+++ b/youtube_dl/extractor/schooltube.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class SchooltubeIE(InfoExtractor):
+    """Extractor for www.schooltube.com video pages that embed a JW Player."""
+
+    # The named group ``id`` is what ``_match_id`` extracts from the URL.
+    _VALID_URL = r'https?://www\.schooltube\.com/video/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://www.schooltube.com/video/9af6bd6815d74ea7948b/Innovation%20Workshop:%20Electronic%20Circuits%20--%20Part%202:%20Inside%20a%20Cell%20Phone',
+        'md5': '0ce7f3f50a8b12054c906968d8512a57',
+        'info_dict': {
+            'id': '9af6bd6815d74ea7948b',
+            'ext': 'mp4',
+            'title': 'Innovation Workshop: Electronic Circuits -- Part 2: Inside a Cell Phone',
+            # A raw line break separated the two sentences of this literal;
+            # match any whitespace there rather than one exact character.
+            'description': r're:(?su)^Inside a cell phone is a world of electronics that is highly engineered and complex\.\s+Take a closer look inside as we crack open an iPhone® to look at the microchips under an advanced microscope at the National Institute for Standards and Technology\.$',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the SchoolTube video page at ``url``."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # No fatal=False here: without the player setup there is nothing to
+        # parse, and a None result must not be handed to _parse_json.
+        setup_code = self._search_regex(
+            r'(?s)jwplayer\("schooltube-video"\)\.setup\((\{.*?\})\)',
+            webpage, 'setup code')
+        jwplayer_data = self._parse_json(
+            setup_code, video_id, transform_source=js_to_json)
+
+        info_dict = self._parse_jwplayer_data(
+            jwplayer_data, video_id, require_title=False)
+
+        info_dict.update({
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            # NOTE(review): 'keywords' may not be a field consumers read;
+            # 'tags' (a list) could be the intended key -- confirm.
+            'keywords': self._html_search_meta('keywords', webpage),
+        })
+
+        return info_dict