From 45cdca687c7f9ab77228a47cf5dcf34cad6ce3a3 Mon Sep 17 00:00:00 2001
From: shaileshaanand <anaandshailu@gmail.com>
Date: Thu, 12 Mar 2020 12:17:51 +0530
Subject: [PATCH 1/5] [skillshare:class] Add support for skillshare.com classes

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/skillshare.py | 61 ++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 youtube_dl/extractor/skillshare.py
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 64d1fa251..5586c928c 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -991,6 +991,7 @@ from .shared import (
 from .showroomlive import ShowRoomLiveIE
 from .sina import SinaIE
 from .sixplay import SixPlayIE
+from .skillshare import SkillshareClassIE
 from .skylinewebcams import SkylineWebcamsIE
 from .skynewsarabia import (
     SkyNewsArabiaIE,
diff --git a/youtube_dl/extractor/skillshare.py b/youtube_dl/extractor/skillshare.py
new file mode 100644
index 000000000..c63b52ab5
--- /dev/null
+++ b/youtube_dl/extractor/skillshare.py
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+import json
+from .common import InfoExtractor
+
+
+class SkillshareClassIE(InfoExtractor):
+    IE_NAME = 'skillshare:class'
+    _VALID_URL = r'https?://(?:www\.)?skillshare\.com/classes/[^/]+/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://www.skillshare.com/classes/SEO-Today-Strategies-to-Earn-Trust-Rank-High-and-Stand-Out/423483018',
+        'only_matching': True,
+        'info_dict': {
+            'id': '5463396146001',
+            'ext': 'mp4',
+            'title': 'Introduction',
+            # TODO more properties, either as:
+            # * A value
+            # * MD5 checksum; start the string with md5:
+            # * A regular expression; start the string with re:
+            # * Any Python type (for example int or float)
+        }
+    }
+
+    def _real_extract(self, url):
+        search_term = r'(?P<json_line>{"userData":{.+});\n'
+        lesson_info_api_url_format = "https://www.skillshare.com/sessions/{}/video"
+        video_api_url_format = "https://edge.api.brightcove.com/playback/v1/accounts/{}/videos/{}"
+        headers = {"Accept": "application/json;pk=BCpkADawqM2OOcM6njnM7hf9EaK6lIFlqiXB0iWjqGWU    QjU7R8965xUvIQNqdQbnDTLz0IAO7E6Ir2rIbXJtFdzrGtitoee0n1XXRliD-RH9A-svuvNW    9qgo3Bh34HEZjXjG4Nml4iyz3KqF"}
+        class_id = self._match_id(url)
+        class_page = self._download_webpage(url, class_id)
+        class_json_data = json.loads(self._search_regex(search_term, class_page, 'class_json_data'))
+        account_id = str(class_json_data.get('pageData').get('videoPlayerData').get('brightcoveAccountId'))
+        class_title = class_json_data.get('pageData').get('headerData').get('title')
+        lessons = class_json_data.get('pageData').get('videoPlayerData').get('units')[0].get('sessions')
+        videos = []
+        for lesson in lessons:
+            lesson_id = str(lesson.get('id'))
+            lesson_info_api_url = lesson_info_api_url_format.format(lesson_id)
+            lesson_info_api_response = self._download_json(lesson_info_api_url, lesson_id)
+            print(lesson_info_api_response)
+            if 'video_hashed_id' not in lesson_info_api_response:
+                break
+            video_hashed_id = lesson_info_api_response.get('video_hashed_id')[3:]
+            video_api_url = video_api_url_format.format(account_id, video_hashed_id)
+            video_api_response = self._download_json(video_api_url, video_hashed_id, headers=headers)
+            lesson_title = lesson.get('title')
+            lesson_url = video_api_response.get('sources')[-1].get('src')
+            video = {
+                'id': video_hashed_id,
+                'title': lesson_title,
+                'url': lesson_url,
+                'ext': 'mp4',
+            }
+            videos.append(video)
+        return {
+            'id': class_id,
+            'title': class_title,
+            '_type': 'playlist',
+            'entries': videos
+        }

From 39eb11694ca64bef676343f7f8e706b31e84184e Mon Sep 17 00:00:00 2001
From: shaileshaanand <anaandshailu@gmail.com>
Date: Fri, 13 Mar 2020 13:10:36 +0530
Subject: [PATCH 2/5] [skillshare:class] Delegate video extraction to BrightcoveNewIE

---
 youtube_dl/extractor/skillshare.py | 47 +++++++-----------------------
 1 file changed, 11 insertions(+), 36 deletions(-)

diff --git a/youtube_dl/extractor/skillshare.py b/youtube_dl/extractor/skillshare.py
index c63b52ab5..43a6113c2 100644
--- a/youtube_dl/extractor/skillshare.py
+++ b/youtube_dl/extractor/skillshare.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 import json
 from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
 
 
 class SkillshareClassIE(InfoExtractor):
@@ -10,52 +11,26 @@ class SkillshareClassIE(InfoExtractor):
     _TEST = {
         'url': 'https://www.skillshare.com/classes/SEO-Today-Strategies-to-Earn-Trust-Rank-High-and-Stand-Out/423483018',
         'only_matching': True,
-        'info_dict': {
-            'id': '5463396146001',
-            'ext': 'mp4',
-            'title': 'Introduction',
-            # TODO more properties, either as:
-            # * A value
-            # * MD5 checksum; start the string with md5:
-            # * A regular expression; start the string with re:
-            # * Any Python type (for example int or float)
-        }
     }
 
     def _real_extract(self, url):
-        search_term = r'(?P<json_line>{"userData":{.+});\n'
-        lesson_info_api_url_format = "https://www.skillshare.com/sessions/{}/video"
-        video_api_url_format = "https://edge.api.brightcove.com/playback/v1/accounts/{}/videos/{}"
-        headers = {"Accept": "application/json;pk=BCpkADawqM2OOcM6njnM7hf9EaK6lIFlqiXB0iWjqGWU    QjU7R8965xUvIQNqdQbnDTLz0IAO7E6Ir2rIbXJtFdzrGtitoee0n1XXRliD-RH9A-svuvNW    9qgo3Bh34HEZjXjG4Nml4iyz3KqF"}
         class_id = self._match_id(url)
-        class_page = self._download_webpage(url, class_id)
-        class_json_data = json.loads(self._search_regex(search_term, class_page, 'class_json_data'))
+        class_json_data = json.loads(self._search_regex(r'(?P<json_line>{"userData":{.+});\n', self._download_webpage(url, class_id), 'class_json_data'))
         account_id = str(class_json_data.get('pageData').get('videoPlayerData').get('brightcoveAccountId'))
-        class_title = class_json_data.get('pageData').get('headerData').get('title')
         lessons = class_json_data.get('pageData').get('videoPlayerData').get('units')[0].get('sessions')
-        videos = []
+        entries = []
         for lesson in lessons:
             lesson_id = str(lesson.get('id'))
-            lesson_info_api_url = lesson_info_api_url_format.format(lesson_id)
-            lesson_info_api_response = self._download_json(lesson_info_api_url, lesson_id)
-            print(lesson_info_api_response)
+            lesson_info_api_response = self._download_json("https://www.skillshare.com/sessions/{}/video".format(lesson_id), lesson_id)
             if 'video_hashed_id' not in lesson_info_api_response:
                 break
             video_hashed_id = lesson_info_api_response.get('video_hashed_id')[3:]
-            video_api_url = video_api_url_format.format(account_id, video_hashed_id)
-            video_api_response = self._download_json(video_api_url, video_hashed_id, headers=headers)
-            lesson_title = lesson.get('title')
-            lesson_url = video_api_response.get('sources')[-1].get('src')
-            video = {
+            entry = {
+                '_type': 'url_transparent',
                 'id': video_hashed_id,
-                'title': lesson_title,
-                'url': lesson_url,
-                'ext': 'mp4',
+                'title': lesson.get('title'),
+                'ie_key': BrightcoveNewIE.ie_key(),
+                'url': 'https://players.brightcove.net/{}/default_default/index.html?videoId={}'.format(account_id, video_hashed_id),
             }
-            videos.append(video)
-        return {
-            'id': class_id,
-            'title': class_title,
-            '_type': 'playlist',
-            'entries': videos
-        }
+            entries.append(entry)
+        return self.playlist_result(entries, class_id, class_json_data.get('pageData').get('headerData').get('title'), class_json_data.get("pageData").get('sectionData').get('description'))

From 2df32f9697b45a82e92198d95088cc8e13d862b8 Mon Sep 17 00:00:00 2001
From: shaileshaanand <anaandshailu@gmail.com>
Date: Sun, 15 Mar 2020 14:53:02 +0530
Subject: [PATCH 3/5] [skillshare:class] Use subscripts for mandatory data and _parse_json instead of json.loads

---
 youtube_dl/extractor/skillshare.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/skillshare.py b/youtube_dl/extractor/skillshare.py
index 43a6113c2..39d15c862 100644
--- a/youtube_dl/extractor/skillshare.py
+++ b/youtube_dl/extractor/skillshare.py
@@ -1,6 +1,5 @@
 # coding: utf-8
 from __future__ import unicode_literals
-import json
 from .common import InfoExtractor
 from .brightcove import BrightcoveNewIE
 
@@ -15,22 +14,22 @@ class SkillshareClassIE(InfoExtractor):
 
     def _real_extract(self, url):
         class_id = self._match_id(url)
-        class_json_data = json.loads(self._search_regex(r'(?P<json_line>{"userData":{.+});\n', self._download_webpage(url, class_id), 'class_json_data'))
-        account_id = str(class_json_data.get('pageData').get('videoPlayerData').get('brightcoveAccountId'))
-        lessons = class_json_data.get('pageData').get('videoPlayerData').get('units')[0].get('sessions')
+        class_json_data = self._parse_json(self._search_regex(r'(?P<json_line>{"userData":{.+});\n', self._download_webpage(url, class_id), 'class_json_data'), class_id)
+        account_id = str(class_json_data['pageData']['videoPlayerData']['brightcoveAccountId'])
+        lessons = class_json_data['pageData']['videoPlayerData']['units'][0]['sessions']
         entries = []
         for lesson in lessons:
-            lesson_id = str(lesson.get('id'))
+            lesson_id = str(lesson['id'])
             lesson_info_api_response = self._download_json("https://www.skillshare.com/sessions/{}/video".format(lesson_id), lesson_id)
             if 'video_hashed_id' not in lesson_info_api_response:
                 break
-            video_hashed_id = lesson_info_api_response.get('video_hashed_id')[3:]
+            video_hashed_id = lesson_info_api_response['video_hashed_id'][3:]
             entry = {
                 '_type': 'url_transparent',
                 'id': video_hashed_id,
-                'title': lesson.get('title'),
+                'title': lesson['title'],
                 'ie_key': BrightcoveNewIE.ie_key(),
                 'url': 'https://players.brightcove.net/{}/default_default/index.html?videoId={}'.format(account_id, video_hashed_id),
             }
             entries.append(entry)
-        return self.playlist_result(entries, class_id, class_json_data.get('pageData').get('headerData').get('title'), class_json_data.get("pageData").get('sectionData').get('description'))
+        return self.playlist_result(entries, class_id, class_json_data['pageData']['headerData']['title'], class_json_data["pageData"]['sectionData']['description'])

From ad0a7f648070875c23133493092f461113debe32 Mon Sep 17 00:00:00 2001
From: shaileshaanand <anaandshailu@gmail.com>
Date: Sun, 15 Mar 2020 15:07:04 +0530
Subject: [PATCH 4/5] [skillshare:class] Use get() for the optional description field

---
 youtube_dl/extractor/skillshare.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/skillshare.py b/youtube_dl/extractor/skillshare.py
index 39d15c862..c787fe750 100644
--- a/youtube_dl/extractor/skillshare.py
+++ b/youtube_dl/extractor/skillshare.py
@@ -32,4 +32,4 @@ class SkillshareClassIE(InfoExtractor):
                 'url': 'https://players.brightcove.net/{}/default_default/index.html?videoId={}'.format(account_id, video_hashed_id),
             }
             entries.append(entry)
-        return self.playlist_result(entries, class_id, class_json_data['pageData']['headerData']['title'], class_json_data["pageData"]['sectionData']['description'])
+        return self.playlist_result(entries, class_id, class_json_data['pageData']['headerData']['title'], class_json_data.get("pageData").get('sectionData').get('description'))

From cd3a7c64ea63fefe345312242f3a66298d9cea1f Mon Sep 17 00:00:00 2001
From: shaileshaanand <anaandshailu@gmail.com>
Date: Mon, 16 Mar 2020 13:32:26 +0530
Subject: [PATCH 5/5] [skillshare:class] Address review: use try_get, %-formatting and regex video id extraction

---
 youtube_dl/extractor/skillshare.py | 33 ++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/skillshare.py b/youtube_dl/extractor/skillshare.py
index c787fe750..44d8680fb 100644
--- a/youtube_dl/extractor/skillshare.py
+++ b/youtube_dl/extractor/skillshare.py
@@ -2,6 +2,10 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from .brightcove import BrightcoveNewIE
+from ..utils import (
+    try_get,
+    compat_str
+)
 
 
 class SkillshareClassIE(InfoExtractor):
@@ -14,22 +18,33 @@ class SkillshareClassIE(InfoExtractor):
 
     def _real_extract(self, url):
         class_id = self._match_id(url)
-        class_json_data = self._parse_json(self._search_regex(r'(?P<json_line>{"userData":{.+});\n', self._download_webpage(url, class_id), 'class_json_data'), class_id)
-        account_id = str(class_json_data['pageData']['videoPlayerData']['brightcoveAccountId'])
+        class_json_data = self._parse_json(self._search_regex(
+            r'(?P<json_line>{.+pageData.+});\n',
+            self._download_webpage(url, class_id), 'class_json_data'), class_id)
+        account_id = class_json_data['pageData']['videoPlayerData']['brightcoveAccountId']
         lessons = class_json_data['pageData']['videoPlayerData']['units'][0]['sessions']
         entries = []
         for lesson in lessons:
-            lesson_id = str(lesson['id'])
-            lesson_info_api_response = self._download_json("https://www.skillshare.com/sessions/{}/video".format(lesson_id), lesson_id)
+            lesson_id = lesson.get('id')
+            lesson_info_api_response = self._download_json(
+                "https://www.skillshare.com/sessions/%s/video" % lesson_id,
+                lesson_id)
             if 'video_hashed_id' not in lesson_info_api_response:
                 break
-            video_hashed_id = lesson_info_api_response['video_hashed_id'][3:]
+            video_hashed_id = self._search_regex(
+                r'(\d+)', lesson_info_api_response.get('video_hashed_id'),
+                'video_hashed_id')
             entry = {
+                # the brightcove extractor extracts the title and id
                 '_type': 'url_transparent',
-                'id': video_hashed_id,
-                'title': lesson['title'],
                 'ie_key': BrightcoveNewIE.ie_key(),
-                'url': 'https://players.brightcove.net/{}/default_default/index.html?videoId={}'.format(account_id, video_hashed_id),
+                'url': 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (account_id, video_hashed_id),
             }
             entries.append(entry)
-        return self.playlist_result(entries, class_id, class_json_data['pageData']['headerData']['title'], class_json_data.get("pageData").get('sectionData').get('description'))
+        return self.playlist_result(
+            entries, class_id, try_get(
+                class_json_data, lambda x: x['pageData']['headerData']['title'],
+                compat_str),
+            try_get(
+                class_json_data, lambda x: x['pageData']['sectionData']['description'],
+                compat_str))