From b1e4e389f352211593e103d597802409ba4c9763 Mon Sep 17 00:00:00 2001
From: frkhit
Date: Sun, 14 Oct 2018 16:56:50 +0800
Subject: [PATCH 1/2] [cctv] Add support for downloading all videos of a
 cctv.com channel

---
 youtube_dl/extractor/cctv.py       | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  2 +-
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/cctv.py b/youtube_dl/extractor/cctv.py
index c76f361c6..434e9d896 100644
--- a/youtube_dl/extractor/cctv.py
+++ b/youtube_dl/extractor/cctv.py
@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
@@ -189,3 +190,56 @@ class CCTVIE(InfoExtractor):
             'duration': duration,
             'formats': formats,
         }
+
+
+class CCTVChannelIE(InfoExtractor):
+    IE_DESC = '央视网 栏目'
+    _VALID_URL = r'http://tv.cctv.com/lm/(?P<id>[0-9A-Za-z-_]+)/?'
+    _TESTS = [{
+        'url': 'http://tv.cctv.com/lm/d10fys/',
+        'only_matching': True,
+    }]
+
+    def _entries(self, page, playlist_id):
+        re_req_item_id = re.compile(r'setItemByid[a-zA-Z0-9]+')
+        re_req_id_tmp = re.compile(r'videolistByColumnId\?id=[a-zA-Z0-9]+(?=&)')
+        re_req_id = re.compile(r'(?<=id=)[a-zA-Z0-9]+')
+        count_per_page = 100
+
+        req_item_id = re_req_item_id.findall(page)[0]
+        req_id = re_req_id.findall(re_req_id_tmp.findall(page)[0])[0]
+
+        page_num = 0
+        while True:
+            page_num += 1
+
+            url_template = "http://api.cntv.cn/lanmu/videolistByColumnId" + \
+                "?id={}&serviceId=tvcctv&type=0&n={}&t=jsonp&cb={}&p=".format(
+                    req_id, count_per_page, req_item_id)
+            content = self._download_webpage(url_template + str(page_num), playlist_id)
+            if not content:
+                break
+
+            content = content.rstrip()
+            req_item_id = re_req_item_id.findall(content)[0]
+
+            video_list = json.loads(content[(len(req_item_id) + 1):-2])["response"]["docs"]
+
+            for content_dict in video_list:
+                video_id, video_title, video_url = \
+                    content_dict["videoId"], content_dict["videoTitle"], content_dict["videoUrl"]
+                yield self.url_result(video_url, ie="CCTV", video_id=video_id, video_title=video_title)
+
+            if len(video_list) < count_per_page:
+                break
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+
+        channel_page = self._download_webpage(
+            url, channel_id,
+            'Downloading channel page', fatal=False)
+        if channel_page is False:
+            raise Exception('CCTV said: Cannot connect to {}'.format(url))
+
+        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 464c8d690..10c0a3407 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -183,7 +183,7 @@ from .cbsnews import (
 from .cbssports import CBSSportsIE
 from .ccc import CCCIE
 from .ccma import CCMAIE
-from .cctv import CCTVIE
+from .cctv import CCTVIE, CCTVChannelIE
 from .cda import CDAIE
 from .ceskatelevize import (
     CeskaTelevizeIE,

From 2e6a1babfaefc4493ebcfb6b59f8400a9b41f568 Mon Sep 17 00:00:00 2001
From: frkhit
Date: Sun, 14 Oct 2018 18:08:32 +0800
Subject: [PATCH 2/2] [YoutubeDL] Add a playlist download log: record progress
 in a temporary pickle file, download each video in order, then re-download
 the broken videos
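
A minimal sketch of how the playlist loop drives PlaylistTaskLog once this
patch is applied (the task_id, URLs and the ok flag below are illustrative
placeholders, not values used by the patch itself):

    from youtube_dl.YoutubeDL import PlaylistTaskLog

    log = PlaylistTaskLog(task_id='example-playlist')  # progress file: <tempdir>/example-playlist.pkl
    log.add_task(number=1, n_entries=2, entry={'url': 'https://example.com/a'}, extra={})
    log.add_task(number=2, n_entries=2, entry={'url': 'https://example.com/b'}, extra={})

    for task in log:  # always yields the pending task with the fewest failed attempts
        ok = True  # stand-in for process_ie_result() finishing without an exception
        # success removes the task; failure bumps its retry counter until
        # max_retry_count is exceeded, after which the task is dropped
        log.commit(video_id=task['vid'], success=ok)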
---
 youtube_dl/YoutubeDL.py | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 143 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 38ba43a97..a3847b893 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -9,18 +9,21 @@ import copy
 import datetime
 import errno
 import fileinput
+import hashlib
 import io
 import itertools
 import json
 import locale
 import operator
 import os
+import pickle
 import platform
 import re
 import shutil
 import subprocess
 import socket
 import sys
+import tempfile
 import time
 import tokenize
 import traceback
@@ -109,6 +112,119 @@ from .version import __version__
 if compat_os_name == 'nt':
     import ctypes
 
+youtube_dl_url_key = "youtube_dl_url_key"
+
+
+class PlaylistTaskLog(object):
+    def __init__(self, task_id, tmp_path=None, max_retry_count=5, to_screen=None):
+        self.to_screen = to_screen or (lambda msg: sys.stdout.write('%s\n' % msg))
+        self._task_id = task_id
+        self._log = {}
+        self._tmp_path = tmp_path or tempfile.gettempdir()
+        self._log_file = os.path.join(self._tmp_path, "{}.pkl".format(self._task_id))
+        self._max_retry_count = max_retry_count
+        self._pkl_exists_before = False
+        # init log
+        self._init_log()
+
+    def _init_log(self):
+        if not os.path.exists(self._log_file):
+            with open(self._log_file, "wb"):
+                pass
+            self._pkl_exists_before = False
+            return
+
+        with open(self._log_file, "rb") as f:
+            self._log = pickle.load(f)
+            self._pkl_exists_before = True
+            return
+
+    def _update_config(self):
+        if not self._log:
+            if os.path.exists(self._log_file):
+                os.remove(self._log_file)
+            return
+
+        with open(self._log_file, "wb") as f:
+            pickle.dump(self._log, f)
+
+    @classmethod
+    def create_id_by_ie_result(cls, ie_result):
+
+        def byteify(object_input):
+            if isinstance(object_input, dict):
+                str_dict = {byteify(key): byteify(value) for key, value in object_input.items()}
+                return ";;;".join(["{}:{}".format(key, str_dict[key]) for key in sorted(str_dict.keys())])
+            elif isinstance(object_input, (list, tuple)):
+                return ";;;".join([byteify(element) for element in sorted(object_input)])
+            else:
+                return "{}".format(object_input)
+
+        try:
+            if youtube_dl_url_key in ie_result:
+                try:
+                    return hashlib.md5(ie_result[youtube_dl_url_key]).hexdigest()
+                except Exception:
+                    return hashlib.md5(ie_result[youtube_dl_url_key].encode("utf-8")).hexdigest()
+        except Exception:
+            pass
+
+        try:
+            return hashlib.md5(json.dumps(ie_result, sort_keys=True)).hexdigest()
+        except TypeError:
+            return hashlib.md5(byteify(ie_result).encode("utf-8")).hexdigest()
+
+    def __iter__(self):
+        # python 2
+        return self
+
+    def __next__(self):
+        # Python 3
+        def _pop_task_info():
+            if not self._log:
+                return None
+
+            sorted_list = sorted(self._log.keys(), key=lambda x: self._log[x]["count"])
+
+            return self._log[sorted_list[0]]
+
+        while True:
+            task_info = _pop_task_info()
+
+            if task_info is None:
+                raise StopIteration
+
+            return task_info["task"]
+
+    next = __next__  # python 2
+
+    def commit(self, video_id, success=True):
+        if video_id not in self._log:
+            self.to_screen("WARNING: {} not in playlist log!".format(video_id))
+            return
+
+        if success is True:
+            self._log.pop(video_id)
+        else:
+            self._log[video_id]["count"] += 1
+            if self._log[video_id]["count"] > self._max_retry_count:
+                self._log.pop(video_id)
+
+        self._update_config()
+        # self.to_screen("DEBUG: PlaylistTaskLog[{} left] is {}".format(len(self._log), self._log))
+        self.to_screen("DEBUG: left {} videos".format(len(self._log)))
+
+    def add_task(self, number, n_entries, entry, extra):
"{}".format(number) + + if not self._pkl_exists_before and video_id not in self._log: + self._log[video_id] = { + "count": 0, + "task": {"vid": video_id, "number": number, "n_entries": n_entries, "entry": entry, "extra": extra} + } + + self._update_config() + class YoutubeDL(object): """YoutubeDL class. @@ -800,6 +916,12 @@ class YoutubeDL(object): } self.add_default_extra_info(ie_result, ie, url) if process: + try: + if youtube_dl_url_key not in ie_result: + ie_result[youtube_dl_url_key] = url + except Exception: + pass + return self.process_ie_result(ie_result, download, extra_info) else: return ie_result @@ -972,8 +1094,10 @@ class YoutubeDL(object): x_forwarded_for = ie_result.get('__x_forwarded_for_ip') + _download_log = PlaylistTaskLog(task_id=PlaylistTaskLog.create_id_by_ie_result(ie_result), + to_screen=self.to_screen) + for i, entry in enumerate(entries, 1): - self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) # This __x_forwarded_for_ip thing is a bit ugly but requires # minimal changes if x_forwarded_for: @@ -997,10 +1121,24 @@ class YoutubeDL(object): self.to_screen('[download] ' + reason) continue - entry_result = self.process_ie_result(entry, - download=download, - extra_info=extra) - playlist_results.append(entry_result) + _download_log.add_task(number=i, n_entries=len(entries), entry=entry, extra=extra) + + # try to download all videos + for task in _download_log: + self.to_screen('[download] Downloading No.{} in {} videos'.format(task["number"], task["n_entries"])) + + try: + entry_result = self.process_ie_result(task["entry"], + download=download, + extra_info=task["extra"]) + except Exception as e: + self.to_stderr("ERROR: {}".format(e)) + _download_log.commit(video_id=task["vid"], success=False) + else: + playlist_results.append(entry_result) + + _download_log.commit(video_id=task["vid"], success=True) + ie_result['entries'] = playlist_results self.to_screen('[download] Finished downloading playlist: %s' % playlist) return ie_result