From c7f7e05b382da1e845c07e5bd467820668b0a151 Mon Sep 17 00:00:00 2001 From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com> Date: Sun, 19 Nov 2023 20:44:39 -0500 Subject: [PATCH] feat(yt-dl): automatically gather youtube cookies --- Contents/Code/__init__.py | 15 ++ Contents/Code/general_helper.py | 112 ++++++++++- Contents/Code/platform_helper.py | 46 +++++ Contents/Code/scheduled_tasks.py | 5 + Contents/Code/selenium_helper.py | 313 +++++++++++++++++++++++++++++ Contents/Code/youtube_dl_helper.py | 13 +- Contents/DefaultPrefs.json | 32 +++ requirements.txt | 1 + 8 files changed, 521 insertions(+), 16 deletions(-) create mode 100644 Contents/Code/platform_helper.py create mode 100644 Contents/Code/selenium_helper.py diff --git a/Contents/Code/__init__.py b/Contents/Code/__init__.py index 6ad2ce87..e133c158 100644 --- a/Contents/Code/__init__.py +++ b/Contents/Code/__init__.py @@ -2,7 +2,9 @@ # standard imports import inspect +import os import re +import sys # plex debugging try: @@ -40,6 +42,19 @@ except Exception as e: Log.Exception("Failed to bypass RestrictedPython: {}".format(e)) +# change priority of Contents/Libraries/Shared to be first +# this is required to import versions of libraries that we specify in our requirements.txt file +# instead of what Plex provides +try: + # sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'Libraries', 'Shared')) + import selenium + getattr(selenium, "__file__") + # todo - determine the best way to move our libraries to the top of the path + for path in sys.path: + Log.Debug("sys.path: {}".format(path)) +except Exception as e: + Log.Exception("Failed to add Contents/Libraries/Shared to sys.path: {}".format(e)) + # local imports from default_prefs import default_prefs from constants import contributes_to, version diff --git a/Contents/Code/general_helper.py b/Contents/Code/general_helper.py index 615d5fd1..0d3ac3e7 100644 --- a/Contents/Code/general_helper.py +++ b/Contents/Code/general_helper.py 
def get_user_country_code(ip_address=None):
    # type: (Optional[str]) -> Optional[str]
    """
    Get the country code for the user with the given IP address.

    The country is looked up with the ipinfo.io JSON API. Any failure (network error, HTTP error status,
    invalid JSON, or a response without a ``country`` field) is logged and reported as ``None`` instead of
    raising.

    Parameters
    ----------
    ip_address : Optional[str]
        The IP address of the user. When omitted, the generic ``https://ipinfo.io/json`` endpoint is queried.

    Returns
    -------
    Optional[str]
        The ISO 3166-1 alpha-2 country code for the user with the given IP address, or None if the country
        could not be determined.

    Examples
    --------
    >>> get_user_country_code()
    'US'
    """
    api_url = 'https://ipinfo.io/json'
    if ip_address:
        api_url = 'https://ipinfo.io/{}/json'.format(ip_address)

    try:
        # requests has no default timeout; without one a stalled connection blocks the caller forever
        response = requests.get(api_url, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON
        Log.Error("Could not determine user country: {}".format(e))
        return None

    country = data.get('country') if isinstance(data, dict) else None
    if not country:
        Log.Error("Could not determine user country: no country in response from {}".format(api_url))
        return None

    # str() gives a native string on both Python 2 (unicode -> str, codes are ASCII) and Python 3,
    # so the result compares equal to the plain string literals used by callers
    return str(country)


def is_user_in_eu(ip_address=None):
    # type: (Optional[str]) -> bool
    """
    Check if the user with the given IP address is in the European Union.

    Parameters
    ----------
    ip_address : Optional[str]
        The IP address of the user.

    Returns
    -------
    bool
        True if the user with the given IP address is in the European Union, False otherwise. False is also
        returned when the country could not be determined.

    Examples
    --------
    >>> is_user_in_eu()
    False
    """
    eu_countries = frozenset((
        'AT',  # Austria
        'BE',  # Belgium
        'BG',  # Bulgaria
        'CY',  # Cyprus
        'CZ',  # Czech Republic
        'DE',  # Germany
        'DK',  # Denmark
        'EE',  # Estonia
        'ES',  # Spain
        'FI',  # Finland
        'FR',  # France
        'GR',  # Greece
        'HR',  # Croatia
        'HU',  # Hungary
        'IE',  # Ireland
        'IT',  # Italy
        'LT',  # Lithuania
        'LU',  # Luxembourg
        'LV',  # Latvia
        'MT',  # Malta
        'NL',  # Netherlands
        'PL',  # Poland
        'PT',  # Portugal
        'RO',  # Romania
        'SE',  # Sweden
        'SI',  # Slovenia
        'SK',  # Slovakia
    ))

    country_code = get_user_country_code(ip_address=ip_address)
    return country_code in eu_countries
def get_os_architecture():
    # type: () -> str
    """
    Get a normalized name for the machine architecture.

    On Windows the result is derived from ``platform.architecture()``; on every other system it is derived
    from ``platform.machine()``, compared case-insensitively.

    Returns
    -------
    str
        One of ``'x86_64'``, ``'x86'``, ``'aarch64'`` or ``'Unknown architecture'``. ``'aarch64'`` is never
        returned on Windows.

    Examples
    --------
    >>> get_os_architecture()
    'x86_64'
    """
    unknown = 'Unknown architecture'

    if os.name == 'nt':
        # Possible values: '32bit', '64bit'
        # NOTE(review): platform.architecture() inspects the Python executable by default, so this is the
        # bitness of the interpreter rather than of the OS itself - confirm that is the intent.
        bits = platform.architecture()[0]
        return {'64bit': 'x86_64', '32bit': 'x86'}.get(bits, unknown)

    # Normalize the case: the same architecture is reported with different capitalization
    # depending on the system (e.g. 'AMD64' vs 'amd64', 'arm64' vs 'ARM64').
    machine = platform.machine().lower()
    if machine in ('x86_64', 'amd64'):
        return 'x86_64'
    if machine in ('i386', 'i486', 'i586', 'i686', 'x86'):
        return 'x86'
    if machine in ('aarch64', 'arm64'):
        return 'aarch64'
    return unknown
= get_os_architecture() +os_system = platform.system().lower() diff --git a/Contents/Code/scheduled_tasks.py b/Contents/Code/scheduled_tasks.py index 9a1f66f4..eda2ad19 100644 --- a/Contents/Code/scheduled_tasks.py +++ b/Contents/Code/scheduled_tasks.py @@ -21,6 +21,7 @@ # local imports from constants import plugin_identifier from plex_api_helper import scheduled_update +from selenium_helper import install_driver from webapp import cache_data # setup logging for schedule @@ -109,6 +110,10 @@ def setup_scheduling(): plex_api_helper.scheduled_update : Scheduled function to update the themes. """ if Prefs['bool_auto_update_items']: + schedule.every(max(1, int(Prefs['int_update_browser_driver_interval']))).hours.do( + job_func=run_threaded, + target=install_driver + ) schedule.every(max(15, int(Prefs['int_update_themes_interval']))).minutes.do( job_func=run_threaded, target=scheduled_update diff --git a/Contents/Code/selenium_helper.py b/Contents/Code/selenium_helper.py new file mode 100644 index 00000000..0a255928 --- /dev/null +++ b/Contents/Code/selenium_helper.py @@ -0,0 +1,313 @@ +# -*- coding: utf-8 -*- + +# standard imports +import json +import os +import tarfile +import time +from threading import Lock +import zipfile + +# plex debugging +try: + import plexhints # noqa: F401 +except ImportError: + pass +else: # the code is running outside of Plex + from plexhints.constant_kit import CACHE_1DAY # constant kit + from plexhints.core_kit import Core # core kit + from plexhints.parse_kit import JSON # parse kit + from plexhints.prefs_kit import Prefs # prefs kit + from plexhints.log_kit import Log # log kit + +# imports from Libraries\Shared +import requests +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +from typing import Optional + +# local imports +from constants import plugin_support_data_directory +from 
driver_path = os.path.join(plugin_support_data_directory, 'selenium-drivers')
driver_versions_json_path = os.path.join(driver_path, 'installed_versions.json')
temp_path = os.path.join(plugin_support_data_directory, 'temp')

# serializes reads and writes of the installed versions json file
driver_versions_lock = Lock()

# variables
yt_cookies = []  # cached result of ``driver.get_cookies()`` (a list of cookie dictionaries)
yt_cookies_last_updated = 0  # time.time() of the last successful cookie refresh


# NOTE(review): the helper functions below deliberately avoid a leading underscore in their names, in case
# this module is ever loaded while the Plex RestrictedPython sandbox is active - confirm whether that matters.
def ensure_directory(path):
    # type: (str) -> None
    """Create ``path`` (including missing parents) if it does not exist yet."""
    if not os.path.isdir(path):
        os.makedirs(path)


def get_driver_file(driver):
    # type: (str) -> str
    """
    Get the full path of a driver executable inside ``driver_path``.

    Parameters
    ----------
    driver : str
        The driver name, e.g. 'chromedriver' or 'geckodriver'.

    Returns
    -------
    str
        The path of the executable; the file name gets an ``.exe`` suffix on Windows.
    """
    file_name = '{}.exe'.format(driver) if platform_helper.os_system == 'windows' else driver
    return os.path.join(driver_path, file_name)


def get_platform_asset(driver, assets_by_platform):
    # type: (str, dict) -> Optional[str]
    """
    Look up the release asset for the current operating system and architecture.

    Parameters
    ----------
    driver : str
        The driver name, only used for logging.
    assets_by_platform : dict
        Mapping of ``platform_helper.os_system`` to a mapping of ``platform_helper.architecture`` to asset.

    Returns
    -------
    Optional[str]
        The asset identifier, or None (after logging an error) if the platform is not supported.
    """
    try:
        return assets_by_platform[platform_helper.os_system][platform_helper.architecture]
    except KeyError:
        Log.Error('{} is not available for platform: {} {}'.format(
            driver, platform_helper.os_system, platform_helper.architecture))
        return None


def download_file(url, destination):
    # type: (str, str) -> None
    """
    Stream ``url`` into the file ``destination``.

    Raises
    ------
    requests.RequestException
        If the request fails or the server answers with an error status.
    """
    ensure_directory(os.path.dirname(destination))
    with requests.get(url, stream=True, timeout=30) as r:
        r.raise_for_status()  # do not save an HTML error page as if it was the archive
        with open(destination, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)


def extract_driver(archive_path, driver):
    # type: (str, str) -> bool
    """
    Extract the driver executable from a downloaded archive into ``driver_path``.

    Only the executable is extracted and it is written directly into ``driver_path``, whatever directory it
    has inside the archive. The Chrome for Testing archives keep the binary in a ``chromedriver-<platform>``
    sub-directory, and extracting a single known member avoids writing archive-controlled paths to disk.

    Parameters
    ----------
    archive_path : str
        The path of the downloaded ``.zip`` or ``.tar.gz`` archive.
    driver : str
        The driver name, e.g. 'chromedriver' or 'geckodriver'.

    Returns
    -------
    bool
        True if the executable was found and extracted, False otherwise.
    """
    target = get_driver_file(driver=driver)
    executable_name = os.path.basename(target)
    data = None

    if archive_path.lower().endswith('.zip'):
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            for member in zip_ref.namelist():
                # archive member names always use forward slashes
                if member.rsplit('/', 1)[-1] == executable_name:
                    data = zip_ref.read(member)
                    break
    else:
        with tarfile.open(archive_path, 'r:gz') as tar_ref:
            for member in tar_ref.getmembers():
                if member.isfile() and member.name.rsplit('/', 1)[-1] == executable_name:
                    source = tar_ref.extractfile(member)
                    try:
                        data = source.read()
                    finally:
                        source.close()
                    break

    if data is None:
        return False

    ensure_directory(driver_path)
    with open(target, 'wb') as f:
        f.write(data)
    return True


def install_release(driver, version, url, file_name):
    # type: (str, str, str, str) -> bool
    """
    Download a driver release archive, install the executable and record the installed version.

    Parameters
    ----------
    driver : str
        The driver name, e.g. 'chromedriver' or 'geckodriver'.
    version : str
        The version being installed.
    url : str
        The download url of the release archive.
    file_name : str
        The file name of the release archive; the extension selects zip or tar.gz extraction.

    Returns
    -------
    bool
        True if the driver was installed, False if the archive did not contain the executable.
    """
    Log.Info('Downloading {}'.format(file_name))
    download_path = os.path.join(temp_path, file_name)
    try:
        download_file(url=url, destination=download_path)

        # extract the release
        Log.Info('Extracting {} {}'.format(driver, version))
        extracted = extract_driver(archive_path=download_path, driver=driver)
    finally:
        # delete the download, also when the download or the extraction failed
        if os.path.isfile(download_path):
            os.remove(download_path)

    if not extracted:
        Log.Error('Failed to find {} in downloaded archive: {}'.format(driver, file_name))
        return False

    # make the driver executable
    if platform_helper.os_system != 'windows':
        os.chmod(get_driver_file(driver=driver), 0o755)

    # update the installed versions json
    update_version_file(driver=driver, version=version)
    return True


def is_driver_installed(driver, version):
    # type: (str, str) -> bool
    """Check that ``version`` is recorded as installed and that the driver executable still exists."""
    return version == get_installed_version(driver=driver) and os.path.isfile(get_driver_file(driver=driver))


# get youtube cookies using selenium
def get_yt_cookies():
    # type: () -> Optional[list]
    """
    Get YouTube cookies.

    Get the YouTube cookies using Selenium. The cookies are cached at module level and only collected again
    once the cached value is older than ``CACHE_1DAY``.

    Returns
    -------
    Optional[list]
        The YouTube cookies as returned by the web driver (a list of cookie dictionaries), or None if the
        cookies could not be retrieved.

    Examples
    --------
    >>> get_yt_cookies()
    ...
    """
    global yt_cookies, yt_cookies_last_updated

    if not Prefs['bool_youtube_cookies']:
        Log.Debug('Using YouTube cookies is disabled in the plugin settings.')
        return None

    browser = Prefs['enum_browser_driver']
    if browser == 'None':
        Log.Warning('"Browser (driver) for web automations" is not set, please select a browser in the plugin settings.')
        return None

    if time.time() - yt_cookies_last_updated < CACHE_1DAY:
        return yt_cookies

    if browser == 'Chrome':
        # setup chrome options
        options = webdriver.ChromeOptions()
        # https://github.com/GoogleChrome/chrome-launcher/blob/main/docs/chrome-flags-for-tools.md
        options.add_argument('--enable-automation')
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        if Prefs['bool_youtube_consent']:
            options.add_argument('--incognito')

        # setup chrome driver
        driver_class = webdriver.Chrome
        driver_file = get_driver_file(driver='chromedriver')
    elif browser == 'Firefox':
        # setup firefox options
        options = webdriver.FirefoxOptions()
        # https://wiki.mozilla.org/Firefox/CommandLineOptions
        options.add_argument("-headless")
        if Prefs['bool_youtube_consent']:
            options.add_argument('-private')

        # setup firefox driver
        driver_class = webdriver.Firefox
        driver_file = get_driver_file(driver='geckodriver')
    else:
        # without this guard an unexpected preference value would reach os.path.isfile(None)
        Log.Error('Unsupported browser driver: {}'.format(browser))
        return None

    if not os.path.isfile(driver_file):
        Log.Error('Failed to find driver at: {}'.format(driver_file))
        return None

    # get the cookies
    driver = None
    try:
        # starting the browser is inside the try block: it fails when the browser itself is not installed
        driver = driver_class(executable_path=driver_file, options=options)
        driver.get('https://www.youtube.com')
        if Prefs['bool_youtube_consent'] and is_user_in_eu():
            consent_button_xpath = ('/html/body/ytd-app/ytd-consent-bump-v2-lightbox/tp-yt-paper-dialog/div[4]/div[2]/'
                                    'div[6]/div[1]/ytd-button-renderer[2]/yt-button-shape/button/div/span')
            WebDriverWait(driver=driver, timeout=10).until(
                EC.presence_of_element_located((By.XPATH, consent_button_xpath))
            )

            # click the consent button
            consent_button = driver.find_element_by_xpath(consent_button_xpath)
            consent_button.click()
            # give the page time to store the consent cookies
            time.sleep(10)
        # todo - this is a list of dictionaries (probably not the correct format)
        cookies = driver.get_cookies()
    except Exception as e:
        Log.Exception('Failed to get YouTube cookies: {}'.format(e))
        return None
    finally:
        if driver is not None:
            driver.quit()

    yt_cookies = cookies
    yt_cookies_last_updated = time.time()
    return yt_cookies


def install_chromedriver():
    # type: () -> None
    """
    Install chromedriver.

    Installs the chromedriver of the stable channel listed by the Chrome for Testing "last known good
    versions" endpoint, unless that version is already installed.
    """
    driver = 'chromedriver'
    Log.Info('Installing {}'.format(driver))

    # get the chromedriver version
    release_data = JSON.ObjectFromURL(
        url='https://googlechromelabs.github.io/chrome-for-testing/last-known-good-versions-with-downloads.json',
        errors='ignore'
    )
    stable = release_data['channels']['stable']
    version = stable['version']

    if is_driver_installed(driver=driver, version=version):
        Log.Debug('{} {} is already installed'.format(driver, version))
        return

    architectures = dict(
        darwin=dict(
            aarch64='mac-arm64',
            x86_64='mac-x64',
        ),
        linux=dict(
            x86_64='linux64',
        ),
        windows=dict(
            x86='win32',
            x86_64='win64',
        ),
    )

    release_platform = get_platform_asset(driver=driver, assets_by_platform=architectures)
    if release_platform is None:
        return

    # download the release
    Log.Info('Downloading {} {}'.format(driver, version))
    for asset in stable['downloads'][driver]:
        if asset['platform'] == release_platform:
            # the download entries only provide 'platform' and 'url', so the file name comes from the url
            file_name = asset['url'].rsplit('/', 1)[-1]
            install_release(driver=driver, version=version, url=asset['url'], file_name=file_name)
            break
    else:
        Log.Error('No {} {} download found for platform: {}'.format(driver, version, release_platform))


def install_geckodriver():
    # type: () -> None
    """
    Install geckodriver.

    Installs the latest geckodriver release published on GitHub, unless that version is already installed.
    """
    driver = 'geckodriver'
    Log.Info('Installing {}'.format(driver))

    # get the geckodriver version
    release_data = JSON.ObjectFromURL(
        url='https://api.github.com/repos/mozilla/geckodriver/releases/latest',
        errors='ignore'
    )
    version = release_data['tag_name']

    if is_driver_installed(driver=driver, version=version):
        Log.Debug('{} {} is already installed'.format(driver, version))
        return

    # the release asset names are all lowercase, e.g. geckodriver-v0.33.0-linux64.tar.gz
    architectures = dict(
        darwin=dict(
            aarch64='{}-{}-macos-aarch64.tar.gz'.format(driver, version),
            x86_64='{}-{}-macos.tar.gz'.format(driver, version),
        ),
        linux=dict(
            aarch64='{}-{}-linux-aarch64.tar.gz'.format(driver, version),
            x86='{}-{}-linux32.tar.gz'.format(driver, version),
            x86_64='{}-{}-linux64.tar.gz'.format(driver, version),
        ),
        windows=dict(
            x86='{}-{}-win32.zip'.format(driver, version),
            x86_64='{}-{}-win64.zip'.format(driver, version),
        ),
    )

    release_file_name = get_platform_asset(driver=driver, assets_by_platform=architectures)
    if release_file_name is None:
        return

    # download the release
    Log.Info('Downloading {} {}'.format(driver, version))
    for asset in release_data['assets']:
        # compare case-insensitively so a change of capitalization upstream does not break the match
        if asset['name'].lower() == release_file_name.lower():
            install_release(
                driver=driver,
                version=version,
                url=asset['browser_download_url'],
                file_name=asset['name'],
            )
            break
    else:
        Log.Error('No {} {} download found with name: {}'.format(driver, version, release_file_name))


def load_installed_versions():
    # type: () -> dict
    """
    Load the installed versions json file.

    The caller must hold ``driver_versions_lock``.

    Returns
    -------
    dict
        Mapping of driver name to installed version; empty if the file is missing or can not be parsed.
    """
    if not os.path.isfile(driver_versions_json_path):
        return dict()

    try:
        data = json.loads(str(Core.storage.load(filename=driver_versions_json_path, binary=False)))
    except ValueError as e:
        # a corrupt file only means the drivers get installed again
        Log.Warning('Failed to parse {}: {}'.format(driver_versions_json_path, e))
        return dict()

    return data if isinstance(data, dict) else dict()


def get_installed_version(driver):
    # type: (str) -> Optional[str]
    """
    Get the installed version of the specified driver.

    Parameters
    ----------
    driver : str
        The driver name, e.g. 'chromedriver' or 'geckodriver'.

    Returns
    -------
    Optional[str]
        The recorded version, or None if no version is recorded for the driver.
    """
    with driver_versions_lock:
        return load_installed_versions().get(driver)


def update_version_file(driver, version):
    # type: (str, str) -> None
    """
    Update the installed versions json file.

    Parameters
    ----------
    driver : str
        The driver name, e.g. 'chromedriver' or 'geckodriver'.
    version : str
        The version to record for the driver.
    """
    with driver_versions_lock:
        data = load_installed_versions()
        data[driver] = version
        ensure_directory(driver_path)
        Core.storage.save(filename=driver_versions_json_path, data=json.dumps(data), binary=False)


def install_driver():
    # type: () -> None
    """
    Install the driver.

    Runs the installer for the browser selected in the plugin settings. Errors raised by the installer are
    logged instead of propagated.
    """
    # look the installer up before calling it, so that a KeyError raised inside an installer
    # is not mistaken for a missing browser setting
    installer = browser_map.get(Prefs['enum_browser_driver'])
    if installer is None:
        Log.Warning(
            '"Browser (driver) for web automations" is not set, please select a browser in the plugin settings.')
        return

    try:
        installer()
    except Exception as e:
        Log.Exception('Failed to install browser driver: {}'.format(e))


browser_map = dict(
    Chrome=install_chromedriver,
    Firefox=install_geckodriver,
)
+++ b/Contents/DefaultPrefs.json @@ -104,6 +104,38 @@ "default": "3", "secure": "false" }, + { + "id": "enum_browser_driver", + "type": "enum", + "label": "Browser (driver) for web automations (e.g. collecting YouTube cookies)", + "default": "None", + "values": [ + "None", + "Chrome", + "Firefox" + ] + }, + { + "id": "int_update_browser_driver_interval", + "type": "text", + "label": "Interval to check for browser driver updates, in hours (min: 1)", + "default": "24", + "secure": "false" + }, + { + "id": "bool_youtube_consent", + "type": "bool", + "label": "Agree to YouTube's Terms of Service", + "default": "False", + "secure": "false" + }, + { + "id": "bool_youtube_cookies", + "type": "bool", + "label": "Use cookies from YouTube.com", + "default": "False", + "secure": "false" + }, { "id": "str_youtube_user", "type": "text", diff --git a/requirements.txt b/requirements.txt index e33a65fd..29d7e8db 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ plexapi-backport[alert]==4.15.6 # custom python-plexapi supporting python 2.7 polib==1.2.0;python_version<"3" requests==2.27.1;python_version<"3" # 2.27 is last version supporting Python 2.7 schedule==0.6.0;python_version<"3" +selenium==3.141.0;python_version<"3" typing==3.10.0.0 werkzeug==1.0.1;python_version<"3"