mirror of
https://github.com/peter-tanner/Blackboard-marks.git
synced 2024-11-30 11:40:16 +08:00
Update to match the new PDF viewer, which uses a shadow root instead of a nested
iframe.
This commit is contained in:
parent
c97582bef2
commit
357b196613
|
@ -1,5 +1,7 @@
|
|||
## Blackboard marks downloader (UWA)
|
||||
|
||||
NOTE: _Who gives a shit about marks? I don't think I am as much of a tryhard as when I first made this script. Either way I'm still patching this when the crappy code breaks at the end of each semester..._
|
||||
|
||||
---
|
||||
|
||||
**Dependencies**:
|
||||
|
|
|
@ -6,6 +6,6 @@ BASE_URL = "https://lms.uwa.edu.au" # Include protocol.
|
|||
DL_DIR = os.getcwd() + os.path.sep + "tmp" + os.path.sep
|
||||
Path(DL_DIR).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
SAVE_DIR = "grades"
|
||||
SAVE_DIR = "grades_2024-07-23_B"
|
||||
|
||||
URL_LIST = SAVE_DIR + os.path.sep + "URLS.txt"
|
||||
|
|
34
main.py
34
main.py
|
@ -1,5 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import time
|
||||
from selenium.webdriver.remote.webdriver import WebDriver
|
||||
from typing import cast
|
||||
import requests
|
||||
|
@ -8,6 +9,7 @@ from selenium.webdriver.support import expected_conditions as EC
|
|||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
from selenium.common.exceptions import ElementClickInterceptedException
|
||||
# For chrome stuff
|
||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
|
@ -97,15 +99,26 @@ def scrape_further(driver: WebDriver, path, session):
|
|||
# can't be arsed to figure out how the PSPDFKit JS that executes this download works.
|
||||
SwitchToIFrame(
|
||||
driver, (By.XPATH, "//iframe[@class='docviewer_iframe_embed']"))
|
||||
SwitchToIFrame(driver, (By.XPATH, "//iframe[@title='PSPDFKit']"))
|
||||
# New version does not have a nested iframe and uses a shadow root instead...
|
||||
# SwitchToIFrame(driver, (By.XPATH, "//iframe[@title='PSPDFKit']"))
|
||||
get_feedback = True
|
||||
except Exception:
|
||||
print("No feedback to download")
|
||||
except: pass
|
||||
|
||||
if get_feedback:
|
||||
dl_button = WaitClickable(
|
||||
driver, (By.XPATH, "//button[contains(@class,'PSPDFKit-Toolbar-Button PSPDFKit-Tool-Button')][@title='Download']"))
|
||||
dl_button.click()
|
||||
# dl_button = WaitClickable(driver, (By.XPATH, "//button[contains(@class,'PSPDFKit-Toolbar-Button PSPDFKit-Tool-Button')][@title='Download']"))
|
||||
# New version does not have a nested iframe and uses a shadow root instead...
|
||||
# Loop since it takes a while for the iframe to load...
|
||||
while True:
|
||||
try:
|
||||
dl_button = driver.execute_script("return arguments[0].shadowRoot.querySelector(\"button[title='Download']\")", driver.find_element(By.XPATH, "//div[@class='PSPDFKit-Container']"))
|
||||
dl_button.click()
|
||||
break
|
||||
except:
|
||||
time.sleep(1)
|
||||
download_file(path)
|
||||
print("[INFO]: Downloaded feedback")
|
||||
else:
|
||||
print("\x1b[1;31m[WARNING]\x1b\x1b[0m: No feedback to download")
|
||||
request_stack.download_all()
|
||||
# end of scrape_further
|
||||
|
||||
|
@ -137,7 +150,7 @@ OPTIONS.add_argument('--disable-dev-shm-usage')
|
|||
OPTIONS.add_experimental_option("prefs", prefs)
|
||||
# OPTIONS.add_argument("--headless")
|
||||
driver = webdriver.Chrome(
|
||||
executable_path='chromedriver.exe',
|
||||
executable_path='chromedriver',
|
||||
desired_capabilities=CAPABILITIES,
|
||||
options=OPTIONS
|
||||
)
|
||||
|
@ -189,8 +202,11 @@ for i, course in enumerate(course_details):
|
|||
}
|
||||
""")
|
||||
|
||||
WaitClickable(driver, (By.XPATH, "//a[@value='A']")).click()
|
||||
WaitClickable(driver, (By.XPATH, "//a[@value='A']")).click()
|
||||
try:
|
||||
WaitClickable(driver, (By.XPATH, "//a[@value='A']")).click()
|
||||
WaitClickable(driver, (By.XPATH, "//a[@value='A']")).click()
|
||||
except ElementClickInterceptedException: # already clicked on All category - do not do anything
|
||||
pass
|
||||
|
||||
table = driver.find_elements(By.XPATH, "//div[@id='grades_wrapper']/div")
|
||||
|
||||
|
|
|
@ -66,15 +66,15 @@ class Asset:
|
|||
|
||||
def write_metadata(self, headers):
|
||||
metacsv = [
|
||||
["original_filename", self.original_filename],
|
||||
["readable_filename", self.filename],
|
||||
["url", self.url],
|
||||
["original_filename", self.original_filename or "error"],
|
||||
["readable_filename", self.filename or "error"],
|
||||
["url", self.url or "error"],
|
||||
["pathhash", hashlib.md5(
|
||||
self.url.encode()).hexdigest()],
|
||||
["etag", headers['ETag']],
|
||||
["etaghash", self.etag_hash],
|
||||
["last-modified", headers["Last-Modified"]],
|
||||
["content-length", headers["Content-Length"]],
|
||||
self.url.encode()).hexdigest() or "error"],
|
||||
["etag", headers['ETag'] or "error"],
|
||||
["etaghash", self.etag_hash or "error"],
|
||||
["last-modified", headers["Last-Modified"] or "error"],
|
||||
["content-length", headers["Content-Length"] or "error"],
|
||||
["age", ""],
|
||||
]
|
||||
csvpath = self.path.joinpath("ZZZ_metadata")
|
||||
|
|
|
@ -49,7 +49,7 @@ def save_html(dir, filename, driver: WebDriver, page_log_file=False):
|
|||
|
||||
def download_file(dest):
|
||||
d = Path(DL_DIR)
|
||||
time.sleep(2)
|
||||
time.sleep(10) # sorry for blocking!
|
||||
downloading = True
|
||||
poll = 1.0
|
||||
while downloading:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
timeout = 5
|
||||
timeout = 10
|
||||
|
||||
|
||||
def WaitClickable(driver, locator):
|
||||
|
|
Loading…
Reference in New Issue
Block a user