mirror of
https://github.com/peter-tanner/Blackboard-marks.git
synced 2024-11-30 11:40:16 +08:00
Update to match the new PDF viewer, which uses a shadow root instead of a nested
iframe.
This commit is contained in:
parent
c97582bef2
commit
357b196613
|
@ -1,5 +1,7 @@
|
||||||
## Blackboard marks downloader (UWA)
|
## Blackboard marks downloader (UWA)
|
||||||
|
|
||||||
|
NOTE: _Who gives a shit about marks? I don't think I am as much of a tryhard as I was when I first made this script. Either way, I'm still patching this when the crappy code breaks at the end of each semester..._
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
**Dependencies**:
|
**Dependencies**:
|
||||||
|
|
|
@ -6,6 +6,6 @@ BASE_URL = "https://lms.uwa.edu.au" # Include protocol.
|
||||||
DL_DIR = os.getcwd() + os.path.sep + "tmp" + os.path.sep
|
DL_DIR = os.getcwd() + os.path.sep + "tmp" + os.path.sep
|
||||||
Path(DL_DIR).mkdir(parents=True, exist_ok=True)
|
Path(DL_DIR).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
SAVE_DIR = "grades"
|
SAVE_DIR = "grades_2024-07-23_B"
|
||||||
|
|
||||||
URL_LIST = SAVE_DIR + os.path.sep + "URLS.txt"
|
URL_LIST = SAVE_DIR + os.path.sep + "URLS.txt"
|
||||||
|
|
28
main.py
28
main.py
|
@ -1,5 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import time
|
||||||
from selenium.webdriver.remote.webdriver import WebDriver
|
from selenium.webdriver.remote.webdriver import WebDriver
|
||||||
from typing import cast
|
from typing import cast
|
||||||
import requests
|
import requests
|
||||||
|
@ -8,6 +9,7 @@ from selenium.webdriver.support import expected_conditions as EC
|
||||||
from selenium.webdriver.support.wait import WebDriverWait
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
from selenium.webdriver.common.action_chains import ActionChains
|
from selenium.webdriver.common.action_chains import ActionChains
|
||||||
|
from selenium.common.exceptions import ElementClickInterceptedException
|
||||||
# For chrome stuff
|
# For chrome stuff
|
||||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
@ -97,15 +99,26 @@ def scrape_further(driver: WebDriver, path, session):
|
||||||
# cant be arsed to figure out how the pspdfkit js that executes this download works.
|
# cant be arsed to figure out how the pspdfkit js that executes this download works.
|
||||||
SwitchToIFrame(
|
SwitchToIFrame(
|
||||||
driver, (By.XPATH, "//iframe[@class='docviewer_iframe_embed']"))
|
driver, (By.XPATH, "//iframe[@class='docviewer_iframe_embed']"))
|
||||||
SwitchToIFrame(driver, (By.XPATH, "//iframe[@title='PSPDFKit']"))
|
# New version does not have nested iframe and uses a shadowroot instead...
|
||||||
|
# SwitchToIFrame(driver, (By.XPATH, "//iframe[@title='PSPDFKit']"))
|
||||||
get_feedback = True
|
get_feedback = True
|
||||||
except Exception:
|
except: pass
|
||||||
print("No feedback to download")
|
|
||||||
if get_feedback:
|
if get_feedback:
|
||||||
dl_button = WaitClickable(
|
# dl_button = WaitClickable(driver, (By.XPATH, "//button[contains(@class,'PSPDFKit-Toolbar-Button PSPDFKit-Tool-Button')][@title='Download']"))
|
||||||
driver, (By.XPATH, "//button[contains(@class,'PSPDFKit-Toolbar-Button PSPDFKit-Tool-Button')][@title='Download']"))
|
# New version does not have nested iframe and uses a shadowroot instead...
|
||||||
|
# Loop since it takes a while for the iframe to load...
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
dl_button = driver.execute_script("return arguments[0].shadowRoot.querySelector(\"button[title='Download']\")", driver.find_element(By.XPATH, "//div[@class='PSPDFKit-Container']"))
|
||||||
dl_button.click()
|
dl_button.click()
|
||||||
|
break
|
||||||
|
except:
|
||||||
|
time.sleep(1)
|
||||||
download_file(path)
|
download_file(path)
|
||||||
|
print("[INFO]: Downloaded feedback")
|
||||||
|
else:
|
||||||
|
print("\x1b[1;31m[WARNING]\x1b\x1b[0m: No feedback to download")
|
||||||
request_stack.download_all()
|
request_stack.download_all()
|
||||||
# end of scrape_further
|
# end of scrape_further
|
||||||
|
|
||||||
|
@ -137,7 +150,7 @@ OPTIONS.add_argument('--disable-dev-shm-usage')
|
||||||
OPTIONS.add_experimental_option("prefs", prefs)
|
OPTIONS.add_experimental_option("prefs", prefs)
|
||||||
# OPTIONS.add_argument("--headless")
|
# OPTIONS.add_argument("--headless")
|
||||||
driver = webdriver.Chrome(
|
driver = webdriver.Chrome(
|
||||||
executable_path='chromedriver.exe',
|
executable_path='chromedriver',
|
||||||
desired_capabilities=CAPABILITIES,
|
desired_capabilities=CAPABILITIES,
|
||||||
options=OPTIONS
|
options=OPTIONS
|
||||||
)
|
)
|
||||||
|
@ -189,8 +202,11 @@ for i, course in enumerate(course_details):
|
||||||
}
|
}
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
try:
|
||||||
WaitClickable(driver, (By.XPATH, "//a[@value='A']")).click()
|
WaitClickable(driver, (By.XPATH, "//a[@value='A']")).click()
|
||||||
WaitClickable(driver, (By.XPATH, "//a[@value='A']")).click()
|
WaitClickable(driver, (By.XPATH, "//a[@value='A']")).click()
|
||||||
|
except ElementClickInterceptedException: # already clicked on All category - do not do anything
|
||||||
|
pass
|
||||||
|
|
||||||
table = driver.find_elements(By.XPATH, "//div[@id='grades_wrapper']/div")
|
table = driver.find_elements(By.XPATH, "//div[@id='grades_wrapper']/div")
|
||||||
|
|
||||||
|
|
|
@ -66,15 +66,15 @@ class Asset:
|
||||||
|
|
||||||
def write_metadata(self, headers):
|
def write_metadata(self, headers):
|
||||||
metacsv = [
|
metacsv = [
|
||||||
["original_filename", self.original_filename],
|
["original_filename", self.original_filename or "error"],
|
||||||
["readable_filename", self.filename],
|
["readable_filename", self.filename or "error"],
|
||||||
["url", self.url],
|
["url", self.url or "error"],
|
||||||
["pathhash", hashlib.md5(
|
["pathhash", hashlib.md5(
|
||||||
self.url.encode()).hexdigest()],
|
self.url.encode()).hexdigest() or "error"],
|
||||||
["etag", headers['ETag']],
|
["etag", headers['ETag'] or "error"],
|
||||||
["etaghash", self.etag_hash],
|
["etaghash", self.etag_hash or "error"],
|
||||||
["last-modified", headers["Last-Modified"]],
|
["last-modified", headers["Last-Modified"] or "error"],
|
||||||
["content-length", headers["Content-Length"]],
|
["content-length", headers["Content-Length"] or "error"],
|
||||||
["age", ""],
|
["age", ""],
|
||||||
]
|
]
|
||||||
csvpath = self.path.joinpath("ZZZ_metadata")
|
csvpath = self.path.joinpath("ZZZ_metadata")
|
||||||
|
|
|
@ -49,7 +49,7 @@ def save_html(dir, filename, driver: WebDriver, page_log_file=False):
|
||||||
|
|
||||||
def download_file(dest):
|
def download_file(dest):
|
||||||
d = Path(DL_DIR)
|
d = Path(DL_DIR)
|
||||||
time.sleep(2)
|
time.sleep(10) # sorry for blocking!
|
||||||
downloading = True
|
downloading = True
|
||||||
poll = 1.0
|
poll = 1.0
|
||||||
while downloading:
|
while downloading:
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
from selenium.webdriver.support.wait import WebDriverWait
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
timeout = 5
|
timeout = 10
|
||||||
|
|
||||||
|
|
||||||
def WaitClickable(driver, locator):
|
def WaitClickable(driver, locator):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user