update selenium, 2023 version

commit c97582bef2 by Peter, 2022-11-26 15:51:00 +08:00
parent 234cd31bf4
11 changed files with 538 additions and 515 deletions

.gitignore (vendored; Normal file → Executable file; 3 changes)

@@ -5,4 +5,5 @@ chromedriver*
 test*
 .vscode/
 *.7z
 *.tar
+.venv

README.md (Normal file → Executable file; 52 changes)

@@ -1,24 +1,28 @@
 ## Blackboard marks downloader (UWA)
+
 ---
+
 **Dependencies**:
+
 - python
 - selenium
 - chromedriver, placed relative to this directory
+
 ---
+
-Run the script with `py main.py` and enter your student number and password. I'm not taking your personal details, but *don't take my word for it* - always check the source if you don't trust it!
+Run the script with `py main.py` and enter your student number and password. I'm not taking your personal details, but _don't take my word for it_ - always check the source if you don't trust it!
 
-Made this script to download my marks, receipts and all the stuff I uploaded for my first semester. It's a fucking mess of spaghetti python code because, to be honest, I really just wanted to get this out of the way and have some time for other stuff after the first round of exams. Some bits (the login) are picked from the scraper script, and some of the scraper asset objects are translated from ruby to python here (in a quick and incomplete way). This will probably break in some way when the UI is overhauled for next semester :/
+Made this script to download my marks, receipts and all the stuff I uploaded for my first semester.
 
-There is no bulk marks download feature in the current lms, even though it seems other blackboard installations can give students this bulk download ability. It relies on a lot of js crap so I ended up using selenium all the way through. Doesn't download styles to save space; you'll have to download the css and js yourself, and links stay absolute because the script makes no effort to make them relative.
+This one was made for UWA but you may be able to tweak it for your institution (see constants.py).
 
-This one was made for UWA but you may be able to tweak it for your institution (see constants.py).
+There is no bulk marks download feature in the current lms, even though it seems other blackboard installations can give students this bulk download ability. Saves visited pages to `URLS.txt`, so you can use something like the SingleFile extension and its batch save-URLs feature to save the list of pages visited (I recommend enabling scripts in the SingleFile settings so that comments are saved).
 
 Just made it able to download the graded results which may contain annotations. Using a really hacky method to do it so it doesn't create a metadata file for it.
 
 ## Note:
-* Does not download turnitin reports. You have to click the link manually to the feedback site.
-* Does not download multiple submission attempts - only downloads the last/graded attempt.
-* Check that the default page is the 'all' category for the marks instead of something else like the submitted category. The script should correct this but just to be safe click on all if it isn't already
-* Sometimes chromedriver closes after logging in, when not in headless mode. Try interacting with the page before logging in.
+
+- Does not download turnitin reports. You have to click the link manually to the feedback site.
+- Does not download multiple submission attempts - only downloads the last/graded attempt.
+- Check that the default page is the 'all' category for the marks instead of something else like the submitted category. The script should correct this, but just to be safe click on 'all' if it isn't already.
+- Sometimes chromedriver closes after logging in, when not in headless mode. Try interacting with the page before logging in.
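The `URLS.txt` mentioned above is a plain append-only log, one URL per line, so revisited pages appear more than once. A minimal sketch for tidying it before a SingleFile batch save (assumes the default `grades/URLS.txt` location from constants/constants.py):

```python
# Illustrative only: de-duplicate the URL log that save_html() appends to,
# preserving first-seen order, before pasting into SingleFile's batch dialog.
from pathlib import Path

lines = Path("grades/URLS.txt").read_text(encoding="utf-8").splitlines()
unique = dict.fromkeys(line.strip() for line in lines if line.strip())
print("\n".join(unique))
```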

constants/constants.py (Normal file → Executable file; 10 changes)

@@ -1,7 +1,11 @@
 import os
 from pathlib import Path
 
 BASE_URL = "https://lms.uwa.edu.au"  # Include protocol.
-DL_DIR = os.getcwd()+os.path.sep+"tmp"+os.path.sep
+DL_DIR = os.getcwd() + os.path.sep + "tmp" + os.path.sep
 Path(DL_DIR).mkdir(parents=True, exist_ok=True)
+
+SAVE_DIR = "grades"
+URL_LIST = SAVE_DIR + os.path.sep + "URLS.txt"

main.py (Normal file → Executable file; 514 changes)

@@ -1,252 +1,262 @@
#!/usr/bin/env python3
from selenium.webdriver.remote.webdriver import WebDriver
from typing import cast
import requests
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
# For chrome stuff
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.options import Options
# ---
from urllib.parse import parse_qs, urlparse
import os
from os.path import sep
import re
import argparse

from utils.asset import RequestStack
from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
from constants.constants import BASE_URL, DL_DIR, SAVE_DIR
from utils.login import login
from utils.utils import download_file, get_assignment_name, save_html
from pathlib import Path
from selenium.common.exceptions import ElementNotInteractableException

testing = False
# try:
#     testing = True
#     from utils.test import get_etc
# except Exception:


def get_etc(*args):
    return False


# stupid bug
def click_the_fing_button(driver: WebDriver, button):
    try:
        ActionChains(driver).move_to_element(button)
        ActionChains(driver).click(button).perform()
        WebDriverWait(driver, 2).until(EC.number_of_windows_to_be(2))
    except Exception:
        # hack to wake selenium up when it doesnt want to click the button!
        driver.set_window_size(1024, 768)
        click_the_fing_button(driver, button)
        driver.maximize_window()


# You can probably replace this with a recursive method like in blackboard
# scraper but tbh i just want to get this script done so i can stop working for
# once.
def scrape_further(driver: WebDriver, path, session):
    # attempts for bb-held tests
    attempts = driver.find_elements(
        By.XPATH, "//a[starts-with(@href, '/webapps/assessment')]")
    attempts = [x.get_attribute('href') for x in attempts]
    for i, attempt in enumerate(attempts):
        name = "attempt_" + \
            str(i) + "_[" + parse_qs(urlparse(attempt).query)['attempt_id'][0] + "]"
        attempt = re.sub("^" + BASE_URL, "", attempt)
        driver.execute_script("window.open('" + BASE_URL + attempt + "')")
        WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(3))
        driver.switch_to.window(driver.window_handles[2])
        save_html(path, name, driver, True)
        if testing:
            get_etc(driver, session, path)
        driver.close()
        driver.switch_to.window(driver.window_handles[1])

    # Comments may contain feedback links
    request_stack = RequestStack(session)
    etc_files = driver.find_elements(
        By.XPATH, "//a[contains(@href, '/bbcswebdav')]")
    etc_files = [x.get_attribute('href') for x in etc_files]
    for i, item in enumerate(etc_files):
        if (item is not None) and ("bbcswebdav" in item):
            request_stack.add_file(item, path)

    # submission file for assignment
    attempts = driver.find_elements(
        By.XPATH, "//a[starts-with(@href, '/webapps/assignment/download')]")
    attempts = [x.get_attribute('href') for x in attempts]
    for i, attempt in enumerate(attempts):
        request_stack.add_file(attempt, path)

    get_feedback = False
    try:
        # download button causes a tab to appear quickly, download, then disappear
        # need to capture the url to get the metadata and dl to the correct location
        # cant be arsed to figure out how the pspdfkit js that executes this download works.
        SwitchToIFrame(
            driver, (By.XPATH, "//iframe[@class='docviewer_iframe_embed']"))
        SwitchToIFrame(driver, (By.XPATH, "//iframe[@title='PSPDFKit']"))
        get_feedback = True
    except Exception:
        print("No feedback to download")
    if get_feedback:
        dl_button = WaitClickable(
            driver, (By.XPATH, "//button[contains(@class,'PSPDFKit-Toolbar-Button PSPDFKit-Tool-Button')][@title='Download']"))
        dl_button.click()
        download_file(path)
    request_stack.download_all()
    # end of scrape_further


parser = argparse.ArgumentParser(description='Automated microsoft SSO login.')
# parser.add_argument("-p", "--password", help="Automatically use provided password", default="")
parser.add_argument("-u", "--username",
                    help="Automatically use provided userID", default="")

path = [SAVE_DIR]
Path(SAVE_DIR).mkdir(parents=True, exist_ok=True)
args = parser.parse_args()

CAPABILITIES = cast("dict[str, object]", DesiredCapabilities.CHROME.copy())
CAPABILITIES['goog:loggingPrefs'] = {
    'performance': 'ALL',
}

for f in os.listdir(DL_DIR):
    os.remove(Path(DL_DIR).joinpath(f))
prefs = {
    "profile.default_content_settings.popups": 0,
    "download.default_directory": DL_DIR,
    "directory_upgrade": True
}
OPTIONS = Options()
OPTIONS.add_argument('--no-sandbox')
OPTIONS.add_argument('--disable-dev-shm-usage')
OPTIONS.add_experimental_option("prefs", prefs)
# OPTIONS.add_argument("--headless")
driver = webdriver.Chrome(
    executable_path='chromedriver.exe',
    desired_capabilities=CAPABILITIES,
    options=OPTIONS
)
driver.maximize_window()

cookies = login(args, driver)  # do Login.
session = requests.Session()
for cookie in cookies:
    session.cookies.set(cookie["name"], cookie["value"])

# need to load this page JUST to remove the tos warning so it doesnt fuck up everything down the line.
driver.get(BASE_URL + "/webapps/gradebook/do/student/viewCourses")
try:
    WaitClickable(driver, (By.CLASS_NAME, "button-1")).click()
except Exception:
    print("no tos warning - skipped")

driver.get(
    BASE_URL + "/webapps/streamViewer/streamViewer?cmd=view&streamName=mygrades")
save_html(sep.join(path), 'entrypoint', driver, True)

WaitClickable(driver, (By.ID, "left_stream_mygrades"))
# get courseIDs
courses = driver.find_element(By.ID, "left_stream_mygrades")\
    .find_elements(By.XPATH, "//div[@role='tab']")

course_details = []
for i, course_results in enumerate(courses):
    course_results = courses[i]
    ActionChains(driver).move_to_element(course_results).perform()
    course_url = course_results.get_attribute("bb:rhs")
    course_name = course_results.find_elements(
        By.XPATH, "//span[@class='stream_area_name']")[i].text
    course_name += " [" + \
        parse_qs(urlparse(course_url).query)['course_id'][0] + "]"
    course_details.append({
        'name': course_name,
        'url': course_url
    })

for i, course in enumerate(course_details):
    path.append(course['name'])  # course name
    print(course['name'])
    driver.get(BASE_URL + course['url'])

    driver.execute_script("""
        mygrades.loadContentFrame = function(url) {
            window.open(url);
        }
    """)

    WaitClickable(driver, (By.XPATH, "//a[@value='A']")).click()
    WaitClickable(driver, (By.XPATH, "//a[@value='A']")).click()

    table = driver.find_elements(By.XPATH, "//div[@id='grades_wrapper']/div")

    for i, assignment in enumerate(table):
        print(i)
        buttons = assignment.find_elements(By.TAG_NAME, "input")
        block = None
        assignment_name = None
        information_link = False
        try:
            block = assignment.find_element(
                By.XPATH, "./div[@class='cell gradable']/a[@onclick]")
            information_link = True
        except Exception:
            block = assignment.find_element(
                By.XPATH, "./div[@class='cell gradable']")
        assignment_name = get_assignment_name(driver, block)
        path.append(assignment_name)
        # download information if it exists.
        if information_link:
            try:
                ActionChains(driver).move_to_element(
                    block).click(block).perform()
                print("Switched " + assignment_name)
                WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
                driver.switch_to.window(driver.window_handles[1])
                save_html(sep.join(path), "information", driver, True)
                scrape_further(driver, sep.join(path), session)
                driver.close()
                driver.switch_to.window(driver.window_handles[0])
            except ElementNotInteractableException:
                print('idk')
        # download rubric if it exists.
        for button in buttons:
            action = button.get_attribute("onclick")
            if action is not None and "showInLightBox" not in action:
                click_the_fing_button(driver, button)
                driver.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight)")
                driver.switch_to.window(driver.window_handles[1])
                WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
                save_html(sep.join(path), "rubric", driver, True)
                driver.find_element(
                    By.XPATH, "//li[@id='listViewTab']/a").click()
                WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
                save_html(sep.join(path), "list", driver, True)
                detailed_buttons = driver.find_elements(
                    By.XPATH, "//div[@class='u_controlsWrapper']/input")
                detailed_buttons[1].click()
                detailed_buttons[0].click()
                save_html(sep.join(path), "list_detailed", driver, True)
                driver.close()
                driver.switch_to.window(driver.window_handles[0])
        path.pop()
    save_html(sep.join(path), path[0], driver, True)
    WaitClickable(driver, (By.XPATH, "//a[@value='S']")).click()
    save_html(sep.join(path), "submitted", driver, True)
    try:
        WaitClickable(
            driver, (By.XPATH, "//div[@id='submissionReceipts']//a")).click()
        WaitClickable(
            driver, (By.XPATH, "//div[@id='listContainer_itemcount']//a[@class='pagelink']")).click()
    except Exception:
        print('No items?')
    save_html(sep.join(path), "receipts", driver, True)
    path.pop()

driver.quit()

requirements.txt (binary; Executable file)

Binary file not shown.

utils/__init__.py (Normal file → Executable file; 8 changes)

@@ -1,4 +1,4 @@
# https://stackoverflow.com/a/49375740
import os
import sys
sys.path.append(os.path.dirname(os.path.realpath(__file__)))

utils/asset.py (Normal file → Executable file; 168 changes)

@@ -1,84 +1,84 @@
from constants.constants import BASE_URL
import re
import hashlib
import requests
import shutil
import csv
from pathlib import Path


def convert_filename(name, hash):
    _name = name.split('.')
    if len(_name) > 1:
        _name[-2] += ("[" + hash + "]")
    else:
        _name[0] += ("[" + hash + "]")
    return '.'.join(_name)


class RequestStack:
    def __init__(self, token):
        self.request_stack = []
        self.token = token
        super().__init__()

    def add_file(self, url, path):
        self.request_stack.append(Asset(url, path))

    def download_all(self):
        for file in self.request_stack:
            print(f"\tDownloading {file.url}")
            file.download(self.token)


class Asset:
    def __init__(self, url, path):
        self.path = Path(path)
        self.url = re.sub("^" + BASE_URL, "", url)
        # self.file_id = re.findall('file_id=(.+)&',url)
        self.path.mkdir(parents=True, exist_ok=True)
        super().__init__()

    def download(self, session):
        response = session.get(
            BASE_URL + self.url, stream=True, allow_redirects=False)
        headers = response.headers
        if response.status_code == 302 and len(headers['location']) > 0:
            Asset(headers['location'], self.path).download(session)
            return
        elif response.status_code != 200:
            print("[!] Error " + str(response.status_code))
            return response.status_code
        headers = {x: re.sub(r'^"*|"*?$', '', headers.get(x))
                   for x in headers}  # ewww regex
        if 'Content-Disposition' in headers.keys():
            self.original_filename = re.findall(
                'filename="(.+)"', headers['Content-Disposition'])[0]
        else:
            self.original_filename = re.sub(".*/", "", self.url)
        self.etag_hash = hashlib.md5(headers['ETag'].encode()).hexdigest()
        self.filename = convert_filename(
            self.original_filename, self.etag_hash[0:6])
        with open(self.path.joinpath(self.filename), 'wb') as f:
            shutil.copyfileobj(response.raw, f)
        self.write_metadata(headers)

    def write_metadata(self, headers):
        metacsv = [
            ["original_filename", self.original_filename],
            ["readable_filename", self.filename],
            ["url", self.url],
            ["pathhash", hashlib.md5(
                self.url.encode()).hexdigest()],
            ["etag", headers['ETag']],
            ["etaghash", self.etag_hash],
            ["last-modified", headers["Last-Modified"]],
            ["content-length", headers["Content-Length"]],
            ["age", ""],
        ]
        csvpath = self.path.joinpath("ZZZ_metadata")
        csvpath.mkdir(parents=True, exist_ok=True)
        with open(csvpath.joinpath(self.filename + "__metadata.csv"), "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(metacsv)
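For a sense of the naming scheme used by Asset.download(): convert_filename() splices the first six hex characters of the MD5 of the response's ETag into the name, just before the extension when one exists. A quick check (illustrative values):

```python
# Quick check of the ETag-hash naming scheme; the hash argument here is
# a made-up six-character prefix, not a real ETag digest.
from utils.asset import convert_filename

print(convert_filename("report.pdf", "a1b2c3"))  # -> report[a1b2c3].pdf
print(convert_filename("README", "a1b2c3"))      # -> README[a1b2c3]
```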

utils/login.py (Normal file → Executable file; 92 changes)

@@ -1,46 +1,46 @@
import sys
from utils.wait import WaitClickable
from utils.selectors import Selectors
from selenium.webdriver.support.wait import WebDriverWait
from urllib.parse import urlparse
from selenium.webdriver.support import expected_conditions as EC
from getpass import getpass
from constants.constants import BASE_URL
import re
import json


def login(args, driver):
    driver.get(BASE_URL)
    USERNAME = args.username
    if len(USERNAME) == 0:
        print('UserID: ')
        USERNAME = input()
    USERNAME += '@student.uwa.edu.au'
    print('Password: ')
    PASSWORD = getpass('')
    WaitClickable(driver, Selectors.BOX_USERNAME).send_keys(USERNAME)
    WaitClickable(driver, Selectors.BUTTON_NEXT).click()
    print('Entered username.')
    try:
        WaitClickable(driver, Selectors.BOX_PASSWORD).send_keys(PASSWORD)
        WaitClickable(driver, Selectors.BUTTON_NEXT).click()
        print('Entered password.')
    except Exception:
        print(WebDriverWait(driver, 1).until(
            EC.visibility_of_element_located(Selectors.DIV_USERERROR)).text)
        driver.quit()
        exit(2)
    WaitClickable(driver, Selectors.BUTTON_DENY).click()
    # WaitClickable(driver,BUTTON_NEXT).click() #IF you want to remember credentials, switch these comments
    cookie = driver.get_cookies()
    if cookie is not None:
        return cookie
    print('Could not get auth cookie - Invalid ID or password?', file=sys.stderr)
    driver.quit()
    exit(1)
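login() hands back Selenium's cookie list, which main.py replays into a requests.Session so that direct file downloads reuse the authenticated browser session. A minimal sketch of that handoff:

```python
# Sketch of the cookie handoff main.py performs with login()'s return value.
# Assumes `cookies` is the list of dicts that Selenium's get_cookies() returns.
import requests


def session_from_cookies(cookies):
    session = requests.Session()
    for cookie in cookies:
        session.cookies.set(cookie["name"], cookie["value"])
    return session
```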

utils/selectors.py (Normal file → Executable file; 22 changes)

@@ -1,11 +1,11 @@
from selenium.webdriver.common.by import By


class Selectors:
    # Microsoft login
    BOX_USERNAME = (By.ID, "i0116")
    BOX_PASSWORD = (By.ID, "i0118")
    DIV_USERERROR = (By.ID, 'usernameError')
    BUTTON_NEXT = (By.ID, "idSIButton9")
    BUTTON_DENY = (By.ID, "idBtn_Back")
    # Selectors for grades

utils/utils.py (Normal file → Executable file; 153 changes)

@@ -1,74 +1,79 @@
from selenium.webdriver.remote.webdriver import WebDriver
import pathlib
import re
from typing import Union
from constants.constants import DL_DIR, URL_LIST
from selenium.webdriver.support import expected_conditions as EC
import time
import os
from pathlib import Path
import shutil


def friendly_filename(name):
    name = friendly_dirname(name)
    return re.sub("[\\\/]", '', name)


def friendly_dirname(name):
    # .gsub(/[^\w\s_-]+/, '')
    # .gsub(/\s+/, '_')
    # pipeline:
    name = re.sub("[\x00-\x1f]", '', name)
    name = re.sub("[\:\<\>\"\|\?\*]", '', name)
    name = re.sub("(^|\b\s)\s+($|\s?\b)", '\\1\\2', name)
    return name.strip()


def get_assignment_name(driver: WebDriver, block):
    s = friendly_filename(get_text_excluding_children(driver, block))
    print("Assessment: " + s)
    return s


def save_html(dir, filename, driver: WebDriver, page_log_file=False):
    if page_log_file:
        with open(URL_LIST, "a", encoding="utf-8") as f:
            f.write(driver.current_url + "\n")
    dir = pathlib.Path(friendly_dirname(dir))
    dir.mkdir(parents=True, exist_ok=True)
    file = dir.joinpath(friendly_filename(filename) + ".html")
    with open(file, "w", encoding="utf-8") as f:
        f.write(driver.page_source)


# NOTE: Switching to a "download" tab causes issues so we must use the in built
# download in Chrome, which does not have etag or metadata information.
# Files are using annotate-au.foundations.blackboard.com and not bbcswebdav system
# so the tag may not exist in the first place.
def download_file(dest):
    d = Path(DL_DIR)
    time.sleep(2)
    downloading = True
    poll = 1.0
    while downloading:
        for f in os.listdir(d):
            if Path(f).suffix == '.crdownload':
                time.sleep(poll)
                poll *= 1.5
                break
            else:
                _dest = Path(dest).joinpath("MARKED__" + f)
                try:
                    shutil.move(str(d.joinpath(f)), _dest)
                except shutil.SameFileError:
                    os.remove(_dest)
                    shutil.move(str(d.joinpath(f)), _dest)

        if len(os.listdir(d)) == 0:
            downloading = False


# https://stackoverflow.com/a/19040341
def get_text_excluding_children(driver, element):
    return driver.execute_script("""
    return jQuery(arguments[0]).contents().filter(function() {
      return this.nodeType == Node.TEXT_NODE;
    }).text();
    """, element)

utils/wait.py (Normal file → Executable file; 31 changes)

@@ -1,16 +1,15 @@
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

timeout = 5


def WaitClickable(driver, locator):
    return WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))


def WaitDiv(driver, locator):
    return WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator))


def SwitchToIFrame(driver, locator):
    return WebDriverWait(driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator))
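These three wrappers just pin a five-second WebDriverWait around the usual expected conditions. A sketch of how the call sites in main.py read (assumes an existing driver; locators are the standard (By, value) tuples):

```python
# How the helpers read at a call site; the function is illustrative,
# the two calls inside are taken from main.py.
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webdriver import WebDriver
from utils.wait import WaitClickable, WaitDiv


def show_all_marks(driver: WebDriver) -> None:
    WaitClickable(driver, (By.XPATH, "//a[@value='A']")).click()
    WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
```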