more hackyness - download marked files w/ feedback

2024-11-30 11:40:16 +08:00 · 2021-06-18 19:23:19 +08:00 · 2021-06-18 19:23:19 +08:00 · 8e76eb8b55
commit 8e76eb8b55
parent 09c90d1e27
6 changed files with 119 additions and 28 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,5 @@
 grades/
+tmp/
 __pycache__
 chromedriver*
 test*
--- a/README.md
+++ b/README.md
@ -13,4 +13,11 @@ Made this script to download my marks, receipts and all the stuff I uploaded for

 There is no bulk marks download feature in the current lms, even though it seems other blackboard installations can give students this bulk download ability. It relies on a lot of js crap so I ended up using selenium all the way through. Doesn't download styles to save space, you'll have to download the css and js yourself and it has to be absolute because the script makes no effort to make the links relative.

-This one was made for UWA but you may be able to tweak it for your institution (see constants.py).
+This one was made for UWA but you may be able to tweak it for your institution (see constants.py).
+
+It can now also download the graded (marked) submissions, which may contain annotations. This relies on a hacky workaround - Chrome's built-in downloader - so no metadata file is created for those downloads.
+
+## Note:
+* Does not download Turnitin reports. You have to manually click the link to the feedback site.
+* Does not download multiple submission attempts - only downloads the last/graded attempt.
+* Check that the default marks page shows the 'All' category rather than another one such as 'Submitted'. The script should correct this itself, but to be safe, click 'All' first if it isn't already selected.
--- a/constants/constants.py
+++ b/constants/constants.py
@ -1 +1,7 @@
+import os
+from pathlib import Path
+
 BASE_URL = "https://lms.uwa.edu.au" # Include protocol.
+
+DL_DIR = os.getcwd()+os.path.sep+"tmp"+os.path.sep
+Path(DL_DIR).mkdir(parents=True, exist_ok=True)
--- a/main.py
+++ b/main.py
@ -11,6 +11,7 @@ from selenium.webdriver.chrome.options import Options
 # ---
 from urllib.parse import parse_qs, urlparse
 import os
+from os.path import sep
 import requests
 import time
 import getpass
@ -23,12 +24,14 @@ import pathlib
 import utils.selectors
 from utils.asset import Asset, RequestStack
 from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
-from constants.constants import BASE_URL
+from constants.constants import BASE_URL, DL_DIR
 from utils.login import login
 from utils.selectors import Selectors
-from utils.utils import friendly_filename, get_assignment_name, get_text_excluding_children, save_html
+from utils.utils import download_file, friendly_filename, get_assignment_name, get_text_excluding_children, save_html
 import code
 from random import randint
+from pathlib import Path
+from selenium.common.exceptions import ElementNotInteractableException

 testing = False
 try:
@ -39,8 +42,8 @@ except:

 cookie = None

+# stupid bug
 def click_the_fing_button(driver,button):
-    # https://stackoverflow.com/a/67414801 stupid bug
    try:
        ActionChains(driver).move_to_element(button)
        ActionChains(driver).click(button).perform()
@ -73,17 +76,46 @@ def scrape_further(driver,path):
    attempts = [ x.get_attribute('href') for x in attempts ]
    for i, attempt in enumerate(attempts):
        request_stack.add_file(attempt,path)
+
+    get_feedback = False
+    try:
+        # download button causes a tab to appear quickly, download, then disappear
+        # need to capture the url to get the metadata and dl to the correct location
+        # cant be arsed to figure out how the pspdfkit js that executes this download works.
+        SwitchToIFrame(driver, (By.XPATH, "//iframe[@class='docviewer_iframe_embed']"))
+        SwitchToIFrame(driver, (By.XPATH, "//iframe[@title='PSPDFKit']"))
+        get_feedback = True
+    except:
+        print("No feedback to download")
+    if get_feedback:
+        dl_button = WaitClickable(driver,(By.XPATH, "//button[contains(@class,'PSPDFKit-Toolbar-Button PSPDFKit-Tool-Button')][@title='Download']"))
+        dl_button.click()
+        download_file(path)
    request_stack.download_all()
+# end of scrape_further
+

 parser = argparse.ArgumentParser(description='Automated microsoft SSO login.')
 # parser.add_argument("-p", "--password", help="Automatically use provided password", default="")
 parser.add_argument("-u", "--username", help="Automatically use provided userID", default="")

+path = ['grades']
 args = parser.parse_args()

 CAPABILITIES = DesiredCapabilities.CHROME
-CAPABILITIES['goog:loggingPrefs'] = {'performance': 'ALL'}
+CAPABILITIES['goog:loggingPrefs'] = {
+    'performance'           : 'ALL',
+}
+
+for f in os.listdir(DL_DIR):
+    os.remove(Path(DL_DIR).joinpath(f))
+prefs = {
+            "profile.default_content_settings.popups": 0,
+            "download.default_directory": DL_DIR,
+            "directory_upgrade": True
+        }
 OPTIONS = Options()
+OPTIONS.add_experimental_option("prefs", prefs)
 # OPTIONS.add_argument("--headless")
 driver = webdriver.Chrome(
                            executable_path='chromedriver',
@ -94,13 +126,15 @@ driver.maximize_window()

 cookie = {'Cookie': login(args, driver)} # do Login.

+# need to load this page JUST to remove the tos warning so it doesnt fuck up everything down the line.
 driver.get(BASE_URL+"/webapps/gradebook/do/student/viewCourses")
-
 try:
    WaitClickable(driver,(By.CLASS_NAME, "button-1")).click()
 except:
    print("no tos warning - skipped")
-SwitchToIFrame(driver, (By.ID, 'mybbCanvas'))
+
+driver.get(BASE_URL+"/webapps/streamViewer/streamViewer?cmd=view&streamName=mygrades")
+save_html(sep.join(path), 'entrypoint', driver.page_source)

 # get courseIDs
 courses = driver.find_element_by_id("left_stream_mygrades")\
@ -118,7 +152,6 @@ for i, course_results in enumerate(courses):
        'url' : course_url
    })

-path = ['grades']
 for i, course in enumerate(course_details):
    path.append(course['name']) # course name
    print(course['name'])
@ -130,12 +163,11 @@ for i, course in enumerate(course_details):
    }
    """)

+    WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
    WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()

    table = driver.find_elements_by_xpath("//div[@id='grades_wrapper']/div")

-    save_html("/".join(path), path[0], driver.page_source)
-
    for i, assignment in enumerate(table):
        print(i)
        buttons = assignment.find_elements_by_tag_name("input")
@ -151,14 +183,17 @@ for i, course in enumerate(course_details):
        path.append(assignment_name)
        # download information if it exists.
        if information_link:
-            ActionChains(driver).move_to_element(block).click(block).perform()
-            print("Switched "+assignment_name)
-            WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
-            driver.switch_to.window(driver.window_handles[1])
-            save_html("/".join(path),"information",driver.page_source)
-            scrape_further(driver, "/".join(path))
-            driver.close()
-            driver.switch_to.window(driver.window_handles[0])
+            try:
+                ActionChains(driver).move_to_element(block).click(block).perform()
+                print("Switched "+assignment_name)
+                WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
+                driver.switch_to.window(driver.window_handles[1])
+                save_html(sep.join(path),"information",driver.page_source)
+                scrape_further(driver, sep.join(path))
+                driver.close()
+                driver.switch_to.window(driver.window_handles[0])
+            except ElementNotInteractableException:
+                print('idk')
        # download rubric if it exists.
        for button in buttons:
            action = button.get_attribute("onclick")
@ -167,25 +202,26 @@ for i, course in enumerate(course_details):
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
                driver.switch_to.window(driver.window_handles[1])
                WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
-                save_html("/".join(path),"rubric",driver.page_source)
+                save_html(sep.join(path),"rubric",driver.page_source)
                driver.find_element_by_xpath("//li[@id='listViewTab']/a").click()
                WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
-                save_html("/".join(path),"list",driver.page_source)
+                save_html(sep.join(path),"list",driver.page_source)
                detailed_buttons = driver.find_elements_by_xpath("//div[@class='u_controlsWrapper']/input")
                detailed_buttons[1].click()
                detailed_buttons[0].click()
-                save_html("/".join(path),"list_detailed",driver.page_source)
+                save_html(sep.join(path),"list_detailed",driver.page_source)
                driver.close()
                driver.switch_to.window(driver.window_handles[0])
-        path.pop()
+        path.pop() 
+    save_html(sep.join(path), path[0], driver.page_source)
    WaitClickable(driver,(By.XPATH,"//a[@value='S']")).click()
-    save_html("/".join(path),"submitted",driver.page_source)
+    save_html(sep.join(path),"submitted",driver.page_source)
    try:
        WaitClickable(driver,(By.XPATH,"//div[@id='submissionReceipts']//a")).click()
        WaitClickable(driver,(By.XPATH,"//div[@id='listContainer_itemcount']//a[@class='pagelink']")).click()
    except:
        print('No items?')
-    save_html("/".join(path),"receipts",driver.page_source)
+    save_html(sep.join(path),"receipts",driver.page_source)
    path.pop()


--- a/utils/utils.py
+++ b/utils/utils.py
@ -1,5 +1,14 @@
 import pathlib
 import re
+from constants.constants import DL_DIR
+from utils.wait import WaitClickable
+from utils.asset import Asset
+from selenium.webdriver.support.wait import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import time
+import os
+from pathlib import Path
+import shutil

 def friendly_filename(name):
    name = friendly_dirname(name)
@ -27,6 +36,38 @@ def save_html(dir,filename,page_source):
    with open(file, "w", encoding="utf-8") as f:
        f.write(page_source)

+def download_file(dest, timeout=120):
+    """Move finished Chrome downloads from DL_DIR into `dest` as "MARKED__<name>".
+
+    Chrome's own downloader is used because selenium loses the short-lived
+    "download" tab, so no etag/metadata is saved (the file is served by
+    annotate-au.foundations.blackboard.com, not bbcswebdav, and may lack one).
+    dest: directory receiving the files (created if missing).
+    timeout: seconds to wait for a download to appear and finish; on
+        expiry a message is printed and DL_DIR is left untouched.
+    """
+    d = Path(DL_DIR)
+    Path(dest).mkdir(parents=True, exist_ok=True)
+    deadline = time.monotonic() + timeout
+    poll = 1.0
+    time.sleep(2)  # let Chrome create its partial file before the first look
+    while True:
+        names = os.listdir(d)
+        # Empty dir: not started yet. '.crdownload': Chrome is still writing.
+        if names and not any(Path(f).suffix == '.crdownload' for f in names):
+            break
+        if time.monotonic() >= deadline:
+            print("Gave up waiting for a download in "+str(d))
+            return
+        time.sleep(poll)
+        poll = min(poll * 1.5, 5.0)  # back off, but never to multi-minute sleeps
+    for f in names:
+        _dest = Path(dest).joinpath("MARKED__"+f)
+        if _dest.exists():
+            os.remove(_dest)  # replace the copy left by an earlier run
+        shutil.move(str(d.joinpath(f)), str(_dest))
+
+
 # https://stackoverflow.com/a/19040341
 def get_text_excluding_children(driver, element):
    return driver.execute_script("""
--- a/utils/wait.py
+++ b/utils/wait.py
@ -1,7 +1,7 @@
 from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
-
+timeout = 4
 # find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element_by_id(name))
-WaitClickable = lambda driver,locator:WebDriverWait(driver, 10).until(EC.element_to_be_clickable(locator))
-WaitDiv = lambda driver,locator:WebDriverWait(driver, 5).until(EC.presence_of_element_located(locator))
-SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, 5).until(EC.frame_to_be_available_and_switch_to_it(locator))
+WaitClickable = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))
+WaitDiv = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator))
+SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator))