more hackyness - download marked files w/ feedback

This commit is contained in:
Peter 2021-06-18 19:23:19 +08:00
parent 09c90d1e27
commit 8e76eb8b55
6 changed files with 119 additions and 28 deletions

1
.gitignore vendored
View File

@ -1,4 +1,5 @@
grades/
tmp/
__pycache__
chromedriver*
test*

View File

@ -14,3 +14,10 @@ Made this script to download my marks, receipts and all the stuff I uploaded for
There is no bulk marks download feature in the current LMS, even though it seems other Blackboard installations can give students this bulk download ability. The site relies heavily on JavaScript, so I ended up using Selenium all the way through. To save space, the script doesn't download styles: you'll have to download the CSS and JS yourself, and the links to them have to be absolute because the script makes no effort to make the links relative.
This one was made for UWA but you may be able to tweak it for your institution (see constants.py).
It can now also download the graded results, which may contain annotations. This uses a really hacky method, so no metadata file is created for these downloads.
## Note:
* Does not download turnitin reports. You have to click the link manually to the feedback site.
* Does not download multiple submission attempts - only downloads the last/graded attempt.
* Check that the default page is the 'all' category for the marks instead of something else, like the 'submitted' category. The script should correct this, but just to be safe, click on 'all' if it isn't already selected.

View File

@ -1 +1,7 @@
import os
from pathlib import Path
# Root address of the Blackboard instance. Include the protocol.
BASE_URL = "https://lms.uwa.edu.au"
# Scratch folder for browser downloads: "<cwd>/tmp/" with a trailing separator.
DL_DIR = os.sep.join((os.getcwd(), "tmp", ""))
# Create the folder (and any missing parents) as soon as this module is imported.
os.makedirs(DL_DIR, exist_ok=True)

80
main.py
View File

@ -11,6 +11,7 @@ from selenium.webdriver.chrome.options import Options
# ---
from urllib.parse import parse_qs, urlparse
import os
from os.path import sep
import requests
import time
import getpass
@ -23,12 +24,14 @@ import pathlib
import utils.selectors
from utils.asset import Asset, RequestStack
from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
from constants.constants import BASE_URL
from constants.constants import BASE_URL, DL_DIR
from utils.login import login
from utils.selectors import Selectors
from utils.utils import friendly_filename, get_assignment_name, get_text_excluding_children, save_html
from utils.utils import download_file, friendly_filename, get_assignment_name, get_text_excluding_children, save_html
import code
from random import randint
from pathlib import Path
from selenium.common.exceptions import ElementNotInteractableException
testing = False
try:
@ -39,8 +42,8 @@ except:
cookie = None
# stupid bug
def click_the_fing_button(driver,button):
# https://stackoverflow.com/a/67414801 stupid bug
try:
ActionChains(driver).move_to_element(button)
ActionChains(driver).click(button).perform()
@ -73,17 +76,46 @@ def scrape_further(driver,path):
attempts = [ x.get_attribute('href') for x in attempts ]
for i, attempt in enumerate(attempts):
request_stack.add_file(attempt,path)
get_feedback = False
try:
# The download button makes a tab appear briefly, start the download, then disappear.
# Capturing that tab's URL would be needed to get the metadata and download to the correct location,
# but I haven't figured out how the PSPDFKit JS that triggers this download works.
SwitchToIFrame(driver, (By.XPATH, "//iframe[@class='docviewer_iframe_embed']"))
SwitchToIFrame(driver, (By.XPATH, "//iframe[@title='PSPDFKit']"))
get_feedback = True
except:
print("No feedback to download")
if get_feedback:
dl_button = WaitClickable(driver,(By.XPATH, "//button[contains(@class,'PSPDFKit-Toolbar-Button PSPDFKit-Tool-Button')][@title='Download']"))
dl_button.click()
download_file(path)
request_stack.download_all()
# end of scrape_further
parser = argparse.ArgumentParser(description='Automated microsoft SSO login.')
# parser.add_argument("-p", "--password", help="Automatically use provided password", default="")
parser.add_argument("-u", "--username", help="Automatically use provided userID", default="")
path = ['grades']
args = parser.parse_args()
CAPABILITIES = DesiredCapabilities.CHROME
CAPABILITIES['goog:loggingPrefs'] = {'performance': 'ALL'}
CAPABILITIES['goog:loggingPrefs'] = {
'performance' : 'ALL',
}
for f in os.listdir(DL_DIR):
os.remove(Path(DL_DIR).joinpath(f))
prefs = {
"profile.default_content_settings.popups": 0,
"download.default_directory": DL_DIR,
"directory_upgrade": True
}
OPTIONS = Options()
OPTIONS.add_experimental_option("prefs", prefs)
# OPTIONS.add_argument("--headless")
driver = webdriver.Chrome(
executable_path='chromedriver',
@ -94,13 +126,15 @@ driver.maximize_window()
cookie = {'Cookie': login(args, driver)} # do Login.
# Need to load this page JUST to dismiss the ToS warning so it doesn't break everything down the line.
driver.get(BASE_URL+"/webapps/gradebook/do/student/viewCourses")
try:
WaitClickable(driver,(By.CLASS_NAME, "button-1")).click()
except:
print("no tos warning - skipped")
SwitchToIFrame(driver, (By.ID, 'mybbCanvas'))
driver.get(BASE_URL+"/webapps/streamViewer/streamViewer?cmd=view&streamName=mygrades")
save_html(sep.join(path), 'entrypoint', driver.page_source)
# get courseIDs
courses = driver.find_element_by_id("left_stream_mygrades")\
@ -118,7 +152,6 @@ for i, course_results in enumerate(courses):
'url' : course_url
})
path = ['grades']
for i, course in enumerate(course_details):
path.append(course['name']) # course name
print(course['name'])
@ -130,12 +163,11 @@ for i, course in enumerate(course_details):
}
""")
WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
table = driver.find_elements_by_xpath("//div[@id='grades_wrapper']/div")
save_html("/".join(path), path[0], driver.page_source)
for i, assignment in enumerate(table):
print(i)
buttons = assignment.find_elements_by_tag_name("input")
@ -151,14 +183,17 @@ for i, course in enumerate(course_details):
path.append(assignment_name)
# download information if it exists.
if information_link:
ActionChains(driver).move_to_element(block).click(block).perform()
print("Switched "+assignment_name)
WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
driver.switch_to.window(driver.window_handles[1])
save_html("/".join(path),"information",driver.page_source)
scrape_further(driver, "/".join(path))
driver.close()
driver.switch_to.window(driver.window_handles[0])
try:
ActionChains(driver).move_to_element(block).click(block).perform()
print("Switched "+assignment_name)
WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
driver.switch_to.window(driver.window_handles[1])
save_html(sep.join(path),"information",driver.page_source)
scrape_further(driver, sep.join(path))
driver.close()
driver.switch_to.window(driver.window_handles[0])
except ElementNotInteractableException:
print('idk')
# download rubric if it exists.
for button in buttons:
action = button.get_attribute("onclick")
@ -167,25 +202,26 @@ for i, course in enumerate(course_details):
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
driver.switch_to.window(driver.window_handles[1])
WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
save_html("/".join(path),"rubric",driver.page_source)
save_html(sep.join(path),"rubric",driver.page_source)
driver.find_element_by_xpath("//li[@id='listViewTab']/a").click()
WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
save_html("/".join(path),"list",driver.page_source)
save_html(sep.join(path),"list",driver.page_source)
detailed_buttons = driver.find_elements_by_xpath("//div[@class='u_controlsWrapper']/input")
detailed_buttons[1].click()
detailed_buttons[0].click()
save_html("/".join(path),"list_detailed",driver.page_source)
save_html(sep.join(path),"list_detailed",driver.page_source)
driver.close()
driver.switch_to.window(driver.window_handles[0])
path.pop()
save_html(sep.join(path), path[0], driver.page_source)
WaitClickable(driver,(By.XPATH,"//a[@value='S']")).click()
save_html("/".join(path),"submitted",driver.page_source)
save_html(sep.join(path),"submitted",driver.page_source)
try:
WaitClickable(driver,(By.XPATH,"//div[@id='submissionReceipts']//a")).click()
WaitClickable(driver,(By.XPATH,"//div[@id='listContainer_itemcount']//a[@class='pagelink']")).click()
except:
print('No items?')
save_html("/".join(path),"receipts",driver.page_source)
save_html(sep.join(path),"receipts",driver.page_source)
path.pop()

View File

@ -1,5 +1,14 @@
import pathlib
import re
from constants.constants import DL_DIR
from utils.wait import WaitClickable
from utils.asset import Asset
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os
from pathlib import Path
import shutil
def friendly_filename(name):
name = friendly_dirname(name)
@ -27,6 +36,38 @@ def save_html(dir,filename,page_source):
with open(file, "w", encoding="utf-8") as f:
f.write(page_source)
# Why is it so hard to just get the url of a single tab...
# def get_fast_dl(driver,button):
# windows = len(driver.window_handles)
# return
# Selenium seems to mangle the URL when switching to the short-lived "download" tab,
# so I have to use Chrome's built-in download instead :(. That also means no etag/metadata,
# but to be honest the file comes from annotate-au.foundations.blackboard.com rather than the
# bbcswebdav system, so the tag may not exist in the first place.
def download_file(dest, timeout=120.0):
    """Move finished browser downloads from DL_DIR into *dest*.

    Waits for Chrome's in-progress marker files (``*.crdownload``) to
    disappear from DL_DIR, then moves every file found there into *dest*,
    prefixing each name with ``MARKED__``. An existing file with the same
    target name is replaced.

    Args:
        dest: Directory (str or Path) that receives the downloaded files.
            It is created if it does not exist yet.
        timeout: Maximum number of seconds to wait for in-progress downloads
            before giving up. The files are then left in DL_DIR.

    Returns:
        None. If DL_DIR is empty after the initial grace period the function
        returns without doing anything.
    """
    download_dir = Path(DL_DIR)
    target_dir = Path(dest)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Grace period so the browser has time to start writing the download.
    time.sleep(2)

    deadline = time.monotonic() + timeout
    poll = 1.0
    while True:
        names = os.listdir(download_dir)
        if not names:
            # Nothing (left) to collect.
            return

        if any(Path(name).suffix == '.crdownload' for name in names):
            # A download is still in progress: back off and look again, but
            # never wait forever on a stale partial file.
            if time.monotonic() >= deadline:
                print("Timed out waiting for download to finish")
                return
            time.sleep(poll)
            poll = min(poll * 1.5, 10.0)
            continue

        for name in names:
            target = target_dir.joinpath("MARKED__" + name)
            if target.is_file():
                # Replace the result of an earlier run explicitly.
                os.remove(target)
            shutil.move(str(download_dir.joinpath(name)), str(target))
# https://stackoverflow.com/a/19040341
def get_text_excluding_children(driver, element):
return driver.execute_script("""

View File

@ -1,7 +1,7 @@
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Limit passed as the second argument to every WebDriverWait built by the
# timeout-based helpers below (presumably seconds — confirm against Selenium docs).
timeout = 4
# find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element_by_id(name))
# NOTE(review): the next three assignments use hard-coded limits (10 / 5 / 5) and are
# rebound by the timeout-based definitions that follow, so only the later ones take effect.
# They look like the pre-change lines of this diff — confirm before relying on them.
WaitClickable = lambda driver,locator:WebDriverWait(driver, 10).until(EC.element_to_be_clickable(locator))
WaitDiv = lambda driver,locator:WebDriverWait(driver, 5).until(EC.presence_of_element_located(locator))
SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, 5).until(EC.frame_to_be_available_and_switch_to_it(locator))
# Wait until the element matched by `locator` satisfies EC.element_to_be_clickable.
WaitClickable = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))
# Wait until an element matching `locator` satisfies EC.presence_of_element_located.
WaitDiv = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator))
# Wait for the frame matched by `locator` via EC.frame_to_be_available_and_switch_to_it.
SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator))