crappy spaghetti code - initial release

This commit is contained in:
Peter 2021-06-18 03:01:14 +08:00
commit a4352dfd3e
10 changed files with 398 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
grades/
__pycache__
chromedriver*
test*

16
README.md Normal file
View File

@ -0,0 +1,16 @@
## Blackboard marks downloader (UWA)
---
**Dependencies**:
- python
- selenium
- chromedriver, placed relative to this directory
Run the script with `py main.py` and enter your student number and password. I'm not taking your personal details, but *don't take my word for it* - always check the source if you don't trust it!
---
Made this script to download my marks, receipts and all the stuff I uploaded for my first semester. It's a fucking mess of spaghetti python code because to be honest I really just wanted to get this out of the way and have some time for other stuff after the first round of exams. It's a mess of code, with some bits (the login) being picked from the scraper script and some of the scraper asset objects being translated from ruby to python here (in a quick and incomplete way). This will probably break in some way when the UI is overhauled for next semester :/
There is no bulk marks download feature in the current lms, even though it seems other blackboard installations can give students this bulk download ability. It relies on a lot of js crap so I ended up using selenium all the way through. Doesn't download styles to save space, you'll have to download the css and js yourself and it has to be absolute because the script makes no effort to make the links relative.
This one was made for UWA but you may be able to tweak it for your institution (see constants.py).

1
constants/constants.py Normal file
View File

@ -0,0 +1 @@
# Root URL of the Blackboard LMS instance. Keep the protocol and leave off any
# trailing slash: callers append paths such as "/webapps/..." directly to it.
BASE_URL = "https://lms.uwa.edu.au" # Include protocol.

192
main.py Normal file
View File

@ -0,0 +1,192 @@
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
# For chrome stuff
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.options import Options
# ---
from urllib.parse import parse_qs, urlparse
import os
import requests
import time
import getpass
import json
import re
import sys
import argparse
import pathlib
import utils.selectors
from utils.asset import Asset, RequestStack
from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
from constants.constants import BASE_URL
from utils.login import login
from utils.selectors import Selectors
from utils.utils import friendly_filename, get_assignment_name, get_text_excluding_children, save_html
import code
from random import randint
# Optional developer hook. utils/test.py is not part of this commit (the
# .gitignore excludes "test*"), so its absence is the normal case.
try:
    from utils.test import get_etc
except ImportError:
    # Hook not available: only now is it correct to say we are not testing.
    testing = False

    def get_etc(*args):
        """Stand-in for the optional test hook; ignore all arguments."""
        return False
else:
    # Set the flag only after the import has actually succeeded.
    testing = True

# Request headers ({'Cookie': ...}) used for authenticated downloads; filled in
# once login has completed.
cookie = None
def click_the_fing_button(driver, button, max_retries=5):
    """Click *button* and wait until the click has opened a second window.

    Works around https://stackoverflow.com/a/67414801 : when the click does
    not register, the window is resized to wake Selenium up and the click is
    retried.

    Args:
        driver: the Selenium WebDriver that owns *button*.
        button: the element to click.
        max_retries: how many extra attempts to make after the first failure.

    Raises:
        Exception: whatever the last attempt raised, once retries run out.
    """
    resized = False
    for remaining in range(max_retries, -1, -1):
        try:
            # click(button) already moves to the element first, so the separate
            # (never performed) move_to_element chain was dropped.
            ActionChains(driver).click(button).perform()
            WebDriverWait(driver, 2).until(EC.number_of_windows_to_be(2))
            break
        except Exception:
            if remaining == 0:
                # Give up instead of recursing until the interpreter's limit.
                raise
            # hack to wake selenium up when it doesnt want to click the button!
            driver.set_window_size(1024, 768)
            resized = True
    if resized:
        # Undo the wake-up resize.
        driver.maximize_window()
# You can probably replace this with a recursive method like in blackboard scraper but tbh i just want to get this script done so i can stop working for once.
def scrape_further(driver, path):
    """Save the test attempts and submitted files linked from the current page.

    Each link whose href starts with '/webapps/assessment' is opened in a new
    window and its HTML is saved under *path*. Each link whose href starts
    with '/webapps/assignment/download' is downloaded into *path* using the
    module-level ``cookie`` headers.

    Args:
        driver: WebDriver currently focused on the item's information window.
        path: directory (as a string) that receives the saved files.
    """
    # attempts for bb-held tests
    attempt_links = driver.find_elements_by_xpath("//a[starts-with(@href, '/webapps/assessment')]")
    attempt_urls = [link.get_attribute('href') for link in attempt_links]
    for i, attempt in enumerate(attempt_urls):
        name = "attempt_"+str(i)+"_["+parse_qs(urlparse(attempt).query)['attempt_id'][0]+"]"
        # Escape BASE_URL so its dots are literal rather than regex wildcards.
        attempt = re.sub("^" + re.escape(BASE_URL), "", attempt)
        # Pass the URL as a script argument instead of splicing it into the
        # JavaScript source, so quotes in the URL cannot break the call.
        driver.execute_script("window.open(arguments[0])", BASE_URL + attempt)
        # The new window is expected to be the third one open.
        WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(3))
        driver.switch_to.window(driver.window_handles[2])
        save_html(path, name, driver.page_source)
        if testing:
            get_etc(driver, cookie, path)
        driver.close()
        # Return to the window this function was called from.
        driver.switch_to.window(driver.window_handles[1])

    # submission file for assignment
    request_stack = RequestStack(cookie)
    download_links = driver.find_elements_by_xpath("//a[starts-with(@href, '/webapps/assignment/download')]")
    for download_url in [link.get_attribute('href') for link in download_links]:
        request_stack.add_file(download_url, path)
    request_stack.download_all()
# --- Command line -----------------------------------------------------------
parser = argparse.ArgumentParser(description='Automated microsoft SSO login.')
# parser.add_argument("-p", "--password", help="Automatically use provided password", default="")
parser.add_argument("-u", "--username", help="Automatically use provided userID", default="")
args = parser.parse_args()

# --- Browser ----------------------------------------------------------------
# Work on a copy: DesiredCapabilities.CHROME is a class-level dict, so editing
# it in place would change it for every other user in the process.
CAPABILITIES = DesiredCapabilities.CHROME.copy()
# Performance logging is what utils.login.try_cookie reads the session cookie from.
CAPABILITIES['goog:loggingPrefs'] = {'performance': 'ALL'}
OPTIONS = Options()
# OPTIONS.add_argument("--headless")
driver = webdriver.Chrome(
    executable_path='chromedriver',
    desired_capabilities=CAPABILITIES,
    options=OPTIONS
)
driver.maximize_window()

# Headers reused by scrape_further() for authenticated file downloads.
cookie = {'Cookie': login(args, driver)} # do Login.
driver.get(BASE_URL+"/webapps/gradebook/do/student/viewCourses")
try:
    # Dismiss the notice button if it is shown.
    WaitClickable(driver,(By.CLASS_NAME, "button-1")).click()
except Exception:
    # Not a bare except: Ctrl-C and SystemExit must still stop the script.
    print("no tos warning - skipped")
SwitchToIFrame(driver, (By.ID, 'mybbCanvas'))

# get courseIDs
courses = driver.find_element_by_id("left_stream_mygrades")\
    .find_elements_by_xpath("//div[@role='tab']")
course_details = []
for i, course_results in enumerate(courses):
    ActionChains(driver).move_to_element(course_results).perform()
    course_url = course_results.get_attribute("bb:rhs")
    # This XPath starts with "//", so it matches spans in the whole document
    # rather than only inside this tab; that is why the i-th match is taken.
    course_name = course_results.find_elements_by_xpath("//span[@class='stream_area_name']")[i].text
    course_name += " ["+parse_qs(urlparse(course_url).query)['course_id'][0]+"]"
    course_details.append({
        # The name becomes a directory name. save_html() sanitises its
        # directory but Asset does not, so sanitise once here to make both
        # write into the same folder.
        'name': friendly_filename(course_name),
        'url' : course_url
    })
# `path` is used as a stack of directory components: grades/<course>/<item>.
path = ['grades']
try:
    for course in course_details:
        path.append(course['name']) # course name
        print(course['name'])
        driver.get(BASE_URL+course['url'])
        # Replace the page's content loader so that item links open in a new
        # window, which the code below switches to and saves.
        driver.execute_script(
            "\n"
            "mygrades.loadContentFrame = function(url) {\n"
            "window.open(url);\n"
            "}\n"
        )
        WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
        table = driver.find_elements_by_xpath("//div[@id='grades_wrapper']/div")
        save_html("/".join(path), path[0], driver.page_source)

        for row_number, assignment in enumerate(table):
            print(row_number)
            buttons = assignment.find_elements_by_tag_name("input")
            information_link = False
            try:
                # Prefer the clickable link; it leads to an information page.
                block = assignment.find_element_by_xpath("./div[@class='cell gradable']/a[@onclick]")
                information_link = True
            except Exception:
                block = assignment.find_element_by_xpath("./div[@class='cell gradable']")
            assignment_name = get_assignment_name(driver,block)
            path.append(assignment_name)

            # download information if it exists.
            if information_link:
                ActionChains(driver).move_to_element(block).click(block).perform()
                print("Switched "+assignment_name)
                WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
                driver.switch_to.window(driver.window_handles[1])
                save_html("/".join(path),"information",driver.page_source)
                scrape_further(driver, "/".join(path))
                driver.close()
                driver.switch_to.window(driver.window_handles[0])

            # download rubric if it exists.
            for button in buttons:
                action = button.get_attribute("onclick")
                if action is not None and "showInLightBox" not in action:
                    click_the_fing_button(driver,button)
                    driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
                    driver.switch_to.window(driver.window_handles[1])
                    WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
                    save_html("/".join(path),"rubric",driver.page_source)
                    driver.find_element_by_xpath("//li[@id='listViewTab']/a").click()
                    WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
                    save_html("/".join(path),"list",driver.page_source)
                    detailed_buttons = driver.find_elements_by_xpath("//div[@class='u_controlsWrapper']/input")
                    detailed_buttons[1].click()
                    detailed_buttons[0].click()
                    save_html("/".join(path),"list_detailed",driver.page_source)
                    driver.close()
                    driver.switch_to.window(driver.window_handles[0])

            path.pop()  # leave the item directory

        WaitClickable(driver,(By.XPATH,"//a[@value='S']")).click()
        save_html("/".join(path),"submitted",driver.page_source)
        try:
            WaitClickable(driver,(By.XPATH,"//div[@id='submissionReceipts']//a")).click()
            WaitClickable(driver,(By.XPATH,"//div[@id='listContainer_itemcount']//a[@class='pagelink']")).click()
        except Exception:
            # Best effort: still save whatever the receipts page shows.
            print('No items?')
        save_html("/".join(path),"receipts",driver.page_source)
        path.pop()  # leave the course directory
finally:
    # Close the browser even when scraping fails part-way through.
    driver.quit()

3
utils/__init__.py Normal file
View File

@ -0,0 +1,3 @@
# https://stackoverflow.com/a/49375740
import os
import sys

# Put this package's own directory on the module search path.
sys.path.append(
    os.path.dirname(os.path.realpath(__file__))
)

76
utils/asset.py Normal file
View File

@ -0,0 +1,76 @@
import wget
from constants.constants import BASE_URL
import re
import hashlib
import requests
import shutil
import csv
from pathlib import Path
def convert_filename(name, hash):
    """Insert ``[hash]`` just before the last extension of *name*.

    With no dot in *name*, the tag is appended to the end instead.
    """
    stem, dot, extension = name.rpartition('.')
    tag = "[" + hash + "]"
    if not dot:
        # No extension at all: rpartition leaves the whole name in `extension`.
        return extension + tag
    return stem + tag + dot + extension
class RequestStack:
    """Collect Asset downloads that all use the same request headers."""

    def __init__(self, token):
        """Start with an empty queue; *token* is passed to every download."""
        super().__init__()
        self.token = token
        self.request_stack = []

    def add_file(self, url, path):
        """Queue the file at *url* for download into directory *path*."""
        asset = Asset(url, path)
        self.request_stack.append(asset)

    def download_all(self):
        """Download every queued asset, in the order it was added."""
        for asset in self.request_stack:
            asset.download(self.token)
class Asset:
    """One remote file that is downloaded into a local directory.

    ``path`` and ``url`` are set by the constructor. ``original_filename``,
    ``etag_hash`` and ``filename`` are set by a successful download().
    """

    def __init__(self, url, path):
        """Remember *url* relative to BASE_URL and create directory *path*."""
        self.path = Path(path)
        # download() prepends BASE_URL again. A plain prefix test is used so
        # the dots in BASE_URL are not treated as regex wildcards.
        self.url = url[len(BASE_URL):] if url.startswith(BASE_URL) else url
        # self.file_id = re.findall('file_id=(.+)&',url)
        self.path.mkdir(parents=True, exist_ok=True)
        super().__init__()

    @staticmethod
    def _filename_from_disposition(value):
        """Return the filename named in a Content-Disposition value, or None."""
        match = (
            re.search(r'filename="([^"]+)"', value, re.IGNORECASE)
            or re.search(r'filename=([^;"]+)', value, re.IGNORECASE)
        )
        if match is None:
            return None
        # Drop any directory part so a server-supplied name cannot leave self.path.
        name = re.sub(r'.*[\\/]', '', match.group(1)).strip()
        return name or None

    def download(self, req_headers, max_redirects=10):
        """Fetch the file into ``self.path`` and write its metadata CSV.

        Args:
            req_headers: headers sent with the request (the session cookie).
            max_redirects: how many 302 redirects may still be followed.

        Returns:
            None on success, otherwise the HTTP status code that stopped the
            download. After a redirect, the redirected download's result.
        """
        response = requests.get(BASE_URL+self.url, stream=True, headers=req_headers, allow_redirects=False)
        try:
            headers = response.headers
            if response.status_code == 302 and headers.get('location'):
                if max_redirects <= 0:
                    print("Error "+str(response.status_code))
                    return response.status_code
                return Asset(headers['location'], self.path).download(req_headers, max_redirects - 1)
            if response.status_code != 200:
                print("Error "+str(response.status_code))
                return response.status_code

            # Only the ETag has its surrounding quotes removed. Stripping every
            # header also removed the closing quote of filename="..." and made
            # the filename lookup fail.
            etag = headers.get('ETag', '').strip('"')
            self.original_filename = (
                self._filename_from_disposition(headers.get('Content-Disposition', ''))
                or re.sub(".*/", "", self.url)
            )
            self.etag_hash = hashlib.md5(etag.encode()).hexdigest()
            self.filename = convert_filename(self.original_filename, self.etag_hash[0:6])
            with open(self.path.joinpath(self.filename), 'wb') as f:
                # iter_content undoes gzip/deflate transfer compression, which
                # copying response.raw directly would have written to disk as-is.
                for chunk in response.iter_content(chunk_size=65536):
                    f.write(chunk)
            metadata_headers = {
                'ETag': etag,
                'Last-Modified': headers.get('Last-Modified', ''),
                'Content-Length': headers.get('Content-Length', ''),
            }
        finally:
            # A streamed response keeps its connection until it is closed.
            response.close()
        self.write_metadata(metadata_headers)

    def write_metadata(self, headers):
        """Write ``ZZZ_metadata/<filename>__metadata.csv`` next to the file.

        *headers* is a mapping that may hold 'ETag', 'Last-Modified' and
        'Content-Length'; a missing entry is recorded as an empty string.
        """
        metacsv = [
            ["original_filename", self.original_filename],
            ["readable_filename", self.filename],
            ["url", self.url],
            ["pathhash", hashlib.md5(self.url.encode()).hexdigest()],
            ["etag", headers.get('ETag', '')],
            ["etaghash", self.etag_hash],
            ["last-modified", headers.get("Last-Modified", '')],
            ["content-length", headers.get("Content-Length", '')],
            ["age", ""],
        ]
        csvpath = self.path.joinpath("ZZZ_metadata")
        csvpath.mkdir(parents=True, exist_ok=True)
        with open(csvpath.joinpath(self.filename+"__metadata.csv"), "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(metacsv)

53
utils/login.py Normal file
View File

@ -0,0 +1,53 @@
from utils.wait import WaitClickable
from utils.selectors import Selectors
import sys
from selenium.webdriver.support.wait import WebDriverWait
from urllib.parse import urlparse
from selenium.webdriver.support import expected_conditions as EC
from getpass import getpass
from constants.constants import BASE_URL
import re
import json
def try_cookie(driver):
    """Return the Cookie header recorded for the LMS portal page, or None.

    Scans the driver's 'performance' log for an entry whose documentURL
    contains ``BASE_URL + "/webapps/portal"`` and which carries a
    redirectResponse with a Cookie request header.
    """
    # Use BASE_URL so that changing constants.py is enough for another site.
    portal_url = BASE_URL + "/webapps/portal"
    for entry in driver.get_log('performance'):
        parameters = json.loads(entry["message"])['message']['params']
        if portal_url not in parameters.get('documentURL', ''):
            continue
        try:
            return parameters['redirectResponse']['requestHeaders']['Cookie']
        except KeyError:
            # This entry has no redirect or no cookie; keep looking.
            continue
    return None
def login(args, driver):
    """Sign in through the Microsoft login page and return the session cookie.

    Args:
        args: parsed command-line arguments; ``args.username`` may be empty,
            in which case the user is prompted for it.
        driver: the Selenium WebDriver to drive.

    Returns:
        Whatever try_cookie(driver) finds after a successful login.

    Exits the process with status 2 when the password step fails and with
    status -1 when the browser does not end up on BASE_URL.
    """
    username = args.username
    if len(username) == 0:
        print('UserID: ')
        username = input()
    # Add the domain only when it is missing, so both a bare user ID and a
    # full address work, whether typed in or passed with -u.
    if '@' not in username:
        username += '@student.uwa.edu.au'
    print('Password: ')
    password = getpass('')

    driver.get(BASE_URL)
    WaitClickable(driver,Selectors.BOX_USERNAME).send_keys(username)
    WaitClickable(driver,Selectors.BUTTON_NEXT).click()
    print('Entered username.')
    try:
        WaitClickable(driver,Selectors.BOX_PASSWORD).send_keys(password)
        WaitClickable(driver,Selectors.BUTTON_NEXT).click()
        print('Entered password.')
    except Exception:
        try:
            # Show the page's own error message when there is one.
            print(WebDriverWait(driver, 1).until(EC.visibility_of_element_located(Selectors.DIV_USERERROR)).text)
        except Exception:
            print("Login failed.")
        driver.quit()
        # sys.exit, not the site-provided exit(), which is not always defined.
        sys.exit(2)

    WaitClickable(driver,Selectors.BUTTON_DENY).click()
    # WaitClickable(driver,BUTTON_NEXT).click() #IF you want to remember credentials, switch these comments

    current_uri = urlparse(driver.current_url)
    if '{uri.scheme}://{uri.netloc}'.format(uri=current_uri) != BASE_URL:
        driver.quit()
        print("Login failed.")
        sys.exit(-1)
    return try_cookie(driver)

10
utils/selectors.py Normal file
View File

@ -0,0 +1,10 @@
from selenium.webdriver.common.by import By
class Selectors:
    """Locator tuples, in the ``(By.<strategy>, value)`` form Selenium waits take."""

    # Microsoft login
    BOX_USERNAME = (By.ID, "i0116")
    BOX_PASSWORD = (By.ID, "i0118")
    DIV_USERERROR = (By.ID, "usernameError")
    BUTTON_NEXT = (By.ID, "idSIButton9")
    BUTTON_DENY = (By.ID, "idBtn_Back")

    # Selectors for grades

36
utils/utils.py Normal file
View File

@ -0,0 +1,36 @@
import pathlib
import re
def friendly_filename(name):
    """Return *name* cleaned by friendly_dirname with all slashes removed too.

    Unlike a directory path, a single file name must not contain '/' or '\\'.
    """
    name = friendly_dirname(name)
    # Raw string: "\/" in a normal string literal is an invalid escape sequence.
    return re.sub(r"[\\/]", '', name)
def friendly_dirname(name):
    """Return *name* with characters that are unsafe in a directory path removed.

    Steps, in order: drop control characters (0x00-0x1f), drop the characters
    : < > " | ? * , collapse runs of whitespace next to a word to one space,
    then strip leading and trailing whitespace. Slashes are kept so that a
    whole path can be passed in.
    """
    #.gsub(/[^\w\s_-]+/, '')
    # .gsub(/\s+/, '_')
    # pipeline:
    name = re.sub(r"[\x00-\x1f]", '', name)
    name = re.sub(r'[:<>"|?*]', '', name)
    # Must be a raw string: in a normal literal "\b" is a backspace character,
    # not a word boundary, and this substitution never matched anything.
    name = re.sub(r"(^|\b\s)\s+($|\s?\b)", '\\1\\2', name)
    return name.strip()
def get_assignment_name(driver,block):
    """Return a file-name-safe name built from *block*'s own text.

    The text is read with get_text_excluding_children, cleaned with
    friendly_filename, and printed before being returned.
    """
    raw_text = get_text_excluding_children(driver, block)
    safe_name = friendly_filename(raw_text)
    print("Assesment: "+safe_name)
    return safe_name
def save_html(dir,filename,page_source):
    """Write *page_source* to ``<dir>/<filename>.html`` as UTF-8.

    *dir* is cleaned with friendly_dirname and created if needed; *filename*
    is cleaned with friendly_filename.
    """
    target_dir = pathlib.Path(friendly_dirname(dir))
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir.joinpath(friendly_filename(filename)+".html")
    with open(target, "w", encoding="utf-8") as out:
        out.write(page_source)
# https://stackoverflow.com/a/19040341
def get_text_excluding_children(driver, element):
    """Return the text held directly by *element*, ignoring child elements.

    Runs a jQuery snippet in the page that keeps only the element's own text
    nodes and joins their text.
    """
    script = (
        "\n"
        "return jQuery(arguments[0]).contents().filter(function() {\n"
        "return this.nodeType == Node.TEXT_NODE;\n"
        "}).text();\n"
    )
    return driver.execute_script(script, element)

7
utils/wait.py Normal file
View File

@ -0,0 +1,7 @@
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element_by_id(name))
def WaitClickable(driver, locator):
    """Wait up to 10 seconds for *locator* to be clickable; return the wait's result."""
    return WebDriverWait(driver, 10).until(EC.element_to_be_clickable(locator))


def WaitDiv(driver, locator):
    """Wait up to 5 seconds for *locator* to be present; return the wait's result."""
    return WebDriverWait(driver, 5).until(EC.presence_of_element_located(locator))


def SwitchToIFrame(driver, locator):
    """Wait up to 5 seconds for the frame at *locator* and switch into it."""
    return WebDriverWait(driver, 5).until(EC.frame_to_be_available_and_switch_to_it(locator))