Update to new LMS, update chromedriver to 95, unspaghettified *some* code

This commit is contained in:
Peter 2021-11-14 01:53:15 +08:00
parent 8e76eb8b55
commit e3ed2765d6
7 changed files with 44 additions and 63 deletions

5
.gitignore vendored
View File

@ -1,5 +1,6 @@
grades/ grades*
tmp/ tmp/
__pycache__ __pycache__
chromedriver* chromedriver*
test* test*
.vscode/

View File

@ -20,4 +20,5 @@ Just made it able to download the graded results which may contain annotations.
## Note: ## Note:
* Does not download turnitin reports. You have to click the link manually to the feedback site. * Does not download turnitin reports. You have to click the link manually to the feedback site.
* Does not download multiple submission attempts - only downloads the last/graded attempt. * Does not download multiple submission attempts - only downloads the last/graded attempt.
* Check that the default page is the 'all' category for the marks instead of something else like the submitted category. The script should correct this but just to be safe click on all if it isn't already * Check that the default page is the 'all' category for the marks instead of something else like the submitted category. The script should correct this but just to be safe click on all if it isn't already
* Sometimes chromedriver closes after logging in, when not in headless mode. Try interacting with the page before logging in.

56
main.py
View File

@ -1,9 +1,10 @@
#!/usr/bin/env python3
import requests
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.action_chains import ActionChains
# For chrome stuff # For chrome stuff
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@ -12,24 +13,14 @@ from selenium.webdriver.chrome.options import Options
from urllib.parse import parse_qs, urlparse from urllib.parse import parse_qs, urlparse
import os import os
from os.path import sep from os.path import sep
import requests
import time
import getpass
import json
import re import re
import sys
import argparse import argparse
import pathlib
import utils.selectors from utils.asset import RequestStack
from utils.asset import Asset, RequestStack
from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
from constants.constants import BASE_URL, DL_DIR from constants.constants import BASE_URL, DL_DIR
from utils.login import login from utils.login import login
from utils.selectors import Selectors from utils.utils import download_file, get_assignment_name, save_html
from utils.utils import download_file, friendly_filename, get_assignment_name, get_text_excluding_children, save_html
import code
from random import randint
from pathlib import Path from pathlib import Path
from selenium.common.exceptions import ElementNotInteractableException from selenium.common.exceptions import ElementNotInteractableException
@ -40,8 +31,6 @@ try:
except: except:
def get_etc(*args): return False def get_etc(*args): return False
cookie = None
# stupid bug # stupid bug
def click_the_fing_button(driver,button): def click_the_fing_button(driver,button):
try: try:
@ -54,9 +43,9 @@ def click_the_fing_button(driver,button):
driver.maximize_window() driver.maximize_window()
# You can probably replace this with a recursive method like in blackboard scraper but tbh i just want to get this script done so i can stop working for once. # You can probably replace this with a recursive method like in blackboard scraper but tbh i just want to get this script done so i can stop working for once.
def scrape_further(driver,path): def scrape_further(driver,path,session):
# attempts for bb-held tests # attempts for bb-held tests
attempts = driver.find_elements_by_xpath("//a[starts-with(@href, '/webapps/assessment')]") attempts = driver.find_elements(By.XPATH, "//a[starts-with(@href, '/webapps/assessment')]")
attempts = [ x.get_attribute('href') for x in attempts ] attempts = [ x.get_attribute('href') for x in attempts ]
for i, attempt in enumerate(attempts): for i, attempt in enumerate(attempts):
name = "attempt_"+str(i)+"_["+parse_qs(urlparse(attempt).query)['attempt_id'][0]+"]" name = "attempt_"+str(i)+"_["+parse_qs(urlparse(attempt).query)['attempt_id'][0]+"]"
@ -66,13 +55,13 @@ def scrape_further(driver,path):
driver.switch_to.window(driver.window_handles[2]) driver.switch_to.window(driver.window_handles[2])
save_html(path, name, driver.page_source) save_html(path, name, driver.page_source)
if testing: if testing:
get_etc(driver, cookie, path) get_etc(driver, session, path)
driver.close() driver.close()
driver.switch_to.window(driver.window_handles[1]) driver.switch_to.window(driver.window_handles[1])
# submission file for assignment # submission file for assignment
request_stack = RequestStack(cookie) request_stack = RequestStack(session)
attempts = driver.find_elements_by_xpath("//a[starts-with(@href, '/webapps/assignment/download')]") attempts = driver.find_elements(By.XPATH, "//a[starts-with(@href, '/webapps/assignment/download')]")
attempts = [ x.get_attribute('href') for x in attempts ] attempts = [ x.get_attribute('href') for x in attempts ]
for i, attempt in enumerate(attempts): for i, attempt in enumerate(attempts):
request_stack.add_file(attempt,path) request_stack.add_file(attempt,path)
@ -118,13 +107,16 @@ OPTIONS = Options()
OPTIONS.add_experimental_option("prefs", prefs) OPTIONS.add_experimental_option("prefs", prefs)
# OPTIONS.add_argument("--headless") # OPTIONS.add_argument("--headless")
driver = webdriver.Chrome( driver = webdriver.Chrome(
executable_path='chromedriver', executable_path='chromedriver.exe',
desired_capabilities=CAPABILITIES, desired_capabilities=CAPABILITIES,
options=OPTIONS options=OPTIONS
) )
driver.maximize_window() driver.maximize_window()
cookie = {'Cookie': login(args, driver)} # do Login. cookies = login(args, driver) # do Login.
session = requests.Session()
for cookie in cookies:
session.cookies.set(cookie["name"], cookie["value"])
# need to load this page JUST to remove the tos warning so it doesnt fuck up everything down the line. # need to load this page JUST to remove the tos warning so it doesnt fuck up everything down the line.
driver.get(BASE_URL+"/webapps/gradebook/do/student/viewCourses") driver.get(BASE_URL+"/webapps/gradebook/do/student/viewCourses")
@ -137,15 +129,15 @@ driver.get(BASE_URL+"/webapps/streamViewer/streamViewer?cmd=view&streamName=mygr
save_html(sep.join(path), 'entrypoint', driver.page_source) save_html(sep.join(path), 'entrypoint', driver.page_source)
# get courseIDs # get courseIDs
courses = driver.find_element_by_id("left_stream_mygrades")\ courses = driver.find_element(By.ID, "left_stream_mygrades")\
.find_elements_by_xpath("//div[@role='tab']") .find_elements(By.XPATH, "//div[@role='tab']")
course_details = [] course_details = []
for i, course_results in enumerate(courses): for i, course_results in enumerate(courses):
course_results = courses[i] course_results = courses[i]
ActionChains(driver).move_to_element(course_results).perform() ActionChains(driver).move_to_element(course_results).perform()
course_url = course_results.get_attribute("bb:rhs") course_url = course_results.get_attribute("bb:rhs")
course_name = course_results.find_elements_by_xpath("//span[@class='stream_area_name']")[i].text course_name = course_results.find_elements(By.XPATH, "//span[@class='stream_area_name']")[i].text
course_name += " ["+parse_qs(urlparse(course_url).query)['course_id'][0]+"]" course_name += " ["+parse_qs(urlparse(course_url).query)['course_id'][0]+"]"
course_details.append({ course_details.append({
'name': course_name, 'name': course_name,
@ -166,19 +158,19 @@ for i, course in enumerate(course_details):
WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click() WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click() WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
table = driver.find_elements_by_xpath("//div[@id='grades_wrapper']/div") table = driver.find_elements(By.XPATH, "//div[@id='grades_wrapper']/div")
for i, assignment in enumerate(table): for i, assignment in enumerate(table):
print(i) print(i)
buttons = assignment.find_elements_by_tag_name("input") buttons = assignment.find_elements(By.TAG_NAME, "input")
block = None block = None
assignment_name = None assignment_name = None
information_link = False information_link = False
try: try:
block = assignment.find_element_by_xpath("./div[@class='cell gradable']/a[@onclick]") block = assignment.find_element(By.XPATH, "./div[@class='cell gradable']/a[@onclick]")
information_link = True information_link = True
except: except:
block = assignment.find_element_by_xpath("./div[@class='cell gradable']") block = assignment.find_element(By.XPATH, "./div[@class='cell gradable']")
assignment_name = get_assignment_name(driver,block) assignment_name = get_assignment_name(driver,block)
path.append(assignment_name) path.append(assignment_name)
# download information if it exists. # download information if it exists.
@ -189,7 +181,7 @@ for i, course in enumerate(course_details):
WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2)) WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
driver.switch_to.window(driver.window_handles[1]) driver.switch_to.window(driver.window_handles[1])
save_html(sep.join(path),"information",driver.page_source) save_html(sep.join(path),"information",driver.page_source)
scrape_further(driver, sep.join(path)) scrape_further(driver, sep.join(path), session)
driver.close() driver.close()
driver.switch_to.window(driver.window_handles[0]) driver.switch_to.window(driver.window_handles[0])
except ElementNotInteractableException: except ElementNotInteractableException:
@ -203,10 +195,10 @@ for i, course in enumerate(course_details):
driver.switch_to.window(driver.window_handles[1]) driver.switch_to.window(driver.window_handles[1])
WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer")) WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
save_html(sep.join(path),"rubric",driver.page_source) save_html(sep.join(path),"rubric",driver.page_source)
driver.find_element_by_xpath("//li[@id='listViewTab']/a").click() driver.find_element(By.XPATH, "//li[@id='listViewTab']/a").click()
WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList")) WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
save_html(sep.join(path),"list",driver.page_source) save_html(sep.join(path),"list",driver.page_source)
detailed_buttons = driver.find_elements_by_xpath("//div[@class='u_controlsWrapper']/input") detailed_buttons = driver.find_elements(By.XPATH, "//div[@class='u_controlsWrapper']/input")
detailed_buttons[1].click() detailed_buttons[1].click()
detailed_buttons[0].click() detailed_buttons[0].click()
save_html(sep.join(path),"list_detailed",driver.page_source) save_html(sep.join(path),"list_detailed",driver.page_source)

View File

@ -1,4 +1,3 @@
import wget
from constants.constants import BASE_URL from constants.constants import BASE_URL
import re import re
import hashlib import hashlib
@ -26,6 +25,7 @@ class RequestStack:
def download_all(self): def download_all(self):
for file in self.request_stack: for file in self.request_stack:
print(f"\tDownloading {file.url}")
file.download(self.token) file.download(self.token)
class Asset: class Asset:
@ -36,14 +36,14 @@ class Asset:
self.path.mkdir(parents=True, exist_ok=True) self.path.mkdir(parents=True, exist_ok=True)
super().__init__() super().__init__()
def download(self,req_headers): def download(self,session):
response = requests.get(BASE_URL+self.url, stream=True, headers=req_headers, allow_redirects=False) response = session.get(BASE_URL+self.url, stream=True, allow_redirects=False)
headers = response.headers headers = response.headers
if response.status_code == 302 and len(headers['location']) > 0: if response.status_code == 302 and len(headers['location']) > 0:
Asset(headers['location'], self.path).download(req_headers) Asset(headers['location'], self.path).download(session)
return return
elif response.status_code != 200: elif response.status_code != 200:
print("Error "+str(response.status_code)) print("[!] Error "+str(response.status_code))
return response.status_code return response.status_code
headers = { x:re.sub(r'^"*|"*?$', '', headers.get(x)) for x in headers } # ewww regex headers = { x:re.sub(r'^"*|"*?$', '', headers.get(x)) for x in headers } # ewww regex
if 'Content-Disposition' in headers.keys(): if 'Content-Disposition' in headers.keys():

View File

@ -1,6 +1,6 @@
import sys
from utils.wait import WaitClickable from utils.wait import WaitClickable
from utils.selectors import Selectors from utils.selectors import Selectors
import sys
from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support.wait import WebDriverWait
from urllib.parse import urlparse from urllib.parse import urlparse
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
@ -9,16 +9,8 @@ from constants.constants import BASE_URL
import re import re
import json import json
def try_cookie(driver):
for entry in driver.get_log('performance'):
parameters = json.loads(entry["message"])['message']['params']
if (
'documentURL' in parameters.keys()
and re.search(r'https://lms.uwa.edu.au/webapps/portal.*', parameters['documentURL']) != None
):
return parameters['redirectResponse']['requestHeaders']['Cookie']
def login(args, driver): def login(args, driver):
driver.get(BASE_URL)
USERNAME = args.username USERNAME = args.username
if len(USERNAME) == 0: if len(USERNAME) == 0:
print('UserID: ') print('UserID: ')
@ -26,8 +18,6 @@ def login(args, driver):
USERNAME += '@student.uwa.edu.au' USERNAME += '@student.uwa.edu.au'
print('Password: ') print('Password: ')
PASSWORD = getpass('') PASSWORD = getpass('')
driver.get(BASE_URL)
WaitClickable(driver,Selectors.BOX_USERNAME).send_keys(USERNAME) WaitClickable(driver,Selectors.BOX_USERNAME).send_keys(USERNAME)
WaitClickable(driver,Selectors.BUTTON_NEXT).click() WaitClickable(driver,Selectors.BUTTON_NEXT).click()
@ -44,10 +34,10 @@ def login(args, driver):
WaitClickable(driver,Selectors.BUTTON_DENY).click() WaitClickable(driver,Selectors.BUTTON_DENY).click()
# WaitClickable(driver,BUTTON_NEXT).click() #IF you want to remember credentials, switch these comments # WaitClickable(driver,BUTTON_NEXT).click() #IF you want to remember credentials, switch these comments
current_uri = urlparse(driver.current_url)
if '{uri.scheme}://{uri.netloc}'.format(uri=current_uri) != BASE_URL:
driver.quit()
print("Login failed.")
exit(-1)
return try_cookie(driver) cookie = driver.get_cookies()
if not cookie == None: return cookie
print('Could not get auth cookie - Invalid ID or password?', file=sys.stderr)
driver.quit()
exit(1)

View File

@ -1,9 +1,6 @@
import pathlib import pathlib
import re import re
from constants.constants import DL_DIR from constants.constants import DL_DIR
from utils.wait import WaitClickable
from utils.asset import Asset
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
import time import time
import os import os

View File

@ -1,7 +1,7 @@
from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
timeout = 4 timeout = 5
# find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element_by_id(name)) # find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element(By.ID, name))
WaitClickable = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator)) WaitClickable = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))
WaitDiv = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator)) WaitDiv = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator))
SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator)) SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator))