Update to new LMS, update chromedriver to 95, unspaghettified *some* code

This commit is contained in:
Peter 2021-11-14 01:53:15 +08:00
parent 8e76eb8b55
commit e3ed2765d6
7 changed files with 44 additions and 63 deletions

3
.gitignore vendored
View File

@ -1,5 +1,6 @@
grades/
grades*
tmp/
__pycache__
chromedriver*
test*
.vscode/

View File

@ -21,3 +21,4 @@ Just made it able to download the graded results which may contain annotations.
* Does not download Turnitin reports. You have to manually click the link to the feedback site.
* Does not download multiple submission attempts - only downloads the last/graded attempt.
* Check that the default page for the marks is the 'all' category rather than something else, such as the 'submitted' category. The script should correct this, but to be safe, click on 'all' if it isn't already selected.
* Sometimes chromedriver closes after logging in, when not in headless mode. Try interacting with the page before logging in.

56
main.py
View File

@ -1,9 +1,10 @@
#!/usr/bin/env python3
import requests
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
# For chrome stuff
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@ -12,24 +13,14 @@ from selenium.webdriver.chrome.options import Options
from urllib.parse import parse_qs, urlparse
import os
from os.path import sep
import requests
import time
import getpass
import json
import re
import sys
import argparse
import pathlib
import utils.selectors
from utils.asset import Asset, RequestStack
from utils.asset import RequestStack
from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
from constants.constants import BASE_URL, DL_DIR
from utils.login import login
from utils.selectors import Selectors
from utils.utils import download_file, friendly_filename, get_assignment_name, get_text_excluding_children, save_html
import code
from random import randint
from utils.utils import download_file, get_assignment_name, save_html
from pathlib import Path
from selenium.common.exceptions import ElementNotInteractableException
@ -40,8 +31,6 @@ try:
except:
def get_etc(*args): return False
cookie = None
# stupid bug
def click_the_fing_button(driver,button):
try:
@ -54,9 +43,9 @@ def click_the_fing_button(driver,button):
driver.maximize_window()
# You can probably replace this with a recursive method like in blackboard scraper but tbh i just want to get this script done so i can stop working for once.
def scrape_further(driver,path):
def scrape_further(driver,path,session):
# attempts for bb-held tests
attempts = driver.find_elements_by_xpath("//a[starts-with(@href, '/webapps/assessment')]")
attempts = driver.find_elements(By.XPATH, "//a[starts-with(@href, '/webapps/assessment')]")
attempts = [ x.get_attribute('href') for x in attempts ]
for i, attempt in enumerate(attempts):
name = "attempt_"+str(i)+"_["+parse_qs(urlparse(attempt).query)['attempt_id'][0]+"]"
@ -66,13 +55,13 @@ def scrape_further(driver,path):
driver.switch_to.window(driver.window_handles[2])
save_html(path, name, driver.page_source)
if testing:
get_etc(driver, cookie, path)
get_etc(driver, session, path)
driver.close()
driver.switch_to.window(driver.window_handles[1])
# submission file for assignment
request_stack = RequestStack(cookie)
attempts = driver.find_elements_by_xpath("//a[starts-with(@href, '/webapps/assignment/download')]")
request_stack = RequestStack(session)
attempts = driver.find_elements(By.XPATH, "//a[starts-with(@href, '/webapps/assignment/download')]")
attempts = [ x.get_attribute('href') for x in attempts ]
for i, attempt in enumerate(attempts):
request_stack.add_file(attempt,path)
@ -118,13 +107,16 @@ OPTIONS = Options()
OPTIONS.add_experimental_option("prefs", prefs)
# OPTIONS.add_argument("--headless")
driver = webdriver.Chrome(
executable_path='chromedriver',
executable_path='chromedriver.exe',
desired_capabilities=CAPABILITIES,
options=OPTIONS
)
driver.maximize_window()
cookie = {'Cookie': login(args, driver)} # do Login.
cookies = login(args, driver) # do Login.
session = requests.Session()
for cookie in cookies:
session.cookies.set(cookie["name"], cookie["value"])
# Need to load this page just to remove the ToS warning so it doesn't break everything down the line.
driver.get(BASE_URL+"/webapps/gradebook/do/student/viewCourses")
@ -137,15 +129,15 @@ driver.get(BASE_URL+"/webapps/streamViewer/streamViewer?cmd=view&streamName=mygr
save_html(sep.join(path), 'entrypoint', driver.page_source)
# get courseIDs
courses = driver.find_element_by_id("left_stream_mygrades")\
.find_elements_by_xpath("//div[@role='tab']")
courses = driver.find_element(By.ID, "left_stream_mygrades")\
.find_elements(By.XPATH, "//div[@role='tab']")
course_details = []
for i, course_results in enumerate(courses):
course_results = courses[i]
ActionChains(driver).move_to_element(course_results).perform()
course_url = course_results.get_attribute("bb:rhs")
course_name = course_results.find_elements_by_xpath("//span[@class='stream_area_name']")[i].text
course_name = course_results.find_elements(By.XPATH, "//span[@class='stream_area_name']")[i].text
course_name += " ["+parse_qs(urlparse(course_url).query)['course_id'][0]+"]"
course_details.append({
'name': course_name,
@ -166,19 +158,19 @@ for i, course in enumerate(course_details):
WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
table = driver.find_elements_by_xpath("//div[@id='grades_wrapper']/div")
table = driver.find_elements(By.XPATH, "//div[@id='grades_wrapper']/div")
for i, assignment in enumerate(table):
print(i)
buttons = assignment.find_elements_by_tag_name("input")
buttons = assignment.find_elements(By.TAG_NAME, "input")
block = None
assignment_name = None
information_link = False
try:
block = assignment.find_element_by_xpath("./div[@class='cell gradable']/a[@onclick]")
block = assignment.find_element(By.XPATH, "./div[@class='cell gradable']/a[@onclick]")
information_link = True
except:
block = assignment.find_element_by_xpath("./div[@class='cell gradable']")
block = assignment.find_element(By.XPATH, "./div[@class='cell gradable']")
assignment_name = get_assignment_name(driver,block)
path.append(assignment_name)
# download information if it exists.
@ -189,7 +181,7 @@ for i, course in enumerate(course_details):
WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
driver.switch_to.window(driver.window_handles[1])
save_html(sep.join(path),"information",driver.page_source)
scrape_further(driver, sep.join(path))
scrape_further(driver, sep.join(path), session)
driver.close()
driver.switch_to.window(driver.window_handles[0])
except ElementNotInteractableException:
@ -203,10 +195,10 @@ for i, course in enumerate(course_details):
driver.switch_to.window(driver.window_handles[1])
WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
save_html(sep.join(path),"rubric",driver.page_source)
driver.find_element_by_xpath("//li[@id='listViewTab']/a").click()
driver.find_element(By.XPATH, "//li[@id='listViewTab']/a").click()
WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
save_html(sep.join(path),"list",driver.page_source)
detailed_buttons = driver.find_elements_by_xpath("//div[@class='u_controlsWrapper']/input")
detailed_buttons = driver.find_elements(By.XPATH, "//div[@class='u_controlsWrapper']/input")
detailed_buttons[1].click()
detailed_buttons[0].click()
save_html(sep.join(path),"list_detailed",driver.page_source)

View File

@ -1,4 +1,3 @@
import wget
from constants.constants import BASE_URL
import re
import hashlib
@ -26,6 +25,7 @@ class RequestStack:
def download_all(self):
for file in self.request_stack:
print(f"\tDownloading {file.url}")
file.download(self.token)
class Asset:
@ -36,14 +36,14 @@ class Asset:
self.path.mkdir(parents=True, exist_ok=True)
super().__init__()
def download(self,req_headers):
response = requests.get(BASE_URL+self.url, stream=True, headers=req_headers, allow_redirects=False)
def download(self,session):
response = session.get(BASE_URL+self.url, stream=True, allow_redirects=False)
headers = response.headers
if response.status_code == 302 and len(headers['location']) > 0:
Asset(headers['location'], self.path).download(req_headers)
Asset(headers['location'], self.path).download(session)
return
elif response.status_code != 200:
print("Error "+str(response.status_code))
print("[!] Error "+str(response.status_code))
return response.status_code
headers = { x:re.sub(r'^"*|"*?$', '', headers.get(x)) for x in headers } # ewww regex
if 'Content-Disposition' in headers.keys():

View File

@ -1,6 +1,6 @@
import sys
from utils.wait import WaitClickable
from utils.selectors import Selectors
import sys
from selenium.webdriver.support.wait import WebDriverWait
from urllib.parse import urlparse
from selenium.webdriver.support import expected_conditions as EC
@ -9,16 +9,8 @@ from constants.constants import BASE_URL
import re
import json
def try_cookie(driver):
for entry in driver.get_log('performance'):
parameters = json.loads(entry["message"])['message']['params']
if (
'documentURL' in parameters.keys()
and re.search(r'https://lms.uwa.edu.au/webapps/portal.*', parameters['documentURL']) != None
):
return parameters['redirectResponse']['requestHeaders']['Cookie']
def login(args, driver):
driver.get(BASE_URL)
USERNAME = args.username
if len(USERNAME) == 0:
print('UserID: ')
@ -27,8 +19,6 @@ def login(args, driver):
print('Password: ')
PASSWORD = getpass('')
driver.get(BASE_URL)
WaitClickable(driver,Selectors.BOX_USERNAME).send_keys(USERNAME)
WaitClickable(driver,Selectors.BUTTON_NEXT).click()
print('Entered username.')
@ -44,10 +34,10 @@ def login(args, driver):
WaitClickable(driver,Selectors.BUTTON_DENY).click()
# WaitClickable(driver,BUTTON_NEXT).click() #IF you want to remember credentials, switch these comments
current_uri = urlparse(driver.current_url)
if '{uri.scheme}://{uri.netloc}'.format(uri=current_uri) != BASE_URL:
driver.quit()
print("Login failed.")
exit(-1)
return try_cookie(driver)
cookie = driver.get_cookies()
if not cookie == None: return cookie
print('Could not get auth cookie - Invalid ID or password?', file=sys.stderr)
driver.quit()
exit(1)

View File

@ -1,9 +1,6 @@
import pathlib
import re
from constants.constants import DL_DIR
from utils.wait import WaitClickable
from utils.asset import Asset
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os

View File

@ -1,7 +1,7 @@
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
timeout = 4
# find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element_by_id(name))
timeout = 5
# find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element(By.ID, name))
WaitClickable = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))
WaitDiv = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator))
SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator))