mirror of
https://github.com/peter-tanner/Blackboard-marks.git
synced 2024-11-30 11:40:16 +08:00
Update to new LMS, update chromedriver to 95, unspaghettified *some* code
This commit is contained in:
parent
8e76eb8b55
commit
e3ed2765d6
5
.gitignore
vendored
5
.gitignore
vendored
|
@ -1,5 +1,6 @@
|
||||||
grades/
|
grades*
|
||||||
tmp/
|
tmp/
|
||||||
__pycache__
|
__pycache__
|
||||||
chromedriver*
|
chromedriver*
|
||||||
test*
|
test*
|
||||||
|
.vscode/
|
|
@ -20,4 +20,5 @@ Just made it able to download the graded results which may contain annotations.
|
||||||
## Note:
|
## Note:
|
||||||
* Does not download Turnitin reports. You have to manually click the link to the feedback site.
|
* Does not download Turnitin reports. You have to manually click the link to the feedback site.
|
||||||
* Does not download multiple submission attempts - only downloads the last/graded attempt.
|
* Does not download multiple submission attempts - only downloads the last/graded attempt.
|
||||||
* Check that the default page is the 'all' category for the marks rather than something else, such as the submitted category. The script should correct this, but to be safe, click on 'all' if it isn't already selected.
|
* Check that the default page is the 'all' category for the marks rather than something else, such as the submitted category. The script should correct this, but to be safe, click on 'all' if it isn't already selected.
|
||||||
|
* Sometimes chromedriver closes after logging in, when not in headless mode. Try interacting with the page before logging in.
|
56
main.py
56
main.py
|
@ -1,9 +1,10 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import requests
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
from selenium.webdriver.support.wait import WebDriverWait
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
from selenium.webdriver.common.keys import Keys
|
|
||||||
from selenium.webdriver.common.action_chains import ActionChains
|
from selenium.webdriver.common.action_chains import ActionChains
|
||||||
# For chrome stuff
|
# For chrome stuff
|
||||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||||
|
@ -12,24 +13,14 @@ from selenium.webdriver.chrome.options import Options
|
||||||
from urllib.parse import parse_qs, urlparse
|
from urllib.parse import parse_qs, urlparse
|
||||||
import os
|
import os
|
||||||
from os.path import sep
|
from os.path import sep
|
||||||
import requests
|
|
||||||
import time
|
|
||||||
import getpass
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import sys
|
|
||||||
import argparse
|
import argparse
|
||||||
import pathlib
|
|
||||||
|
|
||||||
import utils.selectors
|
from utils.asset import RequestStack
|
||||||
from utils.asset import Asset, RequestStack
|
|
||||||
from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
|
from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
|
||||||
from constants.constants import BASE_URL, DL_DIR
|
from constants.constants import BASE_URL, DL_DIR
|
||||||
from utils.login import login
|
from utils.login import login
|
||||||
from utils.selectors import Selectors
|
from utils.utils import download_file, get_assignment_name, save_html
|
||||||
from utils.utils import download_file, friendly_filename, get_assignment_name, get_text_excluding_children, save_html
|
|
||||||
import code
|
|
||||||
from random import randint
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from selenium.common.exceptions import ElementNotInteractableException
|
from selenium.common.exceptions import ElementNotInteractableException
|
||||||
|
|
||||||
|
@ -40,8 +31,6 @@ try:
|
||||||
except:
|
except:
|
||||||
def get_etc(*args): return False
|
def get_etc(*args): return False
|
||||||
|
|
||||||
cookie = None
|
|
||||||
|
|
||||||
# stupid bug
|
# stupid bug
|
||||||
def click_the_fing_button(driver,button):
|
def click_the_fing_button(driver,button):
|
||||||
try:
|
try:
|
||||||
|
@ -54,9 +43,9 @@ def click_the_fing_button(driver,button):
|
||||||
driver.maximize_window()
|
driver.maximize_window()
|
||||||
|
|
||||||
# You can probably replace this with a recursive method like in blackboard scraper but tbh i just want to get this script done so i can stop working for once.
|
# You can probably replace this with a recursive method like in blackboard scraper but tbh i just want to get this script done so i can stop working for once.
|
||||||
def scrape_further(driver,path):
|
def scrape_further(driver,path,session):
|
||||||
# attempts for bb-held tests
|
# attempts for bb-held tests
|
||||||
attempts = driver.find_elements_by_xpath("//a[starts-with(@href, '/webapps/assessment')]")
|
attempts = driver.find_elements(By.XPATH, "//a[starts-with(@href, '/webapps/assessment')]")
|
||||||
attempts = [ x.get_attribute('href') for x in attempts ]
|
attempts = [ x.get_attribute('href') for x in attempts ]
|
||||||
for i, attempt in enumerate(attempts):
|
for i, attempt in enumerate(attempts):
|
||||||
name = "attempt_"+str(i)+"_["+parse_qs(urlparse(attempt).query)['attempt_id'][0]+"]"
|
name = "attempt_"+str(i)+"_["+parse_qs(urlparse(attempt).query)['attempt_id'][0]+"]"
|
||||||
|
@ -66,13 +55,13 @@ def scrape_further(driver,path):
|
||||||
driver.switch_to.window(driver.window_handles[2])
|
driver.switch_to.window(driver.window_handles[2])
|
||||||
save_html(path, name, driver.page_source)
|
save_html(path, name, driver.page_source)
|
||||||
if testing:
|
if testing:
|
||||||
get_etc(driver, cookie, path)
|
get_etc(driver, session, path)
|
||||||
driver.close()
|
driver.close()
|
||||||
driver.switch_to.window(driver.window_handles[1])
|
driver.switch_to.window(driver.window_handles[1])
|
||||||
|
|
||||||
# submission file for assignment
|
# submission file for assignment
|
||||||
request_stack = RequestStack(cookie)
|
request_stack = RequestStack(session)
|
||||||
attempts = driver.find_elements_by_xpath("//a[starts-with(@href, '/webapps/assignment/download')]")
|
attempts = driver.find_elements(By.XPATH, "//a[starts-with(@href, '/webapps/assignment/download')]")
|
||||||
attempts = [ x.get_attribute('href') for x in attempts ]
|
attempts = [ x.get_attribute('href') for x in attempts ]
|
||||||
for i, attempt in enumerate(attempts):
|
for i, attempt in enumerate(attempts):
|
||||||
request_stack.add_file(attempt,path)
|
request_stack.add_file(attempt,path)
|
||||||
|
@ -118,13 +107,16 @@ OPTIONS = Options()
|
||||||
OPTIONS.add_experimental_option("prefs", prefs)
|
OPTIONS.add_experimental_option("prefs", prefs)
|
||||||
# OPTIONS.add_argument("--headless")
|
# OPTIONS.add_argument("--headless")
|
||||||
driver = webdriver.Chrome(
|
driver = webdriver.Chrome(
|
||||||
executable_path='chromedriver',
|
executable_path='chromedriver.exe',
|
||||||
desired_capabilities=CAPABILITIES,
|
desired_capabilities=CAPABILITIES,
|
||||||
options=OPTIONS
|
options=OPTIONS
|
||||||
)
|
)
|
||||||
driver.maximize_window()
|
driver.maximize_window()
|
||||||
|
|
||||||
cookie = {'Cookie': login(args, driver)} # do Login.
|
cookies = login(args, driver) # do Login.
|
||||||
|
session = requests.Session()
|
||||||
|
for cookie in cookies:
|
||||||
|
session.cookies.set(cookie["name"], cookie["value"])
|
||||||
|
|
||||||
# need to load this page JUST to remove the tos warning so it doesnt fuck up everything down the line.
|
# need to load this page JUST to remove the tos warning so it doesnt fuck up everything down the line.
|
||||||
driver.get(BASE_URL+"/webapps/gradebook/do/student/viewCourses")
|
driver.get(BASE_URL+"/webapps/gradebook/do/student/viewCourses")
|
||||||
|
@ -137,15 +129,15 @@ driver.get(BASE_URL+"/webapps/streamViewer/streamViewer?cmd=view&streamName=mygr
|
||||||
save_html(sep.join(path), 'entrypoint', driver.page_source)
|
save_html(sep.join(path), 'entrypoint', driver.page_source)
|
||||||
|
|
||||||
# get courseIDs
|
# get courseIDs
|
||||||
courses = driver.find_element_by_id("left_stream_mygrades")\
|
courses = driver.find_element(By.ID, "left_stream_mygrades")\
|
||||||
.find_elements_by_xpath("//div[@role='tab']")
|
.find_elements(By.XPATH, "//div[@role='tab']")
|
||||||
|
|
||||||
course_details = []
|
course_details = []
|
||||||
for i, course_results in enumerate(courses):
|
for i, course_results in enumerate(courses):
|
||||||
course_results = courses[i]
|
course_results = courses[i]
|
||||||
ActionChains(driver).move_to_element(course_results).perform()
|
ActionChains(driver).move_to_element(course_results).perform()
|
||||||
course_url = course_results.get_attribute("bb:rhs")
|
course_url = course_results.get_attribute("bb:rhs")
|
||||||
course_name = course_results.find_elements_by_xpath("//span[@class='stream_area_name']")[i].text
|
course_name = course_results.find_elements(By.XPATH, "//span[@class='stream_area_name']")[i].text
|
||||||
course_name += " ["+parse_qs(urlparse(course_url).query)['course_id'][0]+"]"
|
course_name += " ["+parse_qs(urlparse(course_url).query)['course_id'][0]+"]"
|
||||||
course_details.append({
|
course_details.append({
|
||||||
'name': course_name,
|
'name': course_name,
|
||||||
|
@ -166,19 +158,19 @@ for i, course in enumerate(course_details):
|
||||||
WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
|
WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
|
||||||
WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
|
WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
|
||||||
|
|
||||||
table = driver.find_elements_by_xpath("//div[@id='grades_wrapper']/div")
|
table = driver.find_elements(By.XPATH, "//div[@id='grades_wrapper']/div")
|
||||||
|
|
||||||
for i, assignment in enumerate(table):
|
for i, assignment in enumerate(table):
|
||||||
print(i)
|
print(i)
|
||||||
buttons = assignment.find_elements_by_tag_name("input")
|
buttons = assignment.find_elements(By.TAG_NAME, "input")
|
||||||
block = None
|
block = None
|
||||||
assignment_name = None
|
assignment_name = None
|
||||||
information_link = False
|
information_link = False
|
||||||
try:
|
try:
|
||||||
block = assignment.find_element_by_xpath("./div[@class='cell gradable']/a[@onclick]")
|
block = assignment.find_element(By.XPATH, "./div[@class='cell gradable']/a[@onclick]")
|
||||||
information_link = True
|
information_link = True
|
||||||
except:
|
except:
|
||||||
block = assignment.find_element_by_xpath("./div[@class='cell gradable']")
|
block = assignment.find_element(By.XPATH, "./div[@class='cell gradable']")
|
||||||
assignment_name = get_assignment_name(driver,block)
|
assignment_name = get_assignment_name(driver,block)
|
||||||
path.append(assignment_name)
|
path.append(assignment_name)
|
||||||
# download information if it exists.
|
# download information if it exists.
|
||||||
|
@ -189,7 +181,7 @@ for i, course in enumerate(course_details):
|
||||||
WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
|
WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
|
||||||
driver.switch_to.window(driver.window_handles[1])
|
driver.switch_to.window(driver.window_handles[1])
|
||||||
save_html(sep.join(path),"information",driver.page_source)
|
save_html(sep.join(path),"information",driver.page_source)
|
||||||
scrape_further(driver, sep.join(path))
|
scrape_further(driver, sep.join(path), session)
|
||||||
driver.close()
|
driver.close()
|
||||||
driver.switch_to.window(driver.window_handles[0])
|
driver.switch_to.window(driver.window_handles[0])
|
||||||
except ElementNotInteractableException:
|
except ElementNotInteractableException:
|
||||||
|
@ -203,10 +195,10 @@ for i, course in enumerate(course_details):
|
||||||
driver.switch_to.window(driver.window_handles[1])
|
driver.switch_to.window(driver.window_handles[1])
|
||||||
WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
|
WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
|
||||||
save_html(sep.join(path),"rubric",driver.page_source)
|
save_html(sep.join(path),"rubric",driver.page_source)
|
||||||
driver.find_element_by_xpath("//li[@id='listViewTab']/a").click()
|
driver.find_element(By.XPATH, "//li[@id='listViewTab']/a").click()
|
||||||
WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
|
WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
|
||||||
save_html(sep.join(path),"list",driver.page_source)
|
save_html(sep.join(path),"list",driver.page_source)
|
||||||
detailed_buttons = driver.find_elements_by_xpath("//div[@class='u_controlsWrapper']/input")
|
detailed_buttons = driver.find_elements(By.XPATH, "//div[@class='u_controlsWrapper']/input")
|
||||||
detailed_buttons[1].click()
|
detailed_buttons[1].click()
|
||||||
detailed_buttons[0].click()
|
detailed_buttons[0].click()
|
||||||
save_html(sep.join(path),"list_detailed",driver.page_source)
|
save_html(sep.join(path),"list_detailed",driver.page_source)
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import wget
|
|
||||||
from constants.constants import BASE_URL
|
from constants.constants import BASE_URL
|
||||||
import re
|
import re
|
||||||
import hashlib
|
import hashlib
|
||||||
|
@ -26,6 +25,7 @@ class RequestStack:
|
||||||
|
|
||||||
def download_all(self):
|
def download_all(self):
|
||||||
for file in self.request_stack:
|
for file in self.request_stack:
|
||||||
|
print(f"\tDownloading {file.url}")
|
||||||
file.download(self.token)
|
file.download(self.token)
|
||||||
|
|
||||||
class Asset:
|
class Asset:
|
||||||
|
@ -36,14 +36,14 @@ class Asset:
|
||||||
self.path.mkdir(parents=True, exist_ok=True)
|
self.path.mkdir(parents=True, exist_ok=True)
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
def download(self,req_headers):
|
def download(self,session):
|
||||||
response = requests.get(BASE_URL+self.url, stream=True, headers=req_headers, allow_redirects=False)
|
response = session.get(BASE_URL+self.url, stream=True, allow_redirects=False)
|
||||||
headers = response.headers
|
headers = response.headers
|
||||||
if response.status_code == 302 and len(headers['location']) > 0:
|
if response.status_code == 302 and len(headers['location']) > 0:
|
||||||
Asset(headers['location'], self.path).download(req_headers)
|
Asset(headers['location'], self.path).download(session)
|
||||||
return
|
return
|
||||||
elif response.status_code != 200:
|
elif response.status_code != 200:
|
||||||
print("Error "+str(response.status_code))
|
print("[!] Error "+str(response.status_code))
|
||||||
return response.status_code
|
return response.status_code
|
||||||
headers = { x:re.sub(r'^"*|"*?$', '', headers.get(x)) for x in headers } # ewww regex
|
headers = { x:re.sub(r'^"*|"*?$', '', headers.get(x)) for x in headers } # ewww regex
|
||||||
if 'Content-Disposition' in headers.keys():
|
if 'Content-Disposition' in headers.keys():
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
|
import sys
|
||||||
from utils.wait import WaitClickable
|
from utils.wait import WaitClickable
|
||||||
from utils.selectors import Selectors
|
from utils.selectors import Selectors
|
||||||
import sys
|
|
||||||
from selenium.webdriver.support.wait import WebDriverWait
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
@ -9,16 +9,8 @@ from constants.constants import BASE_URL
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
def try_cookie(driver):
|
|
||||||
for entry in driver.get_log('performance'):
|
|
||||||
parameters = json.loads(entry["message"])['message']['params']
|
|
||||||
if (
|
|
||||||
'documentURL' in parameters.keys()
|
|
||||||
and re.search(r'https://lms.uwa.edu.au/webapps/portal.*', parameters['documentURL']) != None
|
|
||||||
):
|
|
||||||
return parameters['redirectResponse']['requestHeaders']['Cookie']
|
|
||||||
|
|
||||||
def login(args, driver):
|
def login(args, driver):
|
||||||
|
driver.get(BASE_URL)
|
||||||
USERNAME = args.username
|
USERNAME = args.username
|
||||||
if len(USERNAME) == 0:
|
if len(USERNAME) == 0:
|
||||||
print('UserID: ')
|
print('UserID: ')
|
||||||
|
@ -26,8 +18,6 @@ def login(args, driver):
|
||||||
USERNAME += '@student.uwa.edu.au'
|
USERNAME += '@student.uwa.edu.au'
|
||||||
print('Password: ')
|
print('Password: ')
|
||||||
PASSWORD = getpass('')
|
PASSWORD = getpass('')
|
||||||
|
|
||||||
driver.get(BASE_URL)
|
|
||||||
|
|
||||||
WaitClickable(driver,Selectors.BOX_USERNAME).send_keys(USERNAME)
|
WaitClickable(driver,Selectors.BOX_USERNAME).send_keys(USERNAME)
|
||||||
WaitClickable(driver,Selectors.BUTTON_NEXT).click()
|
WaitClickable(driver,Selectors.BUTTON_NEXT).click()
|
||||||
|
@ -44,10 +34,10 @@ def login(args, driver):
|
||||||
|
|
||||||
WaitClickable(driver,Selectors.BUTTON_DENY).click()
|
WaitClickable(driver,Selectors.BUTTON_DENY).click()
|
||||||
# WaitClickable(driver,BUTTON_NEXT).click() #IF you want to remember credentials, switch these comments
|
# WaitClickable(driver,BUTTON_NEXT).click() #IF you want to remember credentials, switch these comments
|
||||||
current_uri = urlparse(driver.current_url)
|
|
||||||
if '{uri.scheme}://{uri.netloc}'.format(uri=current_uri) != BASE_URL:
|
|
||||||
driver.quit()
|
|
||||||
print("Login failed.")
|
|
||||||
exit(-1)
|
|
||||||
|
|
||||||
return try_cookie(driver)
|
cookie = driver.get_cookies()
|
||||||
|
if not cookie == None: return cookie
|
||||||
|
|
||||||
|
print('Could not get auth cookie - Invalid ID or password?', file=sys.stderr)
|
||||||
|
driver.quit()
|
||||||
|
exit(1)
|
|
@ -1,9 +1,6 @@
|
||||||
import pathlib
|
import pathlib
|
||||||
import re
|
import re
|
||||||
from constants.constants import DL_DIR
|
from constants.constants import DL_DIR
|
||||||
from utils.wait import WaitClickable
|
|
||||||
from utils.asset import Asset
|
|
||||||
from selenium.webdriver.support.wait import WebDriverWait
|
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
import time
|
import time
|
||||||
import os
|
import os
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from selenium.webdriver.support.wait import WebDriverWait
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
timeout = 4
|
timeout = 5
|
||||||
# find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element_by_id(name))
|
# find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element(By.ID, name))
|
||||||
WaitClickable = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))
|
WaitClickable = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))
|
||||||
WaitDiv = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator))
|
WaitDiv = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator))
|
||||||
SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator))
|
SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator))
|
Loading…
Reference in New Issue
Block a user