mirror of
https://github.com/peter-tanner/Blackboard-marks.git
synced 2024-11-30 11:40:16 +08:00
update selenium, 2023 version
This commit is contained in:
parent
234cd31bf4
commit
c97582bef2
1
.gitignore
vendored
Normal file → Executable file
1
.gitignore
vendored
Normal file → Executable file
|
@ -6,3 +6,4 @@ test*
|
||||||
.vscode/
|
.vscode/
|
||||||
*.7z
|
*.7z
|
||||||
*.tar
|
*.tar
|
||||||
|
.venv
|
18
README.md
Normal file → Executable file
18
README.md
Normal file → Executable file
|
@ -1,24 +1,28 @@
|
||||||
## Blackboard marks downloader (UWA)
|
## Blackboard marks downloader (UWA)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
**Dependencies**:
|
**Dependencies**:
|
||||||
|
|
||||||
- python
|
- python
|
||||||
- selenium
|
- selenium
|
||||||
- chromedriver, placed relative to this directory
|
- chromedriver, placed relative to this directory
|
||||||
|
|
||||||
Run the script with `py main.py` and enter your student number and password. I'm not taking your personal details, but *don't take my word for it* - always check the source if you don't trust it!
|
Run the script with `py main.py` and enter your student number and password. I'm not taking your personal details, but _don't take my word for it_ - always check the source if you don't trust it!
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
Made this script to download my marks, receipts and all the stuff I uploaded for my first semester. It's a fucking mess of spaghetti python code because to be honest I really just wanted to get this out of the way and have some time for other stuff after the first round of exams. It's a mess of code, with some bits (the login) being picked from the scraper script and some of the scraper asset objects being translated from ruby to python here (in a quick and incomplete way). This will probably will break in some way when the UI is overhauled for next semester :/
|
Made this script to download my marks, receipts and all the stuff I uploaded for my first semester.
|
||||||
|
|
||||||
There is no bulk marks download feature in the current lms, even though it seems other blackboard installations can give students this bulk download ability. It relies on a lot of js crap so I ended up using selenium all the way through. Doesn't download styles to save space, you'll have to download the css and js yourself and it has to be absolute because the script makes no effort to make the links relative.
|
There is no bulk marks download feature in the current LMS, even though it seems other Blackboard installations can give students this bulk download ability. The script saves the URL of every page it visits to `URLS.txt`, so you can use something like the SingleFile extension and its batch "save URLs" feature to save each page in the list. (I recommend enabling scripts in the SingleFile settings so that comments are saved.)
|
||||||
|
|
||||||
This one was made for UWA but you may be able to tweak it for your institution (see constants.py).
|
This one was made for UWA but you may be able to tweak it for your institution (see constants.py).
|
||||||
|
|
||||||
Just made it able to download the graded results which may contain annotations. Using a really hacky method to do it so it doesn't create a metadata file for it.
|
Just made it able to download the graded results which may contain annotations. Using a really hacky method to do it so it doesn't create a metadata file for it.
|
||||||
|
|
||||||
## Note:
|
## Note:
|
||||||
* Does not download turnitin reports. You have to click the link manually to the feedback site.
|
|
||||||
* Does not download multiple submission attempts - only downloads the last/graded attempt.
|
- Does not download turnitin reports. You have to click the link manually to the feedback site.
|
||||||
* Check that the default page is the 'all' category for the marks instead of something else like the submitted category. The script should correct this but just to be safe click on all if it isn't already
|
- Does not download multiple submission attempts - only downloads the last/graded attempt.
|
||||||
* Sometimes chromedriver closes after logging in, when not in headless mode. Try interacting with the page before logging in.
|
- Check that the default page is the 'all' category for the marks instead of something else like the submitted category. The script should correct this, but just to be safe, click on 'all' if it isn't already selected.
|
||||||
|
- Sometimes chromedriver closes after logging in, when not in headless mode. Try interacting with the page before logging in.
|
||||||
|
|
8
constants/constants.py
Normal file → Executable file
8
constants/constants.py
Normal file → Executable file
|
@ -1,7 +1,11 @@
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
BASE_URL = "https://lms.uwa.edu.au" # Include protocol.
|
BASE_URL = "https://lms.uwa.edu.au" # Include protocol.
|
||||||
|
|
||||||
DL_DIR = os.getcwd()+os.path.sep+"tmp"+os.path.sep
|
DL_DIR = os.getcwd() + os.path.sep + "tmp" + os.path.sep
|
||||||
Path(DL_DIR).mkdir(parents=True, exist_ok=True)
|
Path(DL_DIR).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
SAVE_DIR = "grades"
|
||||||
|
|
||||||
|
URL_LIST = SAVE_DIR + os.path.sep + "URLS.txt"
|
||||||
|
|
80
main.py
Normal file → Executable file
80
main.py
Normal file → Executable file
|
@ -1,5 +1,7 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from selenium.webdriver.remote.webdriver import WebDriver
|
||||||
|
from typing import cast
|
||||||
import requests
|
import requests
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
@ -18,48 +20,55 @@ import argparse
|
||||||
|
|
||||||
from utils.asset import RequestStack
|
from utils.asset import RequestStack
|
||||||
from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
|
from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
|
||||||
from constants.constants import BASE_URL, DL_DIR
|
from constants.constants import BASE_URL, DL_DIR, SAVE_DIR
|
||||||
from utils.login import login
|
from utils.login import login
|
||||||
from utils.utils import download_file, get_assignment_name, save_html
|
from utils.utils import download_file, get_assignment_name, save_html
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from selenium.common.exceptions import ElementNotInteractableException
|
from selenium.common.exceptions import ElementNotInteractableException
|
||||||
|
|
||||||
testing = False
|
testing = False
|
||||||
try:
|
# try:
|
||||||
testing = True
|
# testing = True
|
||||||
from utils.test import get_etc
|
# from utils.test import get_etc
|
||||||
except:
|
# except Exception:
|
||||||
def get_etc(*args): return False
|
|
||||||
|
|
||||||
|
def get_etc(*args):
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
# stupid bug
|
# stupid bug
|
||||||
def click_the_fing_button(driver, button):
|
|
||||||
|
|
||||||
|
def click_the_fing_button(driver: WebDriver, button):
|
||||||
try:
|
try:
|
||||||
ActionChains(driver).move_to_element(button)
|
ActionChains(driver).move_to_element(button)
|
||||||
ActionChains(driver).click(button).perform()
|
ActionChains(driver).click(button).perform()
|
||||||
WebDriverWait(driver, 2).until(EC.number_of_windows_to_be(2))
|
WebDriverWait(driver, 2).until(EC.number_of_windows_to_be(2))
|
||||||
except:
|
except Exception:
|
||||||
# hack to wake selenium up when it doesnt want to click the button!
|
# hack to wake selenium up when it doesnt want to click the button!
|
||||||
driver.set_window_size(1024, 768)
|
driver.set_window_size(1024, 768)
|
||||||
click_the_fing_button(driver, button)
|
click_the_fing_button(driver, button)
|
||||||
driver.maximize_window()
|
driver.maximize_window()
|
||||||
|
|
||||||
# You can probably replace this with a recursive method like in blackboard scraper but tbh i just want to get this script done so i can stop working for once.
|
# You can probably replace this with a recursive method like in blackboard
|
||||||
|
# scraper but tbh i just want to get this script done so i can stop working for
|
||||||
|
# once.
|
||||||
|
|
||||||
|
|
||||||
def scrape_further(driver, path, session):
|
def scrape_further(driver: WebDriver, path, session):
|
||||||
# attempts for bb-held tests
|
# attempts for bb-held tests
|
||||||
attempts = driver.find_elements(
|
attempts = driver.find_elements(
|
||||||
By.XPATH, "//a[starts-with(@href, '/webapps/assessment')]")
|
By.XPATH, "//a[starts-with(@href, '/webapps/assessment')]")
|
||||||
attempts = [x.get_attribute('href') for x in attempts]
|
attempts = [x.get_attribute('href') for x in attempts]
|
||||||
for i, attempt in enumerate(attempts):
|
for i, attempt in enumerate(attempts):
|
||||||
name = "attempt_" + \
|
name = "attempt_" + \
|
||||||
str(i)+"_["+parse_qs(urlparse(attempt).query)['attempt_id'][0]+"]"
|
str(i) + "_[" + parse_qs(urlparse(attempt).query)['attempt_id'][0] + "]"
|
||||||
attempt = re.sub("^"+BASE_URL, "", attempt)
|
attempt = re.sub("^" + BASE_URL, "", attempt)
|
||||||
driver.execute_script("window.open('"+BASE_URL+attempt+"')")
|
driver.execute_script("window.open('" + BASE_URL + attempt + "')")
|
||||||
WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(3))
|
WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(3))
|
||||||
driver.switch_to.window(driver.window_handles[2])
|
driver.switch_to.window(driver.window_handles[2])
|
||||||
save_html(path, name, driver.page_source)
|
save_html(path, name, driver, True)
|
||||||
if testing:
|
if testing:
|
||||||
get_etc(driver, session, path)
|
get_etc(driver, session, path)
|
||||||
driver.close()
|
driver.close()
|
||||||
|
@ -71,7 +80,7 @@ def scrape_further(driver, path, session):
|
||||||
By.XPATH, "//a[contains(@href, '/bbcswebdav')]")
|
By.XPATH, "//a[contains(@href, '/bbcswebdav')]")
|
||||||
etc_files = [x.get_attribute('href') for x in etc_files]
|
etc_files = [x.get_attribute('href') for x in etc_files]
|
||||||
for i, item in enumerate(etc_files):
|
for i, item in enumerate(etc_files):
|
||||||
if (not item is None) and ("bbcswebdav" in item):
|
if (item is not None) and ("bbcswebdav" in item):
|
||||||
request_stack.add_file(item, path)
|
request_stack.add_file(item, path)
|
||||||
|
|
||||||
# submission file for assignment
|
# submission file for assignment
|
||||||
|
@ -90,7 +99,7 @@ def scrape_further(driver, path, session):
|
||||||
driver, (By.XPATH, "//iframe[@class='docviewer_iframe_embed']"))
|
driver, (By.XPATH, "//iframe[@class='docviewer_iframe_embed']"))
|
||||||
SwitchToIFrame(driver, (By.XPATH, "//iframe[@title='PSPDFKit']"))
|
SwitchToIFrame(driver, (By.XPATH, "//iframe[@title='PSPDFKit']"))
|
||||||
get_feedback = True
|
get_feedback = True
|
||||||
except:
|
except Exception:
|
||||||
print("No feedback to download")
|
print("No feedback to download")
|
||||||
if get_feedback:
|
if get_feedback:
|
||||||
dl_button = WaitClickable(
|
dl_button = WaitClickable(
|
||||||
|
@ -106,10 +115,11 @@ parser = argparse.ArgumentParser(description='Automated microsoft SSO login.')
|
||||||
parser.add_argument("-u", "--username",
|
parser.add_argument("-u", "--username",
|
||||||
help="Automatically use provided userID", default="")
|
help="Automatically use provided userID", default="")
|
||||||
|
|
||||||
path = ['grades']
|
path = [SAVE_DIR]
|
||||||
|
Path(SAVE_DIR).mkdir(parents=True, exist_ok=True)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
CAPABILITIES = DesiredCapabilities.CHROME
|
CAPABILITIES = cast("dict[str, object]", DesiredCapabilities.CHROME.copy())
|
||||||
CAPABILITIES['goog:loggingPrefs'] = {
|
CAPABILITIES['goog:loggingPrefs'] = {
|
||||||
'performance': 'ALL',
|
'performance': 'ALL',
|
||||||
}
|
}
|
||||||
|
@ -139,15 +149,15 @@ for cookie in cookies:
|
||||||
session.cookies.set(cookie["name"], cookie["value"])
|
session.cookies.set(cookie["name"], cookie["value"])
|
||||||
|
|
||||||
# need to load this page JUST to remove the tos warning so it doesnt fuck up everything down the line.
|
# need to load this page JUST to remove the tos warning so it doesnt fuck up everything down the line.
|
||||||
driver.get(BASE_URL+"/webapps/gradebook/do/student/viewCourses")
|
driver.get(BASE_URL + "/webapps/gradebook/do/student/viewCourses")
|
||||||
try:
|
try:
|
||||||
WaitClickable(driver, (By.CLASS_NAME, "button-1")).click()
|
WaitClickable(driver, (By.CLASS_NAME, "button-1")).click()
|
||||||
except:
|
except Exception:
|
||||||
print("no tos warning - skipped")
|
print("no tos warning - skipped")
|
||||||
|
|
||||||
driver.get(
|
driver.get(
|
||||||
BASE_URL+"/webapps/streamViewer/streamViewer?cmd=view&streamName=mygrades")
|
BASE_URL + "/webapps/streamViewer/streamViewer?cmd=view&streamName=mygrades")
|
||||||
save_html(sep.join(path), 'entrypoint', driver.page_source)
|
save_html(sep.join(path), 'entrypoint', driver, True)
|
||||||
|
|
||||||
WaitClickable(driver, (By.ID, "left_stream_mygrades"))
|
WaitClickable(driver, (By.ID, "left_stream_mygrades"))
|
||||||
# get courseIDs
|
# get courseIDs
|
||||||
|
@ -162,7 +172,7 @@ for i, course_results in enumerate(courses):
|
||||||
course_name = course_results.find_elements(
|
course_name = course_results.find_elements(
|
||||||
By.XPATH, "//span[@class='stream_area_name']")[i].text
|
By.XPATH, "//span[@class='stream_area_name']")[i].text
|
||||||
course_name += " [" + \
|
course_name += " [" + \
|
||||||
parse_qs(urlparse(course_url).query)['course_id'][0]+"]"
|
parse_qs(urlparse(course_url).query)['course_id'][0] + "]"
|
||||||
course_details.append({
|
course_details.append({
|
||||||
'name': course_name,
|
'name': course_name,
|
||||||
'url': course_url
|
'url': course_url
|
||||||
|
@ -171,7 +181,7 @@ for i, course_results in enumerate(courses):
|
||||||
for i, course in enumerate(course_details):
|
for i, course in enumerate(course_details):
|
||||||
path.append(course['name']) # course name
|
path.append(course['name']) # course name
|
||||||
print(course['name'])
|
print(course['name'])
|
||||||
driver.get(BASE_URL+course['url'])
|
driver.get(BASE_URL + course['url'])
|
||||||
|
|
||||||
driver.execute_script("""
|
driver.execute_script("""
|
||||||
mygrades.loadContentFrame = function(url) {
|
mygrades.loadContentFrame = function(url) {
|
||||||
|
@ -194,7 +204,7 @@ for i, course in enumerate(course_details):
|
||||||
block = assignment.find_element(
|
block = assignment.find_element(
|
||||||
By.XPATH, "./div[@class='cell gradable']/a[@onclick]")
|
By.XPATH, "./div[@class='cell gradable']/a[@onclick]")
|
||||||
information_link = True
|
information_link = True
|
||||||
except:
|
except Exception:
|
||||||
block = assignment.find_element(
|
block = assignment.find_element(
|
||||||
By.XPATH, "./div[@class='cell gradable']")
|
By.XPATH, "./div[@class='cell gradable']")
|
||||||
assignment_name = get_assignment_name(driver, block)
|
assignment_name = get_assignment_name(driver, block)
|
||||||
|
@ -204,10 +214,10 @@ for i, course in enumerate(course_details):
|
||||||
try:
|
try:
|
||||||
ActionChains(driver).move_to_element(
|
ActionChains(driver).move_to_element(
|
||||||
block).click(block).perform()
|
block).click(block).perform()
|
||||||
print("Switched "+assignment_name)
|
print("Switched " + assignment_name)
|
||||||
WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
|
WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
|
||||||
driver.switch_to.window(driver.window_handles[1])
|
driver.switch_to.window(driver.window_handles[1])
|
||||||
save_html(sep.join(path), "information", driver.page_source)
|
save_html(sep.join(path), "information", driver, True)
|
||||||
scrape_further(driver, sep.join(path), session)
|
scrape_further(driver, sep.join(path), session)
|
||||||
driver.close()
|
driver.close()
|
||||||
driver.switch_to.window(driver.window_handles[0])
|
driver.switch_to.window(driver.window_handles[0])
|
||||||
|
@ -216,36 +226,36 @@ for i, course in enumerate(course_details):
|
||||||
# download rubric if it exists.
|
# download rubric if it exists.
|
||||||
for button in buttons:
|
for button in buttons:
|
||||||
action = button.get_attribute("onclick")
|
action = button.get_attribute("onclick")
|
||||||
if action != None and "showInLightBox" not in action:
|
if action is not None and "showInLightBox" not in action:
|
||||||
click_the_fing_button(driver, button)
|
click_the_fing_button(driver, button)
|
||||||
driver.execute_script(
|
driver.execute_script(
|
||||||
"window.scrollTo(0, document.body.scrollHeight)")
|
"window.scrollTo(0, document.body.scrollHeight)")
|
||||||
driver.switch_to.window(driver.window_handles[1])
|
driver.switch_to.window(driver.window_handles[1])
|
||||||
WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
|
WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
|
||||||
save_html(sep.join(path), "rubric", driver.page_source)
|
save_html(sep.join(path), "rubric", driver, True)
|
||||||
driver.find_element(
|
driver.find_element(
|
||||||
By.XPATH, "//li[@id='listViewTab']/a").click()
|
By.XPATH, "//li[@id='listViewTab']/a").click()
|
||||||
WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
|
WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
|
||||||
save_html(sep.join(path), "list", driver.page_source)
|
save_html(sep.join(path), "list", driver, True)
|
||||||
detailed_buttons = driver.find_elements(
|
detailed_buttons = driver.find_elements(
|
||||||
By.XPATH, "//div[@class='u_controlsWrapper']/input")
|
By.XPATH, "//div[@class='u_controlsWrapper']/input")
|
||||||
detailed_buttons[1].click()
|
detailed_buttons[1].click()
|
||||||
detailed_buttons[0].click()
|
detailed_buttons[0].click()
|
||||||
save_html(sep.join(path), "list_detailed", driver.page_source)
|
save_html(sep.join(path), "list_detailed", driver, True)
|
||||||
driver.close()
|
driver.close()
|
||||||
driver.switch_to.window(driver.window_handles[0])
|
driver.switch_to.window(driver.window_handles[0])
|
||||||
path.pop()
|
path.pop()
|
||||||
save_html(sep.join(path), path[0], driver.page_source)
|
save_html(sep.join(path), path[0], driver, True)
|
||||||
WaitClickable(driver, (By.XPATH, "//a[@value='S']")).click()
|
WaitClickable(driver, (By.XPATH, "//a[@value='S']")).click()
|
||||||
save_html(sep.join(path), "submitted", driver.page_source)
|
save_html(sep.join(path), "submitted", driver, True)
|
||||||
try:
|
try:
|
||||||
WaitClickable(
|
WaitClickable(
|
||||||
driver, (By.XPATH, "//div[@id='submissionReceipts']//a")).click()
|
driver, (By.XPATH, "//div[@id='submissionReceipts']//a")).click()
|
||||||
WaitClickable(
|
WaitClickable(
|
||||||
driver, (By.XPATH, "//div[@id='listContainer_itemcount']//a[@class='pagelink']")).click()
|
driver, (By.XPATH, "//div[@id='listContainer_itemcount']//a[@class='pagelink']")).click()
|
||||||
except:
|
except Exception:
|
||||||
print('No items?')
|
print('No items?')
|
||||||
save_html(sep.join(path), "receipts", driver.page_source)
|
save_html(sep.join(path), "receipts", driver, True)
|
||||||
path.pop()
|
path.pop()
|
||||||
|
|
||||||
|
|
||||||
|
|
BIN
requirements.txt
Executable file
BIN
requirements.txt
Executable file
Binary file not shown.
0
utils/__init__.py
Normal file → Executable file
0
utils/__init__.py
Normal file → Executable file
30
utils/asset.py
Normal file → Executable file
30
utils/asset.py
Normal file → Executable file
|
@ -10,9 +10,9 @@ from pathlib import Path
|
||||||
def convert_filename(name, hash):
|
def convert_filename(name, hash):
|
||||||
_name = name.split('.')
|
_name = name.split('.')
|
||||||
if len(_name) > 1:
|
if len(_name) > 1:
|
||||||
_name[-2] += ("["+hash+"]")
|
_name[-2] += ("[" + hash + "]")
|
||||||
else:
|
else:
|
||||||
_name[0] += ("["+hash+"]")
|
_name[0] += ("[" + hash + "]")
|
||||||
return '.'.join(_name)
|
return '.'.join(_name)
|
||||||
|
|
||||||
|
|
||||||
|
@ -34,20 +34,20 @@ class RequestStack:
|
||||||
class Asset:
|
class Asset:
|
||||||
def __init__(self, url, path):
|
def __init__(self, url, path):
|
||||||
self.path = Path(path)
|
self.path = Path(path)
|
||||||
self.url = re.sub("^"+BASE_URL, "", url)
|
self.url = re.sub("^" + BASE_URL, "", url)
|
||||||
# self.file_id = re.findall('file_id=(.+)&',url)
|
# self.file_id = re.findall('file_id=(.+)&',url)
|
||||||
self.path.mkdir(parents=True, exist_ok=True)
|
self.path.mkdir(parents=True, exist_ok=True)
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
def download(self, session):
|
def download(self, session):
|
||||||
response = session.get(
|
response = session.get(
|
||||||
BASE_URL+self.url, stream=True, allow_redirects=False)
|
BASE_URL + self.url, stream=True, allow_redirects=False)
|
||||||
headers = response.headers
|
headers = response.headers
|
||||||
if response.status_code == 302 and len(headers['location']) > 0:
|
if response.status_code == 302 and len(headers['location']) > 0:
|
||||||
Asset(headers['location'], self.path).download(session)
|
Asset(headers['location'], self.path).download(session)
|
||||||
return
|
return
|
||||||
elif response.status_code != 200:
|
elif response.status_code != 200:
|
||||||
print("[!] Error "+str(response.status_code))
|
print("[!] Error " + str(response.status_code))
|
||||||
return response.status_code
|
return response.status_code
|
||||||
headers = {x: re.sub(r'^"*|"*?$', '', headers.get(x))
|
headers = {x: re.sub(r'^"*|"*?$', '', headers.get(x))
|
||||||
for x in headers} # ewww regex
|
for x in headers} # ewww regex
|
||||||
|
@ -66,19 +66,19 @@ class Asset:
|
||||||
|
|
||||||
def write_metadata(self, headers):
|
def write_metadata(self, headers):
|
||||||
metacsv = [
|
metacsv = [
|
||||||
["original_filename", self.original_filename],
|
["original_filename", self.original_filename],
|
||||||
["readable_filename", self.filename],
|
["readable_filename", self.filename],
|
||||||
["url", self.url],
|
["url", self.url],
|
||||||
["pathhash", hashlib.md5(
|
["pathhash", hashlib.md5(
|
||||||
self.url.encode()).hexdigest()],
|
self.url.encode()).hexdigest()],
|
||||||
["etag", headers['ETag']],
|
["etag", headers['ETag']],
|
||||||
["etaghash", self.etag_hash],
|
["etaghash", self.etag_hash],
|
||||||
["last-modified", headers["Last-Modified"]],
|
["last-modified", headers["Last-Modified"]],
|
||||||
["content-length", headers["Content-Length"]],
|
["content-length", headers["Content-Length"]],
|
||||||
["age", ""],
|
["age", ""],
|
||||||
]
|
]
|
||||||
csvpath = self.path.joinpath("ZZZ_metadata")
|
csvpath = self.path.joinpath("ZZZ_metadata")
|
||||||
csvpath.mkdir(parents=True, exist_ok=True)
|
csvpath.mkdir(parents=True, exist_ok=True)
|
||||||
with open(csvpath.joinpath(self.filename+"__metadata.csv"), "w", newline="") as f:
|
with open(csvpath.joinpath(self.filename + "__metadata.csv"), "w", newline="") as f:
|
||||||
writer = csv.writer(f)
|
writer = csv.writer(f)
|
||||||
writer.writerows(metacsv)
|
writer.writerows(metacsv)
|
||||||
|
|
4
utils/login.py
Normal file → Executable file
4
utils/login.py
Normal file → Executable file
|
@ -28,7 +28,7 @@ def login(args, driver):
|
||||||
WaitClickable(driver, Selectors.BOX_PASSWORD).send_keys(PASSWORD)
|
WaitClickable(driver, Selectors.BOX_PASSWORD).send_keys(PASSWORD)
|
||||||
WaitClickable(driver, Selectors.BUTTON_NEXT).click()
|
WaitClickable(driver, Selectors.BUTTON_NEXT).click()
|
||||||
print('Entered password.')
|
print('Entered password.')
|
||||||
except:
|
except Exception:
|
||||||
print(WebDriverWait(driver, 1).until(
|
print(WebDriverWait(driver, 1).until(
|
||||||
EC.visibility_of_element_located(Selectors.DIV_USERERROR)).text)
|
EC.visibility_of_element_located(Selectors.DIV_USERERROR)).text)
|
||||||
driver.quit()
|
driver.quit()
|
||||||
|
@ -38,7 +38,7 @@ def login(args, driver):
|
||||||
# WaitClickable(driver,BUTTON_NEXT).click() #IF you want to remember credentials, switch these comments
|
# WaitClickable(driver,BUTTON_NEXT).click() #IF you want to remember credentials, switch these comments
|
||||||
|
|
||||||
cookie = driver.get_cookies()
|
cookie = driver.get_cookies()
|
||||||
if not cookie == None:
|
if cookie is not None:
|
||||||
return cookie
|
return cookie
|
||||||
|
|
||||||
print('Could not get auth cookie - Invalid ID or password?', file=sys.stderr)
|
print('Could not get auth cookie - Invalid ID or password?', file=sys.stderr)
|
||||||
|
|
0
utils/selectors.py
Normal file → Executable file
0
utils/selectors.py
Normal file → Executable file
23
utils/utils.py
Normal file → Executable file
23
utils/utils.py
Normal file → Executable file
|
@ -1,6 +1,8 @@
|
||||||
|
from selenium.webdriver.remote.webdriver import WebDriver
|
||||||
import pathlib
|
import pathlib
|
||||||
import re
|
import re
|
||||||
from constants.constants import DL_DIR
|
from typing import Union
|
||||||
|
from constants.constants import DL_DIR, URL_LIST
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
import time
|
import time
|
||||||
import os
|
import os
|
||||||
|
@ -23,18 +25,21 @@ def friendly_dirname(name):
|
||||||
return name.strip()
|
return name.strip()
|
||||||
|
|
||||||
|
|
||||||
def get_assignment_name(driver, block):
|
def get_assignment_name(driver: WebDriver, block):
|
||||||
s = friendly_filename(get_text_excluding_children(driver, block))
|
s = friendly_filename(get_text_excluding_children(driver, block))
|
||||||
print("Assesment: "+s)
|
print("Assesment: " + s)
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
def save_html(dir, filename, page_source):
|
def save_html(dir, filename, driver: WebDriver, page_log_file=False):
|
||||||
|
if page_log_file:
|
||||||
|
with open(URL_LIST, "a", encoding="utf-8") as f:
|
||||||
|
f.write(driver.current_url + "\n")
|
||||||
dir = pathlib.Path(friendly_dirname(dir))
|
dir = pathlib.Path(friendly_dirname(dir))
|
||||||
dir.mkdir(parents=True, exist_ok=True)
|
dir.mkdir(parents=True, exist_ok=True)
|
||||||
file = dir.joinpath(friendly_filename(filename)+".html")
|
file = dir.joinpath(friendly_filename(filename) + ".html")
|
||||||
with open(file, "w", encoding="utf-8") as f:
|
with open(file, "w", encoding="utf-8") as f:
|
||||||
f.write(page_source)
|
f.write(driver.page_source)
|
||||||
|
|
||||||
# NOTE: Switching to a "download" tab causes issues so we must use the in built
|
# NOTE: Switching to a "download" tab causes issues so we must use the in built
|
||||||
# download in Chrome, which does not have etag or metadata information.
|
# download in Chrome, which does not have etag or metadata information.
|
||||||
|
@ -54,12 +59,12 @@ def download_file(dest):
|
||||||
poll *= 1.5
|
poll *= 1.5
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
_dest = Path(dest).joinpath("MARKED__"+f)
|
_dest = Path(dest).joinpath("MARKED__" + f)
|
||||||
try:
|
try:
|
||||||
shutil.move(d.joinpath(f), _dest)
|
shutil.move(str(d.joinpath(f)), _dest)
|
||||||
except shutil.SameFileError:
|
except shutil.SameFileError:
|
||||||
os.remove(_dest)
|
os.remove(_dest)
|
||||||
shutil.move(d.joinpath(f), _dest)
|
shutil.move(str(d.joinpath(f)), _dest)
|
||||||
|
|
||||||
if len(os.listdir(d)) == 0:
|
if len(os.listdir(d)) == 0:
|
||||||
downloading = False
|
downloading = False
|
||||||
|
|
13
utils/wait.py
Normal file → Executable file
13
utils/wait.py
Normal file → Executable file
|
@ -1,16 +1,15 @@
|
||||||
from selenium.webdriver.support.wait import WebDriverWait
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
timeout = 5
|
timeout = 5
|
||||||
# find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element(By.ID, name))
|
|
||||||
|
|
||||||
|
|
||||||
def WaitClickable(driver, locator): return WebDriverWait(
|
def WaitClickable(driver, locator):
|
||||||
driver, timeout).until(EC.element_to_be_clickable(locator))
|
return WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))
|
||||||
|
|
||||||
|
|
||||||
def WaitDiv(driver, locator): return WebDriverWait(
|
def WaitDiv(driver, locator):
|
||||||
driver, timeout).until(EC.presence_of_element_located(locator))
|
return WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator))
|
||||||
|
|
||||||
|
|
||||||
def SwitchToIFrame(driver, locator): return WebDriverWait(
|
def SwitchToIFrame(driver, locator):
|
||||||
driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator))
|
return WebDriverWait(driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user