Update to new LMS, update chromedriver to 95, unspaghettified *some* code

2024-11-30 11:40:16 +08:00 · 2021-11-14 01:53:15 +08:00 · 2021-11-14 01:53:15 +08:00 · e3ed2765d6
commit e3ed2765d6
parent 8e76eb8b55
7 changed files with 44 additions and 63 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,6 @@
-grades/
+grades*
 tmp/
 __pycache__
 chromedriver*
 test*
+.vscode/
--- a/README.md
+++ b/README.md
@ -21,3 +21,4 @@ Just made it able to download the graded results which may contain annotations.
 * Does not download turnitin reports. You have to click the link manually to the feedback site.
 * Does not download multiple submission attempts - only downloads the last/graded attempt.
 * Check that the default page is the 'all' category for the marks instead of something else like the submitted category. The script should correct this but just to be safe click on all if it isn't already
+* Sometimes chromedriver closes after logging in, when not in headless mode. Try interacting with the page before logging in.
--- a/main.py
+++ b/main.py
@ -1,9 +1,10 @@
+#!/usr/bin/env python3

+import requests
 from selenium import webdriver
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.common.by import By
-from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.action_chains import ActionChains
 # For chrome stuff
 from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@ -12,24 +13,14 @@ from selenium.webdriver.chrome.options import Options
 from urllib.parse import parse_qs, urlparse
 import os
 from os.path import sep
-import requests
-import time
-import getpass
-import json
 import re
-import sys
 import argparse
-import pathlib

-import utils.selectors
-from utils.asset import Asset, RequestStack
+from utils.asset import RequestStack
 from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
 from constants.constants import BASE_URL, DL_DIR
 from utils.login import login
-from utils.selectors import Selectors
-from utils.utils import download_file, friendly_filename, get_assignment_name, get_text_excluding_children, save_html
-import code
-from random import randint
+from utils.utils import download_file, get_assignment_name, save_html
 from pathlib import Path
 from selenium.common.exceptions import ElementNotInteractableException

@ -40,8 +31,6 @@ try:
 except:
    def get_etc(*args): return False

-cookie = None
-
 # stupid bug
 def click_the_fing_button(driver,button):
    try:
@ -54,9 +43,9 @@ def click_the_fing_button(driver,button):
        driver.maximize_window()

 # You can probably replace this with a recursive method like in blackboard scraper but tbh i just want to get this script done so i can stop working for once.
-def scrape_further(driver,path):
+def scrape_further(driver,path,session):
    # attempts for bb-held tests
-    attempts = driver.find_elements_by_xpath("//a[starts-with(@href, '/webapps/assessment')]")
+    attempts = driver.find_elements(By.XPATH, "//a[starts-with(@href, '/webapps/assessment')]")
    attempts = [ x.get_attribute('href') for x in attempts ]
    for i, attempt in enumerate(attempts):
        name = "attempt_"+str(i)+"_["+parse_qs(urlparse(attempt).query)['attempt_id'][0]+"]"
@ -66,13 +55,13 @@ def scrape_further(driver,path):
        driver.switch_to.window(driver.window_handles[2])
        save_html(path, name, driver.page_source)
        if testing:
-            get_etc(driver, cookie, path)
+            get_etc(driver, session, path)
        driver.close()
        driver.switch_to.window(driver.window_handles[1])

    # submission file for assignment
-    request_stack = RequestStack(cookie)
-    attempts = driver.find_elements_by_xpath("//a[starts-with(@href, '/webapps/assignment/download')]")
+    request_stack = RequestStack(session)
+    attempts = driver.find_elements(By.XPATH, "//a[starts-with(@href, '/webapps/assignment/download')]")
    attempts = [ x.get_attribute('href') for x in attempts ]
    for i, attempt in enumerate(attempts):
        request_stack.add_file(attempt,path)
@ -118,13 +107,16 @@ OPTIONS = Options()
 OPTIONS.add_experimental_option("prefs", prefs)
 # OPTIONS.add_argument("--headless")
 driver = webdriver.Chrome(
-                            executable_path='chromedriver',
+                            executable_path='chromedriver.exe',
                            desired_capabilities=CAPABILITIES,
                            options=OPTIONS
                        )
 driver.maximize_window()

-cookie = {'Cookie': login(args, driver)} # do Login.
+cookies = login(args, driver) # do Login.
+session = requests.Session()
+for cookie in cookies:
+    session.cookies.set(cookie["name"], cookie["value"])

 # need to load this page JUST to remove the tos warning so it doesnt fuck up everything down the line.
 driver.get(BASE_URL+"/webapps/gradebook/do/student/viewCourses")
@ -137,15 +129,15 @@ driver.get(BASE_URL+"/webapps/streamViewer/streamViewer?cmd=view&streamName=mygr
 save_html(sep.join(path), 'entrypoint', driver.page_source)

 # get courseIDs
-courses = driver.find_element_by_id("left_stream_mygrades")\
-                .find_elements_by_xpath("//div[@role='tab']")
+courses = driver.find_element(By.ID, "left_stream_mygrades")\
+                .find_elements(By.XPATH, "//div[@role='tab']")

 course_details = []
 for i, course_results in enumerate(courses):
    course_results = courses[i]
    ActionChains(driver).move_to_element(course_results).perform()
    course_url = course_results.get_attribute("bb:rhs")
-    course_name = course_results.find_elements_by_xpath("//span[@class='stream_area_name']")[i].text
+    course_name = course_results.find_elements(By.XPATH, "//span[@class='stream_area_name']")[i].text
    course_name += " ["+parse_qs(urlparse(course_url).query)['course_id'][0]+"]"
    course_details.append({
        'name': course_name,
@ -166,19 +158,19 @@ for i, course in enumerate(course_details):
    WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
    WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()

-    table = driver.find_elements_by_xpath("//div[@id='grades_wrapper']/div")
+    table = driver.find_elements(By.XPATH, "//div[@id='grades_wrapper']/div")

    for i, assignment in enumerate(table):
        print(i)
-        buttons = assignment.find_elements_by_tag_name("input")
+        buttons = assignment.find_elements(By.TAG_NAME, "input")
        block = None
        assignment_name = None
        information_link = False
        try:
-            block = assignment.find_element_by_xpath("./div[@class='cell gradable']/a[@onclick]")
+            block = assignment.find_element(By.XPATH, "./div[@class='cell gradable']/a[@onclick]")
            information_link = True
        except:
-            block = assignment.find_element_by_xpath("./div[@class='cell gradable']")
+            block = assignment.find_element(By.XPATH, "./div[@class='cell gradable']")
        assignment_name = get_assignment_name(driver,block)
        path.append(assignment_name)
        # download information if it exists.
@ -189,7 +181,7 @@ for i, course in enumerate(course_details):
                WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
                driver.switch_to.window(driver.window_handles[1])
                save_html(sep.join(path),"information",driver.page_source)
-                scrape_further(driver, sep.join(path))
+                scrape_further(driver, sep.join(path), session)
                driver.close()
                driver.switch_to.window(driver.window_handles[0])
            except ElementNotInteractableException:
@ -203,10 +195,10 @@ for i, course in enumerate(course_details):
                driver.switch_to.window(driver.window_handles[1])
                WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
                save_html(sep.join(path),"rubric",driver.page_source)
-                driver.find_element_by_xpath("//li[@id='listViewTab']/a").click()
+                driver.find_element(By.XPATH, "//li[@id='listViewTab']/a").click()
                WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
                save_html(sep.join(path),"list",driver.page_source)
-                detailed_buttons = driver.find_elements_by_xpath("//div[@class='u_controlsWrapper']/input")
+                detailed_buttons = driver.find_elements(By.XPATH, "//div[@class='u_controlsWrapper']/input")
                detailed_buttons[1].click()
                detailed_buttons[0].click()
                save_html(sep.join(path),"list_detailed",driver.page_source)
--- a/utils/asset.py
+++ b/utils/asset.py
@ -1,4 +1,3 @@
-import wget
 from constants.constants import BASE_URL
 import re
 import hashlib
@ -26,6 +25,7 @@ class RequestStack:
    
    def download_all(self):
        for file in self.request_stack:
+            print(f"\tDownloading {file.url}")
            file.download(self.token)

 class Asset:
@ -36,14 +36,14 @@ class Asset:
        self.path.mkdir(parents=True, exist_ok=True)
        super().__init__()

-    def download(self,req_headers):
-        response = requests.get(BASE_URL+self.url, stream=True, headers=req_headers, allow_redirects=False)
+    def download(self,session):
+        response = session.get(BASE_URL+self.url, stream=True, allow_redirects=False)
        headers = response.headers
        if response.status_code == 302 and len(headers['location']) > 0:
-            Asset(headers['location'], self.path).download(req_headers)
+            Asset(headers['location'], self.path).download(session)
            return
        elif response.status_code != 200:
-            print("Error "+str(response.status_code))
+            print("[!] Error "+str(response.status_code))
            return response.status_code
        headers = { x:re.sub(r'^"*|"*?$', '', headers.get(x)) for x in headers } # ewww regex
        if 'Content-Disposition' in headers.keys():
--- a/utils/login.py
+++ b/utils/login.py
@ -1,6 +1,6 @@
+import sys
 from utils.wait import WaitClickable
 from utils.selectors import Selectors
-import sys
 from selenium.webdriver.support.wait import WebDriverWait
 from urllib.parse import urlparse
 from selenium.webdriver.support import expected_conditions as EC
@ -9,16 +9,8 @@ from constants.constants import BASE_URL
 import re
 import json

-def try_cookie(driver):
-    for entry in driver.get_log('performance'):
-        parameters = json.loads(entry["message"])['message']['params']
-        if (
-            'documentURL' in  parameters.keys()
-            and re.search(r'https://lms.uwa.edu.au/webapps/portal.*', parameters['documentURL']) != None
-        ):
-            return parameters['redirectResponse']['requestHeaders']['Cookie']
-
 def login(args, driver):
+    driver.get(BASE_URL)
    USERNAME = args.username
    if len(USERNAME) == 0:
        print('UserID: ')
@ -27,8 +19,6 @@ def login(args, driver):
    print('Password: ')
    PASSWORD = getpass('')

-    driver.get(BASE_URL)
-
    WaitClickable(driver,Selectors.BOX_USERNAME).send_keys(USERNAME)
    WaitClickable(driver,Selectors.BUTTON_NEXT).click()
    print('Entered username.')
@ -44,10 +34,10 @@ def login(args, driver):

    WaitClickable(driver,Selectors.BUTTON_DENY).click()
    # WaitClickable(driver,BUTTON_NEXT).click() #IF you want to remember credentials, switch these comments
-    current_uri = urlparse(driver.current_url)
-    if '{uri.scheme}://{uri.netloc}'.format(uri=current_uri) != BASE_URL:
-        driver.quit()
-        print("Login failed.")
-        exit(-1)
    
-    return try_cookie(driver)
+    cookie = driver.get_cookies()
+    if not cookie == None: return cookie
+    
+    print('Could not get auth cookie - Invalid ID or password?', file=sys.stderr)
+    driver.quit()
+    exit(1)
--- a/utils/utils.py
+++ b/utils/utils.py
@ -1,9 +1,6 @@
 import pathlib
 import re
 from constants.constants import DL_DIR
-from utils.wait import WaitClickable
-from utils.asset import Asset
-from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 import time
 import os
--- a/utils/wait.py
+++ b/utils/wait.py
@ -1,7 +1,7 @@
 from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
-timeout = 4
-# find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element_by_id(name))
+timeout = 5
+# find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element(By.ID, name))
 WaitClickable = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))
 WaitDiv = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator))
 SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator))