From e3ed2765d6c2dab1d6384b4cf187e31dee5d9b66 Mon Sep 17 00:00:00 2001
From: Peter
Date: Sun, 14 Nov 2021 01:53:15 +0800
Subject: [PATCH] Update to new LMS, update chromedriver to 95, unspaghettified *some* code

---
 .gitignore     |  5 +++--
 README.md      |  3 ++-
 main.py        | 56 ++++++++++++++++++++++----------------------
 utils/asset.py | 10 ++++-----
 utils/login.py | 26 ++++++++---------------
 utils/utils.py |  3 ---
 utils/wait.py  |  4 ++--
 7 files changed, 44 insertions(+), 63 deletions(-)

diff --git a/.gitignore b/.gitignore
index 21727bf..d4b1961 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
-grades/
+grades*
 tmp/
 __pycache__
 chromedriver*
-test*
\ No newline at end of file
+test*
+.vscode/
\ No newline at end of file
diff --git a/README.md b/README.md
index 2453be0..35601b8 100644
--- a/README.md
+++ b/README.md
@@ -20,4 +20,5 @@ Just made it able to download the graded results which may contain annotations.
 ## Note:
 * Does not download turnitin reports. You have to click the link manually to the feedback site.
 * Does not download multiple submission attempts - only downloads the last/graded attempt.
-* Check that the default page is the 'all' category for the marks instead of something else like the submitted category. The script should correct this but just to be safe click on all if it isn't already
\ No newline at end of file
+* Check that the default page is the 'all' category for the marks instead of something else like the submitted category. The script should correct this, but to be safe, click 'All' if it isn't selected already.
+* Sometimes chromedriver closes after logging in when not running in headless mode. Try interacting with the page before logging in.
\ No newline at end of file
diff --git a/main.py b/main.py
index 4364272..ebb0e57 100644
--- a/main.py
+++ b/main.py
@@ -1,9 +1,10 @@
+#!/usr/bin/env python3
+import requests
 from selenium import webdriver
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.common.by import By
-from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.action_chains import ActionChains
 
 # For chrome stuff
 from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@@ -12,24 +13,14 @@ from selenium.webdriver.chrome.options import Options
 from urllib.parse import parse_qs, urlparse
 import os
 from os.path import sep
-import requests
-import time
-import getpass
-import json
 import re
-import sys
 import argparse
-import pathlib
 
-import utils.selectors
-from utils.asset import Asset, RequestStack
+from utils.asset import RequestStack
 from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
 from constants.constants import BASE_URL, DL_DIR
 from utils.login import login
-from utils.selectors import Selectors
-from utils.utils import download_file, friendly_filename, get_assignment_name, get_text_excluding_children, save_html
-import code
-from random import randint
+from utils.utils import download_file, get_assignment_name, save_html
 
 from pathlib import Path
 from selenium.common.exceptions import ElementNotInteractableException
@@ -40,8 +31,6 @@ try:
 except:
     def get_etc(*args): return False
 
-cookie = None
-
 # stupid bug
 def click_the_fing_button(driver,button):
     try:
@@ -54,9 +43,9 @@
     driver.maximize_window()
 
 # You can probably replace this with a recursive method like in blackboard scraper but tbh i just want to get this script done so i can stop working for once.
-def scrape_further(driver,path):
+def scrape_further(driver,path,session):
     # attempts for bb-held tests
-    attempts = driver.find_elements_by_xpath("//a[starts-with(@href, '/webapps/assessment')]")
+    attempts = driver.find_elements(By.XPATH, "//a[starts-with(@href, '/webapps/assessment')]")
     attempts = [ x.get_attribute('href') for x in attempts ]
     for i, attempt in enumerate(attempts):
         name = "attempt_"+str(i)+"_["+parse_qs(urlparse(attempt).query)['attempt_id'][0]+"]"
@@ -66,13 +55,13 @@ def scrape_further(driver,path):
         driver.switch_to.window(driver.window_handles[2])
         save_html(path, name, driver.page_source)
         if testing:
-            get_etc(driver, cookie, path)
+            get_etc(driver, session, path)
         driver.close()
         driver.switch_to.window(driver.window_handles[1])
 
     # submission file for assignment
-    request_stack = RequestStack(cookie)
-    attempts = driver.find_elements_by_xpath("//a[starts-with(@href, '/webapps/assignment/download')]")
+    request_stack = RequestStack(session)
+    attempts = driver.find_elements(By.XPATH, "//a[starts-with(@href, '/webapps/assignment/download')]")
     attempts = [ x.get_attribute('href') for x in attempts ]
     for i, attempt in enumerate(attempts):
         request_stack.add_file(attempt,path)
@@ -118,13 +107,16 @@ OPTIONS = Options()
 OPTIONS.add_experimental_option("prefs", prefs)
 # OPTIONS.add_argument("--headless")
 driver = webdriver.Chrome(
-    executable_path='chromedriver',
+    executable_path='chromedriver.exe',
     desired_capabilities=CAPABILITIES,
     options=OPTIONS
 )
 driver.maximize_window()
 
-cookie = {'Cookie': login(args, driver)} # do Login.
+cookies = login(args, driver) # do Login.
+session = requests.Session()
+for cookie in cookies:
+    session.cookies.set(cookie["name"], cookie["value"])
 
 # need to load this page JUST to remove the tos warning so it doesnt fuck up everything down the line.
 driver.get(BASE_URL+"/webapps/gradebook/do/student/viewCourses")
@@ -137,15 +129,15 @@ driver.get(BASE_URL+"/webapps/streamViewer/streamViewer?cmd=view&streamName=mygr
 save_html(sep.join(path), 'entrypoint', driver.page_source)
 
 # get courseIDs
-courses = driver.find_element_by_id("left_stream_mygrades")\
-    .find_elements_by_xpath("//div[@role='tab']")
+courses = driver.find_element(By.ID, "left_stream_mygrades")\
+    .find_elements(By.XPATH, "//div[@role='tab']")
 
 course_details = []
 for i, course_results in enumerate(courses):
     course_results = courses[i]
     ActionChains(driver).move_to_element(course_results).perform()
     course_url = course_results.get_attribute("bb:rhs")
-    course_name = course_results.find_elements_by_xpath("//span[@class='stream_area_name']")[i].text
+    course_name = course_results.find_elements(By.XPATH, "//span[@class='stream_area_name']")[i].text
     course_name += " ["+parse_qs(urlparse(course_url).query)['course_id'][0]+"]"
     course_details.append({
         'name': course_name,
@@ -166,19 +158,19 @@ for i, course in enumerate(course_details):
     WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
     WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
 
-    table = driver.find_elements_by_xpath("//div[@id='grades_wrapper']/div")
+    table = driver.find_elements(By.XPATH, "//div[@id='grades_wrapper']/div")
     for i, assignment in enumerate(table):
         print(i)
-        buttons = assignment.find_elements_by_tag_name("input")
+        buttons = assignment.find_elements(By.TAG_NAME, "input")
         block = None
         assignment_name = None
         information_link = False
         try:
-            block = assignment.find_element_by_xpath("./div[@class='cell gradable']/a[@onclick]")
+            block = assignment.find_element(By.XPATH, "./div[@class='cell gradable']/a[@onclick]")
             information_link = True
         except:
-            block = assignment.find_element_by_xpath("./div[@class='cell gradable']")
+            block = assignment.find_element(By.XPATH, "./div[@class='cell gradable']")
         assignment_name = get_assignment_name(driver,block)
         path.append(assignment_name)
 
         # download information if it exists.
@@ -189,7 +181,7 @@ for i, course in enumerate(course_details):
             WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
             driver.switch_to.window(driver.window_handles[1])
             save_html(sep.join(path),"information",driver.page_source)
-            scrape_further(driver, sep.join(path))
+            scrape_further(driver, sep.join(path), session)
             driver.close()
             driver.switch_to.window(driver.window_handles[0])
         except ElementNotInteractableException:
@@ -203,10 +195,10 @@ for i, course in enumerate(course_details):
             driver.switch_to.window(driver.window_handles[1])
             WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
             save_html(sep.join(path),"rubric",driver.page_source)
-            driver.find_element_by_xpath("//li[@id='listViewTab']/a").click()
+            driver.find_element(By.XPATH, "//li[@id='listViewTab']/a").click()
             WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
             save_html(sep.join(path),"list",driver.page_source)
-            detailed_buttons = driver.find_elements_by_xpath("//div[@class='u_controlsWrapper']/input")
+            detailed_buttons = driver.find_elements(By.XPATH, "//div[@class='u_controlsWrapper']/input")
             detailed_buttons[1].click()
             detailed_buttons[0].click()
             save_html(sep.join(path),"list_detailed",driver.page_source)
diff --git a/utils/asset.py b/utils/asset.py
index 65f008b..b4eb000 100644
--- a/utils/asset.py
+++ b/utils/asset.py
@@ -1,4 +1,3 @@
-import wget
 from constants.constants import BASE_URL
 import re
 import hashlib
@@ -26,6 +25,7 @@ class RequestStack:
 
     def download_all(self):
         for file in self.request_stack:
+            print(f"\tDownloading {file.url}")
             file.download(self.token)
 
 class Asset:
@@ -36,14 +36,14 @@ class Asset:
         self.path.mkdir(parents=True, exist_ok=True)
         super().__init__()
 
-    def download(self,req_headers):
-        response = requests.get(BASE_URL+self.url, stream=True, headers=req_headers, allow_redirects=False)
+    def download(self,session):
+        response = session.get(BASE_URL+self.url, stream=True, allow_redirects=False)
         headers = response.headers
         if response.status_code == 302 and len(headers['location']) > 0:
-            Asset(headers['location'], self.path).download(req_headers)
+            Asset(headers['location'], self.path).download(session)
             return
         elif response.status_code != 200:
-            print("Error "+str(response.status_code))
+            print("[!] Error "+str(response.status_code))
Error "+str(response.status_code)) return response.status_code headers = { x:re.sub(r'^"*|"*?$', '', headers.get(x)) for x in headers } # ewww regex if 'Content-Disposition' in headers.keys(): diff --git a/utils/login.py b/utils/login.py index daef890..c74f0d6 100644 --- a/utils/login.py +++ b/utils/login.py @@ -1,6 +1,6 @@ +import sys from utils.wait import WaitClickable from utils.selectors import Selectors -import sys from selenium.webdriver.support.wait import WebDriverWait from urllib.parse import urlparse from selenium.webdriver.support import expected_conditions as EC @@ -9,16 +9,8 @@ from constants.constants import BASE_URL import re import json -def try_cookie(driver): - for entry in driver.get_log('performance'): - parameters = json.loads(entry["message"])['message']['params'] - if ( - 'documentURL' in parameters.keys() - and re.search(r'https://lms.uwa.edu.au/webapps/portal.*', parameters['documentURL']) != None - ): - return parameters['redirectResponse']['requestHeaders']['Cookie'] - def login(args, driver): + driver.get(BASE_URL) USERNAME = args.username if len(USERNAME) == 0: print('UserID: ') @@ -26,8 +18,6 @@ def login(args, driver): USERNAME += '@student.uwa.edu.au' print('Password: ') PASSWORD = getpass('') - - driver.get(BASE_URL) WaitClickable(driver,Selectors.BOX_USERNAME).send_keys(USERNAME) WaitClickable(driver,Selectors.BUTTON_NEXT).click() @@ -44,10 +34,10 @@ def login(args, driver): WaitClickable(driver,Selectors.BUTTON_DENY).click() # WaitClickable(driver,BUTTON_NEXT).click() #IF you want to remember credentials, switch these comments - current_uri = urlparse(driver.current_url) - if '{uri.scheme}://{uri.netloc}'.format(uri=current_uri) != BASE_URL: - driver.quit() - print("Login failed.") - exit(-1) - return try_cookie(driver) \ No newline at end of file + cookie = driver.get_cookies() + if not cookie == None: return cookie + + print('Could not get auth cookie - Invalid ID or password?', file=sys.stderr) + driver.quit() + exit(1) \ No newline at end of file diff --git a/utils/utils.py b/utils/utils.py index 068221b..358fb92 100644 --- a/utils/utils.py +++ b/utils/utils.py @@ -1,9 +1,6 @@ import pathlib import re from constants.constants import DL_DIR -from utils.wait import WaitClickable -from utils.asset import Asset -from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support import expected_conditions as EC import time import os diff --git a/utils/wait.py b/utils/wait.py index b7eeca7..c582169 100644 --- a/utils/wait.py +++ b/utils/wait.py @@ -1,7 +1,7 @@ from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support import expected_conditions as EC -timeout = 4 -# find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element_by_id(name)) +timeout = 5 +# find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element(By.ID, name)) WaitClickable = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator)) WaitDiv = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator)) SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator)) \ No newline at end of file