From 8e76eb8b55469fbffd2a84d5569f09ccd955df8f Mon Sep 17 00:00:00 2001
From: peter
Date: Fri, 18 Jun 2021 19:23:19 +0800
Subject: [PATCH] more hackyness - download marked files w/ feedback

---
 .gitignore             |  1 +
 README.md              |  9 ++++-
 constants/constants.py |  6 ++++
 main.py                | 82 ++++++++++++++++++++++++++++++------------
 utils/utils.py         | 41 +++++++++++++++++++++
 utils/wait.py          |  8 ++---
 6 files changed, 119 insertions(+), 28 deletions(-)

diff --git a/.gitignore b/.gitignore
index ec9f6f6..21727bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 grades/
+tmp/
 __pycache__
 chromedriver*
 test*
\ No newline at end of file
diff --git a/README.md b/README.md
index 029a844..2453be0 100644
--- a/README.md
+++ b/README.md
@@ -13,4 +13,11 @@ Made this script to download my marks, receipts and all the stuff I uploaded for
 There is no bulk marks download feature in the current lms, even though it seems other blackboard installations can give students this bulk download ability. It relies on a lot of js crap so I ended up using selenium all the way through. Doesn't download styles to save space, you'll have to download the css and js yourself and it has to be absolute because the script makes no effort to make the links relative.
 
-This one was made for UWA but you may be able to tweak it for your institution (see constants.py).
\ No newline at end of file
+This one was made for UWA but you may be able to tweak it for your institution (see constants.py).
+
+Just added the ability to download the graded results, which may contain annotations. It uses a really hacky method, so no metadata file is created for these downloads.
+
+## Note:
+* Does not download Turnitin reports. You have to click through to the feedback site manually.
+* Does not download multiple submission attempts - only downloads the last/graded attempt.
+* Check that the marks page defaults to the 'All' category rather than something else like 'Submitted'. The script should correct this, but to be safe, click 'All' if it isn't selected already.
\ No newline at end of file
diff --git a/constants/constants.py b/constants/constants.py
index ab2b1bb..bce7679 100644
--- a/constants/constants.py
+++ b/constants/constants.py
@@ -1 +1,7 @@
+import os
+from pathlib import Path
+
 BASE_URL = "https://lms.uwa.edu.au" # Include protocol.
+
+DL_DIR = os.getcwd()+os.path.sep+"tmp"+os.path.sep
+Path(DL_DIR).mkdir(parents=True, exist_ok=True)
\ No newline at end of file
diff --git a/main.py b/main.py
index d866dfd..4364272 100644
--- a/main.py
+++ b/main.py
@@ -11,6 +11,7 @@ from selenium.webdriver.chrome.options import Options
 # ---
 from urllib.parse import parse_qs, urlparse
 import os
+from os.path import sep
 import requests
 import time
 import getpass
@@ -23,12 +24,14 @@ import pathlib
 import utils.selectors
 from utils.asset import Asset, RequestStack
 from utils.wait import SwitchToIFrame, WaitClickable, WaitDiv
-from constants.constants import BASE_URL
+from constants.constants import BASE_URL, DL_DIR
 from utils.login import login
 from utils.selectors import Selectors
-from utils.utils import friendly_filename, get_assignment_name, get_text_excluding_children, save_html
+from utils.utils import download_file, friendly_filename, get_assignment_name, get_text_excluding_children, save_html
 import code
 from random import randint
+from pathlib import Path
+from selenium.common.exceptions import ElementNotInteractableException
 
 testing = False
 try:
@@ -39,8 +42,8 @@
 except:
     cookie = None
 
+# stupid bug
 def click_the_fing_button(driver,button):
-    # https://stackoverflow.com/a/67414801 stupid bug
     try:
         ActionChains(driver).move_to_element(button)
         ActionChains(driver).click(button).perform()
@@ -73,17 +76,46 @@ def scrape_further(driver,path):
     attempts = [ x.get_attribute('href') for x in attempts ]
     for i, attempt in enumerate(attempts):
         request_stack.add_file(attempt,path)
+
+    get_feedback = False
+    try:
+        # download button causes a tab to appear quickly, download, then disappear
+        # need to capture the url to get the metadata and dl to the correct location
+        # can't be arsed to figure out how the pspdfkit js that executes this download works.
+        SwitchToIFrame(driver, (By.XPATH, "//iframe[@class='docviewer_iframe_embed']"))
+        SwitchToIFrame(driver, (By.XPATH, "//iframe[@title='PSPDFKit']"))
+        get_feedback = True
+    except:
+        print("No feedback to download")
+    if get_feedback:
+        dl_button = WaitClickable(driver,(By.XPATH, "//button[contains(@class,'PSPDFKit-Toolbar-Button PSPDFKit-Tool-Button')][@title='Download']"))
+        dl_button.click()
+        download_file(path)
     request_stack.download_all()
+# end of scrape_further
+
 
 parser = argparse.ArgumentParser(description='Automated microsoft SSO login.')
 # parser.add_argument("-p", "--password", help="Automatically use provided password", default="")
 parser.add_argument("-u", "--username", help="Automatically use provided userID", default="")
+path = ['grades']
 args = parser.parse_args()
 
 CAPABILITIES = DesiredCapabilities.CHROME
-CAPABILITIES['goog:loggingPrefs'] = {'performance': 'ALL'}
+CAPABILITIES['goog:loggingPrefs'] = {
+    'performance' : 'ALL',
+}
+
+for f in os.listdir(DL_DIR):
+    os.remove(Path(DL_DIR).joinpath(f))
+prefs = {
+    "profile.default_content_settings.popups": 0,
+    "download.default_directory": DL_DIR,
+    "directory_upgrade": True
+}
 
 OPTIONS = Options()
+OPTIONS.add_experimental_option("prefs", prefs)
 # OPTIONS.add_argument("--headless")
 driver = webdriver.Chrome(
     executable_path='chromedriver',
@@ -94,13 +126,15 @@
 driver.maximize_window()
 
 cookie = {'Cookie': login(args, driver)} # do Login.
+# need to load this page JUST to remove the tos warning so it doesn't fuck up everything down the line.
 driver.get(BASE_URL+"/webapps/gradebook/do/student/viewCourses")
-
 try:
     WaitClickable(driver,(By.CLASS_NAME, "button-1")).click()
 except:
     print("no tos warning - skipped")
-SwitchToIFrame(driver, (By.ID, 'mybbCanvas'))
+
+driver.get(BASE_URL+"/webapps/streamViewer/streamViewer?cmd=view&streamName=mygrades")
+save_html(sep.join(path), 'entrypoint', driver.page_source)
 
 # get courseIDs
 courses = driver.find_element_by_id("left_stream_mygrades")\
@@ -118,7 +152,6 @@ for i, course_results in enumerate(courses):
         'url' : course_url
     })
 
-path = ['grades']
 for i, course in enumerate(course_details):
     path.append(course['name']) # course name
     print(course['name'])
@@ -130,12 +163,11 @@ for i, course in enumerate(course_details):
         }
     """)
 
+    WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
     WaitClickable(driver,(By.XPATH,"//a[@value='A']")).click()
 
     table = driver.find_elements_by_xpath("//div[@id='grades_wrapper']/div")
 
-    save_html("/".join(path), path[0], driver.page_source)
-
     for i, assignment in enumerate(table):
         print(i)
         buttons = assignment.find_elements_by_tag_name("input")
@@ -151,14 +183,17 @@ for i, course in enumerate(course_details):
         path.append(assignment_name)
         # download information if it exists.
         if information_link:
-            ActionChains(driver).move_to_element(block).click(block).perform()
-            print("Switched "+assignment_name)
-            WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
-            driver.switch_to.window(driver.window_handles[1])
-            save_html("/".join(path),"information",driver.page_source)
-            scrape_further(driver, "/".join(path))
-            driver.close()
-            driver.switch_to.window(driver.window_handles[0])
+            try:
+                ActionChains(driver).move_to_element(block).click(block).perform()
+                print("Switched "+assignment_name)
+                WebDriverWait(driver,10).until(EC.number_of_windows_to_be(2))
+                driver.switch_to.window(driver.window_handles[1])
+                save_html(sep.join(path),"information",driver.page_source)
+                scrape_further(driver, sep.join(path))
+                driver.close()
+                driver.switch_to.window(driver.window_handles[0])
+            except ElementNotInteractableException:
+                print('idk')
         # download rubric if it exists.
         for button in buttons:
             action = button.get_attribute("onclick")
             if action != None and "showInLightBox" in action:
                 driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
                 driver.switch_to.window(driver.window_handles[1])
                 WaitDiv(driver, (By.CLASS_NAME, "rubricControlContainer"))
-                save_html("/".join(path),"rubric",driver.page_source)
+                save_html(sep.join(path),"rubric",driver.page_source)
                 driver.find_element_by_xpath("//li[@id='listViewTab']/a").click()
                 WaitDiv(driver, (By.CLASS_NAME, "rubricGradingList"))
-                save_html("/".join(path),"list",driver.page_source)
+                save_html(sep.join(path),"list",driver.page_source)
                 detailed_buttons = driver.find_elements_by_xpath("//div[@class='u_controlsWrapper']/input")
                 detailed_buttons[1].click()
                 detailed_buttons[0].click()
-                save_html("/".join(path),"list_detailed",driver.page_source)
+                save_html(sep.join(path),"list_detailed",driver.page_source)
                 driver.close()
                 driver.switch_to.window(driver.window_handles[0])
-        path.pop()
+        path.pop()
+    save_html(sep.join(path), path[0], driver.page_source)
     WaitClickable(driver,(By.XPATH,"//a[@value='S']")).click()
-    save_html("/".join(path),"submitted",driver.page_source)
+    save_html(sep.join(path),"submitted",driver.page_source)
     try:
         WaitClickable(driver,(By.XPATH,"//div[@id='submissionReceipts']//a")).click()
         WaitClickable(driver,(By.XPATH,"//div[@id='listContainer_itemcount']//a[@class='pagelink']")).click()
     except:
         print('No items?')
-    save_html("/".join(path),"receipts",driver.page_source)
+    save_html(sep.join(path),"receipts",driver.page_source)
     path.pop()
diff --git a/utils/utils.py b/utils/utils.py
index e53dd4b..068221b 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -1,5 +1,14 @@
 import pathlib
 import re
+from constants.constants import DL_DIR
+from utils.wait import WaitClickable
+from utils.asset import Asset
+from selenium.webdriver.support.wait import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import time
+import os
+from pathlib import Path
+import shutil
 
 def friendly_filename(name):
     name = friendly_dirname(name)
@@ -27,6 +36,38 @@ def save_html(dir,filename,page_source):
     with open(file, "w", encoding="utf-8") as f:
         f.write(page_source)
 
+# Why is it so hard to just get the url of a single tab...
+# def get_fast_dl(driver,button):
+#     windows = len(driver.window_handles)
+#     return
+
+# Because selenium seems to fuck up the url switching to a "download" tab,
+# I have to use the inbuilt download in chrome :(. That also means no etag/metadata
+# but to be honest it's using annotate-au.foundations.blackboard.com and not bbcswebdav system
+# so the tag may not exist in the first place.
+def download_file(dest):
+    d = Path(DL_DIR)
+    time.sleep(2)
+    downloading = True
+    poll = 1.0
+    while downloading:
+        for f in os.listdir(d):
+            if Path(f).suffix == '.crdownload':
+                time.sleep(poll)
+                poll *= 1.5
+                break
+            else:
+                _dest = Path(dest).joinpath("MARKED__"+f)
+                try:
+                    shutil.move(d.joinpath(f),_dest)
+                except shutil.SameFileError:
+                    os.remove(_dest)
+                    shutil.move(d.joinpath(f),_dest)
+
+        if len(os.listdir(d)) == 0:
+            downloading = False
+
 # https://stackoverflow.com/a/19040341
 def get_text_excluding_children(driver, element):
     return driver.execute_script("""
diff --git a/utils/wait.py b/utils/wait.py
index a491830..b7eeca7 100644
--- a/utils/wait.py
+++ b/utils/wait.py
@@ -1,7 +1,7 @@
 from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
-
+timeout = 4
 # find_element_safe = lambda name,timeout=30:WebDriverWait(driver, timeout).until(lambda x: x.find_element_by_id(name))
-WaitClickable = lambda driver,locator:WebDriverWait(driver, 10).until(EC.element_to_be_clickable(locator))
-WaitDiv = lambda driver,locator:WebDriverWait(driver, 5).until(EC.presence_of_element_located(locator))
-SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, 5).until(EC.frame_to_be_available_and_switch_to_it(locator))
\ No newline at end of file
+WaitClickable = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))
+WaitDiv = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.presence_of_element_located(locator))
+SwitchToIFrame = lambda driver,locator:WebDriverWait(driver, timeout).until(EC.frame_to_be_available_and_switch_to_it(locator))
\ No newline at end of file
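
The marked-file download in this patch rests on two pieces: Chrome is pointed at DL_DIR through the "prefs" experimental option, and download_file() then watches that directory until no *.crdownload partial files remain before moving the finished file next to the other scraped assets. Below is a minimal standalone sketch of that wait-and-move step for reference; the wait_for_marked_file name, the hard timeout, and the fixed poll interval are illustrative assumptions, not code from this repository.

# Illustrative sketch only (not part of the patch): block until Chrome has
# finished writing into DL_DIR, then move every completed file into `dest`
# with the same "MARKED__" prefix the script uses. Assumes DL_DIR holds only
# files from the current download (the script approximates this by emptying
# tmp/ at startup).
import os
import shutil
import time
from pathlib import Path

DL_DIR = Path(os.getcwd()) / "tmp"   # mirrors the layout in constants.py
DL_DIR.mkdir(parents=True, exist_ok=True)

def wait_for_marked_file(dest, timeout=60.0, poll=0.5):
    dest = Path(dest)
    dest.mkdir(parents=True, exist_ok=True)
    deadline = time.time() + timeout
    while time.time() < deadline:
        files = list(DL_DIR.iterdir())
        # Chrome writes partial downloads as <name>.crdownload and renames on completion.
        if files and not any(f.suffix == ".crdownload" for f in files):
            for f in files:
                shutil.move(str(f), str(dest / ("MARKED__" + f.name)))
            return True
        time.sleep(poll)
    return False

download_file() in the patch does the same thing with an unbounded loop and a growing poll interval; a hard timeout like the one above is one way to avoid hanging if the PSPDFKit download button never actually starts a download.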