mirror of
https://github.com/peter-tanner/Blackboard-marks.git
synced 2024-12-02 20:50:16 +08:00
75 lines
2.2 KiB
Python
75 lines
2.2 KiB
Python
import pathlib
|
|
import re
|
|
from constants.constants import DL_DIR
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
import time
|
|
import os
|
|
from pathlib import Path
|
|
import shutil
|
|
|
|
|
|
def friendly_filename(name):
    """Return *name* cleaned up for use as a single file name.

    The name is first passed through ``friendly_dirname``; every forward
    slash and backslash is then removed, so the result cannot contain a
    path separator.
    """
    # Raw string: a character class holding a backslash and a forward slash.
    # The previous non-raw literal relied on the invalid escape "\/".
    return re.sub(r"[\\/]", '', friendly_dirname(name))
|
|
|
|
|
|
def friendly_dirname(name):
    """Return *name* with characters unsuitable for a directory name removed.

    The clean-up runs in this order:

    1. ASCII control characters (code points 0-31) are dropped.
    2. The characters ``: < > " | ? *`` are dropped.
    3. A run of whitespace following a word is collapsed to one whitespace
       character, and leading whitespace in front of a word is removed.
    4. Leading and trailing whitespace is stripped.

    Forward slashes and backslashes are kept, so a multi-component path may
    be passed in.
    """
    # Every pattern is a raw string. In a normal string literal "\b" is a
    # backspace character rather than the regex word-boundary assertion,
    # which previously stopped step 3 from ever collapsing whitespace.
    name = re.sub(r"[\x00-\x1f]", '', name)
    name = re.sub(r'[:<>"|?*]', '', name)
    name = re.sub(r"(^|\b\s)\s+($|\s?\b)", r"\1\2", name)
    return name.strip()
|
|
|
|
|
|
def get_assignment_name(driver, block):
    """Return the filename-safe name read from *block*.

    The name is the text held directly by *block* (text of child elements is
    excluded), sanitised with ``friendly_filename``. It is printed to stdout
    before being returned.
    """
    raw_title = get_text_excluding_children(driver, block)
    assignment_name = friendly_filename(raw_title)
    print(f"Assesment: {assignment_name}")
    return assignment_name
|
|
|
|
|
|
def save_html(dir, filename, page_source):
    """Write *page_source* to ``<dir>/<filename>.html`` as UTF-8 text.

    *dir* is sanitised with ``friendly_dirname`` and *filename* with
    ``friendly_filename``. The directory, including any missing parents, is
    created when it does not exist yet; an existing file is overwritten.
    """
    target_dir = pathlib.Path(friendly_dirname(dir))
    target_dir.mkdir(parents=True, exist_ok=True)
    html_path = target_dir / (friendly_filename(filename) + ".html")
    html_path.write_text(page_source, encoding="utf-8")
|
|
|
|
# NOTE: Switching to a "download" tab causes issues, so we must use Chrome's
# built-in download, which does not expose ETag or metadata information.
# Files are served from annotate-au.foundations.blackboard.com rather than the
# bbcswebdav system, so the tag may not exist in the first place.
|
|
|
|
|
|
def download_file(dest):
    """Move every finished download from ``DL_DIR`` into *dest*.

    While ``DL_DIR`` contains a ``.crdownload`` entry, the function sleeps
    and looks again, multiplying the delay by 1.5 after each wait. Once no
    such entry remains, every entry in ``DL_DIR`` is moved into *dest* with
    ``MARKED__`` prepended to its name. The function returns when ``DL_DIR``
    is empty, which includes the case where it was already empty.

    Args:
        dest: Directory (string or path-like) that receives the files.
    """
    download_dir = Path(DL_DIR)
    dest_dir = Path(dest)

    # Fixed initial delay; presumably gives the browser time to start
    # writing its in-progress file before the first directory scan.
    time.sleep(2)
    poll = 1.0
    while True:
        entries = os.listdir(download_dir)
        if not entries:
            return
        if any(Path(entry).suffix == '.crdownload' for entry in entries):
            # A download is still in progress: back off and scan again.
            time.sleep(poll)
            poll *= 1.5
            continue
        for entry in entries:
            # No SameFileError handling: shutil.move only raises it when the
            # source and target are the same file, and removing the target
            # in that situation would delete the file being moved.
            shutil.move(download_dir / entry, dest_dir / ("MARKED__" + entry))
|
|
|
|
|
|
# https://stackoverflow.com/a/19040341
|
|
def get_text_excluding_children(driver, element):
    """Return the text held directly by *element*, ignoring child elements.

    Executes a jQuery snippet in the page through *driver* (so the page must
    provide ``jQuery``). The snippet keeps only the text nodes among the
    element's contents and returns their combined text.
    """
    own_text_script = """
        return jQuery(arguments[0]).contents().filter(function() {
            return this.nodeType == Node.TEXT_NODE;
        }).text();
    """
    return driver.execute_script(own_text_script, element)
|