Blackboard-marks/utils/utils.py

import pathlib
import re
from constants.constants import DL_DIR
from selenium.webdriver.support import expected_conditions as EC
import time
import os
from pathlib import Path
import shutil


def friendly_filename(name):
    """Return *name* cleaned up for use as a single file name.

    The name is first passed through ``friendly_dirname`` and then every
    forward slash and backslash is removed, so the result cannot contain a
    path separator.
    """
    name = friendly_dirname(name)
    # Raw string: the previous non-raw literal relied on the invalid escape
    # sequence "\/", which newer Python versions warn about at compile time.
    return re.sub(r"[\\/]", '', name)


def friendly_dirname(name):
    """Return *name* with characters unsuitable for directory names removed.

    The clean-up runs in this order:

    1. remove ASCII control characters (0x00-0x1f, which includes tabs and
       newlines);
    2. remove the characters ``: < > " | ? *``;
    3. collapse a run of two or more whitespace characters between words
       down to a single one, and drop such runs at the start of the string
       or after the last word;
    4. strip any remaining leading and trailing whitespace.

    Forward slashes and backslashes are left untouched.
    """
    name = re.sub(r"[\x00-\x1f]", '', name)
    name = re.sub(r'[:<>"|?*]', '', name)
    # The pattern must be a raw string: in a normal string literal "\b" is a
    # backspace character, not the regex word-boundary assertion, so the
    # whitespace between words was never collapsed.
    name = re.sub(r"(^|\b\s)\s+($|\s?\b)", r"\1\2", name)
    return name.strip()


def get_assignment_name(driver, block):
    """Return a file-name-safe assignment title taken from *block*.

    The element's own text (without the text of its child elements) is read
    through ``get_text_excluding_children`` and sanitised with
    ``friendly_filename``. The resulting name is also printed to stdout.
    """
    raw_title = get_text_excluding_children(driver, block)
    assignment = friendly_filename(raw_title)
    print(f"Assesment: {assignment}")
    return assignment


def save_html(dir, filename, page_source):
    """Write *page_source* to ``<dir>/<filename>.html`` as UTF-8.

    Both *dir* and *filename* are sanitised first (``friendly_dirname`` and
    ``friendly_filename`` respectively). The directory, including any missing
    parents, is created when it does not exist yet; an existing file with the
    same name is overwritten.
    """
    target_dir = pathlib.Path(friendly_dirname(dir))
    target_dir.mkdir(parents=True, exist_ok=True)
    html_path = target_dir / (friendly_filename(filename) + ".html")
    with html_path.open("w", encoding="utf-8") as handle:
        handle.write(page_source)

# NOTE: Switching to a "download" tab causes issues, so we must use Chrome's
# built-in download, which does not provide ETag or other metadata.
# Files are served from annotate-au.foundations.blackboard.com rather than the
# bbcswebdav system, so the tag may not exist in the first place.


def download_file(dest):
    """Move every finished download from ``DL_DIR`` into *dest*.

    After an initial two-second wait the download directory is scanned
    repeatedly. Each entry that does not end in ``.crdownload`` (the suffix
    Chrome gives to in-progress downloads) is moved to *dest* and renamed
    with a ``MARKED__`` prefix, replacing an existing file of that name.
    When a ``.crdownload`` entry is seen, the function sleeps and rescans.
    It returns once ``DL_DIR`` is empty.

    Args:
        dest: Directory that receives the files. It is created, including
            missing parents, if it does not exist.

    Note:
        There is no overall timeout: a ``.crdownload`` file that never
        completes keeps this function polling. If ``DL_DIR`` is still empty
        after the initial wait, the function returns without moving anything.
    """
    src_dir = Path(DL_DIR)
    dest_dir = Path(dest)
    # Same policy as save_html: make sure the target directory exists rather
    # than failing inside shutil.move.
    dest_dir.mkdir(parents=True, exist_ok=True)

    time.sleep(2)  # give the browser a moment to start writing the file
    poll = 1.0
    max_poll = 30.0  # cap the back-off so the wait cannot grow without bound
    while True:
        for f in os.listdir(src_dir):
            if Path(f).suffix == '.crdownload':
                # A download is still in progress: back off, then rescan.
                time.sleep(poll)
                poll = min(poll * 1.5, max_poll)
                break
            target = dest_dir.joinpath("MARKED__" + f)
            # Replace a previous copy explicitly. shutil.move does not raise
            # SameFileError for an already-existing destination, so the old
            # "except SameFileError" retry never ran for that case.
            if target.is_file():
                target.unlink()
            shutil.move(src_dir.joinpath(f), target)

        if not os.listdir(src_dir):
            break

# https://stackoverflow.com/a/19040341
def get_text_excluding_children(driver, element):
    """Return the text held directly by *element*, ignoring child elements.

    The work is done in the browser: a jQuery snippet keeps only the
    element's text nodes and concatenates their text. The page is expected
    to expose ``jQuery``. *driver* must provide ``execute_script``.
    """
    script = """
    return jQuery(arguments[0]).contents().filter(function() {
        return this.nodeType == Node.TEXT_NODE;
    }).text();
    """
    return driver.execute_script(script, element)