Forgot to commit week 8 workshop. Added some fixes to the code too.

2024-11-30 10:50:19 +08:00 · 2021-10-03 23:39:24 +08:00 · 2021-10-03 23:39:24 +08:00 · 0b83189faf
commit 0b83189faf
parent c9a9de671b
8 changed files with 283 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,7 +2,9 @@
 !*.*
 !*/
 *.o
 *.out
 *.zip
 .vscode/
 .gdb_history
--- a/8-workshop/benchmark_hashtable.c
+++ b/8-workshop/benchmark_hashtable.c
@ -0,0 +1,113 @@
 /*
 * 6. 🌶 Why did we do this? The performance of a hashtable can be dramatically
  better than other data structures, such as arrays, lists and binary trees.
    ** ANSWER: Hashtables have almost O(1) performance. A traditional O(n) array
       lookup is used for each bucket, but because the size of a bucket (n) is
       small, this is a relatively small amount of time. The hash function is 
       used to access one bucket. This function is independent of size 
       (constant time complexity), so performance will tend to O(1) as the 
       number of buckets tends to infinity (Assuming a perfect hash function).
       For an array, the traditional array search has to potentially go through
       n array items, so it has O(n) performance which is worse.
 * For a collection of several thousand (a million?) random strings use the
  gettimeofday() system-call to measure how long it takes to insert, and to then
  find, the same set of strings with both an array and your new hashtable.
 */
 #include "benchmark_hashtable.h"
 /*
  * Build a heap-allocated, NUL-terminated string of `length` random characters.
  * Each character is '0' + rand() % 72. The caller owns the returned buffer
  * and releases it with free(). Terminates the program if allocation fails.
  */
 char *random_string(size_t length)
 {
    // One extra byte for the terminator: the result is handed to strcmp(),
    // strdup() and hash_string(), all of which require a proper C string.
    char *string = malloc(length + 1);
    if (string == NULL) {
        fprintf(stderr, "malloc failed.\n");
        exit(EXIT_FAILURE);
    }
    for (size_t i = 0; i < length; i++) {
        string[i] = '0' + rand() % 72;
    }
    string[length] = '\0';
    return string;
 }
 /*
  * Linear search: report whether `string` compares equal (per strcmp) to any
  * of the first `length` entries of `arr`.
  */
 bool find_string_array(char *string, char **arr, size_t length)
 {
    char **entry = arr;
    for (size_t remaining = length; remaining > 0; remaining--, entry++) {
        if (strcmp(string, *entry) != 0) {
            continue;
        }
        return true;
    }
    return false;
 }
 /*
  * Populate arr[0 .. TEST_LENGTH-1] with freshly generated random strings and
  * insert each of them into the hashtable, so both are built from the same
  * set of strings.
  */
 void intialize_benchmark(char **arr, HASHTABLE *hashtable)
 {
    size_t slot = 0;
    while (slot < TEST_LENGTH) {
        arr[slot] = random_string(STRING_LENGTH);
        add_string_to_hashtable(hashtable, arr[slot]);
        slot++;
    }
 }
 /*
  * Microseconds elapsed from *start to *end, using both the seconds and the
  * microseconds fields of struct timeval.
  */
 static long long elapsed_usec(const struct timeval *start,
                               const struct timeval *end)
 {
    return (long long)(end->tv_sec - start->tv_sec) * 1000000LL
         + (long long)(end->tv_usec - start->tv_usec);
 }
 /*
  * Time one lookup of a randomly chosen stored string, first in the array and
  * then in the hashtable.
  *
  * On success the elapsed microseconds are written to *dt_arr and *dt_hash and
  * true is returned. false is returned when a measured interval is negative
  * (the clock read by gettimeofday() went backwards between the two samples);
  * the caller is expected to retry in that case.
  */
 bool one_benchmark(char **arr, HASHTABLE *hashtable,
                    suseconds_t *dt_arr, suseconds_t *dt_hash)
 {
    char *needle = arr[rand() % TEST_LENGTH];
    struct timeval start;
    struct timeval end;
    long long elapsed;
    gettimeofday(&start, NULL);
    find_string_array(needle, arr, TEST_LENGTH);
    gettimeofday(&end, NULL);
    // Include tv_sec: comparing tv_usec alone discards every measurement that
    // crosses a second boundary and misreports any that lasts over a second.
    elapsed = elapsed_usec(&start, &end);
    if (elapsed < 0) return false;
    *dt_arr = (suseconds_t)elapsed;
    gettimeofday(&start, NULL);
    find_string_in_hashtable(hashtable, needle);
    gettimeofday(&end, NULL);
    elapsed = elapsed_usec(&start, &end);
    if (elapsed < 0) return false;
    *dt_hash = (suseconds_t)elapsed;
    return true;
 }
 /*
  * Print the average, minimum and maximum of times[0 .. length-1].
  *
  * fmt_str is a printf format that must consume, in this order, one double
  * (the average) followed by two ints (minimum and maximum), e.g.
  * "AVERAGE %lf MIN %d MAX %d\n". With length == 0 all three values are 0.
  */
 void print_stats(suseconds_t *times, size_t length, char *fmt_str)
 {
    double sum = 0;
    suseconds_t min = 0;
    suseconds_t max = 0;
    // Seed min/max from the data rather than from a type-specific constant,
    // so the result does not depend on the width of suseconds_t.
    if (length > 0) {
        min = times[0];
        max = times[0];
    }
    for (size_t i = 0; i < length; i++) {
        sum += (double)times[i];
        if (times[i] < min) {
            min = times[i];
        }
        if (times[i] > max) {
            max = times[i];
        }
    }
    // Divide once at the end: fewer rounding steps than dividing every term.
    double avg = (length > 0) ? sum / (double)length : 0;
    // The callers' formats use %d, so pass ints explicitly; suseconds_t may
    // be wider than int, which would not match that conversion.
    printf(fmt_str, avg, (int)min, (int)max);
 }
 /*
  * Collect BENCHMARK_ITERATIONS accepted timings for array and hashtable
  * lookups, then print the statistics of each series.
  */
 void benchmark(char **arr, HASHTABLE *hashtable)
 {
    suseconds_t times_array[BENCHMARK_ITERATIONS];
    suseconds_t times_hash[BENCHMARK_ITERATIONS];
    size_t run = 0;
    while (run < BENCHMARK_ITERATIONS) {
        // A rejected measurement is retried into the same slot.
        if (one_benchmark(arr, hashtable,
                          &times_array[run], &times_hash[run])) {
            run++;
        }
    }
    print_stats(times_array, BENCHMARK_ITERATIONS, "ARRAY   : AVERAGE %lf MIN %d MAX %d\n");
    print_stats(times_hash, BENCHMARK_ITERATIONS,  "HASHMAP : AVERAGE %lf MIN %d MAX %d\n");
 }
 /*
  * Benchmark driver: seed the random generator, build the array and the
  * hashtable from the same random strings, run the timing comparison, then
  * release the array and its strings.
  */
 int main(int argc, char const *argv[])
 {
    (void)argc;
    (void)argv;
    srand((unsigned int)time(NULL));
    HASHTABLE *hashtable = new_hashtable();
    if (hashtable == NULL) {
        fprintf(stderr, "new_hashtable failed.\n");
        exit(EXIT_FAILURE);
    }
    // TEST_LENGTH pointers are too many to place comfortably on the stack,
    // so the array lives on the heap.
    char **arr = calloc(TEST_LENGTH, sizeof *arr);
    if (arr == NULL) {
        fprintf(stderr, "calloc failed.\n");
        exit(EXIT_FAILURE);
    }
    intialize_benchmark(arr, hashtable);
    benchmark(arr, hashtable);
    // The hashtable stores its own strdup() copies, so the originals can go.
    for (size_t i = 0; i < TEST_LENGTH; i++) {
        free(arr[i]);
    }
    free(arr);
    return 0;
 }
--- a/8-workshop/benchmark_hashtable.h
+++ b/8-workshop/benchmark_hashtable.h
@ -0,0 +1,11 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdbool.h>
 #include <time.h>
 #include <sys/time.h>
 #include "hashtable.h"
 // Number of random strings generated by intialize_benchmark(); also the
 // length of the array that is searched linearly.
 #define TEST_LENGTH 100000  // SOME LARGE VALUE.
 // Length argument passed to random_string() for every generated string.
 #define STRING_LENGTH 127
 // Number of accepted timing samples collected by benchmark().
 #define BENCHMARK_ITERATIONS 10000
--- a/8-workshop/hashstring.c
+++ b/8-workshop/hashstring.c
@ -0,0 +1,17 @@
 #include "hashstring.h"
 //  FUNCTION hash_string() ACCEPTS A STRING PARAMETER,
 //  AND RETURNS AN UNSIGNED 32-BIT INTEGER AS ITS RESULT
 //
 //  see:  https://en.cppreference.com/w/c/types/integer
 uint32_t hash_string(char *string)
 {
    // Multiply-by-33-and-add over every character up to the terminator;
    // the unsigned arithmetic wraps around on overflow.
    uint32_t accumulator = 0;
    for (char *cursor = string; *cursor != '\0'; cursor++) {
        accumulator = accumulator * 33 + *cursor;
    }
    return accumulator;
 }
--- a/8-workshop/hashstring.h
+++ b/8-workshop/hashstring.h
@ -0,0 +1,3 @@
 #include <stdint.h>
 // Hashes a NUL-terminated string to an unsigned 32-bit value
 // (implemented in hashstring.c).
 extern uint32_t hash_string(char*);
--- a/8-workshop/hashtable.c
+++ b/8-workshop/hashtable.c
@ -0,0 +1,63 @@
 #include "hashtable.h"
 /*
 *   LIST DATATYPE
 */
 // Walk the list from its head and report whether any node's item compares
 // equal (per strcmp) to `string`. An empty (NULL) list yields false.
 bool find_in_list(LIST *list, char *string)
 {
    bool found = false;
    for (LIST *node = list; node != NULL && !found; node = node->next) {
        found = (strcmp(string, node->item) == 0);
    }
    return found;
 }
 // Return the head of the list after making sure `string` is in it.
 // If the string is already present the list is returned unchanged;
 // otherwise a new node holding a strdup() copy is prepended.
 // Any allocation failure prints a message and terminates the program.
 LIST *add_to_list(LIST *list, char *string)
 {
    LIST *head = list;
    if (!find_in_list(list, string)) {
        char *item = strdup(string);
        if (item == NULL) {
            fprintf(stderr, "strdup failed.\n");
            exit(EXIT_FAILURE);
        }
        LIST *node = malloc(sizeof *node);
        if (node == NULL) {
            fprintf(stderr, "malloc failed.\n");
            exit(EXIT_FAILURE);
        }
        node->next = list;
        node->item = item;
        head = node;
    }
    return head;
 }
 // Convenience wrappers; hashtable.h carries the same two definitions.
 // ADD_TO_LIST stores the head returned by add_to_list() back into `list`;
 // `list` is evaluated twice, so pass a plain lvalue.
 #define ADD_TO_LIST(list,string) ((list) = add_to_list((list),(string)))
 // An empty list is represented by a NULL head pointer.
 #define NEW_LIST() (NULL)
 /*
 *   HASHTABLE DATATYPE
 */
 HASHTABLE *new_hashtable()
 {
    HASHTABLE *hashtable = malloc( sizeof(HASHTABLE) );
    return hashtable;
 }
 // Insert `string` into the bucket selected by its hash value; the bucket's
 // head pointer is replaced with whatever add_to_list() returns.
 void add_string_to_hashtable(HASHTABLE *hashtable, char *string)
 {
    size_t slot = hash_string(string) % HASHTABLE_N_BUCKETS;
    LIST **bucket = &hashtable->buckets[slot];
    *bucket = add_to_list(*bucket, string);
 }
 // Report whether `string` is stored in the hashtable: only the bucket
 // selected by the string's hash value is searched.
 bool find_string_in_hashtable(HASHTABLE *hashtable, char *string)
 {
    size_t slot = hash_string(string) % HASHTABLE_N_BUCKETS;
    LIST *bucket = hashtable->buckets[slot];
    bool present = find_in_list(bucket, string);
    return present;
 }
--- a/8-workshop/hashtable.h
+++ b/8-workshop/hashtable.h
@ -0,0 +1,32 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdbool.h>
 #include <string.h>
 #include "hashstring.h"
 // NOTE(review): presumably declared by hand because <string.h> hides the
 // POSIX strdup() prototype in strict ISO C modes on Linux — confirm against
 // the build flags in use.
 #if defined(__linux__)
 extern char *strdup(const char *);
 #endif
 // Number of buckets in every HASHTABLE; hash values are reduced modulo this
 // to pick a bucket.
 #define HASHTABLE_N_BUCKETS 1000
 // Singly linked list node: `item` is the stored string, `next` the following
 // node (NULL at the end of the list).
 // NOTE(review): identifiers containing a double underscore, such as the tag
 // __list__, are reserved for the implementation in C — consider renaming.
 typedef struct __list__ {
    char *item;
    struct __list__ *next;
 } LIST;
 // Hashtable: a fixed-size array of list heads, one per bucket.
 typedef struct {
    LIST *buckets[HASHTABLE_N_BUCKETS];
 } HASHTABLE;
 // List operations (implemented in hashtable.c).
 extern bool find_in_list(LIST*, char*);
 // Prototype required by ADD_TO_LIST(), which expands to a call to
 // add_to_list(); without it, users of the macro call an undeclared function.
 extern LIST *add_to_list(LIST*, char*);
 // Stores the head returned by add_to_list() back into `list`
 // (`list` is evaluated twice, so pass a plain lvalue).
 #define ADD_TO_LIST(list,string) ((list) = add_to_list((list),(string)))
 // An empty list is a NULL head pointer.
 #define NEW_LIST() (NULL)
 // Hashtable operations (implemented in hashtable.c).
 extern HASHTABLE *new_hashtable();
 extern void add_string_to_hashtable(HASHTABLE*, char*);
 extern bool find_string_in_hashtable(HASHTABLE*, char*);
--- a/8-workshop/test_hashtable.c
+++ b/8-workshop/test_hashtable.c
@ -0,0 +1,42 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "hashtable.h"
 // Print every item of the list, each followed by " => ", then a newline.
 // An empty (NULL) list prints just the newline.
 void print_list(LIST *list)
 {
    while (list != NULL) {
        printf("%s => ",list->item);
        list = list->next;
    }
    putchar('\n');
 }
 // Print one line per non-empty bucket: the bucket index, a separator, and
 // the bucket's list as rendered by print_list().
 void print_hashtable(HASHTABLE *hashtable)
 {
    for (size_t i = 0; i < HASHTABLE_N_BUCKETS; i++) {
        if (hashtable->buckets[i] != NULL) {
            // %zu is the conversion for size_t; %lu only matches where
            // size_t happens to be unsigned long.
            printf("%zu\t|\t",i);
            print_list(hashtable->buckets[i]);
        }
    }
 }
 // Manual smoke test: exercises the list helpers first, then the hashtable,
 // printing each lookup result as 0 or 1.
 int main(int argc, char const *argv[])
 {
    // --- list ---
    LIST *list = NEW_LIST();
    ADD_TO_LIST(list, "World");
    ADD_TO_LIST(list, "Hello");
    print_list(list);
    bool list_has_hello = find_in_list(list, "Hello");
    printf("%d\n", list_has_hello);
    bool list_has_hell = find_in_list(list, "Hell");
    printf("%d\n", list_has_hell);
    // --- hashtable ---
    HASHTABLE *hashtable = new_hashtable();
    add_string_to_hashtable(hashtable, "Hello");
    bool table_has_hell = find_string_in_hashtable(hashtable, "Hell");
    printf("%d\n", table_has_hell);
    // "World" is looked up once before and once after it is inserted.
    bool world_before = find_string_in_hashtable(hashtable, "World");
    printf("%d\n", world_before);
    add_string_to_hashtable(hashtable, "World");
    bool world_after = find_string_in_hashtable(hashtable, "World");
    printf("%d\n", world_after);
    print_hashtable(hashtable);
    return 0;
 }
		`@ -0,0 +1,3 @@`
							`#include <stdint.h>`

							`extern uint32_t hash_string(char*);`