Forgot to commit week 8 workshop. Added some fixes to the code too.

2024-11-30 10:50:19 +08:00 · 2021-10-03 23:39:24 +08:00 · 2021-10-03 23:39:24 +08:00 · 0b83189faf
commit 0b83189faf
parent c9a9de671b
8 changed files with 283 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,7 +2,9 @@
 !*.*
 !*/

+*.o
 *.out
+*.zip
 .vscode/
 .gdb_history

--- a/8-workshop/benchmark_hashtable.c
+++ b/8-workshop/benchmark_hashtable.c
@ -0,0 +1,113 @@
+/*
+* 6. 🌶 Why did we do this? The performance of a hashtable can be dramatically
+  better than other data structures, such as arrays, lists and binary trees.
+
+    ** ANSWER: Hashtables have almost O(1) performance. A traditional O(n) array
+       lookup is used for each bucket, but because the size of a bucket (n) is
+       small, this is a relatively small amount of time. The hash function is 
+       used to access one bucket. This function is independent of size 
+       (constant time complexity), so performance will tend to O(1) as the 
+       number of buckets tends to infinity (Assuming a perfect hash function).
+       For an array, the traditional array search has to potentially go through
+       n array items, so it has O(n) performance which is worse.
+
+* For a collection of several thousand (a million?) random strings use the
+  gettimeofday() system-call to measure how long it takes to insert, and then to
+  find, the same set of strings with both an array and your new hashtable.
+*/
+
+#include "benchmark_hashtable.h"
+
+/*
+*  Builds a heap-allocated, NUL-terminated string of `length` random
+*  characters. Every character is '0' + rand() % 72, so a NUL byte can never
+*  appear inside the string itself.
+*
+*  The caller owns the returned buffer and releases it with free().
+*  Terminates the program if the allocation fails.
+*/
+char *random_string(size_t length)
+{
+    // ONE EXTRA BYTE FOR THE TERMINATOR THAT strcmp(), strdup() AND
+    // hash_string() ALL RELY ON TO FIND THE END OF THE STRING.
+    char *string = malloc(length + 1);
+    if (string == NULL) {
+        fprintf(stderr, "malloc failed.\n");
+        exit(EXIT_FAILURE);
+    }
+    for (size_t i = 0; i < length; i++) {
+        string[i] = '0' + rand() % 72;
+    }
+    string[length] = '\0';
+    return string;
+}
+
+//  LINEAR SEARCH: REPORTS WHETHER ANY OF THE FIRST length ENTRIES OF arr
+//  COMPARES EQUAL (VIA strcmp) TO string.
+bool find_string_array(char *string, char **arr, size_t length)
+{
+    char **const stop = arr + length;
+
+    for (char **entry = arr; entry != stop; ++entry) {
+        if (strcmp(string, *entry) == 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
+//  FILLS arr WITH TEST_LENGTH RANDOM STRINGS OF STRING_LENGTH CHARACTERS
+//  AND ADDS EACH OF THEM TO hashtable AS WELL.
+void intialize_benchmark(char **arr, HASHTABLE *hashtable)
+{
+    size_t slot = 0;
+
+    while (slot < TEST_LENGTH) {
+        arr[slot] = random_string(STRING_LENGTH);
+        add_string_to_hashtable(hashtable, arr[slot]);
+        ++slot;
+    }
+}
+
+/*
+*  Microseconds elapsed between two gettimeofday() samples, combining the
+*  seconds and the microseconds fields. The result is negative if `end`
+*  is earlier than `start`.
+*/
+static long long elapsed_usec(const struct timeval *start,
+                              const struct timeval *end)
+{
+    return (long long)(end->tv_sec - start->tv_sec) * 1000000LL
+         + (long long)(end->tv_usec - start->tv_usec);
+}
+
+/*
+*  Times one lookup of a randomly chosen string taken from arr: first with
+*  the linear array search, then with the hashtable.
+*
+*  dt_arr  receives the microseconds spent in find_string_array().
+*  dt_hash receives the microseconds spent in find_string_in_hashtable().
+*
+*  Returns true when both measurements are usable. Returns false when a
+*  measurement came out negative (the clock reported an earlier time for the
+*  second sample); the caller should then discard the outputs and retry.
+*/
+bool one_benchmark(char **arr, HASHTABLE *hashtable,
+                   suseconds_t *dt_arr, suseconds_t *dt_hash)
+{
+    char *needle = arr[rand() % TEST_LENGTH];
+    struct timeval start;
+    struct timeval end;
+    long long elapsed;
+
+    gettimeofday(&start, NULL);
+    find_string_array(needle, arr, TEST_LENGTH);
+    gettimeofday(&end, NULL);
+    // tv_usec ALONE WRAPS AROUND EVERY SECOND, SO tv_sec MUST BE INCLUDED.
+    elapsed = elapsed_usec(&start, &end);
+    if (elapsed < 0) return false;
+    *dt_arr = (suseconds_t)elapsed;
+
+    gettimeofday(&start, NULL);
+    find_string_in_hashtable(hashtable, needle);
+    gettimeofday(&end, NULL);
+    elapsed = elapsed_usec(&start, &end);
+    if (elapsed < 0) return false;
+    *dt_hash = (suseconds_t)elapsed;
+
+    return true;
+}
+
+/*
+*  Prints the mean, minimum and maximum of times[0..length-1] through fmt_str.
+*
+*  fmt_str must consume, in this order, one double (%lf) followed by two
+*  long values (%ld). With length == 0 all three values are reported as 0.
+*/
+void print_stats(suseconds_t *times, size_t length, char *fmt_str)
+{
+    double sum = 0;
+    suseconds_t min = (length > 0) ? times[0] : 0;
+    suseconds_t max = 0;
+    for (size_t i = 0; i < length; i++)
+    {
+        // SUM FIRST AND DIVIDE ONCE: FEWER ROUNDING STEPS THAN DIVIDING
+        // EVERY ELEMENT BY length.
+        sum += (double)times[i];
+        if (times[i] < min) {
+            min = times[i];
+        }
+        if (times[i] > max) {
+            max = times[i];
+        }
+    }
+    double avg = (length > 0) ? sum / (double)length : 0.0;
+    // suseconds_t HAS NO printf CONVERSION OF ITS OWN, SO THE VALUES ARE
+    // WIDENED TO long TO MATCH THE %ld IN THE FORMAT STRINGS.
+    printf(fmt_str, avg, (long)min, (long)max);
+}
+
+/*
+*  Collects BENCHMARK_ITERATIONS timing samples for the array search and for
+*  the hashtable lookup, then prints the statistics of both.
+*/
+void benchmark(char **arr, HASHTABLE *hashtable)
+{
+    suseconds_t times_hash[BENCHMARK_ITERATIONS];
+    suseconds_t times_array[BENCHMARK_ITERATIONS];
+    for (size_t i = 0; i < BENCHMARK_ITERATIONS; i++) {
+        // REPEAT THE ITERATION UNTIL one_benchmark() REPORTS A USABLE SAMPLE.
+        bool success = false;
+        while (!success) {
+            success = one_benchmark(arr, hashtable,
+                      &times_array[i], &times_hash[i]);
+        }
+    }
+    print_stats(times_array, BENCHMARK_ITERATIONS, "ARRAY   : AVERAGE %lf MIN %ld MAX %ld\n");
+    print_stats(times_hash, BENCHMARK_ITERATIONS,  "HASHMAP : AVERAGE %lf MIN %ld MAX %ld\n");
+}
+
+/*
+*  Seeds the random generator, builds the random data set in both the array
+*  and the hashtable, then runs the benchmark and prints its results.
+*/
+int main(int argc, char const *argv[])
+{
+    (void)argc;
+    (void)argv;
+
+    // STATIC STORAGE KEEPS THE TEST_LENGTH POINTERS OFF THE STACK.
+    static char *arr[TEST_LENGTH];
+
+    srand((unsigned)time(NULL));
+    HASHTABLE *hashtable = new_hashtable();
+    if (hashtable == NULL) {
+        fprintf(stderr, "malloc failed.\n");
+        exit(EXIT_FAILURE);
+    }
+    intialize_benchmark(arr, hashtable);
+    benchmark(arr, hashtable);
+    return 0;
+}
--- a/8-workshop/benchmark_hashtable.h
+++ b/8-workshop/benchmark_hashtable.h
@ -0,0 +1,11 @@
+#ifndef BENCHMARK_HASHTABLE_H
+#define BENCHMARK_HASHTABLE_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <time.h>
+#include <sys/time.h>
+
+#include "hashtable.h"
+
+#define TEST_LENGTH 100000          // NUMBER OF RANDOM STRINGS STORED AND SEARCHED.
+#define STRING_LENGTH 127           // LENGTH PASSED TO random_string() FOR EACH STRING.
+#define BENCHMARK_ITERATIONS 10000  // TIMED LOOKUPS PER DATA STRUCTURE.
+
+#endif
--- a/8-workshop/hashstring.c
+++ b/8-workshop/hashstring.c
@ -0,0 +1,17 @@
+#include "hashstring.h"
+
+//  FUNCTION hash_string() ACCEPTS A STRING PARAMETER,
+//  AND RETURNS AN UNSIGNED 32-BIT INTEGER AS ITS RESULT
+//
+//  see:  https://en.cppreference.com/w/c/types/integer
+
+uint32_t hash_string(char *string)
+{
+    uint32_t result = 0;
+
+    //  EACH CHARACTER IS FOLDED IN AS result*33 + character; THE uint32_t
+    //  ARITHMETIC SIMPLY WRAPS AROUND ON OVERFLOW.
+    for (char *cursor = string; *cursor != '\0'; ++cursor) {
+        result = result*33 + *cursor;
+    }
+    return result;
+}
--- a/8-workshop/hashstring.h
+++ b/8-workshop/hashstring.h
@ -0,0 +1,3 @@
+#ifndef HASHSTRING_H
+#define HASHSTRING_H
+
+#include <stdint.h>
+
+//  RETURNS AN UNSIGNED 32-BIT HASH OF THE NUL-TERMINATED string.
+extern uint32_t hash_string(char *string);
+
+#endif
--- a/8-workshop/hashtable.c
+++ b/8-workshop/hashtable.c
@ -0,0 +1,63 @@
+#include "hashtable.h"
+
+/*
+*   LIST DATATYPE
+*/
+
+//  WALKS THE LIST FROM ITS HEAD AND REPORTS WHETHER ANY NODE'S item
+//  COMPARES EQUAL (VIA strcmp) TO string. AN EMPTY (NULL) LIST YIELDS false.
+bool find_in_list(LIST *list, char *string)
+{
+    for (LIST *node = list; node != NULL; node = node->next) {
+        if (strcmp(string, node->item) == 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
+//  RETURNS THE HEAD OF THE LIST AFTER MAKING SURE IT CONTAINS string.
+//  A STRING ALREADY PRESENT IS NOT ADDED AGAIN; OTHERWISE A PRIVATE COPY OF
+//  string IS STORED IN A NEW NODE PLACED AT THE FRONT OF THE LIST.
+//  THE PROGRAM EXITS IF EITHER ALLOCATION FAILS.
+LIST *add_to_list(LIST *list, char *string)
+{
+    if (!find_in_list(list, string)) {
+        char *copy = strdup(string);
+        if (copy == NULL) {
+            fprintf(stderr, "strdup failed.\n");
+            exit(EXIT_FAILURE);
+        }
+
+        LIST *head = malloc(sizeof(LIST));
+        if (head == NULL) {
+            fprintf(stderr, "malloc failed.\n");
+            exit(EXIT_FAILURE);
+        }
+
+        head->next = list;
+        head->item = copy;
+        list = head;
+    }
+    return list;
+}
+
+//  CONVENIENCE MACROS; THESE REPEAT THE IDENTICAL DEFINITIONS IN hashtable.h.
+//  ADD_TO_LIST REASSIGNS list TO THE HEAD RETURNED BY add_to_list().
+//  NEW_LIST YIELDS THE EMPTY LIST, WHICH IS REPRESENTED BY NULL.
+#define ADD_TO_LIST(list,string) ((list) = add_to_list((list),(string)))
+#define NEW_LIST() (NULL)
+
+/*
+*   HASHTABLE DATATYPE
+*/
+
+/*
+*  Allocates a hashtable on the heap with every bucket set to the empty list.
+*  Terminates the program if the allocation fails, so the result is never
+*  NULL.
+*/
+HASHTABLE *new_hashtable(void)
+{
+    HASHTABLE *hashtable = malloc(sizeof *hashtable);
+    if (hashtable == NULL) {
+        fprintf(stderr, "malloc failed.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    // malloc() RETURNS UNINITIALISED MEMORY; WITHOUT THIS LOOP THE BUCKET
+    // POINTERS ARE GARBAGE THAT find_in_list() WOULD DEREFERENCE.
+    for (size_t i = 0; i < HASHTABLE_N_BUCKETS; i++) {
+        hashtable->buckets[i] = NEW_LIST();
+    }
+    return hashtable;
+}
+
+//  ADDS string TO THE BUCKET SELECTED BY ITS HASH MODULO HASHTABLE_N_BUCKETS.
+void add_string_to_hashtable(HASHTABLE *hashtable, char *string)
+{
+    LIST **bucket = &hashtable->buckets[hash_string(string) % HASHTABLE_N_BUCKETS];
+
+    *bucket = add_to_list(*bucket, string);
+}
+
+//  REPORTS WHETHER string IS STORED IN THE BUCKET SELECTED BY ITS HASH
+//  MODULO HASHTABLE_N_BUCKETS.
+bool find_string_in_hashtable(HASHTABLE *hashtable, char *string)
+{
+    uint32_t hash = hash_string(string);
+    LIST *bucket = hashtable->buckets[hash % HASHTABLE_N_BUCKETS];
+
+    return find_in_list(bucket, string);
+}
--- a/8-workshop/hashtable.h
+++ b/8-workshop/hashtable.h
@ -0,0 +1,32 @@
+#ifndef HASHTABLE_H
+#define HASHTABLE_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include "hashstring.h"
+
+//  <string.h> MAY HIDE strdup() IN STRICT ISO C MODES, SO IT IS DECLARED
+//  EXPLICITLY ON LINUX.
+#if defined(__linux__)
+extern char *strdup(const char *);
+#endif
+
+//  NUMBER OF BUCKETS (LISTS) IN EVERY HASHTABLE.
+#define HASHTABLE_N_BUCKETS 1000
+
+//  A NODE OF A SINGLY-LINKED LIST OF STRINGS; NULL IS THE EMPTY LIST.
+typedef struct __list__ {
+    char *item;
+    struct __list__ *next;
+} LIST;
+
+//  A FIXED-SIZE ARRAY OF BUCKETS, EACH BUCKET BEING A LIST OF STRINGS.
+typedef struct {
+    LIST *buckets[HASHTABLE_N_BUCKETS];
+} HASHTABLE;
+
+//  REPORTS WHETHER THE LIST HOLDS A STRING EQUAL TO THE GIVEN ONE.
+extern bool find_in_list(LIST*, char*);
+
+//  RETURNS THE HEAD OF THE LIST AFTER ADDING THE GIVEN STRING TO IT.
+//  DECLARED HERE BECAUSE ADD_TO_LIST() EXPANDS TO A CALL OF THIS FUNCTION.
+extern LIST *add_to_list(LIST*, char*);
+
+#define ADD_TO_LIST(list,string) ((list) = add_to_list((list),(string)))
+
+#define NEW_LIST() (NULL)
+
+//  ALLOCATES A NEW HASHTABLE ON THE HEAP.
+extern HASHTABLE *new_hashtable(void);
+
+extern void add_string_to_hashtable(HASHTABLE*, char*);
+
+extern bool find_string_in_hashtable(HASHTABLE*, char*);
+
+#endif
--- a/8-workshop/test_hashtable.c
+++ b/8-workshop/test_hashtable.c
@ -0,0 +1,42 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "hashtable.h"
+
+//  PRINTS EVERY ITEM OF THE LIST FOLLOWED BY " => ", THEN A NEWLINE.
+void print_list(LIST *list)
+{
+    while (list != NULL) {
+        printf("%s => ",list->item);
+        list = list->next;
+    }
+    putchar('\n');
+}
+
+//  PRINTS EVERY NON-EMPTY BUCKET: ITS INDEX, A TAB-SEPARATED BAR, THEN THE
+//  CONTENTS OF ITS LIST.
+void print_hashtable(HASHTABLE *hashtable)
+{
+    for (size_t i = 0; i < HASHTABLE_N_BUCKETS; i++) {
+        if (hashtable->buckets[i] != NULL) {
+            // %zu IS THE CONVERSION FOR size_t; %lu ONLY MATCHES WHERE
+            // size_t HAPPENS TO BE unsigned long.
+            printf("%zu\t|\t",i);
+            print_list(hashtable->buckets[i]);
+        }
+    }
+}
+
+//  SMOKE TEST: EXERCISES THE LIST ON ITS OWN, THEN THE HASHTABLE, PRINTING
+//  EACH LOOKUP RESULT AS 0 OR 1.
+int main(int argc, char const *argv[])
+{
+    bool found;
+
+    //  LIST ONLY.
+    LIST *words = NEW_LIST();
+    ADD_TO_LIST(words, "World");
+    ADD_TO_LIST(words, "Hello");
+    print_list(words);
+    found = find_in_list(words, "Hello");
+    printf("%d\n", found);
+    found = find_in_list(words, "Hell");
+    printf("%d\n", found);
+
+    //  HASHTABLE: "World" IS LOOKED UP BOTH BEFORE AND AFTER IT IS ADDED.
+    HASHTABLE *table = new_hashtable();
+    add_string_to_hashtable(table, "Hello");
+    found = find_string_in_hashtable(table, "Hell");
+    printf("%d\n", found);
+    found = find_string_in_hashtable(table, "World");
+    printf("%d\n", found);
+    add_string_to_hashtable(table, "World");
+    found = find_string_in_hashtable(table, "World");
+    printf("%d\n", found);
+    print_hashtable(table);
+    return 0;
+}