diff --git a/.gitignore b/.gitignore
index 6f34b18..8cec509 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,9 @@
 !*.*
 !*/
 
+*.o
 *.out
+*.zip
 
 .vscode/
 .gdb_history
diff --git a/Week 8-workshop/benchmark_hashtable.c b/Week 8-workshop/benchmark_hashtable.c
new file mode 100644
index 0000000..3c5784d
--- /dev/null
+++ b/Week 8-workshop/benchmark_hashtable.c
@@ -0,0 +1,151 @@
+/*
+* 6. 🌶 Why did we do this? The performance of a hashtable can be dramatically
+  better than other data structures, such as arrays, lists and binary trees.
+
+  ** ANSWER: Hashtables have almost O(1) performance. A traditional O(n) array
+  lookup is used for each bucket, but because the size of a bucket (n) is
+  small, this is a relatively small amount of time. The hash function is
+  used to access one bucket. This function is independent of size
+  (constant time complexity), so performance will tend to O(1) as the
+  number of buckets tends to infinity (Assuming a perfect hash function).
+  For an array, the traditional array search has to potentially go through
+  n array items, so it has O(n) performance which is worse.
+
+* For a collection of several thousand (a million?) random strings use the
+  gettimeofday() system-call to measure how long it takes to insert, and to then
+  find, the same set of strings with both an array and your new hashtable.
+*/
+
+#include "benchmark_hashtable.h"
+
+// RETURNS A HEAP-ALLOCATED, NUL-TERMINATED STRING OF length RANDOM CHARACTERS,
+// EACH ONE OF THE 72 CHARACTER CODES STARTING AT '0'. THE CALLER OWNS THE
+// RESULT. EXITS THE PROGRAM IF THE ALLOCATION FAILS.
+char *random_string(size_t length)
+{
+    // ONE EXTRA BYTE FOR THE TERMINATOR: strcmp(), strdup() AND hash_string()
+    // ALL READ UNTIL THEY REACH '\0'.
+    char *string = malloc(length + 1);
+    if (string == NULL) {
+        fprintf(stderr, "malloc failed.\n");
+        exit(EXIT_FAILURE);
+    }
+    for (size_t i = 0; i < length; i++) {
+        string[i] = '0' + rand() % 72;
+    }
+    string[length] = '\0';
+    return string;
+}
+
+// LINEAR SEARCH: RETURNS true IF string EQUALS ONE OF THE FIRST length
+// ELEMENTS OF arr.
+bool find_string_array(char *string, char **arr, size_t length)
+{
+    for (size_t i = 0; i < length; i++) {
+        if (strcmp(string, arr[i]) == 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// STORES TEST_LENGTH RANDOM STRINGS IN arr AND ADDS EACH OF THEM TO hashtable,
+// SO THAT BOTH CONTAINERS ARE GIVEN THE SAME STRINGS.
+void intialize_benchmark(char **arr, HASHTABLE *hashtable)
+{
+    for (size_t i = 0; i < TEST_LENGTH; i++) {
+        char *string = random_string(STRING_LENGTH);
+        arr[i] = string;
+        add_string_to_hashtable(hashtable, string);
+    }
+}
+
+// MICROSECONDS FROM start TO end. tv_sec IS INCLUDED SO THAT AN INTERVAL WHICH
+// CROSSES A ONE-SECOND BOUNDARY IS STILL MEASURED CORRECTLY.
+static long elapsed_usec(const struct timeval *start, const struct timeval *end)
+{
+    return (long)(end->tv_sec - start->tv_sec) * 1000000L
+        + (long)(end->tv_usec - start->tv_usec);
+}
+
+// TIMES ONE LOOKUP OF A RANDOMLY CHOSEN ELEMENT OF arr, FIRST IN THE ARRAY AND
+// THEN IN THE HASHTABLE, STORING THE ELAPSED MICROSECONDS IN *dt_arr AND
+// *dt_hash. RETURNS false IF AN INTERVAL IS NEGATIVE (THE WALL CLOCK READ BY
+// gettimeofday() CAN BE SET BACKWARDS); THE CALLER THEN REPEATS THE MEASUREMENT.
+bool one_benchmark(char **arr, HASHTABLE *hashtable,
+    suseconds_t *dt_arr, suseconds_t *dt_hash)
+{
+    char *random_string = arr[rand() % TEST_LENGTH];
+    struct timeval start;
+    struct timeval end;
+
+    gettimeofday(&start, NULL);
+    find_string_array(random_string, arr, TEST_LENGTH);
+    gettimeofday(&end, NULL);
+    long elapsed = elapsed_usec(&start, &end);
+    if (elapsed < 0) return false;
+    *dt_arr = elapsed;
+
+    gettimeofday(&start, NULL);
+    find_string_in_hashtable(hashtable, random_string);
+    gettimeofday(&end, NULL);
+    elapsed = elapsed_usec(&start, &end);
+    if (elapsed < 0) return false;
+    *dt_hash = elapsed;
+
+    return true;
+}
+
+// PRINTS THE MEAN, MINIMUM AND MAXIMUM OF THE FIRST length ELEMENTS OF times.
+// fmt_str MUST CONTAIN ONE %lf CONVERSION FOLLOWED BY TWO %ld CONVERSIONS:
+// THE MINIMUM AND MAXIMUM ARE PASSED TO printf() AS long.
+void print_stats(suseconds_t *times, size_t length, const char *fmt_str)
+{
+    double avg = 0;
+    long min = 0;
+    long max = 0;
+    for (size_t i = 0; i < length; i++) {
+        avg += times[i]*1.0/length; // LESS PRECISE DUE TO FP MATH.
+        if (i == 0 || times[i] < min) {
+            min = times[i];
+        }
+        if (i == 0 || times[i] > max) {
+            max = times[i];
+        }
+    }
+    printf(fmt_str, avg, min, max);
+}
+
+// RUNS BENCHMARK_ITERATIONS TIMED LOOKUPS AGAINST BOTH CONTAINERS AND PRINTS
+// THE STATISTICS FOR EACH OF THEM.
+void benchmark(char **arr, HASHTABLE *hashtable)
+{
+    suseconds_t times_hash[BENCHMARK_ITERATIONS];
+    suseconds_t times_array[BENCHMARK_ITERATIONS];
+    for (size_t i = 0; i < BENCHMARK_ITERATIONS; i++) {
+        bool success = false;
+        while (!success) {
+            success = one_benchmark(arr, hashtable,
+                &times_array[i], &times_hash[i]);
+        }
+    }
+    print_stats(times_array, BENCHMARK_ITERATIONS, "ARRAY : AVERAGE %lf MIN %ld MAX %ld\n");
+    print_stats(times_hash, BENCHMARK_ITERATIONS, "HASHMAP : AVERAGE %lf MIN %ld MAX %ld\n");
+}
+
+int main(int argc, char const *argv[])
+{
+    // static: TEST_LENGTH POINTERS MAY NOT FIT ON THE STACK IF TEST_LENGTH
+    // IS RAISED.
+    static char *arr[TEST_LENGTH];
+
+    srand(time(NULL));
+    HASHTABLE *hashtable = new_hashtable();
+    if (hashtable == NULL) {
+        fprintf(stderr, "new_hashtable failed.\n");
+        exit(EXIT_FAILURE);
+    }
+    intialize_benchmark(arr, hashtable);
+    benchmark(arr, hashtable);
+    return 0;
+}
diff --git a/Week 8-workshop/benchmark_hashtable.h b/Week 8-workshop/benchmark_hashtable.h
new file mode 100644
index 0000000..9c1d9eb
--- /dev/null
+++ b/Week 8-workshop/benchmark_hashtable.h
@@ -0,0 +1,12 @@
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+#include "hashtable.h"
+
+#define TEST_LENGTH 100000 // SOME LARGE VALUE.
+#define STRING_LENGTH 127
+#define BENCHMARK_ITERATIONS 10000
diff --git a/Week 8-workshop/hashstring.c b/Week 8-workshop/hashstring.c
new file mode 100644
index 0000000..0208bfe
--- /dev/null
+++ b/Week 8-workshop/hashstring.c
@@ -0,0 +1,18 @@
+#include "hashstring.h"
+
+// FUNCTION hash_string() ACCEPTS A STRING PARAMETER,
+// AND RETURNS AN UNSIGNED 32-BIT INTEGER AS ITS RESULT
+//
+// see: https://en.cppreference.com/w/c/types/integer
+
+uint32_t hash_string(char *string)
+{
+    uint32_t hash = 0;
+
+    while(*string != '\0') {
+        // unsigned char: THE RESULT MUST NOT DEPEND ON WHETHER char IS SIGNED.
+        hash = hash*33 + (unsigned char)*string;
+        ++string;
+    }
+    return hash;
+}
diff --git a/Week 8-workshop/hashstring.h b/Week 8-workshop/hashstring.h
new file mode 100644
index 0000000..44fb215
--- /dev/null
+++ b/Week 8-workshop/hashstring.h
@@ -0,0 +1,8 @@
+#ifndef HASHSTRING_H
+#define HASHSTRING_H
+
+#include <stdint.h>
+
+extern uint32_t hash_string(char*);
+
+#endif
diff --git a/Week 8-workshop/hashtable.c b/Week 8-workshop/hashtable.c
new file mode 100644
index 0000000..3b3c7de
--- /dev/null
+++ b/Week 8-workshop/hashtable.c
@@ -0,0 +1,76 @@
+#include "hashtable.h"
+
+/*
+* LIST DATATYPE
+*/
+
+// RETURNS true IF string EQUALS THE ITEM OF ANY NODE OF list.
+bool find_in_list(LIST *list, char *string)
+{
+    while (list != NULL) {
+        if (strcmp(string, list->item) == 0) {
+            return true;
+        }
+        list = list->next;
+    }
+    return false;
+}
+
+// RETURNS list UNCHANGED IF IT ALREADY HOLDS string; OTHERWISE RETURNS A NEW
+// HEAD NODE HOLDING A COPY OF string, LINKED IN FRONT OF list.
+// EXITS THE PROGRAM IF MEMORY CANNOT BE ALLOCATED.
+LIST *add_to_list(LIST *list, char *string)
+{
+    if (find_in_list(list,string)) {
+        return list;
+    }
+
+    char *string_copy = strdup(string);
+    if (string_copy == NULL) {
+        fprintf(stderr, "strdup failed.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    LIST *new_node = malloc(sizeof *new_node);
+    if (new_node == NULL) {
+        fprintf(stderr, "malloc failed.\n");
+        exit(EXIT_FAILURE);
+    }
+    new_node->item = string_copy;
+    new_node->next = list;
+    return new_node;
+}
+
+/*
+* HASHTABLE DATATYPE
+*/
+
+// RETURNS A NEW HASHTABLE WHOSE BUCKETS ARE ALL EMPTY LISTS.
+// EXITS THE PROGRAM IF MEMORY CANNOT BE ALLOCATED.
+HASHTABLE *new_hashtable(void)
+{
+    HASHTABLE *hashtable = malloc(sizeof *hashtable);
+    if (hashtable == NULL) {
+        fprintf(stderr, "malloc failed.\n");
+        exit(EXIT_FAILURE);
+    }
+    // malloc() LEAVES THE BUCKETS INDETERMINATE: EMPTY EACH ONE BEFORE USE.
+    for (size_t i = 0; i < HASHTABLE_N_BUCKETS; i++) {
+        hashtable->buckets[i] = NEW_LIST();
+    }
+    return hashtable;
+}
+
+// ADDS A COPY OF string TO THE BUCKET SELECTED BY ITS HASH.
+void add_string_to_hashtable(HASHTABLE *hashtable, char *string)
+{
+    size_t index = hash_string(string) % HASHTABLE_N_BUCKETS;
+    ADD_TO_LIST(hashtable->buckets[index], string);
+}
+
+// RETURNS true IF string IS STORED IN THE BUCKET SELECTED BY ITS HASH.
+bool find_string_in_hashtable(HASHTABLE *hashtable, char *string)
+{
+    size_t index = hash_string(string) % HASHTABLE_N_BUCKETS;
+    return find_in_list(hashtable->buckets[index], string);
+}
diff --git a/Week 8-workshop/hashtable.h b/Week 8-workshop/hashtable.h
new file mode 100644
index 0000000..8f11695
--- /dev/null
+++ b/Week 8-workshop/hashtable.h
@@ -0,0 +1,41 @@
+#ifndef HASHTABLE_H
+#define HASHTABLE_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include "hashstring.h"
+
+#if defined(__linux__)
+extern char *strdup(const char *);
+#endif
+
+#define HASHTABLE_N_BUCKETS 1000
+
+typedef struct __list__ {
+    char *item;
+    struct __list__ *next;
+} LIST;
+
+typedef struct {
+    LIST *buckets[HASHTABLE_N_BUCKETS];
+} HASHTABLE;
+
+extern bool find_in_list(LIST*, char*);
+
+// DECLARED HERE BECAUSE ADD_TO_LIST() EXPANDS TO A CALL OF add_to_list() IN
+// EVERY FILE THAT USES THE MACRO.
+extern LIST *add_to_list(LIST*, char*);
+
+#define ADD_TO_LIST(list,string) ((list) = add_to_list((list),(string)))
+
+#define NEW_LIST() (NULL)
+
+extern HASHTABLE *new_hashtable(void);
+
+extern void add_string_to_hashtable(HASHTABLE*, char*);
+
+extern bool find_string_in_hashtable(HASHTABLE*, char*);
+
+#endif
diff --git a/Week 8-workshop/test_hashtable.c b/Week 8-workshop/test_hashtable.c
new file mode 100644
index 0000000..a1fa3a4
--- /dev/null
+++ b/Week 8-workshop/test_hashtable.c
@@ -0,0 +1,43 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "hashtable.h"
+
+// PRINTS EVERY ITEM OF list ON ONE LINE, EACH FOLLOWED BY " => ".
+void print_list(LIST *list)
+{
+    while (list != NULL) {
+        printf("%s => ",list->item);
+        list = list->next;
+    }
+    putchar('\n');
+}
+
+// PRINTS THE INDEX AND CONTENTS OF EVERY NON-EMPTY BUCKET OF hashtable.
+void print_hashtable(HASHTABLE *hashtable)
+{
+    for (size_t i = 0; i < HASHTABLE_N_BUCKETS; i++) {
+        if (hashtable->buckets[i] != NULL) {
+            printf("%zu\t|\t",i); // %zu IS THE CONVERSION FOR size_t.
+            print_list(hashtable->buckets[i]);
+        }
+    }
+}
+
+int main(int argc, char const *argv[])
+{
+    LIST *list = NEW_LIST();
+    ADD_TO_LIST(list, "World");
+    ADD_TO_LIST(list, "Hello");
+    print_list(list);
+    printf("%d\n", find_in_list(list,"Hello"));
+    printf("%d\n", find_in_list(list,"Hell"));
+
+    HASHTABLE *hashtable = new_hashtable();
+    add_string_to_hashtable(hashtable, "Hello");
+    printf("%d\n", find_string_in_hashtable(hashtable,"Hell"));
+    printf("%d\n", find_string_in_hashtable(hashtable,"World"));
+    add_string_to_hashtable(hashtable, "World");
+    printf("%d\n", find_string_in_hashtable(hashtable,"World"));
+    print_hashtable(hashtable);
+    return 0;
+}