Forgot to commit week 8 workshop. Added some fixes to the code too.

This commit is contained in:
Peter 2021-10-03 23:39:24 +08:00
parent c9a9de671b
commit 0b83189faf
8 changed files with 283 additions and 0 deletions

2
.gitignore vendored
View File

@ -2,7 +2,9 @@
!*.* !*.*
!*/ !*/
*.o
*.out *.out
*.zip
.vscode/ .vscode/
.gdb_history .gdb_history

View File

@ -0,0 +1,113 @@
/*
* 6. 🌶 Why did we do this? The performance of a hashtable can be dramatically
better than other data structures, such as arrays, lists and binary trees.
** ANSWER: Hashtables have almost O(1) performance. A linear O(n) search is
still used within each bucket, but because the size of a bucket (n) is
small, this takes a relatively small amount of time. The hash function is
used to access one bucket. This function is independent of size
(constant time complexity), so performance will tend to O(1) as the
number of buckets tends to infinity (Assuming a perfect hash function).
For an array, the traditional array search has to potentially go through
n array items, so it has O(n) performance which is worse.
* For a collection of several thousand (a million?) random strings use the
gettimeofday() system-call to measure how long it takes to insert, and then to
find, the same set of strings with both an array and your new hashtable.
*/
#include "benchmark_hashtable.h"
/*
 * Build a heap-allocated, NUL-terminated string of `length` random characters.
 *
 * Every character is '0' + rand() % 72, i.e. one of the 72 consecutive
 * character codes starting at '0', so a generated character is never '\0'.
 * One byte more than `length` is allocated for the terminator: the result is
 * handed to strcmp(), strdup() and hash_string(), which all read up to a NUL.
 *
 * Returns the new string; the caller owns it and releases it with free().
 * Prints a message and exits with EXIT_FAILURE if the allocation fails.
 */
char *random_string(size_t length)
{
    char *string = malloc(length + 1);
    if (string == NULL) {
        fprintf(stderr, "malloc failed.\n");
        exit(EXIT_FAILURE);
    }
    for (size_t i = 0; i < length; i++) {
        string[i] = '0' + rand() % 72;
    }
    string[length] = '\0';
    return string;
}
/*
 * Linear search over an array of C strings.
 *
 * Returns true when `string` compares equal (strcmp() == 0) to one of the
 * first `length` entries of `arr`, false otherwise. The scan stops at the
 * first match.
 */
bool find_string_array(char *string, char **arr, size_t length)
{
    size_t pos = 0;
    while (pos < length && strcmp(string, arr[pos]) != 0) {
        pos++;
    }
    /* pos only stops short of length when a match ended the scan. */
    return pos < length;
}
/*
 * Fill both containers with the same data set.
 *
 * Generates TEST_LENGTH random strings of STRING_LENGTH characters each;
 * every string is stored in the next slot of `arr` and is also inserted
 * into `hashtable`.
 */
void intialize_benchmark(char **arr, HASHTABLE *hashtable)
{
    size_t slot = 0;
    while (slot < TEST_LENGTH) {
        arr[slot] = random_string(STRING_LENGTH);
        add_string_to_hashtable(hashtable, arr[slot]);
        slot++;
    }
}
/* Microseconds elapsed from *start to *end, taking tv_sec into account. */
static suseconds_t elapsed_usec(const struct timeval *start,
                                const struct timeval *end)
{
    return (suseconds_t)((end->tv_sec - start->tv_sec) * 1000000L
                         + (end->tv_usec - start->tv_usec));
}

/*
 * Time one lookup of the same randomly chosen stored string in both
 * containers.
 *
 * arr       - array of TEST_LENGTH strings searched linearly
 * hashtable - hashtable searched for the same string
 * dt_arr    - receives the array lookup time in microseconds
 * dt_hash   - receives the hashtable lookup time in microseconds
 *
 * The intervals are computed from both tv_sec and tv_usec, so a measurement
 * that happens to straddle a second boundary is still valid.
 *
 * Returns true when both intervals are non-negative. Returns false as soon as
 * an interval comes out negative (for example if the clock was set backwards
 * between the two readings); the caller is expected to retry.
 */
bool one_benchmark(char **arr, HASHTABLE *hashtable,
                   suseconds_t *dt_arr, suseconds_t *dt_hash)
{
    char *target = arr[rand() % TEST_LENGTH];
    struct timeval start;
    struct timeval end;

    gettimeofday(&start, NULL);
    find_string_array(target, arr, TEST_LENGTH);
    gettimeofday(&end, NULL);
    *dt_arr = elapsed_usec(&start, &end);
    if (*dt_arr < 0) {
        return false;
    }

    gettimeofday(&start, NULL);
    find_string_in_hashtable(hashtable, target);
    gettimeofday(&end, NULL);
    *dt_hash = elapsed_usec(&start, &end);
    if (*dt_hash < 0) {
        return false;
    }

    return true;
}
/*
 * Print the average, minimum and maximum of `length` time samples.
 *
 * times   - samples, in microseconds
 * length  - number of samples; when 0, all three statistics print as 0
 * fmt_str - printf format that must consume, in order, one double (average)
 *           and two ints (minimum, maximum), matching the "%lf ... %d ... %d"
 *           strings used by benchmark(). It is passed straight to printf(),
 *           so it must come from trusted code only.
 *
 * The samples are summed first and divided once, which avoids accumulating a
 * rounding error per element. Minimum and maximum are seeded from the first
 * sample instead of a type-specific sentinel, and are converted to int so the
 * arguments really have the type the "%d" conversions expect.
 */
void print_stats(suseconds_t *times, size_t length, char *fmt_str)
{
    double sum = 0;
    suseconds_t min = 0;
    suseconds_t max = 0;

    if (length > 0) {
        min = times[0];
        max = times[0];
    }
    for (size_t i = 0; i < length; i++) {
        sum += (double)times[i];
        if (times[i] < min) {
            min = times[i];
        }
        if (times[i] > max) {
            max = times[i];
        }
    }

    double avg = (length > 0) ? sum / (double)length : 0.0;
    printf(fmt_str, avg, (int)min, (int)max);
}
/*
 * Collect BENCHMARK_ITERATIONS timing samples for array and hashtable
 * lookups, then print the statistics of each series.
 *
 * A sample slot is only advanced once one_benchmark() reports success, so a
 * rejected measurement is simply taken again into the same slot.
 */
void benchmark(char **arr, HASHTABLE *hashtable)
{
    suseconds_t times_array[BENCHMARK_ITERATIONS];
    suseconds_t times_hash[BENCHMARK_ITERATIONS];

    size_t sample = 0;
    while (sample < BENCHMARK_ITERATIONS) {
        if (one_benchmark(arr, hashtable,
                          &times_array[sample], &times_hash[sample])) {
            sample++;
        }
    }

    print_stats(times_array, BENCHMARK_ITERATIONS, "ARRAY : AVERAGE %lf MIN %d MAX %d\n");
    print_stats(times_hash, BENCHMARK_ITERATIONS, "HASHMAP : AVERAGE %lf MIN %d MAX %d\n");
}
/*
 * Benchmark driver: seeds the random generator, fills an array and a
 * hashtable with the same TEST_LENGTH random strings, and prints lookup
 * timing statistics for both.
 *
 * The array of string pointers is allocated on the heap rather than as an
 * automatic array, so its size (TEST_LENGTH pointers) does not count against
 * the stack limit. The strings are freed before returning; this is safe
 * because the hashtable stores its own strdup() copies. The hashtable itself
 * is left to process exit because no destructor for it exists.
 */
int main(int argc, char const *argv[])
{
    (void)argc;
    (void)argv;

    srand((unsigned)time(NULL));

    HASHTABLE *hashtable = new_hashtable();
    if (hashtable == NULL) {
        fprintf(stderr, "malloc failed.\n");
        exit(EXIT_FAILURE);
    }

    char **arr = malloc(TEST_LENGTH * sizeof *arr);
    if (arr == NULL) {
        fprintf(stderr, "malloc failed.\n");
        exit(EXIT_FAILURE);
    }

    intialize_benchmark(arr, hashtable);
    benchmark(arr, hashtable);

    for (size_t i = 0; i < TEST_LENGTH; i++) {
        free(arr[i]);
    }
    free(arr);
    return 0;
}

View File

@ -0,0 +1,11 @@
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <time.h>
#include <sys/time.h>
#include "hashtable.h"
// Number of random strings generated and stored in both the array and the
// hashtable; also the number of array entries scanned by each array lookup.
#define TEST_LENGTH 100000 // SOME LARGE VALUE.
// Length argument passed to random_string() for every generated string.
#define STRING_LENGTH 127
// Number of timing samples collected per container by benchmark().
#define BENCHMARK_ITERATIONS 10000

View File

@ -0,0 +1,17 @@
#include "hashstring.h"
// hash_string() maps a NUL-terminated string to an unsigned 32-bit hash.
//
// Starting from 0, the running value is multiplied by 33 and the next
// character is added, for every character before the terminator. The
// arithmetic is done in uint32_t, so it wraps around modulo 2^32.
// An empty string hashes to 0.
//
// see: https://en.cppreference.com/w/c/types/integer
uint32_t hash_string(char *string)
{
    uint32_t accumulator = 0;
    for (char *cursor = string; *cursor != '\0'; cursor++) {
        accumulator = accumulator*33 + *cursor;
    }
    return accumulator;
}

View File

@ -0,0 +1,3 @@
#include <stdint.h>
// Hashes a NUL-terminated string to an unsigned 32-bit value.
extern uint32_t hash_string(char*);

View File

@ -0,0 +1,63 @@
#include "hashtable.h"
/*
* LIST DATATYPE
*/
/*
 * Walk the list from `list` to its NULL end and report whether any node's
 * item compares equal (strcmp() == 0) to `string`.
 * An empty (NULL) list never contains anything, so the result is false.
 */
bool find_in_list(LIST *list, char *string)
{
    for (LIST *node = list; node != NULL; node = node->next) {
        if (!strcmp(string, node->item)) {
            return true;
        }
    }
    return false;
}
/*
 * Insert `string` at the front of `list` unless it is already present.
 *
 * The list stores its own strdup() copy of the string, so the caller keeps
 * ownership of the argument. Returns the head of the resulting list: the
 * new node when one was added, otherwise the unchanged `list`.
 * Prints a message and exits with EXIT_FAILURE if either allocation fails.
 */
LIST *add_to_list(LIST *list, char *string)
{
    LIST *head = list;

    if (!find_in_list(list, string)) {
        char *copy = strdup(string);
        if (!copy) {
            fprintf(stderr, "strdup failed.\n");
            exit(EXIT_FAILURE);
        }

        head = malloc(sizeof(LIST));
        if (!head) {
            fprintf(stderr, "malloc failed.\n");
            exit(EXIT_FAILURE);
        }

        /* The new node becomes the head and points at the previous head. */
        head->next = list;
        head->item = copy;
    }

    return head;
}
// Adds `string` to `list` and stores the returned head back into `list`.
// Note that the `list` argument is expanded twice.
#define ADD_TO_LIST(list,string) ((list) = add_to_list((list),(string)))
// An empty list is represented by a NULL pointer.
#define NEW_LIST() (NULL)
/*
* HASHTABLE DATATYPE
*/
/*
 * Allocate a new, empty hashtable.
 *
 * The storage is zero-filled with calloc() so that every bucket starts out
 * as an empty list; find_in_list() and add_to_list() walk a bucket until
 * they reach NULL, so an uninitialised bucket would send them through
 * indeterminate pointers. (This relies on all-bits-zero being a null
 * pointer, which holds on the platforms this code targets.)
 *
 * Returns the new table. Prints a message and exits with EXIT_FAILURE if
 * the allocation fails, matching the error handling of add_to_list().
 */
HASHTABLE *new_hashtable()
{
    HASHTABLE *hashtable = calloc(1, sizeof *hashtable);
    if (hashtable == NULL) {
        fprintf(stderr, "calloc failed.\n");
        exit(EXIT_FAILURE);
    }
    return hashtable;
}
/*
 * Insert `string` into `hashtable`.
 *
 * The bucket is chosen by reducing the string's hash modulo
 * HASHTABLE_N_BUCKETS; the string is then added to that bucket's list,
 * whose head is updated in place.
 */
void add_string_to_hashtable(HASHTABLE *hashtable, char *string)
{
    const uint32_t hash = hash_string(string);
    ADD_TO_LIST(hashtable->buckets[hash % HASHTABLE_N_BUCKETS], string);
}
/*
 * Report whether `string` is stored in `hashtable`.
 *
 * Only the one bucket selected by the string's hash (modulo
 * HASHTABLE_N_BUCKETS) is searched.
 */
bool find_string_in_hashtable(HASHTABLE *hashtable, char *string)
{
    const uint32_t hash = hash_string(string);
    return find_in_list(hashtable->buckets[hash % HASHTABLE_N_BUCKETS], string);
}

View File

@ -0,0 +1,32 @@
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include "hashstring.h"
// On Linux builds, declare strdup() by hand.
// NOTE(review): presumably needed because <string.h> does not expose strdup()
// when compiling in a strict ISO C mode without feature-test macros -- confirm.
#if defined(__linux__)
extern char *strdup(const char *);
#endif
// Number of buckets in every HASHTABLE; string hashes are reduced modulo
// this value to select a bucket.
#define HASHTABLE_N_BUCKETS 1000
// Node of a singly linked list of strings; an empty list is a NULL pointer.
typedef struct __list__ {
char *item; // string stored in this node
struct __list__ *next; // following node, or NULL at the end of the list
} LIST;
// Fixed-size table of buckets; each bucket is the head of a LIST.
typedef struct {
LIST *buckets[HASHTABLE_N_BUCKETS];
} HASHTABLE;
/*
 * LIST operations.
 */

/* Returns true if `string` equals (strcmp) the item of some node in `list`. */
extern bool find_in_list(LIST *list, char *string);

/*
 * Adds a copy of `string` to the front of `list` unless it is already there
 * and returns the head of the resulting list.
 * It must be declared here because ADD_TO_LIST expands to a call to it in
 * every file that uses the macro; without the prototype those files would
 * call an undeclared function.
 */
extern LIST *add_to_list(LIST *list, char *string);

/* Adds `string` to `list` and stores the new head back into `list`. */
#define ADD_TO_LIST(list,string) ((list) = add_to_list((list),(string)))
/* An empty list is represented by a NULL pointer. */
#define NEW_LIST() (NULL)

/*
 * HASHTABLE operations.
 */

/* Allocates a new hashtable. */
extern HASHTABLE *new_hashtable();

/* Inserts `string` into the bucket selected by its hash. */
extern void add_string_to_hashtable(HASHTABLE *hashtable, char *string);

/* Returns true if `string` is found in the bucket selected by its hash. */
extern bool find_string_in_hashtable(HASHTABLE *hashtable, char *string);

View File

@ -0,0 +1,42 @@
#include <stdlib.h>
#include <stdio.h>
#include "hashtable.h"
/*
 * Print every item of `list` on one line, each followed by " => ", and
 * finish the line with a newline. An empty list prints just the newline.
 */
void print_list(LIST *list)
{
    while (list != NULL) {
        printf("%s => ", list->item);
        list = list->next;
    }
    putchar('\n');
}
/*
 * Print every non-empty bucket of `hashtable`, one per line, as the bucket
 * index, a tab-separated '|' and the bucket's list contents.
 * Empty buckets are skipped.
 */
void print_hashtable(HASHTABLE *hashtable)
{
    for (size_t i = 0; i < HASHTABLE_N_BUCKETS; i++) {
        if (hashtable->buckets[i] != NULL) {
            /* %zu is the conversion that matches a size_t argument. */
            printf("%zu\t|\t", i);
            print_list(hashtable->buckets[i]);
        }
    }
}
/*
 * Manual smoke test for the list and hashtable routines. Results are printed
 * for inspection; nothing is asserted.
 */
int main(int argc, char const *argv[])
{
    /* List: items are added at the front, so "Hello" is printed first. */
    LIST *words = NEW_LIST();
    ADD_TO_LIST(words, "World");
    ADD_TO_LIST(words, "Hello");
    print_list(words);

    /* A stored string is found; a mere prefix of one is not. */
    printf("%d\n", find_in_list(words,"Hello"));
    printf("%d\n", find_in_list(words,"Hell"));

    /* Hashtable: "World" is looked up before and after it is inserted. */
    HASHTABLE *table = new_hashtable();
    add_string_to_hashtable(table, "Hello");
    printf("%d\n", find_string_in_hashtable(table,"Hell"));
    printf("%d\n", find_string_in_hashtable(table,"World"));
    add_string_to_hashtable(table, "World");
    printf("%d\n", find_string_in_hashtable(table,"World"));

    print_hashtable(table);
    return 0;
}