mirror of
https://github.com/peter-tanner/Systems-programming-labs.git
synced 2024-11-30 10:50:19 +08:00
Forgot to commit week 8 workshop. Added some fixes to the code too.
This commit is contained in:
parent
c9a9de671b
commit
0b83189faf
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -2,7 +2,9 @@
|
|||
!*.*
|
||||
!*/
|
||||
|
||||
*.o
|
||||
*.out
|
||||
*.zip
|
||||
.vscode/
|
||||
.gdb_history
|
||||
|
||||
|
|
113
Week 8-workshop/benchmark_hashtable.c
Normal file
113
Week 8-workshop/benchmark_hashtable.c
Normal file
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
* 6. 🌶 Why did we do this? The performance of a hashtable can be dramatically
|
||||
better than other data structures, such as arrays, lists and binary trees.
|
||||
|
||||
** ANSWER: Hashtables have almost O(1) performance. A traditional O(n) array
|
||||
lookup is used for each bucket, but because the size of a bucket (n) is
|
||||
small, this is a relatively small amount of time. The hash function is
|
||||
used to access one bucket. This function is independent of size
|
||||
(constant time complexity), so performance will tend to O(1) as the
|
||||
number of buckets tends to infinity (Assuming a perfect hash function).
|
||||
For an array, the traditional array search has to potentially go through
|
||||
n array items, so it has O(n) performance which is worse.
|
||||
|
||||
* For a collection of several thousand (a million?) random strings use the
|
||||
gettimeofday() system-call to measure how long it takes to insert, and to then
|
||||
find, the same set of strings with both an array and your new hashtable.
|
||||
*/
|
||||
|
||||
#include "benchmark_hashtable.h"
|
||||
|
||||
/*
 * Builds a heap-allocated, NUL-terminated string of `length` pseudo-random
 * characters, each taken from the 72 consecutive character codes that start
 * at '0'.
 *
 * The buffer holds length + 1 bytes so the result is a valid C string for
 * strcmp() and strdup(). The caller owns the returned buffer and releases it
 * with free(). The program is terminated if the allocation fails.
 */
char *random_string(size_t length)
{
    char *string = malloc(length + 1);
    if (string == NULL) {
        fprintf(stderr, "malloc failed.\n");
        exit(EXIT_FAILURE);
    }

    for (size_t i = 0; i < length; i++) {
        string[i] = '0' + rand() % 72;
    }
    /* Without the terminator every later strcmp() would read past the buffer. */
    string[length] = '\0';
    return string;
}
|
||||
|
||||
/*
 * Linear search: reports whether any of the first `length` entries of `arr`
 * compares equal (via strcmp) to `string`. Scanning stops at the first match.
 */
bool find_string_array(char *string, char **arr, size_t length)
{
    bool found = false;
    size_t remaining = length;

    while (!found && remaining > 0) {
        found = (strcmp(string, *arr) == 0);
        ++arr;
        --remaining;
    }
    return found;
}
|
||||
|
||||
/*
 * Fills both containers with the same TEST_LENGTH random strings: every
 * generated string is stored in the next slot of `arr` and is also passed to
 * add_string_to_hashtable().
 */
void intialize_benchmark(char **arr, HASHTABLE *hashtable)
{
    char **slot = arr;
    size_t remaining = TEST_LENGTH;

    while (remaining-- > 0) {
        *slot = random_string(STRING_LENGTH);
        add_string_to_hashtable(hashtable, *slot);
        ++slot;
    }
}
|
||||
|
||||
/*
 * Microseconds elapsed between two gettimeofday() samples. Both the seconds
 * and the microseconds fields are used, so an interval that crosses a
 * one-second boundary is still measured correctly.
 */
static long long elapsed_usec(const struct timeval *start,
                              const struct timeval *end)
{
    return (long long)(end->tv_sec - start->tv_sec) * 1000000LL
         + (long long)(end->tv_usec - start->tv_usec);
}

/*
 * Times one lookup of the same randomly chosen stored string, first in the
 * array and then in the hashtable.
 *
 * arr       - the TEST_LENGTH strings to search
 * hashtable - hashtable to search for the same string
 * dt_arr    - receives the array lookup time in microseconds
 * dt_hash   - receives the hashtable lookup time in microseconds
 *
 * Returns true when both timings are usable. Returns false only when a
 * measured interval is negative (gettimeofday() reads the wall clock, which
 * can be stepped backwards); the caller is expected to retry.
 */
bool one_benchmark(char **arr, HASHTABLE *hashtable,
                   suseconds_t *dt_arr, suseconds_t *dt_hash)
{
    char *needle = arr[rand() % TEST_LENGTH];
    struct timeval start;
    struct timeval end;
    /* Results go to a volatile so the timed calls cannot be discarded as
       dead code by an optimising compiler. */
    volatile bool found;
    long long elapsed;

    gettimeofday(&start, NULL);
    found = find_string_array(needle, arr, TEST_LENGTH);
    gettimeofday(&end, NULL);
    elapsed = elapsed_usec(&start, &end);
    if (elapsed < 0) {
        return false;
    }
    *dt_arr = (suseconds_t)elapsed;

    gettimeofday(&start, NULL);
    found = find_string_in_hashtable(hashtable, needle);
    gettimeofday(&end, NULL);
    elapsed = elapsed_usec(&start, &end);
    if (elapsed < 0) {
        return false;
    }
    *dt_hash = (suseconds_t)elapsed;

    (void)found;
    return true;
}
|
||||
|
||||
/*
 * Prints the mean, minimum and maximum of `length` timing samples.
 *
 * times   - the samples
 * length  - number of samples; when 0, all three statistics are printed as 0
 * fmt_str - printf format that consumes, in order, one double (the mean) and
 *           two long values (minimum, maximum), for example
 *           "AVERAGE %lf MIN %ld MAX %ld\n". It is used directly as the
 *           printf() format, so it must come from trusted code.
 */
void print_stats(suseconds_t *times, size_t length, char *fmt_str)
{
    if (length == 0) {
        printf(fmt_str, 0.0, 0L, 0L);
        return;
    }

    /* Sum exactly in integers and divide once, instead of adding up
       `length` rounded quotients. */
    long long total = 0;
    suseconds_t min = times[0];
    suseconds_t max = times[0];
    for (size_t i = 0; i < length; i++) {
        total += times[i];
        if (times[i] < min) {
            min = times[i];
        }
        if (times[i] > max) {
            max = times[i];
        }
    }

    /* The width of suseconds_t is platform-defined; convert to long so the
       variadic arguments have one well-defined type. */
    printf(fmt_str, (double)total / (double)length, (long)min, (long)max);
}
|
||||
|
||||
/*
 * Collects BENCHMARK_ITERATIONS timing samples for array lookups and for
 * hashtable lookups, then prints average/min/max for each.
 */
void benchmark(char **arr, HASHTABLE *hashtable)
{
    suseconds_t times_hash[BENCHMARK_ITERATIONS];
    suseconds_t times_array[BENCHMARK_ITERATIONS];
    for (size_t i = 0; i < BENCHMARK_ITERATIONS; i++) {
        /* one_benchmark() returns false when a measurement is unusable;
           repeat until slot i holds a valid pair of samples. */
        bool success = false;
        while (!success) {
            success = one_benchmark(arr, hashtable,
                                    &times_array[i], &times_hash[i]);
        }
    }
    /* MIN and MAX use %ld: the values are suseconds_t, which is wider than
       int on common 64-bit platforms, so %d did not match the arguments. */
    print_stats(times_array, BENCHMARK_ITERATIONS, "ARRAY : AVERAGE %lf MIN %ld MAX %ld\n");
    print_stats(times_hash, BENCHMARK_ITERATIONS, "HASHMAP : AVERAGE %lf MIN %ld MAX %ld\n");
}
|
||||
|
||||
int main(int argc, char const *argv[])
|
||||
{
|
||||
srand(time(NULL));
|
||||
HASHTABLE *hashtable = new_hashtable();
|
||||
char *arr[TEST_LENGTH];
|
||||
intialize_benchmark(arr,hashtable);
|
||||
benchmark(arr,hashtable);
|
||||
return 0;
|
||||
}
|
11
Week 8-workshop/benchmark_hashtable.h
Normal file
11
Week 8-workshop/benchmark_hashtable.h
Normal file
|
@ -0,0 +1,11 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "hashtable.h"
|
||||
|
||||
/* Number of random strings generated and stored in both the array and the hashtable. */
#define TEST_LENGTH 100000 // SOME LARGE VALUE.
|
||||
/* Length requested from random_string() for every generated test string. */
#define STRING_LENGTH 127
|
||||
/* Number of timed lookups recorded for each data structure. */
#define BENCHMARK_ITERATIONS 10000
|
17
Week 8-workshop/hashstring.c
Normal file
17
Week 8-workshop/hashstring.c
Normal file
|
@ -0,0 +1,17 @@
|
|||
#include "hashstring.h"
|
||||
|
||||
// FUNCTION hash_string() ACCEPTS A STRING PARAMETER,
|
||||
// AND RETURNS AN UNSIGNED 32-BIT INTEGER AS ITS RESULT
|
||||
//
|
||||
// see: https://en.cppreference.com/w/c/types/integer
|
||||
|
||||
/*
 * Hashes a NUL-terminated string into 32 bits. Starting from 0, every
 * character is folded in as hash = hash * 33 + character; the arithmetic is
 * unsigned, so it wraps around instead of overflowing.
 */
uint32_t hash_string(char *string)
{
    uint32_t hash = 0;

    for (char *cursor = string; *cursor != '\0'; ++cursor) {
        hash *= 33;
        hash += *cursor;
    }
    return hash;
}
|
3
Week 8-workshop/hashstring.h
Normal file
3
Week 8-workshop/hashstring.h
Normal file
|
@ -0,0 +1,3 @@
|
|||
#include <stdint.h>
|
||||
|
||||
/* Returns an unsigned 32-bit hash of the given NUL-terminated string. */
extern uint32_t hash_string(char*);
|
63
Week 8-workshop/hashtable.c
Normal file
63
Week 8-workshop/hashtable.c
Normal file
|
@ -0,0 +1,63 @@
|
|||
#include "hashtable.h"
|
||||
|
||||
/*
|
||||
* LIST DATATYPE
|
||||
*/
|
||||
|
||||
/*
 * Reports whether `string` is stored in the list. Nodes are visited from the
 * head until one compares equal (strcmp) or the NULL end of the list is
 * reached.
 */
bool find_in_list(LIST *list, char *string)
{
    LIST *node = list;

    while (node != NULL && strcmp(string, node->item) != 0) {
        node = node->next;
    }
    return node != NULL;
}
|
||||
|
||||
/*
 * Adds `string` to the list unless an equal string is already stored.
 *
 * A new node holding a private strdup() copy of `string` is placed in front
 * of the current head. Returns the head of the resulting list: the new node,
 * or `list` itself when the string was already present. The program is
 * terminated if either allocation fails.
 */
LIST *add_to_list(LIST *list, char *string)
{
    if (!find_in_list(list, string)) {
        char *copy = strdup(string);
        if (copy == NULL) {
            fprintf(stderr, "strdup failed.\n");
            exit(EXIT_FAILURE);
        }

        LIST *head = malloc(sizeof(LIST));
        if (head == NULL) {
            fprintf(stderr, "malloc failed.\n");
            exit(EXIT_FAILURE);
        }

        head->item = copy;
        head->next = list;
        list = head;
    }
    return list;
}
|
||||
|
||||
/* Calls add_to_list() and stores the returned head back into `list`;
   note that the `list` argument is evaluated twice. */
#define ADD_TO_LIST(list,string) ((list) = add_to_list((list),(string)))
|
||||
/* An empty list is represented by a NULL head pointer. */
#define NEW_LIST() (NULL)
|
||||
|
||||
/*
|
||||
* HASHTABLE DATATYPE
|
||||
*/
|
||||
|
||||
HASHTABLE *new_hashtable()
|
||||
{
|
||||
HASHTABLE *hashtable = malloc( sizeof(HASHTABLE) );
|
||||
return hashtable;
|
||||
}
|
||||
|
||||
/*
 * Inserts `string` into the bucket selected by hash_string(string) modulo
 * HASHTABLE_N_BUCKETS, replacing the bucket head with whatever
 * add_to_list() returns.
 */
void add_string_to_hashtable(HASHTABLE *hashtable, char *string)
{
    LIST **bucket = &hashtable->buckets[hash_string(string) % HASHTABLE_N_BUCKETS];

    *bucket = add_to_list(*bucket, string);
}
|
||||
|
||||
/*
 * Reports whether `string` is stored in the hashtable. Only the bucket
 * selected by hash_string(string) modulo HASHTABLE_N_BUCKETS is searched.
 */
bool find_string_in_hashtable(HASHTABLE *hashtable, char *string)
{
    const uint32_t hash = hash_string(string);
    LIST *bucket = hashtable->buckets[hash % HASHTABLE_N_BUCKETS];

    return find_in_list(bucket, string);
}
|
32
Week 8-workshop/hashtable.h
Normal file
32
Week 8-workshop/hashtable.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include "hashstring.h"
|
||||
|
||||
/* Explicit declaration of strdup() for Linux builds - presumably because
   <string.h> does not declare it when compiling in a strict ISO C mode. */
#ifdef __linux__
extern char *strdup(const char *s);
#endif
|
||||
|
||||
/* Number of buckets (separate lists) in every HASHTABLE. */
#define HASHTABLE_N_BUCKETS 1000
|
||||
|
||||
/* One node of a singly linked list of strings. */
struct __list__ {
    char            *item;  /* the string stored in this node */
    struct __list__ *next;  /* following node, or NULL at the end of the list */
};

typedef struct __list__ LIST;
|
||||
|
||||
typedef struct {
|
||||
LIST *buckets[HASHTABLE_N_BUCKETS];
|
||||
} HASHTABLE;
|
||||
|
||||
extern bool find_in_list(LIST*, char*);
|
||||
|
||||
#define ADD_TO_LIST(list,string) ((list) = add_to_list((list),(string)))
|
||||
|
||||
#define NEW_LIST() (NULL)
|
||||
|
||||
extern HASHTABLE *new_hashtable();
|
||||
|
||||
extern void add_string_to_hashtable(HASHTABLE*, char*);
|
||||
|
||||
extern bool find_string_in_hashtable(HASHTABLE*, char*);
|
42
Week 8-workshop/test_hashtable.c
Normal file
42
Week 8-workshop/test_hashtable.c
Normal file
|
@ -0,0 +1,42 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "hashtable.h"
|
||||
|
||||
/*
 * Prints every string in the list on one line, each followed by " => ",
 * and ends the line with a newline. An empty list prints just the newline.
 */
void print_list(LIST *list)
{
    while (list != NULL) {
        printf("%s => ",list->item);
        list = list->next;
    }
    putchar('\n');
}
|
||||
|
||||
/*
 * Prints every non-empty bucket of the hashtable: the bucket index, a
 * separator, then the bucket's strings via print_list().
 */
void print_hashtable(HASHTABLE *hashtable)
{
    for (size_t i = 0; i < HASHTABLE_N_BUCKETS; i++) {
        if (hashtable->buckets[i] != NULL) {
            /* %zu is the conversion for size_t; %lu is only correct where
               size_t happens to be unsigned long. */
            printf("%zu\t|\t",i);
            print_list(hashtable->buckets[i]);
        }
    }
}
|
||||
|
||||
int main(int argc, char const *argv[])
|
||||
{
|
||||
LIST *list = NEW_LIST();
|
||||
ADD_TO_LIST(list, "World");
|
||||
ADD_TO_LIST(list, "Hello");
|
||||
print_list(list);
|
||||
printf("%d\n", find_in_list(list,"Hello"));
|
||||
printf("%d\n", find_in_list(list,"Hell"));
|
||||
|
||||
HASHTABLE *hashtable = new_hashtable();
|
||||
add_string_to_hashtable(hashtable, "Hello");
|
||||
printf("%d\n", find_string_in_hashtable(hashtable,"Hell"));
|
||||
printf("%d\n", find_string_in_hashtable(hashtable,"World"));
|
||||
add_string_to_hashtable(hashtable, "World");
|
||||
printf("%d\n", find_string_in_hashtable(hashtable,"World"));
|
||||
print_hashtable(hashtable);
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user