Browse Source

Very basic and inefficient on-disk hash table using mmap

Getty Ritter 8 years ago
commit
c9394a6bc9
3 changed files with 324 additions and 0 deletions
  1. 7 0
      Makefile
  2. 260 0
      corned_beef.c
  3. 57 0
      corned_beef.h

+ 7 - 0
Makefile

@@ -0,0 +1,7 @@
# Build the corned_beef demo with the system C compiler.
all: corned_beef

# $< is the first prerequisite (corned_beef.c); the header is listed as
# a prerequisite too so edits to it trigger a rebuild.
corned_beef: corned_beef.c corned_beef.h
	$(CC) $< -o $@

clean:
	rm -f corned_beef

# 'all' and 'clean' never produce files of those names; mark them phony
# so make always runs them even if such a file happens to exist.
.PHONY: all clean

+ 260 - 0
corned_beef.c

@@ -0,0 +1,260 @@
+#include "corned_beef.h"
+
+/* mmap'ed memory has to be aligned to the page size of the ambient
+ * system, so we'll just use that as the granularity for individual
+ * chunks. */
+#define PAGE_SZ (sysconf(_SC_PAGE_SIZE))
+/* An intermediate node in our 'hash table' has to fit onto a page,
+ * and it'll contain two size_t values to act as 'pointers' as well
+ * as a null terminator, so the longest a key can be is this. A
+ * value could be slightly longer, but we can cap it at this, too,
+ * because that'll only cost us a handful of bytes. */
+#define MAX_STR_SIZE (sysconf(_SC_PAGE_SIZE) - sizeof(size_t) * 2 - 1)
+
/* The deliberately terrible hash used to bucket keys: the sum of the
 * key's bytes, truncated to eight bits. Collisions abound, but the
 * range (0..255) is exactly what the bucket array needs. */
unsigned char
very_bad_hash(char* input)
{
	unsigned char sum = 0;
	for (char* p = input; *p != '\0'; p++)
		sum += *p;
	return sum;
}
+
+cb_handle
+corned_beef_open(char* filename)
+{
+	/* Our handle contains both the file descriptor and the mmap'ed
+     * index page of our hash table. That way, we can get easy access
+     * to both! */
+	cb_handle handle;
+	/* Here we're opening a file read-write; I could be more granular
+     * in my permissions, but... eh... */
+	handle.file = open(filename, O_RDWR | O_CREAT, 0644);
+	/* And here's mmap itself! The memory it returns corresponds exactly
+	 * to memory on the disk, so any changes that we make to the
+     * in-memory representation will get propagated back to the disk. */
+	handle.idx = mmap(NULL, /* This parameter indicates where we want the
+							 * mmap'ed memory to live in the address space,
+							 * but it's really just a hint. If we leave it
+							 * as NULL, then the OS will give us a pointer
+							 * that's aligned appropriate and leave it at
+							 * that. */
+					  PAGE_SZ, /* This parameter indicates how big we want
+								* the mapping to be: we'll just use the
+								* page size, to keep things uniform. This
+								* does mean we're probably making a pretty
+								* sparse file, if most of our keys/values
+								* are small. */
+					  PROT_READ | PROT_WRITE, /* This parameter tells us
+											   * we want to both read from
+											   * and write to this memory,
+											   * and consequently the file.
+											   * This needs to be consistent
+											   * with the mode we used to
+											   * open the file! */
+					  MAP_SHARED, /* The MAP_SHARED parameter allows us to
+								   * share the memory with other processes
+								   * and also back onto the disk. There are
+								   * a lot of other flags we could choose
+								   * here, too. Look 'em up! */
+					  handle.file, /* The file that this memory corresponds
+									* to: the one we just opened. */
+					  0); /* And the offset into the file. This needs to be
+						   * a multiple of the page size, but in this case,
+						   * we're filling in the index, which will always
+						   * be on the first page. */
+	return handle;
+}
+
+void
+corned_beef_close(cb_handle handle)
+{
+	/* To close our handle, unmap the index file... */
+	munmap(handle.idx, PAGE_SZ);
+	/* And close the file itself. */
+	close(handle.file);
+}
+
+int
+corned_beef_init(cb_handle handle)
+{
+	int i;
+	/* To initializee our database, start by resizing the file to exactly
+     * one page. I'm assuming that the cb_index structure is smaller than
+     * a page, which is fine for me but not a good thing in general. */
+	ftruncate(handle.file, PAGE_SZ);
+	/* Zero all the bucket pointers, because all the buckets are empty. */
+	for (i = 0; i < 0xff; i++)
+		handle.idx->list_head[i] = 0;
+	/* And if we get another page, we should start counting at one. */
+	handle.idx->next_free_page = 1;
+}
+
+void*
+corned_beef_get_page(cb_handle handle, size_t offset)
+{
+	/* This is simple enough: mmap the page that corresponds to the
+     * given offset. */
+	return mmap(NULL, /* allocate it wherever the kernel wants... */
+				PAGE_SZ, /* make it PAGE_SZ bytes... */
+				PROT_READ | PROT_WRITE, /* allow reading and writing... */
+				MAP_SHARED, /* share it, so changes get written back... */
+				handle.file, /* use the file descriptor in the handle... */
+				offset * PAGE_SZ); /* And use the specified byte offset. */
+}
+
+size_t
+corned_beef_new_page(cb_handle handle, void** dest)
+{
+	/* Our index page keeps track of the next unused index, so we take
+	 * that one, and bump it up for the next use... */
+	size_t new_page = handle.idx->next_free_page;
+	handle.idx->next_free_page += 1;
+	/* Resize the file so we have the space to actually use the new
+	 * page... */
+	ftruncate(handle.file, PAGE_SZ * (new_page + 1));
+	/* Set the dest pointer to point to the mmap'ed chunk of the file
+	 * that corresponds to the new page... */
+	*dest = corned_beef_get_page(handle, new_page);
+	/* And return the index into the new page. */
+	return new_page;
+}
+
+int
+corned_beef_lookup(cb_handle handle, char* key)
+{
+	/* When looking up, we start by finding the right bucket to
+	 * start our search. */
+	size_t ptr = handle.idx->list_head[very_bad_hash(key)];
+
+	/* It's possible that the bucket is empty, in which case this
+     * loop will get skipped. */
+	while (ptr) {
+		/* We find the page corresponding to the current index */
+		cb_node* node = corned_beef_get_page(handle, ptr);
+
+		/* ...and check to see whether we've found the right key. */
+		if (strcmp(key, node->key) == 0) {
+			/* If so, we grab the page pointed to by the value and
+			 * print it to stdout. */
+			char* val = corned_beef_get_page(handle, node->val_page);
+
+			printf("%s\n", val);
+			munmap(val, PAGE_SZ);
+			munmap(node, PAGE_SZ);
+
+			return 0;
+		} else {
+			/* Otherwise, we move on to the next element in the list. */
+			ptr = node->next;
+			munmap(node, PAGE_SZ);
+		}
+	}
+
+	fprintf(stderr, "Unable to find value for key \"%s\"\n", key);
+	
+	return 99;
+}
+
+size_t
+corned_beef_add_node(cb_handle handle, char* key, char* val)
+{
+	cb_node* new;
+	char* dest;
+	/* We create a new page and get its index. This new page will
+     * contain our new node */
+	size_t result = corned_beef_new_page(handle, (void**) &new);
+
+	/* the next index is zero, because this is at the current end
+     * of the list. */
+	new->next = 0;
+	/* The key we can copy into the memory. */
+	strcpy(new->key, key);
+	/* And the value is on a different page, which create here */
+	new->val_page = corned_beef_new_page(handle, (void**) &dest);
+	/* Again, copying the value to the new page is trivial. */
+	strcpy(dest, val);
+
+	munmap(dest, PAGE_SZ);
+	munmap(new, PAGE_SZ);
+	
+	return result;
+}
+
+int
+corned_beef_insert(cb_handle handle, char* key, char* val)
+{
+	/* We figure out which bucket to start looking in, which will give us
+     * a page offset into the file. */
+	unsigned char hash = very_bad_hash(key);
+	size_t ptr = handle.idx->list_head[hash];
+
+	if (!ptr) {
+		/* Zero is not a valid page offset---that's where our index page
+         * is---so if the page offset is zero, just add a new node here. */
+		handle.idx->list_head[hash] = corned_beef_add_node(handle, key, val);
+		return 0;
+	}
+
+	while (ptr) {
+		/* Otherwise, fetch the specified node and start moving along. */
+		cb_node* node = corned_beef_get_page(handle, ptr);
+
+		if (strcmp(key, node->key) == 0) {
+			/* If the key we're looking for already exists, then we can
+			 * grab the page that contains the value and replace the
+			 * value with the new one, which because of mmap, is just a
+			 * simple strcpy. */
+			char* dest = corned_beef_get_page(handle, node->val_page);
+			strcpy(dest, val);
+			
+			munmap(node, PAGE_SZ);
+			munmap(dest, PAGE_SZ);
+			return 0;
+		} else if (node->next == 0) {
+			/* If we haven't found it yet and we're at the end of the
+             * linked list, then set the next page to a new one that
+             * contains the key and value we're adding. */
+			node->next = corned_beef_add_node(handle, key, val);
+			
+			munmap(node, PAGE_SZ);
+			return 0;
+		} else {
+			/* Otherwise, let's keep going down the list. */
+			ptr = node->next;
+			munmap(node, PAGE_SZ);
+		}
+	}
+
+	/* We shouldn't ever hit this part if we're ignoring race conditions.
+	 * (And I am ignoring them, for the purposes of this silly program.) */
+	return 99;
+}
+
+/* Our man function is a pretty trivial wrapper over the operations above
+ * so we can use this to initialize, modify, and query on-disk hash tables. */
+int
+main(int argc, char* argv[])
+{
+	if (argc == 3 && !strcmp(argv[2], "init")) {
+		cb_handle h = corned_beef_open(argv[1]);
+		corned_beef_init(h);
+		corned_beef_close(h);
+	} else if (argc == 5 && !strcmp(argv[2], "insert")) {
+		cb_handle h = corned_beef_open(argv[1]);
+		corned_beef_insert(h, argv[3], argv[4]);
+		corned_beef_close(h);
+	} else if (argc == 4 && !strcmp(argv[2], "lookup")) {
+		cb_handle h = corned_beef_open(argv[1]);
+		corned_beef_lookup(h, argv[3]);
+		corned_beef_close(h);
+	} else {
+		fprintf(stderr, "Usage:\n");
+		fprintf(stderr, "  corned_beef [db] init\n");
+		fprintf(stderr, "  corned_beef [db] insert [key]\n");
+		fprintf(stderr, "  corned_beef [db] lookup [key] [val]\n");
+		return 99;
+	}
+	return 0;
+}

+ 57 - 0
corned_beef.h

@@ -0,0 +1,57 @@
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
/* We're going to have three kinds of pages in our hash table: the
 * first is the 'index', which we'll have exactly one of. It will
 * contain the roots to a bunch of linked lists, each one corresponding
 * to a hash bucket, and a field to keep track of where the next
 * free page is. I'm not including any operations to delete keys or
 * values, so that number will increase strictly. */
typedef struct {
	/* One list head per possible hash value. very_bad_hash returns an
	 * unsigned char (0..255), so there must be 0x100 buckets; the
	 * original 0xff (255) made bucket 255 an out-of-bounds access. */
	size_t list_head[0x100];
	/* Index of the next unallocated page in the file; page 0 is this
	 * index itself, so allocation starts at 1. */
	size_t next_free_page;
} cb_index;
+
/* An intermediate node contains a key, a pointer to the next node in
 * the list, and a pointer to the corresponding value page. The 'next'
 * field will be zero to indicate that we're at the end of a list, and
 * otherwise it'll be an index into the on-disk file, pointing to a
 * particular page.
*/
typedef struct {
	size_t next;     /* page index of the next node in the chain; 0 = end */
	size_t val_page; /* page index of the page holding this key's value */
	char key[];      /* NUL-terminated key (C99 flexible array member);
	                  * occupies the remainder of the node's page */
} cb_node;
+
+/* The third kind of page is a 'value', which needs no pointers, and
+ * will be entirely given to a single string. This is all ridiculously
+ * inefficient: pages are quite large, and we're not doing anything to
+ * pack pages. It's entirely possible we'll have a hash table like
+ *   {'a': 'x', 'b': 'y'}
+ * which would involve a total of five pages with mostly empty space.
+ */
+
/* Our 'handle' just contains the file descriptor and the index page
 * that we've mapped from the underlying file. Passed around by value;
 * corned_beef_close releases both members. */
typedef struct {
	int file;      /* open read-write descriptor for the database file */
	cb_index* idx; /* mmap'ed first page of the file (the bucket index) */
} cb_handle;
+
/* Our hash function is bad. No, really. Sums the bytes of the key into
 * an unsigned char, so the result is always in 0..255. */
unsigned char very_bad_hash(char* input);

/* Functions for opening/initializing a handle and closing one.
 * Open maps the file's first page as the index; close unmaps it and
 * closes the descriptor. */
cb_handle corned_beef_open(char* filename);
void corned_beef_close(cb_handle handle);

/* Functions for initializing a hash table, looking up a key, and
 * inserting a value with a key. Lookup prints the value to stdout and
 * returns 0, or returns 99 (with a message on stderr) if the key is
 * absent. */
int corned_beef_init(cb_handle handle);
int corned_beef_lookup(cb_handle handle, char* key);
int corned_beef_insert(cb_handle handle, char* key, char* val);