Browse Source

Very basic and inefficient on-disk hash table using mmap

Getty Ritter 8 years ago
commit
c9394a6bc9
3 changed files with 324 additions and 0 deletions
  1. 7 0
      Makefile
  2. 260 0
      corned_beef.c
  3. 57 0
      corned_beef.h

+ 7 - 0
Makefile

@@ -0,0 +1,7 @@
# Build the corned_beef demo with the system C compiler.
all: corned_beef

# $< is the first prerequisite (corned_beef.c); the header is listed as
# a prerequisite too so edits to it trigger a rebuild.
corned_beef: corned_beef.c corned_beef.h
	$(CC) $< -o $@

clean:
	rm -f corned_beef

# 'all' and 'clean' never produce files of those names; mark them phony
# so make always runs them even if such a file happens to exist.
.PHONY: all clean

+ 260 - 0
corned_beef.c

@@ -0,0 +1,260 @@
+#include "corned_beef.h"
+
+/* mmap'ed memory has to be aligned to the page size of the ambient
+ * system, so we'll just use that as the granularity for individual
+ * chunks. */
+#define PAGE_SZ (sysconf(_SC_PAGE_SIZE))
+/* An intermediate node in our 'hash table' has to fit onto a page,
+ * and it'll contain two size_t values to act as 'pointers' as well
+ * as a null terminator, so the longest a key can be is this. A
+ * value could be slightly longer, but we can cap it at this, too,
+ * because that'll only cost us a handful of bytes. */
+#define MAX_STR_SIZE (sysconf(_SC_PAGE_SIZE) - sizeof(size_t) * 2 - 1)
+
/* The deliberately terrible hash used to bucket keys: the sum of the
 * key's bytes, truncated to eight bits. Collisions abound, but the
 * range (0..255) is exactly what the bucket array needs. */
unsigned char
very_bad_hash(char* input)
{
	unsigned char sum = 0;
	for (char* p = input; *p != '\0'; p++)
		sum += *p;
	return sum;
}
+
+cb_handle
+corned_beef_open(char* filename)
+{
+	/* Our handle contains both the file descriptor and the mmap'ed
+     * index page of our hash table. That way, we can get easy access
+     * to both! */
+	cb_handle handle;
+	/* Here we're opening a file read-write; I could be more granular
+     * in my permissions, but... eh... */
+	handle.file = open(filename, O_RDWR | O_CREAT, 0644);
+	/* And here's mmap itself! The memory it returns corresponds exactly
+	 * to memory on the disk, so any changes that we make to the
+     * in-memory representation will get propagated back to the disk. */
+	handle.idx = mmap(NULL, /* This parameter indicates where we want the
+							 * mmap'ed memory to live in the address space,
+							 * but it's really just a hint. If we leave it
+							 * as NULL, then the OS will give us a pointer
+							 * that's aligned appropriate and leave it at
+							 * that. */
+					  PAGE_SZ, /* This parameter indicates how big we want
+								* the mapping to be: we'll just use the
+								* page size, to keep things uniform. This
+								* does mean we're probably making a pretty
+								* sparse file, if most of our keys/values
+								* are small. */
+					  PROT_READ | PROT_WRITE, /* This parameter tells us
+											   * we want to both read from
+											   * and write to this memory,
+											   * and consequently the file.
+											   * This needs to be consistent
+											   * with the mode we used to
+											   * open the file! */
+					  MAP_SHARED, /* The MAP_SHARED parameter allows us to
+								   * share the memory with other processes
+								   * and also back onto the disk. There are
+								   * a lot of other flags we could choose
+								   * here, too. Look 'em up! */
+					  handle.file, /* The file that this memory corresponds
+									* to: the one we just opened. */
+					  0); /* And the offset into the file. This needs to be
+						   * a multiple of the page size, but in this case,
+						   * we're filling in the index, which will always
+						   * be on the first page. */
+	return handle;
+}
+
+void
+corned_beef_close(cb_handle handle)
+{
+	/* To close our handle, unmap the index file... */
+	munmap(handle.idx, PAGE_SZ);
+	/* And close the file itself. */
+	close(handle.file);
+}
+
+int
+corned_beef_init(cb_handle handle)
+{
+	int i;
+	/* To initializee our database, start by resizing the file to exactly
+     * one page. I'm assuming that the cb_index structure is smaller than
+     * a page, which is fine for me but not a good thing in general. */
+	ftruncate(handle.file, PAGE_SZ);
+	/* Zero all the bucket pointers, because all the buckets are empty. */
+	for (i = 0; i < 0xff; i++)
+		handle.idx->list_head[i] = 0;
+	/* And if we get another page, we should start counting at one. */
+	handle.idx->next_free_page = 1;
+}
+
+void*
+corned_beef_get_page(cb_handle handle, size_t offset)
+{
+	/* This is simple enough: mmap the page that corresponds to the
+     * given offset. */
+	return mmap(NULL, /* allocate it wherever the kernel wants... */
+				PAGE_SZ, /* make it PAGE_SZ bytes... */
+				PROT_READ | PROT_WRITE, /* allow reading and writing... */
+				MAP_SHARED, /* share it, so changes get written back... */
+				handle.file, /* use the file descriptor in the handle... */
+				offset * PAGE_SZ); /* And use the specified byte offset. */
+}
+
+size_t
+corned_beef_new_page(cb_handle handle, void** dest)
+{
+	/* Our index page keeps track of the next unused index, so we take
+	 * that one, and bump it up for the next use... */
+	size_t new_page = handle.idx->next_free_page;
+	handle.idx->next_free_page += 1;
+	/* Resize the file so we have the space to actually use the new
+	 * page... */
+	ftruncate(handle.file, PAGE_SZ * (new_page + 1));
+	/* Set the dest pointer to point to the mmap'ed chunk of the file
+	 * that corresponds to the new page... */
+	*dest = corned_beef_get_page(handle, new_page);
+	/* And return the index into the new page. */
+	return new_page;
+}
+
+int
+corned_beef_lookup(cb_handle handle, char* key)
+{
+	/* When looking up, we start by finding the right bucket to
+	 * start our search. */
+	size_t ptr = handle.idx->list_head[very_bad_hash(key)];
+
+	/* It's possible that the bucket is empty, in which case this
+     * loop will get skipped. */
+	while (ptr) {
+		/* We find the page corresponding to the current index */
+		cb_node* node = corned_beef_get_page(handle, ptr);
+
+		/* ...and check to see whether we've found the right key. */
+		if (strcmp(key, node->key) == 0) {
+			/* If so, we grab the page pointed to by the value and
+			 * print it to stdout. */
+			char* val = corned_beef_get_page(handle, node->val_page);
+
+			printf("%s\n", val);
+			munmap(val, PAGE_SZ);
+			munmap(node, PAGE_SZ);
+
+			return 0;
+		} else {
+			/* Otherwise, we move on to the next element in the list. */
+			ptr = node->next;
+			munmap(node, PAGE_SZ);
+		}
+	}
+
+	fprintf(stderr, "Unable to find value for key \"%s\"\n", key);
+	
+	return 99;
+}
+
+size_t
+corned_beef_add_node(cb_handle handle, char* key, char* val)
+{
+	cb_node* new;
+	char* dest;
+	/* We create a new page and get its index. This new page will
+     * contain our new node */
+	size_t result = corned_beef_new_page(handle, (void**) &new);
+
+	/* the next index is zero, because this is at the current end
+     * of the list. */
+	new->next = 0;
+	/* The key we can copy into the memory. */
+	strcpy(new->key, key);
+	/* And the value is on a different page, which create here */
+	new->val_page = corned_beef_new_page(handle, (void**) &dest);
+	/* Again, copying the value to the new page is trivial. */
+	strcpy(dest, val);
+
+	munmap(dest, PAGE_SZ);
+	munmap(new, PAGE_SZ);
+	
+	return result;
+}
+
+int
+corned_beef_insert(cb_handle handle, char* key, char* val)
+{
+	/* We figure out which bucket to start looking in, which will give us
+     * a page offset into the file. */
+	unsigned char hash = very_bad_hash(key);
+	size_t ptr = handle.idx->list_head[hash];
+
+	if (!ptr) {
+		/* Zero is not a valid page offset---that's where our index page
+         * is---so if the page offset is zero, just add a new node here. */
+		handle.idx->list_head[hash] = corned_beef_add_node(handle, key, val);
+		return 0;
+	}
+
+	while (ptr) {
+		/* Otherwise, fetch the specified node and start moving along. */
+		cb_node* node = corned_beef_get_page(handle, ptr);
+
+		if (strcmp(key, node->key) == 0) {
+			/* If the key we're looking for already exists, then we can
+			 * grab the page that contains the value and replace the
+			 * value with the new one, which because of mmap, is just a
+			 * simple strcpy. */
+			char* dest = corned_beef_get_page(handle, node->val_page);
+			strcpy(dest, val);
+			
+			munmap(node, PAGE_SZ);
+			munmap(dest, PAGE_SZ);
+			return 0;
+		} else if (node->next == 0) {
+			/* If we haven't found it yet and we're at the end of the
+             * linked list, then set the next page to a new one that
+             * contains the key and value we're adding. */
+			node->next = corned_beef_add_node(handle, key, val);
+			
+			munmap(node, PAGE_SZ);
+			return 0;
+		} else {
+			/* Otherwise, let's keep going down the list. */
+			ptr = node->next;
+			munmap(node, PAGE_SZ);
+		}
+	}
+
+	/* We shouldn't ever hit this part if we're ignoring race conditions.
+	 * (And I am ignoring them, for the purposes of this silly program.) */
+	return 99;
+}
+
+/* Our man function is a pretty trivial wrapper over the operations above
+ * so we can use this to initialize, modify, and query on-disk hash tables. */
+int
+main(int argc, char* argv[])
+{
+	if (argc == 3 && !strcmp(argv[2], "init")) {
+		cb_handle h = corned_beef_open(argv[1]);
+		corned_beef_init(h);
+		corned_beef_close(h);
+	} else if (argc == 5 && !strcmp(argv[2], "insert")) {
+		cb_handle h = corned_beef_open(argv[1]);
+		corned_beef_insert(h, argv[3], argv[4]);
+		corned_beef_close(h);
+	} else if (argc == 4 && !strcmp(argv[2], "lookup")) {
+		cb_handle h = corned_beef_open(argv[1]);
+		corned_beef_lookup(h, argv[3]);
+		corned_beef_close(h);
+	} else {
+		fprintf(stderr, "Usage:\n");
+		fprintf(stderr, "  corned_beef [db] init\n");
+		fprintf(stderr, "  corned_beef [db] insert [key]\n");
+		fprintf(stderr, "  corned_beef [db] lookup [key] [val]\n");
+		return 99;
+	}
+	return 0;
+}

+ 57 - 0
corned_beef.h

@@ -0,0 +1,57 @@
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
/* We're going to have three kinds of pages in our hash table: the
 * first is the 'index', which we'll have exactly one of. It will
 * contain the roots to a bunch of linked lists, each one corresponding
 * to a hash bucket, and a field to keep track of where the next
 * free page is. I'm not including any operations to delete keys or
 * values, so that number will increase strictly. */
typedef struct {
	/* One list head per possible hash value. very_bad_hash returns an
	 * unsigned char (0..255), so there must be 0x100 buckets; the
	 * original 0xff (255) made bucket 255 an out-of-bounds access. */
	size_t list_head[0x100];
	/* Index of the next unallocated page in the file; page 0 is this
	 * index itself, so allocation starts at 1. */
	size_t next_free_page;
} cb_index;
+
/* An intermediate node contains a key, a pointer to the next node in
 * the list, and a pointer to the corresponding value page. The 'next'
 * field will be zero to indicate that we're at the end of a list, and
 * otherwise it'll be an index into the on-disk file, pointing to a
 * particular page.
*/
typedef struct {
	size_t next;     /* page index of the next node in the chain; 0 = end */
	size_t val_page; /* page index of the page holding this key's value */
	char key[];      /* NUL-terminated key (C99 flexible array member);
	                  * occupies the remainder of the node's page */
} cb_node;
+
+/* The third kind of page is a 'value', which needs no pointers, and
+ * will be entirely given to a single string. This is all ridiculously
+ * inefficient: pages are quite large, and we're not doing anything to
+ * pack pages. It's entirely possible we'll have a hash table like
+ *   {'a': 'x', 'b': 'y'}
+ * which would involve a total of five pages with mostly empty space.
+ */
+
/* Our 'handle' just contains the file descriptor and the index page
 * that we've mapped from the underlying file. Passed around by value;
 * corned_beef_close releases both members. */
typedef struct {
	int file;      /* open read-write descriptor for the database file */
	cb_index* idx; /* mmap'ed first page of the file (the bucket index) */
} cb_handle;
+
/* Our hash function is bad. No, really. Sums the bytes of the key into
 * an unsigned char, so the result is always in 0..255. */
unsigned char very_bad_hash(char* input);

/* Functions for opening/initializing a handle and closing one.
 * Open maps the file's first page as the index; close unmaps it and
 * closes the descriptor. */
cb_handle corned_beef_open(char* filename);
void corned_beef_close(cb_handle handle);

/* Functions for initializing a hash table, looking up a key, and
 * inserting a value with a key. Lookup prints the value to stdout and
 * returns 0, or returns 99 (with a message on stderr) if the key is
 * absent. */
int corned_beef_init(cb_handle handle);
int corned_beef_lookup(cb_handle handle, char* key);
int corned_beef_insert(cb_handle handle, char* key, char* val);