|
@@ -0,0 +1,260 @@
|
|
|
+#include "corned_beef.h"
|
|
|
+
|
|
|
+/* mmap'ed memory has to be aligned to the page size of the ambient
|
|
|
+ * system, so we'll just use that as the granularity for individual
|
|
|
+ * chunks. */
|
|
|
+#define PAGE_SZ (sysconf(_SC_PAGE_SIZE))
|
|
|
+/* An intermediate node in our 'hash table' has to fit onto a page,
|
|
|
+ * and it'll contain two size_t values to act as 'pointers' as well
|
|
|
+ * as a null terminator, so the longest a key can be is this. A
|
|
|
+ * value could be slightly longer, but we can cap it at this, too,
|
|
|
+ * because that'll only cost us a handful of bytes. */
|
|
|
+#define MAX_STR_SIZE (sysconf(_SC_PAGE_SIZE) - sizeof(size_t) * 2 - 1)
|
|
|
+
|
|
|
+/* This is the terrible hash function we will use to hash strings. */
|
|
|
+unsigned char
|
|
|
+very_bad_hash(char* input)
|
|
|
+{
|
|
|
+ unsigned char ret = 0;
|
|
|
+ while (*input)
|
|
|
+ ret += *(input++);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+cb_handle
|
|
|
+corned_beef_open(char* filename)
|
|
|
+{
|
|
|
+ /* Our handle contains both the file descriptor and the mmap'ed
|
|
|
+ * index page of our hash table. That way, we can get easy access
|
|
|
+ * to both! */
|
|
|
+ cb_handle handle;
|
|
|
+ /* Here we're opening a file read-write; I could be more granular
|
|
|
+ * in my permissions, but... eh... */
|
|
|
+ handle.file = open(filename, O_RDWR | O_CREAT, 0644);
|
|
|
+ /* And here's mmap itself! The memory it returns corresponds exactly
|
|
|
+ * to memory on the disk, so any changes that we make to the
|
|
|
+ * in-memory representation will get propagated back to the disk. */
|
|
|
+ handle.idx = mmap(NULL, /* This parameter indicates where we want the
|
|
|
+ * mmap'ed memory to live in the address space,
|
|
|
+ * but it's really just a hint. If we leave it
|
|
|
+ * as NULL, then the OS will give us a pointer
|
|
|
+ * that's aligned appropriate and leave it at
|
|
|
+ * that. */
|
|
|
+ PAGE_SZ, /* This parameter indicates how big we want
|
|
|
+ * the mapping to be: we'll just use the
|
|
|
+ * page size, to keep things uniform. This
|
|
|
+ * does mean we're probably making a pretty
|
|
|
+ * sparse file, if most of our keys/values
|
|
|
+ * are small. */
|
|
|
+ PROT_READ | PROT_WRITE, /* This parameter tells us
|
|
|
+ * we want to both read from
|
|
|
+ * and write to this memory,
|
|
|
+ * and consequently the file.
|
|
|
+ * This needs to be consistent
|
|
|
+ * with the mode we used to
|
|
|
+ * open the file! */
|
|
|
+ MAP_SHARED, /* The MAP_SHARED parameter allows us to
|
|
|
+ * share the memory with other processes
|
|
|
+ * and also back onto the disk. There are
|
|
|
+ * a lot of other flags we could choose
|
|
|
+ * here, too. Look 'em up! */
|
|
|
+ handle.file, /* The file that this memory corresponds
|
|
|
+ * to: the one we just opened. */
|
|
|
+ 0); /* And the offset into the file. This needs to be
|
|
|
+ * a multiple of the page size, but in this case,
|
|
|
+ * we're filling in the index, which will always
|
|
|
+ * be on the first page. */
|
|
|
+ return handle;
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+corned_beef_close(cb_handle handle)
|
|
|
+{
|
|
|
+ /* To close our handle, unmap the index file... */
|
|
|
+ munmap(handle.idx, PAGE_SZ);
|
|
|
+ /* And close the file itself. */
|
|
|
+ close(handle.file);
|
|
|
+}
|
|
|
+
|
|
|
+int
|
|
|
+corned_beef_init(cb_handle handle)
|
|
|
+{
|
|
|
+ int i;
|
|
|
+ /* To initializee our database, start by resizing the file to exactly
|
|
|
+ * one page. I'm assuming that the cb_index structure is smaller than
|
|
|
+ * a page, which is fine for me but not a good thing in general. */
|
|
|
+ ftruncate(handle.file, PAGE_SZ);
|
|
|
+ /* Zero all the bucket pointers, because all the buckets are empty. */
|
|
|
+ for (i = 0; i < 0xff; i++)
|
|
|
+ handle.idx->list_head[i] = 0;
|
|
|
+ /* And if we get another page, we should start counting at one. */
|
|
|
+ handle.idx->next_free_page = 1;
|
|
|
+}
|
|
|
+
|
|
|
+void*
|
|
|
+corned_beef_get_page(cb_handle handle, size_t offset)
|
|
|
+{
|
|
|
+ /* This is simple enough: mmap the page that corresponds to the
|
|
|
+ * given offset. */
|
|
|
+ return mmap(NULL, /* allocate it wherever the kernel wants... */
|
|
|
+ PAGE_SZ, /* make it PAGE_SZ bytes... */
|
|
|
+ PROT_READ | PROT_WRITE, /* allow reading and writing... */
|
|
|
+ MAP_SHARED, /* share it, so changes get written back... */
|
|
|
+ handle.file, /* use the file descriptor in the handle... */
|
|
|
+ offset * PAGE_SZ); /* And use the specified byte offset. */
|
|
|
+}
|
|
|
+
|
|
|
+size_t
|
|
|
+corned_beef_new_page(cb_handle handle, void** dest)
|
|
|
+{
|
|
|
+ /* Our index page keeps track of the next unused index, so we take
|
|
|
+ * that one, and bump it up for the next use... */
|
|
|
+ size_t new_page = handle.idx->next_free_page;
|
|
|
+ handle.idx->next_free_page += 1;
|
|
|
+ /* Resize the file so we have the space to actually use the new
|
|
|
+ * page... */
|
|
|
+ ftruncate(handle.file, PAGE_SZ * (new_page + 1));
|
|
|
+ /* Set the dest pointer to point to the mmap'ed chunk of the file
|
|
|
+ * that corresponds to the new page... */
|
|
|
+ *dest = corned_beef_get_page(handle, new_page);
|
|
|
+ /* And return the index into the new page. */
|
|
|
+ return new_page;
|
|
|
+}
|
|
|
+
|
|
|
+int
|
|
|
+corned_beef_lookup(cb_handle handle, char* key)
|
|
|
+{
|
|
|
+ /* When looking up, we start by finding the right bucket to
|
|
|
+ * start our search. */
|
|
|
+ size_t ptr = handle.idx->list_head[very_bad_hash(key)];
|
|
|
+
|
|
|
+ /* It's possible that the bucket is empty, in which case this
|
|
|
+ * loop will get skipped. */
|
|
|
+ while (ptr) {
|
|
|
+ /* We find the page corresponding to the current index */
|
|
|
+ cb_node* node = corned_beef_get_page(handle, ptr);
|
|
|
+
|
|
|
+ /* ...and check to see whether we've found the right key. */
|
|
|
+ if (strcmp(key, node->key) == 0) {
|
|
|
+ /* If so, we grab the page pointed to by the value and
|
|
|
+ * print it to stdout. */
|
|
|
+ char* val = corned_beef_get_page(handle, node->val_page);
|
|
|
+
|
|
|
+ printf("%s\n", val);
|
|
|
+ munmap(val, PAGE_SZ);
|
|
|
+ munmap(node, PAGE_SZ);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+ } else {
|
|
|
+ /* Otherwise, we move on to the next element in the list. */
|
|
|
+ ptr = node->next;
|
|
|
+ munmap(node, PAGE_SZ);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ fprintf(stderr, "Unable to find value for key \"%s\"\n", key);
|
|
|
+
|
|
|
+ return 99;
|
|
|
+}
|
|
|
+
|
|
|
+size_t
|
|
|
+corned_beef_add_node(cb_handle handle, char* key, char* val)
|
|
|
+{
|
|
|
+ cb_node* new;
|
|
|
+ char* dest;
|
|
|
+ /* We create a new page and get its index. This new page will
|
|
|
+ * contain our new node */
|
|
|
+ size_t result = corned_beef_new_page(handle, (void**) &new);
|
|
|
+
|
|
|
+ /* the next index is zero, because this is at the current end
|
|
|
+ * of the list. */
|
|
|
+ new->next = 0;
|
|
|
+ /* The key we can copy into the memory. */
|
|
|
+ strcpy(new->key, key);
|
|
|
+ /* And the value is on a different page, which create here */
|
|
|
+ new->val_page = corned_beef_new_page(handle, (void**) &dest);
|
|
|
+ /* Again, copying the value to the new page is trivial. */
|
|
|
+ strcpy(dest, val);
|
|
|
+
|
|
|
+ munmap(dest, PAGE_SZ);
|
|
|
+ munmap(new, PAGE_SZ);
|
|
|
+
|
|
|
+ return result;
|
|
|
+}
|
|
|
+
|
|
|
+int
|
|
|
+corned_beef_insert(cb_handle handle, char* key, char* val)
|
|
|
+{
|
|
|
+ /* We figure out which bucket to start looking in, which will give us
|
|
|
+ * a page offset into the file. */
|
|
|
+ unsigned char hash = very_bad_hash(key);
|
|
|
+ size_t ptr = handle.idx->list_head[hash];
|
|
|
+
|
|
|
+ if (!ptr) {
|
|
|
+ /* Zero is not a valid page offset---that's where our index page
|
|
|
+ * is---so if the page offset is zero, just add a new node here. */
|
|
|
+ handle.idx->list_head[hash] = corned_beef_add_node(handle, key, val);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ while (ptr) {
|
|
|
+ /* Otherwise, fetch the specified node and start moving along. */
|
|
|
+ cb_node* node = corned_beef_get_page(handle, ptr);
|
|
|
+
|
|
|
+ if (strcmp(key, node->key) == 0) {
|
|
|
+ /* If the key we're looking for already exists, then we can
|
|
|
+ * grab the page that contains the value and replace the
|
|
|
+ * value with the new one, which because of mmap, is just a
|
|
|
+ * simple strcpy. */
|
|
|
+ char* dest = corned_beef_get_page(handle, node->val_page);
|
|
|
+ strcpy(dest, val);
|
|
|
+
|
|
|
+ munmap(node, PAGE_SZ);
|
|
|
+ munmap(dest, PAGE_SZ);
|
|
|
+ return 0;
|
|
|
+ } else if (node->next == 0) {
|
|
|
+ /* If we haven't found it yet and we're at the end of the
|
|
|
+ * linked list, then set the next page to a new one that
|
|
|
+ * contains the key and value we're adding. */
|
|
|
+ node->next = corned_beef_add_node(handle, key, val);
|
|
|
+
|
|
|
+ munmap(node, PAGE_SZ);
|
|
|
+ return 0;
|
|
|
+ } else {
|
|
|
+ /* Otherwise, let's keep going down the list. */
|
|
|
+ ptr = node->next;
|
|
|
+ munmap(node, PAGE_SZ);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* We shouldn't ever hit this part if we're ignoring race conditions.
|
|
|
+ * (And I am ignoring them, for the purposes of this silly program.) */
|
|
|
+ return 99;
|
|
|
+}
|
|
|
+
|
|
|
+/* Our man function is a pretty trivial wrapper over the operations above
|
|
|
+ * so we can use this to initialize, modify, and query on-disk hash tables. */
|
|
|
+int
|
|
|
+main(int argc, char* argv[])
|
|
|
+{
|
|
|
+ if (argc == 3 && !strcmp(argv[2], "init")) {
|
|
|
+ cb_handle h = corned_beef_open(argv[1]);
|
|
|
+ corned_beef_init(h);
|
|
|
+ corned_beef_close(h);
|
|
|
+ } else if (argc == 5 && !strcmp(argv[2], "insert")) {
|
|
|
+ cb_handle h = corned_beef_open(argv[1]);
|
|
|
+ corned_beef_insert(h, argv[3], argv[4]);
|
|
|
+ corned_beef_close(h);
|
|
|
+ } else if (argc == 4 && !strcmp(argv[2], "lookup")) {
|
|
|
+ cb_handle h = corned_beef_open(argv[1]);
|
|
|
+ corned_beef_lookup(h, argv[3]);
|
|
|
+ corned_beef_close(h);
|
|
|
+ } else {
|
|
|
+ fprintf(stderr, "Usage:\n");
|
|
|
+ fprintf(stderr, " corned_beef [db] init\n");
|
|
|
+ fprintf(stderr, " corned_beef [db] insert [key]\n");
|
|
|
+ fprintf(stderr, " corned_beef [db] lookup [key] [val]\n");
|
|
|
+ return 99;
|
|
|
+ }
|
|
|
+ return 0;
|
|
|
+}
|