#include "corned_beef.h" /* mmap'ed memory has to be aligned to the page size of the ambient * system, so we'll just use that as the granularity for individual * chunks. */ #define PAGE_SZ (sysconf(_SC_PAGE_SIZE)) /* An intermediate node in our 'hash table' has to fit onto a page, * and it'll contain two size_t values to act as 'pointers' as well * as a null terminator, so the longest a key can be is this. A * value could be slightly longer, but we can cap it at this, too, * because that'll only cost us a handful of bytes. */ #define MAX_STR_SIZE (sysconf(_SC_PAGE_SIZE) - sizeof(size_t) * 2 - 1) /* This is the terrible hash function we will use to hash strings. */ unsigned char very_bad_hash(char* input) { unsigned char ret = 0; while (*input) ret += *(input++); return ret; } cb_handle corned_beef_open(char* filename) { /* Our handle contains both the file descriptor and the mmap'ed * index page of our hash table. That way, we can get easy access * to both! */ cb_handle handle; /* Here we're opening a file read-write; I could be more granular * in my permissions, but... eh... */ handle.file = open(filename, O_RDWR | O_CREAT, 0644); /* And here's mmap itself! The memory it returns corresponds exactly * to memory on the disk, so any changes that we make to the * in-memory representation will get propagated back to the disk. */ handle.idx = mmap(NULL, /* This parameter indicates where we want the * mmap'ed memory to live in the address space, * but it's really just a hint. If we leave it * as NULL, then the OS will give us a pointer * that's aligned appropriate and leave it at * that. */ PAGE_SZ, /* This parameter indicates how big we want * the mapping to be: we'll just use the * page size, to keep things uniform. This * does mean we're probably making a pretty * sparse file, if most of our keys/values * are small. */ PROT_READ | PROT_WRITE, /* This parameter tells us * we want to both read from * and write to this memory, * and consequently the file. * This needs to be consistent * with the mode we used to * open the file! */ MAP_SHARED, /* The MAP_SHARED parameter allows us to * share the memory with other processes * and also back onto the disk. There are * a lot of other flags we could choose * here, too. Look 'em up! */ handle.file, /* The file that this memory corresponds * to: the one we just opened. */ 0); /* And the offset into the file. This needs to be * a multiple of the page size, but in this case, * we're filling in the index, which will always * be on the first page. */ return handle; } void corned_beef_close(cb_handle handle) { /* To close our handle, unmap the index file... */ munmap(handle.idx, PAGE_SZ); /* And close the file itself. */ close(handle.file); } int corned_beef_init(cb_handle handle) { int i; /* To initializee our database, start by resizing the file to exactly * one page. I'm assuming that the cb_index structure is smaller than * a page, which is fine for me but not a good thing in general. */ ftruncate(handle.file, PAGE_SZ); /* Zero all the bucket pointers, because all the buckets are empty. */ for (i = 0; i < 0xff; i++) handle.idx->list_head[i] = 0; /* And if we get another page, we should start counting at one. */ handle.idx->next_free_page = 1; } void* corned_beef_get_page(cb_handle handle, size_t offset) { /* This is simple enough: mmap the page that corresponds to the * given offset. */ return mmap(NULL, /* allocate it wherever the kernel wants... */ PAGE_SZ, /* make it PAGE_SZ bytes... */ PROT_READ | PROT_WRITE, /* allow reading and writing... */ MAP_SHARED, /* share it, so changes get written back... */ handle.file, /* use the file descriptor in the handle... */ offset * PAGE_SZ); /* And use the specified byte offset. */ } size_t corned_beef_new_page(cb_handle handle, void** dest) { /* Our index page keeps track of the next unused index, so we take * that one, and bump it up for the next use... */ size_t new_page = handle.idx->next_free_page; handle.idx->next_free_page += 1; /* Resize the file so we have the space to actually use the new * page... */ ftruncate(handle.file, PAGE_SZ * (new_page + 1)); /* Set the dest pointer to point to the mmap'ed chunk of the file * that corresponds to the new page... */ *dest = corned_beef_get_page(handle, new_page); /* And return the index into the new page. */ return new_page; } int corned_beef_lookup(cb_handle handle, char* key) { /* When looking up, we start by finding the right bucket to * start our search. */ size_t ptr = handle.idx->list_head[very_bad_hash(key)]; /* It's possible that the bucket is empty, in which case this * loop will get skipped. */ while (ptr) { /* We find the page corresponding to the current index */ cb_node* node = corned_beef_get_page(handle, ptr); /* ...and check to see whether we've found the right key. */ if (strcmp(key, node->key) == 0) { /* If so, we grab the page pointed to by the value and * print it to stdout. */ char* val = corned_beef_get_page(handle, node->val_page); printf("%s\n", val); munmap(val, PAGE_SZ); munmap(node, PAGE_SZ); return 0; } else { /* Otherwise, we move on to the next element in the list. */ ptr = node->next; munmap(node, PAGE_SZ); } } fprintf(stderr, "Unable to find value for key \"%s\"\n", key); return 99; } size_t corned_beef_add_node(cb_handle handle, char* key, char* val) { cb_node* new; char* dest; /* We create a new page and get its index. This new page will * contain our new node */ size_t result = corned_beef_new_page(handle, (void**) &new); /* the next index is zero, because this is at the current end * of the list. */ new->next = 0; /* The key we can copy into the memory. */ strcpy(new->key, key); /* And the value is on a different page, which create here */ new->val_page = corned_beef_new_page(handle, (void**) &dest); /* Again, copying the value to the new page is trivial. */ strcpy(dest, val); munmap(dest, PAGE_SZ); munmap(new, PAGE_SZ); return result; } int corned_beef_insert(cb_handle handle, char* key, char* val) { /* We figure out which bucket to start looking in, which will give us * a page offset into the file. */ unsigned char hash = very_bad_hash(key); size_t ptr = handle.idx->list_head[hash]; if (!ptr) { /* Zero is not a valid page offset---that's where our index page * is---so if the page offset is zero, just add a new node here. */ handle.idx->list_head[hash] = corned_beef_add_node(handle, key, val); return 0; } while (ptr) { /* Otherwise, fetch the specified node and start moving along. */ cb_node* node = corned_beef_get_page(handle, ptr); if (strcmp(key, node->key) == 0) { /* If the key we're looking for already exists, then we can * grab the page that contains the value and replace the * value with the new one, which because of mmap, is just a * simple strcpy. */ char* dest = corned_beef_get_page(handle, node->val_page); strcpy(dest, val); munmap(node, PAGE_SZ); munmap(dest, PAGE_SZ); return 0; } else if (node->next == 0) { /* If we haven't found it yet and we're at the end of the * linked list, then set the next page to a new one that * contains the key and value we're adding. */ node->next = corned_beef_add_node(handle, key, val); munmap(node, PAGE_SZ); return 0; } else { /* Otherwise, let's keep going down the list. */ ptr = node->next; munmap(node, PAGE_SZ); } } /* We shouldn't ever hit this part if we're ignoring race conditions. * (And I am ignoring them, for the purposes of this silly program.) */ return 99; } /* Our man function is a pretty trivial wrapper over the operations above * so we can use this to initialize, modify, and query on-disk hash tables. */ int main(int argc, char* argv[]) { if (argc == 3 && !strcmp(argv[2], "init")) { cb_handle h = corned_beef_open(argv[1]); corned_beef_init(h); corned_beef_close(h); } else if (argc == 5 && !strcmp(argv[2], "insert")) { cb_handle h = corned_beef_open(argv[1]); corned_beef_insert(h, argv[3], argv[4]); corned_beef_close(h); } else if (argc == 4 && !strcmp(argv[2], "lookup")) { cb_handle h = corned_beef_open(argv[1]); corned_beef_lookup(h, argv[3]); corned_beef_close(h); } else { fprintf(stderr, "Usage:\n"); fprintf(stderr, " corned_beef [db] init\n"); fprintf(stderr, " corned_beef [db] insert [key]\n"); fprintf(stderr, " corned_beef [db] lookup [key] [val]\n"); return 99; } return 0; }