123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260 |
- #include "corned_beef.h"
- /* mmap'ed memory has to be aligned to the page size of the ambient
- * system, so we'll just use that as the granularity for individual
- * chunks. */
- #define PAGE_SZ (sysconf(_SC_PAGE_SIZE))
- /* An intermediate node in our 'hash table' has to fit onto a page,
- * and it'll contain two size_t values to act as 'pointers' as well
- * as a null terminator, so the longest a key can be is this. A
- * value could be slightly longer, but we can cap it at this, too,
- * because that'll only cost us a handful of bytes. */
- #define MAX_STR_SIZE (sysconf(_SC_PAGE_SIZE) - sizeof(size_t) * 2 - 1)
- /* This is the terrible hash function we will use to hash strings. */
- unsigned char
- very_bad_hash(char* input)
- {
- unsigned char ret = 0;
- while (*input)
- ret += *(input++);
- return ret;
- }
- cb_handle
- corned_beef_open(char* filename)
- {
- /* Our handle contains both the file descriptor and the mmap'ed
- * index page of our hash table. That way, we can get easy access
- * to both! */
- cb_handle handle;
- /* Here we're opening a file read-write; I could be more granular
- * in my permissions, but... eh... */
- handle.file = open(filename, O_RDWR | O_CREAT, 0644);
- /* And here's mmap itself! The memory it returns corresponds exactly
- * to memory on the disk, so any changes that we make to the
- * in-memory representation will get propagated back to the disk. */
- handle.idx = mmap(NULL, /* This parameter indicates where we want the
- * mmap'ed memory to live in the address space,
- * but it's really just a hint. If we leave it
- * as NULL, then the OS will give us a pointer
- * that's aligned appropriate and leave it at
- * that. */
- PAGE_SZ, /* This parameter indicates how big we want
- * the mapping to be: we'll just use the
- * page size, to keep things uniform. This
- * does mean we're probably making a pretty
- * sparse file, if most of our keys/values
- * are small. */
- PROT_READ | PROT_WRITE, /* This parameter tells us
- * we want to both read from
- * and write to this memory,
- * and consequently the file.
- * This needs to be consistent
- * with the mode we used to
- * open the file! */
- MAP_SHARED, /* The MAP_SHARED parameter allows us to
- * share the memory with other processes
- * and also back onto the disk. There are
- * a lot of other flags we could choose
- * here, too. Look 'em up! */
- handle.file, /* The file that this memory corresponds
- * to: the one we just opened. */
- 0); /* And the offset into the file. This needs to be
- * a multiple of the page size, but in this case,
- * we're filling in the index, which will always
- * be on the first page. */
- return handle;
- }
- void
- corned_beef_close(cb_handle handle)
- {
- /* To close our handle, unmap the index file... */
- munmap(handle.idx, PAGE_SZ);
- /* And close the file itself. */
- close(handle.file);
- }
- int
- corned_beef_init(cb_handle handle)
- {
- int i;
- /* To initializee our database, start by resizing the file to exactly
- * one page. I'm assuming that the cb_index structure is smaller than
- * a page, which is fine for me but not a good thing in general. */
- ftruncate(handle.file, PAGE_SZ);
- /* Zero all the bucket pointers, because all the buckets are empty. */
- for (i = 0; i < 0xff; i++)
- handle.idx->list_head[i] = 0;
- /* And if we get another page, we should start counting at one. */
- handle.idx->next_free_page = 1;
- }
- void*
- corned_beef_get_page(cb_handle handle, size_t offset)
- {
- /* This is simple enough: mmap the page that corresponds to the
- * given offset. */
- return mmap(NULL, /* allocate it wherever the kernel wants... */
- PAGE_SZ, /* make it PAGE_SZ bytes... */
- PROT_READ | PROT_WRITE, /* allow reading and writing... */
- MAP_SHARED, /* share it, so changes get written back... */
- handle.file, /* use the file descriptor in the handle... */
- offset * PAGE_SZ); /* And use the specified byte offset. */
- }
- size_t
- corned_beef_new_page(cb_handle handle, void** dest)
- {
- /* Our index page keeps track of the next unused index, so we take
- * that one, and bump it up for the next use... */
- size_t new_page = handle.idx->next_free_page;
- handle.idx->next_free_page += 1;
- /* Resize the file so we have the space to actually use the new
- * page... */
- ftruncate(handle.file, PAGE_SZ * (new_page + 1));
- /* Set the dest pointer to point to the mmap'ed chunk of the file
- * that corresponds to the new page... */
- *dest = corned_beef_get_page(handle, new_page);
- /* And return the index into the new page. */
- return new_page;
- }
- int
- corned_beef_lookup(cb_handle handle, char* key)
- {
- /* When looking up, we start by finding the right bucket to
- * start our search. */
- size_t ptr = handle.idx->list_head[very_bad_hash(key)];
- /* It's possible that the bucket is empty, in which case this
- * loop will get skipped. */
- while (ptr) {
- /* We find the page corresponding to the current index */
- cb_node* node = corned_beef_get_page(handle, ptr);
- /* ...and check to see whether we've found the right key. */
- if (strcmp(key, node->key) == 0) {
- /* If so, we grab the page pointed to by the value and
- * print it to stdout. */
- char* val = corned_beef_get_page(handle, node->val_page);
- printf("%s\n", val);
- munmap(val, PAGE_SZ);
- munmap(node, PAGE_SZ);
- return 0;
- } else {
- /* Otherwise, we move on to the next element in the list. */
- ptr = node->next;
- munmap(node, PAGE_SZ);
- }
- }
- fprintf(stderr, "Unable to find value for key \"%s\"\n", key);
-
- return 99;
- }
- size_t
- corned_beef_add_node(cb_handle handle, char* key, char* val)
- {
- cb_node* new;
- char* dest;
- /* We create a new page and get its index. This new page will
- * contain our new node */
- size_t result = corned_beef_new_page(handle, (void**) &new);
- /* the next index is zero, because this is at the current end
- * of the list. */
- new->next = 0;
- /* The key we can copy into the memory. */
- strcpy(new->key, key);
- /* And the value is on a different page, which create here */
- new->val_page = corned_beef_new_page(handle, (void**) &dest);
- /* Again, copying the value to the new page is trivial. */
- strcpy(dest, val);
- munmap(dest, PAGE_SZ);
- munmap(new, PAGE_SZ);
-
- return result;
- }
- int
- corned_beef_insert(cb_handle handle, char* key, char* val)
- {
- /* We figure out which bucket to start looking in, which will give us
- * a page offset into the file. */
- unsigned char hash = very_bad_hash(key);
- size_t ptr = handle.idx->list_head[hash];
- if (!ptr) {
- /* Zero is not a valid page offset---that's where our index page
- * is---so if the page offset is zero, just add a new node here. */
- handle.idx->list_head[hash] = corned_beef_add_node(handle, key, val);
- return 0;
- }
- while (ptr) {
- /* Otherwise, fetch the specified node and start moving along. */
- cb_node* node = corned_beef_get_page(handle, ptr);
- if (strcmp(key, node->key) == 0) {
- /* If the key we're looking for already exists, then we can
- * grab the page that contains the value and replace the
- * value with the new one, which because of mmap, is just a
- * simple strcpy. */
- char* dest = corned_beef_get_page(handle, node->val_page);
- strcpy(dest, val);
-
- munmap(node, PAGE_SZ);
- munmap(dest, PAGE_SZ);
- return 0;
- } else if (node->next == 0) {
- /* If we haven't found it yet and we're at the end of the
- * linked list, then set the next page to a new one that
- * contains the key and value we're adding. */
- node->next = corned_beef_add_node(handle, key, val);
-
- munmap(node, PAGE_SZ);
- return 0;
- } else {
- /* Otherwise, let's keep going down the list. */
- ptr = node->next;
- munmap(node, PAGE_SZ);
- }
- }
- /* We shouldn't ever hit this part if we're ignoring race conditions.
- * (And I am ignoring them, for the purposes of this silly program.) */
- return 99;
- }
- /* Our man function is a pretty trivial wrapper over the operations above
- * so we can use this to initialize, modify, and query on-disk hash tables. */
- int
- main(int argc, char* argv[])
- {
- if (argc == 3 && !strcmp(argv[2], "init")) {
- cb_handle h = corned_beef_open(argv[1]);
- corned_beef_init(h);
- corned_beef_close(h);
- } else if (argc == 5 && !strcmp(argv[2], "insert")) {
- cb_handle h = corned_beef_open(argv[1]);
- corned_beef_insert(h, argv[3], argv[4]);
- corned_beef_close(h);
- } else if (argc == 4 && !strcmp(argv[2], "lookup")) {
- cb_handle h = corned_beef_open(argv[1]);
- corned_beef_lookup(h, argv[3]);
- corned_beef_close(h);
- } else {
- fprintf(stderr, "Usage:\n");
- fprintf(stderr, " corned_beef [db] init\n");
- fprintf(stderr, " corned_beef [db] insert [key]\n");
- fprintf(stderr, " corned_beef [db] lookup [key] [val]\n");
- return 99;
- }
- return 0;
- }
|