corned_beef.c 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. #include "corned_beef.h"
  2. /* mmap'ed memory has to be aligned to the page size of the ambient
  3. * system, so we'll just use that as the granularity for individual
  4. * chunks. */
  5. #define PAGE_SZ (sysconf(_SC_PAGE_SIZE))
  6. /* An intermediate node in our 'hash table' has to fit onto a page,
  7. * and it'll contain two size_t values to act as 'pointers' as well
  8. * as a null terminator, so the longest a key can be is this. A
  9. * value could be slightly longer, but we can cap it at this, too,
  10. * because that'll only cost us a handful of bytes. */
  11. #define MAX_STR_SIZE (sysconf(_SC_PAGE_SIZE) - sizeof(size_t) * 2 - 1)
  12. /* This is the terrible hash function we will use to hash strings. */
  13. unsigned char
  14. very_bad_hash(char* input)
  15. {
  16. unsigned char ret = 0;
  17. while (*input)
  18. ret += *(input++);
  19. return ret;
  20. }
  21. cb_handle
  22. corned_beef_open(char* filename)
  23. {
  24. /* Our handle contains both the file descriptor and the mmap'ed
  25. * index page of our hash table. That way, we can get easy access
  26. * to both! */
  27. cb_handle handle;
  28. /* Here we're opening a file read-write; I could be more granular
  29. * in my permissions, but... eh... */
  30. handle.file = open(filename, O_RDWR | O_CREAT, 0644);
  31. /* And here's mmap itself! The memory it returns corresponds exactly
  32. * to memory on the disk, so any changes that we make to the
  33. * in-memory representation will get propagated back to the disk. */
  34. handle.idx = mmap(NULL, /* This parameter indicates where we want the
  35. * mmap'ed memory to live in the address space,
  36. * but it's really just a hint. If we leave it
  37. * as NULL, then the OS will give us a pointer
  38. * that's aligned appropriate and leave it at
  39. * that. */
  40. PAGE_SZ, /* This parameter indicates how big we want
  41. * the mapping to be: we'll just use the
  42. * page size, to keep things uniform. This
  43. * does mean we're probably making a pretty
  44. * sparse file, if most of our keys/values
  45. * are small. */
  46. PROT_READ | PROT_WRITE, /* This parameter tells us
  47. * we want to both read from
  48. * and write to this memory,
  49. * and consequently the file.
  50. * This needs to be consistent
  51. * with the mode we used to
  52. * open the file! */
  53. MAP_SHARED, /* The MAP_SHARED parameter allows us to
  54. * share the memory with other processes
  55. * and also back onto the disk. There are
  56. * a lot of other flags we could choose
  57. * here, too. Look 'em up! */
  58. handle.file, /* The file that this memory corresponds
  59. * to: the one we just opened. */
  60. 0); /* And the offset into the file. This needs to be
  61. * a multiple of the page size, but in this case,
  62. * we're filling in the index, which will always
  63. * be on the first page. */
  64. return handle;
  65. }
  66. void
  67. corned_beef_close(cb_handle handle)
  68. {
  69. /* To close our handle, unmap the index file... */
  70. munmap(handle.idx, PAGE_SZ);
  71. /* And close the file itself. */
  72. close(handle.file);
  73. }
  74. int
  75. corned_beef_init(cb_handle handle)
  76. {
  77. int i;
  78. /* To initializee our database, start by resizing the file to exactly
  79. * one page. I'm assuming that the cb_index structure is smaller than
  80. * a page, which is fine for me but not a good thing in general. */
  81. ftruncate(handle.file, PAGE_SZ);
  82. /* Zero all the bucket pointers, because all the buckets are empty. */
  83. for (i = 0; i < 0xff; i++)
  84. handle.idx->list_head[i] = 0;
  85. /* And if we get another page, we should start counting at one. */
  86. handle.idx->next_free_page = 1;
  87. }
  88. void*
  89. corned_beef_get_page(cb_handle handle, size_t offset)
  90. {
  91. /* This is simple enough: mmap the page that corresponds to the
  92. * given offset. */
  93. return mmap(NULL, /* allocate it wherever the kernel wants... */
  94. PAGE_SZ, /* make it PAGE_SZ bytes... */
  95. PROT_READ | PROT_WRITE, /* allow reading and writing... */
  96. MAP_SHARED, /* share it, so changes get written back... */
  97. handle.file, /* use the file descriptor in the handle... */
  98. offset * PAGE_SZ); /* And use the specified byte offset. */
  99. }
  100. size_t
  101. corned_beef_new_page(cb_handle handle, void** dest)
  102. {
  103. /* Our index page keeps track of the next unused index, so we take
  104. * that one, and bump it up for the next use... */
  105. size_t new_page = handle.idx->next_free_page;
  106. handle.idx->next_free_page += 1;
  107. /* Resize the file so we have the space to actually use the new
  108. * page... */
  109. ftruncate(handle.file, PAGE_SZ * (new_page + 1));
  110. /* Set the dest pointer to point to the mmap'ed chunk of the file
  111. * that corresponds to the new page... */
  112. *dest = corned_beef_get_page(handle, new_page);
  113. /* And return the index into the new page. */
  114. return new_page;
  115. }
  116. int
  117. corned_beef_lookup(cb_handle handle, char* key)
  118. {
  119. /* When looking up, we start by finding the right bucket to
  120. * start our search. */
  121. size_t ptr = handle.idx->list_head[very_bad_hash(key)];
  122. /* It's possible that the bucket is empty, in which case this
  123. * loop will get skipped. */
  124. while (ptr) {
  125. /* We find the page corresponding to the current index */
  126. cb_node* node = corned_beef_get_page(handle, ptr);
  127. /* ...and check to see whether we've found the right key. */
  128. if (strcmp(key, node->key) == 0) {
  129. /* If so, we grab the page pointed to by the value and
  130. * print it to stdout. */
  131. char* val = corned_beef_get_page(handle, node->val_page);
  132. printf("%s\n", val);
  133. munmap(val, PAGE_SZ);
  134. munmap(node, PAGE_SZ);
  135. return 0;
  136. } else {
  137. /* Otherwise, we move on to the next element in the list. */
  138. ptr = node->next;
  139. munmap(node, PAGE_SZ);
  140. }
  141. }
  142. fprintf(stderr, "Unable to find value for key \"%s\"\n", key);
  143. return 99;
  144. }
  145. size_t
  146. corned_beef_add_node(cb_handle handle, char* key, char* val)
  147. {
  148. cb_node* new;
  149. char* dest;
  150. /* We create a new page and get its index. This new page will
  151. * contain our new node */
  152. size_t result = corned_beef_new_page(handle, (void**) &new);
  153. /* the next index is zero, because this is at the current end
  154. * of the list. */
  155. new->next = 0;
  156. /* The key we can copy into the memory. */
  157. strcpy(new->key, key);
  158. /* And the value is on a different page, which create here */
  159. new->val_page = corned_beef_new_page(handle, (void**) &dest);
  160. /* Again, copying the value to the new page is trivial. */
  161. strcpy(dest, val);
  162. munmap(dest, PAGE_SZ);
  163. munmap(new, PAGE_SZ);
  164. return result;
  165. }
  166. int
  167. corned_beef_insert(cb_handle handle, char* key, char* val)
  168. {
  169. /* We figure out which bucket to start looking in, which will give us
  170. * a page offset into the file. */
  171. unsigned char hash = very_bad_hash(key);
  172. size_t ptr = handle.idx->list_head[hash];
  173. if (!ptr) {
  174. /* Zero is not a valid page offset---that's where our index page
  175. * is---so if the page offset is zero, just add a new node here. */
  176. handle.idx->list_head[hash] = corned_beef_add_node(handle, key, val);
  177. return 0;
  178. }
  179. while (ptr) {
  180. /* Otherwise, fetch the specified node and start moving along. */
  181. cb_node* node = corned_beef_get_page(handle, ptr);
  182. if (strcmp(key, node->key) == 0) {
  183. /* If the key we're looking for already exists, then we can
  184. * grab the page that contains the value and replace the
  185. * value with the new one, which because of mmap, is just a
  186. * simple strcpy. */
  187. char* dest = corned_beef_get_page(handle, node->val_page);
  188. strcpy(dest, val);
  189. munmap(node, PAGE_SZ);
  190. munmap(dest, PAGE_SZ);
  191. return 0;
  192. } else if (node->next == 0) {
  193. /* If we haven't found it yet and we're at the end of the
  194. * linked list, then set the next page to a new one that
  195. * contains the key and value we're adding. */
  196. node->next = corned_beef_add_node(handle, key, val);
  197. munmap(node, PAGE_SZ);
  198. return 0;
  199. } else {
  200. /* Otherwise, let's keep going down the list. */
  201. ptr = node->next;
  202. munmap(node, PAGE_SZ);
  203. }
  204. }
  205. /* We shouldn't ever hit this part if we're ignoring race conditions.
  206. * (And I am ignoring them, for the purposes of this silly program.) */
  207. return 99;
  208. }
  209. /* Our man function is a pretty trivial wrapper over the operations above
  210. * so we can use this to initialize, modify, and query on-disk hash tables. */
  211. int
  212. main(int argc, char* argv[])
  213. {
  214. if (argc == 3 && !strcmp(argv[2], "init")) {
  215. cb_handle h = corned_beef_open(argv[1]);
  216. corned_beef_init(h);
  217. corned_beef_close(h);
  218. } else if (argc == 5 && !strcmp(argv[2], "insert")) {
  219. cb_handle h = corned_beef_open(argv[1]);
  220. corned_beef_insert(h, argv[3], argv[4]);
  221. corned_beef_close(h);
  222. } else if (argc == 4 && !strcmp(argv[2], "lookup")) {
  223. cb_handle h = corned_beef_open(argv[1]);
  224. corned_beef_lookup(h, argv[3]);
  225. corned_beef_close(h);
  226. } else {
  227. fprintf(stderr, "Usage:\n");
  228. fprintf(stderr, " corned_beef [db] init\n");
  229. fprintf(stderr, " corned_beef [db] insert [key]\n");
  230. fprintf(stderr, " corned_beef [db] lookup [key] [val]\n");
  231. return 99;
  232. }
  233. return 0;
  234. }