btree.h

Go to the documentation of this file.
00001 /**
00002  * @file ccn/btree.h
00003  * BTree
00004  */
00005 /* Part of the CCNx C Library.
00006  *
00007  * Copyright (C) 2011-12 Palo Alto Research Center, Inc.
00008  *
00009  * This library is free software; you can redistribute it and/or modify it
00010  * under the terms of the GNU Lesser General Public License version 2.1
00011  * as published by the Free Software Foundation.
00012  * This library is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00015  * Lesser General Public License for more details. You should have received
00016  * a copy of the GNU Lesser General Public License along with this library;
00017  * if not, write to the Free Software Foundation, Inc., 51 Franklin Street,
00018  * Fifth Floor, Boston, MA 02110-1301 USA.
00019  */
00020  
00021 #ifndef CCN_BTREE_DEFINED
00022 #define CCN_BTREE_DEFINED
00023 
00024 #include <stdio.h>
00025 #include <sys/types.h>
00026 #include <ccn/charbuf.h>
00027 #include <ccn/hashtb.h>
00028 
00029 struct ccn_btree_io;        /* I/O methods and their common data; defined below */
00030 struct ccn_btree_node;      /* state of one btree node; defined below */
00031 
00032 /**
00033  * Methods for external I/O of btree nodes.
00034  *
00035  * These are supplied by the client, and provide an abstraction
00036  * to hold the persistent representation of the btree.
00037  *
00038  * Each node has a nodeid that serves as its filename.  These start at 1 and
00039  * are assigned consecutively. The node may correspond to a file in a file
00040  * system, or to some other abstraction as appropriate.
00041  *
00042  * Open should prepare for I/O to a node.  It may use the iodata slot to
00043  * keep track of its state, and should set iodata to a non-NULL value.
00044  * It should update the count of openfds as appropriate.
00045  *
00046  * Read gets bytes from the file and places them into the buffer at the
00047  * corresponding position.  The unsigned parameter limits the max buffer size.
00048  * Bytes prior to the clean mark do not need to be read.
00049  * The buffer should be extended, if necessary, to hold the data.
00050  * Read is not responsible for updating the clean mark.
00051  *
00052  * Write puts bytes from the buffer into the file, and truncates the file
00053  * according to the buffer length.  Bytes prior to the clean mark do not
00054  * need to be written, since they should be the same in the buffer and the
00055  * file.  Write is not responsible for updating the clean mark.
00056  *
00057  * Close is called at the obvious time.  It should free any node io state and
00058  * set iodata to NULL, updating openfds as appropriate.  It should not change
00059  * the other parts of the node.
00060  *
00061  * Negative return values indicate errors.
00062  */
00063 typedef int (*ccn_btree_io_openfn)
00064     (struct ccn_btree_io *, struct ccn_btree_node *);
00065 typedef int (*ccn_btree_io_readfn)
00066     (struct ccn_btree_io *, struct ccn_btree_node *, unsigned); /* unsigned: max buffer size */
00067 typedef int (*ccn_btree_io_writefn)
00068     (struct ccn_btree_io *, struct ccn_btree_node *);
00069 typedef int (*ccn_btree_io_closefn)
00070     (struct ccn_btree_io *, struct ccn_btree_node *);
00071 typedef int (*ccn_btree_io_destroyfn)
00072     (struct ccn_btree_io **); /* receives the address of the caller's io pointer */
00073 
00074 /* External name of a btree node; the I/O layer uses it as the node's filename. */
00075 typedef unsigned ccn_btnodeid;
00076 
00077 /**
00078  * Holds the I/O methods and the data they share across all nodes.
00079  */
00080 struct ccn_btree_io {
00081     char clue[16]; /* unused except for debugging/logging */
00082     ccn_btree_io_openfn btopen;       /**< Prepare for I/O to a node */
00083     ccn_btree_io_readfn btread;       /**< Get bytes from storage into the node buffer */
00084     ccn_btree_io_writefn btwrite;     /**< Put bytes from the node buffer into storage */
00085     ccn_btree_io_closefn btclose;     /**< Free per-node io state */
00086     ccn_btree_io_destroyfn btdestroy; /**< Presumably disposes of this io object - TODO confirm */
00087     ccn_btnodeid maxnodeid;    /**< Largest assigned nodeid */
00088     int openfds;               /**< Number of open files; maintained by btopen/btclose */
00089     void *data;                /**< Not described here; presumably private to the io implementation */
00090 };
00091 /**
00092  * State associated with a single btree node
00093  *
00094  * These usually live in the resident hashtb of a ccn_btree, but might be
00095  * elsewhere (such as stack-allocated) in some cases.
00096  */
00097 struct ccn_btree_node {
00098     ccn_btnodeid nodeid;        /**< Identity of node */
00099     struct ccn_charbuf *buf;    /**< The internal buffer holding the node's bytes */
00100     void *iodata;               /**< Private use by ccn_btree_io methods; non-NULL once opened */
00101     ccn_btnodeid parent;        /**< Parent node id; 0 if unknown */
00102     unsigned clean;             /**< Number of stable buffered bytes at front (the clean mark) */
00103     unsigned freelow;           /**< Index of first unused byte of free space */
00104     unsigned corrupt;           /**< Nonzero means structure is not to be trusted */
00105     unsigned activity;          /**< Meters use of the node; see CCN_BT_ACTIVITY_*_BUMP */
00106 };
00107 
00108 /** Amount added to node->activity when the node is referenced but not changed */
00109 #define CCN_BT_ACTIVITY_REFERENCE_BUMP 1
00110 /** Amount added to node->activity when the node is read from disk */
00111 #define CCN_BT_ACTIVITY_READ_BUMP 8
00112 /** Amount added to node->activity when the node is modified */
00113 #define CCN_BT_ACTIVITY_UPDATE_BUMP 16
00114 
00115 /** Limit on the number of btree nodes kept open when idle */
00116 #define CCN_BT_OPEN_NODES_IDLE 5
00117 /** Limit on the number of file descriptors the btree should use at a time */
00118 #define CCN_BT_OPEN_NODES_LIMIT 13
00119 
00120 
00121 /**
00122  * State associated with a btree as a whole (one per tree)
00123  */
00124 struct ccn_btree {
00125     unsigned magic;             /**< for making sure we point to a btree */
00126     ccn_btnodeid nextnodeid;    /**< for allocating new btree nodes */
00127     struct ccn_btree_io *io;    /**< storage layer */
00128     struct hashtb *resident;    /**< table of ccn_btree_node, keyed by nodeid */
00129     ccn_btnodeid nextspill;     /**< undersize node that needs spilling */
00130     ccn_btnodeid nextsplit;     /**< oversize node that needs splitting */
00131     ccn_btnodeid missedsplit;   /**< should stay zero; NOTE(review): meaning not stated here */
00132     int errors;                 /**< counter for detected errors */
00133     int cleanreq;               /**< if nonzero, cleaning might be needed */
00134     /* tunables */
00135     int full;                   /**< split internal nodes bigger than this */
00136     int full0;                  /**< split leaf nodes bigger than this */
00137     int nodebytes;              /**< limit size of node */
00138     int nodepool;               /**< limit resident size */
00139 };
00140 
00141 /**
00142  *  Structure of a node.
00143  *
00144  *  These are as they appear on external storage, so we stick to
00145  *  single-byte types to keep it portable between machines.
00146  *  Multi-byte numeric fields are always in big-endian format.
00147  *
00148  *  Within a node, the entries are fixed size.
00149  *  The entries are packed together at the end of the node's storage,
00150  *  so that by examining the last entry the location of the other entries
00151  *  can be determined directly.  The entsz field includes the whole entry,
00152  *  which consists of a payload followed by a trailer.
00153  *
00154  *  The keys are stored in the first portion of the node.  They may be
00155  *  in multiple pieces, and the pieces may overlap arbitrarily.  This offers
00156  *  a very simple form of compression, since the keys within a node are
00157  *  very likely to have a lot in common with each other.
00158  *
00159  *  A few bytes at the very beginning serve as a header (declared below).
00160  *
00161  * This is the overall structure of a node:
00162  *
00163  *  +---+-----------------------+--------------+----+----+-- --+----+
00164  *  |hdr|..string......space....| (free space) | E0 | E1 | ... | En |
00165  *  +---+-----------------------+--------------+----+----+-- --+----+
00166  *
00167  * It is designed so that new entries can be added without having to
00168  * rewrite all of the string space.  Thus the header should not contain
00169  * things that we expect to change often.
00170  */
00171 struct ccn_btree_node_header {
00172     unsigned char magic[4];     /**< File magic */
00173     unsigned char version[1];   /**< Format version */
00174     unsigned char nodetype[1];  /**< Indicates root node, backup root, etc. */
00175     unsigned char level[1];     /**< Level within the tree */
00176     unsigned char extsz[1];     /**< Header extension size, in CCN_BT_SIZE_UNITS */
00177 };
00178 
00179 /**
00180  *  Structure of a node entry trailer.
00181  *
00182  * This is how the last few bytes of each entry within a node are arranged.
00183  * A key is described by up to two pieces, each given as an offset and a size.
00184  */
00185 struct ccn_btree_entry_trailer {
00186     unsigned char koff0[4];     /**< offset of piece 0 of the key */
00187     unsigned char ksiz0[2];     /**< size of piece 0 of the key */
00188     unsigned char koff1[4];     /**< offset of piece 1 of the key */
00189     unsigned char ksiz1[2];     /**< size of piece 1 of the key */
00190     unsigned char entdx[2];     /**< index of this entry within the node */
00191     unsigned char level[1];     /**< leaf nodes are at level 0 */
00192     unsigned char entsz[1];     /**< entry size in CCN_BT_SIZE_UNITS */
00193 };
00194 #define CCN_BT_SIZE_UNITS 8 /* unit for entsz and extsz; presumably bytes - TODO confirm */
00195 /** Maximum key size, dictated by the 2-byte ksiz0/ksiz1 fields above */
00196 #define CCN_BT_MAX_KEY_SIZE 65535
00197 
00198 /**
00199  *  Structure of the entry payload within an internal (non-leaf) node.
00200  */
00201 struct ccn_btree_internal_payload {
00202     unsigned char magic[1];     /**< CCN_BT_INTERNAL_MAGIC */
00203     unsigned char pad[3];       /**< must be zero */
00204     unsigned char child[4];     /**< nodeid of a child */
00205 };
00206 #define CCN_BT_INTERNAL_MAGIC 0xCC /* expected value of ccn_btree_internal_payload.magic */
00207 /**
00208  *  Logical structure of a whole entry (payload, then trailer) in an internal (non-leaf) node.
00209  */
00210 struct ccn_btree_internal_entry {
00211     struct ccn_btree_internal_payload ie;   /* payload naming the child node */
00212     struct ccn_btree_entry_trailer trailer; /* common entry trailer */
00213 };
00214 
00215 /* More extensive function descriptions are provided in the code. */
00216 
00217 /* Get the number of entries within the node */
00218 int ccn_btree_node_nent(struct ccn_btree_node *node);
00219 
00220 /* Get the node's level (leaves are at level 0) */
00221 int ccn_btree_node_level(struct ccn_btree_node *node);
00222 
00223 /* Get the node's entry size (entries within a node are fixed size) */
00224 int ccn_btree_node_getentrysize(struct ccn_btree_node *node);
00225 
00226 /* Get the node's payload size (the payload is the part of an entry before its trailer) */
00227 int ccn_btree_node_payloadsize(struct ccn_btree_node *node);
00228 
00229 /* Get address of the entry at index i within node; payload_bytes is presumably the expected payload size */
00230 void *ccn_btree_node_getentry(size_t payload_bytes,
00231                               struct ccn_btree_node *node, int i);
00232 
00233 /* Fetch the key of entry i and place it into dst */
00234 int ccn_btree_key_fetch(struct ccn_charbuf *dst,
00235                         struct ccn_btree_node *node, int i);
00236 
00237 /* Append the key of entry i to dst */
00238 int ccn_btree_key_append(struct ccn_charbuf *dst,
00239                          struct ccn_btree_node *node, int i);
00240 
00241 /* Compare the given key (size bytes at key) with the key in entry i of the node */
00242 int ccn_btree_compare(const unsigned char *key, size_t size,
00243                       struct ccn_btree_node *node, int i);
00244 
00245 #define CCN_BT_ENCRES(ndx, success) (2 * (ndx) + ((success) || 0)) /* pack: index doubled, plus 1 if success is nonzero */
00246 #define CCN_BT_SRCH_FOUND(res) ((res) & 1) /* low bit of a packed result: the success flag */
00247 #define CCN_BT_SRCH_INDEX(res) ((res) >> 1) /* index part of a packed result */
00248 /* Search within the node for the key, or something near it. NOTE(review): result presumably decodes with CCN_BT_SRCH_*; check for negative values first - confirm */
00249 int ccn_btree_searchnode(const unsigned char *key, size_t size,
00250                          struct ccn_btree_node *node);
00251 
00252 /* Insert a new entry, with the given key and payload, at slot i of node */
00253 int ccn_btree_insert_entry(struct ccn_btree_node *node, int i,
00254                            const unsigned char *key, size_t keysize,
00255                            void *payload, size_t payload_bytes);
00256 
00257 /* Delete the entry at slot i of node */
00258 int ccn_btree_delete_entry(struct ccn_btree_node *node, int i);
00259 
00260 /* Initialize a btree node with the given level, nodetype and header extension size */
00261 int ccn_btree_init_node(struct ccn_btree_node *node,
00262                         int level, unsigned char nodetype, unsigned char extsz);
00263 
00264 /* Test for an oversize node (see the full/full0 tunables in struct ccn_btree) */
00265 int ccn_btree_oversize(struct ccn_btree *btree, struct ccn_btree_node *node);
00266 
00267 /* Test whether the node is unbalanced */
00268 int ccn_btree_unbalance(struct ccn_btree *btree, struct ccn_btree_node *node);
00269 
00270 /* Check a single node for internal consistency */
00271 int ccn_btree_chknode(struct ccn_btree_node *node);
00272 
00273 /*
00274  * Operations on the btree as a whole
00275  */
00276 
00277 /* Create and destroy a btree handle; destroy takes the address of the caller's pointer */
00278 struct ccn_btree *ccn_btree_create(void);
00279 int ccn_btree_destroy(struct ccn_btree **);
00280 
00281 /* Record an error against the btree; info identifies it (presumably for logging - confirm) */
00282 void ccn_btree_note_error(struct ccn_btree *bt, int info);
00283 
00284 /* Access the node with the given id, creating or reading it if necessary */
00285 struct ccn_btree_node *ccn_btree_getnode(struct ccn_btree *bt,
00286                                          ccn_btnodeid nodeid,
00287                                          ccn_btnodeid parentid);
00288 
00289 /* Get a node handle only if the node is already resident */
00290 struct ccn_btree_node *ccn_btree_rnode(struct ccn_btree *bt,
00291                                        ccn_btnodeid nodeid);
00292 
00293 /* Clean a node and release io resources, retaining the cached node in memory */
00294 int ccn_btree_close_node(struct ccn_btree *btree, struct ccn_btree_node *node);
00295 
00296 /* Look up key, starting from the default root; leafp is an output parameter */
00297 int ccn_btree_lookup(struct ccn_btree *btree,
00298                      const unsigned char *key, size_t size,
00299                      struct ccn_btree_node **leafp);
00300 
00301 /* Look up key, starting from the provided root and stopping at stoplevel; ansp is an output parameter */
00302 int ccn_btree_lookup_internal(struct ccn_btree *btree,
00303                      struct ccn_btree_node *root, int stoplevel,
00304                      const unsigned char *key, size_t size,
00305                      struct ccn_btree_node **ansp);
00306 
00307 /* Search for the entry referring to nodeid within parent */
00308 int ccn_btree_index_in_parent(struct ccn_btree_node *parent,
00309                               ccn_btnodeid nodeid);
00310 
00311 /* Find the leaf that comes after the given node; ansp is an output parameter */
00312 int ccn_btree_next_leaf(struct ccn_btree *btree,
00313                         struct ccn_btree_node *node,
00314                         struct ccn_btree_node **ansp);
00315 
00316 /* Find the leaf that comes before the given node; ansp is an output parameter */
00317 int ccn_btree_prev_leaf(struct ccn_btree *btree,
00318                         struct ccn_btree_node *node,
00319                         struct ccn_btree_node **ansp);
00320 
00321 /* Split a node into two */
00322 int ccn_btree_split(struct ccn_btree *btree, struct ccn_btree_node *node);
00323 
00324 /* Spill a node over into a sibling */
00325 int ccn_btree_spill(struct ccn_btree *btree, struct ccn_btree_node *node);
00326 
00327 /* Prepare to update a node; presumably called before modifying it - TODO confirm */
00328 int ccn_btree_prepare_for_update(struct ccn_btree *bt,
00329                                  struct ccn_btree_node *node);
00330 
00331 /* Check the whole btree carefully; outfp presumably receives the report - TODO confirm */
00332 int ccn_btree_check(struct ccn_btree *btree, FILE *outfp);
00333 
00334 /*
00335  * Storage layer - the client can provide other implementations of ccn_btree_io
00336  */
00337 
00338 /* Make a ccn_btree_io that stores btree nodes in files; path presumably names the directory - confirm */
00339 struct ccn_btree_io *ccn_btree_io_from_directory(const char *path,
00340                                                  struct ccn_charbuf *msgs);
00341 
00342 /* Low-level field access: fetch/store a value in the size-byte field at p */
00343 unsigned ccn_btree_fetchval(const unsigned char *p, int size);
00344 void ccn_btree_storeval(unsigned char *p, int size, unsigned v);
00345 
00346 #endif
Generated on Tue Aug 21 14:54:17 2012 for Content-Centric Networking in C by  doxygen 1.6.3