#include <stdio.h>
#include <zlib.h>
#include <db.h>
+#include <assert.h>
#include <libcitadel.h>
#include "../../citserver.h"
#include "../../config.h"
// Some functions in this backend need to store some per-thread data.
// This returns the pointer to the current thread's per-thread data block, creating it if necessary.
+// (This will also work in a non-threaded program; it will return the same pointer every time.)
struct bdb_tsd *bdb_get_tsd(void) {
struct bdb_tsd *c = (struct bdb_tsd *) pthread_getspecific(bdb_thread_key) ;
}
-// Wrapper for txn_begin() that logs/aborts on error. Not part of the backend API.
-static void bdb_txbegin(DB_TXN **tid) {
- int ret;
-
- ret = bdb_env->txn_begin(bdb_env, NULL, tid, 0);
-
- if (ret) {
- syslog(LOG_ERR, "bdb: txn_begin: %s", db_strerror(ret));
- bdb_abort();
- }
-}
-
-
// Panic callback for Berkeley DB. Not part of the backend API.
static void bdb_dbpanic(DB_ENV *env, int errval) {
syslog(LOG_ERR, "bdb: PANIC: %s", db_strerror(errval));
}
+// Begin a transaction on behalf of the calling thread; the new handle is stored in TSD->tid.
+// The open-cursor check (bdb_bailIfCursor) runs first.  If TSD->tid is already set, or if
+// txn_begin() reports an error, the problem is logged and bdb_abort() is called.
+void bdb_begin_transaction(void) {
+	bdb_bailIfCursor(TSD->cursors, "can't begin transaction during r/o cursor");
+
+	// Only one transaction per thread at a time: a non-NULL tid means we are being nested.
+	if (TSD->tid != NULL) {
+		syslog(LOG_ERR, "bdb: bdb_begin_transaction: ERROR: nested transaction");
+		bdb_abort();
+	}
+
+	int ret = bdb_env->txn_begin(bdb_env, NULL, &TSD->tid, 0);
+	if (ret != 0) {
+		syslog(LOG_ERR, "bdb: bdb_begin_transaction: %s", db_strerror(ret));
+		bdb_abort();
+	}
+}
+
+
+// Commit the calling thread's current transaction (TSD->tid) and clear the handle.
+// Any cursor found still open is reported with a warning, closed, and cleared first.
+// Calling this with no transaction open is logged as an error and passed to bdb_abort().
+void bdb_end_transaction(void) {
+	int slot = 0;
+
+	// Sweep every table slot for cursors that were left open.
+	while (slot < MAXCDB) {
+		if (TSD->cursors[slot] != NULL) {
+			syslog(LOG_WARNING, "bdb: bdb_end_transaction: WARNING: cursor %d still open at transaction end", slot);
+			bdb_cclose(TSD->cursors[slot]);
+			TSD->cursors[slot] = NULL;
+		}
+		++slot;
+	}
+
+	if (TSD->tid != NULL) {
+		bdb_txcommit(TSD->tid);
+	}
+	else {
+		syslog(LOG_ERR, "bdb: bdb_end_transaction: ERROR: bdb_txcommit(NULL) !!");
+		bdb_abort();
+	}
+
+	// Whichever branch ran, this thread no longer has a transaction open.
+	TSD->tid = NULL;
+}
+
+
// Request a checkpoint of the database. Called once per minute by the thread manager.
void bdb_checkpoint(void) {
int ret;
}
// After a successful checkpoint, we can cull the unused logs
- if (CtdlGetConfigInt("c_auto_cull")) {
- ret = bdb_env->log_set_config(bdb_env, DB_LOG_AUTO_REMOVE, 1);
- }
- else {
- ret = bdb_env->log_set_config(bdb_env, DB_LOG_AUTO_REMOVE, 0);
+ ret = bdb_env->log_set_config(bdb_env, DB_LOG_AUTO_REMOVE, 1);
+ if (ret != 0) {
+ syslog(LOG_ERR, "bdb: bdb_checkpoint() auto coll logs: %s", db_strerror(ret));
}
}
-// Open the various tables we'll be using. Any table which
-// does not exist should be created. Note that we don't need a
-// critical section here, because there aren't any active threads
-// manipulating the database yet.
+// Open the various tables we'll be using. Any table which does not exist should be created. Note that we don't need a
+// critical section here, because there aren't any active threads manipulating the database yet.
void bdb_open_databases(void) {
int ret;
int i;
bdb_env->set_verbose(bdb_env, DB_VERB_DEADLOCK, 1);
bdb_env->set_verbose(bdb_env, DB_VERB_RECOVERY, 1);
- // We want to specify the shared memory buffer pool cachesize, but everything else is the default.
- ret = bdb_env->set_cachesize(bdb_env, 0, 64 * 1024, 0);
- if (ret) {
- syslog(LOG_ERR, "bdb: set_cachesize: %s", db_strerror(ret));
- bdb_env->close(bdb_env, 0);
- syslog(LOG_ERR, "bdb: exit code %d", ret);
- exit(CTDLEXIT_DB);
- }
-
- if ((ret = bdb_env->set_lk_detect(bdb_env, DB_LOCK_DEFAULT))) {
- syslog(LOG_ERR, "bdb: set_lk_detect: %s", db_strerror(ret));
- bdb_env->close(bdb_env, 0);
- syslog(LOG_ERR, "bdb: exit code %d", ret);
- exit(CTDLEXIT_DB);
- }
-
- flags = DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE | DB_INIT_TXN | DB_INIT_LOCK | DB_THREAD | DB_INIT_LOG;
+ flags = DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_RECOVER | DB_THREAD ;
syslog(LOG_DEBUG, "bdb: bdb_env->open(bdb_env, %s, %d, 0)", ctdl_db_dir, flags);
ret = bdb_env->open(bdb_env, ctdl_db_dir, flags, 0); // try opening the database cleanly
- if (ret == DB_RUNRECOVERY) {
- syslog(LOG_ERR, "bdb: bdb_env->open: %s", db_strerror(ret));
- syslog(LOG_ERR, "bdb: attempting recovery...");
- flags |= DB_RECOVER;
- ret = bdb_env->open(bdb_env, ctdl_db_dir, flags, 0); // try recovery
- }
- if (ret == DB_RUNRECOVERY) {
- syslog(LOG_ERR, "bdb: bdb_env->open: %s", db_strerror(ret));
- syslog(LOG_ERR, "bdb: attempting catastrophic recovery...");
- flags &= ~DB_RECOVER;
- flags |= DB_RECOVER_FATAL;
- ret = bdb_env->open(bdb_env, ctdl_db_dir, flags, 0); // try catastrophic recovery
- }
if (ret) {
- syslog(LOG_ERR, "bdb: bdb_env->open: %s", db_strerror(ret));
+ syslog(LOG_ERR, "bdb: bdb_env->open: %s: %s", ctdl_db_dir, db_strerror(ret));
bdb_env->close(bdb_env, 0);
syslog(LOG_ERR, "bdb: exit code %d", ret);
exit(CTDLEXIT_DB);
}
- syslog(LOG_INFO, "bdb: mounting databases");
for (i = 0; i < MAXCDB; ++i) {
ret = db_create(&bdb_table[i], bdb_env, 0); // Create a database handle
if (ret) {
}
snprintf(dbfilename, sizeof dbfilename, "cdb.%02x", i); // table names by number
- ret = bdb_table[i]->open(bdb_table[i], NULL, dbfilename, NULL, DB_BTREE, DB_CREATE | DB_AUTO_COMMIT | DB_THREAD, 0600);
+ ret = bdb_table[i]->open(bdb_table[i], NULL, dbfilename, NULL, DB_BTREE, DB_CREATE | DB_AUTO_COMMIT, 0600);
if (ret) {
syslog(LOG_ERR, "bdb: db_open[%02x]: %s", i, db_strerror(ret));
if (ret == ENOMEM) {
// close the tables
syslog(LOG_INFO, "bdb: closing databases");
for (i = 0; i < MAXCDB; ++i) {
- syslog(LOG_INFO, "bdb: closing database %02x", i);
ret = bdb_table[i]->close(bdb_table[i], 0);
if (ret) {
syslog(LOG_ERR, "bdb: db_close: %s", db_strerror(ret));
}
// Close the handle.
+ syslog(LOG_INFO, "bdb: closing environment");
ret = bdb_env->close(bdb_env, DB_FORCESYNC);
if (ret) {
syslog(LOG_ERR, "bdb: DBENV->close: %s", db_strerror(ret));
}
+
+ syslog(LOG_INFO, "bdb: shutdown completed");
}
destLen = (uLongf) zheader.uncompressed_len;
uncompressed_data = malloc(zheader.uncompressed_len);
- if (uncompress((Bytef *) uncompressed_data,
- (uLongf *) &destLen, (const Bytef *) compressed_data, (uLong) sourceLen) != Z_OK) {
+ if (uncompress((Bytef *) uncompressed_data, (uLongf *) &destLen, (const Bytef *) compressed_data, (uLong) sourceLen) != Z_OK) {
syslog(LOG_ERR, "bdb: uncompress() error");
bdb_abort();
}
int compressing = 0;
size_t buffer_len = 0;
uLongf destLen = 0;
+ int existing_txn = 0; // set to nonzero if we are already inside a transaction
memset(&dkey, 0, sizeof(DBT));
memset(&ddata, 0, sizeof(DBT));
}
if (TSD->tid != NULL) {
+ existing_txn = 1;
+ }
+
+ if (!existing_txn) { // If we're not already inside a transaction,
+ bdb_begin_transaction(); // create our own for this operation.
+ }
+
+ do {
ret = bdb_table[cdb]->put(bdb_table[cdb], // db
- TSD->tid, // transaction ID
- &dkey, // key
- &ddata, // data
- 0 // flags
+ TSD->tid, // transaction ID
+ &dkey, // key
+ &ddata, // data
+ 0 // flags
);
- if (ret) {
- syslog(LOG_ERR, "bdb: bdb_store(%d): %s", cdb, db_strerror(ret));
+ if ((ret != 0) && (ret != DB_LOCK_DEADLOCK)) {
+ syslog(LOG_ERR, "bdb: bdb_store(%02x): error %d: %s", cdb, ret, db_strerror(ret));
bdb_abort();
}
- if (compressing) {
- free(compressed_data);
+ if (ret == DB_LOCK_DEADLOCK) {
+ syslog(LOG_DEBUG, "bdb: bdb_store(%02x): would deadlock, trying again", cdb);
}
- return ret;
+ } while (ret == DB_LOCK_DEADLOCK);
+
+ if (!existing_txn) {
+ bdb_end_transaction();
}
- else {
- bdb_bailIfCursor(TSD->cursors, "attempt to write during r/o cursor");
-
- retry:
- bdb_txbegin(&tid);
-
- if ((ret = bdb_table[cdb]->put(bdb_table[cdb], // db
- tid, // transaction ID
- &dkey, // key
- &ddata, // data
- 0))) { // flags
- if (ret == DB_LOCK_DEADLOCK) {
- bdb_txabort(tid);
- goto retry;
- }
- else {
- syslog(LOG_ERR, "bdb: bdb_store(%d): %s", cdb, db_strerror(ret));
- bdb_abort();
- }
- }
- else {
- bdb_txcommit(tid);
- if (compressing) {
- free(compressed_data);
- }
- return ret;
- }
+
+ if (compressing) {
+ free(compressed_data);
}
+
return ret;
}
// Delete a piece of data. Returns 0 if the operation was successful.
int bdb_delete(int cdb, void *key, int keylen) {
DBT dkey;
- DB_TXN *tid;
int ret;
+ int existing_txn = 0; // set to nonzero if we are already inside a transaction
memset(&dkey, 0, sizeof dkey);
dkey.size = keylen;
dkey.data = key;
if (TSD->tid != NULL) {
- ret = bdb_table[cdb]->del(bdb_table[cdb], TSD->tid, &dkey, 0);
- if (ret) {
- syslog(LOG_ERR, "bdb: bdb_delete(%d): %s", cdb, db_strerror(ret));
- if (ret != DB_NOTFOUND) {
- bdb_abort();
- }
- }
+ existing_txn = 1;
}
- else {
- bdb_bailIfCursor(TSD->cursors, "attempt to delete during r/o cursor");
-
- retry:
- bdb_txbegin(&tid);
- if ((ret = bdb_table[cdb]->del(bdb_table[cdb], tid, &dkey, 0)) && ret != DB_NOTFOUND) {
- if (ret == DB_LOCK_DEADLOCK) {
- bdb_txabort(tid);
- goto retry;
- }
- else {
- syslog(LOG_ERR, "bdb: bdb_delete(%d): %s", cdb, db_strerror(ret));
- bdb_abort();
- }
- }
- else {
- bdb_txcommit(tid);
- }
+ if (!existing_txn) { // If we're not already inside a transaction,
+ bdb_begin_transaction(); // create our own for this operation.
}
- return ret;
-}
-
-static DBC *bdb_localcursor(int cdb) {
- int ret;
- DBC *curs;
-
- if (TSD->cursors[cdb] == NULL) {
- ret = bdb_table[cdb]->cursor(bdb_table[cdb], TSD->tid, &curs, 0);
- }
- else {
- ret = TSD->cursors[cdb]->c_dup(TSD->cursors[cdb], &curs, DB_POSITION);
+ ret = bdb_table[cdb]->del(bdb_table[cdb], TSD->tid, &dkey, 0);
+ if (ret) {
+ if (ret != DB_NOTFOUND) {
+ syslog(LOG_ERR, "bdb: bdb_delete(%02x): %s", cdb, db_strerror(ret));
+ bdb_abort();
+ }
}
- if (ret) {
- syslog(LOG_ERR, "bdb: bdb_localcursor: %s", db_strerror(ret));
- bdb_abort();
+ if (!existing_txn) {
+ bdb_end_transaction(); // Only end the transaction if we began it.
}
- return curs;
+ return ret;
}
-// Fetch a piece of data. If not found, returns NULL. Otherwise, it returns
-// a struct cdbdata which it is the caller's responsibility to free later on
-// using the bdb_free() routine.
+// Fetch a piece of data. Returns a "struct cdbdata" by value.
+// If the item is not found, its ptr member will be NULL.
struct cdbdata bdb_fetch(int cdb, const void *key, int keylen) {
struct cdbdata returned_data;
dkey.size = keylen;
dkey.data = (void *) key;
- if (TSD->tid != NULL) {
- TSD->dbdata[cdb].flags = DB_DBT_REALLOC;
- ret = bdb_table[cdb]->get(bdb_table[cdb], TSD->tid, &dkey, &TSD->dbdata[cdb], 0);
- }
- else {
- DBC *curs;
-
- do {
- TSD->dbdata[cdb].flags = DB_DBT_REALLOC;
- curs = bdb_localcursor(cdb);
- ret = curs->c_get(curs, &dkey, &TSD->dbdata[cdb], DB_SET);
- bdb_cclose(curs);
- } while (ret == DB_LOCK_DEADLOCK);
- }
+ TSD->dbdata[cdb].flags = DB_DBT_REALLOC;
- if ((ret != 0) && (ret != DB_NOTFOUND)) {
- syslog(LOG_ERR, "bdb: bdb_fetch(%d): %s", cdb, db_strerror(ret));
- bdb_abort();
- }
+ do {
+ ret = bdb_table[cdb]->get(bdb_table[cdb], TSD->tid, &dkey, &TSD->dbdata[cdb], 0);
+ if ((ret != 0) && (ret != DB_NOTFOUND) && (ret != DB_LOCK_DEADLOCK)) {
+ syslog(LOG_ERR, "bdb: bdb_fetch(%d): error %d: %s", cdb, ret, db_strerror(ret));
+ bdb_abort();
+ }
+ } while (ret == DB_LOCK_DEADLOCK);
if (ret == 0) {
bdb_decompress_if_necessary(&TSD->dbdata[cdb]);
}
-// Free a cdbdata item.
-void bdb_free(struct cdbdata *cdb) {
- free(cdb);
-}
-
-
void bdb_close_cursor(int cdb) {
if (TSD->cursors[cdb] != NULL) {
bdb_cclose(TSD->cursors[cdb]);
// Fetch the next item in a sequential search. Returns a cdbkeyval structure (key and value);
// it is left zero-filled if we've hit the end.
-struct cdbdata bdb_next_item(int cdb) {
- struct cdbdata cdbret;
+struct cdbkeyval bdb_next_item(int cdb) {
+ struct cdbkeyval kv;
int ret = 0;
- memset(&cdbret, 0, sizeof(struct cdbdata));
+ memset(&kv, 0, sizeof(struct cdbkeyval));
// reuse memory from the previous call.
- TSD->dbkey[cdb].flags = DB_DBT_MALLOC;
- TSD->dbdata[cdb].flags = DB_DBT_MALLOC;
+ TSD->dbkey[cdb].flags = DB_DBT_REALLOC;
+ TSD->dbdata[cdb].flags = DB_DBT_REALLOC;
+ assert(TSD->cursors[cdb] != NULL);
ret = TSD->cursors[cdb]->c_get(TSD->cursors[cdb], &TSD->dbkey[cdb], &TSD->dbdata[cdb], DB_NEXT);
if (ret) {
bdb_abort();
}
bdb_close_cursor(cdb);
- return(cdbret); // presumably, we are at the end
+ return(kv); // presumably, we are at the end
}
bdb_decompress_if_necessary(&TSD->dbdata[cdb]);
- cdbret.len = TSD->dbdata[cdb].size;
- cdbret.ptr = TSD->dbdata[cdb].data;
-
- return (cdbret);
-}
-
-
-// Transaction-based stuff. I'm writing this as I bake cookies...
-void bdb_begin_transaction(void) {
- bdb_bailIfCursor(TSD->cursors, "can't begin transaction during r/o cursor");
-
- if (TSD->tid != NULL) {
- syslog(LOG_ERR, "bdb: bdb_begin_transaction: ERROR: nested transaction");
- bdb_abort();
- }
-
- bdb_txbegin(&TSD->tid);
-}
-
-
-void bdb_end_transaction(void) {
- int i;
-
- for (i = 0; i < MAXCDB; i++) {
- if (TSD->cursors[i] != NULL) {
- syslog(LOG_WARNING, "bdb: bdb_end_transaction: WARNING: cursor %d still open at transaction end", i);
- bdb_cclose(TSD->cursors[i]);
- TSD->cursors[i] = NULL;
- }
- }
-
- if (TSD->tid == NULL) {
- syslog(LOG_ERR, "bdb: bdb_end_transaction: ERROR: bdb_txcommit(NULL) !!");
- bdb_abort();
- }
- else {
- bdb_txcommit(TSD->tid);
- }
-
- TSD->tid = NULL;
+ kv.key.len = TSD->dbkey[cdb].size;
+ kv.key.ptr = TSD->dbkey[cdb].data;
+ kv.val.len = TSD->dbdata[cdb].size;
+ kv.val.ptr = TSD->dbdata[cdb].data;
+ return (kv);
}
syslog(LOG_ERR, "bdb: bdb_trunc must not be called in a transaction.");
bdb_abort();
}
- else {
- bdb_bailIfCursor(TSD->cursors, "attempt to write during r/o cursor");
-
- retry:
- if ((ret = bdb_table[cdb]->truncate(bdb_table[cdb], // db
- NULL, // transaction ID
- &count, // #rows deleted
- 0))) { // flags
- if (ret == DB_LOCK_DEADLOCK) {
- goto retry;
- }
- else {
- syslog(LOG_ERR, "bdb: bdb_truncate(%d): %s", cdb, db_strerror(ret));
- if (ret == ENOMEM) {
- syslog(LOG_ERR, "bdb: You may need to tune your database; please read http://www.citadel.org for more information.");
- }
- exit(CTDLEXIT_DB);
- }
+ bdb_begin_transaction(); // create our own transaction for this operation.
+ ret = bdb_table[cdb]->truncate(bdb_table[cdb], // db
+ NULL, // transaction ID
+ &count, // #rows deleted
+ 0); // flags
+ //
+ if (ret) {
+ syslog(LOG_ERR, "bdb: bdb_truncate(%d): %s", cdb, db_strerror(ret));
+ if (ret == ENOMEM) {
+ syslog(LOG_ERR, "bdb: You may need to tune your database; please read http://www.citadel.org for more information.");
}
+ exit(CTDLEXIT_DB);
}
+ bdb_end_transaction();
}
}
+// Periodic maintenance hook (installed as cdb_tick by bdb_init_backend).
+// Asks the Berkeley DB environment to run lock_detect() with the default policy, then logs
+// either the error it returned or, on success, a nonzero "rejected" count.
+void bdb_tick(void) {
+	int rejected;
+	int ret = bdb_env->lock_detect(bdb_env, 0, DB_LOCK_DEFAULT, &rejected);
+
+	if (ret != 0) {
+		syslog(LOG_ERR, "bdb: lock_detect: %s", db_strerror(ret));
+		return;
+	}
+
+	// "rejected" is only examined when the call succeeded.
+	if (rejected != 0) {
+		syslog(LOG_DEBUG, "bdb: rejected lock %d", rejected);
+	}
+}
+
+
// Calling this function activates the Berkeley DB back end.
void bdb_init_backend(void) {
cdb_close_databases = bdb_close_databases;
cdb_store = bdb_store;
cdb_delete = bdb_delete;
- cdb_free = bdb_free;
cdb_next_item = bdb_next_item;
cdb_close_cursor = bdb_close_cursor;
cdb_begin_transaction = bdb_begin_transaction;
cdb_end_transaction = bdb_end_transaction;
cdb_check_handles = bdb_check_handles;
cdb_trunc = bdb_trunc;
+ cdb_tick = bdb_tick;
// Some functions in this backend need to store some per-thread data.
- // We crerate the key here, during module initialization.
+ // We create the key here, during module initialization.
if (pthread_key_create(&bdb_thread_key, NULL) != 0) {
syslog(LOG_ERR, "pthread_key_create() : %m");
exit(CTDLEXIT_THREAD);