1 // Don't run this. It doesn't work and if you try to run it you will immediately die.
3 // Copyright (c) 2023 by Art Cancro citadel.org
5 // This program is open source software. Use, duplication, or disclosure
6 // is subject to the terms of the GNU General Public License, version 3.
13 #include <sys/types.h>
14 #include <sys/socket.h>
22 #include <libcitadel.h>
25 #include "../server/sysdep.h"
26 #include "../server/citadel_defs.h"
27 #include "../server/server.h"
28 #include "../server/makeuserkey.h"
29 #include "../server/citadel_dirs.h"
33 // Wrapper for realloc() that crashes and burns if the call fails.
// Forwards ptr and size to realloc() unchanged and keeps the result in p.
// The diagnostic below goes to stderr and names the old pointer and the requested size.
// NOTE(review): size is a size_t but is formatted with %ld; %zu is the matching conversion.
// NOTE(review): %m (text for the current errno) is a glibc printf extension, not ISO C.
34 void *reallok(void *ptr, size_t size) {
35 void *p = realloc(ptr, size);
37 fprintf(stderr, "realloc() failed to resize %p to %ld bytes, error: %m\n", ptr, size);
44 // Open a database environment
// dirname is handed to dbenv->open() as the environment's home directory.
// The error reports in this function print the db_strerror() text for ret, followed by the numeric code, to stderr.
45 DB_ENV *open_dbenv(char *dirname) {
// Ask the linked libdb for its run-time version so it can be compared with the build-time header constants.
52 int dbversion_major, dbversion_minor, dbversion_patch;
53 db_version(&dbversion_major, &dbversion_minor, &dbversion_patch);
55 // Create synthetic integer version numbers and compare them.
56 // Never run with a libdb other than the one with which it was compiled.
// Each version is packed as major*1000000 + minor*1000 + patch.
57 int compiled_db_version = ( (DB_VERSION_MAJOR * 1000000) + (DB_VERSION_MINOR * 1000) + (DB_VERSION_PATCH) );
58 int linked_db_version = ( (dbversion_major * 1000000) + (dbversion_minor * 1000) + (dbversion_patch) );
59 if (compiled_db_version != linked_db_version) {
61 "db: ctdlload is running with a version of libdb other than the one with which it was compiled.\n"
64 "db: This is an invalid configuration. ctdlload will now exit to prevent data loss.",
// Allocate the environment handle (flags argument is 0).
71 ret = db_env_create(&dbenv, 0);
73 fprintf(stderr,"db: db_env_create: %s\n", db_strerror(ret));
74 fprintf(stderr,"db: exit code %d\n", ret);
78 // We want to specify the shared memory buffer pool cachesize, but everything else is the default.
// In the Berkeley DB API the arguments are (gbytes, bytes, ncache): 0 GB plus 64*1024 bytes here.
79 ret = dbenv->set_cachesize(dbenv, 0, 64 * 1024, 0);
81 fprintf(stderr,"db: set_cachesize: %s\n", db_strerror(ret));
82 dbenv->close(dbenv, 0);
83 fprintf(stderr,"db: exit code %d\n", ret);
// NOTE(review): deadlock detection is configured here, but DB_INIT_LOCK is not among the open
// flags below -- confirm whether this call is still needed.
87 if ((ret = dbenv->set_lk_detect(dbenv, DB_LOCK_DEFAULT))) {
88 fprintf(stderr,"db: set_lk_detect: %s\n", db_strerror(ret));
89 dbenv->close(dbenv, 0);
90 fprintf(stderr,"db: exit code %d\n", ret);
// Create the environment if needed, with the memory pool and logging subsystems, as a private
// (DB_PRIVATE) environment. No DB_INIT_LOCK or DB_INIT_TXN is requested.
94 flags = DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE | DB_INIT_LOG;
95 ret = dbenv->open(dbenv, dirname, flags, 0);
97 fprintf(stderr,"db: dbenv->open: %s\n", db_strerror(ret));
98 dbenv->close(dbenv, 0);
99 fprintf(stderr,"db: exit code %d\n", ret);
// Close a database environment handle with dbenv->close() (flags argument is 0).
// The result code is kept in ret; the message below prints its db_strerror() text to stderr.
107 void close_dbenv(DB_ENV *dbenv) {
108 int ret = dbenv->close(dbenv, 0);
110 fprintf(stderr,"db: dbenv->close: %s\n", db_strerror(ret));
115 // Convert a "msgtext" record to a message on disk. NOT THREADSAFE
116 // This also works for "bigmsg" records.
// line is split in place with strtok() on '|': first token, then the message number, then the
// base64 text. strtok() keeps hidden static state, which is one reason this is not thread safe.
// out_key receives the message number; out_data receives the decoded message bytes.
// ingest_one() stores the int result in row_was_good.
117 int import_msgtext(char *line, DBT *out_key, DBT *out_data) {
// NOTE(review): no use of these two statics appears in the statements below -- confirm they are still needed.
119 static char *b64_decoded_msg = NULL;
120 static size_t b64_decoded_alloc = 0;
124 token = strtok(line, "|");
// NOTE(review): strtok() returns NULL when a field is absent; atol(NULL) here and strlen(token)
// further down would then dereference a null pointer on a short or malformed line.
125 msgnum = atol(strtok(NULL, "|"));
126 token = strtok(NULL, "|");
128 // The record key will be the message number
// The key is the raw bytes of a long (sizeof(long), host byte order).
// reallok() is given the DBT's current data pointer, so an existing buffer is resized rather than replaced.
129 out_key->size = sizeof(long);
130 out_key->data = reallok(out_key->data, out_key->size);
131 memcpy(out_key->data, &msgnum, out_key->size);
133 // The record data will be the decoded message text.
134 // We are allocating more memory than we need, but BDB will only write the number of bytes we tell it to.
// The buffer is sized to the encoded length; the size stored is what CtdlDecodeBase64() returns.
135 out_data->data = reallok(out_data->data, strlen(token));
136 out_data->size = CtdlDecodeBase64(out_data->data, token, strlen(token));
141 // Convert a "msgmeta" record to a message metadata record on disk. NOT THREADSAFE
// line is cut in place at each '|' by strsep(); i is the zero-based field index.
// Fields are converted into a freshly allocated, zero-filled struct MetaData.
// out_key receives the negated message number; out_data takes ownership of the struct.
// ingest_one() stores the int result in row_was_good.
142 int import_msgmeta(char *line, DBT *out_key, DBT *out_data) {
// NOTE(review): no NULL check on malloc() is visible before the memset() -- confirm.
144 struct MetaData *m = malloc(sizeof(struct MetaData));
146 memset(m, 0, sizeof(struct MetaData));
149 for (int i=0; (token = strsep(&p, "|")); ++i) {
152 m->meta_msgnum = atol(token);
155 m->meta_refcount = atoi(token);
// NOTE(review): strncpy() with the full field size leaves meta_content_type unterminated
// when the token is at least as long as the field.
158 strncpy(m->meta_content_type, token, sizeof(m->meta_content_type));
161 m->meta_rfc822_length = atol(token);
166 // metadata records are stored in the CDB_MSGMAIN table,
167 // but with the index being the *negative* of the message number.
// The key is the raw bytes of that long, in a buffer newly allocated by reallok(NULL, ...).
168 long index = 0 - m->meta_msgnum;
169 out_key->size = sizeof(long);
170 out_key->data = reallok(NULL, out_key->size);
171 memcpy(out_key->data, &index, out_key->size);
174 out_data->size = sizeof(struct MetaData);
175 out_data->data = m; // out_data owns this memory now
181 // Convert a "user" record to a record on disk. (Source string is unusable after this function is called.)
// The source string becomes unusable because strsep() overwrites each '|' delimiter in place.
// Fields are converted into a freshly allocated, zero-filled struct ctdluser.
// out_key receives the key produced by makeuserkey() from the full name; out_data->size is the struct size.
// ingest_one() stores the int result in row_was_good.
182 int import_user(char *line, DBT *out_key, DBT *out_data) {
183 char userkey[USERNAME_SIZE];
// NOTE(review): no NULL check on malloc() is visible before the memset() -- confirm.
185 struct ctdluser *u = malloc(sizeof(struct ctdluser));
187 memset(u, 0, sizeof(struct ctdluser));
// i is the zero-based field index within the line.
190 for (int i=0; (token = strsep(&p, "|")); ++i) {
193 u->version = atoi(token);
196 u->uid = atoi(token);
// NOTE(review): the strncpy() calls in this loop use the full field size, so a token at least
// that long leaves the field without a terminating NUL.
199 strncpy(u->password, token, sizeof(u->password));
202 u->flags = atoi(token);
205 u->axlevel = atoi(token);
208 u->usernum = atol(token);
211 u->lastcall = atol(token);
214 u->USuserpurge = atoi(token);
217 strncpy(u->fullname, token, sizeof(u->fullname));
220 u->msgnum_bio = atol(token);
223 u->msgnum_pic = atol(token);
// NOTE(review): the token is base64-decoded straight into u->emailaddrs with no visible check
// against the size of that field -- confirm the dump format bounds it.
226 CtdlDecodeBase64(u->emailaddrs, token, strlen(token));
229 u->msgnum_inboxrules = atol(token);
232 u->lastproc_inboxrules = atol(token);
// The key is the makeuserkey() form of the full name, stored without its terminating NUL
// (size is strlen(userkey)); strdup() gives the DBT its own copy.
237 makeuserkey(userkey, u->fullname);
238 out_key->size = strlen(userkey);
239 out_key->data = strdup(userkey);
240 out_data->size = sizeof(struct ctdluser);
246 // Convert a "room" record to a record on disk. (Source string is unusable after this function is called.)
// The source string becomes unusable because strsep() overwrites each '|' delimiter in place.
// Fields are converted into a freshly allocated, zero-filled struct ctdlroom.
// out_key receives the lower-cased room name; out_data->size is the struct size.
// ingest_one() stores the int result in row_was_good.
247 int import_room(char *line, DBT *out_key, DBT *out_data) {
// NOTE(review): no NULL check on malloc() is visible before the memset() -- confirm.
249 struct ctdlroom *r = malloc(sizeof(struct ctdlroom));
251 memset(r, 0, sizeof(struct ctdlroom));
// i is the zero-based field index within the line.
254 for (int i=0; (token = strsep(&p, "|")); ++i) {
// NOTE(review): the strncpy() calls in this loop use the full field size, so a token at least
// that long leaves the field without a terminating NUL.
257 strncpy(r->QRname, token, sizeof(r->QRname));
260 strncpy(r->QRpasswd, token, sizeof (r->QRpasswd));
263 r->QRroomaide = atol(token);
266 r->QRhighest = atol(token);
269 r->QRgen = atol(token);
272 r->QRflags = atoi(token);
275 strncpy(r->QRdirname, token, sizeof(r->QRdirname));
278 r->msgnum_info = atol(token);
281 r->QRfloor = atoi(token);
284 r->QRmtime = atol(token);
287 r->QRep.expire_mode = atoi(token);
290 r->QRep.expire_value = atoi(token);
293 r->QRnumber = atol(token);
296 r->QRorder = atoi(token);
299 r->QRflags2 = atoi(token);
302 r->QRdefaultview = atoi(token);
305 r->msgnum_pic = atol(token);
310 // The key is the room name in all lower case
// The key size excludes the terminating NUL; strdup() still copies it, so the buffer holds size+1 bytes.
311 out_key->size = strlen(r->QRname);
312 out_key->data = strdup(r->QRname);
313 char *k = (char *)out_key->data;
// The <= bound means the terminating NUL at index size is also passed through tolower(),
// which stays inside the strdup() buffer.
// NOTE(review): k[i] is a plain char; <ctype.h> functions expect an unsigned char value or EOF,
// so bytes >= 0x80 should be cast to unsigned char first.
314 for (int i=0; i<=out_key->size; ++i) {
315 k[i] = tolower(k[i]);
318 out_data->size = sizeof(struct ctdlroom);
324 // Convert a floor record to a record on disk.
// line is cut in place at each '|' by strsep(); i is the zero-based field index.
// Fields are converted into a freshly allocated, zero-filled struct floor; the floor number
// itself is kept in the local floor_num and becomes the key.
// ingest_one() stores the int result in row_was_good.
325 int import_floor(char *line, DBT *out_key, DBT *out_data) {
// NOTE(review): no NULL check on malloc() is visible before the memset() -- confirm.
327 struct floor *f = malloc(sizeof(struct floor));
330 memset(f, 0, sizeof(struct floor));
333 for (int i=0; (token = strsep(&p, "|")); ++i) {
336 floor_num = atoi(token);
339 f->f_flags = atoi(token);
// NOTE(review): strncpy() with the full field size leaves f_name unterminated when the token
// is at least as long as the field.
342 strncpy(f->f_name, token, sizeof(f->f_name));
345 f->f_ref_count = atoi(token);
348 f->f_ep.expire_mode = atoi(token);
351 f->f_ep.expire_value = atoi(token);
// The key is the raw bytes of the int floor number.
// NOTE(review): plain malloc() is used here and its result goes straight into memcpy();
// other importers in this file use reallok(NULL, ...) for the same job.
356 out_key->size = sizeof(int);
357 out_key->data = malloc(out_key->size);
358 memcpy(out_key->data, &floor_num, out_key->size);
360 out_data->size = sizeof(struct floor);
366 // Import a msglist record
367 // msglist|26|32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51|
// The outer strsep() loop walks the '|' fields; the inner one walks the ',' separated message
// numbers. Both cut the input in place.
// out_key receives the room number (a long); out_data receives the array of longs.
// ingest_one() stores the int result in row_was_good.
368 int import_msglist(char *line, DBT *out_key, DBT *out_data) {
370 char *token, *mtoken;
374 long *msglist = NULL;
376 for (int i=0; (token = strsep(&p, "|")); ++i) {
379 roomnum = atol(token);
// The array grows by one long for every token.
// NOTE(review): the realloc() result is assigned straight back to msglist and indexed on the
// next line with no NULL check; reallok() exists in this file for exactly this purpose.
383 for (int j=0; (mtoken = strsep(&q, ",")); ++j) {
384 msglist = realloc(msglist, (num_msgs+1) * sizeof(long));
385 msglist[num_msgs++] = atol(mtoken);
// The key is the raw bytes of the long room number.
// NOTE(review): the malloc() result is passed to memcpy() unchecked.
391 out_key->size = sizeof(long);
392 out_key->data = malloc(out_key->size);
393 memcpy(out_key->data, &roomnum, out_key->size);
// The DBT points directly at the msglist array built above (no copy is made).
395 out_data->size = num_msgs * sizeof(long);
396 out_data->data = msglist;
402 // Convert a "visit" record to a record on disk.
// line is cut in place at each '|' by strsep(); i is the zero-based field index.
// Fields are converted into a freshly allocated, zero-filled struct visit.
// out_key receives the leading 3*sizeof(long) bytes of that struct; out_data->size is the struct size.
// ingest_one() stores the int result in row_was_good.
403 int import_visit(char *line, DBT *out_key, DBT *out_data) {
// NOTE(review): no NULL check on malloc() is visible before the memset() -- confirm.
405 struct visit *v = malloc(sizeof(struct visit));
407 memset(v, 0, sizeof(struct visit));
410 for (int i=0; (token = strsep(&p, "|")); ++i) {
413 v->v_roomnum = atol(token);
416 v->v_roomgen = atol(token);
419 v->v_usernum = atol(token);
422 v->v_lastseen = atoi(token);
425 v->v_flags = atoi(token);
// NOTE(review): strncpy() with the full field size leaves v_seen / v_answered unterminated
// when the token is at least as long as the field.
428 strncpy(v->v_seen, token, sizeof(v->v_seen));
431 strncpy(v->v_answered, token, sizeof(v->v_answered));
434 v->v_view = atoi(token);
439 // The key is the same as the first three data members (3 x long)
// The bytes are copied straight from the start of the struct, so this assumes v_roomnum,
// v_roomgen and v_usernum are its first three members, each a long with no padding between
// them -- TODO confirm against the struct visit declaration.
440 out_key->size = sizeof(long) * 3;
441 out_key->data = reallok(NULL, out_key->size);
442 memcpy(out_key->data, v, out_key->size);
444 out_data->size = sizeof(struct visit);
450 // Convert a "dir" record to a record on disk.
// line is cut in place at each '|' by strsep(); two fields are copied out, one into dirkey
// and one into username.
// out_key receives dirkey without a terminating NUL; out_data receives username with one.
// ingest_one() stores the int result in row_was_good.
451 int import_dir(char *line, DBT *out_key, DBT *out_data) {
453 char username[USERNAME_SIZE];
457 for (int i=0; (token = strsep(&p, "|")); ++i) {
// NOTE(review): strncpy() with the full buffer size does not terminate the buffer when the
// token is at least that long; the strlen() and strdup() calls below rely on a terminator.
460 strncpy(dirkey, token, sizeof(dirkey));
463 strncpy(username, token, sizeof(username));
// Key: exactly strlen(dirkey) bytes, no terminator.
468 out_key->size = strlen(dirkey);
469 out_key->data = reallok(NULL, out_key->size);
470 memcpy(out_key->data, dirkey, strlen(dirkey));
// Data: the user name including its terminating NUL (strlen + 1).
472 out_data->size = strlen(username) + 1;
473 out_data->data = strdup(username);
479 // Convert a "usetable" record to a record on disk.
// line is cut in place at each '|' by strsep(); the hash and timestamp fields are converted
// into a freshly allocated, zero-filled struct UseTable.
// out_key receives the hash; out_data->size is the struct size.
// ingest_one() stores the int result in row_was_good.
480 int import_usetable(char *line, DBT *out_key, DBT *out_data) {
// NOTE(review): no NULL check on malloc() is visible before the memset() -- confirm.
482 struct UseTable *u = malloc(sizeof(struct UseTable));
484 memset(u, 0, sizeof(struct UseTable));
487 for (int i=0; (token = strsep(&p, "|")); ++i) {
490 u->hash = atoi(token);
493 u->timestamp = atol(token);
498 // the key is just an int (the hash)
// sizeof(int) bytes are copied from u->hash into a buffer newly allocated by reallok().
499 out_key->size = sizeof(int);
500 out_key->data = reallok(NULL, out_key->size);
501 memcpy(out_key->data, &u->hash, out_key->size);
503 out_data->size = sizeof(struct UseTable);
509 // Import a full text search index record.
510 // It's just like a msglists record: a key and a list of message numbers, but the key is "int" instead of "long"
// The outer strsep() loop walks the '|' fields; the inner one walks the ',' separated message
// numbers. Both cut the input in place.
// out_key receives the index number (an int); out_data receives the array of longs.
// ingest_one() stores the int result in row_was_good.
511 int import_fulltext(char *line, DBT *out_key, DBT *out_data) {
513 char *token, *mtoken;
517 long *msglist = NULL;
519 for (int i=0; (token = strsep(&p, "|")); ++i) {
522 indexnum = atoi(token);
// The array grows by one long for every token.
// NOTE(review): the realloc() result is assigned straight back to msglist and indexed on the
// next line with no NULL check; reallok() exists in this file for exactly this purpose.
526 for (int j=0; (mtoken = strsep(&q, ",")); ++j) {
527 msglist = realloc(msglist, (num_msgs+1) * sizeof(long));
528 msglist[num_msgs++] = atol(mtoken);
// The key is the raw bytes of the int index number.
// NOTE(review): the malloc() result is passed to memcpy() unchecked.
534 out_key->size = sizeof(int);
535 out_key->data = malloc(out_key->size);
536 memcpy(out_key->data, &indexnum, out_key->size);
// The DBT points directly at the msglist array built above (no copy is made).
538 out_data->size = num_msgs * sizeof(long);
539 out_data->data = msglist;
545 // Import an EUID Index record
546 // euidindex|msgnum|roomnum|euid|
// line is cut in place at each '|' by strsep(); the message number, room number and EUID
// are pulled out of it.
// Both the key and the value end with the EUID *including* its terminating NUL (the sizes add 1).
// ingest_one() stores the int result in row_was_good.
547 int import_euidindex(char *line, DBT *out_key, DBT *out_data) {
554 for (int i=0; (token = strsep(&p, "|")); ++i) {
557 msgnum = atol(token);
560 roomnum = atol(token);
// NOTE(review): strncpy() with the full buffer size does not terminate euid when the token
// is at least that long; the strlen() and strcpy() calls below rely on a terminator.
563 strncpy(euid, token, sizeof(euid));
568 // The structure of an euidindex record *key* is:
569 // |----room_number----|----------EUID-------------|
570 // (sizeof long) (actual length of euid)
571 out_key->size = sizeof(long) + strlen(euid) + 1;
572 out_key->data = reallok(NULL, out_key->size);
573 memcpy(out_key->data, &roomnum, sizeof(long));
// NOTE(review): DBT.data is a void * in the Berkeley DB API, so "data + sizeof(long)" here
// and below is arithmetic on void * -- a GNU C extension, not ISO C.
574 strcpy(out_key->data + sizeof(long), euid);
576 // The structure of an euidindex record *value* is:
577 // |-----msg_number----|----room_number----|----------EUID-------------|
578 // (sizeof long) (sizeof long) (actual length of euid)
579 out_data->size = sizeof(long) + sizeof(long) + strlen(euid) + 1;
580 out_data->data = reallok(NULL, out_data->size);
581 memcpy(out_data->data, &msgnum, sizeof(long));
582 memcpy(out_data->data + sizeof(long), &roomnum, sizeof(long));
583 strcpy(out_data->data + sizeof(long) + sizeof(long), euid);
589 // Import a "users by number" (secondary index) record
590 // The key is a "long"
// line is cut in place at each '|' by strsep(). Inside the field loop the user number is
// parsed, then the key/data pair is built from usernum and the current token.
// out_key receives the raw bytes of the long user number; out_data receives a strdup() copy
// of the token, stored with its terminating NUL (strlen + 1).
// ingest_one() stores the int result in row_was_good.
591 int import_usersbynumber(char *line, DBT *out_key, DBT *out_data) {
596 for (int i=0; (token = strsep(&p, "|")); ++i) {
599 usernum = atol(token);
602 out_key->size = sizeof(long);
603 out_key->data = reallok(NULL, sizeof(long));
604 memcpy(out_key->data, &usernum, out_key->size);
605 out_data->data = strdup(token);
606 out_data->size = strlen(out_data->data) + 1;
// Falling out of the loop means the record never produced a key/data pair.
611 return(0); // should never get here unless it's a bad record
615 // Import a config record
616 // The key is the config key
617 // The data is the config key, a null, the value, and another null
// line is cut in place at each '|' by strsep().
// out_key receives a strdup() copy of a token, sized without its terminating NUL.
// out_data is laid out as k, NUL, v, NUL -- hence strlen(k) + strlen(v) + 2 bytes.
// ingest_one() stores the int result in row_was_good.
618 int import_config(char *line, DBT *out_key, DBT *out_data) {
623 for (int i=0; (token = strsep(&p, "|")); ++i) {
627 out_key->size = strlen(token);
628 out_key->data = strdup(token);
632 out_data->size = strlen(k) + strlen(v) + 2;
633 out_data->data = reallok(NULL, out_data->size);
// The first strcpy() writes k and its NUL; the second starts just past that NUL and writes v and its NUL.
634 strcpy(out_data->data, k);
635 strcpy(out_data->data + strlen(k) + 1, v);
644 // Ingest one line of dump data. NOT REENTRANT
// line is one '|' delimited record from the dump; dst_dbenv is the environment in which the
// destination tables are created.
// Row counters and the previous/current table numbers are kept in function-level statics,
// which is why this function is not reentrant.
645 void ingest_one(char *line, DB_ENV *dst_dbenv) {
647 static int good_rows = 0;
648 static int bad_rows = 0;
649 static int previous_cdb = -1 ;
650 static int current_cdb = -1 ;
652 char record_type[32];
656 DBT out_key, out_data;
658 // We are assuming that the lines of the dump file will generally be sorted by table.
659 // By remembering the last table we worked with, we can do close/open if the table changes.
// Progress line: table number in hex, then the good and bad row counts. It ends in '\r' so
// the next update overwrites it in place.
661 if (current_cdb >= 0) {
662 fprintf(stderr, " \033[33m%02x \033[32m%9d \033[31m%8d\033[0m\r", current_cdb, good_rows, bad_rows);
666 // Clear out our record buffer
// Zeroing the DBTs means an importer that calls reallok(out_key->data, ...) starts from a NULL pointer.
667 memset(&out_key, 0, sizeof(DBT));
668 memset(&out_data, 0, sizeof(DBT));
671 // Identify the record type we are currently working with,
672 // then call the correct conversion function to load up our record buffer.
// The comparison is case-insensitive. "msgtext" and "msgmeta" both select CDB_MSGMAIN, and
// "bigmsg" reuses import_msgtext() with CDB_BIGMSGS as the table.
673 extract_token(record_type, line, 0, '|', sizeof record_type);
674 if (!strcasecmp(record_type, "msgtext")) {
675 current_cdb = CDB_MSGMAIN;
676 row_was_good = import_msgtext(line, &out_key, &out_data);
678 else if (!strcasecmp(record_type, "msgmeta")) {
679 current_cdb = CDB_MSGMAIN;
680 row_was_good = import_msgmeta(line, &out_key, &out_data);
682 else if (!strcasecmp(record_type, "user")) {
683 current_cdb = CDB_USERS;
684 row_was_good = import_user(line, &out_key, &out_data);
686 else if (!strcasecmp(record_type, "room")) {
687 current_cdb = CDB_ROOMS;
688 row_was_good = import_room(line, &out_key, &out_data);
690 else if (!strcasecmp(record_type, "floor")) {
691 current_cdb = CDB_FLOORTAB;
692 row_was_good = import_floor(line, &out_key, &out_data);
694 else if (!strcasecmp(record_type, "msglist")) {
695 current_cdb = CDB_MSGLISTS;
696 row_was_good = import_msglist(line, &out_key, &out_data);
698 else if (!strcasecmp(record_type, "visit")) {
699 current_cdb = CDB_VISIT;
700 row_was_good = import_visit(line, &out_key, &out_data);
702 else if (!strcasecmp(record_type, "dir")) {
703 current_cdb = CDB_DIRECTORY;
704 row_was_good = import_dir(line, &out_key, &out_data);
706 else if (!strcasecmp(record_type, "use")) {
707 current_cdb = CDB_USETABLE;
708 row_was_good = import_usetable(line, &out_key, &out_data);
710 else if (!strcasecmp(record_type, "bigmsg")) {
711 current_cdb = CDB_BIGMSGS;
712 row_was_good = import_msgtext(line, &out_key, &out_data);
714 else if (!strcasecmp(record_type, "fulltext")) {
715 current_cdb = CDB_FULLTEXT;
716 row_was_good = import_fulltext(line, &out_key, &out_data);
718 else if (!strcasecmp(record_type, "euidindex")) {
719 current_cdb = CDB_EUIDINDEX;
720 row_was_good = import_euidindex(line, &out_key, &out_data);
722 else if (!strcasecmp(record_type, "usersbynumber")) {
723 current_cdb = CDB_USERSBYNUMBER;
724 row_was_good = import_usersbynumber(line, &out_key, &out_data);
726 else if (!strcasecmp(record_type, "config")) {
727 current_cdb = CDB_CONFIG;
728 row_was_good = import_config(line, &out_key, &out_data);
734 // If the current record is a different table than the previous record,
735 // then close the other table and open the correct one.
736 if (current_cdb != previous_cdb) {
// Finish the '\r' progress line of the previous table before moving on.
737 if (previous_cdb >= 0) {
738 fprintf(stderr, "\n");
740 if (previous_cdb >= 0) {
741 ret = dst_dbp->close(dst_dbp, 0);
743 fprintf(stderr, "db: db_close: %s\n", db_strerror(ret));
747 if (current_cdb >= 0) {
// Table files are named "cdb.XX", where XX is the table number in two-digit hex.
750 snprintf(dbfilename, sizeof dbfilename, "cdb.%02x", current_cdb);
752 // create a database handle for the destination table
753 ret = db_create(&dst_dbp, dst_dbenv, 0);
755 fprintf(stderr, "db: db_create: %s\n", db_strerror(ret));
756 fprintf(stderr, "db: exit code %d\n", ret);
760 // open the file containing the destination table
// NOTE(review): the table is opened with DB_CREATE | DB_TRUNCATE on every table switch. If
// records for one table are not contiguous in the dump, coming back to that table would
// empty the rows already loaded into it -- confirm the dump is always grouped by table.
761 ret = dst_dbp->open(dst_dbp, NULL, dbfilename, NULL, DB_BTREE, (DB_CREATE | DB_TRUNCATE), 0600);
763 fprintf(stderr, "db: db_open(%s): %s\n", dbfilename, db_strerror(ret));
764 fprintf(stderr, "db: exit code %d\n", ret);
769 previous_cdb = current_cdb;
772 // If the conversion function was successful, write the record to the database.
// The put uses no transaction (NULL) and flags of 0.
775 ret = dst_dbp->put(dst_dbp, NULL, &out_key, &out_data, 0);
// NOTE(review): unlike the other db: messages in this function, this one has no trailing "\n".
777 fprintf(stderr, "db: cdb_put(%x): %s", current_cdb, db_strerror(ret));
790 // This is the loop that loads the dump data. NOT REENTRANT
// Reads stdin one character at a time into a growable line buffer and hands each line to
// ingest_one() together with dst_dbenv.
// Lines starting with "begin|" and "end|" (case-insensitive) are noted so that a warning can
// be printed for whichever marker was not seen.
791 void ingest(DB_ENV *dst_dbenv) {
// Buffer bookkeeping lives in statics, which is why this function is not reentrant.
792 static size_t line_alloc = 1;
794 static size_t line_len = 0;
// Column headings for the progress lines printed by ingest_one().
799 fprintf(stderr, "\033[7mtable\033[0m \033[7mgood_rows\033[0m \033[7mbad_rows\033[0m\n");
800 line = reallok(NULL, line_alloc);
// A line ends at '\n' or at any getc() result <= 0. That covers EOF (negative).
// NOTE(review): it also ends a line on a literal NUL byte in the input -- confirm intended.
805 while (ch = getc(stdin), ((ch != '\n') && (ch > 0))) {
// The +2 leaves room for the character about to be stored plus one more byte.
806 if ((line_len+2) > line_alloc) {
808 line = reallok(line, line_alloc);
810 line[line_len++] = ch;
819 ingest_one(line, dst_dbenv);
820 if (!strncasecmp(line, HKEY("begin|"))) begin_found = 1;
821 if (!strncasecmp(line, HKEY("end|"))) end_found = 1;
827 fprintf(stderr, "\033[31mWARNING: \"begin\" line was not found in the loaded data.\033[0m\n");
830 fprintf(stderr, "\033[31mWARNING: \"end\" line was not found in the loaded data.\033[0m\n");
// Program entry point: prints the banner, picks the destination directory, parses the -h / -y
// options, builds a command to clear old database files, then opens the environment via
// open_dbenv() and closes it again via close_dbenv().
836 int main(int argc, char **argv) {
837 char dst_dir[PATH_MAX];
839 static DB_ENV *dst_dbenv; // Target DB environment
841 // display the greeting
842 fprintf(stderr, "\033[44m\033[33m\033[1m \033[K\033[0m\n"
843 "\033[44m\033[33m\033[1m DB Load utility for Citadel \033[K\033[0m\n"
844 "\033[44m\033[33m\033[1m Copyright (c) 2023 by citadel.org et al. \033[K\033[0m\n"
845 "\033[44m\033[33m\033[1m This program is open source software. Use, duplication, or disclosure \033[K\033[0m\n"
846 "\033[44m\033[33m\033[1m is subject to the terms of the GNU General Public license v3. \033[K\033[0m\n"
847 "\033[44m\033[33m\033[1m \033[K\033[0m\n");
849 // Default destination directory unless overridden
850 snprintf(dst_dir, sizeof(dst_dir), "%s/data", CTDLDIR);
852 // Parse command line
// Option string "h:y": -h takes an argument, -y takes none.
854 while ((a = getopt(argc, argv, "h:y")) != EOF) {
// NOTE(review): "/data" is appended to the option argument here, while the usage text further
// down describes [data_dir] as the database directory itself ("usually
// /usr/local/citadel/data") -- confirm which is intended.
857 snprintf(dst_dir, sizeof(dst_dir), "%s/data", optarg);
863 fprintf(stderr, "%s: usage: %s -h dest_dir [<dumpfile]\n", argv[0], argv[0]);
// NOTE(review): dst_dir is an array declared above, so this comparison with NULL is never true.
868 if (dst_dir == NULL) {
869 fprintf(stderr, "ctdlload: no destination directory was specified.\n");
873 if (confirmed == 1) {
874 fprintf(stderr, "ctdlload: You have specified the [-y] flag, so processing will continue.\n");
877 fprintf(stderr, "ctdlload: usage: ctdlload -y -h[data_dir] <[dump_file]\n");
878 fprintf(stderr, " [data_dir] is your database directory, usually /usr/local/citadel/data\n");
879 fprintf(stderr, " Please read [ https://www.citadel.org/dump-and-load.html ] to learn how to proceed.\n");
883 // Remove any database that is already in the target directory (yes, delete it, be careful)
// NOTE(review): dst_dir is placed into the command text unquoted; if this string is run by a
// shell, spaces or shell metacharacters in the -h argument would be interpreted by it.
885 snprintf(cmd, sizeof cmd, "rm -fv %s/cdb.* %s/log.*", dst_dir, dst_dir);
888 dst_dbenv = open_dbenv(dst_dir);
890 close_dbenv(dst_dbenv);
892 fprintf(stderr, "ctdlload: \033[32m\033[1mfinished\033[0m\n");