GEDCOM moving (some) source to citation details for use with RootsMagic

I have an old family tree GEDCOM file which I wanted to import into RootsMagic. The issue I had was that the original program that created the GEDCOM file had most (if not all) of the research evidence details stored against the source and very little against the citation (except the text used for the citation name).

Moving the source comments and media to the citation detail comments and media was a lot of work to do manually. Thus, since I had begun learning C programming (still a novice), I wrote some code to read the GEDCOM file and move the source information mentioned above to the citations in a new GEDCOM file - not perfect code (i.e. novice) but it seems to work ok.

The original GEDCOM file is not written over but having a backup is recommended.

I thought I would share the code in case anyone else has a similar issue (unlikely) after finding an old family tree GEDCOM file on an old hard drive:

// src2cite - moves source comments and media to citation detail and media
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

// Returns the number found in str between startDelim and endDelim, or 0 if none is found.
int extractBetweenDelimiters(const char *str, const char *startDelim, const char *endDelim);
// Copies source records read from infile into a new file named srcfile.
void *lstwrite(FILE *infile, char *srcfile);
// Copies the lines of source number src from srcfile to outfile, two levels deeper.
void *srcwrite(FILE *outfile, FILE *srcfile, int src);

// GEDCOM line prefixes ("level tag") that the program searches for
const char *SOURCEREF = "2 SOUR @S";    // source reference
const char *SOURCEDEF = "0 @S";         // source definition
const char *SOURCETMP = "0 @S%d@ SOUR"; // source template
const char *REPODEF   = "0 @R";         // repository definition
const char *NOTE1 = "1 NOTE";           // note tag at level 1
const char *OBJE1 = "1 OBJE";           // media object tag at level 1
const char *REPO1 = "1 REPO";           // repository tag at level 1
const char *TEXT1 = "1 TEXT";           // text tag at level 1
const char *TITL1 = "1 TITL";           // title tag at level 1
const char *TRLR0 = "0 TRLR";           // end of transmission

// Return true when a GEDCOM line begins with the given "level tag" prefix.
static bool gedLineHasPrefix(const char *line, const char *prefix) {
    return strncmp(line, prefix, strlen(prefix)) == 0;
}

// Return true for a level 0 source record line, eg 0 @S12@ SOUR.
// Other records whose id begins with S (eg 0 @SUBM1@ SUBM) do not match.
static bool gedLineIsSourceRecord(const char *line) {
    return gedLineHasPrefix(line, SOURCEDEF) && strstr(line, "@ SOUR") != NULL;
}

// src2cite entry point.
// argv[1] is the existing GEDCOM file to read, argv[2] the new GEDCOM file to write.
// Every input line is copied to the output except the level 1 NOTE and OBJE
// structures of source records; after each "2 SOUR @S..@" citation line the
// matching source lines are appended by srcwrite().
// Returns 0 on success and 1 on any usage or file error.
int main(int argc, char* argv[] ) {
    // check parameters
    if (argc != 3) {
        printf("usage: %s {existing input GEDCOM filename} {new output GEDCOM filename}\n", argv[0]);
        return 1;
    }
    if (strcmp(argv[1], argv[2]) == 0) {
        printf("sorry, can't output to same GEDCOM filename!\n");
        printf("use a different name; %s myfamily.ged myfamily1.ged\n", argv[0]);
        return 1;
    }

    printf("%s program is running.\n", argv[0]);
    char *filein = argv[1];
    char *fileout = argv[2];
    int status = 1;             // assume failure until all the work is finished
    int random_number;
    int closed;
    FILE *gedcom_out = NULL;
    FILE *srclist_in = NULL;
    char srclist[32] = "";      // temporary file name, empty until it is chosen
    char buffer[512];
    bool src = false;           // inside a source record
    bool srcnote = false;       // inside a source 1 NOTE structure
    bool srcobje = false;       // inside a source 1 OBJE structure

    // open gedcom_in as input and gedcom_out as output
    FILE *gedcom_in = fopen(filein, "r");
    if (gedcom_in == NULL) {
        printf("error opening %s, file may not exist!\n", filein);
        return 1;
    }
    gedcom_out = fopen(fileout, "w");
    if (gedcom_out == NULL) {
        printf("error opening %s!\n", fileout);
        goto cleanup;
    }

    // seed random number
    srand((unsigned)time(NULL));
    random_number = rand() % 100000;

    // create source list temporary file from gedcom_in
    snprintf(srclist, sizeof(srclist), "./_srclist%d.tmp", random_number);
    fflush(stdout);
    lstwrite(gedcom_in, srclist);

    // open srclist_in as input
    srclist_in = fopen(srclist, "r");
    if (srclist_in == NULL) {
        printf("error opening %s\n", srclist);
        goto cleanup;
    }
    printf("processing %s ... ", filein);
    fflush(stdout);

    // read gedcom_in from start until EOF and write to gedcom_out
    fseek(gedcom_in, 0, SEEK_SET);
    while (fgets(buffer, sizeof(buffer), gedcom_in) != NULL) {
        if (buffer[0] == '0') {
            // every new record ends any NOTE/OBJE structure being skipped,
            // otherwise the record line after a source would be lost
            src = gedLineIsSourceRecord(buffer);
            srcnote = false;
            srcobje = false;
        } else if (buffer[0] == '1') {
            // a level 1 line starts (or ends) a source NOTE or OBJE structure
            srcnote = src && gedLineHasPrefix(buffer, NOTE1);
            srcobje = src && gedLineHasPrefix(buffer, OBJE1);
        }
        // ignore source NOTEs and OBJEcts and any associated tags
        if (!srcnote && !srcobje) {
            fputs(buffer, gedcom_out);
        }
        // find source reference
        if (gedLineHasPrefix(buffer, SOURCEREF)) {
            // get source no between at signs, eg 2 SOUR @S999@
            int srcno = extractBetweenDelimiters(buffer, "@S", "@\n");
            if (srcno == 0) {
                // line may end with CR LF when the file came from another system
                srcno = extractBetweenDelimiters(buffer, "@S", "@\r\n");
            }
            // write gedcom lines from srclist_in to gedcom_out for source no
            srcwrite(gedcom_out, srclist_in, srcno);
        }
    }

    // report read/write failures instead of claiming success
    if (ferror(gedcom_in) || ferror(gedcom_out)) {
        printf("failed!\n");
        printf("error reading %s or writing %s!\n", filein, fileout);
        goto cleanup;
    }
    closed = fclose(gedcom_out); // flushes the remaining output
    gedcom_out = NULL;
    if (closed != 0) {
        printf("failed!\n");
        printf("error writing %s!\n", fileout);
        goto cleanup;
    }
    printf("done!\n");
    printf("new gedom file %s created.\n", fileout);
    status = 0;

cleanup:
    // close files, delete the temporary file and exit
    if (srclist_in != NULL) {
        fclose(srclist_in);
    }
    if (srclist[0] != '\0') {
        remove(srclist);
    }
    if (gedcom_out != NULL) {
        fclose(gedcom_out);
    }
    fclose(gedcom_in);
    return status;
}

// Extract the source reference number from between the two delimiters.
// str        - line to search, eg "2 SOUR @S999@\n"
// startDelim - text immediately before the number, eg "@S"
// endDelim   - text immediately after the number, eg "@\n"; it is searched
//              for only after startDelim, so it may share characters with it
// Returns the number, or 0 when a delimiter is missing, the text between
// them is longer than 11 characters, is not a number or does not fit an int.
int extractBetweenDelimiters(const char *str, const char *startDelim, const char *endDelim) {
    const char *start = strstr(str, startDelim);
    if (start == NULL) {
        return 0;
    }
    start += strlen(startDelim);

    const char *end = strstr(start, endDelim);
    if (end == NULL) {
        return 0;
    }

    char result[12]; // room for the longest int: sign, 10 digits and terminator
    size_t length = (size_t)(end - start);
    if (length >= sizeof(result)) {
        return 0;
    }
    memcpy(result, start, length);
    result[length] = '\0';

    // at most 11 characters, so the value always fits a long long
    long long value = strtoll(result, NULL, 10);
    if (value < INT_MIN || value > INT_MAX) {
        return 0;
    }
    return (int)value;
}

// Read gedcom file and write every source record (0 @S..@ SOUR and the lines
// that follow it up to the next level 0 line) to the source list file.
// Source records are collected wherever they are in the file, so repository
// or other records placed before or between the sources do not end the list.
// infile  - open GEDCOM input file, it is read from its beginning
// srcfile - name of the source list file to create
// Exits the program when the source list file cannot be created or written.
// Always returns NULL.
void *lstwrite(FILE *infile, char *srcfile) {
    char buffer[512];

    // create source list file
    FILE *srclist_out = fopen(srcfile, "w");
    if (srclist_out == NULL) {
        printf("error opening %s!\n", srcfile);
        exit(1);
    }

    // start at beginning of file and read until end
    fseek(infile, 0, SEEK_SET);
    const size_t deflen = strlen(SOURCEDEF);
    bool insource = false;
    while (fgets(buffer, sizeof(buffer), infile) != NULL) {
        // each level 0 line decides if the lines that follow belong to a source;
        // the SOUR tag check keeps out other ids starting with S, eg 0 @SUBM1@ SUBM
        if (buffer[0] == '0') {
            insource = strncmp(buffer, SOURCEDEF, deflen) == 0
                       && strstr(buffer, "@ SOUR") != NULL;
        }
        if (insource) {
            fputs(buffer, srclist_out);
        }
    }

    // fclose flushes the file, a failure here means the list is incomplete
    if (fclose(srclist_out) != 0) {
        printf("error writing %s!\n", srcfile);
        exit(1);
    }
    return NULL;
}

// Return true when a source list line begins with the given "level tag" prefix.
static bool srcLineHasPrefix(const char *line, const char *prefix) {
    return strncmp(line, prefix, strlen(prefix)) == 0;
}

// Read source list file after 0 @S..@ SOUR to next level 0 line and write those
// GEDCOM lines to GEDCOM output file with the gedcom level increased by 2.
// Level 1 TITL, REPO and TEXT lines and the lines subordinate to them are
// ignored; source REFN will be written, if available, as citation reference.
// outfile - open GEDCOM output file
// srcfile - open source list file, it is read from its beginning
// num     - number of the source, eg 12 for 0 @S12@ SOUR
// Nothing is written when the source is not in the list. Always returns NULL.
void *srcwrite(FILE *outfile, FILE *srcfile, int num) {
    char srcdef[32];
    snprintf(srcdef, sizeof(srcdef), SOURCETMP, num); // define source for searching
    char buffer[512];

    // start at beginning of file and read until the source definition is found
    fseek(srcfile, 0, SEEK_SET);
    bool found = false;
    while (!found && fgets(buffer, sizeof(buffer), srcfile) != NULL) {
        found = srcLineHasPrefix(buffer, srcdef);
    }
    if (!found) {
        return NULL;
    }

    // read source list and write to gedcom out until next level 0 line
    bool skip = false; // true while inside a 1 TITL, 1 REPO or 1 TEXT structure
    while (fgets(buffer, sizeof(buffer), srcfile) != NULL) {
        if (buffer[0] == '0') {
            break; // next record, this source is complete
        }

        // read the level number and find the text that follows it
        char *rest = buffer;
        long level = -1;
        if (buffer[0] >= '1' && buffer[0] <= '9') {
            level = strtol(buffer, &rest, 10);
        }
        if (level < 1 || level > 97) {
            // no usable level (eg blank line), leave the line as it is
            if (!skip) {
                fputs(buffer, outfile);
            }
            continue;
        }

        // a level 1 line starts a new structure, decide if it is an ignored one
        if (level == 1) {
            skip = srcLineHasPrefix(buffer, TITL1)
                   || srcLineHasPrefix(buffer, REPO1)
                   || srcLineHasPrefix(buffer, TEXT1);
        }
        if (!skip) {
            // increment gedcom level +2; written straight to the file so the
            // line is never copied onto itself
            fprintf(outfile, "%ld%s", level + 2, rest);
        }
    }
    return NULL;
}

The program usage is: ./src2cite {existing input GEDCOM filename} {new output GEDCOM filename}
PS: If your sources have REFN tags, they will be copied to the citations as REFN tags.

3 Likes

Good for you at using a tool you are comfortable with to solve a problem. If I were in the same situation, my go-to would be a SQLite script on the RM database file resulting from the import. “Horses for courses”.

1 Like