I have an old family tree GEDCOM file which I wanted to import into RootsMagic. The issue I had was that the original program that created the GEDCOM file had stored most (if not all) of the research evidence details against the source and very little against the citation (except the text used for the citation name).
Moving the source comments and media to the citation detail comments and media would have been a lot of work to do manually. Since I had begun learning C programming (I am still a novice), I wrote some code to read the GEDCOM file and move the source information mentioned above to the citations in a new GEDCOM file. It is not perfect code (novice work), but it seems to work OK.
The original GEDCOM file is not written over but having a backup is recommended.
I thought I would share the code in case anyone else has a similar issue (unlikely) after finding an old family tree GEDCOM file on an old hard drive:
// src2cite - moves source comments and media to citation detail and media
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
// Returns the integer found in str between startDelim and endDelim, or 0 if
// the delimiters are not found.
int extractBetweenDelimiters(const char *str, const char *startDelim, const char *endDelim);
// Builds the temporary source list file named srcfile from the GEDCOM stream infile.
void *lstwrite(FILE *infile, char *srcfile);
// Writes the details of source number src from the source list srcfile to
// outfile, with each GEDCOM level raised by 2.
void *srcwrite(FILE *outfile, FILE *srcfile, int src);
// GEDCOM line fragments the program searches for when classifying a line
const char *SOURCEREF = "2 SOUR @S"; // source reference
const char *SOURCEDEF = "0 @S"; // source definition
const char *SOURCETMP = "0 @S%d@ SOUR"; // source template
const char *REPODEF = "0 @R"; // repository definition
const char *NOTE1 = "1 NOTE"; // note tag at level 1
const char *OBJE1 = "1 OBJE"; // media object tag at level 1
const char *REPO1 = "1 REPO"; // repository tag at level 1
const char *TEXT1 = "1 TEXT"; // text tag at level 1
const char *TITL1 = "1 TITL"; // title tag at level 1
const char *TRLR0 = "0 TRLR"; // end of transmission
// Entry point: copies the input GEDCOM to a new output GEDCOM. While copying,
// the level-1 NOTE and OBJE structures of every source record are left out,
// and after every "2 SOUR @S..@" citation the lines produced by srcwrite()
// for the cited source are inserted.
//
// argv[1] - existing input GEDCOM filename (only read, never written)
// argv[2] - new output GEDCOM filename (must differ from argv[1])
// returns 0 on success, 1 on a usage error or a file error
//
// Only citations at level 2 (SOURCEREF) are expanded.
int main(int argc, char* argv[] ) {
    // check parameters
    if (argc != 3) {
        printf("usage: %s {existing input GEDCOM filename} {new output GEDCOM filename}\n", argv[0]);
        return 1;
    }
    if (strcmp(argv[1], argv[2]) == 0) {
        printf("sorry, can't output to same GEDCOM filename!\n");
        printf("use a different name; %s myfamily.ged myfamily1.ged\n", argv[0]);
        return 1;
    }
    printf("%s program is running.\n", argv[0]);
    char *filein = argv[1];
    char *fileout = argv[2];

    // everything the cleanup code relies on is declared before the first goto
    int status = 1;
    FILE *gedcom_in = NULL;
    FILE *gedcom_out = NULL;
    FILE *srclist_in = NULL;
    char srclist[32] = "";      // stays empty until the temporary file name is chosen
    char buffer[512];
    int srcno;
    bool src = false;           // inside a 0 @S..@ SOUR record
    bool srcnote = false;       // inside a source's 1 NOTE structure
    bool srcobje = false;       // inside a source's 1 OBJE structure
    bool writefail = false;

    // open gedcom_in as input and gedcom_out as output
    gedcom_in = fopen(filein, "r");
    if (gedcom_in == NULL) {
        printf("error opening %s, file may not exist!\n", filein);
        goto cleanup;
    }
    gedcom_out = fopen(fileout, "w");
    if (gedcom_out == NULL) {
        printf("error opening %s!\n", fileout);
        goto cleanup;
    }

    // create source list temporary file (randomised name) from gedcom_in
    srand((unsigned)time(NULL));
    snprintf(srclist, sizeof(srclist), "./_srclist%d.tmp", rand() % 100000);
    fflush(stdout);
    lstwrite(gedcom_in, srclist);

    // open srclist_in as input
    srclist_in = fopen(srclist, "r");
    if (srclist_in == NULL) {
        printf("error opening %s\n", srclist);
        goto cleanup;
    }

    printf("processing %s ... ", filein);
    fflush(stdout);

    // read gedcom_in from start until EOF and write to gedcom_out
    fseek(gedcom_in, 0, SEEK_SET);
    while (fgets(buffer, sizeof(buffer), gedcom_in) != NULL) {
        if (buffer[0] == '0') {
            // A new record always starts outside any NOTE/OBJE structure;
            // without this reset the header of the record following a source
            // that ends in a NOTE or OBJE would be dropped.
            // Only "0 @S..@ SOUR" counts as a source, so that other records
            // whose id starts with S (eg 0 @SUBM@ SUBM) keep their notes.
            src = strncmp(buffer, SOURCEDEF, strlen(SOURCEDEF)) == 0
                  && strstr(buffer, "@ SOUR") != NULL;
            srcnote = false;
            srcobje = false;
        } else if (buffer[0] == '1') {
            // each level-1 line of a source decides whether it, and the
            // deeper lines that follow it, are removed
            srcnote = src && strncmp(buffer, NOTE1, strlen(NOTE1)) == 0;
            srcobje = src && strncmp(buffer, OBJE1, strlen(OBJE1)) == 0;
        }

        // ignore source NOTEs and OBJEcts and any associated tags
        if (srcnote || srcobje) {
            continue;
        }
        fputs(buffer, gedcom_out);

        // Expand a citation only when its own line was written, so the
        // inserted lines always follow the citation they belong to.
        if (strncmp(buffer, SOURCEREF, strlen(SOURCEREF)) == 0) {
            // get source no between "@S" and the closing at sign, eg 2 SOUR @S999@;
            // the line may end in CR LF when a Windows file is read elsewhere
            const char *enddelim = strstr(buffer, "@\r\n") ? "@\r\n" : "@\n";
            srcno = extractBetweenDelimiters(buffer, "@S", enddelim);
            // write gedcom lines from srclist_in to gedcom_out for source no
            srcwrite(gedcom_out, srclist_in, srcno);
        }
    }

    // a failed write or flush must not be reported as success
    writefail = ferror(gedcom_out) != 0;
    if (fclose(gedcom_out) != 0) {
        writefail = true;
    }
    gedcom_out = NULL;
    if (writefail) {
        printf("error writing %s!\n", fileout);
        goto cleanup;
    }

    printf("done!\n");
    printf("new gedom file %s created.\n", fileout);
    status = 0;

cleanup:
    // close whatever was opened and remove the temporary source list
    if (srclist_in != NULL) {
        fclose(srclist_in);
    }
    if (srclist[0] != '\0') {
        remove(srclist);
    }
    if (gedcom_out != NULL) {
        fclose(gedcom_out);
    }
    if (gedcom_in != NULL) {
        fclose(gedcom_in);
    }
    return status;
}
// Extract the source reference number from between the two delimiters.
//
// str        - text to search, eg "2 SOUR @S12@\n"
// startDelim - text immediately before the number, eg "@S"
// endDelim   - text immediately after the number, eg "@\n"; it is searched
//              for only after startDelim
// returns the decimal number at the start of the enclosed text, or 0 when a
//         delimiter is missing, the enclosed text is too long to be a number,
//         is not numeric, or does not fit in an int
int extractBetweenDelimiters(const char *str, const char *startDelim, const char *endDelim) {
    const char *start = strstr(str, startDelim);
    if (start == NULL) {
        return 0;
    }
    start += strlen(startDelim);

    // looking only past the opening delimiter keeps the span length non-negative
    const char *end = strstr(start, endDelim);
    if (end == NULL) {
        return 0;
    }

    // copy the span so the conversion can never read into the end delimiter
    char digits[32];
    size_t length = (size_t)(end - start);
    if (length >= sizeof(digits)) {
        return 0;
    }
    memcpy(digits, start, length);
    digits[length] = '\0';

    // strtol reports overflow through errno, unlike atoi
    errno = 0;
    long value = strtol(digits, NULL, 10);
    if (errno == ERANGE || value > INT_MAX || value < INT_MIN) {
        return 0;
    }
    return (int)value;
}
// Read the GEDCOM file and write every record that starts with the source
// definition prefix (0 @S..) to the source list file, stopping at 0 TRLR or
// at end of file. Records of any other kind are left out, so the position of
// the sources relative to repositories or other records does not matter.
//
// infile  - GEDCOM input stream; it is rewound and read from the start
// srcfile - name of the source list file to create (overwritten if present)
// returns NULL; the program exits with status 1 if the source list cannot be
//         created or written
void *lstwrite(FILE *infile, char *srcfile) {
    char buffer[512];

    // create source list file
    FILE *srclist_out = fopen(srcfile, "w");
    if (srclist_out == NULL) {
        printf("error opening %s!\n", srcfile);
        exit(1);
    }

    // start at beginning of file and read until TRLR or end of file
    fseek(infile, 0, SEEK_SET);
    bool insource = false;
    while (fgets(buffer, sizeof(buffer), infile) != NULL) {
        if (buffer[0] == '0') {
            // tags are compared at the start of the line only, so tag-like
            // text inside a note cannot end or start the copying
            if (strncmp(buffer, TRLR0, strlen(TRLR0)) == 0) {
                break;
            }
            // each level-0 line decides whether its whole record is copied
            insource = strncmp(buffer, SOURCEDEF, strlen(SOURCEDEF)) == 0;
        }
        if (insource) {
            fputs(buffer, srclist_out);
        }
    }

    // an incomplete source list would silently lose citation details
    bool failed = ferror(srclist_out) != 0;
    if (fclose(srclist_out) != 0) {
        failed = true;
    }
    if (failed) {
        printf("error writing %s!\n", srcfile);
        exit(1);
    }
    return NULL;
}
// Find source number num in the source list file and write the lines of that
// source record to the GEDCOM output file with each GEDCOM level raised by 2.
// The 1 TITL, 1 REPO and 1 TEXT lines are left out together with the deeper
// lines that belong to them. A source REFN, if present, is copied like any
// other line. Nothing is written when the source is not in the list.
//
// outfile - GEDCOM output stream, positioned just after the citation line
// srcfile - source list stream; it is rewound and searched from the start
// num     - source number, matched against "0 @S<num>@ SOUR"
// returns NULL
void *srcwrite(FILE *outfile, FILE *srcfile, int num) {
    // NOTE(review): 99 is taken to be the highest valid GEDCOM level - confirm
    // against the GEDCOM grammar. The bound also keeps level + LEVEL_SHIFT
    // from overflowing on a corrupt line.
    enum { LEVEL_SHIFT = 2, MAX_LEVEL = 99 };

    char srcdef[64];
    snprintf(srcdef, sizeof(srcdef), SOURCETMP, num); // define source for searching
    char buffer[512];

    // start at beginning of file and look for the source definition line
    fseek(srcfile, 0, SEEK_SET);
    bool found = false;
    while (!found && fgets(buffer, sizeof(buffer), srcfile) != NULL) {
        found = strncmp(buffer, srcdef, strlen(srcdef)) == 0;
    }
    if (!found) {
        return NULL;
    }

    // copy the record body until the next level-0 line or end of file
    bool skip = false; // inside a TITL, REPO or TEXT structure
    while (fgets(buffer, sizeof(buffer), srcfile) != NULL && buffer[0] != '0') {
        char *rest = buffer;
        long level = -1;
        if (buffer[0] >= '1' && buffer[0] <= '9') {
            // rest is left pointing just past all of the level digits
            level = strtol(buffer, &rest, 10);
        }

        if (level < 1 || level > MAX_LEVEL - LEVEL_SHIFT) {
            // no usable level (eg a blank line): pass the line on unchanged
            if (!skip) {
                fputs(buffer, outfile);
            }
            continue;
        }

        // every level-1 line decides whether it and its sub-lines are ignored
        if (level == 1) {
            skip = strncmp(buffer, TITL1, strlen(TITL1)) == 0
                   || strncmp(buffer, REPO1, strlen(REPO1)) == 0
                   || strncmp(buffer, TEXT1, strlen(TEXT1)) == 0;
        }

        if (!skip) {
            // print the new level and the untouched remainder separately
            // rather than formatting the buffer into itself
            fprintf(outfile, "%ld%s", level + LEVEL_SHIFT, rest);
        }
    }
    return NULL;
}
The program usage is ./src2cite {gedcom file} {output filename}.
PS: If your sources have REFN tags, they will be copied to the citations as REFN tags.