Going from GEDCOM to JSON, as it turns out, is not as trivial as I had initially thought it would be.
This morning I took a second crack at it and learned a good bit thanks to ChatGPT, who is, by the way, a wonderful teacher. After more than 3 hours in a single unpaid ChatGPT session, we actually ended up writing a GEDCOM-to-JSON parser from scratch, which worked well, but only after plumbing the numerous shortcomings of python-gedcom.
The Phase 1 - Source-Aware GEDCOM to JSON Parser we created today worked great. Tomorrow we’ll have a go at Phase 2 - Enhanced Parser for GEDCOM Citations.
Along the way, an idea presented itself: could RootsMagic directly export a .json file?
For anybody interested, here’s today’s Phase 1 Python script, gedcom_tojson.py:
import json
from gedcom.parser import Parser
from gedcom.element.individual import IndividualElement
from gedcom.element.family import FamilyElement
# Input GEDCOM export to read and the JSON file this script writes.
GEDCOM_FILE = "David Howe.ged"
OUTPUT_JSON = "david_howe_family.json"

# Lookup tables filled in by the first pass below, keyed by GEDCOM pointer.
individuals, families = {}, {}

# Parse the GEDCOM file and keep its top-level (root child) records.
parser = Parser()
parser.parse_file(GEDCOM_FILE)
elements = parser.get_root_child_elements()
# First pass: walk the top-level records and index every individual and
# family by its pointer so the second pass can resolve cross-references.
for record in elements:
    if isinstance(record, IndividualElement):
        person_id = record.get_pointer()
        person = {
            "id": person_id,
            "name": record.get_name(),
            "gender": record.get_gender(),
            # Only item 0 of the birth/death data is kept.
            "birth_date": record.get_birth_data()[0],
            "death_date": record.get_death_data()[0],
            # Filled in with names during the second pass.
            "spouses": [],
            "children": [],
            "parents": [],
            # Temporary link fields, removed again before the JSON is written.
            "famc": None,  # Family child (parents)
            "fams": [],  # Family spouse (marriages)
        }
        individuals[person_id] = person

        # Record which families this person belongs to. A later FAMC
        # overwrites an earlier one; every FAMS is kept.
        for sub in record.get_child_elements():
            sub_tag = sub.get_tag()
            if sub_tag == "FAMC":
                person["famc"] = sub.get_value()
            elif sub_tag == "FAMS":
                person["fams"].append(sub.get_value())

    elif isinstance(record, FamilyElement):
        family_id = record.get_pointer()
        husband_ref = None
        wife_ref = None
        child_refs = []

        # Pull the partner and child pointers out of the family record.
        for sub in record.get_child_elements():
            sub_tag, sub_value = sub.get_tag(), sub.get_value()
            if sub_tag == "HUSB":
                husband_ref = sub_value
            elif sub_tag == "WIFE":
                wife_ref = sub_value
            elif sub_tag == "CHIL":
                child_refs.append(sub_value)

        families[family_id] = {
            "id": family_id,
            "husband": husband_ref,
            "wife": wife_ref,
            "children": child_refs,
        }
# Second pass: connect relationships.
# For every individual, resolve the family pointers gathered in the first
# pass into lists of parent, spouse and child names.
for ind in individuals.values():
    # Parents: the husband/wife of the family this person is a child of.
    famc = ind.get("famc")
    if famc and famc in families:
        parents = families[famc]
        for parent_id in (parents.get("husband"), parents.get("wife")):
            if parent_id and parent_id in individuals:
                ind["parents"].append(individuals[parent_id]["name"])

    # Spouses and children: one entry per family this person is a partner in.
    for fam_id in ind.get("fams", []):
        if fam_id not in families:
            continue
        fam = families[fam_id]
        husband_id = fam.get("husband")
        wife_id = fam.get("wife")

        # Pick the *other* partner by comparing pointers rather than trusting
        # the gender field. Choosing by gender alone lists a person as their
        # own spouse whenever their recorded sex does not match the slot they
        # occupy (unknown sex, same-sex couples, data-entry quirks).
        if ind["id"] == husband_id:
            spouse_id = wife_id
        elif ind["id"] == wife_id:
            spouse_id = husband_id
        else:
            # Person is not listed as either partner of this family; keep
            # the original gender-based guess for such inconsistent data.
            spouse_id = wife_id if ind["gender"] == "M" else husband_id

        if spouse_id and spouse_id in individuals:
            ind["spouses"].append(individuals[spouse_id]["name"])

        for child_id in fam.get("children", []):
            if child_id in individuals:
                ind["children"].append(individuals[child_id]["name"])
# Strip the temporary family-link fields; they were only needed to resolve
# relationships and should not appear in the exported JSON.
for person in individuals.values():
    for helper_key in ("famc", "fams"):
        person.pop(helper_key, None)
# Write all individuals out as a JSON array (UTF-8, non-ASCII characters
# kept as-is, 2-space indentation), then report where the file went.
people = list(individuals.values())
with open(OUTPUT_JSON, "w", encoding="utf-8") as out_file:
    json.dump(people, out_file, indent=2, ensure_ascii=False)

print(f"Done! JSON saved as {OUTPUT_JSON}")