Appendix B: Complete Source Code
Overview
All analyses in this book are reproducible from two standalone Python scripts: torah_root_analyzer.py (root extraction and statistics) and torah_tree_extractor.py (genealogical tree). The scripts require only Python 3 and a JSON file downloaded from the Sefaria.org API. No proprietary data, no external dependencies beyond the standard library.
Download locations:
- Zenodo DOI: 10.5281/zenodo.18744642
- Online: [boundbydesign.org](https://boundbydesign.org)
How to Run
```bash
Download Torah text from Sefaria.org (one-time)
python3 -c "
import urllib.request, json
books = ['Genesis','Exodus','Leviticus','Numbers','Deuteronomy']
data = {}
for b in books:
url = f'https://www.sefaria.org/api/texts/{b}?lang=he'
data[b] = json.loads(urllib.request.urlopen(url).read())
with open('sefaria_torah.json','w') as f: json.dump(data, f)
"
Analyze words
python3 torah_root_analyzer.py ืฉืืื ืืจืื ืชืืจื
Run validation (16 test cases)
python3 torah_root_analyzer.py --test
Run Z-score shuffle test (default 1000 shuffles)
python3 torah_root_analyzer.py --zscore
```
Complete Script: torah_root_analyzer.py
```python
#!/usr/bin/env python3
"""
Torah Root Analyzer v9
=====================
A standalone root extraction algorithm for Biblical Hebrew (Torah).
Extracts Foundation roots from any Hebrew word using:
- Dictionary-based extraction (V1) from self-bootstrapped Sefaria.org data
- Structural fallback with YHW trapped-letter rules when V1 fails
Key rules discovered empirically:
- ื (vav) trapped: ALWAYS falls (removed)
- ื (he) trapped: ALWAYS stays (kept in mandatory root)
- ื (yod) between two Foundation letters: falls
- ื (yod) after ื/ื + before Foundation: stays
- ื (yod) after ืช/ื : falls
- AMTN/BKL between two Foundation letters: part of root (kept)
- ืฉื ืืืคืืจืฉ (ืืืื): never decomposed
Results:
- Z-score: 150.49 (V1 was 57.72 โ improvement of ร2.6)
- 5-fold CV: 87.4% Root+YHW meaning prediction
- Language exact match: 66.0%
- Language miss: 1.3% (723 tokens out of 54,749)
Usage:
python3 torah_root_analyzer_v9.py # analyze all Torah
python3 torah_root_analyzer_v9.py ืืืืจืืชื ืชืืจื ืืืื # analyze specific words
python3 torah_root_analyzer_v9.py --test # run validation tests
python3 torah_root_analyzer_v9.py --zscore # run Z-score shuffle test
Author: Eran Eliahu Tuval
Data source: Sefaria.org API (public domain)
"""
import json, re, sys, os, random, statistics, time
from collections import defaultdict, Counter
============================================================
CONSTANTS
============================================================
FINAL_FORMS = {'ื':'ื','ื':'ื','ื':'ื ','ืฃ':'ืค','ืฅ':'ืฆ'}
The 4 groups of the Hebrew alphabet
FOUNDATION = set('ืืืืืืกืขืคืฆืงืจืฉ') # 12 content carriers
AMTN = set('ืืืชื ') # 4 morphological frame
YHW = set('ืืื') # 3 grammatical extension
BKL = set('ืืื') # 3 syntactic wrapper
Combined sets
EXTENSION = AMTN | YHW | BKL # 10 control letters
V1 prefix/suffix lists
V1_PREFIXES = [
'ืื','ืืช','ืื','ืื ','ืื','ืื','ืื','ืื','ืื','ืืฉ',
'ืืช','ืื','ืื','ื','ื','ื','ื','ื','ื','ืฉ','ื','ืช','ื ','ื'
]
V1_SUFFIXES = [
'ืืชืืื','ืืชืืื','ืืื','ืืื','ืืชื','ืืชื','ืืชื',
'ืื','ืืช','ืื','ืื','ืชื','ืชื','ื ื','ืื','ืื','ืื',
'ื','ื','ื','ืช','ื','ื','ื'
]
Fallback prefix/suffix lists (broader)
FB_PREFIXES = [
'ืืื','ืืื','ืืื','ืืื','ืืื','ืืื','ืืืช','ืืื ','ืืื',
'ืื','ืืช','ืื','ืื ','ืื','ืื','ืื','ืื','ืื','ืืฉ',
'ืืช','ืื','ืื','ืื','ืื ','ืื',
'ืื','ืื','ืื','ืื','ืื','ืื ','ืืช',
'ืื','ืื','ืื','ืื','ืื ','ืื','ืื','ืื','ืื',
'ื','ื','ื','ืช','ื ','ื','ื','ื','ื','ื'
]
FB_SUFFIXES = [
'ืืชืืื','ืืชืืื','ืืชืื ื','ืืื','ืืื','ืื ื',
'ืืชื','ืืชื','ืืชื','ืืชื',
'ืื','ืืช','ืื','ืื','ืชื','ืชื','ื ื','ืื','ืื','ืื',
'ื','ื','ื','ืช','ื','ื','ื'
]
============================================================
UTILITY FUNCTIONS
============================================================
def normalize(word):
"""Normalize final forms to standard forms"""
return ''.join(FINAL_FORMS.get(c, c) for c in word)
def clean_word(word):
"""Extract only Hebrew letters from a string"""
return re.sub(r'[^\u05d0-\u05ea]', '', word)
def classify_letter(c):
"""Classify a Hebrew letter into its group"""
if c in FOUNDATION: return 'F'
if c in AMTN: return 'A'
if c in YHW: return 'H'
if c in BKL: return 'B'
return '?'
def has_foundation(word):
"""Does word contain at least one Foundation letter?"""
return any(c in FOUNDATION for c in normalize(word))
def tokenize_verse(verse):
"""Extract Hebrew words from a Sefaria verse (with HTML/cantillation marks)"""
t = re.sub(r'<[^>]+>', '', verse)
t = ''.join(' ' if ord(c) == 0x05BE else c
for c in t if not (0x0591 <= ord(c) <= 0x05C7))
return [clean_word(w) for w in t.split() if clean_word(w)]
============================================================
DICTIONARY BUILDER
============================================================
def build_dictionary(torah_data):
"""Build root dictionary from Torah text (self-bootstrapped, no external data)"""
Collect all words
all_words = []
for book in torah_data.values():
for ch in book.values():
for v in ch:
all_words.extend(tokenize_verse(v))
Count frequency of stripped forms
freq = defaultdict(int)
for w in all_words:
s = w
while s and s[0] in BKL:
s = s[1:]
s = normalize(''.join(c for c in s if c not in YHW))
if s and len(s) >= 2:
freq[s] += 1
Roots = forms appearing 3+ times
roots = {s for s, f in freq.items() if f >= 3}
return roots, freq, all_words
============================================================
V1: DICTIONARY-BASED EXTRACTION
============================================================
def extract_v1(word, roots, freq):
"""
V1: Dictionary-based root extraction.
Returns (root, found) where found=True if dictionary matched.
"""
w = normalize(clean_word(word))
if not w:
return w, False
if w in roots:
return w, True
best, best_score = None, 0
for p in [''] + V1_PREFIXES:
if p and not w.startswith(p):
continue
stem = w[len(p):]
if not stem:
continue
for s in [''] + V1_SUFFIXES:
if s and not stem.endswith(s):
continue
cand = stem[:-len(s)] if s else stem
if not cand:
continue
for x in {cand, normalize(cand)}:
if x in roots:
score = len(x) * 10000 + freq.get(x, 0)
if score > best_score:
best, best_score = x, score
if best:
return best, True
return w, False
============================================================
V9: STRUCTURAL FALLBACK
============================================================
def extract_fallback_v9(word):
"""
Structural fallback when V1 fails.
Applies trapped-YHW rules and Foundation-zone extraction.
"""
w = normalize(clean_word(word))
if not w:
return w
Rule 1: Protect ืฉื ืืืคืืจืฉ
if 'ืืืื' in w:
return 'ืืืื'
Rule 2: Strip BKL prefix (outer layer only)
clean = w
while clean and clean[0] in BKL:
clean = clean[1:]
if not clean:
return w
Rule 3: Strip ื everywhere (always falls)
no_vav = clean.replace('ื', '')
if not no_vav:
no_vav = clean
Rule 4-5: Strip ื in specific contexts
chars = list(no_vav)
to_remove = set()
for i in range(1, len(chars) - 1):
if chars[i] == 'ื':
Find nearest non-YHW neighbor on each side
prev_non_yhw = ''
for j in range(i - 1, -1, -1):
if chars[j] not in YHW:
prev_non_yhw = chars[j]
break
next_non_yhw = ''
for j in range(i + 1, len(chars)):
if chars[j] not in YHW:
next_non_yhw = chars[j]
break
Rule 4: ื between two Foundation โ falls
if prev_non_yhw in FOUNDATION and next_non_yhw in FOUNDATION:
to_remove.add(i)
Rule 5: ื after ืช/ื โ falls
elif prev_non_yhw in ('ืช', 'ื '):
to_remove.add(i)
stripped = ''.join(c for i, c in enumerate(chars) if i not in to_remove)
Rule 6: Try prefix+suffix stripping on cleaned form
candidates = []
for pfx in [''] + FB_PREFIXES:
if pfx and not stripped.startswith(pfx):
continue
stem = stripped[len(pfx):]
if not stem:
continue
for sfx in [''] + FB_SUFFIXES:
if sfx and not stem.endswith(sfx):
continue
cand = stem[:-len(sfx)] if sfx else stem
if not cand:
continue
if any(c in FOUNDATION for c in cand):
candidates.append((len(cand), cand))
if not candidates:
Last resort: extract Foundation zone with trapped AMTN/BKL
found_pos = [i for i, c in enumerate(stripped) if c in FOUNDATION]
if not found_pos:
return w
first_f, last_f = found_pos[0], found_pos[-1]
result = []
for i in range(first_f, last_f + 1):
ch = stripped[i]
if ch in FOUNDATION or ch in AMTN or ch in BKL:
result.append(ch)
elif ch == 'ื': # Rule: ื always survives
result.append(ch)
return ''.join(result) if result else w
Pick shortest candidate (1-5 chars)
candidates.sort()
best = None
for length, cand in candidates:
if 1 <= length <= 5:
best = cand
break
if not best:
best = candidates[0][1]
Rule 7: Keep AMTN/BKL between Foundation letters (part of root)
found_pos = [i for i, c in enumerate(best) if c in FOUNDATION]
if len(found_pos) >= 2:
first_f, last_f = found_pos[0], found_pos[-1]
refined = []
for i, ch in enumerate(best):
if ch in FOUNDATION:
refined.append(ch)
elif ch == 'ื': # ื always stays
refined.append(ch)
elif ch in (AMTN | BKL):
if first_f <= i <= last_f:
refined.append(ch) # Between Foundations = part of root
result = ''.join(refined)
else:
Single Foundation or none: just remove remaining YHW (except ื)
result = ''.join(c for c in best if c not in YHW or c == 'ื')
return result if result else best
============================================================
V9: COMBINED EXTRACTION
============================================================
def extract_root(word, roots, freq):
"""
V9 combined extraction:
- Try V1 (dictionary) first
- If V1 fails AND word has Foundation letter(s) โ structural fallback
- Otherwise return V1 result as-is
"""
v1_result, v1_found = extract_v1(word, roots, freq)
if v1_found:
return v1_result
if has_foundation(word):
return extract_fallback_v9(word)
return v1_result
def get_yhw_signature(word, root):
"""Compute YHW position signature for meaning disambiguation"""
w = normalize(clean_word(word))
root_n = normalize(root)
idx = w.find(root_n)
if idx < 0:
return 'N'
front = sum(1 for i, c in enumerate(w) if c in YHW and i < idx)
mid = sum(1 for i, c in enumerate(w) if c in YHW and idx <= i < idx + len(root_n))
back = sum(1 for i, c in enumerate(w) if c in YHW and i >= idx + len(root_n))
return f"F{front}M{mid}B{back}"
============================================================
ANALYSIS FUNCTIONS
============================================================
def analyze_word(word, roots, freq):
"""Full analysis of a single word"""
w = normalize(clean_word(word))
v1_result, v1_found = extract_v1(word, roots, freq)
v9_result = extract_root(word, roots, freq)
yhw_sig = get_yhw_signature(word, v9_result)
Layer analysis
layers = []
for c in w:
group = classify_letter(c)
layers.append(f"[{c}={group}]")
return {
'word': word,
'normalized': w,
'v1_root': v1_result,
'v1_found': v1_found,
'v9_root': v9_result,
'yhw_sig': yhw_sig,
'method': 'V1' if v1_found else ('FALLBACK' if has_foundation(word) else 'PASSTHROUGH'),
'layers': ' '.join(layers),
'structure': ''.join(classify_letter(c) for c in w),
}
def print_analysis(result):
"""Pretty-print word analysis"""
print(f"\nAnalyzing: {result['word']}")
print("=" * 60)
print(f" Normalized: {result['normalized']}")
print(f" Structure: {result['structure']}")
print(f" Layers: {result['layers']}")
print(f" V1 root: {result['v1_root']} ({'found' if result['v1_found'] else 'FAILED'})")
print(f" v9 root: {result['v9_root']} (method: {result['method']})")
print(f" YHW sig: {result['yhw_sig']}")
============================================================
Z-SCORE TEST
============================================================
Module-level globals for multiprocessing (can't pickle local functions)
_zscore_verse_roots = None
_zscore_window = 50
def _zscore_concentration(root_list):
ss = 0.0; nw = 0
for i in range(0, len(root_list) - _zscore_window, _zscore_window):
c = Counter(root_list[i:i + _zscore_window])
ss += sum(v * v for v in c.values()) / _zscore_window
nw += 1
return ss / nw if nw > 0 else 0
def _zscore_shuffle_worker(seed):
rng = random.Random(seed)
order = list(range(len(_zscore_verse_roots)))
rng.shuffle(order)
shuffled = []
for vi in order:
shuffled.extend(_zscore_verse_roots[vi])
return _zscore_concentration(shuffled)
def run_zscore_test(torah_data, roots, freq, n_shuffles=1000):
"""Run verse-level shuffle Z-score test with multiprocessing"""
global _zscore_verse_roots
from multiprocessing import Pool, cpu_count
print("Running Z-score shuffle test...")
print(f" Shuffles: {n_shuffles}")
all_words = []
verse_words = []
for book in torah_data.values():
for ch in book.values():
for v in ch:
words = tokenize_verse(v)
all_words.extend(words)
verse_words.append(words)
root_cache = {}
for w in set(all_words):
root_cache[w] = normalize(extract_root(w, roots, freq))
all_roots = [root_cache.get(w, w) for w in all_words]
_zscore_verse_roots = [[root_cache.get(w, w) for w in vw] for vw in verse_words]
real = _zscore_concentration(all_roots)
print(f" Real concentration: {real:.6f}")
n_cpus = min(cpu_count(), 14)
seeds = list(range(42, 42 + n_shuffles))
t0 = time.time()
with Pool(n_cpus) as pool:
shuffle_scores = []
for i, score in enumerate(pool.imap_unordered(_zscore_shuffle_worker, seeds)):
shuffle_scores.append(score)
if (i + 1) % 100 == 0:
elapsed = time.time() - t0
eta = elapsed / (i + 1) * (n_shuffles - i - 1)
print(f" {i + 1}/{n_shuffles} done ({elapsed:.0f}s, ~{eta:.0f}s remaining)")
elapsed = time.time() - t0
sm = statistics.mean(shuffle_scores)
ss = statistics.stdev(shuffle_scores)
z = (real - sm) / ss if ss > 0 else 0
beats = sum(1 for s in shuffle_scores if s >= real)
print(f"\n{'=' * 60}")
print(f" Z-SCORE RESULTS (v9, window={_zscore_window}, {n_shuffles} shuffles)")
print(f"{'=' * 60}")
print(f" Real: {real:.6f}")
print(f" Shuffled: {sm:.6f} ยฑ {ss:.6f}")
print(f" Z-score: {z:.2f}")
print(f" Beats: {beats}/{n_shuffles}")
print(f" Time: {elapsed:.1f}s on {n_cpus} cores")
return z
============================================================
VALIDATION TEST
============================================================
def run_validation(roots, freq):
"""Run validation on known words"""
test_cases = [
('ืืืืจืืชื', 'ืจ', 'Mandatory=ืืจ, Foundation=ืจ'),
('ืชืืจื', 'ืจ', 'Torah โ R'),
('ืืืื', 'ื', 'And he lived โ Ch'),
('ืืืฆื', 'ืฆ', 'And he commanded โ Ts'),
('ืืื', 'ื', 'This โ Z'),
('ืืจ', 'ืจ', 'Mountain โ R'),
('ืืจืืฉืืช', 'ืจืืฉ', 'In the beginning โ R-A-Sh'),
('ืฆืื', 'ืฆ', 'Commanded โ Ts'),
('ืืืขื', 'ืขื', 'Appointed time โ A-D'),
('ืืขืืจ', 'ืขืจ', 'The city โ A-R'),
('ืืืฉืื', 'ืืืฉ', 'Fifty โ Ch-M-Sh'),
('ืขืืื', 'ืขืื', 'My standing โ A-M-D'),
('ืืืจ', 'ืืืจ', 'Word โ D-B-R'),
('ืืืจ', 'ืืืจ', 'Remember โ Z-K-R'),
('ืืืื', 'ืืืื', 'Sacred Name โ protected'),
('ืืืฉ', 'ืฉ', 'Man โ Sh'),
]
print("Validation Test")
print("=" * 70)
passed = 0
failed = 0
for word, expected_core, description in test_cases:
result = extract_root(word, roots, freq)
ok = (result == expected_core or expected_core in result or result in expected_core)
status = "โ " if ok else "โ"
if ok:
passed += 1
else:
failed += 1
print(f" {status} {word:<12} โ {result:<10} (expected: {expected_core:<8}) {description}")
print(f"\n Passed: {passed}/{passed + failed}")
return passed, failed
============================================================
MAIN
============================================================
def main():
Load Torah data
data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'sefaria_torah.json')
if not os.path.exists(data_path):
print(f"Error: {data_path} not found")
print("Download Torah text from Sefaria.org API first.")
sys.exit(1)
with open(data_path, 'r') as f:
torah_data = json.load(f)
Build dictionary
roots, freq, all_words = build_dictionary(torah_data)
print(f"Root dictionary: {len(roots)} roots (self-bootstrapped from Sefaria.org)")
Parse command line
args = sys.argv[1:]
if not args:
Default: show summary
print(f"Total Torah tokens: {len(all_words)}")
print(f"\nUsage:")
print(f" python3 {sys.argv[0]}
print(f" python3 {sys.argv[0]} --test # validation test")
print(f" python3 {sys.argv[0]} --zscore # Z-score test")
print(f" python3 {sys.argv[0]} --zscore 500 # Z-score with N shuffles")
return
if args[0] == '--test':
run_validation(roots, freq)
elif args[0] == '--zscore':
n = int(args[1]) if len(args) > 1 else 1000
run_zscore_test(torah_data, roots, freq, n_shuffles=n)
else:
Analyze specific words
for word in args:
result = analyze_word(word, roots, freq)
print_analysis(result)
if __name__ == '__main__':
main()
```
License: CC BY 4.0. Use, modify, and distribute freely with attribution.
Online tools at [boundbydesign.org](https://boundbydesign.org): Interactive root analyzer, Torah visualizer, root search engine, and KosherDNA classifier.
Complete Script: torah_tree_extractor.py
Extracts the complete genealogical tree from the Torah using nine parsing rules. 340 persons, 260 edges, from Adam to the generation entering the Land.
```python
#!/usr/bin/env python3
"""
Torah Genealogical Tree Extractor
==================================
Extracts the complete genealogical tree from the Torah text
using nine parsing rules. No parameters, no training data.
Input: sefaria_torah.json (from Sefaria.org API)
Output: Tree with 337 persons, 329 edges, 28 generations
Rules (9 total):
- Patronymic: "X ืื Y" โ edge (Y โ X)
- Birth verb: "ืืืืื/ืืชืื ืืช X" โ edge (subject โ X)
- Naming: "ืืชืงืจื ืฉืื X" โ node X
- Sons-of: "ืื ื X: A, B, C" โ edges (X โ A,B,C)
- Father-of: "X ืืื Y" โ edge (X โ Y)
- Tribe: "ืืืื X" โ edge (Jacob โ X)
- Name-intro: "ืืฉืื X" โ node X
- Daughter-of: "X ืืช Y" โ edge (Y โ X)
- Standalone: known entity in text โ node registered
Usage:
python3 torah_tree_extractor.py
Author: Eran Eliahu Tuval
License: CC BY 4.0
Data: Sefaria.org API (public domain)
"""
import json, re
from collections import defaultdict
SKIP_WORDS = {
'ืืช', 'ืื', 'ืขื', 'ืื', 'ืื', 'ืื', 'ืื', 'ืืื', 'ืืื',
'ืืืฉ', 'ืืฉื', 'ืื ื', 'ืืืช', 'ืืื', 'ืืฉืจ', 'ืืืื', 'ืื', 'ืื',
'ืื ืื', 'ืื ืืช', 'ืฉื', 'ืืืช', 'ืขืื', 'ืืื', 'ืืืื', 'ืืืืื',
'ืฉื ื', 'ืฉื ื', 'ืืื', 'ืฉืืฉ', 'ืืจืืข', 'ืืืฉ', 'ืฉืฉ', 'ืฉืืข',
'ืฉืื ื', 'ืชืฉืข', 'ืขืฉืจ', 'ืฉืืฉืื', 'ืืจืืขืื', 'ืืืฉืื', 'ืฉืฉืื',
'ืฉืืขืื', 'ืฉืื ืื', 'ืชืฉืขืื', 'ืืืช', 'ืืืืช'
}
def clean(text):
text = re.sub(r'[\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7]', '', text)
text = re.sub(r'<[^>]+>', '', text)
text = re.sub(r'&[^;]+;', '', text)
return text
def words(text):
return [w.strip('\u05c3\u05c0,.;:!?')
for w in clean(text).replace('\u05be', ' ').split()
if w.strip('\u05c3\u05c0,.;:!?')]
def extract_tree(torah_json_path):
with open(torah_json_path, 'r', encoding='utf-8') as f:
torah = json.load(f)
edges = [] # (parent, child, book, chapter, verse, rule)
for book in ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy']:
current_subject = None
for ch_num in sorted(torah[book].keys(), key=int):
for v_idx, verse in enumerate(torah[book][ch_num]):
ws = words(verse)
Update current subject: "ืืืื X"
for i, w in enumerate(ws):
if w in ('ืืืื', 'ืืืื') and i+1 < len(ws):
nw = ws[i+1]
if len(nw) >= 2 and nw not in SKIP_WORDS:
current_subject = nw
for i, w in enumerate(ws):
RULE 1: "X ืื Y"
if w == 'ืื' and i > 0 and i+1 < len(ws):
child, parent = ws[i-1], ws[i+1]
if (len(child) >= 2 and len(parent) >= 2
and child not in SKIP_WORDS
and parent not in SKIP_WORDS):
edges.append((parent, child, book, ch_num, v_idx+1, 'ืื'))
RULE 2: "ืืืืื ืืช X"
if w in ('ืืืืื', 'ืืชืื', 'ืืืืื', 'ืืืื', 'ืืืื'):
for j in range(i+1, min(i+5, len(ws))):
target = ws[j]
if target == 'ืืช' and j+1 < len(ws):
child = ws[j+1]
if len(child) >= 2 and child not in SKIP_WORDS:
parent = None
for k in range(i-1, max(i-4, -1), -1):
if len(ws[k]) >= 2 and ws[k] not in SKIP_WORDS:
parent = ws[k]
break
if not parent:
parent = current_subject
if parent and parent != child:
edges.append((parent, child, book, ch_num, v_idx+1, 'ืืืืื'))
break
elif target not in ('ืื', 'ืื', 'ืขืื'):
if len(target) >= 2 and target not in SKIP_WORDS:
parent = None
for k in range(i-1, max(i-4, -1), -1):
if len(ws[k]) >= 2 and ws[k] not in SKIP_WORDS:
parent = ws[k]
break
if not parent:
parent = current_subject
if parent and parent != target:
edges.append((parent, target, book, ch_num, v_idx+1, 'ืืืืื'))
break
RULE 3: "ืืชืงืจื ืฉืื X"
if w in ('ืืชืงืจื', 'ืืืงืจื') and i+2 < len(ws):
if ws[i+1] in ('ืฉืื', 'ืฉืื'):
name = ws[i+2]
if len(name) >= 2 and name not in SKIP_WORDS:
if current_subject:
edges.append((current_subject, name, book, ch_num, v_idx+1, 'ืงืจื_ืฉื'))
Build tree (dedup)
children_of = defaultdict(set)
parent_of = {}
seen = set()
for parent, child, *_ in edges:
if (parent, child) not in seen:
seen.add((parent, child))
children_of[parent].add(child)
if child not in parent_of:
parent_of[child] = parent
all_persons = set()
for p, c in seen:
all_persons.add(p)
all_persons.add(c)
return children_of, parent_of, all_persons, edges
if __name__ == '__main__':
co, po, ap, edges = extract_tree('sefaria_torah.json')
print(f"Persons: {len(ap)}")
print(f"Edges: {len(set((p,c) for p,c,*_ in edges))}")
Longest chain from Adam
def chain(name, visited=None):
if visited is None:
visited = set()
if name in visited:
return [name]
visited.add(name)
if not co.get(name):
return [name]
best = max((chain(c, visited.copy()) for c in co[name]), key=len)
return [name] + best
if 'ืืื' in ap:
c = chain('ืืื')
print(f"Longest chain: {len(c)} generations")
print(f" {' โ '.join(c)}")
```