Save and Load Graphs¶
Learn how to persist LZGraphs to disk and reload them later.
Quick Reference¶
Saving Graphs¶
Basic Save¶
from LZGraphs import AAPLZGraph
import pandas as pd
# Build a graph
data = pd.read_csv("repertoire.csv")
graph = AAPLZGraph(data, verbose=True)
# Save to disk
graph.save("my_graph.pkl")
Save with Custom Path¶
from pathlib import Path
# Save to specific directory
output_dir = Path("models/graphs")
output_dir.mkdir(parents=True, exist_ok=True)
graph.save(output_dir / "repertoire_2024.pkl")
Loading Graphs¶
Basic Load¶
from LZGraphs import AAPLZGraph
# Load a saved graph
graph = AAPLZGraph.load("my_graph.pkl")
# Verify it works
print(f"Nodes: {graph.graph.number_of_nodes()}")
print(f"Edges: {graph.graph.number_of_edges()}")
Loading Different Graph Types¶
from LZGraphs import AAPLZGraph, NDPLZGraph, NaiveLZGraph
# Load the correct type
aap_graph = AAPLZGraph.load("aap_graph.pkl")
ndp_graph = NDPLZGraph.load("ndp_graph.pkl")
naive_graph = NaiveLZGraph.load("naive_graph.pkl")
Type Matching
You must use the same class to load that was used to save. Loading an AAPLZGraph file with NDPLZGraph.load() will raise an error.
Use Cases¶
Avoid Recomputation¶
from pathlib import Path
cache_file = Path("cached_graph.pkl")
if cache_file.exists():
# Load from cache
graph = AAPLZGraph.load(cache_file)
print("Loaded from cache")
else:
# Build and cache
data = pd.read_csv("large_repertoire.csv")
graph = AAPLZGraph(data, verbose=True)
graph.save(cache_file)
print("Built and cached")
Batch Processing¶
from pathlib import Path
import pandas as pd
from LZGraphs import AAPLZGraph
# Process multiple repertoires
input_dir = Path("data/repertoires")
output_dir = Path("data/graphs")
output_dir.mkdir(exist_ok=True)
for csv_file in input_dir.glob("*.csv"):
graph_file = output_dir / f"{csv_file.stem}.pkl"
if not graph_file.exists():
data = pd.read_csv(csv_file)
graph = AAPLZGraph(data, verbose=False)
graph.save(graph_file)
print(f"Saved: {graph_file.name}")
Analysis Pipeline¶
# Step 1: Build and save (expensive)
# script: build_graphs.py
for repertoire_file in repertoire_files:
data = pd.read_csv(repertoire_file)
graph = AAPLZGraph(data)
graph.save(f"graphs/{repertoire_file.stem}.pkl")
# Step 2: Analyze (fast, can be rerun)
# script: analyze_graphs.py
for graph_file in Path("graphs").glob("*.pkl"):
graph = AAPLZGraph.load(graph_file)
# Run analysis...
File Format¶
LZGraphs uses Python's pickle format:
- Binary format: Compact and fast
- Python-specific: Not portable to other languages
- Version-sensitive: Best used with same Python/LZGraphs version
Checking File Contents¶
import pickle
with open("my_graph.pkl", "rb") as f:
obj = pickle.load(f)
print(f"Type: {type(obj)}")
print(f"Has graph: {hasattr(obj, 'graph')}")
Best Practices¶
1. Use Descriptive Names¶
2. Version Your Graphs¶
from datetime import datetime
import LZGraphs
# Include version info
filename = f"graph_v{LZGraphs.__version__}_{datetime.now():%Y%m%d}.pkl"
graph.save(filename)
3. Store Metadata Separately¶
import json
# Save graph
graph.save("my_graph.pkl")
# Save metadata
metadata = {
"source_file": "repertoire.csv",
"n_sequences": len(data),
"created": str(datetime.now()),
"lzgraphs_version": LZGraphs.__version__
}
with open("my_graph_metadata.json", "w") as f:
json.dump(metadata, f, indent=2)
Troubleshooting¶
"File not found" Error¶
from pathlib import Path
filepath = Path("my_graph.pkl")
if not filepath.exists():
print(f"File does not exist: {filepath.absolute()}")
"Invalid pickle" Error¶
The file may be corrupted or from an incompatible version:
try:
graph = AAPLZGraph.load("my_graph.pkl")
except Exception as e:
print(f"Error loading graph: {e}")
# Rebuild from source data
data = pd.read_csv("original_data.csv")
graph = AAPLZGraph(data)
Type Mismatch Error¶
from LZGraphs import AAPLZGraph, NDPLZGraph
try:
# Wrong type
graph = NDPLZGraph.load("aap_graph.pkl")
except TypeError as e:
print(f"Type mismatch: {e}")
# Use correct type
graph = AAPLZGraph.load("aap_graph.pkl")
Next Steps¶
- Sequence Generation - Generate sequences from saved graphs
- Compare Repertoires - Compare multiple saved graphs