Be comfortable with Python data structures (dictionaries, sets)
Have worked with real-world Speckle projects (1000+ objects)
This guide focuses on performance optimization and complex patterns for working with large BIM datasets. These patterns are most useful when dealing with projects from connectors like Revit, Rhino, or ArchiCAD.
Repeatedly traversing large object trees is slow. If you need to search for multiple categories or properties, traversing the entire tree each time becomes a performance bottleneck.
Terminology Note: Examples use property names like “category”, “level”, etc. for illustration. Real BIM data from connectors may structure these differently - Revit uses both direct properties AND proxy collections (e.g., LevelProxy, CategoryProxy). See BIM Data Patterns for connector-specific structures.
Build an index once, then perform fast lookups using GraphTraversal:
Copy
from specklepy.objects.graph_traversal.traversal import GraphTraversal


def build_category_index(root):
    """Group the objects visited from ``root`` by their category.

    The tree is walked a single time; afterwards every category lookup is a
    plain dictionary access.

    Args:
        root: Object handed to ``GraphTraversal.traverse``.

    Returns:
        dict mapping each category value to the list of objects carrying it.
        Objects without (or with empty) ``properties`` and objects without a
        truthy ``"category"`` entry are skipped.
    """
    index = {}
    walker = GraphTraversal([])

    for context in walker.traverse(root):
        obj = context.current

        # Nothing to index when properties are missing or empty.
        if not (hasattr(obj, "properties") and obj.properties):
            continue

        category = obj.properties.get("category")
        if not category:
            continue

        # Create the bucket on first sight of a category, then append.
        index.setdefault(category, []).append(obj)

    return index


# ✅ Fast: traverse once, lookup many times
index = build_category_index(root)

walls = index.get("Walls", [])
floors = index.get("Floors", [])
columns = index.get("Columns", [])

print(f"Walls: {len(walls)}")
print(f"Floors: {len(floors)}")
print(f"Columns: {len(columns)}")
Indexing pattern: (1) Traverse once - Visit every object in the tree, (2) Extract key - Get the property value to index by (e.g., category), (3) Store reference - Add object to a dictionary by that key, (4) Fast lookup - Use dictionary access (O(1)) instead of tree traversal (O(n)).
Performance comparison:
Copy
# Without index (repeated traversal)
# Time: O(n * m) where n = objects, m = searches
for category in ("Walls", "Floors", "Columns", "Beams", "Doors"):
    # Each search walks the whole tree again: O(n)
    objects = find_by_category(root, category)

# With index (single traversal)
# Time: O(n) for indexing + O(1) for each lookup
index = build_category_index(root)  # O(n), paid once
for category in ("Walls", "Floors", "Columns", "Beams", "Doors"):
    # Plain dictionary access: O(1) each time
    objects = index.get(category, [])
Multi-property index - index by multiple properties for complex queries using GraphTraversal:
Copy
from specklepy.objects.graph_traversal.traversal import GraphTraversal


def build_multi_index(root, *properties):
    """Build index on multiple property values.

    Example:
        index = build_multi_index(root, "category", "level")
        level_2_walls = index.get(("Walls", "Level 2"), [])

    Args:
        root: Object handed to ``GraphTraversal.traverse``.
        *properties: Names of the ``obj.properties`` entries that make up the
            key, in key order.

    Returns:
        dict mapping ``(value_1, value_2, ...)`` tuples to the list of objects
        having those values. Objects whose ``properties`` is missing, ``None``
        or empty, or that lack any of the requested entries, are left out.
    """
    traversal = GraphTraversal([])
    index = {}

    def create_key(obj):
        """Create tuple key from property values, or None if incomplete."""
        # ``properties`` may exist but be None; treat that like a missing
        # attribute instead of raising AttributeError on ``.get``.
        props = getattr(obj, "properties", None)
        if not props:
            return None

        values = tuple(props.get(prop) for prop in properties)
        # Only create key if all properties exist
        return values if all(v is not None for v in values) else None

    for context in traversal.traverse(root):
        obj = context.current
        key = create_key(obj)
        if key:
            index.setdefault(key, []).append(obj)

    return index


# Build multi-property index
index = build_multi_index(root, "category", "level")

# Fast lookups by category AND level
level_2_walls = index.get(("Walls", "Level 2"), [])
level_3_floors = index.get(("Floors", "Level 3"), [])

# See all combinations
print("Available combinations:")
for key in sorted(index):
    count = len(index[key])
    print(f" {key}: {count} objects")
ID-based index - build indexes by object IDs for fast lookups using GraphTraversal:
Copy
from specklepy.objects.graph_traversal.traversal import GraphTraversal


def build_id_index(root):
    """Index the objects visited from ``root`` by id, name and applicationId.

    Args:
        root: Object handed to ``GraphTraversal.traverse``.

    Returns:
        dict with three lookup tables:
        ``"by_id"`` maps ``obj.id`` to the object,
        ``"by_name"`` maps ``obj.name`` to a list of objects with that name,
        ``"by_app_id"`` maps ``obj.applicationId`` to the object.
        For ``by_id`` and ``by_app_id`` a later object with the same key
        replaces the earlier one. Missing or falsy values are not indexed.
    """
    by_id, by_name, by_app_id = {}, {}, {}

    for context in GraphTraversal([]).traverse(root):
        obj = context.current

        # Speckle ID (only when set)
        speckle_id = getattr(obj, "id", None)
        if speckle_id:
            by_id[speckle_id] = obj

        # Names are not unique, so each name maps to a list
        name = getattr(obj, "name", None)
        if name:
            by_name.setdefault(name, []).append(obj)

        # applicationId (common in BIM)
        app_id = getattr(obj, "applicationId", None)
        if app_id:
            by_app_id[app_id] = obj

    return {
        "by_id": by_id,
        "by_name": by_name,
        "by_app_id": by_app_id,
    }


# Build comprehensive index
indexes = build_id_index(root)

# Fast lookups
obj_by_id = indexes["by_id"].get("abc123def456")
obj_by_name = indexes["by_name"].get("Wall-101")
obj_by_app_id = indexes["by_app_id"].get("revit-element-12345")
Don’t rebuild indexes unnecessarily! Building an index is expensive (O(n)). Cache the index and reuse it:
Copy
# ❌ Bad - rebuilds index each time
def get_walls(root):
    """Return the "Walls" bucket, rebuilding the whole index on every call."""
    return build_category_index(root).get("Walls", [])  # Expensive!


def get_floors(root):
    """Return the "Floors" bucket, rebuilding the whole index on every call."""
    return build_category_index(root).get("Floors", [])  # Expensive again!


# ✅ Good - build once, reuse many times
index = build_category_index(root)  # Build once

walls = index.get("Walls", [])
floors = index.get("Floors", [])
columns = index.get("Columns", [])
You need to decide whether to traverse directly or build an index first. The wrong choice can hurt performance.
Use direct traversal when:
✅ Single search on a dataset
✅ Small datasets (less than 100 objects)
✅ One-time operation
✅ Memory is very limited
Copy
# Good use of direct traversal
def find_specific_wall(root, wall_name):
    """Find one specific wall - no need for index.

    Args:
        root: Object handed to ``traverse_all``.
        wall_name: Value the wall's ``name`` attribute must equal.

    Returns:
        The first object yielded by ``traverse_all(root)`` whose properties
        have ``"category" == "Walls"`` and whose ``name`` equals
        ``wall_name``, or ``None`` when no object matches.
    """
    for obj in traverse_all(root):
        # ``properties`` may be absent or None; skip such objects rather than
        # failing on ``None.get``.
        props = getattr(obj, "properties", None)
        if not props:
            continue

        is_wall = props.get("category") == "Walls"
        if is_wall and getattr(obj, "name", None) == wall_name:
            return obj

    return None


# Single search - direct traversal is fine
wall = find_specific_wall(root, "W-101")
Use indexes when:
✅ Multiple searches on the same dataset
✅ Large datasets (1000+ objects)
✅ Repeated lookups by the same property
✅ Performance is critical
Copy
# Good use of indexing
def analyze_by_category(root):
    """Summarise object counts for several categories from a single index.

    Args:
        root: Object handed to ``build_category_index``.

    Returns:
        dict with the keys ``"walls"``, ``"floors"``, ``"structural"``
        (columns + beams) and ``"openings"`` (doors + windows), each holding
        an object count.
    """
    # One traversal up front; every query below is a dictionary lookup.
    index = build_category_index(root)

    def count(category):
        """Number of indexed objects in ``category`` (0 when absent)."""
        return len(index.get(category, []))

    return {
        "walls": count("Walls"),
        "floors": count("Floors"),
        "structural": count("Columns") + count("Beams"),
        "openings": count("Doors") + count("Windows"),
    }
Some objects in Speckle are “detached” - stored separately and referenced by ID. You see properties like @displayValue instead of the actual object.
Understand when and why detachment happens:
Copy
from specklepy.objects import Base


# Check if property is detached
def is_detached(obj, property_name):
    """Report whether ``obj`` has an ``@``-prefixed variant of a property.

    Args:
        obj: Object to inspect.
        property_name: Property name without the ``@`` prefix.

    Returns:
        True when ``obj`` has an attribute named ``"@" + property_name``.
    """
    detached_name = f"@{property_name}"
    return hasattr(obj, detached_name)


# Example: displayValue might be detached
if is_detached(obj, "displayValue"):
    # This is a reference ID (hash string)
    ref_id = getattr(obj, "@displayValue")
    print(f"displayValue is detached: {ref_id}")

    # The actual object is still accessible normally
    # (resolved during receive())
    if hasattr(obj, "displayValue"):
        print("But displayValue is available!")
        print(f"Type: {type(obj.displayValue)}")
Why detachment happens: (1) Performance - Large objects (big meshes) are stored separately, (2) Deduplication - Same object can be referenced multiple times, (3) Lazy loading - Objects loaded only when needed.
How it’s resolved:
Copy
from specklepy.api import operations
from specklepy.transports.server import ServerTransport

# During receive(), references are automatically resolved
transport = ServerTransport(stream_id=project_id, client=client)
obj = operations.receive(object_id, remote_transport=transport)

# By the time you access the object, references are resolved.
# You don't need to manually handle @properties in most cases.
if hasattr(obj, "displayValue"):
    # This works - reference was auto-resolved
    mesh = obj.displayValue
    print(f"Vertices: {len(mesh.vertices)}")
Checking for detached properties:
Copy
def find_detached_properties(obj):
    """Find all detached properties on an object.

    Args:
        obj: Object whose attribute names (as reported by ``dir``) are
            scanned for an ``@`` prefix.

    Returns:
        list of dicts, one per ``@``-prefixed attribute, with the keys
        ``"property"`` (name without the ``@``), ``"reference"`` (the
        attribute's value) and ``"resolved"`` (whether the un-prefixed
        attribute also exists on ``obj``).
    """
    detached = []
    for name in dir(obj):
        # A name starting with "@" can never start with "__", so this single
        # prefix test is all that is needed.
        if not name.startswith("@"):
            continue

        property_name = name[1:]  # Remove @ prefix
        detached.append({
            "property": property_name,
            "reference": getattr(obj, name),
            "resolved": hasattr(obj, property_name),
        })
    return detached


# Check object
detached = find_detached_properties(obj)
for item in detached:
    # str() keeps the 16-character preview working when the stored value is
    # not a plain string.
    print(f"{item['property']}: {str(item['reference'])[:16]}...")
    print(f" Resolved: {item['resolved']}")
Don’t assume all properties are resolved! In rare cases with custom transports or partial receives, references might not be resolved:
Copy
# ❌ Bad - assumes displayValue is always present
mesh = obj.displayValue
vertices = mesh.vertices

# ✅ Good - check before accessing
if getattr(obj, "displayValue", None):
    mesh = obj.displayValue
    # Only read vertices when the value actually has them
    if hasattr(mesh, "vertices"):
        vertices = mesh.vertices
Revit objects have complex nested parameter structures organized by category. Accessing them efficiently requires understanding this structure.
Access parameters via the properties dictionary:
Copy
def get_parameter_value(obj, param_name):
    """Get Revit parameter value by name.

    Args:
        obj: Object expected to expose a ``properties`` dictionary.
        param_name: Name of the parameter to look up.

    Returns:
        The value of the first parameter named ``param_name`` found in any
        parameter category. For dict-shaped parameters this is the entry
        under ``"value"``; otherwise the stored value itself. ``None`` when
        ``properties`` or ``properties["Parameters"]`` is missing/empty or
        the parameter is not found.
    """
    # ``properties`` may be absent or None; both mean "no parameters".
    props = getattr(obj, "properties", None)
    if not props:
        return None

    # Parameters are in properties["Parameters"]
    params = props.get("Parameters")
    if not params:
        return None

    # Parameters are organized by category
    # Search all categories for the parameter
    for category_params in params.values():
        if isinstance(category_params, dict) and param_name in category_params:
            param = category_params[param_name]
            # Parameter might be a dict with "value" key or direct value
            if isinstance(param, dict):
                return param.get("value")
            return param

    return None


# Use it
wall = walls[0]
fire_rating = get_parameter_value(wall, "Fire Rating")
structural = get_parameter_value(wall, "Structural")
comments = get_parameter_value(wall, "Comments")

print(f"Fire Rating: {fire_rating}")
print(f"Structural: {structural}")
print(f"Comments: {comments}")
def get_all_parameters(obj):
    """Extract all parameters as flat dictionary.

    Args:
        obj: Object expected to expose a ``properties`` dictionary.

    Returns:
        dict mapping each parameter name to
        ``{"value": ..., "units": ..., "category": ...}``. ``units`` is
        ``None`` for parameters stored as a bare value. When the same
        parameter name appears in several categories, the last one iterated
        wins. Empty dict when ``properties`` or its ``"Parameters"`` entry is
        missing, ``None`` or empty.
    """
    # ``properties`` may be absent or None; both mean "no parameters".
    props = getattr(obj, "properties", None)
    if not props:
        return {}

    # ``or {}`` also covers an explicit ``"Parameters": None`` entry.
    params = props.get("Parameters") or {}

    all_params = {}
    for category_name, category_params in params.items():
        if not isinstance(category_params, dict):
            continue

        for param_name, param_data in category_params.items():
            # Extract value
            if isinstance(param_data, dict):
                value = param_data.get("value")
                units = param_data.get("units")
            else:
                value, units = param_data, None

            all_params[param_name] = {
                "value": value,
                "units": units,
                "category": category_name,
            }

    return all_params


# Get all parameters
params = get_all_parameters(wall)
for name, data in params.items():
    value = data["value"]
    units = data["units"]
    print(f"{name}: {value} {units or ''}")
Building a parameter index using GraphTraversal:
Copy
from specklepy.objects.graph_traversal.traversal import GraphTraversal


def build_parameter_index(root):
    """Build index of all parameter values across all objects.

    Args:
        root: Object handed to ``GraphTraversal.traverse``.

    Returns:
        dict mapping each parameter name to a list of entries
        ``{"object", "object_name", "object_id", "value"}``, one entry per
        occurrence of that parameter on a visited object. Objects whose
        ``properties`` or ``"Parameters"`` entry is missing, ``None`` or
        empty contribute nothing.
    """
    traversal = GraphTraversal([])
    index = {}

    for context in traversal.traverse(root):
        obj = context.current

        # ``properties`` may be absent or None; skip instead of failing on
        # ``None.get``.
        props = getattr(obj, "properties", None)
        if not props:
            continue

        # Get object info
        obj_name = getattr(obj, "name", None)
        obj_id = getattr(obj, "id", None)

        # ``or {}`` also covers an explicit ``"Parameters": None`` entry.
        params = props.get("Parameters") or {}
        for category_params in params.values():
            if not isinstance(category_params, dict):
                continue

            for param_name, param_data in category_params.items():
                # Extract value
                if isinstance(param_data, dict):
                    value = param_data.get("value")
                else:
                    value = param_data

                # Add to index (bucket is created on first use)
                index.setdefault(param_name, []).append({
                    "object": obj,
                    "object_name": obj_name,
                    "object_id": obj_id,
                    "value": value,
                })

    return index


# Build index
param_index = build_parameter_index(root)

# Fast queries
fire_rated_objects = [
    item
    for item in param_index.get("Fire Rating", [])
    if item["value"] and "hour" in str(item["value"])
]
load_bearing_objects = [
    item
    for item in param_index.get("Structural", [])
    if item["value"] is True
]

print(f"Fire rated elements: {len(fire_rated_objects)}")
print(f"Load bearing elements: {len(load_bearing_objects)}")