Symbol-Based Discovery
Symbol search uses tree-sitter to extract and analyze functions, classes, methods, and variables from your code. This provides structured, language-aware code discovery.
Quick Start
from kit import Repository
repo = Repository("/path/to/codebase")
# Extract all symbols
symbols = repo.extract_symbols()
# Find all functions
functions = [s for s in symbols if s["type"] == "function"]
print(f"Found {len(functions)} functions")
# Find specific function
auth_funcs = [s for s in symbols if "auth" in s["name"].lower()]
# Extract from specific file
file_symbols = repo.extract_symbols("src/api/routes.py")

Understanding Symbols
Symbols represent code elements extracted via tree-sitter:
Symbol types:
- function - Top-level functions
- class - Class definitions
- method - Class methods
- variable - Module-level variables
- constant - Constants
Symbol properties:
{
    "name": "authenticate_user",
    "type": "function",
    "file": "src/auth.py",
    "start_line": 42,
    "end_line": 58,
    "code": "def authenticate_user(...):\n ...",
    "language": "python"
}

Basic Symbol Extraction
All Symbols in Repository
# Get all symbols
symbols = repo.extract_symbols()
# Count by type
from collections import Counter
types = Counter(s["type"] for s in symbols)
print(f"Functions: {types['function']}")
print(f"Classes: {types['class']}")
print(f"Methods: {types['method']}")

Symbols from Specific File
# Extract from one file
symbols = repo.extract_symbols("src/api/routes.py")
# Extract from multiple files
files = ["src/auth.py", "src/models.py", "src/utils.py"]
all_symbols = []
for file in files:
    all_symbols.extend(repo.extract_symbols(file))

Symbols from Directory
# Get file tree
files = repo.get_file_tree()
# Extract from all files in src/api/
api_files = [f["path"] for f in files if f["path"].startswith("src/api/")]
api_symbols = []
for file in api_files:
    api_symbols.extend(repo.extract_symbols(file))
print(f"Found {len(api_symbols)} symbols in src/api/")

Filtering and Searching Symbols
Find by Name
# Find exact match
symbols = repo.extract_symbols()
user_class = next((s for s in symbols if s["name"] == "User"), None)
# Find partial match
auth_symbols = [s for s in symbols if "auth" in s["name"].lower()]
# Find with regex
import re
pattern = re.compile(r"^get_\w+")
getters = [s for s in symbols if s["type"] == "function" and pattern.match(s["name"])]

Find by Type
symbols = repo.extract_symbols()
# All functions
functions = [s for s in symbols if s["type"] == "function"]
# All classes
classes = [s for s in symbols if s["type"] == "class"]
# All methods (functions inside classes)
methods = [s for s in symbols if s["type"] == "method"]
# All async functions (check code content)
async_functions = [s for s in symbols if s["type"] == "function" and "async def" in s.get("code", "")]

Find by File Pattern
symbols = repo.extract_symbols()
# Symbols from test files
test_symbols = [s for s in symbols if "test" in s["file"]]
# Symbols from specific module
api_symbols = [s for s in symbols if s["file"].startswith("src/api/")]
# Exclude tests
non_test = [s for s in symbols if not s["file"].startswith("tests/")]

Common Use Cases
Find All API Endpoints
# Find route handler functions
symbols = repo.extract_symbols()
# Look for functions with route decorators
api_endpoints = []
for symbol in symbols:
    if symbol["type"] == "function":
        code = symbol.get("code", "")
        if "@app.route" in code or "@router" in code:
            api_endpoints.append(symbol)
print(f"Found {len(api_endpoints)} API endpoints:")
for endpoint in api_endpoints:
    print(f" {endpoint['file']}::{endpoint['name']}")

Find Unused Functions
# Get all function names
symbols = repo.extract_symbols()
function_names = {s["name"] for s in symbols if s["type"] == "function"}
# Search for usage of each function
unused = []
for name in function_names:
    # Skip test functions and private functions
    if name.startswith("test_") or name.startswith("_"):
        continue
    # Search for function calls
    results = repo.search_text(f"{name}\\(", file_pattern="**/*.py")
    # If only one result (the definition), it might be unused
    if len(results) <= 1:
        unused.append(name)
print(f"Potentially unused functions: {len(unused)}")
for name in unused[:10]:  # Show first 10
    print(f" {name}")

Analyze Code Complexity
# Find large functions
symbols = repo.extract_symbols()
functions = [s for s in symbols if s["type"] == "function"]
# Calculate lines of code
for func in functions:
    func["lines"] = func["end_line"] - func["start_line"] + 1
# Sort by size
large_functions = sorted(functions, key=lambda f: f["lines"], reverse=True)
print("Largest functions:")
for func in large_functions[:10]:
    print(f" {func['name']} ({func['lines']} lines) in {func['file']}")

Generate Class Hierarchy
# Extract all classes
symbols = repo.extract_symbols()
classes = [s for s in symbols if s["type"] == "class"]
# Group by file
from collections import defaultdict
by_file = defaultdict(list)
for cls in classes:
    by_file[cls["file"]].append(cls["name"])
for file, class_names in sorted(by_file.items()):
    print(f"\n{file}:")
    for name in class_names:
        print(f" - {name}")

Find Symbol Usages
# Find where a symbol is used
def find_usages(repo, symbol_name):
    """Find all usages of a symbol across the repository.

    Runs a text search for ``symbol_name`` over ``**/*.py`` and drops every
    hit that sits on the first line of a symbol with that name, so that
    definitions are not reported as usages.

    Args:
        repo: Object exposing ``search_text(query, file_pattern=...)`` and
            ``extract_symbols()``, as used elsewhere in this guide.
        symbol_name: Name to look for. It is passed to ``search_text``
            unchanged.

    Returns:
        The search results (in their original order) whose
        ``(file, line_number)`` is not the ``(file, start_line)`` of any
        symbol named ``symbol_name``.
    """
    results = repo.search_text(symbol_name, file_pattern="**/*.py")

    # A name may be defined more than once (e.g. same-named methods in
    # different classes or files), so collect every definition site rather
    # than just the first match.
    definition_sites = {
        (symbol["file"], symbol["start_line"])
        for symbol in repo.extract_symbols()
        if symbol["name"] == symbol_name
    }

    return [
        result
        for result in results
        if (result["file"], result["line_number"]) not in definition_sites
    ]
# Example usage
usages = find_usages(repo, "authenticate_user")
print(f"Found {len(usages)} usages of authenticate_user")

Symbol Properties
Name and Type
symbol = {
    "name": "UserManager",  # Symbol identifier
    "type": "class",  # Symbol type
}

Location
symbol = {
    "file": "src/models.py",  # File path
    "start_line": 15,  # Starting line (1-indexed)
    "end_line": 142,  # Ending line (inclusive)
}

Code Content
symbol = {
    "code": "class UserManager:\n def __init__(...):\n ...",
    "language": "python"
}

CLI Usage
Symbol extraction is available via the kit symbols command:
# Extract all symbols
kit symbols /path/to/repo
# Format as table
kit symbols /path/to/repo --format table
# Format as JSON
kit symbols /path/to/repo --format json > symbols.json
# From specific file
kit symbols /path/to/repo --file src/api/routes.py
# Filter by type
kit symbols /path/to/repo --format json | jq '.[] | select(.type=="function")'

Language Support
Symbol extraction works with any language that kit supports via tree-sitter:
Fully supported:
- Python
- JavaScript/TypeScript
- Go
- Rust
- Java
- C/C++
- Ruby
- PHP
- And more…
Check the current list with:
from kit import Repository
repo = Repository(".")
print(repo.supported_languages())

Performance Tips
Fast:
- Symbol extraction is cached per file
- Incremental updates for changed files
- Tree-sitter parsing is very efficient
Optimize:
- Extract from specific files when possible
- Filter symbols in memory rather than re-extracting
- Use file patterns to limit scope
When to Use Symbol Search
Good for:
- Finding functions, classes, methods by name
- Analyzing code structure
- Building code navigation tools
- Understanding codebase architecture
- Finding definitions
Consider alternatives:
- Text search: Finding specific strings or patterns
- Semantic search: Finding code by meaning
- Dependency analysis: Understanding relationships
Advanced Examples
Find All Test Functions
symbols = repo.extract_symbols()
# Find test functions
test_functions = [
    s for s in symbols
    if s["type"] == "function" and (s["name"].startswith("test_") or "test" in s["file"])
]
# Group by file
by_file = {}
for test in test_functions:
    by_file.setdefault(test["file"], []).append(test["name"])
for file, tests in sorted(by_file.items()):
    print(f"\n{file}: {len(tests)} tests")

Generate Module Documentation
# Get all public symbols (not starting with _)
symbols = repo.extract_symbols()
public_symbols = [s for s in symbols if not s["name"].startswith("_")]
# Group by file
by_file = {}
for symbol in public_symbols:
    by_file.setdefault(symbol["file"], []).append(symbol)
# Generate simple docs
for file, syms in sorted(by_file.items()):
    print(f"\n## {file}\n")
    for s in syms:
        print(f"- `{s['name']}` ({s['type']})")

Find Decorator Patterns
# Find all decorated functions
symbols = repo.extract_symbols()
decorators = {}
for symbol in symbols:
    if symbol["type"] in ["function", "method"]:
        code = symbol.get("code", "")
        # Look for @decorator patterns
        lines = code.split("\n")
        for line in lines:
            if line.strip().startswith("@"):
                decorator = line.strip()[1:].split("(")[0]
                decorators.setdefault(decorator, []).append(symbol["name"])
print("Decorator usage:")
for decorator, functions in sorted(decorators.items()):
    print(f" @{decorator}: {len(functions)} functions")