Practical Recipes
1. Rename every function `old_name` → `new_name`
import re
from pathlib import Path

from kit import Repository

repo = Repository("/path/to/project")

# Gather definitions & references (quick heuristic)
usages = repo.find_symbol_usages("old_name", symbol_type="function")

# Match the identifier only as a whole word, so names that merely contain it
# (e.g. "old_name_v2") are left untouched.
old_name_re = re.compile(r"\bold_name\b")

# Keep ONE working copy of each file's lines. Re-reading the file for every
# usage would discard earlier replacements made in that same file.
pending: dict[str, list[str]] = {}
for u in usages:
    path, line = u["file"], u.get("line")
    if line is None:
        continue
    if path not in pending:
        pending[path] = repo.get_file_content(path).splitlines()
    lines = pending[path]
    # `line` is used directly as a list index; skip anything out of range
    # instead of raising or silently wrapping around on a negative value.
    if 0 <= line < len(lines):
        lines[line] = old_name_re.sub("new_name", lines[line])

# Final text of every touched file, keyed by repo-relative path.
edits: dict[str, str] = {
    path: "\n".join(lines) + "\n" for path, lines in pending.items()
}

# Apply edits – prompt the user first!
for rel_path, new_src in edits.items():
    Path(repo.repo_path, rel_path).write_text(new_src)

repo.mapper.scan_repo()  # refresh symbols if you'll run more queries
2. Summarize a Git diff for an LLM PR review
from kit import Repository
# Assuming OpenAI for this example, and API key is set in environment
from kit.summaries import OpenAIConfig

repo = Repository(".")
assembler = repo.get_context_assembler()

# diff_text would be a string containing the output of `git diff`
# Example:
# diff_text = subprocess.run(["git", "diff", "HEAD~1"], capture_output=True, text=True).stdout

# Ensure diff_text is populated before this step in a real script
diff_text = """diff --git a/file.py b/file.py
index 0000000..1111111 100644
--- a/file.py
+++ b/file.py
@@ -1,1 +1,1 @@
-old line
+new line
"""  # Placeholder diff_text

assembler.add_diff(diff_text)
context_blob = assembler.format_context()

# Get the summarizer and its underlying LLM client to summarize arbitrary text.
# This example assumes you want to use the default OpenAI configuration for the summarizer.
# If you have a specific config (OpenAI, Anthropic, Google), pass it to get_summarizer.
summarizer_instance = repo.get_summarizer()  # Uses default OpenAIConfig
llm_client = summarizer_instance._get_llm_client()  # Access the configured client

# Instruction shared by both client flavours.
SYSTEM_PROMPT = "You are an expert software engineer. Please summarize the following code changes and context."


def _configured_model(fallback):
    """Return the model named in the summarizer config, or *fallback* if there is no config."""
    return summarizer_instance.config.model if summarizer_instance.config else fallback


def _summarize_with_openai():
    """Send the context through an OpenAI-like chat-completions client."""
    reply = llm_client.chat.completions.create(
        model=_configured_model("gpt-4o"),  # Get model from config
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": context_blob},
        ],
        temperature=0.2,
        max_tokens=500,  # Adjust as needed
    )
    return reply.choices[0].message.content.strip()


def _summarize_with_anthropic():
    """Send the context through an Anthropic-like messages client."""
    reply = llm_client.messages.create(
        model=_configured_model("claude-3-opus-20240229"),
        system=SYSTEM_PROMPT,
        messages=[
            {"role": "user", "content": context_blob},
        ],
        max_tokens=500,
        temperature=0.2,
    )
    return reply.content[0].text.strip()


# Pick the request helper by probing the client's shape.
if hasattr(llm_client, "chat") and hasattr(llm_client.chat, "completions"):
    request_summary = _summarize_with_openai
elif hasattr(llm_client, "messages") and hasattr(llm_client.messages, "create"):
    request_summary = _summarize_with_anthropic
else:
    # Add a similar branch for a Google GenAI client if needed, or abstract this LLM call further
    request_summary = None

summary = "Could not generate summary."
if request_summary is not None:
    try:
        summary = request_summary()
    except Exception as e:
        summary = f"Error generating summary: {e}"

print(summary)
3. Semantic search for authentication code
from sentence_transformers import SentenceTransformer

from kit import Repository  # needed so the snippet runs on its own

model = SentenceTransformer("all-MiniLM-L6-v2")


def embed(text):
    """Encode *text* with the model and return the single resulting vector via ``.tolist()``."""
    # encode() is given a one-element batch, so take element 0 of the result.
    return model.encode([text])[0].tolist()


repo = Repository(".")
vs = repo.get_vector_searcher(embed_fn=embed)
vs.build_index()

hits = repo.search_semantic("How is user authentication handled?", embed_fn=embed)
for h in hits:
    print(h["file"], h.get("name"))
4. Export full repo index to JSON (file tree + symbols)
from kit import Repository  # needed so the snippet runs on its own

# Write the repository index (file tree + symbols) to a JSON file.
repo = Repository("/path/to/project")
repo.write_index("repo_index.json")
5. Find All Callers of a Specific Function (Cross-File)
This recipe helps you understand where a particular function is being used throughout your entire codebase, which is crucial for impact analysis or refactoring.
from kit import Repository

# Initialize the repository
repo = Repository("/path/to/your_project")

# Specify the function name and its type
function_name_to_trace = "my_target_function"

# Find all usages (definitions, calls, imports)
usages = repo.find_symbol_usages(function_name_to_trace, symbol_type="function")

print(f"Usages of function '{function_name_to_trace}':")
for usage in usages:
    file_path = usage.get("file")
    line_number = usage.get("line")  # Assuming 'line' is the start line of the usage/symbol
    usage_type = usage.get("type", "unknown")  # e.g., 'function' for definition, 'call' for a call site

    # A .get() default only applies when the key is absent. Guard explicitly
    # against a "context" key that is present but None, which would otherwise
    # crash on .strip() below.
    context_snippet = usage.get("context")
    if context_snippet is None:
        context_snippet = "No context available"

    # We are interested in where it's CALLED, so we might filter out the definition itself if needed,
    # or differentiate based on the 'type' or 'context'.
    # For this example, we'll print all usages.
    if line_number is not None:
        print(f"- Found in: {file_path}:L{line_number + 1}")  # (line is 0-indexed, display as 1-indexed)
    else:
        print(f"- Found in: {file_path}")
    print(f"  Type: {usage_type}")
    print(f"  Context: {context_snippet.strip()}\n")

# Example: Filtering for actual call sites (heuristic based on context or type if available)
# print(f"\nCall sites for function '{function_name_to_trace}':")
# for usage in usages:
#     # This condition might need refinement based on what 'find_symbol_usages' returns for 'type' of a call
#     if usage.get("type") != "function" and function_name_to_trace + "(" in usage.get("context", ""):
#         file_path = usage.get("file")
#         line_number = usage.get("line")
#         print(f"- Call in: {file_path}:L{line_number + 1 if line_number is not None else 'N/A'}")
6. Identify Potentially Unused Functions (Heuristic)
This recipe provides a heuristic to find functions that might be unused within the analyzed codebase. This can be a starting point for identifying dead code. Note that this is only a heuristic: it might not catch dynamically called functions, functions that are part of a public API but not used internally, or functions used only in parts of the codebase that were not analyzed (e.g., separate test suites).
from kit import Repository

repo = Repository("/path/to/your_project")

# Get all symbols from the repository index.
# The structure of repo.index() might vary; this recipe assumes a dict shaped like
# {'symbols': {'file_path': [symbol_dicts]}}, or some direct way to get all function definitions.
# For this example, we iterate through all symbols and keep only the functions.

# A more robust way might be to iterate files, then symbols within files from repo.index()
# index = repo.index()
# all_symbols_by_file = index.get("symbols", {})

print("Potentially unused functions:")

# Step 1: collect every function definition recorded in the index.
repo_index = repo.index()  # Assuming this fetches file tree and symbols
symbols_map = repo_index.get("symbols", {})
defined_functions = [
    {
        "name": symbol_info.get("name"),
        "file": file_path,
        "line": symbol_info.get("line_start", 0),  # or 'line'
    }
    for file_path, symbols_in_file in symbols_map.items()
    for symbol_info in symbols_in_file
    if symbol_info.get("type") == "function"
]

# Step 2: for each definition, look for any usage that is not the definition itself.
for func_def in defined_functions:
    function_name = func_def["name"]
    if not function_name:  # Skip if name is missing
        continue

    definition_file, definition_line = func_def["file"], func_def["line"]
    definition_site = (definition_file, definition_line)

    usages = repo.find_symbol_usages(function_name, symbol_type="function")

    # A usage counts as a real reference when its (file, line) pair differs from
    # the definition site. A simpler heuristic would be: exactly one usage means
    # it is likely just the definition.
    actual_references = [
        u for u in usages if (u.get("file"), u.get("line")) != definition_site
    ]

    # No references besides the definition site -> candidate for being unused.
    # The threshold (e.g., 0 or 1) can be adjusted.
    if not actual_references:
        print(f"- Function '{function_name}' defined in {definition_file}:L{definition_line + 1} has no apparent internal usages.")
:::caution[Limitations of this heuristic:]
**Limitations of this heuristic:**

* **Dynamic Calls:** Functions called dynamically (e.g., through reflection, or if the function name is constructed from a string at runtime) won't be detected as used.
* **Public APIs:** Functions intended for external use (e.g., library functions) will appear unused if the analysis is limited to the library's own codebase.
* **Test Code:** If your test suite is separate and not part of the `Repository` path being analyzed, functions used only by tests might be flagged.
* **Object Methods:** The `symbol_type="function"` might need adjustment or further logic if you are also looking for unused *methods* within classes, as their usage context is different.
:::