Skip to content

Practical Recipes

1. Rename every function old_name → new_name

Section titled “1. Rename every function old_name → new_name”
from pathlib import Path
from kit import Repository
repo = Repository("/path/to/project")

# Gather definitions & references (quick heuristic).
usages = repo.find_symbol_usages("old_name", symbol_type="function")

# Pending rewrites keyed by repo-relative path. Each file is loaded once and
# then patched in place, so several usages in the same file accumulate instead
# of the last one silently overwriting the earlier replacements.
pending: dict[str, list[str]] = {}
for u in usages:
    path, line = u["file"], u.get("line")
    if line is None:
        continue
    if path not in pending:
        pending[path] = repo.get_file_content(path).splitlines()
    lines = pending[path]
    # `line` is used directly as a list index; skip entries that fall outside
    # the file rather than raising IndexError or patching the wrong line.
    if not 0 <= line < len(lines):
        continue
    lines[line] = lines[line].replace("old_name", "new_name")

# Final source text per file, newline-terminated.
edits: dict[str, str] = {
    path: "\n".join(file_lines) + "\n" for path, file_lines in pending.items()
}

# Apply edits – prompt the user first!
for rel_path, new_src in edits.items():
    Path(repo.repo_path, rel_path).write_text(new_src)

repo.mapper.scan_repo()  # refresh symbols if you'll run more queries

2. Summarize a Git diff for an LLM PR review

Section titled “2. Summarize a Git diff for an LLM PR review”
from kit import Repository
# Assuming OpenAI for this example, and API key is set in environment
from kit.summaries import OpenAIConfig
repo = Repository(".")
assembler = repo.get_context_assembler()

# In a real script, populate diff_text from `git diff` before this point, e.g.:
#   diff_text = subprocess.run(["git", "diff", "HEAD~1"], capture_output=True, text=True).stdout
# A small hard-coded diff stands in for it here.
diff_text = """diff --git a/file.py b/file.py
index 0000000..1111111 100644
--- a/file.py
+++ b/file.py
@@ -1,1 +1,1 @@
-old line
+new line
"""  # Placeholder diff_text
assembler.add_diff(diff_text)
context_blob = assembler.format_context()

# Borrow the summarizer's underlying LLM client so arbitrary text can be summarized.
# get_summarizer() is called without arguments here (default OpenAIConfig); pass a
# specific config (OpenAI, Anthropic, Google) to target another provider.
summarizer_instance = repo.get_summarizer()
llm_client = summarizer_instance._get_llm_client()  # the configured client

# The same instruction is sent to whichever provider is detected below.
system_prompt = "You are an expert software engineer. Please summarize the following code changes and context."

summary = "Could not generate summary."
if hasattr(llm_client, "chat") and hasattr(llm_client.chat, "completions"):
    # OpenAI-like client
    try:
        # Model comes from the summarizer config when one is present.
        model_name = summarizer_instance.config.model if summarizer_instance.config else "gpt-4o"
        response = llm_client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": context_blob},
            ],
            temperature=0.2,
            max_tokens=500,  # Adjust as needed
        )
        summary = response.choices[0].message.content.strip()
    except Exception as e:
        summary = f"Error generating summary: {e}"
elif hasattr(llm_client, "messages") and hasattr(llm_client.messages, "create"):
    # Anthropic-like client
    try:
        model_name = (
            summarizer_instance.config.model
            if summarizer_instance.config
            else "claude-3-opus-20240229"
        )
        response = llm_client.messages.create(
            model=model_name,
            system=system_prompt,
            messages=[
                {"role": "user", "content": context_blob},
            ],
            max_tokens=500,
            temperature=0.2,
        )
        summary = response.content[0].text.strip()
    except Exception as e:
        summary = f"Error generating summary: {e}"
# Add a similar elif for a Google GenAI client if needed, or abstract this LLM call further.

print(summary)

3. Semantic search for authentication code

Section titled “3. Semantic search for authentication code”
from kit import Repository
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")


def embed(text: str) -> list[float]:
    """Encode one string with the shared model and return its vector as a plain list."""
    # encode() is given a one-element batch; take the single resulting row.
    return model.encode([text])[0].tolist()


repo = Repository(".")

# Build the vector index before issuing the semantic query.
vs = repo.get_vector_searcher(embed_fn=embed)
vs.build_index()

hits = repo.search_semantic("How is user authentication handled?", embed_fn=embed)
for h in hits:
    print(h["file"], h.get("name"))

4. Export full repo index to JSON (file tree + symbols)

Section titled “4. Export full repo index to JSON (file tree + symbols)”
from kit import Repository

# Open the project and dump its index (file tree + symbols) to a JSON file.
repo = Repository("/path/to/project")
repo.write_index("repo_index.json")

5. Find All Callers of a Specific Function (Cross-File)

Section titled “5. Find All Callers of a Specific Function (Cross-File)”

This recipe helps you understand where a particular function is being used throughout your entire codebase, which is crucial for impact analysis or refactoring.

from kit import Repository
# Open the project to analyze.
repo = Repository("/path/to/your_project")

# Name of the function whose usages should be traced.
function_name_to_trace = "my_target_function"

# Collect every usage (definitions, calls, imports).
usages = repo.find_symbol_usages(function_name_to_trace, symbol_type="function")

print(f"Usages of function '{function_name_to_trace}':")
for usage in usages:
    path = usage.get("file")
    line_no = usage.get("line")  # assumed to be the start line of the usage/symbol
    snippet = usage.get("context", "No context available")
    kind = usage.get("type", "unknown")  # e.g., 'function' for definition, 'call' for a call site

    # Every usage is printed here. To report only call sites, filter out the
    # definition itself or branch on 'type' / 'context' (see the example below).
    if line_no is None:
        print(f"- Found in: {path}")
    else:
        # line is 0-indexed, display as 1-indexed
        print(f"- Found in: {path}:L{line_no + 1}")
    print(f" Type: {kind}")
    print(f" Context: {snippet.strip()}\n")

# Example: filtering for actual call sites (heuristic based on context or type if available)
# print(f"\nCall sites for function '{function_name_to_trace}':")
# for usage in usages:
#     # This condition might need refinement based on what 'find_symbol_usages' returns for 'type' of a call
#     if usage.get("type") != "function" and function_name_to_trace + "(" in usage.get("context", ""):
#         file_path = usage.get("file")
#         line_number = usage.get("line")
#         print(f"- Call in: {file_path}:L{line_number + 1 if line_number is not None else 'N/A'}")

6. Identify Potentially Unused Functions (Heuristic)

Section titled “6. Identify Potentially Unused Functions (Heuristic)”

This recipe provides a heuristic for finding functions that might be unused within the analyzed codebase, which can be a starting point for identifying dead code. It is only a heuristic: it may miss dynamically called functions, it will flag functions that are part of a public API but not used internally, and it will flag functions used only in parts of the codebase that were not analyzed (e.g., separate test suites).

from kit import Repository
repo = Repository("/path/to/your_project")

# NOTE: the structure returned by repo.index() might vary. This recipe assumes a
# dict shaped like {'symbols': {'file_path': [symbol_dicts]}}; adapt the lookups
# below if your index exposes function definitions differently.
print("Potentially unused functions:")

# Step 1: collect every function definition recorded in the index.
defined_functions = []
repo_index = repo.index()  # assumed to return the file tree and symbols
symbols_map = repo_index.get("symbols", {})
for file_path, symbols_in_file in symbols_map.items():
    for symbol_info in symbols_in_file:
        if symbol_info.get("type") != "function":
            continue
        defined_functions.append(
            {
                "name": symbol_info.get("name"),
                "file": file_path,
                "line": symbol_info.get("line_start", 0),  # or 'line'
            }
        )

# Step 2: for each definition, look for any usage that is not the definition itself.
for func_def in defined_functions:
    function_name = func_def["name"]
    if not function_name:  # nothing to search for without a name
        continue
    definition_file = func_def["file"]
    definition_line = func_def["line"]

    usages = repo.find_symbol_usages(function_name, symbol_type="function")

    # A usage counts as a real reference when its file or its line differs from
    # the definition site. (A simpler variant: treat "exactly one usage" as
    # "definition only".)
    actual_references = [
        u
        for u in usages
        if u.get("file") != definition_file or u.get("line") != definition_line
    ]

    # No references beyond the definition site: report it as a candidate.
    # The threshold (e.g., 0 or 1) can be adjusted.
    if not actual_references:
        print(f"- Function '{function_name}' defined in {definition_file}:L{definition_line + 1} has no apparent internal usages.")
:::caution[Limitations of this heuristic:]
**Limitations of this heuristic:**
* **Dynamic Calls:** Functions called dynamically (e.g., through reflection, or if the function name is constructed from a string at runtime) won't be detected as used.
* **Public APIs:** Functions intended for external use (e.g., library functions) will appear unused if the analysis is limited to the library's own codebase.
* **Test Code:** If your test suite is separate and not part of the `Repository` path being analyzed, functions used only by tests might be flagged.
* **Object Methods:** The `symbol_type="function"` might need adjustment or further logic if you are also looking for unused *methods* within classes, as their usage context is different.
:::