Practical Recipes

1. Rename every function `old_name` → `new_name`

from pathlib import Path
from kit import Repository

import re

repo = Repository("/path/to/project")

# Gather definitions & references (quick heuristic)
usages = repo.find_symbol_usages("old_name", symbol_type="function")

# Match the identifier as a whole word so that longer names which merely
# contain it (e.g. `old_name_helper`) are left untouched.
old_name_pattern = re.compile(r"\bold_name\b")

# Working copy of each touched file, keyed by repo-relative path. Caching the
# lines here lets several hits in the same file accumulate; re-reading the
# file for every hit would discard all but the last edit to that file.
pending: dict[str, list[str]] = {}
for u in usages:
    path, line = u["file"], u.get("line")
    if line is None:
        continue
    if path not in pending:
        pending[path] = repo.get_file_content(path).splitlines()
    lines = pending[path]
    # `line` is used as a 0-based index into the file's lines (as this recipe
    # always has). Skip hits outside the file: a negative index would silently
    # edit a line counted from the end, and a too-large one would raise.
    if not 0 <= line < len(lines):
        continue
    lines[line] = old_name_pattern.sub("new_name", lines[line])

# Full replacement source for every touched file (always newline-terminated).
edits: dict[str, str] = {
    path: "\n".join(lines) + "\n" for path, lines in pending.items()
}

# Apply edits – prompt the user first!
for rel_path, new_src in edits.items():
    Path(repo.repo_path, rel_path).write_text(new_src)

repo.mapper.scan_repo()  # refresh symbols if you'll run more queries

2. Summarize a Git diff for an LLM PR review

from kit import Repository
# Assuming OpenAI for this example, and API key is set in environment
from kit.summaries import OpenAIConfig

repo = Repository(".")
assembler = repo.get_context_assembler()
# In a real script, diff_text holds the output of `git diff`, for example:
# diff_text = subprocess.run(["git", "diff", "HEAD~1"], capture_output=True, text=True).stdout

# A tiny hard-coded diff stands in for real `git diff` output here.
diff_text = """diff --git a/file.py b/file.py
index 0000000..1111111 100644
--- a/file.py
+++ b/file.py
@@ -1,1 +1,1 @@
-old line
+new line
""" # Placeholder diff_text

assembler.add_diff(diff_text)
context_blob = assembler.format_context()

# Borrow the summarizer's underlying LLM client so arbitrary text can be summarized.
# get_summarizer() with no arguments uses the default OpenAI configuration; pass an
# explicit config (OpenAI, Anthropic, Google) to get_summarizer() to choose another.
summarizer_instance = repo.get_summarizer()
llm_client = summarizer_instance._get_llm_client()

# Shared by both provider branches below.
system_prompt = "You are an expert software engineer. Please summarize the following code changes and context."

# Pick a request function by probing the client's shape; nothing is sent yet.
if hasattr(llm_client, 'chat') and hasattr(llm_client.chat, 'completions'):
    # OpenAI-like client
    def request_summary():
        config = summarizer_instance.config
        response = llm_client.chat.completions.create(
            model=config.model if config else "gpt-4o",  # model comes from the config when there is one
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": context_blob},
            ],
            temperature=0.2,
            max_tokens=500,  # adjust as needed
        )
        return response.choices[0].message.content.strip()
elif hasattr(llm_client, 'messages') and hasattr(llm_client.messages, 'create'):
    # Anthropic-like client
    def request_summary():
        config = summarizer_instance.config
        response = llm_client.messages.create(
            model=config.model if config else "claude-3-opus-20240229",
            system=system_prompt,
            messages=[
                {"role": "user", "content": context_blob},
            ],
            max_tokens=500,
            temperature=0.2,
        )
        return response.content[0].text.strip()
else:
    # Add a similar branch for a Google GenAI client if needed, or abstract this LLM call further
    request_summary = None

summary = "Could not generate summary."
if request_summary is not None:
    try:
        summary = request_summary()
    except Exception as exc:
        # Any failure while calling the provider becomes the printed message.
        summary = f"Error generating summary: {exc}"

print(summary)

3. Semantic search for authentication code

from sentence_transformers import SentenceTransformer
model = SentenceTransformer("all-MiniLM-L6-v2")


def embed(text):
    """Embed a single string with the model loaded above.

    The text is encoded as a one-item batch; the first (only) result is
    converted with ``.tolist()`` so a plain Python list is returned rather
    than the model's array type.
    """
    return model.encode([text])[0].tolist()


repo = Repository(".")
# Build the vector index with the same embedding function that the query uses below.
vs = repo.get_vector_searcher(embed_fn=embed)
vs.build_index()

hits = repo.search_semantic("How is user authentication handled?", embed_fn=embed)
for h in hits:
    # "name" may be absent on a hit, hence .get()
    print(h["file"], h.get("name"))

4. Export full repo index to JSON (file tree + symbols)

# Open the project, then dump its index (file tree + symbols) to a JSON file.
index_path = "repo_index.json"
repo = Repository("/path/to/project")
repo.write_index(index_path)

5. Find All Callers of a Specific Function (Cross-File)

This recipe helps you understand where a particular function is being used throughout your entire codebase, which is crucial for impact analysis or refactoring.

from kit import Repository

# Initialize the repository
# Initialize the repository
repo = Repository("/path/to/your_project")

# Specify the function name and its type
function_name_to_trace = "my_target_function"

# Find all usages (definitions, calls, imports)
usages = repo.find_symbol_usages(function_name_to_trace, symbol_type="function")

print(f"Usages of function '{function_name_to_trace}':")
for usage in usages:
    file_path = usage.get("file")
    line_number = usage.get("line")  # Assuming 'line' is the start line of the usage/symbol
    # Use `or` rather than a .get() default: the default is ignored when the key
    # is present with a None value, and .strip() below would then raise.
    context_snippet = usage.get("context") or "No context available"
    usage_type = usage.get("type") or "unknown"  # e.g., 'function' for definition, 'call' for a call site

    # We are interested in where it's CALLED, so we might filter out the definition itself if needed,
    # or differentiate based on the 'type' or 'context'.
    # For this example, we'll print all usages.
    if line_number is not None:
        print(f"- Found in: {file_path}:L{line_number + 1}")  # (line is 0-indexed, display as 1-indexed)
    else:
        print(f"- Found in: {file_path}")
    print(f"    Type: {usage_type}")
    print(f"    Context: {context_snippet.strip()}\n")

# Example: Filtering for actual call sites (heuristic based on context or type if available)
# print(f"\nCall sites for function '{function_name_to_trace}':")
# for usage in usages:
#     # This condition might need refinement based on what 'find_symbol_usages' returns for 'type' of a call
#     if usage.get("type") != "function" and function_name_to_trace + "(" in usage.get("context", ""):
#         file_path = usage.get("file")
#         line_number = usage.get("line")
#         print(f"- Call in: {file_path}:L{line_number + 1 if line_number is not None else 'N/A'}")

6. Identify Potentially Unused Functions (Heuristic)

This recipe provides a heuristic for finding functions that might be unused within the analyzed codebase, which can be a starting point for identifying dead code. It is only a heuristic: it can wrongly flag functions that are called dynamically, functions that are part of a public API but not used internally, and functions used only in parts of the codebase that were not analyzed (e.g., separate test suites).

from kit import Repository

repo = Repository("/path/to/your_project")

# The index is assumed to look like {'symbols': {'file_path': [symbol_dicts]}};
# the exact structure returned by repo.index() may vary between versions.

print("Potentially unused functions:")

# Step 1: collect every function definition recorded in the index.
repo_index = repo.index()  # Assuming this fetches file tree and symbols
symbols_map = repo_index.get("symbols", {})

defined_functions = [
    {
        "name": symbol_info.get("name"),
        "file": file_path,
        "line": symbol_info.get("line_start", 0),  # or 'line'
    }
    for file_path, symbols_in_file in symbols_map.items()
    for symbol_info in symbols_in_file
    if symbol_info.get("type") == "function"
]

# Step 2: report each function whose only recorded usage is its own definition.
for func_def in defined_functions:
    function_name = func_def["name"]
    if not function_name:  # nothing to look up without a name
        continue

    definition_file = func_def["file"]
    definition_line = func_def["line"]
    definition_site = (definition_file, definition_line)

    usages = repo.find_symbol_usages(function_name, symbol_type="function")

    # A usage counts as a real reference when it is not located at the definition
    # site, i.e. its file or its line differs from where the function is defined.
    actual_references = [
        u for u in usages if (u.get("file"), u.get("line")) != definition_site
    ]

    # No references beyond the definition itself: a candidate for dead code.
    # Loosen this check if a small number of references should also be reported.
    if not actual_references:
        print(f"- Function '{function_name}' defined in {definition_file}:L{definition_line + 1} has no apparent internal usages.")

:::caution[Limitations of this heuristic]

*   **Dynamic Calls:** Functions called dynamically (e.g., through reflection, or if the function name is constructed from a string at runtime) won't be detected as used.
*   **Public APIs:** Functions intended for external use (e.g., library functions) will appear unused if the analysis is limited to the library's own codebase.
*   **Test Code:** If your test suite is separate and not part of the `Repository` path being analyzed, functions used only by tests might be flagged.
*   **Object Methods:** The `symbol_type="function"` filter might need adjustment or further logic if you are also looking for unused *methods* within classes, as their usage context is different.
:::