Skip to content
293 changes: 293 additions & 0 deletions AntiPattern_Remediator/full_repo_workflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,293 @@
"""
Full Repository Workflow - Process files with 100% test coverage from JaCoCo results
"""
from colorama import Fore, Style
from pathlib import Path
import os

# Import workflow utilities
from workflow.workflow_utils import parse_antipattern_results, get_repository_paths_from_files
from workflow.backup_manager import create_repository_backup
from workflow.results_manager import save_intermediate_results, create_processing_summary
from workflow.file_operations import read_java_file, save_refactored_code



def read_jacoco_results(jacoco_results_dir: str = "../jacoco_results") -> list:
"""Read the list of files with 100% coverage from JaCoCo results."""
results_path = Path(jacoco_results_dir)

if not results_path.exists():
print(Fore.RED + f"JaCoCo results directory not found: {results_path}" + Style.RESET_ALL)
return []

# Find all result files
all_files = list(results_path.glob("*.txt"))
if not all_files:
print(Fore.RED + f"No JaCoCo result files found in: {results_path}" + Style.RESET_ALL)
return []

# Separate combined file from individual repo files
combined_file = results_path / "all_100_percent_coverage_files.txt"
repo_files = [f for f in all_files if f.name != "all_100_percent_coverage_files.txt"]

print(Fore.CYAN + "\nAvailable JaCoCo result files:" + Style.RESET_ALL)
print("0) All repositories (combined)")

# Show individual repository options
for i, repo_file in enumerate(repo_files, 1):
# Extract repo name from filename (remove _100_percent_coverage.txt suffix)
repo_name = repo_file.stem.replace("_100_percent_coverage", "")
print(f"{i}) {repo_name}")

# Get user choice
while True:
try:
choice = input(f"\nSelect repository to process (0-{len(repo_files)}): ").strip()
choice_num = int(choice)

if choice_num == 0:
# Use combined file
selected_file = combined_file
repo_name = "All repositories"
break
elif 1 <= choice_num <= len(repo_files):
# Use specific repo file
selected_file = repo_files[choice_num - 1]
repo_name = selected_file.stem.replace("_100_percent_coverage", "")
break
else:
print(Fore.RED + f"Invalid choice. Please enter a number between 0 and {len(repo_files)}" + Style.RESET_ALL)
except ValueError:
print(Fore.RED + "Invalid input. Please enter a number." + Style.RESET_ALL)

# Read the selected file
if not selected_file.exists():
print(Fore.RED + f"Selected file not found: {selected_file}" + Style.RESET_ALL)
return []

try:
with open(selected_file, 'r') as f:
file_paths = [line.strip() for line in f if line.strip()]

print(Fore.GREEN + f"Selected: {repo_name}" + Style.RESET_ALL)
print(Fore.GREEN + f"Found {len(file_paths)} files with 100% test coverage" + Style.RESET_ALL)
return file_paths

except Exception as e:
print(Fore.RED + f"Error reading file {selected_file}: {e}" + Style.RESET_ALL)
return []



def process_java_files_with_workflow(file_paths: list, settings, db_manager, prompt_manager, langgraph):
"""Process each Java file through the agentic workflow."""
processed_files = []
failed_files = []

for i, file_path in enumerate(file_paths, 1):
print(Fore.BLUE + f"\n{'='*60}" + Style.RESET_ALL)
print(Fore.BLUE + f"Processing file {i}/{len(file_paths)}: {file_path}" + Style.RESET_ALL)
print(Fore.BLUE + f"{'='*60}" + Style.RESET_ALL)

# Read the Java file content
java_code = read_java_file(file_path)
if java_code is None:
failed_files.append(file_path)
continue

# Create initial state for this file
initial_state = {
"code": java_code,
"context": None,
"trove_context": None,
"antipatterns_scanner_results": None,
"refactoring_strategy_results": None,
"refactored_code": None,
"code_review_results": None,
"code_review_times": 0,
"msgs": [],
"answer": None,
"current_file_path": file_path # Track current file being processed
}

try:
# Run the agentic workflow
print(Fore.CYAN + "Running agentic workflow..." + Style.RESET_ALL)
final_state = langgraph.invoke(initial_state)

# Save intermediate results for analysis
save_intermediate_results(file_path, final_state, settings)

# Check if refactoring was successful
if final_state.get('refactored_code'):
# Parse anti-pattern results
antipatterns_found, antipatterns_count = parse_antipattern_results(final_state.get('antipatterns_scanner_results'))

# Save the refactored code back to the file
if save_refactored_code(file_path, final_state['refactored_code']):
processed_files.append({
'file_path': file_path,
'status': 'success',
'antipatterns_found': antipatterns_found,
'antipatterns_count': antipatterns_count,
'code_review_times': final_state.get('code_review_times', 0),
'has_intermediate_results': True
})
print(Fore.GREEN + f"Successfully processed: {file_path}" + Style.RESET_ALL)
else:
failed_files.append(file_path)
else:
# Parse anti-pattern results
antipatterns_found, antipatterns_count = parse_antipattern_results(final_state.get('antipatterns_scanner_results'))

print(Fore.YELLOW + f"No refactored code generated for: {file_path}" + Style.RESET_ALL)
processed_files.append({
'file_path': file_path,
'status': 'no_refactoring',
'antipatterns_found': antipatterns_found,
'antipatterns_count': antipatterns_count,
'code_review_times': final_state.get('code_review_times', 0),
'has_intermediate_results': True
})

except Exception as e:
print(Fore.RED + f"Error processing {file_path}: {e}" + Style.RESET_ALL)
failed_files.append(file_path)

return processed_files, failed_files


def run_full_repo_workflow(settings, db_manager, prompt_manager, langgraph):
"""Run the full repository workflow for files with 100% test coverage."""
print(Fore.BLUE + "\n=== Full Repository Workflow ===" + Style.RESET_ALL)
print("Process Java files with 100% test coverage from JaCoCo results...")

# Read JaCoCo results to get files with 100% test coverage
print(Fore.CYAN + "\nReading JaCoCo results..." + Style.RESET_ALL)
file_paths = read_jacoco_results()

if not file_paths:
print(Fore.RED + "No files found in JaCoCo results. Please run JaCoCo analysis first." + Style.RESET_ALL)
print("Run: python jacoco_tool/jacoco_analysis.py")
return False

# Extract repository paths from file paths
print(Fore.CYAN + "\nIdentifying repositories to backup..." + Style.RESET_ALL)
repo_paths = get_repository_paths_from_files(file_paths)

if not repo_paths:
print(Fore.RED + "No repository paths could be identified from the file paths." + Style.RESET_ALL)
return False

print(f"Found {len(repo_paths)} repositories to backup:")
for repo_path in sorted(repo_paths):
repo_name = Path(repo_path).name
print(f" • {repo_name} ({repo_path})")

# Ask user for confirmation to proceed with backup and processing
print(f"\nFiles to process ({len(file_paths)} total):")
for i, path in enumerate(file_paths[:5], 1): # Show first 5 files
print(f" {i}. {path}")
if len(file_paths) > 5:
print(f" ... and {len(file_paths) - 5} more files")

proceed = input(f"\nProceed with backing up {len(repo_paths)} repositories and processing {len(file_paths)} files? (Y/N): ").strip().lower()
if proceed != 'y':
print("Operation cancelled.")
return False

# Create repository backups
print(Fore.BLUE + f"\n{'='*60}" + Style.RESET_ALL)
print(Fore.BLUE + "CREATING REPOSITORY BACKUPS" + Style.RESET_ALL)
print(Fore.BLUE + f"{'='*60}" + Style.RESET_ALL)

backup_info = create_repository_backup(repo_paths)

if backup_info['failed_backups']:
print(Fore.RED + f"\nWarning: {len(backup_info['failed_backups'])} repositories failed to backup:" + Style.RESET_ALL)
for failed in backup_info['failed_backups']:
print(Fore.RED + f" {failed['repo_path']}: {failed['error']}" + Style.RESET_ALL)

continue_anyway = input("\nContinue processing despite backup failures? (Y/N): ").strip().lower()
if continue_anyway != 'y':
print("Operation cancelled due to backup failures.")
return False

print(Fore.GREEN + f"\nSuccessfully backed up {len(backup_info['backed_up_repos'])} repositories" + Style.RESET_ALL)
print(Fore.GREEN + f"Backup location: {backup_info['backup_dir']}" + Style.RESET_ALL)

# Process each file through the agentic workflow
print(Fore.BLUE + f"\n{'='*60}" + Style.RESET_ALL)
print(Fore.BLUE + "STARTING FILE PROCESSING" + Style.RESET_ALL)
print(Fore.BLUE + f"{'='*60}" + Style.RESET_ALL)

processed_files, failed_files = process_java_files_with_workflow(
file_paths, settings, db_manager, prompt_manager, langgraph
)

# Create comprehensive processing summary
summary_file = create_processing_summary(processed_files, backup_info)

# Generate summary report
print(Fore.BLUE + "\n" + "="*80 + Style.RESET_ALL)
print(Fore.BLUE + "BATCH PROCESSING SUMMARY" + Style.RESET_ALL)
print(Fore.BLUE + "="*80 + Style.RESET_ALL)

# Backup summary
print(Fore.CYAN + "Repository Backup Summary:" + Style.RESET_ALL)
print(f" Backup timestamp: {backup_info['timestamp']}")
print(f" Backup location: {backup_info['backup_dir']}")
print(f" Repositories backed up: {len(backup_info['backed_up_repos'])}")
if backup_info['failed_backups']:
print(f" Failed backups: {len(backup_info['failed_backups'])}")

# Processing summary
print(Fore.CYAN + "\nFile Processing Summary:" + Style.RESET_ALL)
print(f" Total files processed: {len(processed_files)}")
print(f" Failed files: {len(failed_files)}")

# Categorize results
successful_refactoring = [f for f in processed_files if f['status'] == 'success']
no_refactoring_needed = [f for f in processed_files if f['status'] == 'no_refactoring']
files_with_antipatterns = [f for f in processed_files if f.get('antipatterns_found', False)]
total_antipatterns = sum(f.get('antipatterns_count', 0) for f in processed_files)

print(Fore.GREEN + f" Successfully refactored: {len(successful_refactoring)}" + Style.RESET_ALL)
print(Fore.YELLOW + f" No refactoring needed: {len(no_refactoring_needed)}" + Style.RESET_ALL)
print(Fore.RED + f" Failed: {len(failed_files)}" + Style.RESET_ALL)
print(Fore.MAGENTA + f" Files with anti-patterns: {len(files_with_antipatterns)}" + Style.RESET_ALL)
print(Fore.MAGENTA + f" Total anti-patterns found: {total_antipatterns}" + Style.RESET_ALL)

# Statistics
if processed_files:
refactor_rate = len(successful_refactoring) / len(processed_files) * 100
antipattern_rate = len(files_with_antipatterns) / len(processed_files) * 100


print(Fore.CYAN + "\nProcessing Statistics:" + Style.RESET_ALL)
print(f" Refactoring success rate: {refactor_rate:.1f}%")
print(f" Anti-pattern detection rate: {antipattern_rate:.1f}%")
print(f" Total anti-patterns found: {total_antipatterns}")

# Show detailed results
if successful_refactoring:
print(Fore.GREEN + "\nSuccessfully refactored files:" + Style.RESET_ALL)
for file_info in successful_refactoring:
antipatterns_info = f" (antipatterns: {file_info.get('antipatterns_count', 0)})" if file_info.get('antipatterns_count', 0) > 0 else ""
print(f"{file_info['file_path']} (reviews: {file_info['code_review_times']}){antipatterns_info}")

if failed_files:
print(Fore.RED + "\nFailed files:" + Style.RESET_ALL)
for file_path in failed_files:
print(f"{file_path}")

print(Fore.GREEN + f"\nBatch processing complete!" + Style.RESET_ALL)
print(Fore.CYAN + f"Repository backups available at: {backup_info['backup_dir']}" + Style.RESET_ALL)
print(f"To restore a repository, copy from backup directory back to original location.")

# Intermediate results information
print(Fore.MAGENTA + f"\nIntermediate Results:" + Style.RESET_ALL)
print(f" Individual file analysis results saved in: ../processing_results/")
if summary_file:
print(f" Comprehensive summary saved: {Path(summary_file).name}")
36 changes: 27 additions & 9 deletions AntiPattern_Remediator/main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@

"""
Main entry point - Legacy Code Migration Tool
"""
from config.settings import initialize_settings
# from scripts import seed_database
from dotenv import load_dotenv
load_dotenv()
from colorama import Fore, Style
import os
from pathlib import Path
from full_repo_workflow import run_full_repo_workflow



def main():
"""Main function: Run antipattern analysis"""
Expand All @@ -18,9 +21,9 @@ def main():
provider_map = {"1": "ollama", "2": "ibm", "3": "vllm"}
provider = provider_map.get(choice, "ollama") # default to ollama

#Let us choose which DB to interact with
# Let us choose which DB to interact with
print("Choose your trove: 1) ChromaDB (VectorDB) 2) TinyDB (DocumentDB)")
db_choice = input("Choose 1 or 2: ").strip()
db_choice = input("Choose 1 or 2: ").strip()

# Initialize global settings with selected provider
settings = initialize_settings(provider)
Expand Down Expand Up @@ -74,6 +77,8 @@ def main():
}
}
"""

# Initial workflow state
initial_state = {
"code": legacy_code,
"context": None,
Expand All @@ -84,30 +89,43 @@ def main():
"code_review_results": None,
"code_review_times": 0,
"msgs": [],
"answer": None
"answer": None,

# ExplainerAgent fields
"explanation_response_raw": None,
"explanation_json": None,
}

#Setup Database
# Setup Database
if db_choice == "2":
print("Seeding TinyDB with AntiPattern Dataset")
seed_database.main()
db_manager = TinyDBManager()
print("Using TinyDB for knowledge retreival")
print("Using TinyDB for knowledge retrieval")
else:
vector_db = VectorDBManager()
db_manager = vector_db.get_db()
print("Using ChromaDB for knowledge retreival")
print("Using ChromaDB for knowledge retrieval")

retriever = db_manager.as_retriever()
langgraph = CreateGraph(db_manager, prompt_manager, retriever=retriever).workflow
final_state = langgraph.invoke(initial_state)

# Final results summary
print(Fore.GREEN + f"\nAnalysis Complete!" + Style.RESET_ALL)
print(f"Final state keys: {list(final_state.keys())}")
print(f"Context retrieved: {'Yes' if final_state.get('context') else 'No'}")
print(f"Analysis completed: {'Yes' if final_state.get('antipatterns_scanner_results') else 'No'}")
print(f"Refactored code: {'Yes' if final_state.get('refactored_code') else 'No'}")
print(f"Code review results: {final_state.get('code_review_times')}")

# Show explanation from ExplainerAgent
if final_state.get("explanation_json"):
import json
print(Fore.CYAN + "\n=== Explanation (JSON) ===" + Style.RESET_ALL)
print(json.dumps(final_state["explanation_json"], indent=2, ensure_ascii=False))
else:
print(Fore.RED + "\nNo explanation was generated." + Style.RESET_ALL)


if __name__ == "__main__":
main()
4 changes: 3 additions & 1 deletion AntiPattern_Remediator/src/core/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
from .refactor_strategist import RefactorStrategist
from .code_transformer import CodeTransformer
from .code_reviewer import CodeReviewerAgent
from .explainer import ExplainerAgent

__all__ = [
"AntipatternScanner",
"RefactorStrategist",
"CodeTransformer",
"CodeReviewerAgent"
"CodeReviewerAgent",
"ExplainerAgent"
]
Loading