import os
from pathlib import Path
from dataclasses import dataclass, field
 
 
@dataclass
class ModuleInfo:
    """Summary of one directory that contains recognised source files."""

    # Directory path relative to the scanned root.
    path: str
    # Number of files in the directory with a recognised extension.
    file_count: int
    # Combined line count of those files; unreadable files contribute 0.
    total_lines: int
    # Language display names (values of CodebaseExplorer.LANG_EXTENSIONS).
    languages: set[str] = field(default_factory=set)
    # File names that exactly match a known entry-point name.
    entry_points: list[str] = field(default_factory=list)
    # File names containing a key-file keyword (config, router, model, ...).
    key_files: list[str] = field(default_factory=list)


class CodebaseExplorer:
    """Walk a source tree and summarise every sub-directory that holds code."""

    LANG_EXTENSIONS = {
        ".py": "Python",
        ".ts": "TypeScript",
        ".tsx": "TypeScript/React",
        ".js": "JavaScript",
        ".java": "Java",
        ".go": "Go",
        ".rs": "Rust",
    }

    IGNORE_DIRS = {
        "node_modules", ".git", "__pycache__",
        ".next", "dist", "build", ".venv",
    }

    # File names treated as program entry points.
    _ENTRY_POINT_NAMES = frozenset({
        "main.py", "app.py", "index.ts", "index.tsx",
        "server.py", "main.ts", "cli.py",
    })

    # Lower-case substrings that mark a file as a key file.
    _KEY_FILE_KEYWORDS = (
        "config", "router", "model", "schema",
        "middleware", "service", "controller",
    )

    def __init__(self, root: str):
        """Remember *root* as the directory to scan."""
        self.root = Path(root)

    def scan_structure(self) -> dict[str, ModuleInfo]:
        """Step 1: scan the directory structure.

        Returns:
            A mapping from directory path (relative to the root) to its
            ModuleInfo. Only directories that directly contain at least
            one recognised source file are included. Files sitting
            directly in the root directory are not reported.
        """
        modules: dict[str, ModuleInfo] = {}

        for dirpath, dirnames, filenames in os.walk(self.root):
            # Prune in place so os.walk never descends into ignored
            # directories; sorting makes the traversal order reproducible.
            dirnames[:] = sorted(
                d for d in dirnames if d not in self.IGNORE_DIRS
            )

            rel_path = os.path.relpath(dirpath, self.root)
            if rel_path == ".":
                continue

            code_files = sorted(
                f for f in filenames
                if Path(f).suffix in self.LANG_EXTENSIONS
            )
            if not code_files:
                continue

            directory = Path(dirpath)
            modules[rel_path] = ModuleInfo(
                path=rel_path,
                file_count=len(code_files),
                total_lines=sum(
                    self._count_lines(directory / f) for f in code_files
                ),
                # code_files is already filtered to known extensions.
                languages={
                    self.LANG_EXTENSIONS[Path(f).suffix] for f in code_files
                },
                entry_points=self._find_entry_points(directory, code_files),
                key_files=self._identify_key_files(code_files),
            )

        return modules

    def _count_lines(self, filepath: Path) -> int:
        """Return the number of lines in *filepath*, or 0 if it is unreadable.

        The file is decoded as UTF-8 explicitly so the result does not
        depend on the platform's locale encoding. Any OS-level failure
        (missing target of a symlink, permission denied, ...) and any
        decoding failure count as zero lines instead of aborting the scan.
        """
        try:
            return len(filepath.read_text(encoding="utf-8").splitlines())
        except (UnicodeDecodeError, OSError):
            return 0

    def _find_entry_points(
        self, dirpath: Path, files: list[str]
    ) -> list[str]:
        """Return the names in *files* that are known entry-point names.

        *dirpath* is accepted for interface compatibility and is not used.
        """
        return [f for f in files if f in self._ENTRY_POINT_NAMES]

    def _identify_key_files(self, files: list[str]) -> list[str]:
        """Identify key files such as configuration, routers and models.

        A file qualifies when its lower-cased name contains any keyword.
        """
        return [
            f for f in files
            if any(p in f.lower() for p in self._KEY_FILE_KEYWORDS)
        ]

진입점 기반 탐색

코드베이스를 이해하는 가장 효과적인 방법은 진입점(entry point)에서 시작하여 호출 그래프를 따라가는 것입니다.

entry_point_tracer.py

python

import ast
from dataclasses import dataclass
 
 
@dataclass
class CallChain:
    caller: str
    callee: str
    file_path: str
    line_number: int
 
 
class EntryPointTracer(ast.NodeVisitor):
    """진입점에서 시작하여 호출 체인을 추적"""
 
    def __init__(self):
        self.call_chains: list[CallChain] = []
        self._current_function: str = "<module>"
 
    def visit_FunctionDef(self, node: ast.FunctionDef):
        previous = self._current_function
        self._current_function = node.name
        self.generic_visit(node)
        self._current_function = previous
 
    def visit_Call(self, node: ast.Call):
        callee_name = self._extract_call_name(node)
        if callee_name:
            self.call_chains.append(CallChain(
                caller=self._current_function,
                callee=callee_name,
                file_path="",  # 외부에서 설정
                line_number=node.lineno,
            ))
        self.generic_visit(node)
 
    def _extract_call_name(self, node: ast.Call) -> str | None:
        if isinstance(node.func, ast.Name):
            return node.func.id
        elif isinstance(node.func, ast.Attribute):
            return f"{ast.unparse(node.func.value)}.{node.func.attr}"
        return None

함수 및 모듈 설명 자동 생성

LLM 기반 함수 설명 생성

AST에서 추출한 함수 정보와 소스 코드를 LLM에 전달하여 자연어 설명을 생성합니다.

doc_generator.py

python

from dataclasses import dataclass
 
 
@dataclass
class FunctionDoc:
    """Structured documentation record for a single function."""

    # Function name.
    name: str
    # Short summary of what the function does.
    summary: str
    # One dict per parameter; the dict schema is not defined in this listing.
    parameters: list[dict]
    # Description of the return value.
    returns: str
    # Descriptions of side effects.
    side_effects: list[str]
    # Optional note about complexity; None when there is none.
    complexity_note: str | None
 
 
# Prompt template (Korean) asking the LLM to document one function.
# Filled with str.format; placeholders: {code}, {callers}, {callees}.
# The requested answer has five numbered parts: summary, parameters,
# return value, side effects, complexity note.
FUNCTION_DOC_PROMPT = """다음 함수를 분석하고 한국어로 문서를 생성하세요.
 
함수 코드:
---
{code}
---
 
호출 컨텍스트:
- 이 함수를 호출하는 함수: {callers}
- 이 함수가 호출하는 함수: {callees}
 
다음 형식으로 응답하세요:
1. 요약: 이 함수가 하는 일을 한 문장으로
2. 매개변수: 각 매개변수의 역할
3. 반환값: 반환값의 의미
4. 부수 효과: 외부 상태 변경 여부
5. 복잡도 참고: 리팩터링이 필요한 경우 이유"""
 
 
class DocumentationGenerator:
    """Generate code documentation by prompting an LLM client."""

    def __init__(self, llm_client):
        # The client must expose an awaitable `generate(prompt)` method.
        self.llm_client = llm_client

    async def generate_function_doc(
        self,
        code: str,
        callers: list[str],
        callees: list[str],
    ) -> FunctionDoc:
        """Ask the LLM to document one function and parse its answer.

        Args:
            code: Source text of the function.
            callers: Names of functions that call it (may be empty).
            callees: Names of functions it calls (may be empty).

        Returns:
            The FunctionDoc produced by `_parse_response`.
        """
        # Empty lists are rendered with the Korean placeholder for "none".
        caller_text = ", ".join(callers) if callers else "없음"
        callee_text = ", ".join(callees) if callees else "없음"
        prompt = FUNCTION_DOC_PROMPT.format(
            code=code,
            callers=caller_text,
            callees=callee_text,
        )
        return self._parse_response(await self.llm_client.generate(prompt))

    async def generate_module_overview(
        self,
        module_path: str,
        functions: list[dict],
        imports: list[str],
        exported_symbols: list[str],
    ) -> str:
        """Ask the LLM for a module overview and return its raw answer.

        Args:
            module_path: Path of the module being described.
            functions: Dicts that each provide a "name" key.
            imports: Names imported by the module.
            exported_symbols: Names the module exports.
        """
        function_names = ", ".join(fn["name"] for fn in functions)
        import_names = ", ".join(imports)
        export_names = ", ".join(exported_symbols)

        prompt = f"""다음 모듈을 분석하고 개요를 생성하세요.
 
모듈 경로: {module_path}
함수 목록: {function_names}
Import 목록: {import_names}
Export 목록: {export_names}
 
다음을 포함하세요:
1. 모듈의 핵심 역할 (한 문장)
2. 주요 기능 요약 (불릿 포인트)
3. 다른 모듈과의 관계
4. 주의사항"""

        overview = await self.llm_client.generate(prompt)
        return overview

    def _parse_response(self, response: str) -> FunctionDoc:
        """Turn an LLM answer into a FunctionDoc (simplified parser).

        Only the first line of the stripped response is kept, as the
        summary; every other field is left at its empty value.
        """
        first_line, _, _ = response.strip().partition("\n")
        return FunctionDoc(
            name="",
            summary=first_line,
            parameters=[],
            returns="",
            side_effects=[],
            complexity_note=None,
        )

Tip

함수 문서를 생성할 때 호출 컨텍스트(callers, callees)를 함께 제공하면 LLM이 함수의 역할을 더 정확하게 이해합니다. 고립된 함수 코드만으로는 비즈니스 의미를 파악하기 어렵습니다.

의존성 그래프 추출

모듈 간 의존성 시각화

코드베이스의 구조를 이해하려면 모듈 간 의존관계를 시각적으로 파악하는 것이 중요합니다.

dependency_graph.py

python

import ast
from collections import defaultdict
from pathlib import Path
 
 
class DependencyGraphBuilder:
    """Build an import-dependency graph for the Python modules under a root."""

    # Directory names whose contents are never scanned.
    _SKIPPED_DIRS = frozenset({"__pycache__", ".venv"})

    def __init__(self, project_root: str):
        self.root = Path(project_root)
        # (importing module, imported module) pairs, without duplicates.
        self.edges: list[tuple[str, str]] = []
        # Module name -> {"lines": int, "type": str}.
        self.node_metadata: dict[str, dict] = {}

    def build(self) -> dict:
        """Scan every ``*.py`` file under the root and rebuild the graph.

        State from earlier calls is discarded, so calling ``build`` twice
        does not duplicate edges.

        Returns:
            ``{"nodes": node_metadata, "edges": edges}``.
        """
        self.edges = []
        self.node_metadata = {}
        seen_edges: set[tuple[str, str]] = set()

        # Sorted so node and edge order is reproducible across runs.
        for py_file in sorted(self.root.rglob("*.py")):
            # Match whole path components *below* the root; a substring
            # test on the absolute path would skip the entire project
            # whenever the root path itself contains ".venv".
            relative = py_file.relative_to(self.root)
            if self._SKIPPED_DIRS.intersection(relative.parts):
                continue

            module = self._path_to_module(py_file)
            self.node_metadata[module] = {
                "lines": self._count_lines(py_file),
                "type": self._classify_module(py_file),
            }

            for imp in self._extract_imports(py_file):
                edge = (module, imp)
                if edge not in seen_edges and self._is_internal(imp):
                    seen_edges.add(edge)
                    self.edges.append(edge)

        return {
            "nodes": self.node_metadata,
            "edges": self.edges,
        }

    def to_mermaid(self) -> str:
        """Render the current nodes and edges as a Mermaid ``graph TD``."""
        lines = ["graph TD"]

        # Node definitions: id is the dotted name with "_" for ".",
        # label is the last component of the dotted name.
        for node in self.node_metadata:
            short_name = node.split(".")[-1]
            node_id = node.replace(".", "_")
            lines.append(f"    {node_id}[{short_name}]")

        # Edge definitions.
        for source, target in self.edges:
            src_id = source.replace(".", "_")
            tgt_id = target.replace(".", "_")
            lines.append(f"    {src_id} --> {tgt_id}")

        return "\n".join(lines)

    def _extract_imports(self, filepath: Path) -> list[str]:
        """Return the dotted module names imported by *filepath*.

        Relative imports are resolved against the file's package, so
        ``from .models import X`` inside ``pkg/service.py`` yields
        ``pkg.models``. Files that cannot be read or parsed yield ``[]``.
        """
        try:
            tree = ast.parse(filepath.read_text(encoding="utf-8"))
        except (SyntaxError, ValueError, OSError):
            # ValueError covers UnicodeDecodeError and, on older Python
            # versions, source text containing null bytes.
            return []

        try:
            package = filepath.relative_to(self.root).parent.parts
        except ValueError:
            # File outside the root: relative imports cannot be anchored.
            package = ()

        imports: list[str] = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                imports.extend(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom):
                if node.level == 0:
                    if node.module:
                        imports.append(node.module)
                    continue

                # level 1 is the current package; each extra level climbs
                # one package higher.
                climb = node.level - 1
                if climb > len(package):
                    continue  # would point above the project root
                base = package[: len(package) - climb]

                if node.module:
                    imports.append(".".join((*base, node.module)))
                    continue

                # `from . import name`: name is either a submodule or an
                # attribute of the package itself.
                for alias in node.names:
                    candidate = ".".join((*base, alias.name))
                    if base and not self._is_internal(candidate):
                        candidate = ".".join(base)
                    imports.append(candidate)
        return imports

    def _path_to_module(self, filepath: Path) -> str:
        """Convert a file path under the root into a dotted module name.

        A package's ``__init__.py`` maps to the package name itself so the
        node name matches what import statements refer to.
        """
        parts = filepath.relative_to(self.root).with_suffix("").parts
        if len(parts) > 1 and parts[-1] == "__init__":
            parts = parts[:-1]
        # Joining path components is separator-independent.
        return ".".join(parts)

    def _is_internal(self, module_name: str) -> bool:
        """Return True if *module_name* maps to a module or package under the root."""
        if not module_name:
            return False
        possible_path = self.root.joinpath(*module_name.split("."))
        return (
            possible_path.with_suffix(".py").exists()
            or (possible_path / "__init__.py").exists()
        )

    def _count_lines(self, filepath: Path) -> int:
        """Return the number of lines in *filepath*, or 0 if it is unreadable."""
        try:
            return len(filepath.read_text(encoding="utf-8").splitlines())
        except (UnicodeDecodeError, OSError):
            return 0

    def _classify_module(self, filepath: Path) -> str:
        """Classify a module by file stem: model, controller, service, test or module."""
        name = filepath.stem
        if name in ("models", "model", "schema", "schemas"):
            return "model"
        elif name in ("routes", "router", "views", "controllers"):
            return "controller"
        elif name in ("services", "service"):
            return "service"
        elif name.startswith("test_") or name.endswith("_test"):
            return "test"
        return "module"

생성된 의존성 그래프 예시

아키텍처 다이어그램 자동 생성

의존성 그래프에서 한 단계 더 나아가, LLM을 활용하면 아키텍처 수준의 다이어그램을 자동으로 생성할 수 있습니다.

architecture_diagrammer.py

python

# Prompt template (Korean) asking the LLM for an architecture analysis.
# Filled with str.format; placeholders: {modules}, {dependencies}.
# The requested answer has four parts: architecture description, pattern
# identification, a Mermaid diagram grouped by layer, potential problems.
ARCHITECTURE_PROMPT = """다음은 프로젝트의 모듈 의존성 정보입니다.
 
모듈 목록:
{modules}
 
의존관계:
{dependencies}
 
이 정보를 바탕으로 다음을 생성하세요:
 
1. 시스템 아키텍처 설명 (3-5문장)
2. 아키텍처 패턴 식별 (MVC, 레이어드, 헥사고날 등)
3. Mermaid 다이어그램 (레이어별로 그룹핑)
4. 잠재적 아키텍처 문제점"""
 
 
class ArchitectureDiagrammer:
    """Ask an LLM for an architecture write-up based on dependency data."""

    def __init__(self, llm_client):
        # The client must expose an awaitable `generate(prompt)` method.
        self.llm_client = llm_client

    async def generate_architecture(
        self,
        graph: dict,
    ) -> dict:
        """Describe the architecture implied by *graph*.

        Args:
            graph: Mapping with "nodes" (module name -> metadata holding
                "type" and "lines") and "edges" (source/target pairs).

        Returns:
            A dict with the raw LLM answer under "description" and the
            fixed marker "auto" under "generated_at".
        """
        module_lines = []
        for name, meta in graph["nodes"].items():
            module_lines.append(f"- {name}: {meta['type']} ({meta['lines']}줄)")

        dependency_lines = [f"- {src} -> {tgt}" for src, tgt in graph["edges"]]

        prompt = ARCHITECTURE_PROMPT.format(
            modules="\n".join(module_lines),
            dependencies="\n".join(dependency_lines),
        )
        description = await self.llm_client.generate(prompt)

        result = {"description": description, "generated_at": "auto"}
        return result

Info

LLM이 생성한 아키텍처 다이어그램은 코드베이스의 "현재 상태"를 반영합니다. 설계 의도와 다를 수 있으며, 이러한 차이 자체가 아키텍처 드리프트(Architecture Drift)를 발견하는 단서가 됩니다.

인라인 주석과 문서 자동 생성

JSDoc / docstring 자동 생성

jsdoc-generator.ts

typescript

import { Project, FunctionDeclaration } from "ts-morph";
 
/** One generated JSDoc comment together with the function it belongs to. */
interface GeneratedDoc {
  /** Name of the documented function; "anonymous" when it has no name. */
  functionName: string;
  /** JSDoc text returned by the LLM. */
  jsdoc: string;
  /** Start line of the function; presumably where the JSDoc is inserted (confirm with the consumer). */
  insertLine: number;
}
 
/**
 * Generate JSDoc text for every function in a file that has none yet.
 *
 * Functions are processed one at a time, awaiting each LLM call before
 * moving on to the next function.
 *
 * @param project - ts-morph project that contains the file
 * @param filePath - Path of the source file; throws if it is not in the project
 * @param llmClient - Client whose `generate(prompt)` resolves to the JSDoc text
 * @returns One entry per function that had no JSDoc, in source order
 */
async function generateJSDocs(
  project: Project,
  filePath: string,
  llmClient: LLMClient,
): Promise<GeneratedDoc[]> {
  const results: GeneratedDoc[] = [];
  const declarations = project.getSourceFileOrThrow(filePath).getFunctions();

  for (const declaration of declarations) {
    // Leave functions that already carry a JSDoc untouched.
    const alreadyDocumented = declaration.getJsDocs().length > 0;
    if (alreadyDocumented) {
      continue;
    }

    const sourceText = declaration.getFullText();
    const parameterInfo = declaration.getParameters().map((param) => {
      return { name: param.getName(), type: param.getType().getText() };
    });
    const returnTypeText = declaration.getReturnType().getText();

    const jsdoc = await llmClient.generate(
      buildJSDocPrompt(sourceText, parameterInfo, returnTypeText),
    );

    results.push({
      functionName: declaration.getName() ?? "anonymous",
      jsdoc,
      insertLine: declaration.getStartLineNumber(),
    });
  }

  return results;
}
 
/**
 * Build the (Korean) prompt that asks the LLM for a JSDoc comment.
 *
 * @param code - Source text of the function
 * @param params - Parameter names and type texts, rendered as "name: type"
 * @param returnType - Text of the return type
 * @returns The prompt string
 */
function buildJSDocPrompt(
  code: string,
  params: Array<{ name: string; type: string }>,
  returnType: string,
): string {
  const signature = params
    .map(({ name, type }) => `${name}: ${type}`)
    .join(", ");

  // Separator lines hold a single space, exactly as in the prompt text.
  const promptLines = [
    "다음 TypeScript 함수에 대한 JSDoc 주석을 생성하세요.",
    "한국어로 설명하되, 매개변수명과 타입은 영어 그대로 유지하세요.",
    " ",
    "함수 코드:",
    "```typescript",
    code,
    "```",
    " ",
    `매개변수: ${signature}`,
    `반환 타입: ${returnType}`,
    " ",
    "JSDoc 형식으로만 응답하세요.",
  ];
  return promptLines.join("\n");
}

생성 결과 예시

example-output.ts

typescript

/**
 * Fetches the user's order list and computes the total amount.
 * Expired orders are filtered out automatically.
 *
 * @param userId - Unique identifier of the user to look up
 * @param options - Query options (pagination, sorting)
 * @returns Result object containing the order list and the total amount
 * @throws UserNotFoundException When the user cannot be found
 */
async function getUserOrders(
  userId: string,
  options: QueryOptions,
): Promise<OrderResult> {
  // ... (body elided in this example)
}

지식 전달 가속 파이프라인

지금까지의 도구들을 결합하면, 새로운 팀원이 코드베이스를 빠르게 이해할 수 있는 온보딩 파이프라인을 구축할 수 있습니다.

Warning

자동 생성된 문서는 반드시 사람의 검토를 거쳐야 합니다. LLM이 코드의 의도를 잘못 해석하거나, 비즈니스 맥락을 놓칠 수 있습니다. 자동 생성은 초안 작성의 가속화로 활용하고, 최종 검증은 도메인 전문가가 수행해야 합니다.

정리

레거시 코드 이해는 소프트웨어 개발에서 가장 시간이 많이 소요되는 작업 중 하나입니다. LLM과 AST를 결합한 자동화 도구를 활용하면 코드베이스 탐색, 함수 설명 생성, 의존성 그래프 추출, 아키텍처 다이어그램 생성, JSDoc/docstring 생성까지 체계적으로 자동화할 수 있습니다.

핵심은 AST가 제공하는 정확한 구조 정보를 LLM의 의미 분석 능력과 결합하는 것입니다. 이를 통해 수 주가 걸리던 코드베이스 이해를 수 일로 단축하고, 지식 전달의 병목을 해소할 수 있습니다.

다음 장 미리보기

4장에서는 코드의 문제를 체계적으로 탐지하는 방법을 다룹니다. 코드 스멜(Code Smell)의 분류 체계, LLM 기반 스멜 탐지 기법, 그리고 CodeScene의 Code Health 메트릭을 활용한 기술 부채 정량화 방법을 학습합니다.

이 글이 도움이 되셨나요?

AI / ML

4장: 코드 스멜 감지와 기술 부채 정량화

LLM 기반 코드 스멜 탐지와 CodeScene Code Health 메트릭을 활용한 기술 부채 정량화를 학습합니다. 우선순위 기반 리팩터링 계획 수립까지 다룹니다.

2026년 3월 11일·17분

AI / ML

2장: AST와 LLM 하이브리드 분석

AST 기반 정적 분석과 LLM의 의미 분석을 결합하는 하이브리드 접근법을 학습합니다. cAST 청킹, 순환 복잡도, 결합도/응집도 메트릭을 Python과 TypeScript로 실습합니다.

2026년 3월 7일·17분

AI / ML

5장: LLM 기반 자동 리팩터링

LLM을 활용한 자동 리팩터링의 패턴, 멀티에이전트 아키텍처, 검증 파이프라인을 학습합니다. 37%에서 98%로 정밀도를 끌어올리는 실전 기법을 다룹니다.

2026년 3월 13일·15분

2026년 3월 9일 · AI / ML

3장: 레거시 코드 이해와 문서화

18분 · 1,301자 · 10개 섹션

code-quality ai llm devtools

code-analysis 3 / 10

1 2 3 4 5 6 7 8 9 10

이전: 2장: AST와 LLM 하이브리드 분석 · 다음: 4장: 코드 스멜 감지와 기술 부채 정량화

학습 목표

LLM 기반 코드베이스 자동 탐색의 전략과 기법을 이해합니다
함수 및 모듈 수준의 설명을 자동 생성하는 방법을 학습합니다
의존성 그래프를 추출하고 아키텍처 다이어그램을 자동 생성합니다
인라인 주석, JSDoc, docstring을 자동으로 생성하는 파이프라인을 구축합니다

python

import os
from pathlib import Path
from dataclasses import dataclass, field
 
 
@dataclass
class ModuleInfo:
    """Summary of one directory that contains recognised source files."""

    # Directory path relative to the scanned root.
    path: str
    # Number of files in the directory with a recognised extension.
    file_count: int
    # Combined line count of those files; unreadable files contribute 0.
    total_lines: int
    # Language display names (values of CodebaseExplorer.LANG_EXTENSIONS).
    languages: set[str] = field(default_factory=set)
    # File names that exactly match a known entry-point name.
    entry_points: list[str] = field(default_factory=list)
    # File names containing a key-file keyword (config, router, model, ...).
    key_files: list[str] = field(default_factory=list)


class CodebaseExplorer:
    """Walk a source tree and summarise every sub-directory that holds code."""

    LANG_EXTENSIONS = {
        ".py": "Python",
        ".ts": "TypeScript",
        ".tsx": "TypeScript/React",
        ".js": "JavaScript",
        ".java": "Java",
        ".go": "Go",
        ".rs": "Rust",
    }

    IGNORE_DIRS = {
        "node_modules", ".git", "__pycache__",
        ".next", "dist", "build", ".venv",
    }

    # File names treated as program entry points.
    _ENTRY_POINT_NAMES = frozenset({
        "main.py", "app.py", "index.ts", "index.tsx",
        "server.py", "main.ts", "cli.py",
    })

    # Lower-case substrings that mark a file as a key file.
    _KEY_FILE_KEYWORDS = (
        "config", "router", "model", "schema",
        "middleware", "service", "controller",
    )

    def __init__(self, root: str):
        """Remember *root* as the directory to scan."""
        self.root = Path(root)

    def scan_structure(self) -> dict[str, ModuleInfo]:
        """Step 1: scan the directory structure.

        Returns:
            A mapping from directory path (relative to the root) to its
            ModuleInfo. Only directories that directly contain at least
            one recognised source file are included. Files sitting
            directly in the root directory are not reported.
        """
        modules: dict[str, ModuleInfo] = {}

        for dirpath, dirnames, filenames in os.walk(self.root):
            # Prune in place so os.walk never descends into ignored
            # directories; sorting makes the traversal order reproducible.
            dirnames[:] = sorted(
                d for d in dirnames if d not in self.IGNORE_DIRS
            )

            rel_path = os.path.relpath(dirpath, self.root)
            if rel_path == ".":
                continue

            code_files = sorted(
                f for f in filenames
                if Path(f).suffix in self.LANG_EXTENSIONS
            )
            if not code_files:
                continue

            directory = Path(dirpath)
            modules[rel_path] = ModuleInfo(
                path=rel_path,
                file_count=len(code_files),
                total_lines=sum(
                    self._count_lines(directory / f) for f in code_files
                ),
                # code_files is already filtered to known extensions.
                languages={
                    self.LANG_EXTENSIONS[Path(f).suffix] for f in code_files
                },
                entry_points=self._find_entry_points(directory, code_files),
                key_files=self._identify_key_files(code_files),
            )

        return modules

    def _count_lines(self, filepath: Path) -> int:
        """Return the number of lines in *filepath*, or 0 if it is unreadable.

        The file is decoded as UTF-8 explicitly so the result does not
        depend on the platform's locale encoding. Any OS-level failure
        (missing target of a symlink, permission denied, ...) and any
        decoding failure count as zero lines instead of aborting the scan.
        """
        try:
            return len(filepath.read_text(encoding="utf-8").splitlines())
        except (UnicodeDecodeError, OSError):
            return 0

    def _find_entry_points(
        self, dirpath: Path, files: list[str]
    ) -> list[str]:
        """Return the names in *files* that are known entry-point names.

        *dirpath* is accepted for interface compatibility and is not used.
        """
        return [f for f in files if f in self._ENTRY_POINT_NAMES]

    def _identify_key_files(self, files: list[str]) -> list[str]:
        """Identify key files such as configuration, routers and models.

        A file qualifies when its lower-cased name contains any keyword.
        """
        return [
            f for f in files
            if any(p in f.lower() for p in self._KEY_FILE_KEYWORDS)
        ]

진입점 기반 탐색

코드베이스를 이해하는 가장 효과적인 방법은 진입점(entry point)에서 시작하여 호출 그래프를 따라가는 것입니다.

entry_point_tracer.py

python

import ast
from dataclasses import dataclass
 
 
@dataclass
class CallChain:
    caller: str
    callee: str
    file_path: str
    line_number: int
 
 
class EntryPointTracer(ast.NodeVisitor):
    """진입점에서 시작하여 호출 체인을 추적"""
 
    def __init__(self):
        self.call_chains: list[CallChain] = []
        self._current_function: str = "<module>"
 
    def visit_FunctionDef(self, node: ast.FunctionDef):
        previous = self._current_function
        self._current_function = node.name
        self.generic_visit(node)
        self._current_function = previous
 
    def visit_Call(self, node: ast.Call):
        callee_name = self._extract_call_name(node)
        if callee_name:
            self.call_chains.append(CallChain(
                caller=self._current_function,
                callee=callee_name,
                file_path="",  # 외부에서 설정
                line_number=node.lineno,
            ))
        self.generic_visit(node)
 
    def _extract_call_name(self, node: ast.Call) -> str | None:
        if isinstance(node.func, ast.Name):
            return node.func.id
        elif isinstance(node.func, ast.Attribute):
            return f"{ast.unparse(node.func.value)}.{node.func.attr}"
        return None

함수 및 모듈 설명 자동 생성

LLM 기반 함수 설명 생성

AST에서 추출한 함수 정보와 소스 코드를 LLM에 전달하여 자연어 설명을 생성합니다.

doc_generator.py

python

from dataclasses import dataclass
 
 
@dataclass
class FunctionDoc:
    """Structured documentation record for a single function."""

    # Function name.
    name: str
    # Short summary of what the function does.
    summary: str
    # One dict per parameter; the dict schema is not defined in this listing.
    parameters: list[dict]
    # Description of the return value.
    returns: str
    # Descriptions of side effects.
    side_effects: list[str]
    # Optional note about complexity; None when there is none.
    complexity_note: str | None
 
 
# Prompt template (Korean) asking the LLM to document one function.
# Filled with str.format; placeholders: {code}, {callers}, {callees}.
# The requested answer has five numbered parts: summary, parameters,
# return value, side effects, complexity note.
FUNCTION_DOC_PROMPT = """다음 함수를 분석하고 한국어로 문서를 생성하세요.
 
함수 코드:
---
{code}
---
 
호출 컨텍스트:
- 이 함수를 호출하는 함수: {callers}
- 이 함수가 호출하는 함수: {callees}
 
다음 형식으로 응답하세요:
1. 요약: 이 함수가 하는 일을 한 문장으로
2. 매개변수: 각 매개변수의 역할
3. 반환값: 반환값의 의미
4. 부수 효과: 외부 상태 변경 여부
5. 복잡도 참고: 리팩터링이 필요한 경우 이유"""
 
 
class DocumentationGenerator:
    """Generate code documentation by prompting an LLM client."""

    def __init__(self, llm_client):
        # The client must expose an awaitable `generate(prompt)` method.
        self.llm_client = llm_client

    async def generate_function_doc(
        self,
        code: str,
        callers: list[str],
        callees: list[str],
    ) -> FunctionDoc:
        """Ask the LLM to document one function and parse its answer.

        Args:
            code: Source text of the function.
            callers: Names of functions that call it (may be empty).
            callees: Names of functions it calls (may be empty).

        Returns:
            The FunctionDoc produced by `_parse_response`.
        """
        # Empty lists are rendered with the Korean placeholder for "none".
        caller_text = ", ".join(callers) if callers else "없음"
        callee_text = ", ".join(callees) if callees else "없음"
        prompt = FUNCTION_DOC_PROMPT.format(
            code=code,
            callers=caller_text,
            callees=callee_text,
        )
        return self._parse_response(await self.llm_client.generate(prompt))

    async def generate_module_overview(
        self,
        module_path: str,
        functions: list[dict],
        imports: list[str],
        exported_symbols: list[str],
    ) -> str:
        """Ask the LLM for a module overview and return its raw answer.

        Args:
            module_path: Path of the module being described.
            functions: Dicts that each provide a "name" key.
            imports: Names imported by the module.
            exported_symbols: Names the module exports.
        """
        function_names = ", ".join(fn["name"] for fn in functions)
        import_names = ", ".join(imports)
        export_names = ", ".join(exported_symbols)

        prompt = f"""다음 모듈을 분석하고 개요를 생성하세요.
 
모듈 경로: {module_path}
함수 목록: {function_names}
Import 목록: {import_names}
Export 목록: {export_names}
 
다음을 포함하세요:
1. 모듈의 핵심 역할 (한 문장)
2. 주요 기능 요약 (불릿 포인트)
3. 다른 모듈과의 관계
4. 주의사항"""

        overview = await self.llm_client.generate(prompt)
        return overview

    def _parse_response(self, response: str) -> FunctionDoc:
        """Turn an LLM answer into a FunctionDoc (simplified parser).

        Only the first line of the stripped response is kept, as the
        summary; every other field is left at its empty value.
        """
        first_line, _, _ = response.strip().partition("\n")
        return FunctionDoc(
            name="",
            summary=first_line,
            parameters=[],
            returns="",
            side_effects=[],
            complexity_note=None,
        )

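Tip

함수 문서를 생성할 때 호출 컨텍스트(callers, callees)를 함께 제공하면 LLM이 함수의 역할을 더 정확하게 이해합니다. 고립된 함수 코드만으로는 비즈니스 의미를 파악하기 어렵습니다.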

의존성 그래프 추출

모듈 간 의존성 시각화

코드베이스의 구조를 이해하려면 모듈 간 의존관계를 시각적으로 파악하는 것이 중요합니다.

dependency_graph.py

python

import ast
from collections import defaultdict
from pathlib import Path
 
 
class DependencyGraphBuilder:
    """Build an import-dependency graph for the Python modules under a root."""

    # Directory names whose contents are never scanned.
    _SKIPPED_DIRS = frozenset({"__pycache__", ".venv"})

    def __init__(self, project_root: str):
        self.root = Path(project_root)
        # (importing module, imported module) pairs, without duplicates.
        self.edges: list[tuple[str, str]] = []
        # Module name -> {"lines": int, "type": str}.
        self.node_metadata: dict[str, dict] = {}

    def build(self) -> dict:
        """Scan every ``*.py`` file under the root and rebuild the graph.

        State from earlier calls is discarded, so calling ``build`` twice
        does not duplicate edges.

        Returns:
            ``{"nodes": node_metadata, "edges": edges}``.
        """
        self.edges = []
        self.node_metadata = {}
        seen_edges: set[tuple[str, str]] = set()

        # Sorted so node and edge order is reproducible across runs.
        for py_file in sorted(self.root.rglob("*.py")):
            # Match whole path components *below* the root; a substring
            # test on the absolute path would skip the entire project
            # whenever the root path itself contains ".venv".
            relative = py_file.relative_to(self.root)
            if self._SKIPPED_DIRS.intersection(relative.parts):
                continue

            module = self._path_to_module(py_file)
            self.node_metadata[module] = {
                "lines": self._count_lines(py_file),
                "type": self._classify_module(py_file),
            }

            for imp in self._extract_imports(py_file):
                edge = (module, imp)
                if edge not in seen_edges and self._is_internal(imp):
                    seen_edges.add(edge)
                    self.edges.append(edge)

        return {
            "nodes": self.node_metadata,
            "edges": self.edges,
        }

    def to_mermaid(self) -> str:
        """Render the current nodes and edges as a Mermaid ``graph TD``."""
        lines = ["graph TD"]

        # Node definitions: id is the dotted name with "_" for ".",
        # label is the last component of the dotted name.
        for node in self.node_metadata:
            short_name = node.split(".")[-1]
            node_id = node.replace(".", "_")
            lines.append(f"    {node_id}[{short_name}]")

        # Edge definitions.
        for source, target in self.edges:
            src_id = source.replace(".", "_")
            tgt_id = target.replace(".", "_")
            lines.append(f"    {src_id} --> {tgt_id}")

        return "\n".join(lines)

    def _extract_imports(self, filepath: Path) -> list[str]:
        """Return the dotted module names imported by *filepath*.

        Relative imports are resolved against the file's package, so
        ``from .models import X`` inside ``pkg/service.py`` yields
        ``pkg.models``. Files that cannot be read or parsed yield ``[]``.
        """
        try:
            tree = ast.parse(filepath.read_text(encoding="utf-8"))
        except (SyntaxError, ValueError, OSError):
            # ValueError covers UnicodeDecodeError and, on older Python
            # versions, source text containing null bytes.
            return []

        try:
            package = filepath.relative_to(self.root).parent.parts
        except ValueError:
            # File outside the root: relative imports cannot be anchored.
            package = ()

        imports: list[str] = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                imports.extend(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom):
                if node.level == 0:
                    if node.module:
                        imports.append(node.module)
                    continue

                # level 1 is the current package; each extra level climbs
                # one package higher.
                climb = node.level - 1
                if climb > len(package):
                    continue  # would point above the project root
                base = package[: len(package) - climb]

                if node.module:
                    imports.append(".".join((*base, node.module)))
                    continue

                # `from . import name`: name is either a submodule or an
                # attribute of the package itself.
                for alias in node.names:
                    candidate = ".".join((*base, alias.name))
                    if base and not self._is_internal(candidate):
                        candidate = ".".join(base)
                    imports.append(candidate)
        return imports

    def _path_to_module(self, filepath: Path) -> str:
        """Convert a file path under the root into a dotted module name.

        A package's ``__init__.py`` maps to the package name itself so the
        node name matches what import statements refer to.
        """
        parts = filepath.relative_to(self.root).with_suffix("").parts
        if len(parts) > 1 and parts[-1] == "__init__":
            parts = parts[:-1]
        # Joining path components is separator-independent.
        return ".".join(parts)

    def _is_internal(self, module_name: str) -> bool:
        """Return True if *module_name* maps to a module or package under the root."""
        if not module_name:
            return False
        possible_path = self.root.joinpath(*module_name.split("."))
        return (
            possible_path.with_suffix(".py").exists()
            or (possible_path / "__init__.py").exists()
        )

    def _count_lines(self, filepath: Path) -> int:
        """Return the number of lines in *filepath*, or 0 if it is unreadable."""
        try:
            return len(filepath.read_text(encoding="utf-8").splitlines())
        except (UnicodeDecodeError, OSError):
            return 0

    def _classify_module(self, filepath: Path) -> str:
        """Classify a module by file stem: model, controller, service, test or module."""
        name = filepath.stem
        if name in ("models", "model", "schema", "schemas"):
            return "model"
        elif name in ("routes", "router", "views", "controllers"):
            return "controller"
        elif name in ("services", "service"):
            return "service"
        elif name.startswith("test_") or name.endswith("_test"):
            return "test"
        return "module"

생성된 의존성 그래프 예시

아키텍처 다이어그램 자동 생성

의존성 그래프에서 한 단계 더 나아가, LLM을 활용하면 아키텍처 수준의 다이어그램을 자동으로 생성할 수 있습니다.

architecture_diagrammer.py

python

# Prompt template (Korean) asking the LLM for an architecture analysis.
# Filled with str.format; placeholders: {modules}, {dependencies}.
# The requested answer has four parts: architecture description, pattern
# identification, a Mermaid diagram grouped by layer, potential problems.
ARCHITECTURE_PROMPT = """다음은 프로젝트의 모듈 의존성 정보입니다.
 
모듈 목록:
{modules}
 
의존관계:
{dependencies}
 
이 정보를 바탕으로 다음을 생성하세요:
 
1. 시스템 아키텍처 설명 (3-5문장)
2. 아키텍처 패턴 식별 (MVC, 레이어드, 헥사고날 등)
3. Mermaid 다이어그램 (레이어별로 그룹핑)
4. 잠재적 아키텍처 문제점"""
 
 
class ArchitectureDiagrammer:
    """Ask an LLM for an architecture write-up based on dependency data."""

    def __init__(self, llm_client):
        # The client must expose an awaitable `generate(prompt)` method.
        self.llm_client = llm_client

    async def generate_architecture(
        self,
        graph: dict,
    ) -> dict:
        """Describe the architecture implied by *graph*.

        Args:
            graph: Mapping with "nodes" (module name -> metadata holding
                "type" and "lines") and "edges" (source/target pairs).

        Returns:
            A dict with the raw LLM answer under "description" and the
            fixed marker "auto" under "generated_at".
        """
        module_lines = []
        for name, meta in graph["nodes"].items():
            module_lines.append(f"- {name}: {meta['type']} ({meta['lines']}줄)")

        dependency_lines = [f"- {src} -> {tgt}" for src, tgt in graph["edges"]]

        prompt = ARCHITECTURE_PROMPT.format(
            modules="\n".join(module_lines),
            dependencies="\n".join(dependency_lines),
        )
        description = await self.llm_client.generate(prompt)

        result = {"description": description, "generated_at": "auto"}
        return result

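Info

LLM이 생성한 아키텍처 다이어그램은 코드베이스의 "현재 상태"를 반영합니다. 설계 의도와 다를 수 있으며, 이러한 차이 자체가 아키텍처 드리프트(Architecture Drift)를 발견하는 단서가 됩니다.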

인라인 주석과 문서 자동 생성

JSDoc / docstring 자동 생성

jsdoc-generator.ts

typescript

import { Project, FunctionDeclaration } from "ts-morph";
 
/** One generated JSDoc comment together with the function it belongs to. */
interface GeneratedDoc {
  /** Name of the documented function; "anonymous" when it has no name. */
  functionName: string;
  /** JSDoc text returned by the LLM. */
  jsdoc: string;
  /** Start line of the function; presumably where the JSDoc is inserted (confirm with the consumer). */
  insertLine: number;
}
 
/**
 * Generate JSDoc text for every function in a file that has none yet.
 *
 * Functions are processed one at a time, awaiting each LLM call before
 * moving on to the next function.
 *
 * @param project - ts-morph project that contains the file
 * @param filePath - Path of the source file; throws if it is not in the project
 * @param llmClient - Client whose `generate(prompt)` resolves to the JSDoc text
 * @returns One entry per function that had no JSDoc, in source order
 */
async function generateJSDocs(
  project: Project,
  filePath: string,
  llmClient: LLMClient,
): Promise<GeneratedDoc[]> {
  const results: GeneratedDoc[] = [];
  const declarations = project.getSourceFileOrThrow(filePath).getFunctions();

  for (const declaration of declarations) {
    // Leave functions that already carry a JSDoc untouched.
    const alreadyDocumented = declaration.getJsDocs().length > 0;
    if (alreadyDocumented) {
      continue;
    }

    const sourceText = declaration.getFullText();
    const parameterInfo = declaration.getParameters().map((param) => {
      return { name: param.getName(), type: param.getType().getText() };
    });
    const returnTypeText = declaration.getReturnType().getText();

    const jsdoc = await llmClient.generate(
      buildJSDocPrompt(sourceText, parameterInfo, returnTypeText),
    );

    results.push({
      functionName: declaration.getName() ?? "anonymous",
      jsdoc,
      insertLine: declaration.getStartLineNumber(),
    });
  }

  return results;
}
 
/**
 * Build the (Korean) prompt that asks the LLM for a JSDoc comment.
 *
 * @param code - Source text of the function
 * @param params - Parameter names and type texts, rendered as "name: type"
 * @param returnType - Text of the return type
 * @returns The prompt string
 */
function buildJSDocPrompt(
  code: string,
  params: Array<{ name: string; type: string }>,
  returnType: string,
): string {
  const signature = params
    .map(({ name, type }) => `${name}: ${type}`)
    .join(", ");

  // Separator lines hold a single space, exactly as in the prompt text.
  const promptLines = [
    "다음 TypeScript 함수에 대한 JSDoc 주석을 생성하세요.",
    "한국어로 설명하되, 매개변수명과 타입은 영어 그대로 유지하세요.",
    " ",
    "함수 코드:",
    "```typescript",
    code,
    "```",
    " ",
    `매개변수: ${signature}`,
    `반환 타입: ${returnType}`,
    " ",
    "JSDoc 형식으로만 응답하세요.",
  ];
  return promptLines.join("\n");
}

생성 결과 예시

example-output.ts

typescript

/**
 * Fetches the user's order list and computes the total amount.
 * Expired orders are filtered out automatically.
 *
 * @param userId - Unique identifier of the user to look up
 * @param options - Query options (pagination, sorting)
 * @returns Result object containing the order list and the total amount
 * @throws UserNotFoundException When the user cannot be found
 */
async function getUserOrders(
  userId: string,
  options: QueryOptions,
): Promise<OrderResult> {
  // ... (body elided in this example)
}

AI / ML

2장: AST와 LLM 하이브리드 분석

2026년 3월 7일·17분

AI / ML

5장: LLM 기반 자동 리팩터링

2026년 3월 13일·15분

관련 글

4장: 코드 스멜 감지와 기술 부채 정량화

2장: AST와 LLM 하이브리드 분석

5장: LLM 기반 자동 리팩터링

댓글

관련 글

4장: 코드 스멜 감지와 기술 부채 정량화

2장: AST와 LLM 하이브리드 분석

5장: LLM 기반 자동 리팩터링

댓글