# Source code for paperrag.repl

"""Interactive REPL for PaperRAG.
REPL: Read, Evaluate, Print, Loop!
This mode is first class in PaperRAG
"""

from __future__ import annotations

import logging
import os
import re
from pathlib import Path

from prompt_toolkit import HTML, PromptSession
from prompt_toolkit.completion import CompleteEvent, Completer, Completion, PathCompleter
from prompt_toolkit.document import Document
from prompt_toolkit.history import FileHistory
from rich.console import Console
from rich.table import Table

from paperrag import __version__
from paperrag.config import PaperRAGConfig, load_rc, PROMPT_PRESETS, PRESET_MAX_TOKENS

# Shared Rich console; every function in this module prints through it.
console = Console()

# All slash-commands available in the REPL
# _SlashCompleter iterates this list to offer command-name completions.
# Listing a name here only affects Tab completion; the commands themselves are
# dispatched by the if-chain inside start_repl().
SLASH_COMMANDS: list[str] = [
    "/index",
    "/focus",
    "/topk",
    "/threshold",
    "/temperature",
    "/max-tokens",
    "/ctx-size",
    "/n-gpu-layers",
    "/n-threads",
    "/prompt",
    "/preset",
    "/export",
    "/no-llm",
    "/think",
    "/model",
    "/config",
    "/rc",
    "/help",
    "/exit",
    "/quit",
]

# Maximum number of user/assistant turn pairs to keep in conversation history.
# 10 turns (20 messages) balances context for follow-ups while staying within
# typical 4096-token context windows when combined with retrieval context.
# start_repl() trims its history list to the last _MAX_HISTORY_TURNS * 2 messages.
_MAX_HISTORY_TURNS = 10

# Text printed by the /help command. It is written in Rich console markup
# ([bold], [cyan], [dim]) and is passed to console.print() as-is.
HELP_TEXT = """\
[bold]Available commands:[/bold]
  [cyan]<any text>[/cyan]              Query the indexed papers (uses top-k retrieval, with LLM unless /no-llm is active)
  [cyan]/index[/cyan]                  Re-index the current PDF directory/file
  [cyan]/index <path>[/cyan]           Re-index a specific PDF file or directory
  [cyan]/focus <substring>[/cyan]     Focus all subsequent queries on a specific paper
  [cyan]/topk <n>[/cyan]               Set top-k for retrieval (default: 5)
  [cyan]/threshold <n>[/cyan]          Set similarity threshold 0.0-1.0 (default: 0.1)
  [cyan]/temperature <n>[/cyan]        Set LLM temperature 0.0-2.0 (default: 0.0)
  [cyan]/max-tokens <n>[/cyan]         Set LLM max output tokens (default: 1024)
  [cyan]/ctx-size <n>[/cyan]           Set LLM context window size (default: 4096)
  [cyan]/n-gpu-layers <n>[/cyan]       Set GPU layers for llama.cpp backend (0 = CPU only)
  [cyan]/n-threads <n>[/cyan]          Set CPU threads for llama.cpp backend (0 = auto)
  [cyan]/prompt <text>[/cyan]          Set LLM system prompt
  [cyan]/preset <name>[/cyan]         Switch to a named prompt preset: default, reviewer, summarizer, explainer
  [cyan]/export[/cyan]                 Export this session's Q&A to a markdown file (auto-named)
  [cyan]/export <path>[/cyan]          Export to a specific file path
  [cyan]/no-llm[/cyan]                 Toggle retrieval-only mode (disable LLM answers)
  [cyan]/no-llm on|off[/cyan]          Explicitly enable or disable retrieval-only mode
  [cyan]/think[/cyan]                  Toggle thinking/reasoning mode (for models like Qwen3, default: off)
  [cyan]/model <name>[/cyan]           Switch LLM model/backend: Ollama name, local .gguf path, or HF repo (e.g. Qwen/Qwen3-1.7B-GGUF)
  [cyan]/config[/cyan]                 Show current configuration
  [cyan]/rc[/cyan]                     Show loaded .paperragrc files and values
  [cyan]/help[/cyan]                   Show this help message
  [cyan]/exit[/cyan] / [cyan]/quit[/cyan]              Exit the REPL

[dim]Tip: type [bold]/[/bold] + Tab for autocomplete. In review mode, try [cyan]/preset reviewer[/cyan] then [cyan]/export[/cyan] to save your review.[/dim]
"""


# Commands whose argument is a filesystem path (file or directory)
# _SlashCompleter hands the argument of these commands to a PathCompleter.
_PATH_COMMANDS = {"/model", "/index", "/export"}

# Commands whose argument is one of the preset names
# _SlashCompleter completes this command's argument from the PROMPT_PRESETS keys.
_PRESET_COMMAND = "/preset"


class _SlashCompleter(Completer):
    """Tab-completion provider for REPL slash-commands and their arguments.

    Three situations are handled, and only for input that starts with '/':

    - No space typed yet: complete the command name from SLASH_COMMANDS.
    - Command is one of _PATH_COMMANDS ('/model', '/index', '/export'):
      complete the argument as a filesystem path.
    - Command is '/preset': complete the argument from the preset names.
    """

    def __init__(self) -> None:
        # expanduser=True lets the path completer resolve a leading '~'.
        self._path_completer = PathCompleter(expanduser=True)

    def get_completions(
        self, document: Document, complete_event: CompleteEvent
    ):
        """Yield Completion objects for the text in front of the cursor."""
        typed = document.text_before_cursor
        if not typed.startswith("/"):
            return

        command, separator, argument = typed.partition(" ")

        if not separator:
            # Still typing the command itself: offer the missing suffix of
            # every command that begins with what has been typed so far.
            yield from (
                Completion(candidate[len(typed):], start_position=0, display=candidate)
                for candidate in SLASH_COMMANDS
                if candidate.startswith(typed)
            )
            return

        if command in _PATH_COMMANDS:
            # Run the path completer on the argument alone, as if it were
            # the whole input line.
            argument_doc = Document(argument, cursor_position=len(argument))
            yield from self._path_completer.get_completions(argument_doc, complete_event)
        elif command == _PRESET_COMMAND:
            for preset_name in PROMPT_PRESETS:
                if preset_name.startswith(argument):
                    yield Completion(
                        preset_name[len(argument):],
                        start_position=0,
                        display=preset_name,
                    )

def _parse_int_arg(cmd_parts: "list[str]", *, minimum: int = 0) -> "int | None":
    """Return the integer argument of a split command line.

    ``cmd_parts`` is the command split into ``[name, argument]``. Returns
    ``None`` when the argument is missing, is not an integer, or is below
    ``minimum``. Parsing with ``int()`` (rather than gating on
    ``str.isdigit``) allows negative values and never raises on characters
    such as "²" that ``isdigit`` accepts but ``int`` rejects.
    """
    if len(cmd_parts) != 2:
        return None
    try:
        value = int(cmd_parts[1].strip())
    except ValueError:
        return None
    return value if value >= minimum else None


def _save_session_on_exit(
    session_log: "list[dict]",
    output_path: "Path | None",
    *,
    focused_file: "str | None",
    cfg: "PaperRAGConfig",
) -> None:
    """Export the session log when the REPL exits, if an output path was given.

    Does nothing when ``output_path`` is unset or the log is empty. A failed
    write is reported to the user instead of raising, so exiting never ends
    in a traceback.
    """
    if not (output_path and session_log):
        return
    try:
        _export_session(session_log, output_path, focused_file=focused_file, cfg=cfg)
    except OSError as exc:
        console.print(f"[red]Could not save session to {output_path}: {exc}[/red]")
    else:
        console.print(f"[green]Session saved to {output_path}[/green]")


def start_repl(
    cfg: PaperRAGConfig | None = None,
    *,
    auto_focus: "Path | None" = None,
    review_mode: bool = False,
    output_path: "Path | None" = None,
) -> None:
    """Launch the interactive REPL session.

    Args:
        cfg: Configuration to use; a default ``PaperRAGConfig()`` is created
            when omitted. Slash-commands mutate this object in place.
        auto_focus: If given, queries start out focused on the indexed paper
            whose file name equals ``auto_focus.name`` (case-insensitive).
        review_mode: Selects the start-up hint that advertises presets and
            ``/export`` instead of the generic ``/help`` hint.
        output_path: If given, the Q&A log is exported there when the REPL
            exits (via ``/exit``, ``/quit``, Ctrl-D or Ctrl-C at the prompt).

    Raises:
        SystemExit: with status 1 when no index exists at ``cfg.index_dir``.
    """
    import html  # stdlib; used to escape file names embedded in the HTML prompt

    from paperrag.llm import prewarm_ollama
    from paperrag.parser import discover_pdfs
    from paperrag.vectorstore import VectorStore

    cfg = cfg or PaperRAGConfig()
    pdf_dir = Path(cfg.input_dir)

    # Discover PDFs with the parser's INFO logging temporarily silenced.
    parser_logger = logging.getLogger("paperrag.parser")
    original_level = parser_logger.level
    parser_logger.setLevel(logging.WARNING)
    try:
        pdfs = discover_pdfs(pdf_dir)
    finally:
        parser_logger.setLevel(original_level)  # always restore the log level

    console.print(f"\n[bold]PaperRAG[/bold] version [cyan]{__version__}[/cyan]")

    idx_dir = Path(cfg.index_dir)
    if not VectorStore.exists(idx_dir):
        console.print(f"[red]Error: No index found at {idx_dir}[/red]")
        console.print("Run [bold]paperrag index[/bold] to create an index before using the REPL.")
        raise SystemExit(1)

    # Load the index once and report how many discovered PDFs are missing from it.
    loaded_store = None
    try:
        loaded_store = VectorStore.load(idx_dir)
        # Compare by path, the same key _handle_index() uses, rather than by
        # subtracting counts: a plain difference is wrong (even negative) when
        # the index still holds entries for files that no longer exist.
        unindexed_count = sum(
            1 for pdf in pdfs if str(pdf) not in loaded_store.file_hashes
        )
    except Exception as e:
        console.print(f"Found [green]{len(pdfs)}[/green] PDFs")
        console.print(f"[yellow]Warning: Could not load index: {e}[/yellow]")
    else:
        if unindexed_count > 0:
            console.print(
                f"Found [green]{len(pdfs)}[/green] PDFs - "
                f"[yellow]{unindexed_count} unindexed[/yellow]"
                f" [dim]— run /index to add them[/dim]"
            )
        else:
            console.print(f"Found [green]{len(pdfs)}[/green] PDFs - [green]all indexed[/green]")

    console.print(
        f"LLM: [cyan]{cfg.llm.model_name}[/cyan] "
        f"[dim]top-k={cfg.retriever.top_k} threshold={cfg.retriever.score_threshold} "
        f"— /config for all settings[/dim]"
    )
    if review_mode:
        console.print(
            "Switch prompts: [cyan]/preset reviewer[/cyan] (or summarizer, explainer). "
            "Custom: [cyan]/prompt <text>[/cyan]. Save session: [cyan]/export[/cyan].\n"
        )
    else:
        console.print("Type [cyan]/help[/cyan] for commands, or [cyan]/[/cyan] + Tab for autocomplete.\n")

    top_k = cfg.retriever.top_k
    focused_file: str | None = None
    session_log: list[dict] = []  # tracks Q&A pairs for /export
    conversation_history: list[dict] = []  # tracks messages for follow-up questions
    use_llm = True

    # Eagerly load the retriever (including embedding model) at startup
    # so the first query doesn't pay the model-loading penalty.
    # Pass the already-loaded store to avoid reading the index from disk twice.
    console.print("[dim]Loading embedding model...[/dim]", end="")
    retriever = _ensure_retriever(None, cfg, store=loaded_store)
    if retriever is not None:
        console.print(" [green]done[/green]")
    else:
        console.print(" [red]failed[/red]")

    # Pre-warm LLM so first query doesn't pay the model-loading cost
    console.print(f"[dim]Warming up LLM ({cfg.llm.model_name})...[/dim]", end="")
    ok = prewarm_ollama(cfg.llm)
    console.print(" [green]done[/green]" if ok else " [dim]skipped[/dim]")

    # Auto-focus for single-PDF review sessions
    if auto_focus is not None and retriever is not None:
        all_files = sorted(retriever.store.file_hashes)
        wanted = auto_focus.name.lower()
        matches = [f for f in all_files if Path(f).name.lower() == wanted]
        if matches:
            focused_file = matches[0]
            console.print(f"[green]Auto-focused on '{auto_focus.name}'[/green]")
            other_count = len(all_files) - 1
            if other_count > 0:
                console.print(
                    f"[dim]{other_count} other paper(s) also indexed — "
                    f"/focus list to browse, /focus to search all[/dim]"
                )
        else:
            console.print(
                f"[yellow]Warning: '{auto_focus.name}' not found in index — "
                f"searching all papers[/yellow]"
            )
    console.print()

    # Suppress INFO logs during interactive session to keep output clean.
    logging.getLogger().setLevel(logging.WARNING)

    # Create prompt session with history and slash-command completion
    session = PromptSession(
        history=FileHistory(str(Path.home() / ".paperrag_history")),
        completer=_SlashCompleter(),
        complete_while_typing=False,  # only complete on Tab
    )

    def print_gpu_tip() -> None:
        """Suggest GPU offloading when a llama.cpp model is selected on CPU."""
        if cfg.llm.n_gpu_layers == 0:
            console.print("[dim]Tip: /n-gpu-layers -1 to offload all layers to GPU[/dim]")

    while True:
        try:
            if focused_file:
                short_name = Path(focused_file).name
                if len(short_name) > 20:
                    short_name = short_name[:17] + "..."
                # The prompt is parsed as markup, so a file name containing
                # '&' or '<' must be escaped or building the prompt fails.
                prompt_text = HTML(
                    f"paperrag <ansigreen>({html.escape(short_name)})</ansigreen>> "
                )
            else:
                prompt_text = "paperrag> "
            command = session.prompt(prompt_text).strip()
        except (EOFError, KeyboardInterrupt):
            console.print("\nBye!")
            _save_session_on_exit(session_log, output_path, focused_file=focused_file, cfg=cfg)
            break

        if not command:
            continue

        if command in ("/exit", "/quit"):
            console.print("Bye!")
            _save_session_on_exit(session_log, output_path, focused_file=focused_file, cfg=cfg)
            break

        if command == "/help":
            console.print(HELP_TEXT)
            continue

        cmd_parts = command.split(maxsplit=1)
        cmd_name = cmd_parts[0]
        has_arg = len(cmd_parts) == 2

        if cmd_name == "/index":
            if has_arg:
                new_path = cmd_parts[1].strip()
                # Expand '~' so paths offered by the completer actually resolve.
                path_obj = Path(new_path).expanduser()
                if not path_obj.exists():
                    console.print(f"[red]Path does not exist: {new_path}[/red]")
                    continue
                cfg.input_dir = str(path_obj)
                # Reset index_dir so it auto-derives from the new input path
                cfg._index_dir = None
            _handle_index(cfg)
            retriever = None  # force reload after re-index
            focused_file = None  # reset focus as index has changed
            continue

        if cmd_name == "/focus":
            retriever = _ensure_retriever(retriever, cfg)
            if retriever is None:
                continue

            # Get all unique files in index
            all_files = sorted(retriever.store.file_hashes)

            if not has_arg:
                focused_file = None
                console.print("[green]Focus reset: searching all indexed papers.[/green]")
                continue

            arg = cmd_parts[1].strip().lower()
            if arg == "list":
                console.print(f"[bold]Indexed papers ({len(all_files)} total):[/bold]")
                for f in all_files[:5]:
                    console.print(f" - {Path(f).name}")
                if len(all_files) > 5:
                    console.print(f" ... and {len(all_files) - 5} others")
                continue

            # Case-insensitive substring match against the file names
            matches = [f for f in all_files if arg in Path(f).name.lower()]
            if not matches:
                console.print(f"[red]No indexed papers match '{arg}'[/red]")
                # Show a small sample to help the user
                console.print("[dim]Available papers (sample):[/dim]")
                for f in all_files[:5]:
                    console.print(f" - {Path(f).name}")
                if len(all_files) > 5:
                    console.print(
                        f" ... and {len(all_files) - 5} others. "
                        f"Use [cyan]/focus list[/cyan] to see more."
                    )
            elif len(matches) == 1:
                focused_file = matches[0]
                console.print(f"Focus set to: [green]{Path(focused_file).name}[/green]")
            else:
                console.print(f"[yellow]Multiple matches for '{arg}':[/yellow]")
                # Show all matches if reasonable, otherwise truncate
                for f in matches[:10]:
                    console.print(f" - {Path(f).name}")
                if len(matches) > 10:
                    console.print(f" ... and {len(matches) - 10} other matches.")
                console.print("[dim]Please be more specific or copy-paste a name from above.[/dim]")
            continue

        if cmd_name == "/topk":
            value = _parse_int_arg(cmd_parts)
            if value is None:
                console.print("[yellow]Usage: /topk <number>[/yellow]")
            else:
                top_k = value
                cfg.retriever.top_k = top_k
                console.print(f"top-k set to [cyan]{top_k}[/cyan]")
            continue

        if cmd_name == "/threshold":
            try:
                threshold_val = float(cmd_parts[1]) if has_arg else None
            except ValueError:
                threshold_val = None
            if threshold_val is None:
                console.print("[yellow]Usage: /threshold <number>[/yellow]")
            elif 0.0 <= threshold_val <= 1.0:
                cfg.retriever.score_threshold = threshold_val
                console.print(f"Threshold set to [cyan]{threshold_val}[/cyan]")
            else:
                console.print("[yellow]Threshold must be between 0.0 and 1.0[/yellow]")
            continue

        if cmd_name == "/temperature":
            try:
                temp_val = float(cmd_parts[1]) if has_arg else None
            except ValueError:
                temp_val = None
            if temp_val is None:
                console.print("[yellow]Usage: /temperature <number>[/yellow]")
            elif 0.0 <= temp_val <= 2.0:
                cfg.llm.temperature = temp_val
                console.print(f"Temperature set to [cyan]{temp_val}[/cyan]")
            else:
                console.print("[yellow]Temperature must be between 0.0 and 2.0[/yellow]")
            continue

        if cmd_name == "/max-tokens":
            value = _parse_int_arg(cmd_parts)
            if value is None:
                console.print("[yellow]Usage: /max-tokens <number>[/yellow]")
            else:
                cfg.llm.max_tokens = value
                console.print(f"Max tokens set to [cyan]{cfg.llm.max_tokens}[/cyan]")
            continue

        if cmd_name == "/ctx-size":
            value = _parse_int_arg(cmd_parts)
            if value is None:
                console.print("[yellow]Usage: /ctx-size <number>[/yellow]")
            elif value >= 512:
                cfg.llm.ctx_size = value
                console.print(f"Context size set to [cyan]{cfg.llm.ctx_size}[/cyan]")
            else:
                console.print("[yellow]Context size must be at least 512[/yellow]")
            continue

        if cmd_name == "/n-gpu-layers":
            # -1 must be accepted: it is the value the REPL's own tip
            # recommends for offloading all layers to the GPU.
            value = _parse_int_arg(cmd_parts, minimum=-1)
            if value is None:
                console.print(
                    "[yellow]Usage: /n-gpu-layers <number> "
                    "(-1 = all layers, 0 = CPU only)[/yellow]"
                )
            else:
                cfg.llm.n_gpu_layers = value
                console.print(
                    f"GPU layers set to [cyan]{cfg.llm.n_gpu_layers}[/cyan] "
                    f"(takes effect on next llama-server start)"
                )
            continue

        if cmd_name == "/n-threads":
            value = _parse_int_arg(cmd_parts)
            if value is None:
                console.print("[yellow]Usage: /n-threads <number> (0 = auto)[/yellow]")
            else:
                cfg.llm.n_threads = value
                label = str(value) if value > 0 else f"auto ({os.cpu_count()})"
                console.print(
                    f"CPU threads set to [cyan]{label}[/cyan] "
                    f"(takes effect on next llama-server start)"
                )
            continue

        if cmd_name == "/prompt":
            if has_arg:
                cfg.llm.system_prompt = cmd_parts[1].strip()
                console.print(f"System prompt set to: [dim]{cfg.llm.system_prompt}[/dim]")
            else:
                console.print("[yellow]Usage: /prompt <text>[/yellow]")
            continue

        if cmd_name == "/preset":
            valid = ", ".join(PROMPT_PRESETS)
            if not has_arg:
                console.print(f"[yellow]Usage: /preset <name> (valid: {valid})[/yellow]")
                continue
            name = cmd_parts[1].strip().lower()
            if name in PROMPT_PRESETS:
                cfg.llm.system_prompt = PROMPT_PRESETS[name]
                # A preset may come with its own output budget; otherwise
                # the current max_tokens is kept.
                cfg.llm.max_tokens = PRESET_MAX_TOKENS.get(name, cfg.llm.max_tokens)
                console.print(
                    f"Preset [cyan]{name}[/cyan] active. "
                    f"[dim]max_tokens={cfg.llm.max_tokens}[/dim]"
                )
                console.print(f"[dim]Prompt: {cfg.llm.system_prompt[:80]}...[/dim]")
            else:
                console.print(f"[yellow]Unknown preset '{name}'. Valid: {valid}[/yellow]")
            continue

        if cmd_name == "/export":
            if not session_log:
                console.print("[yellow]No Q&A in this session yet.[/yellow]")
                continue
            if has_arg:
                export_path = Path(cmd_parts[1].strip()).expanduser()
            else:
                import datetime as _dt

                stamp = _dt.datetime.now().strftime("%Y%m%d_%H%M%S")
                paper_stem = Path(focused_file).stem if focused_file else "session"
                export_path = Path(f"{paper_stem}_review_{stamp}.md")
            try:
                _export_session(session_log, export_path, focused_file=focused_file, cfg=cfg)
            except OSError as exc:
                # An unwritable path must not take the whole session down.
                console.print(f"[red]Could not export to {export_path}: {exc}[/red]")
            else:
                console.print(f"[green]Session exported to {export_path}[/green]")
            continue

        if cmd_name == "/no-llm":
            if has_arg:
                arg = cmd_parts[1].strip().lower()
                if arg == "on":
                    use_llm = False
                elif arg == "off":
                    use_llm = True
                else:
                    console.print("[yellow]Usage: /no-llm [on|off][/yellow]")
                    continue
            else:
                use_llm = not use_llm
            if use_llm:
                console.print("LLM mode: [green]on[/green]")
            else:
                console.print("LLM mode: [yellow]off[/yellow] [dim](retrieval-only)[/dim]")
            continue

        if cmd_name == "/think":
            cfg.llm.think = not cfg.llm.think
            state = "[green]on[/green]" if cfg.llm.think else "[dim]off[/dim]"
            console.print(f"Thinking mode: {state}")
            continue

        if cmd_name == "/model":
            if not has_arg:
                console.print("[yellow]Usage: /model <model-name>[/yellow]")
                continue

            from paperrag.llm import _is_gguf_model, _is_hf_model

            raw = cmd_parts[1]
            # Expand ~ for local file paths
            expanded = Path(raw).expanduser()
            expanded_str = str(expanded)

            if expanded.is_dir():
                # A directory: search it (recursively) for GGUF files
                gguf_files = sorted(expanded.rglob("*.gguf"))
                if not gguf_files:
                    console.print(f"[red]No .gguf files found in: {expanded_str}[/red]")
                elif len(gguf_files) == 1:
                    cfg.llm.model_name = str(gguf_files[0])
                    console.print(
                        f"Model: [cyan]{gguf_files[0].name}[/cyan] "
                        f"Backend: [yellow]llama.cpp[/yellow]"
                    )
                    print_gpu_tip()
                else:
                    console.print("[yellow]Multiple GGUF files found — pick one:[/yellow]")
                    for i, f in enumerate(gguf_files, 1):
                        console.print(f" [cyan]{i}.[/cyan] {f}")
            elif _is_gguf_model(expanded_str):
                if expanded.is_file():
                    cfg.llm.model_name = expanded_str
                    console.print(
                        f"Model: [cyan]{expanded.name}[/cyan] "
                        f"Backend: [yellow]llama.cpp[/yellow]"
                    )
                    print_gpu_tip()
                else:
                    console.print(f"[red]GGUF file not found: {expanded_str}[/red]")
            elif _is_hf_model(expanded_str):
                cfg.llm.model_name = expanded_str
                console.print(
                    f"Model: [cyan]{expanded_str}[/cyan] Backend: [yellow]llama.cpp[/yellow] "
                    f"[dim](GGUF downloaded from HuggingFace on first query)[/dim]"
                )
                print_gpu_tip()
            else:
                cfg.llm.model_name = expanded_str
                console.print(f"Model: [cyan]{expanded_str}[/cyan] Backend: [green]Ollama[/green]")
            continue

        if command == "/config":
            n_threads_label = (
                str(cfg.llm.n_threads) if cfg.llm.n_threads > 0 else f"auto ({os.cpu_count()})"
            )
            think_label = "[green]on[/green]" if cfg.llm.think else "[dim]off[/dim]"
            llm_calls_label = (
                "[green]enabled[/green]"
                if use_llm
                else "[yellow]disabled[/yellow] [dim](retrieval-only)[/dim]"
            )
            # A preset counts as active when its text equals the current prompt.
            active_preset = next(
                (k for k, v in PROMPT_PRESETS.items() if v == cfg.llm.system_prompt), None
            )

            console.print("\n[bold]Current Configuration:[/bold]")
            console.print("[bold]LLM:[/bold]")
            console.print(f" Model: [cyan]{cfg.llm.model_name}[/cyan]")
            console.print(f" Temperature: [cyan]{cfg.llm.temperature}[/cyan]")
            console.print(f" Max tokens: [cyan]{cfg.llm.max_tokens}[/cyan]")
            console.print(f" Context size: [cyan]{cfg.llm.ctx_size}[/cyan]")
            console.print(f" GPU layers (llama.cpp): [cyan]{cfg.llm.n_gpu_layers}[/cyan]")
            console.print(f" CPU threads (llama.cpp): [cyan]{n_threads_label}[/cyan]")
            console.print(f" Thinking mode: {think_label}")
            console.print(f" LLM calls: {llm_calls_label}")
            if active_preset:
                console.print(f" Active preset: [cyan]{active_preset}[/cyan]")
            console.print(f" System prompt: [dim]{cfg.llm.system_prompt}[/dim]")
            console.print("[bold]Retrieval:[/bold]")
            console.print(f" Embed model: [cyan]{cfg.embedder.model_name}[/cyan]")
            console.print(f" Top-k: [cyan]{cfg.retriever.top_k}[/cyan]")
            console.print(f" Threshold: [cyan]{cfg.retriever.score_threshold}[/cyan]")
            if focused_file:
                console.print(f" Focus: [green]{Path(focused_file).name}[/green]\n")
            else:
                console.print(" Focus: [dim]none (searching all papers)[/dim]\n")
            continue

        if command == "/rc":
            global_path = Path.home() / ".paperragrc"
            local_path = Path.cwd() / ".paperragrc"
            console.print("\n[bold].paperragrc files:[/bold]")
            for label, rc_path in [("Global", global_path), ("Local", local_path)]:
                if rc_path.is_file():
                    rc_data = load_rc(rc_path)
                    console.print(f" [green]{label}[/green]: {rc_path}")
                    for k, v in rc_data.items():
                        console.print(f" {k} = [cyan]{v}[/cyan]")
                else:
                    console.print(f" [dim]{label}[/dim]: {rc_path} [dim](not found)[/dim]")
            console.print()
            continue

        # Unknown slash-command: give a hint instead of treating it as a query
        if command.startswith("/"):
            console.print(
                f"[yellow]Unknown command: {cmd_name}. "
                "Type [bold]/help[/bold] to see available commands.[/yellow]"
            )
            continue

        # Anything else is treated as a query
        retriever = _ensure_retriever(retriever, cfg)
        if retriever is None:
            continue
        entry = _handle_query(
            command,
            retriever,
            cfg,
            top_k=top_k,
            focused_file=focused_file,
            use_llm=use_llm,
            conversation_history=conversation_history if use_llm else None,
        )
        if entry is None:
            continue
        session_log.append(entry)

        # Update conversation history for follow-up questions
        if use_llm and entry.get("answer"):
            conversation_history.append({"role": "user", "content": command})
            conversation_history.append({"role": "assistant", "content": entry["answer"]})
            # Keep conversation history bounded to avoid exceeding context limits
            max_messages = _MAX_HISTORY_TURNS * 2
            if len(conversation_history) > max_messages:
                conversation_history[:] = conversation_history[-max_messages:]
def _ensure_retriever(retriever, cfg: PaperRAGConfig, store=None): """Lazy-load the retriever, returning None on failure.""" if retriever is not None: return retriever try: from paperrag.retriever import Retriever return Retriever(cfg, store=store) except FileNotFoundError as exc: console.print(f"[red]{exc}[/red]") return None def _handle_query( question: str, retriever, cfg: PaperRAGConfig, *, top_k: int, focused_file: str | None = None, use_llm: bool = True, conversation_history: list[dict] | None = None, ) -> "dict | None": """Run retrieval and LLM for a user question. Returns a session log entry dict on success, or None if no results / error. """ import time t0 = time.perf_counter() results = retriever.retrieve(question, top_k=top_k, file_path=focused_file) t_retrieval = time.perf_counter() - t0 if not results: # When focused on a single paper, fall back to full-document context # instead of giving up — this mimics llama-server's behavior of having # the whole paper in context. if use_llm and focused_file: all_chunks = retriever.get_all_chunks_for_file(focused_file) if all_chunks: console.print("[dim](No retrieval match — using full paper context)[/dim]") results = all_chunks t_retrieval = time.perf_counter() - t0 if not results: # Only use conversation history for follow-ups if the last assistant # turn is recent (i.e., the user is likely asking a follow-up about # the same topic, not a brand new unrelated question). 
if use_llm and conversation_history and len(conversation_history) >= 2: return _handle_followup(question, cfg, conversation_history, t0) msg = "[yellow]No results found.[/yellow]" if cfg.retriever.score_threshold > 0.1: msg += f" [dim](threshold={cfg.retriever.score_threshold} — try /threshold 0.1 to widen the search)[/dim]" console.print(msg) return None if not use_llm: console.print(f"\n[bold]Retrieved Chunks[/bold] [dim]({t_retrieval:.2f}s)[/dim]") entries: list[str] = [] sources: list[str] = [] seen_source_paths: set[str] = set() for i, result in enumerate(results, start=1): filename = Path(result.file_path).name if result.file_path not in seen_source_paths: seen_source_paths.add(result.file_path) sources.append(filename) snippet = re.sub(r"\s+", " ", result.text).strip() if len(snippet) > 200: snippet = snippet[:197].rstrip() + "..." console.print( f" [cyan][{i}][/cyan] {filename} | {result.section_name} | " f"chunk {result.chunk_id} [dim]({result.score:.2f})[/dim]" ) console.print(f" {snippet}") entries.append( f"[{i}] {filename} | {result.section_name} | " f"chunk {result.chunk_id} | score={result.score:.2f}\n{snippet}" ) t_total = time.perf_counter() - t0 console.print(f"\n[dim]Retrieval only: {t_retrieval:.2f}s | Total: {t_total:.2f}s[/dim]\n") return { "question": question, "answer": "\n\n".join(entries), "sources": sources, } # Show retrieved sources immediately so the user sees useful info # while waiting for the LLM to generate. console.print(f"\n[bold]Sources[/bold] [dim]({t_retrieval:.2f}s)[/dim]") # Deduplicate sources by file: each unique file gets one citation number. 
seen_files: dict[str, int] = {} for r in results: if r.file_path not in seen_files: seen_files[r.file_path] = len(seen_files) + 1 for file_path, label in seen_files.items(): filename = Path(file_path).name best_score = max(r.score for r in results if r.file_path == file_path) console.print(f" [cyan][{label}][/cyan] {filename} [dim]({best_score:.2f})[/dim]") full_answer = "" try: import sys from paperrag.llm import stream_answer context_chunks = [r.text for r in results] source_files = [r.file_path for r in results] header_printed = False t1 = time.perf_counter() for chunk in stream_answer(question, context_chunks, cfg.llm, source_files=source_files, conversation_history=conversation_history): if not header_printed: console.print("\n[bold green]Answer:[/bold green]") header_printed = True sys.stdout.write(chunk) sys.stdout.flush() full_answer += chunk sys.stdout.write("\n\n") sys.stdout.flush() t_llm = time.perf_counter() - t1 t_total = time.perf_counter() - t0 console.print(f"[dim]Retrieval: {t_retrieval:.2f}s | LLM: {t_llm:.2f}s | Total: {t_total:.2f}s[/dim]\n") except ImportError as exc: console.print(f"[yellow]{exc}[/yellow]") return None except ValueError as exc: # LLM not configured - this is fine, just skip it console.print(f"\n[dim]💡 {exc}[/dim]\n") return None except Exception as exc: from paperrag.llm import describe_llm_error error_msg, hint = describe_llm_error(exc, cfg.llm.model_name) console.print(f"[red]{error_msg}[/red]") if hint: console.print(f"[yellow]Fix: {hint}[/yellow]") return None return { "question": question, "answer": full_answer, "sources": [Path(fp).name for fp in seen_files], } def _handle_followup( question: str, cfg: PaperRAGConfig, conversation_history: list[dict], t0: float, ) -> "dict | None": """Handle a follow-up question using conversation history when retrieval returns no results.""" import sys import time from paperrag.llm import stream_followup console.print("[dim](No new sources found — answering from conversation 
history)[/dim]") full_answer = "" try: header_printed = False t1 = time.perf_counter() for chunk in stream_followup(question, conversation_history, cfg.llm): if not header_printed: console.print("\n[bold green]Answer:[/bold green]") header_printed = True sys.stdout.write(chunk) sys.stdout.flush() full_answer += chunk sys.stdout.write("\n\n") sys.stdout.flush() t_llm = time.perf_counter() - t1 t_total = time.perf_counter() - t0 console.print(f"[dim]LLM: {t_llm:.2f}s | Total: {t_total:.2f}s[/dim]\n") except ImportError as exc: console.print(f"[yellow]{exc}[/yellow]") return None except Exception as exc: from paperrag.llm import describe_llm_error error_msg, hint = describe_llm_error(exc, cfg.llm.model_name) console.print(f"[red]{error_msg}[/red]") if hint: console.print(f"[yellow]Fix: {hint}[/yellow]") return None return { "question": question, "answer": full_answer, "sources": [], } def _handle_index(cfg: PaperRAGConfig) -> None: """Run the indexing pipeline from inside the REPL.""" from paperrag.chunker import chunk_paper from paperrag.embedder import Embedder from paperrag.parser import compute_file_hashes_parallel, discover_pdfs, parse_pdf from paperrag.parallel import parallel_process_pdfs from paperrag.vectorstore import VectorStore pdf_dir = Path(cfg.input_dir) idx_dir = Path(cfg.index_dir) pdfs = discover_pdfs(pdf_dir) if not pdfs: console.print("[red]No PDFs found.[/red]") return is_single_file = pdf_dir.is_file() embedder = Embedder(cfg.embedder) if VectorStore.exists(idx_dir): store = VectorStore.load(idx_dir) if store.dimension != embedder.dimension: store = VectorStore(idx_dir, embedder.dimension) else: store = VectorStore(idx_dir, embedder.dimension) # Remove deleted files from the index (skip for single-file mode) stale = [] if not is_single_file: current_paths = {str(p) for p in pdfs} stale = [fp for fp in list(store.file_hashes) if fp not in current_paths] for fp in stale: store.remove_by_file(fp) del store.file_hashes[fp] if stale: 
console.print(f"Removed [red]{len(stale)}[/red] deleted file(s) from index.") # Determine which files need (re)indexing - use parallel hashing n_workers = cfg.indexing.get_n_workers() if len(pdfs) == 1: console.print(f"Checking [cyan]{pdfs[0].name}[/cyan] for changes...") else: console.print(f"Checking [cyan]{len(pdfs)}[/cyan] PDFs for changes...") pdf_hashes = compute_file_hashes_parallel(pdfs, n_workers) to_index: list[Path] = [] for pdf in pdfs: current_hash = pdf_hashes.get(str(pdf)) stored_hash = store.get_file_hash(str(pdf)) if stored_hash is None or stored_hash != current_hash: if stored_hash is not None: store.remove_by_file(str(pdf)) to_index.append(pdf) if not to_index and not stale: console.print("[green]Index is up-to-date.[/green]") return if not to_index: store.version += 1 store.save(config=cfg) console.print(f"[green]Done![/green] Index version: {store.version}") return total = len(to_index) console.print(f"Parsing [cyan]{total}[/cyan] PDF(s) with {n_workers} workers...") # Parallel parse + chunk phase parsed_results = parallel_process_pdfs( to_index, cfg.parser, cfg.chunker, n_workers, timeout=cfg.indexing.pdf_timeout ) # Sequential embed + add phase console.print("Embedding and indexing chunks...") total_chunks = 0 processed_count = 0 checkpoint_interval = cfg.indexing.checkpoint_interval for i, (pdf_path, file_hash, chunks, error) in enumerate(parsed_results, 1): console.print(f" [{i}/{total}] {pdf_path.name}", highlight=False) if error: console.print(f" [red]Error: {error}[/red]") continue if not chunks: console.print(f" [yellow]No chunks produced, skipping.[/yellow]") continue embeddings = embedder.embed([c.text for c in chunks]) store.add(embeddings, chunks) store.set_file_hash(str(pdf_path), file_hash) total_chunks += len(chunks) processed_count += 1 console.print(f" [green]{len(chunks)} chunks[/green]") # Periodic checkpoint if checkpoint_interval > 0 and processed_count >= checkpoint_interval: try: store.save(config=cfg) console.print(f" 
[dim]Checkpoint saved ({processed_count} PDFs, {total_chunks} chunks)[/dim]") processed_count = 0 except Exception as e: console.print(f" [yellow]Checkpoint save failed: {e}[/yellow]") store.version += 1 store.save(config=cfg) console.print( f"[green]Done![/green] Indexed {total_chunks} chunks from " f"{len(to_index)} file(s). Index version: {store.version}" ) def _export_session( session_log: list[dict], output_path: "Path", *, focused_file: "str | None" = None, cfg: "PaperRAGConfig | None" = None, ) -> None: """Write the session Q&A log to a markdown file.""" import datetime as _dt output_path = Path(output_path) output_path.parent.mkdir(parents=True, exist_ok=True) paper_name = Path(focused_file).name if focused_file else "Multiple papers" preset = None if cfg is not None: preset = next((k for k, v in PROMPT_PRESETS.items() if v == cfg.llm.system_prompt), None) with open(output_path, "w", encoding="utf-8") as f: f.write(f"# PaperRAG Review Session\n\n") f.write(f"**Paper:** {paper_name} \n") f.write(f"**Date:** {_dt.datetime.now().strftime('%Y-%m-%d %H:%M')} \n") if preset: f.write(f"**Preset:** {preset} \n") if cfg is not None: f.write(f"**Model:** {cfg.llm.model_name} \n") f.write("\n---\n\n") for i, entry in enumerate(session_log, 1): f.write(f"## Q{i}: {entry['question']}\n\n") if entry.get("sources"): sources_str = ", ".join(entry["sources"]) f.write(f"*Sources: {sources_str}*\n\n") f.write(f"{entry['answer']}\n\n") if i < len(session_log): f.write("---\n\n")