Files
openclaw-rag-skill/rag_query.py
Nova AI b272748209 Initial commit: OpenClaw RAG Knowledge System
- Full RAG system for OpenClaw agents
- Semantic search across chat history, code, docs, skills
- ChromaDB integration (all-MiniLM-L6-v2 embeddings)
- Automatic AI context retrieval
- Ingest pipelines for sessions, workspace, skills
- Python API and CLI interfaces
- Document management (add, delete, stats, reset)
2026-02-11 03:47:38 +00:00

182 lines
5.2 KiB
Python

#!/usr/bin/env python3
"""
RAG Query - Search the OpenClaw knowledge base
"""
import sys
from pathlib import Path
# Add this script's own directory to the import path so rag_system can be imported
sys.path.insert(0, str(Path(__file__).parent))
from rag_system import RAGSystem
def format_result(result: dict, index: int) -> str:
    """Format a single search result for terminal display.

    Args:
        result: Result dict holding the document 'text' and a 'metadata'
            mapping. A missing or ``None`` value for either is tolerated.
        index: Position of the result in the result list. Currently unused;
            kept so existing callers keep working.

    Returns:
        A header line chosen by the metadata 'type', followed by optional
        details in parentheses (index date, chunk position), a newline, and
        the text truncated to 1000 characters.
    """
    # A document may have been stored without metadata; treat that as empty
    # instead of failing on the lookups below.
    metadata = result.get('metadata') or {}

    # Determine type
    doc_type = metadata.get('type', 'unknown')
    source = metadata.get('source', '?')

    # Header based on type
    if doc_type == 'session':
        chunk_idx = metadata.get('chunk_index', '?')
        header = f"\n📄 Session {source} (chunk {chunk_idx})"
    elif doc_type == 'workspace':
        header = f"\n📁 {source}"
    elif doc_type == 'skill':
        skill_name = metadata.get('skill_name', source)
        header = f"\n📜 Skill: {skill_name}"
    elif doc_type == 'memory':
        header = f"\n🧠 Memory: {source}"
    else:
        header = f"\n🔹 {doc_type}: {source}"

    # Format text (limit length)
    text = result.get('text') or ''
    if len(text) > 1000:
        text = text[:1000] + "..."

    info = []

    # Date if available; coerce to str so a non-string value cannot break
    # the slice that keeps only the first 10 characters.
    ingested_at = metadata.get('ingested_at')
    if ingested_at:
        info.append(f"indexed {str(ingested_at)[:10]}")

    # Chunk info, shown 1-based. The stored index may be a string, so
    # convert before adding; fall back to the raw value if it is not numeric.
    chunk_index = metadata.get('chunk_index')
    total_chunks = metadata.get('total_chunks')
    if chunk_index is not None and total_chunks is not None:
        try:
            position = int(chunk_index) + 1
        except (TypeError, ValueError):
            position = chunk_index
        info.append(f"chunk {position}/{total_chunks}")

    info_str = f" ({', '.join(info)})" if info else ""
    return f"{header}{info_str}\n{text}"
def search(
    query: str,
    n_results: int = 10,
    filters: dict = None,
    collection_name: str = "openclaw_knowledge",
    verbose: bool = True
) -> list:
    """
    Run a one-shot query against the RAG knowledge base.

    Args:
        query: Search query
        n_results: Number of results to request
        filters: Metadata filters (e.g., {"type": "skill"})
        collection_name: Name of the collection to open
        verbose: When true, print the query, filters and formatted results

    Returns:
        The list of result dicts, or an empty list when nothing matched
    """
    divider = "=" * 80

    # Echo what is about to be searched
    if verbose:
        print(f"🔍 Query: {query}")
        if filters:
            print(f"🎯 Filters: {filters}")
        print()

    # Open the collection and run the query
    hits = RAGSystem(collection_name=collection_name).search(
        query, n_results=n_results, filters=filters
    )

    if not hits:
        if verbose:
            print("❌ No results found")
        return []

    # Quiet callers only want the data back
    if not verbose:
        return hits

    print(f"✅ Found {len(hits)} results\n")
    print(divider)
    for position, hit in enumerate(hits, start=1):
        print(format_result(hit, position))
        print(divider)
    return hits
def interactive_search(collection_name: str = "openclaw_knowledge"):
    """Run an interactive search prompt against the knowledge base.

    Prints collection statistics, then repeatedly reads a query from stdin
    and prints the formatted results. A query of the form
    ``type:<doc_type> <text>`` searches for ``<text>`` with a metadata filter
    ``{"type": <doc_type>}``. The loop ends on 'quit', 'exit', 'q', Ctrl-C,
    or end of input (Ctrl-D / closed stdin).

    Args:
        collection_name: Name of the collection to open.
    """
    type_prefix = "type:"
    divider = "=" * 80

    print("🚀 OpenClaw RAG Search - Interactive Mode")
    print("Type 'quit' or 'exit' to stop\n")
    rag = RAGSystem(collection_name=collection_name)

    # Show stats
    stats = rag.get_stats()
    print(f"📊 Collection: {stats['collection_name']}")
    print(f" Total documents: {stats['total_documents']}")
    print(f" Storage: {stats['persist_directory']}\n")

    while True:
        try:
            query = input("\n🔍 Search query: ").strip()
            if not query:
                continue
            if query.lower() in ('quit', 'exit', 'q'):
                print("\n👋 Goodbye!")
                break

            # Parse filters if any
            filters = None
            if query.startswith(type_prefix):
                parts = query.split(maxsplit=1)
                if len(parts) > 1:
                    # Drop only the leading prefix so the rest of the token
                    # is kept verbatim as the document type.
                    doc_type = parts[0][len(type_prefix):]
                    query = parts[1]
                    filters = {"type": doc_type}

            # Search
            results = rag.search(query, n_results=10, filters=filters)
            if results:
                print(f"\n{len(results)} results:")
                print(divider)
                for i, result in enumerate(results, 1):
                    print(format_result(result, i))
                    print(divider)
            else:
                print("❌ No results found")
        except (KeyboardInterrupt, EOFError):
            # EOFError must be handled before the generic handler below:
            # once stdin is exhausted every further input() call raises it
            # again, which would otherwise loop forever printing the error.
            print("\n\n👋 Goodbye!")
            break
        except Exception as e:
            # Keep the session alive after a failed search
            print(f"❌ Error: {e}")
if __name__ == "__main__":
    # Command-line entry point: one-shot search when a query is given,
    # interactive prompt otherwise (or when -i/--interactive is passed).
    import argparse

    cli = argparse.ArgumentParser(description="Search OpenClaw RAG knowledge base")
    cli.add_argument(
        "query",
        nargs="?",
        help="Search query (if not provided, enters interactive mode)",
    )
    cli.add_argument(
        "-n", "--num-results",
        type=int,
        default=10,
        help="Number of results",
    )
    cli.add_argument(
        "--type",
        help="Filter by document type (session, workspace, skill, memory)",
    )
    cli.add_argument(
        "--collection",
        default="openclaw_knowledge",
        help="Collection name",
    )
    cli.add_argument(
        "--interactive", "-i",
        action="store_true",
        help="Interactive mode",
    )
    opts = cli.parse_args()

    # Metadata filter is only built when --type was supplied
    type_filter = {"type": opts.type} if opts.type else None

    one_shot = bool(opts.query) and not opts.interactive
    if one_shot:
        search(
            query=opts.query,
            n_results=opts.num_results,
            filters=type_filter,
            collection_name=opts.collection,
        )
    else:
        interactive_search(collection_name=opts.collection)