- Full RAG system for OpenClaw agents - Semantic search across chat history, code, docs, skills - ChromaDB integration (all-MiniLM-L6-v2 embeddings) - Automatic AI context retrieval - Ingest pipelines for sessions, workspace, skills - Python API and CLI interfaces - Document management (add, delete, stats, reset)
182 lines
5.2 KiB
Python
182 lines
5.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
RAG Query - Search the OpenClaw knowledge base
|
|
"""
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Add parent directory to path
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from rag_system import RAGSystem
|
|
|
|
|
|
def format_result(result: dict, index: int) -> str:
    """Format a single search result for terminal display.

    Args:
        result: Search hit holding the document under ``'text'`` and its
            metadata mapping under ``'metadata'``. A missing or ``None``
            metadata/text entry is treated as empty instead of raising.
        index: Position of the hit in the result list. Accepted for
            caller compatibility; it is not rendered in the output.

    Returns:
        A string made of a header line (type-specific icon plus source),
        an optional parenthesised info suffix (index date and chunk
        position), a newline, and the text truncated to 1000 characters.
    """
    # Tolerate hits whose metadata is absent or None rather than crashing on .get()
    metadata = result.get('metadata') or {}

    # Determine type
    doc_type = metadata.get('type', 'unknown')
    source = metadata.get('source', '?')

    # Header based on type
    if doc_type == 'session':
        chunk_idx = metadata.get('chunk_index', '?')
        header = f"\n📄 Session {source} (chunk {chunk_idx})"
    elif doc_type == 'workspace':
        header = f"\n📁 {source}"
    elif doc_type == 'skill':
        skill_name = metadata.get('skill_name', source)
        header = f"\n📜 Skill: {skill_name}"
    elif doc_type == 'memory':
        header = f"\n🧠 Memory: {source}"
    else:
        header = f"\n🔹 {doc_type}: {source}"

    # Format text (limit length); a None/missing text renders as empty
    text = result.get('text') or ""
    if len(text) > 1000:
        text = text[:1000] + "..."

    info = []

    # Indexing date: the first 10 characters of the stored value
    # (str() guards against a non-string value that cannot be sliced)
    if 'ingested_at' in metadata:
        info.append(f"indexed {str(metadata['ingested_at'])[:10]}")

    # Chunk info, shown 1-based; a non-numeric index is shown as stored
    if 'chunk_index' in metadata and 'total_chunks' in metadata:
        try:
            position = metadata['chunk_index'] + 1
        except TypeError:
            position = metadata['chunk_index']
        info.append(f"chunk {position}/{metadata['total_chunks']}")

    info_str = f" ({', '.join(info)})" if info else ""

    return f"{header}{info_str}\n{text}"
|
|
|
|
|
|
def search(
    query: str,
    n_results: int = 10,
    filters: dict = None,
    collection_name: str = "openclaw_knowledge",
    verbose: bool = True
) -> list:
    """
    Query the RAG knowledge base and optionally print the matches.

    Args:
        query: Text to search for
        n_results: Maximum number of matches requested
        filters: Optional metadata filters (e.g., {"type": "skill"})
        collection_name: Name of the collection to open
        verbose: When true, echo the query and print each formatted match

    Returns:
        The list of result dicts from the RAG system, or an empty list
        when nothing matched
    """
    separator = "=" * 80

    # Echo the request before doing any work
    if verbose:
        print(f"🔍 Query: {query}")
        if filters:
            print(f"🎯 Filters: {filters}")
        print()

    # Open the collection and run the query
    engine = RAGSystem(collection_name=collection_name)
    hits = engine.search(query, n_results=n_results, filters=filters)

    # Nothing matched: report (if asked) and hand back an empty list
    if not hits:
        if verbose:
            print("❌ No results found")
        return []

    # Quiet mode returns the matches without printing them
    if not verbose:
        return hits

    print(f"✅ Found {len(hits)} results\n")
    print(separator)

    for position, hit in enumerate(hits, start=1):
        print(format_result(hit, position))
        print(separator)

    return hits
|
|
|
|
|
|
def interactive_search(collection_name: str = "openclaw_knowledge"):
    """Run an interactive search prompt against the knowledge base.

    Prints the collection statistics, then repeatedly reads a query from
    standard input and prints the formatted matches. A query of the form
    ``type:<doc_type> <terms>`` restricts the search to that document
    type. The loop ends on ``quit``/``exit``/``q``, on Ctrl-C, or when
    standard input reaches end-of-file.

    Args:
        collection_name: Name of the collection to open.
    """
    print("🚀 OpenClaw RAG Search - Interactive Mode")
    print("Type 'quit' or 'exit' to stop\n")

    rag = RAGSystem(collection_name=collection_name)

    # Show stats
    stats = rag.get_stats()
    print(f"📊 Collection: {stats['collection_name']}")
    print(f" Total documents: {stats['total_documents']}")
    print(f" Storage: {stats['persist_directory']}\n")

    type_prefix = "type:"

    while True:
        try:
            query = input("\n🔍 Search query: ").strip()

            if not query:
                continue

            if query.lower() in ('quit', 'exit', 'q'):
                print("\n👋 Goodbye!")
                break

            # Parse an optional leading "type:<doc_type>" filter token
            filters = None
            if query.startswith(type_prefix):
                parts = query.split(maxsplit=1)
                if len(parts) > 1:
                    # Strip only the leading prefix, not later occurrences
                    doc_type = parts[0][len(type_prefix):]
                    query = parts[1]
                    # "type: terms" carries no type; search unfiltered
                    # instead of filtering on an empty string
                    if doc_type:
                        filters = {"type": doc_type}

            # Search
            results = rag.search(query, n_results=10, filters=filters)

            if results:
                print(f"\n✅ {len(results)} results:")
                print("=" * 80)

                for i, result in enumerate(results, 1):
                    print(format_result(result, i))
                    print("=" * 80)
            else:
                print("❌ No results found")

        except (KeyboardInterrupt, EOFError):
            # input() raises EOFError on Ctrl-D / exhausted piped stdin;
            # without this the generic handler below would loop forever
            print("\n\n👋 Goodbye!")
            break
        except Exception as e:
            # Keep the prompt alive after a failed query
            print(f"❌ Error: {e}")
|
|
|
|
|
|
if __name__ == "__main__":
    import argparse

    # Command-line entry point: a one-shot search when a query is given,
    # otherwise (or with -i) the interactive prompt.
    cli = argparse.ArgumentParser(description="Search OpenClaw RAG knowledge base")
    cli.add_argument(
        "query",
        nargs="?",
        help="Search query (if not provided, enters interactive mode)",
    )
    cli.add_argument(
        "-n", "--num-results",
        type=int,
        default=10,
        help="Number of results",
    )
    cli.add_argument(
        "--type",
        help="Filter by document type (session, workspace, skill, memory)",
    )
    cli.add_argument(
        "--collection",
        default="openclaw_knowledge",
        help="Collection name",
    )
    cli.add_argument(
        "--interactive", "-i",
        action="store_true",
        help="Interactive mode",
    )

    options = cli.parse_args()

    # A --type value becomes a metadata filter; no value means no filter
    type_filter = {"type": options.type} if options.type else None

    if options.query and not options.interactive:
        search(
            query=options.query,
            n_results=options.num_results,
            filters=type_filter,
            collection_name=options.collection,
        )
    else:
        interactive_search(collection_name=options.collection)