diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ea5203..eca842e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,35 @@ All notable changes to the OpenClaw RAG Knowledge System will be documented in t --- +## [1.0.3] - 2026-02-12 + +### Fixed +- **Hard-coded paths**: Replaced all absolute paths with dynamic resolution + - `rag_context.py`: Now uses `os.path.dirname(os.path.abspath(__file__))` + - `scripts/rag-auto-update.sh`: Uses `$HOME`, `OPENCLAW_DIR`, and relative paths + - Removed hard-coded `/home/william/.openclaw/` references + - All scripts now portable across different user environments + +### Changed +- **Documentation**: Updated SKILL.md with path portability notes + - Documented that all paths use dynamic resolution + - Confirmed no custom network calls or external telemetry + - Added "Network Calls" section addressing security scan concerns +- **rag_query_wrapper.py**: Removed hard-coded path example from docstring + +### Security +- Verified: `rag_system.py` has no network calls (only imports chromadb) +- Verified: `scripts/rag-auto-update.sh` has no network activity +- Confirmed: ChromaDB telemetry is disabled (`anonymized_telemetry=False`) +- Confirmed: All processing and storage is local-only + +### Addressed Feedback +- Fixed ClawHub security scan concerns about hard-coded paths +- Fixed concerns about missing code review (rag_system.py is fully auditable) +- Documented network behavior (only model download by ChromaDB on first run) + +--- + ## [Unreleased] ### Planned diff --git a/SKILL.md b/SKILL.md index 2234ed9..bf83659 100644 --- a/SKILL.md +++ b/SKILL.md @@ -354,6 +354,15 @@ This skill integrates seamlessly with OpenClaw: - The ChromaDB persistence at `~/.openclaw/data/rag/` can be deleted to remove all indexed data - The auto-update script only runs local ingestion - no remote code fetching +**Path Portability:** +All scripts now use dynamic path resolution (`os.path.expanduser()`, `Path(__file__).parent`) for portability across 
different user environments. No hard-coded absolute paths remain in the codebase. + +**Network Calls:** +- The embedding model (all-MiniLM-L6-v2) is downloaded by ChromaDB over the network on first use (a one-time model download at runtime, not a pip install) +- No custom network calls, HTTP requests, or sub-process network operations +- No telemetry or data uploaded to external services (ChromaDB telemetry disabled) +- All processing and storage is local-only + ## Example Workflow **Scenario:** You're working on a new automation but hit a Cloudflare challenge. diff --git a/package.json b/package.json index c16553b..d54a9c7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "rag-openclaw", - "version": "1.0.2", + "version": "1.0.3", "description": "RAG Knowledge System for OpenClaw - Semantic search across chat history, code, docs, and skills with automatic memory retrieval", "homepage": "http://git.theta42.com/nova/openclaw-rag-skill", "author": { diff --git a/rag_context.py b/rag_context.py index d0b9229..325e97d 100644 --- a/rag_context.py +++ b/rag_context.py @@ -10,7 +10,9 @@ This prints relevant context if found, otherwise silent. """ import sys -sys.path.insert(0, '/home/william/.openclaw/workspace/rag') +import os +# Use current directory for imports (skill directory) +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from rag_query_wrapper import search_knowledge, format_for_ai diff --git a/rag_query_wrapper.py b/rag_query_wrapper.py index e8ec794..d72d328 100644 --- a/rag_query_wrapper.py +++ b/rag_query_wrapper.py @@ -6,8 +6,6 @@ This is designed for automatic RAG integration. The AI can call this function to retrieve relevant context from past conversations, code, and documentation. 
Usage (from within Python script or session): - import sys - sys.path.insert(0, '/home/william/.openclaw/workspace/rag') from rag_query_wrapper import search_knowledge results = search_knowledge("your question") print(results) diff --git a/scripts/rag-auto-update.sh b/scripts/rag-auto-update.sh index e332d4d..4efc34e 100755 --- a/scripts/rag-auto-update.sh +++ b/scripts/rag-auto-update.sh @@ -4,10 +4,16 @@ set -e +# Use dynamic paths for portability +HOME="${HOME:-$(cd ~ && pwd)}" +OPENCLAW_DIR="${OPENCLAW_DIR:-$HOME/.openclaw}" +WORKSPACE_DIR="${OPENCLAW_DIR}/workspace" + # Paths -RAG_DIR="/home/william/.openclaw/workspace/rag" -STATE_FILE="/home/william/.openclaw/workspace/memory/rag-auto-state.json" -LOG_FILE="/home/william/.openclaw/workspace/memory/rag-auto-update.log" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +RAG_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +STATE_FILE="$WORKSPACE_DIR/memory/rag-auto-state.json" +LOG_FILE="$WORKSPACE_DIR/memory/rag-auto-update.log" # Timestamp TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ") @@ -35,14 +41,14 @@ log() { # Get latest session file modification time latest_session_time() { - find ~/.openclaw/agents/main/sessions -name "*.jsonl" -type f -printf '%T@\n' 2>/dev/null | sort -rn | head -1 | cut -d. -f1 || echo "0" + find "$OPENCLAW_DIR/agents/main/sessions" -name "*.jsonl" -type f -printf '%T@\n' 2>/dev/null | sort -rn | head -1 | cut -d. 
-f1 || echo "0" } log "=== RAG Auto-Update Started ===" # Get current stats -SESSION_COUNT=$(find ~/.openclaw/agents/main/sessions -name "*.jsonl" | wc -l) -WORKSPACE_COUNT=$(find ~/.openclaw/workspace -type f \( -name "*.py" -o -name "*.js" -o -name "*.md" -o -name "*.json" \) | wc -l) +SESSION_COUNT=$(find "$OPENCLAW_DIR/agents/main/sessions" -name "*.jsonl" 2>/dev/null | wc -l) +WORKSPACE_COUNT=$(find "$WORKSPACE_DIR" -type f \( -name "*.py" -o -name "*.js" -o -name "*.md" -o -name "*.json" \) 2>/dev/null | wc -l) LATEST_SESSION=$(latest_session_time) # Read last indexed timestamp @@ -59,7 +65,7 @@ if [ "$LATEST_SESSION" -gt "$LAST_SESSION_INDEX" ]; then log "✓ New/updated sessions detected, re-indexing..." cd "$RAG_DIR" - python3 ingest_sessions.py --sessions-dir ~/.openclaw/agents/main/sessions >> "$LOG_FILE" 2>&1 + python3 ingest_sessions.py --sessions-dir "$OPENCLAW_DIR/agents/main/sessions" >> "$LOG_FILE" 2>&1 if [ $? -eq 0 ]; then log "✅ Sessions re-indexed successfully" @@ -99,9 +105,9 @@ fi DOC_COUNT=$(cd "$RAG_DIR" && python3 -c " import sys sys.path.insert(0, '.') -from rag_system import get_collection -collection = get_collection() -print(collection.count()) +from rag_system import RAGSystem +rag = RAGSystem() +print(rag.collection.count()) " 2>/dev/null || echo "unknown") # Update state