#!/bin/bash
#
# Use transaction/timestamp matching to find correct file associations
#
# Stop at the first command that fails outside of a condition.
set -o errexit

# Base URL of the API; an already-set, non-empty API_BASE takes precedence.
: "${API_BASE:=http://localhost:8001}"

# Intro banner plus a short description of the matching strategy.
cat <<'BANNER'
==========================================
Finding Correct Files Using Timestamps
==========================================

Strategy: Match document creation time with node creation time
Documents and their files should have been created at similar times.

BANNER

# Restart the server so the timestamp matching endpoint is available.
echo "1️⃣ Restarting server with timestamp matching endpoint..."
cd /home/plagis/workspace/plagis_aumentum
# Best effort: pkill exits non-zero when no process matched, which must not
# abort the script under `set -e`.
pkill -9 -f "python.*aumentum_api" 2>/dev/null || true
sleep 2  # pause after the kill before launching the new instance

source venv/bin/activate
python3 aumentum_api.py > /tmp/api_timestamp.log 2>&1 &

# Probe the same base URL that the later requests use (API_BASE), rather than
# a hard-coded host/port, and poll instead of relying on one fixed sleep so a
# slow start is tolerated and a fast start is not delayed.
health_url="${API_BASE:-http://localhost:8001}/health"
server_ready=0
for _ in {1..30}; do
    # --max-time keeps a hung connection from stalling the whole poll loop.
    if curl -s --max-time 2 "$health_url" > /dev/null 2>&1; then
        server_ready=1
        break
    fi
    sleep 1
done

if [ "$server_ready" -ne 1 ]; then
    echo "❌ Server failed to start"
    echo "   See /tmp/api_timestamp.log for details"
    exit 1
fi

echo "✅ Server started"
echo ""

echo "2️⃣ Analyzing documents by timestamp..."
echo ""

# Analyze each problem document: fetch the timestamp-based candidates from the
# API, list all of them, then highlight the first one and compare its label
# with the document number.
for doc in "PL11089" "PL689" "BP102" "PL6204" "PL12321"; do
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    echo "Analyzing: $doc"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    # A failed request (e.g. connection refused) must not abort the script
    # under `set -e`; fall back to an empty response so the error branch below
    # reports it and the remaining documents are still analyzed.
    response=$(curl -s "$API_BASE/debug/match-by-transaction-time?document_number=$doc" 2>/dev/null) || response=""

    # Require at least one match. Testing `.potential_matches` alone succeeds
    # for an empty array, which would then be reported as a mismatch against a
    # node labeled "null".
    if echo "$response" | jq -e '.potential_matches[0]' > /dev/null 2>&1; then
        # Show document creation time
        doc_created=$(echo "$response" | jq -r '.document_info[0].create_date')
        echo "Document created: $doc_created"
        echo ""

        echo "Top matches by timestamp proximity:"
        echo "$response" | jq -r '.potential_matches[] | "  \(.confidence): Node labeled \"\(.node_labeled_as)\" (±\(.hours_diff)h)\n    URL: \(.content_url)\n    Created: \(.node_created_time)"'
        echo ""

        # Take the first entry as the strongest match.
        # NOTE(review): presumably the API returns matches ordered by
        # proximity — confirm against the endpoint implementation.
        best_match=$(echo "$response" | jq -r '.potential_matches[0]')
        best_label=$(echo "$best_match" | jq -r '.node_labeled_as')
        best_url=$(echo "$best_match" | jq -r '.content_url')
        hours_diff=$(echo "$best_match" | jq -r '.hours_diff')

        echo "🎯 BEST MATCH for $doc:"
        echo "   Node labeled: $best_label"
        echo "   Time diff: ${hours_diff}h"
        echo "   URL: $best_url"
        echo ""

        if [ "$doc" != "$best_label" ]; then
            echo "   ⚠️  MISMATCH DETECTED!"
            echo "   → $doc should use node labeled '$best_label'"
            echo ""
        else
            echo "   ✅ Already correct (document matches node label)"
            echo ""
        fi
    else
        # Reached for request failures, non-JSON bodies, and empty match lists.
        echo "  ❌ API error or not found"
    fi

    echo ""
done

echo "=========================================="
echo "Building Recommended Mapping"
echo "=========================================="
echo ""

echo "Based on timestamp analysis, the CORRECT mapping should be:"
echo ""
echo "| Document | Should Use Node Labeled | Recommendation |"
echo "|----------|------------------------|----------------|"

# One table row per document, based on the first potential match returned by
# the API for that document.
for doc in "PL11089" "PL689" "BP102" "PL6204" "PL12321"; do
    # A failed request must not abort the script under `set -e`; an empty
    # response falls through to the "No match found" row instead.
    response=$(curl -s "$API_BASE/debug/match-by-transaction-time?document_number=$doc" 2>/dev/null) || response=""

    # jq -e exits non-zero when the first match is missing/null or the body is
    # not valid JSON.
    if echo "$response" | jq -e '.potential_matches[0]' > /dev/null 2>&1; then
        best_label=$(echo "$response" | jq -r '.potential_matches[0].node_labeled_as')
        hours_diff=$(echo "$response" | jq -r '.potential_matches[0].hours_diff')

        if [ "$doc" = "$best_label" ]; then
            echo "| $doc | $best_label | ✅ Correct (no fix needed) |"
        else
            echo "| $doc | $best_label (±${hours_diff}h) | 🔄 Use this node's file |"
        fi
    else
        echo "| $doc | ??? | ❌ No match found |"
    fi
done

# Closing section: what to do with the results printed above.
# Each argument is emitted on its own line; empty arguments are blank lines.
printf '%s\n' \
    "" \
    "==========================================" \
    "Next Steps" \
    "==========================================" \
    "" \
    "1. Review the timestamp matches above" \
    "2. The closest timestamp matches are most likely correct" \
    "3. Update CORRECT_FILE_MAPPING if recommendations differ from current mapping" \
    "" \
    "📋 Save this output for reference!" \
    ""

