#!/bin/bash
#
# Clear PDF cache and test fresh generation for PL11089
#
# Abort as soon as any simple command exits non-zero.
set -e

# Service endpoint and on-disk PDF cache location. Either one may be supplied
# through the environment; the values below are used only when the variable is
# unset or empty.
: "${API_BASE:=http://localhost:8001}"
: "${CACHE_DIR:=/tmp/aumentum_pdfs}"

# ANSI colour escapes. They are stored with literal backslashes and are only
# turned into real escape sequences by the `echo -e` calls that use them.
NC='\033[0m' # No Color
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Header banner: title plus the effective configuration, then a blank line.
banner_rule="=========================================="
printf '%s\n' \
    "$banner_rule" \
    "Clear Cache and Test PL11089" \
    "$banner_rule" \
    "API Base: $API_BASE" \
    "Cache Dir: $CACHE_DIR" \
    ""

# Step 1: Clear cache
echo -e "${BLUE}Step 1: Clearing PDF cache${NC}"
echo "----------------------------"

# count_cached_pdfs DIR
#   Print the number of entries in DIR whose name ends in ".pdf".
#   Counting is done with a shell glob rather than by parsing `ls` output, so
#   names containing spaces or newlines are each counted exactly once and the
#   result is a bare integer with no padding.
count_cached_pdfs() {
    local dir=$1
    local entry
    local total=0
    for entry in "$dir"/*.pdf; do
        # With no match the glob stays as the literal pattern; skip it.
        # The -L test keeps dangling symlinks in the count.
        [ -e "$entry" ] || [ -L "$entry" ] || continue
        total=$((total + 1))
    done
    printf '%s\n' "$total"
}

if [ -d "$CACHE_DIR" ]; then
    echo "Found cache directory: $CACHE_DIR"

    # Count files before
    file_count=$(count_cached_pdfs "$CACHE_DIR")
    echo "Files in cache: $file_count"

    # Clear PL11089 and PL689 files specifically.
    # `--` stops option parsing in case CACHE_DIR begins with a dash.
    rm -f -- "$CACHE_DIR"/PL11089*.pdf "$CACHE_DIR"/PL689*.pdf
    echo -e "${GREEN}✅ Cleared PL11089 and PL689 cache files${NC}"
else
    echo "Cache directory does not exist: $CACHE_DIR"
fi

echo ""

# Step 2: Get document IDs
echo -e "${BLUE}Step 2: Getting document IDs${NC}"
echo "----------------------------"

# fetch_document_json DOC_NUMBER
#   Query the by-document-number endpoint for DOC_NUMBER and write the raw
#   response body to stdout.
#   Returns non-zero, after printing a message to stderr, when curl itself
#   fails. -S lets curl print its own error text, which -s alone would hide.
#   --fail is deliberately not used here: the response body is still shown and
#   parsed whatever the HTTP status is.
fetch_document_json() {
    local doc_number=$1
    if ! curl -sS "$API_BASE/documents/by-document-number?document_number=$doc_number"; then
        echo -e "${RED}❌ Request to $API_BASE failed while querying $doc_number${NC}" >&2
        return 1
    fi
}

echo "Querying PL11089..."
# Under the script's `set -e`, a failed fetch stops the run on this assignment.
pl11089_response=$(fetch_document_json "PL11089")
jq '.' <<<"$pl11089_response"

# First item's id (empty string when absent) and page count (0 when absent).
pl11089_doc_id=$(jq -r '.items[0].id // empty' <<<"$pl11089_response")
pl11089_pages=$(jq -r '.items[0].page_count // 0' <<<"$pl11089_response")

echo ""
echo "Querying PL689..."
pl689_response=$(fetch_document_json "PL689")
jq '.' <<<"$pl689_response"

pl689_doc_id=$(jq -r '.items[0].id // empty' <<<"$pl689_response")
pl689_pages=$(jq -r '.items[0].page_count // 0' <<<"$pl689_response")

echo ""
echo "Summary:"
echo "  PL11089: Document ID = $pl11089_doc_id, Pages = $pl11089_pages"
echo "  PL689:   Document ID = $pl689_doc_id, Pages = $pl689_pages"

echo ""

# Step 3: Get content URLs to see what files are associated
echo -e "${BLUE}Step 3: Checking content URL associations${NC}"
echo "----------------------------"

# Directory that holds diagnose_image_associations.py. It can be overridden
# from the environment; the default is the path this script has always used.
DIAG_DIR="${DIAG_DIR:-/home/plagis/workspace/plagis_aumentum}"

# The diagnostic only runs when the PL11089 lookup produced a document id.
if [ -n "$pl11089_doc_id" ]; then
    echo "Running diagnostic script..."
    # Run inside a subshell so the directory change does not leak into the
    # remaining steps. A non-zero exit from the subshell still stops the
    # script under `set -e`, as a failing cd or python3 did before.
    (
        cd "$DIAG_DIR" || {
            echo -e "${RED}❌ Cannot enter diagnostic directory: $DIAG_DIR${NC}" >&2
            exit 1
        }
        python3 diagnose_image_associations.py
    )
fi

echo ""

# Step 4: Generate fresh PDF for PL11089

# is_pdf_file PATH
#   Succeeds only when PATH is a non-empty regular file whose first four bytes
#   are "%PDF". Used to tell a real PDF from a saved error body or an empty
#   file.
is_pdf_file() {
    local path=$1
    local magic
    [ -f "$path" ] || return 1
    [ -s "$path" ] || return 1
    # Drop NUL bytes so arbitrary binary content never trips the substitution.
    magic=$(head -c 4 "$path" 2>/dev/null | tr -d '\0')
    [ "$magic" = "%PDF" ]
}

if [ -n "$pl11089_doc_id" ]; then
    echo -e "${BLUE}Step 4: Generating FRESH PDF for PL11089${NC}"
    echo "----------------------------"
    pdf_url="$API_BASE/documents/pdf-by-document-number?document_number=PL11089&document_id=$pl11089_doc_id"
    pdf_out=/tmp/test_PL11089_fresh.pdf
    echo "Document ID: $pl11089_doc_id"
    echo "API URL: $pdf_url"

    # Remove any file left by an earlier run so a failed download cannot be
    # mistaken for a fresh one.
    rm -f -- "$pdf_out"

    # Use curl with verbose output to see what's happening. --fail makes an
    # HTTP error status a curl failure instead of saving the error body as the
    # "PDF". The exit status is captured so `set -e` does not end the script
    # before the result is reported.
    curl_status=0
    curl_log=$(curl -v --fail "$pdf_url" --output "$pdf_out" 2>&1) || curl_status=$?
    # Show only request/response header lines. grep exits 1 when nothing
    # matches, which must not abort the script.
    grep -E "(< |> |HTTP|X-Document)" <<<"$curl_log" || true

    if [ "$curl_status" -eq 0 ] && is_pdf_file "$pdf_out"; then
        # wc -c works on both GNU and BSD; the arithmetic expansion strips any
        # padding around the number.
        file_size=$(( $(wc -c < "$pdf_out") ))
        echo -e "${GREEN}✅ PDF downloaded: /tmp/test_PL11089_fresh.pdf${NC}"
        echo "   Size: $file_size bytes"
        echo ""
        echo "📋 Please manually verify this PDF contains PL11089 content (not PL689)"
        echo "   Open: /tmp/test_PL11089_fresh.pdf"
    else
        echo -e "${RED}❌ Failed to download PDF${NC}"
        if [ "$curl_status" -ne 0 ]; then
            echo "   curl exit status: $curl_status"
        else
            echo "   Response saved to $pdf_out is not a PDF"
        fi
    fi
else
    echo -e "${YELLOW}⚠️  PL11089 not found in database, skipping PDF generation${NC}"
fi

# Closing banner and follow-up checklist. The quoted delimiter keeps the text
# literal: nothing inside the here-document is expanded.
cat <<'EOF'

==========================================
Test Complete
==========================================

Next steps:
1. Check the diagnostic output above for database associations
2. Open /tmp/test_PL11089_fresh.pdf and verify it's the correct document
3. If it's still showing PL689 content, the database has incorrect associations
EOF

