#!/bin/bash
#
# Database Dump Cleanup Script
# Removes empty files, incomplete schemas, and renames files for clarity
#

set -e

DUMP_DIR="$(dirname "$0")"
cd "$DUMP_DIR"

echo "=========================================="
echo "Database Dump Cleanup"
echo "=========================================="
echo ""

# Backup original files
BACKUP_DIR="backup_$(date +%Y%m%d_%H%M%S)"
echo "📦 Creating backup in: $BACKUP_DIR"
mkdir -p "$BACKUP_DIR"
cp -a *.sql *.txt "$BACKUP_DIR/" 2>/dev/null || true
echo "✅ Backup created"
echo ""

# 1. Delete empty data file
echo "🗑️  Removing empty data file..."
if [ -f "data_20251113_185158.sql" ]; then
    rm -v "data_20251113_185158.sql"
    echo "✅ Removed: data_20251113_185158.sql"
else
    echo "ℹ️  File already removed"
fi
echo ""

# 2. Delete incomplete schema file
echo "🗑️  Removing incomplete schema file..."
if [ -f "schema_20251113_185123.sql" ]; then
    rm -v "schema_20251113_185123.sql"
    echo "✅ Removed: schema_20251113_185123.sql (incomplete)"
else
    echo "ℹ️  File already removed"
fi
echo ""

# 3. Rename data files to indicate they're samples
echo "📝 Renaming data files to indicate sample data..."
for f in data_*.sql; do
    if [ -f "$f" ]; then
        new_name="${f%.sql}_SAMPLE_1000_ROWS.sql"
        if [ "$f" != "$new_name" ]; then
            mv -v "$f" "$new_name"
        fi
    fi
done
echo "✅ Files renamed"
echo ""

# 4. Create README
echo "📄 Creating README.md..."
cat > README.md << 'EOF'
# Database Dump - LRS43 (MSSQL → SQL Migration)

**Source Database:** LRS43 (MSSQL/SQL Server)  
**Target Database:** SQL (PostgreSQL/MySQL)  
**Export Date:** 2025-11-13  
**Type:** Sample Data (1000 rows per table) - **Intentional for migration testing**

## ⚠️ IMPORTANT NOTICE

This is a **SAMPLE DATA DUMP** for migration testing from MSSQL to SQL (PostgreSQL/MySQL). Each table contains only the first 1,000 rows by design.

**Migration Context:**
- Source: MSSQL (SQL Server) with MSSQL-specific syntax
- Target: PostgreSQL or MySQL (requires syntax conversion)
- Purpose: Schema analysis and migration testing

**DO NOT USE FOR PRODUCTION RESTORATION** - This is sample data only.

## Files

### Schema
- `schema_20251113_185155.sql` - Complete schema for all 168 tables

### Sample Data (1000 rows each)
- `data_alf_content_data_SAMPLE_1000_ROWS.sql` - Content data (2,088,009 total rows)
- `data_alf_content_url_SAMPLE_1000_ROWS.sql` - Content URLs (2,078,163 total rows)
- `data_alf_node_properties_SAMPLE_1000_ROWS.sql` - Node properties (38,838,046 total rows)
- `data_alf_node_SAMPLE_1000_ROWS.sql` - Nodes (3,559,056 total rows)
- `data_alf_qname_SAMPLE_1000_ROWS.sql` - QNames (278 total rows - COMPLETE ✅)
- `data_lr_dictionary_SAMPLE_1000_ROWS.sql` - Dictionary (3,691 total rows)
- `data_lr_party_SAMPLE_1000_ROWS.sql` - Parties (167,223 total rows)
- `data_lr_source_document_SAMPLE_1000_ROWS.sql` - Source documents (315,496 total rows)
- `data_lr_transaction_document_SAMPLE_1000_ROWS.sql` - Transaction documents (360,551 total rows)
- `data_lr_transaction_SAMPLE_1000_ROWS.sql` - Transactions (109,868 total rows)

### Statistics
- `statistics_20251113_185409.txt` - Row counts for all 168 tables

## Usage

### View Statistics
```bash
cat statistics_20251113_185409.txt
```

### Import Schema Only
```bash
sqlcmd -S server -d database -i schema_20251113_185155.sql
```

### Import Sample Data
```bash
for f in data_*_SAMPLE_1000_ROWS.sql; do
    sqlcmd -S server -d database -i "$f"
done
```

## Migration Notes

**MSSQL → SQL Syntax Conversion Required:**
- Square brackets `[column]` → Remove or use `"column"` (PostgreSQL) / `` `column` `` (MySQL)
- `IDENTITY(1,1)` → `SERIAL` (PostgreSQL) or `AUTO_INCREMENT` (MySQL)
- `GO` statements → Remove (not needed for PostgreSQL/MySQL)
- `TOP N` → `LIMIT N` (if present in queries)

**Other Notes:**
- No transaction wrappers - each INSERT auto-commits
- Only 11 of 168 tables have data dumps (key tables for migration)
- For full migration, modify `scripts/dump_mssql_database.py` to remove `limit=1000`
- Consider using migration tools: `pgloader` (PostgreSQL) or `mysqlimport` (MySQL)

## Cleanup

Run `cleanup_database_dump.sh` to:
- Remove empty files
- Remove duplicate/incomplete schemas
- Rename files for clarity
EOF

echo "✅ README.md created"
echo ""

# Summary
echo "=========================================="
echo "Cleanup Complete!"
echo "=========================================="
echo ""
echo "📊 Summary:"
echo "   - Backup created: $BACKUP_DIR/"
echo "   - Empty files removed"
echo "   - Incomplete schema removed"
echo "   - Data files renamed (sample indicator)"
echo "   - README.md created"
echo ""
echo "📁 Current files:"
ls -lh *.sql *.txt 2>/dev/null | awk '{print "   " $9 " (" $5 ")"}'
echo ""
echo "✅ Done!"

