-- ================================================================================
-- FIND CORRECT FILES USING INDEXING & SCANNING TRANSACTION TYPE
-- ================================================================================
-- Strategy: Files uploaded during "indexing and scanning" transaction are the correct files
-- ================================================================================

-- STEP 1: Find the transaction type ID for "indexing and scanning"
-- ================================================================================
PRINT '1️⃣ Finding Indexing & Scanning Transaction Type';
PRINT '================================================================';

SELECT 
    Id,
    code,
    label,
    descr,
    category
FROM LRSAdmin.lr_dictionary
WHERE category = 'transaction_type'
AND (
    label LIKE '%index%' 
    OR label LIKE '%scan%'
    OR code LIKE '%index%'
    OR code LIKE '%scan%'
)
ORDER BY Id;

-- Also check lr_transaction_metadata
SELECT 
    Id,
    code,
    label,
    descr,
    category
FROM LRSAdmin.lr_transaction_metadata
WHERE label LIKE '%index%' 
OR label LIKE '%scan%'
OR code LIKE '%index%'
OR code LIKE '%scan%'
ORDER BY Id;

-- STEP 2: Get transactions for our problem documents
-- ================================================================================
PRINT '';
PRINT '2️⃣ Transactions for Problem Documents';
PRINT '================================================================';

SELECT 
    sd.document_number,
    sd.id AS document_id,
    sd.document_type,
    td.transaction_id,
    t.transaction_type,
    tm.label AS transaction_type_label,
    t.submission_date,
    t.register_date,
    t.transaction_number
FROM LRSAdmin.lr_source_document sd
JOIN LRSAdmin.lr_transaction_document td ON td.document_id = sd.id
JOIN LRSAdmin.lr_transaction t ON t.id = td.transaction_id
LEFT JOIN LRSAdmin.lr_transaction_metadata tm ON tm.Id = t.transaction_type
WHERE sd.document_number IN ('PL11089', 'PL689', 'BP102', 'PL6204', 'PL12321')
ORDER BY sd.document_number, sd.id, td.transaction_id;

-- STEP 3: Find ALL documents in the same indexing/scanning transactions
-- ================================================================================
PRINT '';
PRINT '3️⃣ All Documents in Indexing/Scanning Transactions';
PRINT '================================================================';

-- First, let's see what transaction types are most common for these documents
SELECT 
    tm.label AS transaction_type_label,
    t.transaction_type,
    COUNT(DISTINCT sd.document_number) AS doc_count,
    COUNT(DISTINCT td.transaction_id) AS txn_count
FROM LRSAdmin.lr_source_document sd
JOIN LRSAdmin.lr_transaction_document td ON td.document_id = sd.id
JOIN LRSAdmin.lr_transaction t ON t.id = td.transaction_id
LEFT JOIN LRSAdmin.lr_transaction_metadata tm ON tm.Id = t.transaction_type
WHERE sd.document_number IN ('PL11089', 'PL689', 'BP102', 'PL6204', 'PL12321')
GROUP BY tm.label, t.transaction_type
ORDER BY doc_count DESC;

-- STEP 4: Match Alfresco nodes to documents via transaction timing
-- ================================================================================
PRINT '';
PRINT '4️⃣ Matching Nodes to Documents via Transaction';
PRINT '================================================================';

-- For documents in indexing/scanning transactions, find Alfresco nodes uploaded near transaction date
SELECT 
    sd.document_number,
    sd.id AS document_id,
    sd.create_date AS doc_created,
    t.id AS transaction_id,
    t.submission_date AS txn_date,
    tm.label AS txn_type,
    np.string_value AS node_labeled_as,
    n.id AS node_id,
    cu.content_url
FROM LRSAdmin.lr_source_document sd
JOIN LRSAdmin.lr_transaction_document td ON td.document_id = sd.id
JOIN LRSAdmin.lr_transaction t ON t.id = td.transaction_id
LEFT JOIN LRSAdmin.lr_transaction_metadata tm ON tm.Id = t.transaction_type
CROSS APPLY (
    SELECT TOP 1 np.string_value, n.id
    FROM LRSAdmin.alf_node_properties np
    JOIN LRSAdmin.alf_qname q ON q.id = np.qname_id
    JOIN LRSAdmin.alf_node n ON n.id = np.node_id AND n.node_deleted = 0
    WHERE q.local_name = 'sourceRids'
    AND np.string_value IN ('PL11089', 'PL689', 'BP102', 'PL6204', 'PL12321')
    ORDER BY ABS(DATEDIFF(DAY, sd.create_date, CAST(n.audit_created AS datetime)))
) AS closest_node (string_value, id)
LEFT JOIN LRSAdmin.alf_node_properties np ON np.node_id = closest_node.id AND np.string_value = closest_node.string_value
LEFT JOIN LRSAdmin.alf_content_data cd ON cd.id = closest_node.id
LEFT JOIN LRSAdmin.alf_content_url cu ON cu.id = cd.content_url_id
WHERE sd.document_number IN ('PL11089', 'PL689', 'BP102', 'PL6204', 'PL12321')
AND sd.id IN (
    SELECT MIN(id) FROM LRSAdmin.lr_source_document 
    WHERE document_number IN ('PL11089', 'PL689', 'BP102', 'PL6204', 'PL12321')
    GROUP BY document_number
)
ORDER BY sd.document_number;

PRINT '';
PRINT '================================================================';
PRINT 'Analysis Complete';
PRINT '================================================================';

