<?php
/**
 * Created by PhpStorm.
 * User: cbarranco
 * Date: 5/13/16
 * Time: 10:16 AM
 */

namespace Visionware\DataManager;

use AWS;
use Closure;
use Cron\CronExpression;
use DB;
use League\Flysystem\Exception;
use Visionware\DataManager\Definition\TableDefinition;
use Visionware\DataManager\Facades\DataManager;
use Carbon\Carbon;
use Illuminate\Database\Connection;
use Visionware\DataManager\Exceptions\IngestionFieldsMismatchException;
use Visionware\DataManager\Exceptions\RemoteFileDownloadException;
use Visionware\DataManager\Exceptions\RemoteFileNotFoundException;
use Visionware\DataManager\Exceptions\UnableToCreateLoadFileException;
use Visionware\DataManager\Info\SchemaInfo;
use Visionware\DataManager\Info\TableInfo;

class Ingester extends DataManagerProcess {
    /**
     * @var Connection
     */
    protected $db;
    protected $allVersions;
    protected $currentFile;
    protected $s3;
    protected $skipTransactions;
    protected $latestOnly;
    protected $newerThan;
    protected $newestFileOnly;

    /**
     * Builds an ingester for the given schema with every option switched off.
     *
     * The history connection is obtained from the DataManager facade; the
     * option flags can be enabled afterwards through the dedicated setters.
     *
     * @param SchemaInfo $schema Passed straight to the parent constructor.
     */
    public function __construct(SchemaInfo $schema) {
        parent::__construct($schema);

        $this->db = DataManager::getHistoryConnection();

        // Boolean switches: all disabled until a caller opts in.
        $this->allVersions = $this->latestOnly = false;
        $this->skipTransactions = $this->newestFileOnly = false;

        // 0 means "no lower date bound" for the version filter.
        $this->newerThan = 0;
    }

    /**
     * Enables the all-versions mode: ingestFileVersions() then sorts the S3
     * versions by LastModified and walks every one of them instead of
     * stopping after the first processed version.
     */
    public function allVersions() {
        $this->allVersions = true;
    }

    /**
     * Enables latest-only mode: go() skips ingesting files and only calls
     * updateLatest().
     */
    public function latestOnly() {
        $this->latestOnly = true;
    }

    /**
     * Disables the beginTransaction/commit pair around a file ingestion and
     * the rollBack performed by go() when an exception is caught.
     */
    public function noTransactions() {
        $this->skipTransactions = true;
    }

    /**
     * Sets the lower date bound for versions to ingest.
     *
     * ingestFileVersions() compares this value against each version's
     * LastModified formatted as 'Y-m-d' and skips versions that are not
     * strictly newer.
     *
     * @param string $date Date bound; surrounding whitespace is trimmed.
     *                     NOTE(review): presumably expected in 'Y-m-d' form to
     *                     make the comparison meaningful - confirm with callers.
     */
    public function newerThan($date) {
        $this->newerThan = trim($date);
    }

    /**
     * Restricts ingestion to S3 versions flagged IsLatest; every other
     * version is skipped by ingestFileVersions().
     */
    public function newestFileOnly() {
        $this->newestFileOnly = true;
    }

    /**
     * Runs one ingestion pass for the current table.
     *
     * Records the start time, evaluates the table's cron "run if" / "ignore
     * if" expressions, ingests every matching S3 object (by prefix + regex,
     * or by regex alone, and then by explicit file name), rebuilds the latest
     * table and finally records the end time.
     *
     * When latestOnly is set no file is ingested; only the latest table is
     * rebuilt, exactly once.
     *
     * @throws \Exception Anything thrown while ingesting is logged, the
     *                    transaction is rolled back (unless transactions are
     *                    disabled) and the exception is rethrown.
     */
    public function go() {
        $start = Carbon::now();
        $this->s3 = AWS::createClient('s3');
        $import_files = $this->definition->getIngestFiles();
        $import_files_prefixes = $this->definition->getIngestFilePrefixes();
        $import_files_regex = $this->definition->getIngestFileRegex();

        $this->setLogPrefix($this->tableName);

        $this->notice("Starting to ingest table");

        $this->updateStartTime($this->tableName, $start);

        try {
            if ($this->cronAllowsRun()) {
                if (!$this->latestOnly) {
                    if (count($import_files_prefixes) > 0) {
                        foreach ($import_files_prefixes as $key => $prefix) {
                            // The prefix is a literal S3 key prefix, so it must be quoted before
                            // being embedded in the pattern (S3 prefixes routinely contain "/").
                            $suffix = isset($import_files_regex[$key]) ? $import_files_regex[$key] : '';
                            $regex = '/' . preg_quote($prefix, '/') . $suffix . '/';
                            $this->ingestKeysMatching($regex, $prefix);
                        }
                    } else if (count($import_files_regex) > 0) {
                        foreach ($import_files_regex as $regex) {
                            $this->ingestKeysMatching('/' . $regex . '/');
                        }
                    }

                    foreach ($import_files as $fileName) {
                        $this->info("Starting to ingest $fileName");
                        $this->ingestFileVersions($fileName);
                        $this->info("Finished ingesting $fileName");
                    }
                }

                $this->updateLatest();
            } else {
                $this->notice("Skipping ingestion do to cron restraints.");
            }
        } catch (\Exception $e) {
            $this->critical("AHHHHHH EXCEPTION CAUGHT WHILE INGESTING {$this->tableName}! AHHHHHHHHHHHHHHHHH", ['exception' => $e]);
            if (!$this->skipTransactions) $this->db->rollBack();
            throw $e;
        }

        $end = Carbon::now();
        $diff = $end->diffForHumans($start, true);
        $this->updateEndTime($this->tableName, $end, $end->diffInSeconds($start, true), $diff);
        $this->notice("Finished ingesting table in $diff total");
    }

    /**
     * Evaluates the cron constraints of the table definition.
     *
     * The run is allowed when at least one "run if" entry is due (an entry
     * equal to false counts as always due) and no "ignore if" entry is due.
     * An empty "run if" list therefore never allows a run.
     *
     * @return bool
     */
    private function cronAllowsRun() {
        $conTimezone = config('marcolin-api.cron_timezone', 'UTC');
        $now = Carbon::now($conTimezone)->toDateTimeString();

        $shouldRunCron = false;
        foreach ($this->definition->getIngestFileCronRunIf() as $cronRunIfString) {
            if ($cronRunIfString !== false) {
                // Short-circuit keeps later expressions unparsed once a match was found.
                $shouldRunCron = $shouldRunCron || CronExpression::factory($cronRunIfString)->isDue($now);
            } else {
                $shouldRunCron = true;
            }
        }

        $shouldIgnoreCron = false;
        foreach ($this->definition->getIngestFileCronIgnoreIf() as $cronIgnoreIfString) {
            if ($cronIgnoreIfString !== false) {
                $shouldIgnoreCron = $shouldIgnoreCron || CronExpression::factory($cronIgnoreIfString)->isDue($now);
            }
        }

        return $shouldRunCron && !$shouldIgnoreCron;
    }

    /**
     * Ingests every object of the import bucket whose key matches $regex,
     * in ascending key order.
     *
     * @param string      $regex  Complete PCRE pattern, delimiters included.
     * @param string|null $prefix Optional literal S3 key prefix used to narrow the listing.
     */
    private function ingestKeysMatching($regex, $prefix = null) {
        foreach ($this->listIngestKeysMatching($regex, $prefix) as $fileName) {
            $this->info("Starting to ingest $fileName");
            $this->ingestFileVersions($fileName);
            $this->info("Finished ingesting $fileName");
        }
    }

    /**
     * Lists the keys of the import bucket that match $regex.
     *
     * The listing is followed page by page (1000 keys per request) so that
     * buckets holding more than one page of objects are fully covered.
     *
     * @param string      $regex  Complete PCRE pattern, delimiters included.
     * @param string|null $prefix Optional literal S3 key prefix.
     *
     * @return array Matching keys, indexed by themselves and sorted ascending.
     */
    private function listIngestKeysMatching($regex, $prefix = null) {
        $params = [
            'Bucket' => config('datamanager.import-bucket'),
            'MaxKeys' => 1000,
        ];
        if ($prefix !== null) {
            $params['Prefix'] = $prefix;
        }

        $keys = [];
        do {
            $result = $this->s3->listObjects($params);

            // 'Contents' is absent when the listing is empty.
            $contents = $result['Contents'] ?: [];
            $lastKey = null;
            foreach ($contents as $object) {
                $lastKey = $object['Key'];

                // Only a definite match (1) qualifies; 0 (no match) and false (regex error) are skipped.
                if (preg_match($regex, $object['Key']) !== 1) continue;

                $keys[$object['Key']] = $object['Key'];
            }

            // Without a delimiter S3 returns no NextMarker; the last key of the page is the marker.
            $hasMore = $result['IsTruncated'] && $lastKey !== null;
            if ($hasMore) {
                $params['Marker'] = $lastKey;
            }
        } while ($hasMore);

        asort($keys);

        return $keys;
    }

    /**
     * Returns the S3 version ids already recorded in the "<table>_files"
     * table for the given file name.
     *
     * The result is always a plain PHP array so that callers can hand it to
     * array functions such as in_array(), regardless of whether the query
     * builder's pluck() yields an array or a collection object.
     *
     * @param string $fileName Value matched against the file_name column.
     *
     * @return array List of version_id values (empty when none is recorded).
     */
    private function getIngestedVersions($fileName) {
        $versions = $this->db->table($this->tableName . '_files')
            ->where('file_name', $fileName)
            ->pluck('version_id')
        ;

        // Newer query builders return a collection object; unwrap it.
        if (is_object($versions) && method_exists($versions, 'all')) {
            $versions = $versions->all();
        }

        return (array) $versions;
    }

    /**
     * Ingests the S3 versions listed under the given key prefix.
     *
     * Versions with the literal id 'null', versions that are not the latest
     * (when newestFileOnly is set) and versions whose date is not newer than
     * the newerThan bound are skipped. A version is ingested when force is
     * set or when its id is not yet recorded for $fileName. Unless
     * allVersions is set, processing stops after the first version that
     * passes the filters; with allVersions the versions are processed from
     * oldest to newest.
     *
     * NOTE(review): 'Prefix' is a prefix match, so versions of other keys that
     * merely start with $fileName are listed too - confirm this is intended.
     * NOTE(review): the version listing is not paginated.
     *
     * @param string $fileName S3 key (used as listing prefix and as file_name lookup).
     */
    private function ingestFileVersions($fileName) {
        $ingestedVersions = $this->getIngestedVersions($fileName);
        $result = $this->s3->listObjectVersions([
            'Bucket' => config('datamanager.import-bucket'),
            'Prefix' => $fileName,
        ]);

        // 'Versions' is absent when nothing matches the prefix.
        $versions = $result['Versions'] ?: [];
        if ($this->allVersions) {
            // Proper three-way comparison: oldest version first.
            usort($versions, function($a, $b) {
                if ($a['LastModified'] == $b['LastModified']) return 0;
                return ($a['LastModified'] < $b['LastModified']) ? -1 : 1;
            });
        }

        foreach ($versions as $version) {
            if ($version['VersionId'] == 'null') continue;
            if ($this->newestFileOnly && $version['IsLatest'] === false) continue;
            if ($version['LastModified']->format('Y-m-d') <= $this->newerThan) continue;
            $this->currentFile = $version;

            if ($this->force || !in_array((string) $version['VersionId'], $ingestedVersions, true)) {
                $this->notice("Ingesting file {$this->currentFile['Key']} version {$this->currentFile['LastModified']->format('Y-m-d H:i:s')}...");
                $this->ingestFile();
            } else {
                $this->info("File {$this->currentFile['Key']} version {$this->currentFile['LastModified']->format('Y-m-d H:i:s')} has already been ingested, skipping...");
            }
            if (!$this->allVersions) break;
        }
    }

    /**
     * Ingests the S3 object version held in $this->currentFile.
     *
     * Steps: download the object, convert it into a LOAD DATA file, truncate
     * the staging table, then (inside a transaction unless disabled) load the
     * staging table and propagate it to the records, files and file_record
     * tables. Temporary files created along the way are removed afterwards,
     * whether the ingestion succeeded or not.
     *
     * @return bool true when the file was ingested, false when the load file
     *              could not be built because of a field mismatch.
     *
     * @throws \RuntimeException When the definition says the date is part of
     *                           the file name but no timestamp can be parsed from it.
     */
    private function ingestFile() {
        $fileName = $this->currentFile['Key'];

        if ($this->definition->isDateInFilename()) {
            // The timestamp is the text between the last "_" and the last ".".
            $underscorePos = strrpos($fileName, '_');
            $dotPos = strrpos($fileName, '.');
            $lastModifiedString = substr($fileName, $underscorePos + 1, $dotPos - $underscorePos - 1);
            if (strlen($lastModifiedString) == 8) {
                // Date without a time part: assume midnight.
                $lastModifiedString .= '000000';
            }
            $parsedDate = date_create_from_format('YmdHis', $lastModifiedString);
            if ($parsedDate === false) {
                throw new \RuntimeException("Unable to read a YmdHis timestamp from file name $fileName");
            }
            $lastModified = date_format($parsedDate, 'Y-m-d H:i:s');
        } else {
            $lastModified = $this->currentFile['LastModified']->format('Y-m-d H:i:s');
        }

        $temporaryFiles = [];
        try {
            $this->info("Downloading file...");
            $localPath = $this->downloadFile();
            $loadFilePath = $localPath . '.load';
            // buildLoadFile() may additionally write "<load file>.sorted".
            $temporaryFiles = [$localPath, $loadFilePath, $loadFilePath . '.sorted'];

            $this->info("Building LOAD DATA file...");
            $loadColumns = $this->buildLoadFile($localPath, $loadFilePath, $fileName, $lastModified);
            if ($loadColumns === false) return false;

            // NOTE(review): the truncate is issued before the transaction starts, presumably
            // because TRUNCATE cannot take part in a transaction - confirm.
            $this->info("Truncating staging table...");
            $this->db->table("{$this->tableName}_staging")->truncate();

            if (!$this->skipTransactions) $this->db->beginTransaction();

            $this->info("Loading data into staging table...");
            $this->loadData($loadFilePath, $loadColumns);

            $this->info("Updating records table from staging...");
            $this->updateRecords();

            $this->info("Updating files table from staging...");
            $this->updateFiles();

            $fileId = $this->db->table("{$this->tableName}_files")->where('file_name', '=', $fileName)->where('modified_time', '=', $lastModified)->value('id');
            if ($fileId) {
                $this->info("Deleting old file_record rows...");
                $this->deleteOldRecords($fileId);
            }

            $this->info("Updating file_record table from staging...");
            $this->updateFileRecords();

            if (!$this->skipTransactions) $this->db->commit();

            return true;
        } finally {
            foreach ($temporaryFiles as $temporaryFile) {
                if (is_file($temporaryFile)) unlink($temporaryFile);
            }
        }
    }

    /**
     * Collects the names of the definition fields that have an import field.
     *
     * @return array Field names, in definition order.
     */
    private function getDestinationColumns() {
        $names = [];

        foreach ($this->definition->fields() as $candidate) {
            if ($candidate->hasImportField()) {
                $names[] = $candidate->name();
            }
        }

        return $names;
    }

    /**
     * Downloads the S3 object version held in $this->currentFile into the
     * system temp directory and returns the local path.
     *
     * The local path mirrors the S3 key, so any intermediate directory the
     * key implies is created first.
     *
     * @return string Path of the downloaded file.
     *
     * @throws \RuntimeException When the target directory cannot be created.
     */
    private function downloadFile() {
        $localPath = sys_get_temp_dir() . DIRECTORY_SEPARATOR . $this->currentFile['Key'];

        // Keys such as "folder/file.csv" need "folder" to exist below the temp dir.
        $directory = dirname($localPath);
        if (!is_dir($directory) && !mkdir($directory, 0777, true) && !is_dir($directory)) {
            throw new \RuntimeException("Unable to create directory $directory");
        }

        $this->info("Downloading file {$this->currentFile['Key']} version {$this->currentFile['LastModified']->format('Y-m-d H:i:s')}...");

        // Reuse the client created by go(); only build one when none is available.
        $s3 = $this->s3 ?: AWS::createClient('s3');
        $s3->getObject([
            'Bucket' => config('datamanager.import-bucket'),
            'Key' => $this->currentFile['Key'],
            'VersionId' => $this->currentFile['VersionId'],
            'SaveAs' => $localPath,
        ]);
        return $localPath;
    }

    /**
     * Converts a downloaded delimited file into a comma separated file
     * suitable for LOAD DATA.
     *
     * The first non-empty line of the source is the header. Every following
     * non-empty line whose field count matches the header is mapped through
     * the definition's field/column map and written, prefixed with the
     * bookkeeping columns record_hash, file_name, file_modified_time,
     * file_version_id and sequence. Lines with a different field count are
     * logged and skipped. The output is then sorted with the external "sort"
     * command when available.
     *
     * @param string $localPath    Path of the downloaded source file.
     * @param string $loadFilePath Path to write; replaced by the sorted file's
     *                             path when sorting succeeds (passed by reference).
     * @param string $fileName     Value written to the file_name column.
     * @param string $lastModified Value written to the file_modified_time column.
     *
     * @return string|false Comma separated column list matching the written
     *                      rows, or false when the header lacks a mapped field
     *                      and allVersions is set.
     *
     * @throws UnableToCreateLoadFileException  When the source file cannot be opened.
     * @throws IngestionFieldsMismatchException When the header lacks a mapped
     *                                          field and allVersions is not set.
     */
    private function buildLoadFile($localPath, &$loadFilePath, $fileName, $lastModified) {
        try {
            // SplFileObject signals failure by throwing, never by returning false.
            $localFileObject = new \SplFileObject($localPath, 'r');
        } catch (\Exception $e) {
            throw new UnableToCreateLoadFileException("Unable to open $localPath");
        }

        $localFileObject->setFlags(
            \SplFileObject::READ_AHEAD
            | \SplFileObject::SKIP_EMPTY
            | \SplFileObject::DROP_NEW_LINE
        );

        $localFileObject->rewind();

        $fileDelim = $this->definition->getIngestFileDelimiter();

        $sourceFields = explode($fileDelim, (string) $localFileObject->current());
        $localFileObject->next();

        $fieldColumnMap = $this->definition->getFieldColumnMap();
        $destinationColumns = array_values($fieldColumnMap);

        foreach ($fieldColumnMap as $src => $dest) {
            if (!in_array($src, $sourceFields)) {
                $this->emergency("Import file does not contain field $src", ['source_fields' => $sourceFields, 'field_column_map' => $fieldColumnMap]);
                if (!$this->allVersions) throw new IngestionFieldsMismatchException("Import file does not contain field $src");
                return false;
            }
        }
        if (count($sourceFields) != count($destinationColumns)) {
            $this->warning("Import file field count does not match definition!", ['source_fields' => $sourceFields, 'destination_columns' => $destinationColumns]);
        }

        // The column list is fixed by the definition; derive it once instead of
        // reading it back from whatever row happened to be processed last.
        $specialColumns = ['record_hash', 'file_name', 'file_modified_time', 'file_version_id', 'sequence'];
        $loadColumns = array_keys(array_merge(
            array_fill_keys($specialColumns, null),
            array_fill_keys($destinationColumns, null)
        ));

        $loadFileObject = new \SplFileObject($loadFilePath, 'w');
        $expectedFieldCount = count($sourceFields);
        $currentLine = 0;

        // valid() (rather than eof()) keeps the final line when the file has no
        // trailing newline and skips the loop entirely for a header-only file.
        while ($localFileObject->valid()) {
            $raw = explode($fileDelim, (string) $localFileObject->current());
            if (count($raw) == $expectedFieldCount) {
                $inputRow = array_combine($sourceFields, $raw);

                $outputRow = [];
                foreach ($fieldColumnMap as $field => $column) {
                    // NOTE(review): backslashes and line breaks in the data are not escaped;
                    // changing that would also change record_hash for affected rows.
                    $outputRow[$column] = str_replace([',', '"'], ['\\,', '\\"'], trim(utf8_encode($inputRow[$field])));
                }
                $special_fields = [
                    'record_hash' => md5(implode('', $outputRow)),
                    'file_name' => $fileName,
                    'file_modified_time' => $lastModified,
                    'file_version_id' => $this->currentFile['VersionId'],
                    'sequence' => $currentLine,
                ];
                $outputRow = array_merge($special_fields, $outputRow);
                $loadFileObject->fwrite(implode(',', $outputRow) . "\n");
            } else {
                $this->warning("Line $currentLine has invalid field count, skipping line.", ['raw_line' => $raw]);
            }

            $localFileObject->next();
            $currentLine++;
        }
        $loadFileObject->fflush();
        // Release the handle before another process reads the file.
        $loadFileObject = null;

        // Sorting is best effort: when "sort" is unavailable or fails the unsorted file is used.
        $sortFilePath = "$loadFilePath.sorted";
        $output = [];
        $return = false;
        @exec('sort ' . escapeshellarg($loadFilePath) . ' > ' . escapeshellarg($sortFilePath), $output, $return);
        if ($return === 0) $loadFilePath = $sortFilePath;

        return implode(', ', $loadColumns);
    }

    /**
     * Loads a comma separated file into the "<table>_staging" table with
     * LOAD DATA LOCAL INFILE.
     *
     * @param string $load_file_path Path of the file to load; backslashes are
     *                               converted to forward slashes.
     * @param string $load_columns   Comma separated list of target columns,
     *                               in the order the file provides them.
     */
    private function loadData($load_file_path, $load_columns) {
        $pdo = $this->db->getPdo();

        // Normalise only the path (not the whole statement) and quote it, so
        // that paths containing quotes cannot break the statement.
        $normalizedPath = str_replace("\\", "/", $load_file_path);
        $quotedPath = $pdo->quote($normalizedPath);
        if ($quotedPath === false) {
            // Driver without quoting support: fall back to manual escaping.
            $quotedPath = "'" . addslashes($normalizedPath) . "'";
        }

        $sql =
            "LOAD DATA LOCAL INFILE $quotedPath IGNORE INTO TABLE `{$this->tableName}_staging` FIELDS TERMINATED BY ',' ($load_columns)";
        $this->debug($sql);
        $pdo->exec($sql);
    }

    /**
     * Copies the staging rows into the "<table>_records" table.
     *
     * Each import field maps to a destination column of the same name. A
     * field with a transformation contributes two columns: its untransformed
     * name receives the raw staging value and its own name receives the
     * transformation expression. Existing rows (duplicate key) get their
     * latest_modified_time and mapped columns refreshed.
     */
    private function updateRecords() {
        // destination column => source expression
        $mapping = [];
        foreach ($this->definition->getImportFields() as $importField) {
            if (!$importField->hasTransformation()) {
                $mapping[$importField->name()] = $importField->name();
                continue;
            }

            $mapping[$importField->getUntransformedName()] = $importField->name();
            $mapping[$importField->name()] = $importField->getTransformation();
        }

        $insertParts = [];
        $selectParts = [];
        $updateParts = [];
        foreach ($mapping as $target => $expression) {
            $insertParts[] = ", $target";
            $selectParts[] = ", $expression as $target";
            $updateParts[] = ", $target = VALUES($target)";
        }
        $insertString = implode('', $insertParts);
        $selectString = implode('', $selectParts);
        $updateString = implode('', $updateParts);

        $sql = <<<SQLEND2
INSERT IGNORE INTO `{$this->tableName}_records` (record_hash, first_modified_time, latest_modified_time $insertString)
SELECT record_hash, file_modified_time, file_modified_time $selectString
FROM `{$this->tableName}_staging`
ON DUPLICATE KEY UPDATE latest_modified_time = VALUES(latest_modified_time) $updateString
SQLEND2;

        $this->debug($sql);
        $this->db->statement($sql);
    }

    /**
     * Rebuilds the "<table>_latest" table of the import database from the
     * history tables.
     *
     * For tables that are not append-only, the newest entry of every file
     * name in "<table>_files" defines the current record set: records outside
     * of it get first_modified_time = 0 ("deleted"), records inside of it
     * that were flagged that way are restored. The latest table is then
     * truncated and refilled, unless its row count and newest modification
     * time already match the source.
     *
     * NOTE(review): the truncate and the refill are two separate statements,
     * so the latest table is briefly empty while this runs.
     */
    private function updateLatest() {
        $this->info("Updating latest table...");
        $historyImportDbName = DataManager::getImportDbName();
        $tableName = $this->tableName;

        $fileRecordWhereString = '';

        $is_append_only = $this->definition->getIsAppendOnly();
        if (!$is_append_only) {
            // Get the list of files (newest entry per file name).
            $sql = <<<SQLEND
SELECT id FROM {$tableName}_files
JOIN (SELECT file_name, MAX(modified_time) max_modified_time FROM {$tableName}_files GROUP BY file_name) maxFiles
ON (maxFiles.file_name = {$tableName}_files.file_name AND maxFiles.max_modified_time = {$tableName}_files.modified_time)
SQLEND;
            $this->debug($sql);
            $result = $this->db->select($sql);
            $srcFiles = [];
            foreach ($result as $file) {
                $srcFiles[(int) $file->id] = (int) $file->id;
            }

            if (count($srcFiles) === 0) {
                // No file has been ingested yet: the current record set is empty.
                // Use always-false conditions instead of generating "NOT IN ()".
                $fileRecordWhereString = '1 = 0';
                $subQueryString = "SELECT `record_hash` FROM `{$tableName}_file_record` WHERE 1 = 0";
            } else {
                $srcFilesString = implode(',', $srcFiles);
                if (count($srcFiles) > 1) {
                    $fileRecordWhereString = "`{$tableName}_file_record`.`file_id` IN ($srcFilesString)";
                } else {
                    $fileRecordWhereString = "`{$tableName}_file_record`.`file_id` = $srcFilesString";
                }

                $subQueries = [];
                foreach ($srcFiles as $fileId) {
                    $subQueries[] = "SELECT `record_hash` FROM `{$tableName}_file_record` WHERE `{$tableName}_file_record`.`file_id` = $fileId";
                }
                $subQueryString = implode(' UNION ', $subQueries);
            }

            /**
             * Sets first_modified_time of records that are not in the latest set to 0.
             * This "deletes" records in the table.
             */
            $sql = <<<SQLEND
UPDATE
    `{$tableName}_records`
SET
    `{$tableName}_records`.`first_modified_time` = 0
WHERE
    `{$tableName}_records`.`first_modified_time` != 0
    AND `{$tableName}_records`.`record_hash` NOT IN ($subQueryString)
SQLEND;
            $this->debug($sql);
            $this->db->statement($sql);

            /**
             * Sets first_modified_time of records that are in the latest set to their latest time.
             * This un-deletes "deleted" records in the table.
             */
            $sql = <<<SQLEND
UPDATE
    `{$tableName}_records`
        JOIN
    `{$tableName}_file_record` USING (`record_hash`)
SET
    `{$tableName}_records`.`first_modified_time` = `{$tableName}_records`.`latest_modified_time`
WHERE
    `{$tableName}_records`.`first_modified_time` = 0
    AND {$fileRecordWhereString}
SQLEND;
            $this->debug($sql);
            $this->db->statement($sql);
        }

        /**
         * Calculate new count and max.
         */
        $uniqueKey = $this->definition->key();
        $groupBy = implode(', ', $uniqueKey);
        if (!$is_append_only) {
            $sql = <<<SQLEND
SELECT
    COUNT(DISTINCT $groupBy) AS srcCount, MAX(`first_modified_time`) AS srcMax
FROM
    `{$tableName}_records`
        JOIN
    `{$tableName}_file_record` USING (`record_hash`)
WHERE
    $fileRecordWhereString;
SQLEND;
        } else {
            $sql = <<<SQLEND
SELECT
    COUNT(DISTINCT $groupBy) AS srcCount, MAX(`first_modified_time`) AS srcMax
FROM
    `{$tableName}_records`
SQLEND;
        }
        $this->debug($sql);
        $result = $this->db->select($sql);
        $srcCount = $result[0]->srcCount;
        $srcMax = $result[0]->srcMax;

        $sql = "SELECT COUNT(*) as latestCount, max(date_modified) as latestMax FROM `$historyImportDbName`.`{$tableName}_latest`;";
        $this->debug($sql);
        $result = $this->db->select($sql);
        $latestCount = $result[0]->latestCount;
        $latestMax = $result[0]->latestMax;

        $this->debug("Source Count: $srcCount");
        $this->debug("Latest Count: $latestCount");
        $this->debug("Source Max: $srcMax");
        $this->debug("Latest Max: $latestMax");
        if ($latestCount == $srcCount && $latestMax == $srcMax) {
            $this->info('Latest table is already up to date, skipping!');
            return;
        }

        $columns = [];
        foreach ($this->definition->getImportFields() as $field) {
            $columns[$field->name()] = $field->name();
        }
        $selectString = "`{$tableName}_records`.`record_hash`, `" . implode("`, `{$tableName}_records`.`", array_keys($columns)) . '`, first_modified_time as date_modified';
        $insertString = 'record_hash, ' . implode(', ', array_keys($columns)) . ', date_modified';

        // Correlated sub-select: true when a newer record with the same unique key exists.
        $innerWhere = '';
        $delim = '';
        foreach ($this->definition->key() as $columnName) {
            $innerWhere .= "{$delim}`innerTable`.`{$columnName}` = `{$tableName}_records`.`{$columnName}`";
            $delim = ' AND ';
        }
        $innerSelectString = "SELECT 1 FROM `{$tableName}_records` `innerTable` WHERE {$innerWhere} AND `innerTable`.`first_modified_time` > `{$tableName}_records`.`first_modified_time`";


        if (!$is_append_only) {
            $sql = <<<SQL
INSERT IGNORE INTO `$historyImportDbName`.`{$tableName}_latest` ($insertString)
SELECT $selectString
FROM `{$tableName}_records`
  JOIN {$tableName}_file_record ON ({$tableName}_file_record.record_hash = {$tableName}_records.record_hash)
WHERE {$fileRecordWhereString}
  AND `{$tableName}_records`.`first_modified_time` != 0
AND NOT EXISTS ({$innerSelectString})
SQL;
        } else {
            $sql = <<<SQL
INSERT IGNORE INTO `$historyImportDbName`.`{$tableName}_latest` ($insertString)
SELECT $selectString FROM `{$tableName}_records`
WHERE NOT EXISTS ({$innerSelectString})
SQL;
        }
        $this->debug($sql);
        $this->db->statement("TRUNCATE TABLE `$historyImportDbName`.`{$tableName}_latest`");
        $this->db->statement($sql);
    }

    /**
     * Registers the file currently held in the staging table in
     * "<table>_files", taking name, modification time and version id from a
     * single staging row. An already registered file is left untouched
     * (INSERT IGNORE).
     */
    private function updateFiles() {
        $filesTable = "`{$this->tableName}_files`";
        $stagingTable = "`{$this->tableName}_staging`";

        $sql = "INSERT IGNORE INTO $filesTable (file_name, modified_time, version_id)"
            . " SELECT s.file_name, s.file_modified_time, s.file_version_id"
            . " FROM $stagingTable s LIMIT 1";

        $this->debug($sql);
        $this->db->statement($sql);
    }

    /**
     * Removes every "<table>_file_record" row that belongs to the given file.
     *
     * @param int|string $file_id Id of the file whose links are removed; it is
     *                            placed into the statement as is.
     */
    private function deleteOldRecords($file_id) {
        $fileRecordTable = "`{$this->tableName}_file_record`";
        $sql = 'DELETE FROM ' . $fileRecordTable . ' where file_id = ' . $file_id;

        $this->debug($sql);
        $this->db->statement($sql);
    }

    /**
     * Links every staging row to its file in "<table>_file_record".
     *
     * The file is resolved by joining the staging rows to "<table>_files" on
     * file name and modification time; existing links are kept (INSERT IGNORE).
     */
    private function updateFileRecords() {
        $table = $this->tableName;

        $sql = "INSERT IGNORE INTO `{$table}_file_record` (file_id, record_hash, sequence)"
            . " SELECT f.id, s.record_hash, s.sequence"
            . " FROM `{$table}_staging` s"
            . " JOIN `{$table}_files` f ON (f.file_name = s.file_name AND f.modified_time = s.file_modified_time)";

        $this->debug($sql);
        $this->db->statement($sql);
    }

    /**
     * Marks the table as being ingested in the "ingestions" table.
     *
     * Any previous row for the table is replaced; end_time is reset to null
     * and total_time to -1.
     *
     * @param string $table Table name stored in table_name.
     * @param Carbon $time  Moment the ingestion started.
     */
    private function updateStartTime($table, Carbon $time) {
        $startedAt = $time->toDateTimeString();

        $sql = <<<STARTSQL
REPLACE INTO ingestions (table_name, start_time, end_time, total_time, state)
VALUES ('$table', '$startedAt', null, -1, 'ingesting')
;
STARTSQL;

        $this->debug($sql);
        $this->db->statement($sql);
    }

    /**
     * Stores the outcome of a finished ingestion in the "ingestions" table.
     *
     * @param string $table       Table name identifying the row to update.
     * @param Carbon $time        Moment the ingestion finished.
     * @param int    $tookSeconds Duration in seconds, stored in total_time.
     * @param string $humanDiff   Human readable duration, stored in state as "done in ...".
     */
    private function updateEndTime($table, Carbon $time, $tookSeconds, $humanDiff) {
        $finishedAt = $time->toDateTimeString();

        $sql = <<<ENDSQL
UPDATE ingestions 
SET end_time = '$finishedAt', total_time = $tookSeconds, state = 'done in $humanDiff'
WHERE table_name = '$table'
;
ENDSQL;

        $this->debug($sql);
        $this->db->statement($sql);
    }
}