diff --git a/README.md b/README.md
index 5000d20..2003e59 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,13 @@
 # MAXQDA HTML to Ecel
 
 The aim of this project is to create a propper excel-file from the html output of the paraphrases.
-The project does not need a database.
+The project does not need a database-server. It is able to work with SQLite.
+
+## stack
+- Laravel 9
+- spatie/simple-excel
+- PHP 8.1
+- SQLite
+
+## Licence
+This package is free to use as stated by the [LICENSE.md](LICENSE.md) under the MIT License, but you can [buy me a coffee](https://www.buymeacoffee.com/redFreak) if you want :D.
diff --git a/app/Console/Commands/ExcelExportCommand.php b/app/Console/Commands/ExcelExportCommand.php
new file mode 100644
index 0000000..0e0cbf7
--- /dev/null
+++ b/app/Console/Commands/ExcelExportCommand.php
@@ -0,0 +1,249 @@
+toDateTimeString()) . '.xlsx')
+        );
+        $writer = $simpleExcelWriter->getWriter();
+
+        // set header style
+        /** @var Style $style */
+        $style = (new StyleBuilder())
+            ->setFontBold()
+            ->setFontColor(Color::BLACK)
+            ->setShouldWrapText()
+            ->setBackgroundColor(Color::LIGHT_BLUE)
+            ->build();
+        $simpleExcelWriter->setHeaderStyle($style);
+
+        $interviews = Interview::all();
+        $interview_last = $interviews->last();
+
+        $interviews->each(function (Interview $interview) use ($writer, $simpleExcelWriter, $interview_last) {
+            // use the interview id as sheet name
+            $writer->getCurrentSheet()->setName($interview->id);
+
+            // collect and sort the data
+            $editors = $interview->editors->unique();
+            $paraphrasesByEditor = $this->getParaphrasesByEditors($editors, $interview);
+
+            // write the paraphrases to the sheet
+            $row = 1;
+            $editorIterators = $this->getEditorIterators($paraphrasesByEditor);
+            // add rows
+            do {
+                if (!$this->isLoopContinued($editorIterators)) break;
+
+                $rowData = $this->getRowData($row, $editors, $editorIterators);
+                $simpleExcelWriter->addRow($rowData);
+
+                ++$row;
+            } while (true);
+
+            if (!$interview->is($interview_last)) {
+                $writer->addNewSheetAndMakeItCurrent();
+            }
+        });
+
+        return Command::SUCCESS;
+    }
+
+    /**
+     * Group the paraphrases of an interview by editor name and sort each group by position.
+     *
+     * Within one editor the order is: ascending start position, then ranges with the larger end
+     * first, then single positions (start equals end), ties broken by ascending id, e.g.
+     * 12-45 (id: 42) < 12-43 (6) < 12 (12) < 12 (18).
+     *
+     * The sort key is built from zero-padded 6-digit components, so positions and ids are
+     * expected to stay below 999999.
+     *
+     * @param EloquentCollection $editors   the editors whose paraphrases are collected
+     * @param Interview          $interview the interview providing the paraphrases
+     *
+     * @return SupportCollection sorted paraphrase collections keyed by editor name
+     */
+    private function getParaphrasesByEditors(EloquentCollection $editors, Interview $interview): SupportCollection
+    {
+        return $editors->mapWithKeys(
+            fn($editor) => [
+                $editor->name => $interview->paraphrases->where('editor_id', $editor->id)->sortBy(function (
+                    Paraphrase $paraphrase
+                ) {
+                    $sortKey = Str::padLeft((string) $paraphrase->position_start, 6, '0');
+
+                    // Ranges sharing a start must come with the larger end first (12-47 before 12-46),
+                    // so the end component is inverted. getRowData() always emits the largest end for
+                    // the smallest start first; an ascending end would put equal ranges of different
+                    // editors on different rows. Single positions keep the highest key and sort last.
+                    $sortKey .= $paraphrase->position_start === $paraphrase->position_end
+                        ? '999999'
+                        : Str::padLeft((string) (999999 - (int) $paraphrase->position_end), 6, '0');
+
+                    // the id keeps equal positions in the order they were read into the db
+                    return $sortKey.Str::padLeft((string) $paraphrase->id, 6, '0');
+                })
+            ]
+        );
+    }
+
+    /**
+     * Create one iterator per editor over that editor's sorted paraphrases.
+     *
+     * The result is indexed sequentially (0, 1, 2, ...) in the order of $paraphrasesByEditor,
+     * which is how the other helpers look the iterators up by editor index.
+     *
+     * @param SupportCollection $paraphrasesByEditor paraphrase collections keyed by editor name
+     *
+     * @return SupportCollection the iterators, sequentially indexed
+     */
+    private function getEditorIterators(SupportCollection $paraphrasesByEditor): SupportCollection
+    {
+        $editorIterators = new SupportCollection();
+        $paraphrasesByEditor->each(function (EloquentCollection $paraphrases) use ($editorIterators) {
+            // a fresh iterator starts at the first paraphrase
+            $editorIterators->add($paraphrases->getIterator());
+        });
+
+        return $editorIterators;
+    }
+
+    /**
+     * Build the data of one sheet row and advance the iterators of the editors written to it.
+     *
+     * The row covers the smallest start position among the editors' current paraphrases and, for
+     * that start, the largest end position. Only editors whose current paraphrase matches both
+     * get their text written; all others get a placeholder and are not advanced.
+     *
+     * @param int                $row             the running row number written to the '#' column
+     * @param EloquentCollection $editors         the editors, in column order
+     * @param SupportCollection  $editorIterators the per-editor iterators, indexed like $editors
+     *
+     * @return array the row as column name => cell value
+     */
+    private function getRowData(int $row, EloquentCollection $editors, SupportCollection $editorIterators): array
+    {
+        $rowData = [
+            '#' => $row,
+        ];
+
+        // collect the current paraphrases (null for editors that are already exhausted)
+        $currentParaphrases = $this->getCurrentParaphrases($editors, $editorIterators);
+
+        // get the minimum start position; exhausted editors count as 999999 so they never win
+        $positionStart = $currentParaphrases
+            ->min(fn(?Paraphrase $paraphrase) => $paraphrase?->position_start ?? 999999);
+        // get the maximum end position among the paraphrases starting at that position
+        $positionEnd = $currentParaphrases
+            ->where('position_start', '=', $positionStart)
+            ->max(fn(?Paraphrase $paraphrase) => $paraphrase?->position_end);
+
+        // write the cells per editor
+        foreach ($editors as $editor_index => $editor) {
+            $rowData = $this->getParaphraseCells($currentParaphrases, $editor_index, $positionStart, $positionEnd,
+                $editor, $editorIterators, $rowData);
+        }
+        $rowData['pos.'] = ($positionStart === $positionEnd ? $positionStart : $positionStart.' - '.$positionEnd);
+        $rowData['final paraphrase'] = '';
+        $rowData['parent encoding'] = '';
+        $rowData['category'] = '';
+
+        return $rowData;
+    }
+
+    /**
+     * Tell whether at least one editor still has paraphrases left to write.
+     *
+     * @param SupportCollection $editorIterators the per-editor iterators
+     *
+     * @return bool true while any iterator is still valid
+     */
+    private function isLoopContinued(SupportCollection $editorIterators): bool
+    {
+        return $editorIterators->contains(fn(Iterator $editorIterator) => $editorIterator->valid());
+    }
+
+    /**
+     * Collect the paraphrase every editor's iterator currently points at.
+     *
+     * @param EloquentCollection $editors         the editors, in column order
+     * @param SupportCollection  $editorIterators the per-editor iterators, indexed like $editors
+     *
+     * @return EloquentCollection one entry per editor, in editor order, taken from current()
+     */
+    private function getCurrentParaphrases(
+        EloquentCollection $editors,
+        SupportCollection $editorIterators
+    ): EloquentCollection {
+        $currentParaphrases = new EloquentCollection();
+        foreach ($editors as $editor_index => $editor) {
+            $currentParaphrases->add($editorIterators->get($editor_index)->current());
+        }
+
+        return $currentParaphrases;
+    }
+
+    /**
+     * Add the cell of one editor to the row.
+     *
+     * If the editor's current paraphrase covers exactly the row's position, its text is written
+     * and the editor's iterator is advanced; otherwise a '-' placeholder is written and the
+     * iterator stays where it is.
+     *
+     * @param EloquentCollection $currentParaphrases the current paraphrase per editor
+     * @param int                $editor_index       index of the editor in the collections
+     * @param mixed              $positionStart      start position of the row
+     * @param mixed              $positionEnd        end position of the row
+     * @param Editor             $editor             the editor; its name is the column name
+     * @param SupportCollection  $editorIterators    the per-editor iterators
+     * @param array              $rowData            the row built so far
+     *
+     * @return array the row including the editor's cell
+     */
+    private function getParaphraseCells(
+        EloquentCollection $currentParaphrases,
+        int $editor_index,
+        mixed $positionStart,
+        mixed $positionEnd,
+        Editor $editor,
+        SupportCollection $editorIterators,
+        array $rowData
+    ): array {
+        /** @var Paraphrase|null $paraphrase */
+        $paraphrase = $currentParaphrases->get($editor_index);
+        if ($paraphrase?->position_start === $positionStart && $paraphrase?->position_end === $positionEnd) {
+            // write the data
+            $rowData[$editor->name] = $paraphrase->paraphrase;
+            // and advance the iterator
+            $editorIterators->get($editor_index)->next();
+        } else {
+            // write an empty cell
+            $rowData[$editor->name] = '-';
+        }
+
+        return $rowData;
+    }
+}
diff --git a/app/Models/Editor.php b/app/Models/Editor.php
index 0a84020..dbd08cf 100644
--- a/app/Models/Editor.php
+++ b/app/Models/Editor.php
@@ -26,6 +26,6 @@ public function paraphrases()
 
     public function interviews()
     {
-        return $this->hasManyThrough(Interview::class, Paraphrase::class);
+        return $this->belongsToMany(Interview::class, 'paraphrases');
     }
 }
diff --git a/app/Models/Interview.php b/app/Models/Interview.php
index e27b384..c663d15 100644
--- a/app/Models/Interview.php
+++ b/app/Models/Interview.php
@@ -26,6 +26,6 @@ public function paraphrases()
 
     public function editors()
     {
-        return $this->hasManyThrough(Editor::class, Paraphrase::class);
+        return $this->belongsToMany(Editor::class, 'paraphrases');
     }
 }
diff --git a/composer.json b/composer.json
index 19c00e4..4814663 100644
--- a/composer.json
+++ b/composer.json
@@ -9,7 +9,8 @@
         "guzzlehttp/guzzle": "^7.2",
         "laravel/framework": "^9.19",
         "laravel/sanctum": "^3.0",
-        "laravel/tinker": "^2.7"
+        "laravel/tinker": "^2.7",
+        "spatie/simple-excel": "^2.3"
     },
     "require-dev": {
         "roave/security-advisories": "dev-latest",
diff --git a/composer.lock b/composer.lock
index 6cc57d0..d68452f 100644
--- a/composer.lock
+++ b/composer.lock
@@ -4,7 +4,7 @@
         "Read more about it at
https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "f154b5e3b0edc297ab69101870f9458d", + "content-hash": "0f2087b3a8a182be6bb57a651d7ca367", "packages": [ { "name": "brick/math", @@ -2104,6 +2104,95 @@ ], "time": "2022-10-28T22:51:32+00:00" }, + { + "name": "openspout/openspout", + "version": "v3.7.4", + "source": { + "type": "git", + "url": "https://github.com/openspout/openspout.git", + "reference": "dfbbd53b5edcd486b45a37f6a04fac33073c70f3" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/openspout/openspout/zipball/dfbbd53b5edcd486b45a37f6a04fac33073c70f3", + "reference": "dfbbd53b5edcd486b45a37f6a04fac33073c70f3", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-filter": "*", + "ext-libxml": "*", + "ext-xmlreader": "*", + "ext-zip": "*", + "php": "~7.3.0 || ~7.4.0 || ~8.0.0 || ~8.1.0" + }, + "require-dev": { + "ext-zlib": "*", + "friendsofphp/php-cs-fixer": "^3.4", + "phpstan/phpstan": "^1.4", + "phpstan/phpstan-phpunit": "^1.0", + "phpunit/phpunit": "^9.5" + }, + "suggest": { + "ext-iconv": "To handle non UTF-8 CSV files (if \"php-intl\" is not already installed or is too limited)", + "ext-intl": "To handle non UTF-8 CSV files (if \"iconv\" is not already installed)" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.3.x-dev" + } + }, + "autoload": { + "psr-4": { + "OpenSpout\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Adrien Loison", + "email": "adrien@box.com" + } + ], + "description": "PHP Library to read and write spreadsheet files (CSV, XLSX and ODS), in a fast and scalable way", + "homepage": "https://github.com/openspout/openspout", + "keywords": [ + "OOXML", + "csv", + "excel", + "memory", + "odf", + "ods", + "office", + "open", + "php", + "read", + "scale", + "spreadsheet", + "stream", + "write", + "xlsx" + ], + 
"support": { + "issues": "https://github.com/openspout/openspout/issues", + "source": "https://github.com/openspout/openspout/tree/v3.7.4" + }, + "funding": [ + { + "url": "https://paypal.me/filippotessarotto", + "type": "custom" + }, + { + "url": "https://github.com/Slamdunk", + "type": "github" + } + ], + "time": "2022-03-31T06:15:15+00:00" + }, { "name": "phpoption/phpoption", "version": "1.9.0", @@ -2836,6 +2925,65 @@ ], "time": "2022-09-16T03:22:46+00:00" }, + { + "name": "spatie/simple-excel", + "version": "2.3.0", + "source": { + "type": "git", + "url": "https://github.com/spatie/simple-excel.git", + "reference": "741527835370b34134d685324f8502cae6735fed" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/spatie/simple-excel/zipball/741527835370b34134d685324f8502cae6735fed", + "reference": "741527835370b34134d685324f8502cae6735fed", + "shasum": "" + }, + "require": { + "illuminate/support": "^8.71|^9.0", + "openspout/openspout": "^3.0", + "php": "^8.0" + }, + "require-dev": { + "phpunit/phpunit": "^9.4", + "spatie/phpunit-snapshot-assertions": "^4.0", + "spatie/temporary-directory": "^1.2" + }, + "type": "library", + "autoload": { + "psr-4": { + "Spatie\\SimpleExcel\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Freek Van der Herten", + "email": "freek@spatie.be", + "homepage": "https://spatie.be", + "role": "Developer" + } + ], + "description": "Read and write simple Excel and CSV files", + "homepage": "https://github.com/spatie/simple-excel", + "keywords": [ + "simple-excel", + "spatie" + ], + "support": { + "source": "https://github.com/spatie/simple-excel/tree/2.3.0" + }, + "funding": [ + { + "url": "https://github.com/spatie", + "type": "github" + } + ], + "time": "2022-10-11T11:37:13+00:00" + }, { "name": "symfony/console", "version": "v6.1.7", diff --git a/storage/exports/.gitignore b/storage/exports/.gitignore new file mode 100644 index 
0000000..d6b7ef3 --- /dev/null +++ b/storage/exports/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore