From 8e4eb0b2a101449e10d82b2f8163090d39aab5d3 Mon Sep 17 00:00:00 2001 From: Steve Boyd Date: Wed, 20 Sep 2023 10:36:29 +1200 Subject: [PATCH] NEW phpoffice/phpword support --- README.md | 21 ++++++++++-- composer.json | 3 +- src/ImportField.php | 69 ++++++++++++++++++++++++++++++++-------- src/Importer.php | 10 ++++++ src/PHPWordImporter.php | 53 ++++++++++++++++++++++++++++++ src/ServiceConnector.php | 2 ++ 6 files changed, 141 insertions(+), 17 deletions(-) create mode 100644 src/Importer.php create mode 100644 src/PHPWordImporter.php diff --git a/README.md b/README.md index 272c0fd..8f24f1e 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,29 @@ Install with [composer](https://getcomposer.org/) by running `composer require s ## Configuration -You will need to set the following three environment variables: +**Note:** docvert support is primarily designed for Common Web Platform (CWP) clients. + +If you are using docvert then you will need to set the following three environment variables: - `DOCVERT_USERNAME` - `DOCVERT_PASSWORD` - `DOCVERT_URL` -**Note:** This module is primarily designed for Common Web Platform (CWP) clients. There will be additional setup required to use this module as intended, if you are not using the CWP government edition. +If you do not have the cwp/cwp-core module installed then enable document converter with the following configuration - note that it will be automatically applied if you do have the cwp/cwp-core module installed and the `DOCVERT_USERNAME` environment variable set. + +```yaml +Page: + extensions: + - SilverStripe\DocumentConverter\PageExtension +``` + +By default this module will use docvert, though it's highly recommended that you use the phpoffice/phpword module instead. 
Enable this with the following configuration: + +```yaml +SilverStripe\DocumentConverter\ImportField: + importer_class: SilverStripe\DocumentConverter\PHPWordImporter +``` + +docvert support is now deprecated and will be removed in the next major version ## User Guide diff --git a/composer.json b/composer.json index 4cfe5dd..09b9ab0 100644 --- a/composer.json +++ b/composer.json @@ -10,7 +10,8 @@ "require": { "php": "^7.4 || ^8.0", "silverstripe/cms": "^4", - "silverstripe/asset-admin": "^1" + "silverstripe/asset-admin": "^1", + "phpoffice/phpword": "^1.1" }, "require-dev": { "ext-curl": "*", diff --git a/src/ImportField.php b/src/ImportField.php index 8065bcc..8efdece 100644 --- a/src/ImportField.php +++ b/src/ImportField.php @@ -13,7 +13,7 @@ use SilverStripe\Assets\Folder; use SilverStripe\Assets\Image; use SilverStripe\Assets\Upload; -use SilverStripe\Core\Config\Config; +use SilverStripe\Control\Controller; use SilverStripe\Core\Convert; use SilverStripe\Core\Injector\Injector; use SilverStripe\Control\Director; @@ -25,6 +25,7 @@ use SilverStripe\Versioned\Versioned; use SilverStripe\View\Parsers\HTMLValue; use Tidy; +use SilverStripe\AssetAdmin\Controller\AssetAdmin; /** * DocumentImporterField is built on top of UploadField to access a document @@ -58,10 +59,12 @@ class ImportField extends UploadField private static $importer_class = ServiceConnector::class; + protected $attachEnabled = false; + /** * Process the document immediately upon upload. */ - public function upload(HTTPRequest $request) + public function Xupload(HTTPRequest $request) { if ($this->isDisabled() || $this->isReadonly()) { return $this->httpError(403); @@ -94,11 +97,12 @@ public function upload(HTTPRequest $request) if (!$return['error']) { // Get options for this import. 
- $splitHeader = (int)$request->postVar('SplitHeader'); - $keepSource = (bool)$request->postVar('KeepSource'); - $chosenFolderID = (int)$request->postVar('ChosenFolderID'); - $publishPages = (bool)$request->postVar('PublishPages'); - $includeTOC = (bool)$request->postVar('IncludeTOC'); + $splitHeader = (int) $request->postVar('SplitHeader'); + $keepSource = (bool) $request->postVar('KeepSource'); + $chosenFolderID = (int) $request->postVar('ChosenFolderID'); + $publishPages = (bool) $request->postVar('PublishPages'); + $includeTOC = (bool) $request->postVar('IncludeTOC'); + // ^^^ These do nothing, they aren't sent on upload // Process the document and write the page. $preservedDocument = null; @@ -114,6 +118,31 @@ public function upload(HTTPRequest $request) } } + if (($return['error'] ?? 1) == 0) { + // asset-admin UploadField.js considers any error including 0 to be an error + // so simply unset the key if there is no error + unset($return['error']); + } + + // generate the same result as UploadField + // note we don't need to do this if there is an actual error because the JSON that's + // returned is good enough to display an error message + if (!isset($return['error'])) { + if ($preservedDocument) { + $file = $preservedDocument; + } else { + // create a temporary File object to return to the client + $upload = Upload::create(); + $file = File::create(); + $upload->loadIntoFile($tmpfile, $file, $chosenFolderID); + } + $result = AssetAdmin::singleton()->getObjectFromData($file); + $return = array_merge($result, $return); + if (!$keepSource) { + // $file->delete(); + } + } + $response = HTTPResponse::create(json_encode([$return])); $response->addHeader('Content-Type', 'application/json'); return $response; @@ -277,7 +306,6 @@ protected function writeContent($subtitle, $subdoc, $subnode, $sort = null, $pub */ public function importFromPOST($tmpFile, $splitHeader = false, $publishPages = false, $chosenFolderID = null) { - $fileDescriptor = [ 'name' => 
$tmpFile['name'], 'path' => $tmpFile['tmp_name'], @@ -286,6 +314,7 @@ public function importFromPOST($tmpFile, $splitHeader = false, $publishPages = f $sourcePage = $this->form->getRecord(); $importerClass = $this->config()->get('importer_class'); + /** @var Importer $importer */ $importer = Injector::inst()->create($importerClass, $fileDescriptor, $chosenFolderID); $content = $importer->import(); @@ -316,27 +345,39 @@ public function importFromPOST($tmpFile, $splitHeader = false, $publishPages = f // make sure any images are added as Image records with a relative link to assets $chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null; $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : ''; + $dir = Controller::join_links(ASSETS_DIR, $folderName); $imgs = $xpath->query('//img'); for ($i = 0; $i < $imgs->length; $i++) { $img = $imgs->item($i); - $originalPath = 'assets/' . $folderName . '/' . $img->getAttribute('src'); + $originalPath = Controller::join_links($dir, $img->getAttribute('src')); + // ignore base64 encoded images which show up when importing using PHPOffice/PHPWord Word2007 + // counter-intuitively it seems that we can simply ignore these and images + // are still imported correctly + if (preg_match("#data:image/.+?;base64,#", $originalPath)) { + continue; + } + // base64 inline image - note intentionally not starting regex ^ as it may start with ASSETS_DIR + // if (preg_match("#data:image/(.+?);base64,(.+)$#", $originalPath, $matches)) { + // $ext = $matches[1]; + // $contents = $matches[2]; + // $filepath = tempnam($dir, 'image') . '.' . $ext; + // file_put_contents($filepath, $contents); + // $originalPath = $filepath; + // } $name = FileNameFilter::create()->filter(basename($originalPath ?? 
'')); - $image = Image::get()->filter([ 'Name' => $name, - 'ParentID' => (int)$chosenFolderID + 'ParentID' => (int) $chosenFolderID ])->first(); if (!($image && $image->exists())) { $image = Image::create(); - $image->ParentID = (int)$chosenFolderID; + $image->ParentID = (int) $chosenFolderID; $image->Name = $name; $image->write(); } - // make sure it's put in place correctly so Image record knows where it is. // e.g. in the case of underscores being renamed to dashes. @rename(Director::getAbsFile($originalPath) ?? '', Director::getAbsFile($image->getFilename()) ?? ''); - $img->setAttribute('src', $image->getFilename()); } diff --git a/src/Importer.php b/src/Importer.php new file mode 100644 index 0000000..0314fe6 --- /dev/null +++ b/src/Importer.php @@ -0,0 +1,10 @@ +fileDescriptor = $fileDescriptor; + $this->chosenFolderID = $chosenFolderID; + } + + /** + * @return string + */ + public function import() + { + // read word doc + $source = $this->fileDescriptor['path']; + $ext = pathinfo($this->fileDescriptor['name'], PATHINFO_EXTENSION); + $readerName = 'Word2007'; + if ($ext === 'doc') { + // Word 1997 + $readerName = 'MsDoc'; + } + $phpWord = \PhpOffice\PhpWord\IOFactory::load($source, $readerName); + // write it out as HTML + $chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null; + $folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : ''; + $filepath = tempnam(ASSETS_PATH . 
$folderName, 'converted'); + $objWriter = \PhpOffice\PhpWord\IOFactory::createWriter($phpWord, 'HTML'); + $objWriter->save($filepath); + $content = file_get_contents($filepath); + unlink($filepath); + return $content; + } +} diff --git a/src/ServiceConnector.php b/src/ServiceConnector.php index c2687c1..70b3da6 100644 --- a/src/ServiceConnector.php +++ b/src/ServiceConnector.php @@ -11,6 +11,8 @@ use ZipArchive; /** + * This class uses the legacy docvert service + * * Utility class hiding the specifics of the document conversion process. */ class ServiceConnector