Skip to content

Commit

Permalink
NEW phpoffice/phpword support
Browse files Browse the repository at this point in the history
  • Loading branch information
emteknetnz committed Sep 19, 2023
1 parent 5a6cd32 commit 8e4eb0b
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 17 deletions.
21 changes: 19 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,29 @@ Install with [composer](https://getcomposer.org/) by running `composer require s

## Configuration

You will need to set the following three environment variables:
**Note:** Use of docvert is primarily designed for Common Web Platform (CWP) clients.

If you are using docvert then you will need to set the following three environment variables:
- `DOCVERT_USERNAME`
- `DOCVERT_PASSWORD`
- `DOCVERT_URL`

**Note:** This module is primarily designed for Common Web Platform (CWP) clients. There will be additional setup required to use this module as intended, if you are not using the CWP government edition.
If you do not have the cwp/cwp-core module installed then enable document converter with the following configuration - note that this configuration will be applied automatically if you have the cwp/cwp-core module installed and the `DOCVERT_USERNAME` environment variable set.

```yaml
Page:
extensions:
- SilverStripe\DocumentConverter\PageExtension
```
By default this module will use docvert, though it's highly recommended that you use the phpoffice/phpword module instead. Enable this with the following configuration:
```yaml
SilverStripe\DocumentConverter\ImportField:
importer_class: SilverStripe\DocumentConverter\PHPWordImporter
```
Docvert support is now deprecated and will be removed in the next major version.
## User Guide
Expand Down
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
"require": {
"php": "^7.4 || ^8.0",
"silverstripe/cms": "^4",
"silverstripe/asset-admin": "^1"
"silverstripe/asset-admin": "^1",
"phpoffice/phpword": "^1.1"
},
"require-dev": {
"ext-curl": "*",
Expand Down
69 changes: 55 additions & 14 deletions src/ImportField.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
use SilverStripe\Assets\Folder;
use SilverStripe\Assets\Image;
use SilverStripe\Assets\Upload;
use SilverStripe\Core\Config\Config;
use SilverStripe\Control\Controller;
use SilverStripe\Core\Convert;
use SilverStripe\Core\Injector\Injector;
use SilverStripe\Control\Director;
Expand All @@ -25,6 +25,7 @@
use SilverStripe\Versioned\Versioned;
use SilverStripe\View\Parsers\HTMLValue;
use Tidy;
use SilverStripe\AssetAdmin\Controller\AssetAdmin;

/**
* DocumentImporterField is built on top of UploadField to access a document
Expand Down Expand Up @@ -58,10 +59,12 @@ class ImportField extends UploadField

private static $importer_class = ServiceConnector::class;

protected $attachEnabled = false;

/**
* Process the document immediately upon upload.
*/
public function upload(HTTPRequest $request)
public function Xupload(HTTPRequest $request)
{
if ($this->isDisabled() || $this->isReadonly()) {
return $this->httpError(403);
Expand Down Expand Up @@ -94,11 +97,12 @@ public function upload(HTTPRequest $request)

if (!$return['error']) {
// Get options for this import.
$splitHeader = (int)$request->postVar('SplitHeader');
$keepSource = (bool)$request->postVar('KeepSource');
$chosenFolderID = (int)$request->postVar('ChosenFolderID');
$publishPages = (bool)$request->postVar('PublishPages');
$includeTOC = (bool)$request->postVar('IncludeTOC');
$splitHeader = (int) $request->postVar('SplitHeader');
$keepSource = (bool) $request->postVar('KeepSource');
$chosenFolderID = (int) $request->postVar('ChosenFolderID');
$publishPages = (bool) $request->postVar('PublishPages');
$includeTOC = (bool) $request->postVar('IncludeTOC');
// ^^^ These do nothing, they're aren't sent on upload

// Process the document and write the page.
$preservedDocument = null;
Expand All @@ -114,6 +118,31 @@ public function upload(HTTPRequest $request)
}
}

if (($return['error'] ?? 1) == 0) {
// asset-admin UploadField.js considers any error including 0 to be an error
// so simply unset the key if there is no error
unset($return['error']);
}

// generate the same result as UploadField
// note we don't need to do this if there is an actual error because the JSON that's
// returned is good enough to display an error message
if (!isset($return['error'])) {
if ($preservedDocument) {
$file = $preservedDocument;
} else {
// create a temporary File object to return to the client
$upload = Upload::create();
$file = File::create();
$upload->loadIntoFile($tmpfile, $file, $chosenFolderID);
}
$result = AssetAdmin::singleton()->getObjectFromData($file);
$return = array_merge($result, $return);
if (!$keepSource) {
// $file->delete();
}
}

$response = HTTPResponse::create(json_encode([$return]));
$response->addHeader('Content-Type', 'application/json');
return $response;
Expand Down Expand Up @@ -277,7 +306,6 @@ protected function writeContent($subtitle, $subdoc, $subnode, $sort = null, $pub
*/
public function importFromPOST($tmpFile, $splitHeader = false, $publishPages = false, $chosenFolderID = null)
{

$fileDescriptor = [
'name' => $tmpFile['name'],
'path' => $tmpFile['tmp_name'],
Expand All @@ -286,6 +314,7 @@ public function importFromPOST($tmpFile, $splitHeader = false, $publishPages = f

$sourcePage = $this->form->getRecord();
$importerClass = $this->config()->get('importer_class');
/** @var Importer $importer */
$importer = Injector::inst()->create($importerClass, $fileDescriptor, $chosenFolderID);
$content = $importer->import();

Expand Down Expand Up @@ -316,27 +345,39 @@ public function importFromPOST($tmpFile, $splitHeader = false, $publishPages = f
// make sure any images are added as Image records with a relative link to assets
$chosenFolder = ($this->chosenFolderID) ? DataObject::get_by_id(Folder::class, $this->chosenFolderID) : null;
$folderName = ($chosenFolder) ? '/' . $chosenFolder->Name : '';
$dir = Controller::join_links(ASSETS_DIR, $folderName);
$imgs = $xpath->query('//img');
for ($i = 0; $i < $imgs->length; $i++) {
$img = $imgs->item($i);
$originalPath = 'assets/' . $folderName . '/' . $img->getAttribute('src');
$originalPath = Controller::join_links($dir, $img->getAttribute('src'));
// ignore base64 encoded images which show up when importing using PHPOffice/PHPWord Word2007
// counter-intuitively it seems that we can simply ignore these and images
// are still imported correctly
if (preg_match("#data:image/.+?;base64,#", $originalPath)) {
continue;
}
// base64 inline image - note intentionally not starting regex ^ as it may start with ASSETS_DIR
// if (preg_match("#data:image/(.+?);base64,(.+)$#", $originalPath, $matches)) {
// $ext = $matches[1];
// $contents = $matches[2];
// $filepath = tempnam($dir, 'image') . '.' . $ext;
// file_put_contents($filepath, $contents);
// $originalPath = $filepath;
// }
$name = FileNameFilter::create()->filter(basename($originalPath ?? ''));

$image = Image::get()->filter([
'Name' => $name,
'ParentID' => (int)$chosenFolderID
'ParentID' => (int) $chosenFolderID
])->first();
if (!($image && $image->exists())) {
$image = Image::create();
$image->ParentID = (int)$chosenFolderID;
$image->ParentID = (int) $chosenFolderID;
$image->Name = $name;
$image->write();
}

// make sure it's put in place correctly so Image record knows where it is.
// e.g. in the case of underscores being renamed to dashes.
@rename(Director::getAbsFile($originalPath) ?? '', Director::getAbsFile($image->getFilename()) ?? '');

$img->setAttribute('src', $image->getFilename());
}

Expand Down
10 changes: 10 additions & 0 deletions src/Importer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?php

namespace SilverStripe\DocumentConverter;

/**
 * Contract for the document importers used by ImportField.
 *
 * ImportField reads the class name from its `importer_class` config setting,
 * creates the instance through the Injector with the two constructor
 * arguments below, and then calls import() on it.
 */
interface Importer
{
/**
 * @param array $fileDescriptor Description of the uploaded file; ImportField
 *                              supplies at least the 'name' and 'path' keys.
 * @param int|null $chosenFolderID ID of the folder chosen for the import, or
 *                                 null when no folder was chosen.
 */
public function __construct($fileDescriptor, $chosenFolderID = null);

/**
 * Convert the described document.
 *
 * PHPWordImporter returns the converted HTML as a string; presumably other
 * implementations return content in a compatible form - TODO confirm.
 */
public function import();
}
53 changes: 53 additions & 0 deletions src/PHPWordImporter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?php

namespace SilverStripe\DocumentConverter;

use GraphQL\Exception\InvalidArgument;
use SilverStripe\ORM\DataObject;

/**
 * Importer that converts an uploaded Word document to HTML using
 * phpoffice/phpword.
 */
class PHPWordImporter implements Importer
{
    /**
     * Description of the uploaded file; import() reads the 'name' and 'path' keys.
     */
    private array $fileDescriptor;

    /**
     * ID of the folder chosen for the import, or null when none was chosen.
     */
    private ?int $chosenFolderID;

    /**
     * @param array $fileDescriptor
     * @param int|null $chosenFolderID
     * @throws \InvalidArgumentException when either argument has the wrong type
     */
    public function __construct($fileDescriptor, $chosenFolderID = null)
    {
        // Use the SPL exception rather than one borrowed from an unrelated
        // (GraphQL) library that this module does not depend on directly.
        if (!is_array($fileDescriptor)) {
            throw new \InvalidArgumentException('fileDescriptor must be an array');
        }
        if (!is_int($chosenFolderID) && !is_null($chosenFolderID)) {
            throw new \InvalidArgumentException('chosenFolderID must be an int or null');
        }
        $this->fileDescriptor = $fileDescriptor;
        $this->chosenFolderID = $chosenFolderID;
    }

    /**
     * Read the Word document and return it rendered as HTML.
     *
     * @return string
     * @throws \RuntimeException when the temporary output file cannot be created or read
     */
    public function import()
    {
        // Pick the reader from the original file name; compare case-insensitively
        // so that e.g. "REPORT.DOC" is still handled by the legacy reader.
        $source = $this->fileDescriptor['path'];
        $ext = strtolower(pathinfo($this->fileDescriptor['name'], PATHINFO_EXTENSION));
        $readerName = ($ext === 'doc') ? 'MsDoc' : 'Word2007';
        $phpWord = \PhpOffice\PhpWord\IOFactory::load($source, $readerName);

        // Folder is referenced by its fully-qualified name: it lives in
        // SilverStripe\Assets, not in this class's namespace.
        $chosenFolder = $this->chosenFolderID
            ? DataObject::get_by_id(\SilverStripe\Assets\Folder::class, $this->chosenFolderID)
            : null;
        $folderName = $chosenFolder ? '/' . $chosenFolder->Name : '';

        // Write the HTML to a temporary file, read it back, and always clean up.
        $filepath = tempnam(ASSETS_PATH . $folderName, 'converted');
        if ($filepath === false) {
            throw new \RuntimeException('Unable to create a temporary file for the converted document');
        }
        try {
            $objWriter = \PhpOffice\PhpWord\IOFactory::createWriter($phpWord, 'HTML');
            $objWriter->save($filepath);
            $content = file_get_contents($filepath);
        } finally {
            // Remove the temporary file even if the writer threw.
            if (file_exists($filepath)) {
                unlink($filepath);
            }
        }
        if ($content === false) {
            throw new \RuntimeException('Unable to read the converted document');
        }
        return $content;
    }
}
2 changes: 2 additions & 0 deletions src/ServiceConnector.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
use ZipArchive;

/**
* This class uses the legacy docvert service
*
* Utility class hiding the specifics of the document conversion process.
*/
class ServiceConnector
Expand Down

0 comments on commit 8e4eb0b

Please sign in to comment.