Skip to content

Commit

Permalink
Merge pull request #4233 from oleibman/nodtdload
Browse files Browse the repository at this point in the history
Ignore Settings::libXmlLoaderOptions
  • Loading branch information
oleibman authored Nov 22, 2024
2 parents c01b94b + 343d26a commit fb74dcd
Show file tree
Hide file tree
Showing 12 changed files with 67 additions and 52 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,23 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com)
and this project adheres to [Semantic Versioning](https://semver.org).

## TBD - 3.5.0
## 2024-11-22 - 3.5.0

### Added

- Nothing yet.

### Changed

- Nothing yet.
- Settings::libXmlLoaderOptions is ignored. [PR #4233](https://github.com/PHPOffice/PhpSpreadsheet/pull/4233)

### Moved

- Nothing yet.

### Deprecated

- Settings::setLibXmlLoaderOptions() and Settings::getLibXmlLoaderOptions() are no longer needed - no replacement.
- Worksheet::getHashCode is no longer needed.

### Fixed
Expand Down
1 change: 0 additions & 1 deletion docs/topics/reading-and-writing-to-file.md
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,6 @@ versions of Microsoft Excel.
**Excel 2003 XML limitations** Please note that Excel 2003 XML format
has some limits regarding styling cells and handling large
spreadsheets via PHP.
Also, only files using charset UTF-8 or ISO-8859-* are supported.

### \PhpOffice\PhpSpreadsheet\Reader\Xml

Expand Down
7 changes: 3 additions & 4 deletions src/PhpSpreadsheet/Reader/Gnumeric.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
use PhpOffice\PhpSpreadsheet\Settings;
use PhpOffice\PhpSpreadsheet\Shared\File;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
Expand Down Expand Up @@ -104,7 +103,7 @@ public function listWorksheetNames(string $filename): array

$xml = new XMLReader();
$contents = $this->gzfileGetContents($filename);
$xml->xml($contents, null, Settings::getLibXmlLoaderOptions());
$xml->xml($contents);
$xml->setParserProperty(2, true);

$worksheetNames = [];
Expand Down Expand Up @@ -133,7 +132,7 @@ public function listWorksheetInfo(string $filename): array

$xml = new XMLReader();
$contents = $this->gzfileGetContents($filename);
$xml->xml($contents, null, Settings::getLibXmlLoaderOptions());
$xml->xml($contents);
$xml->setParserProperty(2, true);

$worksheetInfo = [];
Expand Down Expand Up @@ -248,7 +247,7 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp

/** @var XmlScanner */
$securityScanner = $this->securityScanner;
$xml2 = simplexml_load_string($securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
$xml2 = simplexml_load_string($securityScanner->scan($gFileData));
$xml = self::testSimpleXml($xml2);

$gnmXML = $xml->children(self::NAMESPACE_GNM);
Expand Down
2 changes: 1 addition & 1 deletion src/PhpSpreadsheet/Reader/Html.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class Html extends BaseReader

private const STARTS_WITH_BOM = '/^(?:\xfe\xff|\xff\xfe|\xEF\xBB\xBF)/';

private const DECLARES_CHARSET = '/ charset=/i';
private const DECLARES_CHARSET = '/\\bcharset=/i';

/**
* Input encoding.
Expand Down
41 changes: 22 additions & 19 deletions src/PhpSpreadsheet/Reader/Ods.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
use PhpOffice\PhpSpreadsheet\Settings;
use PhpOffice\PhpSpreadsheet\Shared\Date;
use PhpOffice\PhpSpreadsheet\Shared\File;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
Expand Down Expand Up @@ -58,9 +57,12 @@ public function canRead(string $filename): bool
$mimeType = $zip->getFromName($stat['name']);
} elseif ($zip->statName('META-INF/manifest.xml')) {
$xml = simplexml_load_string(
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('META-INF/manifest.xml')),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
$this->getSecurityScannerOrThrow()
->scan(
$zip->getFromName(
'META-INF/manifest.xml'
)
)
);
if ($xml !== false) {
$namespacesContent = $xml->getNamespaces(true);
Expand Down Expand Up @@ -98,9 +100,10 @@ public function listWorksheetNames(string $filename): array

$xml = new XMLReader();
$xml->xml(
$this->getSecurityScannerOrThrow()->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE),
null,
Settings::getLibXmlLoaderOptions()
$this->getSecurityScannerOrThrow()
->scanFile(
'zip://' . realpath($filename) . '#' . self::INITIAL_FILE
)
);
$xml->setParserProperty(2, true);

Expand Down Expand Up @@ -145,9 +148,10 @@ public function listWorksheetInfo(string $filename): array

$xml = new XMLReader();
$xml->xml(
$this->getSecurityScannerOrThrow()->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE),
null,
Settings::getLibXmlLoaderOptions()
$this->getSecurityScannerOrThrow()
->scanFile(
'zip://' . realpath($filename) . '#' . self::INITIAL_FILE
)
);
$xml->setParserProperty(2, true);

Expand Down Expand Up @@ -254,9 +258,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
// Meta

$xml = @simplexml_load_string(
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('meta.xml')),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
$this->getSecurityScannerOrThrow()
->scan($zip->getFromName('meta.xml'))
);
if ($xml === false) {
throw new Exception('Unable to read data from {$pFilename}');
Expand All @@ -270,8 +273,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp

$dom = new DOMDocument('1.01', 'UTF-8');
$dom->loadXML(
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('styles.xml')),
Settings::getLibXmlLoaderOptions()
$this->getSecurityScannerOrThrow()
->scan($zip->getFromName('styles.xml'))
);

$pageSettings = new PageSettings($dom);
Expand All @@ -280,8 +283,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp

$dom = new DOMDocument('1.01', 'UTF-8');
$dom->loadXML(
$this->getSecurityScannerOrThrow()->scan($zip->getFromName(self::INITIAL_FILE)),
Settings::getLibXmlLoaderOptions()
$this->getSecurityScannerOrThrow()
->scan($zip->getFromName(self::INITIAL_FILE))
);

$officeNs = (string) $dom->lookupNamespaceUri('office');
Expand Down Expand Up @@ -690,8 +693,8 @@ private function processSettings(ZipArchive $zip, Spreadsheet $spreadsheet): voi
{
$dom = new DOMDocument('1.01', 'UTF-8');
$dom->loadXML(
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('settings.xml')),
Settings::getLibXmlLoaderOptions()
$this->getSecurityScannerOrThrow()
->scan($zip->getFromName('settings.xml'))
);
//$xlinkNs = $dom->lookupNamespaceUri('xlink');
$configNs = (string) $dom->lookupNamespaceUri('config');
Expand Down
22 changes: 11 additions & 11 deletions src/PhpSpreadsheet/Reader/Xlsx.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
use PhpOffice\PhpSpreadsheet\Reader\Xlsx\WorkbookView;
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
use PhpOffice\PhpSpreadsheet\Settings;
use PhpOffice\PhpSpreadsheet\Shared\Date;
use PhpOffice\PhpSpreadsheet\Shared\Drawing;
use PhpOffice\PhpSpreadsheet\Shared\File;
Expand Down Expand Up @@ -123,7 +122,7 @@ private function loadZip(string $filename, string $ns = '', bool $replaceUnclose
$rels = @simplexml_load_string(
$this->getSecurityScannerOrThrow()->scan($contents),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions(),
0,
$ns
);

Expand All @@ -138,7 +137,7 @@ private function loadZipNonamespace(string $filename, string $ns): SimpleXMLElem
$rels = simplexml_load_string(
$this->getSecurityScannerOrThrow()->scan($contents),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions(),
0,
($ns === '' ? $ns : '')
);

Expand Down Expand Up @@ -245,11 +244,13 @@ public function listWorksheetInfo(string $filename): array

$xml = new XMLReader();
$xml->xml(
$this->getSecurityScannerOrThrow()->scan(
$this->getFromZipArchive($this->zip, $fileWorksheetPath)
),
null,
Settings::getLibXmlLoaderOptions()
$this->getSecurityScannerOrThrow()
->scan(
$this->getFromZipArchive(
$this->zip,
$fileWorksheetPath
)
)
);
$xml->setParserProperty(2, true);

Expand Down Expand Up @@ -2001,9 +2002,8 @@ private function readRibbon(Spreadsheet $excel, string $customUITarget, ZipArchi
if ($dataRels) {
// exists and is not empty if the ribbon has some pictures (other than internal MSO)
$UIRels = simplexml_load_string(
$this->getSecurityScannerOrThrow()->scan($dataRels),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
$this->getSecurityScannerOrThrow()
->scan($dataRels)
);
if (false !== $UIRels) {
// we need to save id and target to avoid parsing customUI.xml and "guessing" whether it's a pseudo callback that loads the image
Expand Down
5 changes: 1 addition & 4 deletions src/PhpSpreadsheet/Reader/Xlsx/Properties.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

use PhpOffice\PhpSpreadsheet\Document\Properties as DocumentProperties;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Settings;
use SimpleXMLElement;

class Properties
Expand All @@ -23,9 +22,7 @@ private function extractPropertyData(string $propertyData): ?SimpleXMLElement
{
// okay to omit namespace because everything will be processed by xpath
$obj = simplexml_load_string(
$this->securityScanner->scan($propertyData),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
$this->securityScanner->scan($propertyData)
);

return $obj === false ? null : $obj;
Expand Down
6 changes: 2 additions & 4 deletions src/PhpSpreadsheet/Reader/Xml.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
use PhpOffice\PhpSpreadsheet\Reader\Xml\Properties;
use PhpOffice\PhpSpreadsheet\Reader\Xml\Style;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
use PhpOffice\PhpSpreadsheet\Settings;
use PhpOffice\PhpSpreadsheet\Shared\Date;
use PhpOffice\PhpSpreadsheet\Shared\File;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
Expand Down Expand Up @@ -132,9 +131,8 @@ private function trySimpleXMLLoadStringPrivate(string $filename, string $fileOrS
}
if ($continue) {
$xml = @simplexml_load_string(
$this->getSecurityScannerOrThrow()->scan($data),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
$this->getSecurityScannerOrThrow()
->scan($data)
);
}
} catch (Throwable $e) {
Expand Down
10 changes: 5 additions & 5 deletions src/PhpSpreadsheet/Settings.php
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ public static function htmlEntityFlags(): int
* Set default options for libxml loader.
*
* @param ?int $options Default options for libxml loader
*
* @deprecated 3.5.0 no longer needed
*/
public static function setLibXmlLoaderOptions(?int $options): int
{
Expand All @@ -110,14 +112,12 @@ public static function setLibXmlLoaderOptions(?int $options): int
* Defaults to LIBXML_DTDLOAD | LIBXML_DTDATTR when not set explicitly.
*
* @return int Default options for libxml loader
*
* @deprecated 3.5.0 no longer needed
*/
public static function getLibXmlLoaderOptions(): int
{
if (self::$libXmlLoaderOptions === null) {
return self::setLibXmlLoaderOptions(null);
}

return self::$libXmlLoaderOptions;
return self::$libXmlLoaderOptions ?? (defined('LIBXML_DTDLOAD') ? (LIBXML_DTDLOAD | LIBXML_DTDATTR) : 0);
}

/**
Expand Down
1 change: 1 addition & 0 deletions tests/PhpSpreadsheetTests/Reader/Html/HtmlCharsetTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public static function providerCharset(): array
['charset.UTF-16.lebom.html', 'À1'],
['charset.gb18030.html', '电视机'],
['charset.unknown.html', 'exception'],
['xhtml4.entity.xhtml', 'exception'],
];
}
}
2 changes: 1 addition & 1 deletion tests/data/Reader/HTML/charset.ISO-8859-1.html4.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html lang='en'>
<head>
<meta http-equiv="Content-Type" content="text/html; CHARSET=ISO-8859-1">
<meta http-equiv="Content-Type" content="text/html;CHARSET=ISO-8859-1">
<title>ISO-8859-1 Html4 Doctype and Meta</title>
</head>
<body>
Expand Down
17 changes: 17 additions & 0 deletions tests/data/Reader/HTML/xhtml4.entity.xhtml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" [
<!ENTITY test "It worked">
]>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="application/xhtml+xml;charset=utf-8" />
<title>HTML Entities</title>
</head>
<body>
<table>
<tbody>
<tr><td>&test;</td></tr>
</tbody>
</table>
</body>
</html>

0 comments on commit fb74dcd

Please sign in to comment.