Skip to content

Commit

Permalink
Prepare 0.3.0 Release
Browse files Browse the repository at this point in the history
  • Loading branch information
nyamsprod committed Sep 28, 2023
1 parent e415048 commit 1506a96
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 25 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

All Notable changes to `bakame/html-table` will be documented in this file.

## [0.3.0](https://github.com/bakame-php/html-table/compare/0.2.0...0.3.0) - 2023-09-27
## [0.3.0](https://github.com/bakame-php/html-table/compare/0.2.0...0.3.0) - 2023-09-28

### Added

Expand All @@ -14,6 +14,7 @@ All Notable changes to `bakame/html-table` will be documented in this file.

- Improve identifier validation for `Parser::tablePosition`
- Remove the `$tableOffset` property.
- `tableHeader` can now re-arrange the table columns and remove any unwanted columns.

### Deprecated

Expand Down
16 changes: 13 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,19 @@ $parser = Parser::new()->tableHeader(['rank', 'team', 'winner']);
```
**If you specify a non-empty array as the table header, it will take precedence over any other table header related options.**
**Because it is tabular data, each header cell MUST be unique; otherwise an exception will be thrown.**
You can skip some of the source columns by leaving their offsets out of the header array; the array keys are the offsets of the columns to keep.
```php
use Bakame\HtmlTable\Parser;
use Bakame\HtmlTable\Section;
$parser = Parser::new()->tableHeader([3 => 'rank', 7 => 'winner', 5 => 'team']);
// only 3 columns will be extracted: the 4th, 8th and 6th columns (offsets 3, 7 and 5)
// and they are re-arranged with 'rank' first and 'team' last
```
### includeSection and excludeSection
Tells which section should be parsed based on the `Section` enum
Expand All @@ -239,8 +249,8 @@ Tells which section should be parsed based on the `Section` enum
use Bakame\HtmlTable\Parser;
use Bakame\HtmlTable\Section;
$parser = Parser::new()->includeSection(Section::thead); // thead is included during parsing
$parser = Parser::new()->excludeSection(Section::tr); // table direct tr children are not included during parsing
$parser = Parser::new()->includeSection(Section::tbody); // tbody is included during parsing
$parser = Parser::new()->excludeSection(Section::tr, Section::tfoot); // table direct tr children and tfoot are not included during parsing
```
**By default, the `thead` section is not parsed. If a `thead` row is selected to be the header, it will
Expand Down
4 changes: 3 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,22 @@
],
"require": {
"ext-dom": "*",
"ext-json": "*",
"ext-libxml": "*",
"ext-mbstring": "*",
"ext-simplexml": "*",
"league/csv": "^9.11.0"
},
"require-dev": {
"ext-curl": "*",
"ext-xdebug": "*",
"friendsofphp/php-cs-fixer": "^v3.28.0",
"laravel/prompts": "^0.1.9",
"phpstan/phpstan": "^1.10.35",
"phpstan/phpstan-deprecation-rules": "^1.1.4",
"phpstan/phpstan-phpunit": "^1.3.14",
"phpstan/phpstan-strict-rules": "^1.5.1",
"phpunit/phpunit": "^10.3.5",
"symfony/css-selector": "^6.3",
"symfony/var-dumper": "^6.3.4"
},
"autoload": {
Expand Down
45 changes: 29 additions & 16 deletions src/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,15 @@
use Iterator;
use League\Csv\ResultSet;
use League\Csv\SyntaxError;
use League\Csv\TabularDataReader;
use SimpleXMLElement;
use Stringable;

use function array_combine;
use function array_fill;
use function array_filter;
use function array_key_exists;
use function array_merge;
use function array_pad;
use function array_shift;
use function array_slice;
use function array_unique;
use function count;
use function fclose;
use function fopen;
use function in_array;
Expand Down Expand Up @@ -193,10 +188,12 @@ public function tableHeaderPosition(Section $section, int $offset = 0): self
};
}

public function includeSection(Section $section): self
public function includeSection(Section ...$sections): self
{
$includedSections = $this->includedSections;
$includedSections[$section->value] = 1;
foreach ($sections as $section) {
$includedSections[$section->value] = 1;
}

return match ($this->includedSections) {
$includedSections => $this,
Expand All @@ -213,10 +210,12 @@ public function includeSection(Section $section): self
};
}

public function excludeSection(Section $section): self
public function excludeSection(Section ...$sections): self
{
$includedSections = $this->includedSections;
unset($includedSections[$section->value]);
foreach ($sections as $section) {
unset($includedSections[$section->value]);
}

return match ($this->includedSections) {
$includedSections => $this,
Expand Down Expand Up @@ -322,7 +321,7 @@ public function tableCaption(?string $caption = null): self
* @throws ParserError
* @throws SyntaxError
*/
public function parseFile(mixed $filenameOrStream, $filenameContext = null): TabularDataReader
public function parseFile(mixed $filenameOrStream, $filenameContext = null): Table
{
if (is_resource($filenameOrStream)) {
return $this->parseHtml($this->streamToString($filenameOrStream));
Expand Down Expand Up @@ -374,7 +373,7 @@ public function parseHtml(DOMDocument|DOMElement|SimpleXMLElement|Stringable|str
$result = $xpath->query('(//caption)[1]');
$caption = $result->item(0)?->nodeValue ?? $this->caption;

return new Table(new ResultSet($this->extractTableContents($xpath, $header), $header), $caption);
return new Table(new ResultSet($this->extractTableContents($xpath, $header), array_values($header)), $caption);
}

/**
Expand Down Expand Up @@ -564,16 +563,30 @@ private function extractRecord(DOMElement $tr, array &$rowSpanIndices = []): arr
*/
private function formatRecord(array $record, array $header): array
{
$cellCount = count($header);
$record = match ($cellCount) {
0 => $record,
count($record) => array_combine($header, $record),
default => array_combine($header, array_slice(array_pad($record, $cellCount, ''), 0, $cellCount)),
$record = match ([]) {
$header => $record,
default => $this->combineArray($record, $header),
};

return match (null) {
$this->formatter => $record,
default => ($this->formatter)($record),
};
}

/**
 * Builds an associative row by pairing every header name with the record cell stored at the same offset.
 *
 * The row follows the iteration order of $header, so columns can be re-ordered or left out;
 * a header offset that has no matching cell in $record produces a null value.
 *
 * @param array<string> $record cells of the row, indexed by column offset
 * @param array<string> $header column names, indexed by the offset to read from $record
 *
 * @return array<string, string|null>
 */
private function combineArray(array $record, array $header): array
{
    return array_reduce(
        array_keys($header),
        static function (array $carry, int|string $offset) use ($record, $header): array {
            // Same assignment as a plain foreach: a repeated name keeps its first position and its last value.
            $carry[$header[$offset]] = $record[$offset] ?? null;

            return $carry;
        },
        []
    );
}
}
46 changes: 42 additions & 4 deletions src/ParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ public function it_will_return_the_same_options(): void
$parser
->tablePosition(0)
->tableHeaderPosition(Section::thead, 0)
->includeSection(Section::tbody)
->includeSection(Section::tfoot)
->includeSection(Section::tr)
->includeSection(Section::tbody, Section::tfoot, Section::tr)
->tableHeader([])
->resolveTableHeader()
->ignoreXmlErrors()
Expand Down Expand Up @@ -286,6 +284,47 @@ public function it_will_use_the_submitted_headers(): void
], $table->first());
}


#[Test]
public function it_will_rearrange_the_content_with_table_header(): void
{
    // A single <tfoot> with four columns; the header keys used below are
    // offsets into these columns (0 = first name, 1 = count, 2 = sex, 3 = year).
    $html = <<<TABLE
    <table class="table-csv-data" id="testb">
    <tfoot>
    <tr data-record-offset="5"><td title="prenoms">Abel</td><td title="nombre">14</td><td title="sexe">M</td><td title="annee">2004</td></tr>
    <tr data-record-offset="6"><td title="prenoms">Abiga</td><td title="nombre">6</td><td title="sexe">F</td><td title="annee">2004</td></tr>
    <tr data-record-offset="7"><td title="prenoms">Aboubacar</td><td title="nombre">8</td><td title="sexe">M</td><td title="annee">2004</td></tr>
    <tr data-record-offset="8"><td title="prenoms">Aboubakar</td><td title="nombre">6</td><td title="sexe">M</td><td title="annee">2004</td></tr>
    </tfoot>
    </table>
    TABLE;

    // Every column is kept but in a different order than in the markup.
    $header = [3 => 'Annee', 2 => 'Sexe', 0 => 'Firstname', 1 => 'Count'];
    $table = Parser::new()
        ->tableHeader($header)
        ->parseHtml($html);

    // Expected value first, actual value second, so a failure diff labels them correctly.
    self::assertSame(array_values($header), $table->getHeader());
    self::assertSame([
        'Annee' => '2004',
        'Sexe' => 'M',
        'Firstname' => 'Abel',
        'Count' => '14',
    ], $table->first());

    // Offset 2 is left out: the matching column must not appear in the records.
    $header = [3 => 'Annee', 0 => 'Firstname', 1 => 'Count'];
    $table = Parser::new()
        ->tableHeader($header)
        ->parseHtml($html);

    self::assertSame(array_values($header), $table->getHeader());
    self::assertSame([
        'Annee' => '2004',
        'Firstname' => 'Abel',
        'Count' => '14',
    ], $table->first());
}

#[Test]
public function it_will_duplicate_colspan_data(): void
{
Expand Down Expand Up @@ -391,7 +430,6 @@ public function it_will_found_no_header_in_any_section(): void
self::assertSame([], $table->getHeader());
}


#[Test]
public function it_will_use_the_table_footer(): void
{
Expand Down

0 comments on commit 1506a96

Please sign in to comment.