Skip to content

Commit

Permalink
Optimized single col tables
Browse files Browse the repository at this point in the history
  • Loading branch information
spacecowboy committed May 28, 2024
1 parent b276b08 commit 29b259f
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,17 @@ class HtmlLinearizer {
}
}

"td", "th" -> {
// If we end up here, that means the table has been optimized out. Treat as a div.
asElement(blockStyle) {
linearizeChildren(
element.childNodes(),
blockStyle = blockStyle,
baseUrl = baseUrl,
)
}
}

"table" -> {
finalizeAndAddCurrentElement(blockStyle)

Expand Down Expand Up @@ -493,8 +504,16 @@ class HtmlLinearizer {
}
}

// If there is only a single row, then don't bother with a table
if (rowSequence.count() == 1) {
val colCount =
rowSequence
.map { row ->
row.children().count { it.tagName() == "td" || it.tagName() == "th" }
}
.maxOrNull()
?: 0

// If there is only a single row, or a single column, then don't bother with a table
if (colCount == 1 || rowSequence.count() == 1) {
linearizeChildren(
element.childNodes(),
blockStyle = blockStyle,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1160,10 +1160,36 @@ class HtmlLinearizerTest {

val result = linearizer.linearize(html, baseUrl).elements

assertEquals(2, result.count { it is LinearTable }, "Expected two tables in result")
assertEquals(1, result.count { it is LinearTable }, "Expected one table in result")
assertEquals(8, result.filterIsInstance<LinearTable>().first().rowCount, "Expected table with 8 rows")
}

@Test
fun `table with single column is optimized out`() {
    // Two rows with one cell each: there is no tabular structure worth keeping,
    // so the output is expected to contain plain text items only.
    val singleColumnMarkup =
        """
        <table>
        <tbody>
        <tr>
        <td>Single column table</td>
        </tr>
        <tr>
        <td>Second row</td>
        </tr>
        </tbody>
        </table>
        """.trimIndent()

    val result =
        linearizer
            .linearize(singleColumnMarkup, "https://example.com")
            .elements

    // One item per row ...
    assertEquals(2, result.size, "Expected two text items: $result")
    // ... and none of them may be anything other than linear text.
    assertTrue("Expected all to be linear text items: $result") {
        result.none { it !is LinearText }
    }
}

@Test
fun `insane nested table`() {
// from kill-the-newsletter
Expand Down

0 comments on commit 29b259f

Please sign in to comment.