Skip to content

Commit

Permalink
Fix breaks within spans
Browse files Browse the repository at this point in the history
  • Loading branch information
dgunning committed Nov 27, 2024
1 parent 2c8a206 commit f80b2b7
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 64 deletions.
10 changes: 2 additions & 8 deletions data/html/Oracle.10-Q.html
Original file line number Diff line number Diff line change
Expand Up @@ -3377,11 +3377,7 @@
<td style="white-space:nowrap;vertical-align:bottom;border-bottom:2.25pt double #ffffff03;"><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:left;"><span style="font-family:'Calibri',sans-serif;min-width:fit-content;">&#160;</span></p></td>
</tr>
</table></ix:nonNumeric></div><p style="text-indent:-3.448%;padding-left:3.333%;font-size:10pt;margin-top:2pt;font-family:Times New Roman;margin-bottom:0;text-align:justify;"><span style="white-space:pre-wrap;font-size:10pt;font-family:Times New Roman;min-width:fit-content;">&#160;</span></p><ix:exclude><div class="page-node-content"><div style="z-index:-3;flex-direction:column;display:flex;padding-bottom:0.5in;min-height:1in;justify-content:flex-end;position:relative;box-sizing:border-box;"><p style="font-size:10pt;margin-top:12pt;font-family:Times New Roman;margin-bottom:0;text-align:center;"><span style="color:#000000;white-space:pre-wrap;font-size:9pt;font-family:'Calibri',sans-serif;min-width:fit-content;">17</span></p></div></div></ix:exclude><ix:exclude>
<hr style="page-break-after:always;"/>
<div style="padding-top:0.5in;z-index:-3;min-height:1in;position:relative;box-sizing:border-box;"><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:left;"><a href="#tocpage"><span style="color:#0000ff;white-space:pre-wrap;text-decoration:underline;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">Table of Contents</span></a></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:left;"><span style="white-space:pre-wrap;font-size:10pt;font-family:Times New Roman;min-width:fit-content;">&#160;</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:center;"><span style="color:#000000;white-space:pre-wrap;font-weight:bold;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">ORACLE CORPORATION</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:center;"><span style="color:#000000;white-space:pre-wrap;font-weight:bold;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">NOTES TO CONDENSED CONSOLIDATED FINANCIAL STATEMENTS&#8212;(Continued)</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:center;"><span style="color:#000000;white-space:pre-wrap;font-weight:bold;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">August 31, 2024</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:center;"><span style="color:#000000;white-space:pre-wrap;font-weight:bold;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">(Unaudited)</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:left;"><span style="white-space:pre-wrap;font-size:10pt;font-family:Times New Roman;min-width:fit-content;">&#160;</span></p></div></ix:exclude><p style="text-indent:-3.448%;padding-left:3.333%;font-size:10pt;margin-top:2pt;font-family:Times New Roman;margin-bottom:0;text-align:justify;"><span style="color:#000000;white-space:pre-wrap;font-weight:bold;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">Disaggregation of Revenues</span></p><p style="font-size:10pt;margin-top:6pt;font-family:Times New Roman;margin-bottom:0;text-align:justify;"><span style="color:#000000;white-space:pre-wrap;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">We have considered information that is regularly reviewed by our CODMs in evaluating financial performance and disclosures presented outside of our financial statements in our earnings releases and used in investor presentations to disaggregate revenues to depict how the nature, amount, timing and uncertainty of revenues and cash flows are affected by economic factors. The principal category we use to disaggregate revenues is the nature of our products and services as presented in our condensed consolidated statements of operations.</span><span style="color:#000000;white-space:pre-wrap;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;"> </span></p><p style="font-size:10pt;margin-top:6pt;font-family:Times New Roman;margin-bottom:0;text-align:justify;"><span style="color:#000000;white-space:pre-wrap;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">The following table is a summary of our total revenues by geographic region:</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:justify;">&#160;</p>

<div style="font-size:11pt;font-family:'Calibri',sans-serif;">
<ix:nonNumeric id="F_9079b6e9-6a00-4fcb-a8f4-42be9baf8929" contextRef="C_9acd4737-5fa8-4cd0-a7b2-f306f25f859d" name="us-gaap:DisaggregationOfRevenueTableTextBlock" escape="true" continuedAt="F_9079b6e9-6a00-4fcb-a8f4-42be9baf8929_1">
<hr style="page-break-after:always;"/><div style="padding-top:0.5in;z-index:-3;min-height:1in;position:relative;box-sizing:border-box;"><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:left;"><a href="#tocpage"><span style="color:#0000ff;white-space:pre-wrap;text-decoration:underline;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">Table of Contents</span></a></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:left;"><span style="white-space:pre-wrap;font-size:10pt;font-family:Times New Roman;min-width:fit-content;">&#160;</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:center;"><span style="color:#000000;white-space:pre-wrap;font-weight:bold;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">ORACLE CORPORATION</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:center;"><span style="color:#000000;white-space:pre-wrap;font-weight:bold;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">NOTES TO CONDENSED CONSOLIDATED FINANCIAL STATEMENTS&#8212;(Continued)</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:center;"><span style="color:#000000;white-space:pre-wrap;font-weight:bold;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">August 31, 2024</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:center;"><span style="color:#000000;white-space:pre-wrap;font-weight:bold;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">(Unaudited)</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:left;"><span style="white-space:pre-wrap;font-size:10pt;font-family:Times New Roman;min-width:fit-content;">&#160;</span></p></div></ix:exclude><p style="text-indent:-3.448%;padding-left:3.333%;font-size:10pt;margin-top:2pt;font-family:Times New Roman;margin-bottom:0;text-align:justify;"><span style="color:#000000;white-space:pre-wrap;font-weight:bold;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">Disaggregation of Revenues</span></p><p style="font-size:10pt;margin-top:6pt;font-family:Times New Roman;margin-bottom:0;text-align:justify;"><span style="color:#000000;white-space:pre-wrap;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">We have considered information that is regularly reviewed by our CODMs in evaluating financial performance and disclosures presented outside of our financial statements in our earnings releases and used in investor presentations to disaggregate revenues to depict how the nature, amount, timing and uncertainty of revenues and cash flows are affected by economic factors. The principal category we use to disaggregate revenues is the nature of our products and services as presented in our condensed consolidated statements of operations.</span><span style="color:#000000;white-space:pre-wrap;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;"> </span></p><p style="font-size:10pt;margin-top:6pt;font-family:Times New Roman;margin-bottom:0;text-align:justify;"><span style="color:#000000;white-space:pre-wrap;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">The following table is a summary of our total revenues by geographic region:</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:justify;">&#160;</p><div style="font-size:11pt;font-family:'Calibri',sans-serif;"><ix:nonNumeric id="F_9079b6e9-6a00-4fcb-a8f4-42be9baf8929" contextRef="C_9acd4737-5fa8-4cd0-a7b2-f306f25f859d" name="us-gaap:DisaggregationOfRevenueTableTextBlock" escape="true" continuedAt="F_9079b6e9-6a00-4fcb-a8f4-42be9baf8929_1">
<table style="margin-left:auto;border-spacing:0;table-layout:fixed;width:100.0%;border-collapse:separate;margin-right:auto;">
<tr style="visibility:collapse;">
<td style="width:74.37%;"/>
Expand Down Expand Up @@ -3453,9 +3449,7 @@
<td style="white-space:pre-wrap;vertical-align:bottom;border-bottom:2.25pt double #000000;text-align:center;"><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:right;"><span style="font-family:'Calibri',sans-serif;color:#000000;white-space:pre-wrap;min-width:fit-content;"><ix:nonFraction id="F_a80933cc-5be6-4e76-af8e-a520bfb15821" contextRef="C_b63519ff-a3d5-4726-8e5c-69da3f7690ee" name="us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax" unitRef="U_USD" scale="6" decimals="-6" format="ixt:num-dot-decimal">12,453</ix:nonFraction></span></p></td>
<td style="white-space:nowrap;vertical-align:bottom;border-bottom:2.25pt double #ffffff03;"><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:left;"><span style="font-family:'Calibri',sans-serif;min-width:fit-content;">&#160;</span></p></td>
</tr>
</table><p style="padding-bottom:1pt;font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;border-bottom:0.5pt solid;margin-right:77.065%;text-align:left;"><span style="white-space:pre-wrap;font-size:4pt;font-family:'Calibri',sans-serif;min-width:fit-content;">&#160;</span></p><div class="item-list-element-wrapper" style="display:flex;margin-top:2pt;justify-content:flex-start;align-items:baseline;margin-bottom:0;min-width:3.333%;text-align:justify;"><span style="transform:scale(0.67);color:#000000;white-space:pre-wrap;vertical-align:super;font-size:7.5pt;font-family:Calibri;transform-origin:top left;min-width:3.333%;word-break:keep-all;display:inline-flex;justify-content:flex-start;">(1)</span><div style="width:100%;display:inline;"><ix:footnote id="FNT_101dfb5e-8641-4a4f-a4a3-578450fa445b" xml:lang="en-US"><span style="color:#000000;white-space:pre-wrap;font-size:7.5pt;font-family:'Calibri',sans-serif;min-width:fit-content;">Comprised of Europe, the Middle East and Africa</span></ix:footnote></div></div></ix:nonNumeric></div>

<p style="font-size:10pt;margin-top:12pt;font-family:Times New Roman;margin-bottom:0;text-align:justify;"><span style="color:#000000;white-space:pre-wrap;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">The following table presents our cloud services and license support revenues by offerings:</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:justify;">&#160;</p><div style="font-size:11pt;font-family:'Calibri',sans-serif;"><ix:continuation id="F_9079b6e9-6a00-4fcb-a8f4-42be9baf8929_1" continuedAt="F_9079b6e9-6a00-4fcb-a8f4-42be9baf8929_2">
</table><p style="padding-bottom:1pt;font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;border-bottom:0.5pt solid;margin-right:77.065%;text-align:left;"><span style="white-space:pre-wrap;font-size:4pt;font-family:'Calibri',sans-serif;min-width:fit-content;">&#160;</span></p><div class="item-list-element-wrapper" style="display:flex;margin-top:2pt;justify-content:flex-start;align-items:baseline;margin-bottom:0;min-width:3.333%;text-align:justify;"><span style="transform:scale(0.67);color:#000000;white-space:pre-wrap;vertical-align:super;font-size:7.5pt;font-family:Calibri;transform-origin:top left;min-width:3.333%;word-break:keep-all;display:inline-flex;justify-content:flex-start;">(1)</span><div style="width:100%;display:inline;"><ix:footnote id="FNT_101dfb5e-8641-4a4f-a4a3-578450fa445b" xml:lang="en-US"><span style="color:#000000;white-space:pre-wrap;font-size:7.5pt;font-family:'Calibri',sans-serif;min-width:fit-content;">Comprised of Europe, the Middle East and Africa</span></ix:footnote></div></div></ix:nonNumeric></div><p style="font-size:10pt;margin-top:12pt;font-family:Times New Roman;margin-bottom:0;text-align:justify;"><span style="color:#000000;white-space:pre-wrap;font-size:10pt;font-family:'Calibri',sans-serif;min-width:fit-content;">The following table presents our cloud services and license support revenues by offerings:</span></p><p style="font-size:10pt;margin-top:0;font-family:Times New Roman;margin-bottom:0;text-align:justify;">&#160;</p><div style="font-size:11pt;font-family:'Calibri',sans-serif;"><ix:continuation id="F_9079b6e9-6a00-4fcb-a8f4-42be9baf8929_1" continuedAt="F_9079b6e9-6a00-4fcb-a8f4-42be9baf8929_2">
<table style="margin-left:auto;border-spacing:0;table-layout:fixed;width:100.0%;border-collapse:separate;margin-right:auto;">
<tr style="visibility:collapse;">
<td style="width:74.37%;"/>
Expand Down
98 changes: 43 additions & 55 deletions edgar/files/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,17 +403,11 @@ def _determine_heading_level(self, style: StyleInfo) -> int:

def _process_element(self, element: Tag) -> Optional[Union[DocumentNode, List[DocumentNode]]]:
"""Process an element into one or more document nodes"""
nodes = []
current_text = []

# Handle ix: tags by getting their content
if element.name.startswith('ix:'):
# Find the first meaningful child (like a table)
for child in element.children:
if isinstance(child, Tag):
return self._process_element(child)
# If no meaningful children, treat as regular text container
#text = element.get_text(strip=True)
text = self._get_text_with_spacing(element)
if text:
return DocumentNode(
Expand All @@ -422,65 +416,59 @@ def _process_element(self, element: Tag) -> Optional[Union[DocumentNode, List[Do
style=self.parse_style(element.get('style', ''))
)

# First, determine if this element itself should be a specific node type
# Process specific element types
if element.name == 'table':
table_node = self._process_table(element)
return table_node if table_node else None
return self._process_table(element)
elif element.name == 'div':
return self._process_div(element, self.parse_style(element.get('style', '')))
elif element.name == 'p':
return self._process_paragraph(element)

# Process other elements
nodes = []
for child in element.children:
if isinstance(child, Tag):
child_result = self._process_element(child)
if child_result:
if isinstance(child_result, list):
nodes.extend(child_result)
else:
nodes.append(child_result)

return nodes[0] if len(nodes) == 1 else nodes if nodes else None

def _process_paragraph(self, element: Tag) -> Optional[DocumentNode]:
"""Process a paragraph element into a single text node"""
text_parts = []

# Process children
for child in element.children:
if isinstance(child, NavigableString):
text = str(child).strip()
text = str(child) # Don't strip individual NavigableStrings
if text:
current_text.append(text)
text_parts.append(text)
elif isinstance(child, Tag):
if current_text:
try:
nodes.append(DocumentNode(
type='paragraph',
content=' '.join(current_text),
style=self.parse_style(element.get('style', ''))
))
except ValueError as e:
# Log the error but continue processing
print(f"Warning: Failed to create paragraph node: {e}")
current_text = []

if child.name == 'table':
table_node = self._process_table(child)
if table_node:
nodes.append(table_node)
elif child.name == 'br':
current_text.append('\n')
elif child.name.startswith('ix:'):
# Process ix: tags recursively
ix_result = self._process_element(child)
if ix_result:
if isinstance(ix_result, list):
nodes.extend(ix_result)
else:
nodes.append(ix_result)
else:
child_result = self._process_element(child)
if child_result:
if isinstance(child_result, list):
nodes.extend(child_result)
else:
nodes.append(child_result)
if child.name == 'br':
text_parts.append('\n')
elif child.name in ['span', 'strong', 'em', 'b', 'i', 'a']:
# Handle inline elements
inline_text = child.get_text() # Don't strip inline text
if inline_text:
text_parts.append(inline_text)
# We'll ignore any div elements since they shouldn't be in paragraphs

if not text_parts:
return None

if current_text:
try:
nodes.append(DocumentNode(
type='paragraph',
content=' '.join(current_text),
style=self.parse_style(element.get('style', ''))
))
except ValueError as e:
print(f"Warning: Failed to create paragraph node: {e}")
# Join all parts and then normalize whitespace at the end
combined_text = ''.join(text_parts)
# Replace multiple spaces with single space and strip at the end
normalized_text = ' '.join(combined_text.split())

return nodes[0] if len(nodes) == 1 else nodes if nodes else None
return DocumentNode(
type='paragraph',
content=normalized_text,
style=self.parse_style(element.get('style', ''))
)


def _get_text_with_spacing(self, element: Tag) -> str:
Expand Down
Loading

0 comments on commit f80b2b7

Please sign in to comment.