Skip to content

Commit

Permalink
update parser
Browse files Browse the repository at this point in the history
  • Loading branch information
nikiquickie committed Dec 10, 2024
1 parent 33f9892 commit d6650cd
Showing 1 changed file with 18 additions and 19 deletions.
37 changes: 18 additions & 19 deletions src/app/pages/user/parser/parser.component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,25 @@ import { Component } from '@angular/core';
standalone: false
})
export class ParserComponent {
htmlJson: string = ''; // Input field for raw HTML
quantityJson: string = ''; // JSON field for quantities
productJson: string = ''; // JSON field for products
parsedQuantities: any[] = []; // Output for parsed quantities
parsedProducts: any[] = []; // Output for parsed products
error: string = ''; // Error message if input is invalid
htmlJson: string = '';
quantityJson: string = '';
productJson: string = '';
parsedQuantities: any[] = [];
parsedProducts: any[] = [];
error: string = '';

parseJson() {
try {
// Clear previous results
this.parsedQuantities = [];
this.parsedProducts = [];
this.error = '';

const parser = new DOMParser();
const doc = parser.parseFromString(this.htmlJson, 'text/html');

// Extract Products
const product = {
name: doc.querySelector('.h1')?.textContent?.trim() || '',
description: doc.querySelector('#tab-description')?.textContent?.trim() || '',
description: this.getPlainTextContent(doc.querySelector('#tab-description')) || '',
country: this.getSiblingText(doc, 'Країна виробника'),
volume: Number(this.getSiblingText(doc, "Об'єм")?.replace(' мл', '').trim()) || 0,
weight: Number(this.getSiblingText(doc, 'Міцність нікотину')?.replace(' мг', '').trim()) || 0,
Expand All @@ -38,34 +36,35 @@ export class ParserComponent {

this.parsedProducts.push(product);

// Populate product JSON output
this.productJson = JSON.stringify(this.parsedProducts, null, 2);

// Extract Quantities
const quantityElements = doc.querySelectorAll('.hpm-type-images .hpm-item');
quantityElements.forEach(el => {
const quantity = {
name: el.querySelector('img')?.getAttribute('alt') || '',
thumb: el.querySelector('img')?.getAttribute('src') || '',
code: 0, // Код товару
quantity: 5 // Default value as "В наявності більше 5 шт." in HTML
code: 0,
quantity: 5
};
this.parsedQuantities.push(quantity);
});

// Populate quantity JSON output
this.quantityJson = JSON.stringify(this.parsedQuantities, null, 2);
} catch (err) {
this.error = 'Failed to parse the HTML. Please check the format and try again.';
console.error(err);
}
}

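/**
* Documents getSiblingText: finds the first <td> whose trimmed text equals the
* given header text and returns the trimmed text of the element that follows it.
* @param doc - The parsed document to search.
* @param headerText - The exact header text to match.
* @returns The sibling's trimmed text, or null when there is no match or the text is empty.
*/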
/**
 * Returns the trimmed text of an element with any <style> and <script>
 * descendants left out. The work is done on a deep clone, so the element
 * passed in is not modified.
 * @param element - Element to read, or null.
 * @returns The trimmed text, or '' when the element is null or has no text.
 */
private getPlainTextContent(element: Element | null): string {
  if (!element) {
    return '';
  }

  // Deep clone so stripping nodes below never touches the parsed document.
  const copy = element.cloneNode(true) as HTMLElement;
  for (const unwanted of Array.from(copy.querySelectorAll('style, script'))) {
    unwanted.remove();
  }

  const text = copy.textContent?.trim();
  return text ? text : '';
}

private getSiblingText(doc: Document, headerText: string): string | null {
const header = Array.from(doc.querySelectorAll('td')).find(td => td.textContent?.trim() === headerText);
return header?.nextElementSibling?.textContent?.trim() || null;
Expand Down

0 comments on commit d6650cd

Please sign in to comment.