Skip to content
This repository has been archived by the owner on Jul 3, 2020. It is now read-only.

Commit

Permalink
Implemented the TheaterScheduleHTMLParser.
Browse files Browse the repository at this point in the history
  • Loading branch information
bofirial committed Jun 11, 2017
1 parent 7d9442b commit 2e533a6
Showing 1 changed file with 67 additions and 2 deletions.
69 changes: 67 additions & 2 deletions Melody49Notifier/DataAbstraction/TheaterScheduleHTMLParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,76 @@ public TheaterSchedule ParseTheaterScheduleHTML(string html)

TheaterSchedule theaterSchedule = new TheaterSchedule();

//theaterSchedule.ScheduleDescription = htmlDocument.DocumentNode.Descendants("div").Where(x => x?.Attributes?["id"]?.Value == "playweek").First().ChildNodes.First().InnerText;
theaterSchedule.ScheduleDescription = GetScheduleDescription(htmlDocument);

theaterSchedule.ScheduleDescription = htmlDocument.DocumentNode.SelectNodes("//div[@id=\"playweek\"]/p").First().InnerText;
HtmlNodeCollection tableRows = GetTheaterShowingsTableRows(htmlDocument);

string currentScreen = string.Empty;
TheaterShowing currentTheaterShowing = null;
theaterSchedule.Showings = new List<TheaterShowing>();

foreach (HtmlNode tableRow in tableRows)
{
string column1Text = GetColumnText(tableRow, 0);
string column2Text = GetColumnText(tableRow, 2);

if (IsScreenRow(column1Text, column2Text))
{
currentScreen = column1Text.Replace(":", "").Trim();
}
else if (!IsEmptyString(column1Text) && !IsEmptyString(column2Text))
{
if (IsShowingScheduleRow(column2Text))
{
currentTheaterShowing.ShowingScheduleDescription = $"{currentTheaterShowing.ShowingScheduleDescription}{column1Text} {column2Text}\r\n";
}
else
{
currentTheaterShowing = new TheaterShowing()
{
Screen = currentScreen,
MovieDescription = column1Text,
ActorDescription = column2Text
};

theaterSchedule.Showings.Add(currentTheaterShowing);
}
}
}

return theaterSchedule;
}

/// <summary>
/// Selects every table row beneath the second table matched by the XPath
/// <c>//body/div/table</c>.
/// </summary>
/// <param name="htmlDocument">The parsed HTML document to query.</param>
/// <returns>
/// The matching <c>tr</c> nodes, or an empty collection when the XPath matches nothing.
/// </returns>
private static HtmlNodeCollection GetTheaterShowingsTableRows(HtmlDocument htmlDocument)
{
    // By default HtmlAgilityPack's SelectNodes returns null (not an empty collection)
    // when nothing matches, which would make a foreach over the result throw.
    // Substitute an empty collection so callers can always iterate safely.
    return htmlDocument.DocumentNode.SelectNodes("(//body/div/table)[2]//tr")
        ?? new HtmlNodeCollection(htmlDocument.DocumentNode);
}

/// <summary>
/// Reads the inner text of the first <c>p</c> element that is a direct child of the
/// <c>div</c> whose id is <c>playweek</c>.
/// </summary>
/// <param name="htmlDocument">The parsed HTML document to query.</param>
/// <returns>
/// The paragraph's inner text, or an empty string when no such paragraph exists.
/// </returns>
private static string GetScheduleDescription(HtmlDocument htmlDocument)
{
    // SelectSingleNode yields the first match or null; the previous
    // SelectNodes(...).First() chain threw when the page had no playweek paragraph.
    HtmlNode descriptionNode = htmlDocument.DocumentNode.SelectSingleNode("//div[@id=\"playweek\"]/p");

    return descriptionNode?.InnerText ?? string.Empty;
}

/// <summary>
/// Determines whether the second column's text parses as a <see cref="TimeSpan"/>,
/// which is how a showing-schedule row is recognised.
/// </summary>
/// <param name="column2Text">Raw text of the second column; may be null.</param>
/// <returns>
/// <c>true</c> when the trimmed text parses as a <see cref="TimeSpan"/>; otherwise
/// <c>false</c> (including for null or empty input).
/// </returns>
private static bool IsShowingScheduleRow(string column2Text)
{
    // Parse with the invariant culture so the result does not depend on the machine's
    // regional settings. The parsed value itself is not needed, hence the discard.
    // TryParse returns false for null, so the null-conditional Trim is safe.
    return TimeSpan.TryParse(
        column2Text?.Trim(),
        System.Globalization.CultureInfo.InvariantCulture,
        out _);
}

/// <summary>
/// Determines whether a row is a screen heading: the first column contains the word
/// "screen" (case-insensitively) and the second column is empty.
/// </summary>
/// <param name="column1Text">Raw text of the first column; may be null.</param>
/// <param name="column2Text">Raw text of the second column; may be null.</param>
/// <returns><c>true</c> when both conditions hold; otherwise <c>false</c>.</returns>
private static bool IsScreenRow(string column1Text, string column2Text)
{
    // An ordinal, case-insensitive search avoids allocating a lowered copy of the text
    // and does not vary with the current culture. A null first column is simply not a
    // screen row rather than an error.
    return column1Text != null
        && column1Text.IndexOf("screen", StringComparison.OrdinalIgnoreCase) >= 0
        && IsEmptyString(column2Text);
}

/// <summary>
/// Returns the inner text of the <c>td</c> child at the given zero-based position
/// within a table row. Only direct children named <c>td</c> are counted.
/// </summary>
/// <param name="tableRow">The <c>tr</c> node whose cells are inspected.</param>
/// <param name="index">Zero-based position among the row's <c>td</c> children.</param>
/// <returns>
/// The cell's inner text, or an empty string when the row has no cell at that position.
/// </returns>
private static string GetColumnText(HtmlNode tableRow, int index)
{
    // Rows with fewer cells than expected (for example header or spacer rows) used to
    // throw ArgumentOutOfRangeException from ElementAt; treat a missing cell as empty.
    HtmlNode cell = tableRow.ChildNodes
        .Where(x => x.Name == "td")
        .ElementAtOrDefault(index);

    return cell?.InnerText ?? string.Empty;
}

/// <summary>
/// Determines whether a column's text carries no content: null, empty, whitespace only,
/// or just the HTML entity <c>&amp;nbsp;</c> (in any letter case) once trimmed.
/// </summary>
/// <param name="columnText">Raw column text; may be null.</param>
/// <returns><c>true</c> when the text is considered empty; otherwise <c>false</c>.</returns>
private static bool IsEmptyString(string columnText)
{
    if (string.IsNullOrWhiteSpace(columnText))
    {
        return true;
    }

    // Compare ordinally and case-insensitively instead of lowering the string first,
    // so the check does not depend on the current culture's casing rules.
    return string.Equals(columnText.Trim(), "&nbsp;", StringComparison.OrdinalIgnoreCase);
}
}
}

0 comments on commit 2e533a6

Please sign in to comment.