Skip to content

Commit

Permalink
use dotnet for parsing feeds
Browse files Browse the repository at this point in the history
  • Loading branch information
laudebugs committed Oct 14, 2024
1 parent 32dd534 commit 0e88794
Show file tree
Hide file tree
Showing 10 changed files with 661 additions and 528 deletions.
45 changes: 14 additions & 31 deletions .github/workflows/generate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,24 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
node-version: [20.x]
dotnet: [8.x]
env:
NUGET_PACKAGES: ${{ github.workspace }}/.nuget/packages
steps:
- uses: actions/checkout@v3
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Cache node modules
uses: actions/cache@v2
env:
cache-name: node-cache
- name: Setup dotnet
uses: actions/setup-dotnet@v4
with:
# npm cache files are stored in `~/.npm` on Linux/macOS
path: ~/.npm
key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/package-lock.json') }}
restore-keys: |
${{ runner.os }}-build-${{ env.cache-name }}-
${{ runner.os }}-build-
${{ runner.os }}-
dotnet-version: |
8.x
cache: true
cache-dependency-path: FeedParser/packages.lock.json
- name: Restore Nuget Packages
run: dotnet restore FeedParser/FeedParser.csproj

- uses: actions/cache@v2
env:
Expand All @@ -44,31 +43,15 @@ jobs:
restore-keys: |
${{ runner.os }}-pip-
- name: 'Install npm dependencies 🪨'
if: steps.node-cache.outputs.cache-hit != 'true'
run: npm install

- name: 'Install python deps 🐍'
if: steps.python-cache.outputs.cache-hit != 'true'
run: pip3 install -r requirements.txt

- name: 'compile 🏗️'
run: npx tsc

- name: 'Prepare 🧑‍🍳'
run: |
mkdir dist
touch dist/rssUrls.txt
mkdir tmp
mkdir tmp/dist
touch tmp/dist/logs.md
mkdir tmp/dist/podcasts
mkdir tmp/dist/podcasts_palettes
mkdir tmp/dist/.github
mkdir tmp/dist/.github/workflows
- name: 'Build 🏗️'
run: dotnet build FeedParser/FeedParser.csproj

- name: 'Generate Files 🤖'
run: node --max-old-space-size=8192 out/index.js
run: dotnet run --project FeedParser/FeedParser.csproj

- name: 'Generate Palette 🎨'
run: |
Expand Down
50 changes: 49 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,52 @@ tmp
out.json

# Other files
*.DS_Store
*.DS_Store

*.swp
*.*~
project.lock.json
.DS_Store
*.pyc
nupkg/

# Visual Studio Code
.vscode/

# Rider
.idea/

# Visual Studio
.vs/

# Fleet
.fleet/

# Code Rush
.cr/

# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates

# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
build/
bld/
[Bb]in/
[Oo]bj/
[Oo]ut/
msbuild.log
msbuild.err
msbuild.wrn

# python virtual env
venv/
path/to/venv/
19 changes: 19 additions & 0 deletions FeedParser/FeedParser.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Project file for the FeedParser console application (entry point: Program.cs). -->

<PropertyGroup>
<!-- Build an executable targeting .NET 8. -->
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- Restore with a lock file; the result is committed as FeedParser/packages.lock.json. -->
<RestorePackagesWithLockFile>true</RestorePackagesWithLockFile>
</PropertyGroup>

<ItemGroup>
<!-- FeedReader.ReadAsync is used in Program.cs to download and parse each feed. -->
<PackageReference Include="CodeHollow.FeedReader" Version="1.2.6" />
<!-- JsonConvert.SerializeXmlNode is used in Program.cs to turn the feed XML into JSON. -->
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
<!-- The Opml type is used in Program.cs to read data/podcasts_opml.xml. -->
<PackageReference Include="OPMLCore.NET" Version="1.0.0" />
<!-- SlugHelper is used in Program.cs to derive output file names from feed titles. -->
<PackageReference Include="Slugify.Core" Version="4.0.1" />
<!-- NOTE(review): Program.cs imports Syndication.Parser (presumably from this package) but no use of it is visible; confirm it is still needed. -->
<PackageReference Include="SyndicationLib" Version="0.2.0-beta" />
</ItemGroup>

</Project>
15 changes: 15 additions & 0 deletions FeedParser/Models/WorkingFolder.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
namespace PodcastDataGenerator.Models
{
    /// <summary>
    /// Describes a folder together with the sub-folders and files associated with it.
    /// </summary>
    public class WorkingFolder
    {
        /// <summary>
        /// Creates a descriptor from the given folder path, sub-folder paths and file paths.
        /// </summary>
        /// <param name="path">Path of the folder.</param>
        /// <param name="subFolderPaths">Sub-folder paths belonging to the folder.</param>
        /// <param name="filesToGenerate">Paths of the files to generate.</param>
        public WorkingFolder(string path, List<string> subFolderPaths, List<string> filesToGenerate)
            => (Path, SubFolderPaths, FilesToGenerate) = (path, subFolderPaths, filesToGenerate);

        /// <summary>Path of the folder.</summary>
        public string Path { get; set; }

        /// <summary>Sub-folder paths belonging to the folder.</summary>
        public List<string> SubFolderPaths { get; set; }

        /// <summary>Paths of the files to generate.</summary>
        public List<string> FilesToGenerate { get; set; }
    }
}
68 changes: 68 additions & 0 deletions FeedParser/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// See https://aka.ms/new-console-template for more information
using System.Xml;
using CodeHollow.FeedReader;
using Newtonsoft.Json;
using OPMLCore.NET;
using PodcastDataGenerator.Models;
using Slugify;
using Syndication.Parser;

// All paths below are resolved against the directory the program is started from.
var workingDir = Directory.GetCurrentDirectory();

// Output folders to (re)create on every run, with their sub-folders and empty seed files.
var folders = new List<WorkingFolder>
{
    new WorkingFolder("tmp/dist", ["podcasts", "podcasts_palettes", ".github/workflows"], ["tmp/dist/logs.md"]),
    new WorkingFolder("dist", [], ["dist/rssUrls.txt"]),
};

foreach (WorkingFolder workingFolder in folders)
{
    var folderRoot = $"{workingDir}/{workingFolder.Path}";

    // Start from a clean slate: remove anything left from a previous run.
    if (Directory.Exists(folderRoot))
    {
        Directory.Delete(folderRoot, true);
    }
    Directory.CreateDirectory(folderRoot);

    foreach (string subFolder in workingFolder.SubFolderPaths)
    {
        Directory.CreateDirectory($"{workingDir}/{workingFolder.Path}/{subFolder}");
    }

    // File paths are relative to the working directory, not to the folder itself.
    foreach (string relativeFile in workingFolder.FilesToGenerate)
    {
        // Create the empty file and release the handle straight away.
        File.Create($"{workingDir}/{relativeFile}").Dispose();
    }
}

var helper = new SlugHelper();

var opml = new Opml($"{workingDir}/data/podcasts_opml.xml");

// Print the text of every top-level outline in the OPML document.
foreach (Outline topLevelOutline in opml.Body.Outlines)
{
    Console.WriteLine(topLevelOutline.Text);
}

// Downloads and parses the feed at xmlUrl, converts its original XML document to
// indented JSON and writes it to tmp/dist/podcasts/<slug-of-feed-title>.json.
//
// Returns the parsed feed, or null when any step (download, parse, conversion or
// write) throws; the failure is reported on the console in red and not rethrown,
// so a single failing feed does not fault the task returned to the caller.
async Task<Feed?> ParseFeed(string xmlUrl)
{
    try
    {
        var parsedFeed = await FeedReader.ReadAsync(xmlUrl);
        Console.WriteLine($"Parsed Feed {parsedFeed.Title}");

        // Load the raw feed into an XmlDocument so Json.NET can serialize the node tree.
        XmlDocument xmlDocument = new XmlDocument();
        xmlDocument.LoadXml(parsedFeed.OriginalDocument);
        // Fully qualified because System.Xml also declares a Formatting type.
        var feedAsJson = JsonConvert.SerializeXmlNode(xmlDocument, Newtonsoft.Json.Formatting.Indented);

        // The output file name is derived from the feed title.
        var outputPath = $"{workingDir}/tmp/dist/podcasts/{helper.GenerateSlug(parsedFeed.Title)}.json";

        // Write asynchronously: this function runs for many feeds at once, and a
        // blocking write would hold a thread for the duration of the disk I/O.
        await File.WriteAllTextAsync(outputPath, feedAsJson);

        return parsedFeed;
    }
    catch (System.Exception err)
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine($"Error fetching feed: {xmlUrl}: {err.Message}");
        Console.ResetColor();
        return null;
    }
}

// The feeds are the child outlines of the first top-level outline in the OPML body.
var feedOutlines = opml.Body.Outlines.First().Outlines;

// Record every feed URL, one per line.
var rssUrls = feedOutlines.Select(entry => entry.XMLUrl);
File.AppendAllLines($"{workingDir}/dist/rssUrls.txt", rssUrls);

// Start parsing every feed and wait until all of them have finished.
var pendingParses = feedOutlines.Select(entry => ParseFeed(entry.XMLUrl));
await Task.WhenAll(pendingParses);
75 changes: 75 additions & 0 deletions FeedParser/packages.lock.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
{
"version": 1,
"dependencies": {
"net8.0": {
"CodeHollow.FeedReader": {
"type": "Direct",
"requested": "[1.2.6, )",
"resolved": "1.2.6",
"contentHash": "KpnP1zlX5zk58PUrREYUXC/7gk8ljjS8mkPfmFeuofaJlHJm62990cZdWLiMBuwWhV/HA6NAu9Ck5uM7GOJl0A=="
},
"Newtonsoft.Json": {
"type": "Direct",
"requested": "[13.0.3, )",
"resolved": "13.0.3",
"contentHash": "HrC5BXdl00IP9zeV+0Z848QWPAoCr9P3bDEZguI+gkLcBKAOxix/tLEAAHC+UvDNPv4a2d18lOReHMOagPa+zQ=="
},
"OPMLCore.NET": {
"type": "Direct",
"requested": "[1.0.0, )",
"resolved": "1.0.0",
"contentHash": "7glfVDnpazLC1FjtJ3hH82yRKz9mJTd+9w25V43wZdnUAw/7fIfIzTr0DNj7wZmTMLp/d5+xo4VzYPNtvvNAdA=="
},
"Slugify.Core": {
"type": "Direct",
"requested": "[4.0.1, )",
"resolved": "4.0.1",
"contentHash": "0YKMECQGdi7O4T1SL1IFaXWSPJdXNFMoGUdxMPsqI+TBuKlhiTD7mBxRH8gd9WD3MJSTEqyF0WsGl7l+NpoXXA==",
"dependencies": {
"System.Memory": "4.5.5",
"System.Text.Encoding.CodePages": "6.0.0"
}
},
"SyndicationLib": {
"type": "Direct",
"requested": "[0.2.0-beta, )",
"resolved": "0.2.0-beta",
"contentHash": "eqcfwrpj7qrSs0k+e0LQKtTz4qfTaXKuR0rwp4QHeuoyRScd76MOLyETkZwyLUYVz0PI/+FoW1nKpPOftdhL1g==",
"dependencies": {
"Brackets": "0.6.2"
}
},
"Brackets": {
"type": "Transitive",
"resolved": "0.6.2",
"contentHash": "+6WZO/OgUiVuDaS10NAPOJpd7Gg+F2eiGW+mzcG4CEHxlUbv8p1ENZDVtVeUpUZLRbFk52cKUQZuMVUYVyPvrQ==",
"dependencies": {
"System.IO.Pipelines": "8.0.0"
}
},
"System.IO.Pipelines": {
"type": "Transitive",
"resolved": "8.0.0",
"contentHash": "FHNOatmUq0sqJOkTx+UF/9YK1f180cnW5FVqnQMvYUN0elp6wFzbtPSiqbo1/ru8ICp43JM1i7kKkk6GsNGHlA=="
},
"System.Memory": {
"type": "Transitive",
"resolved": "4.5.5",
"contentHash": "XIWiDvKPXaTveaB7HVganDlOCRoj03l+jrwNvcge/t8vhGYKvqV+dMv6G4SAX2NoNmN0wZfVPTAlFwZcZvVOUw=="
},
"System.Runtime.CompilerServices.Unsafe": {
"type": "Transitive",
"resolved": "6.0.0",
"contentHash": "/iUeP3tq1S0XdNNoMz5C9twLSrM/TH+qElHkXWaPvuNOt+99G75NrV0OS2EqHx5wMN7popYjpc8oTjC1y16DLg=="
},
"System.Text.Encoding.CodePages": {
"type": "Transitive",
"resolved": "6.0.0",
"contentHash": "ZFCILZuOvtKPauZ/j/swhvw68ZRi9ATCfvGbk1QfydmcXBkIWecWKn/250UH7rahZ5OoDBaiAudJtPvLwzw85A==",
"dependencies": {
"System.Runtime.CompilerServices.Unsafe": "6.0.0"
}
}
}
}
}
Loading

0 comments on commit 0e88794

Please sign in to comment.