From 55f0a557c12502318971d6a47f97f3062e20d53d Mon Sep 17 00:00:00 2001 From: Josiah Campbell <9521010+jocmp@users.noreply.github.com> Date: Mon, 22 Jan 2024 21:39:19 -0600 Subject: [PATCH] Handle body link feeds --- .../java/com/jocmp/feedfinder/FeedFinder.kt | 10 ++-- .../com/jocmp/feedfinder/sources/BodyLinks.kt | 41 +++++++++++++++ .../{MetaLinkSource.kt => MetaLinks.kt} | 6 +-- .../sources/{XMLSource.kt => XML.kt} | 2 +- .../java/com/jocmp/feedfinder/TestRequest.kt | 12 +++++ .../jocmp/feedfinder/sources/BodyLinksTest.kt | 52 +++++++++++++++++++ ...MetaLinkSourceTest.kt => MetaLinksTest.kt} | 15 ++---- .../sources/{XMLSourceTest.kt => XMLTest.kt} | 7 +-- 8 files changed, 121 insertions(+), 24 deletions(-) create mode 100644 feedfinder/src/main/java/com/jocmp/feedfinder/sources/BodyLinks.kt rename feedfinder/src/main/java/com/jocmp/feedfinder/sources/{MetaLinkSource.kt => MetaLinks.kt} (98%) rename feedfinder/src/main/java/com/jocmp/feedfinder/sources/{XMLSource.kt => XML.kt} (85%) create mode 100644 feedfinder/src/test/java/com/jocmp/feedfinder/TestRequest.kt create mode 100644 feedfinder/src/test/java/com/jocmp/feedfinder/sources/BodyLinksTest.kt rename feedfinder/src/test/java/com/jocmp/feedfinder/sources/{MetaLinkSourceTest.kt => MetaLinksTest.kt} (77%) rename feedfinder/src/test/java/com/jocmp/feedfinder/sources/{XMLSourceTest.kt => XMLTest.kt} (67%) diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/FeedFinder.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/FeedFinder.kt index cb7ffe41..9a242ae0 100644 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/FeedFinder.kt +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/FeedFinder.kt @@ -1,9 +1,10 @@ package com.jocmp.feedfinder import com.jocmp.feedfinder.parser.Feed -import com.jocmp.feedfinder.sources.MetaLinkSource +import com.jocmp.feedfinder.sources.BodyLinks +import com.jocmp.feedfinder.sources.MetaLinks import com.jocmp.feedfinder.sources.Source -import com.jocmp.feedfinder.sources.XMLSource +import com.jocmp.feedfinder.sources.XML import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.withContext import java.net.MalformedURLException @@ -41,8 +42,9 @@ class FeedFinder internal constructor( private fun sources(response: Response): List { return listOf( - XMLSource(response), - MetaLinkSource(response = response, request = request), + XML(response), + MetaLinks(response = response, request = request), + BodyLinks(response = response, request = request), ) } diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/BodyLinks.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/BodyLinks.kt new file mode 100644 index 00000000..570b787f --- /dev/null +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/BodyLinks.kt @@ -0,0 +1,41 @@ +package com.jocmp.feedfinder.sources + +import com.jocmp.feedfinder.DefaultRequest +import com.jocmp.feedfinder.Request +import com.jocmp.feedfinder.Response +import com.jocmp.feedfinder.parser.Feed +import com.jocmp.feedfinder.parser.Parser +import kotlinx.coroutines.async +import kotlinx.coroutines.awaitAll +import kotlinx.coroutines.coroutineScope +import org.jsoup.nodes.Element +import java.net.URL + +internal class BodyLinks( + private val response: Response, + private val request: Request = DefaultRequest() +) : Source { + override suspend fun find(): List { + val document = response.findDocument() ?: return emptyList() + + return coroutineScope { + document.select("a") + .filter { element -> isCandidate(element) } + .map { async { request.fetch(url = URL(it.absUrl("href"))) } } + .awaitAll() + .mapNotNull { response -> + (response.parse() as? Parser.Result.ParsedFeed)?.feed + } + } + } + + private fun isCandidate(anchor: Element): Boolean { + val href = anchor.attr("href") + return href.isNotBlank() && + TYPES.any { type -> href.contains(type) } + } + + companion object { + private val TYPES = listOf("feed", "xml", "rss", "atom") + } +} diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLinkSource.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLinks.kt similarity index 98% rename from feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLinkSource.kt rename to feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLinks.kt index 2d4c4d21..a4dc2124 100644 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLinkSource.kt +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLinks.kt @@ -5,13 +5,13 @@ import com.jocmp.feedfinder.Request import com.jocmp.feedfinder.Response import com.jocmp.feedfinder.parser.Feed import com.jocmp.feedfinder.parser.Parser -import org.jsoup.nodes.Element -import java.net.URL import kotlinx.coroutines.async import kotlinx.coroutines.awaitAll import kotlinx.coroutines.coroutineScope +import org.jsoup.nodes.Element +import java.net.URL -internal class MetaLinkSource( +internal class MetaLinks( private val response: Response, private val request: Request = DefaultRequest() ) : Source { diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/XMLSource.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/XML.kt similarity index 85% rename from feedfinder/src/main/java/com/jocmp/feedfinder/sources/XMLSource.kt rename to feedfinder/src/main/java/com/jocmp/feedfinder/sources/XML.kt index 107c16f3..46685383 100644 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/XMLSource.kt +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/XML.kt @@ -4,7 +4,7 @@ import com.jocmp.feedfinder.Response import com.jocmp.feedfinder.parser.Feed import com.jocmp.feedfinder.parser.Parser.Result.ParsedFeed -internal class XMLSource(private val response: Response): Source { +internal class XML(private val response: Response): Source { override suspend fun find(): List { val result = response.parse() diff --git a/feedfinder/src/test/java/com/jocmp/feedfinder/TestRequest.kt b/feedfinder/src/test/java/com/jocmp/feedfinder/TestRequest.kt new file mode 100644 index 00000000..d569a1ba --- /dev/null +++ b/feedfinder/src/test/java/com/jocmp/feedfinder/TestRequest.kt @@ -0,0 +1,12 @@ +package com.jocmp.feedfinder + +import java.io.File +import java.net.URL + +internal class TestRequest(val sites: Map) : Request { + override suspend fun fetch(url: URL): Response { + val body = File(sites[url.toString()]!!).readText() + + return Response(url = url, body = body) + } +} diff --git a/feedfinder/src/test/java/com/jocmp/feedfinder/sources/BodyLinksTest.kt b/feedfinder/src/test/java/com/jocmp/feedfinder/sources/BodyLinksTest.kt new file mode 100644 index 00000000..dd44a891 --- /dev/null +++ b/feedfinder/src/test/java/com/jocmp/feedfinder/sources/BodyLinksTest.kt @@ -0,0 +1,52 @@ +package com.jocmp.feedfinder.sources + +import com.jocmp.feedfinder.Response +import com.jocmp.feedfinder.TestRequest +import com.jocmp.feedfinder.testResource +import kotlinx.coroutines.runBlocking +import org.junit.Test +import java.net.URL +import kotlin.test.assertEquals + +class BodyLinksTest { + val document = """ + RSS + RSS + RSS + RSS + """.trimIndent() + + @Test + fun `finds candidate links in the document body`() = runBlocking { + val response = Response( + url = URL("https://example.com"), body = document + ) + + val sites = mapOf( + "https://example.com/feed" to testResource("arstechnica_feed.xml"), + "https://example.com/xml" to testResource("arstechnica_feed.xml"), + "https://example.com/atom" to testResource("arstechnica_feed.xml"), + "https://example.com/rss" to testResource("arstechnica_feed.xml"), + ) + + val source = BodyLinks(response, TestRequest(sites)) + assertEquals(expected = 4, source.find().size) + } + + @Test + fun `should skip HTML links`() = runBlocking { + val response = Response( + url = URL("https://example.com"), body = document + ) + + val sites = mapOf( + "https://example.com/feed" to testResource("arstechnica_feed.xml"), + "https://example.com/xml" to testResource("arstechnica_feed.xml"), + "https://example.com/atom" to testResource("arstechnica_feed.xml"), + "https://example.com/rss" to testResource("arstechnica.html"), + ) + + val source = BodyLinks(response, TestRequest(sites)) + assertEquals(expected = 3, source.find().size) + } +} diff --git a/feedfinder/src/test/java/com/jocmp/feedfinder/sources/MetaLinkSourceTest.kt b/feedfinder/src/test/java/com/jocmp/feedfinder/sources/MetaLinksTest.kt similarity index 77% rename from feedfinder/src/test/java/com/jocmp/feedfinder/sources/MetaLinkSourceTest.kt rename to feedfinder/src/test/java/com/jocmp/feedfinder/sources/MetaLinksTest.kt index 6b05f5eb..7ac6285a 100644 --- a/feedfinder/src/test/java/com/jocmp/feedfinder/sources/MetaLinkSourceTest.kt +++ b/feedfinder/src/test/java/com/jocmp/feedfinder/sources/MetaLinksTest.kt @@ -2,6 +2,7 @@ package com.jocmp.feedfinder.sources import com.jocmp.feedfinder.Request import com.jocmp.feedfinder.Response +import com.jocmp.feedfinder.TestRequest import com.jocmp.feedfinder.testFile import com.jocmp.feedfinder.testResource import kotlinx.coroutines.runBlocking @@ -11,7 +12,7 @@ import java.net.URL import kotlin.test.assertEquals import kotlin.test.assertTrue -class MetaLinkSourceTest { +class MetaLinksTest { @Test fun `it finds a single link`() = runBlocking { val feedURL = "http://feeds.arstechnica.com/arstechnica/index" @@ -24,7 +25,7 @@ class MetaLinkSourceTest { feedURL to testResource("arstechnica_feed.xml") ) - val source = MetaLinkSource(response, TestRequest(sites)) + val source = MetaLinks(response, TestRequest(sites)) val feed = source.find().first() assertTrue(feed.isValid()) @@ -44,18 +45,10 @@ class MetaLinkSourceTest { feedURL to testResource("theverge_feed.xml") ) - val source = MetaLinkSource(response, TestRequest(sites)) + val source = MetaLinks(response, TestRequest(sites)) val feed = source.find().first() assertTrue(feed.isValid()) assertEquals(expected = URL(feedURL), actual = feed.feedURL) } } - -private class TestRequest(val sites: Map) : Request { - override suspend fun fetch(url: URL): Response { - val body = File(sites[url.toString()]!!).readText() - - return Response(url = url, body = body) - } -} diff --git a/feedfinder/src/test/java/com/jocmp/feedfinder/sources/XMLSourceTest.kt b/feedfinder/src/test/java/com/jocmp/feedfinder/sources/XMLTest.kt similarity index 67% rename from feedfinder/src/test/java/com/jocmp/feedfinder/sources/XMLSourceTest.kt rename to feedfinder/src/test/java/com/jocmp/feedfinder/sources/XMLTest.kt index a9900861..070693bf 100644 --- a/feedfinder/src/test/java/com/jocmp/feedfinder/sources/XMLSourceTest.kt +++ b/feedfinder/src/test/java/com/jocmp/feedfinder/sources/XMLTest.kt @@ -5,17 +5,14 @@ import kotlinx.coroutines.runBlocking import org.junit.Test import java.io.File import java.net.URL -import kotlin.math.exp import kotlin.test.assertEquals -import kotlin.test.assertFalse -import kotlin.test.assertTrue -class XMLSourceTest { +class XMLTest { @Test fun `it parses from an XML source`() = runBlocking { val body = File("src/test/resources/arstechnica_feed.xml").readText() - val feeds = XMLSource(Response(url = URL("https://arstechnica.com"), body = body)).find() + val feeds = XML(Response(url = URL("https://arstechnica.com"), body = body)).find() assertEquals(expected = 1, actual = feeds.size) }