Skip to content

Commit

Permalink
Handle body link feeds
Browse files Browse the repository at this point in the history
  • Loading branch information
jocmp committed Jan 23, 2024
1 parent 99819a3 commit 55f0a55
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 24 deletions.
10 changes: 6 additions & 4 deletions feedfinder/src/main/java/com/jocmp/feedfinder/FeedFinder.kt
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package com.jocmp.feedfinder

import com.jocmp.feedfinder.parser.Feed
import com.jocmp.feedfinder.sources.MetaLinkSource
import com.jocmp.feedfinder.sources.BodyLinks
import com.jocmp.feedfinder.sources.MetaLinks
import com.jocmp.feedfinder.sources.Source
import com.jocmp.feedfinder.sources.XMLSource
import com.jocmp.feedfinder.sources.XML
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import java.net.MalformedURLException
Expand Down Expand Up @@ -41,8 +42,9 @@ class FeedFinder internal constructor(

/**
 * Builds the list of [Source]s constructed from the given [response], in the order listed below.
 *
 * NOTE(review): this view lists both the `XMLSource`/`MetaLinkSource` entries and the
 * `XML`/`MetaLinks`/`BodyLinks` entries. It looks like the old and new sides of a rename shown
 * together - confirm which entries belong in the final file.
 */
private fun sources(response: Response): List<Source> {
return listOf(
XMLSource(response),
MetaLinkSource(response = response, request = request),
XML(response),
MetaLinks(response = response, request = request),
BodyLinks(response = response, request = request),
)
}

Expand Down
41 changes: 41 additions & 0 deletions feedfinder/src/main/java/com/jocmp/feedfinder/sources/BodyLinks.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.jocmp.feedfinder.sources

import com.jocmp.feedfinder.DefaultRequest
import com.jocmp.feedfinder.Request
import com.jocmp.feedfinder.Response
import com.jocmp.feedfinder.parser.Feed
import com.jocmp.feedfinder.parser.Parser
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.coroutineScope
import org.jsoup.nodes.Element
import java.net.URL

/**
 * Finds feeds by following anchor (`<a>`) links in the response document whose `href` looks
 * feed-like.
 *
 * An anchor is a candidate when its `href` contains one of [TYPES]. Each distinct candidate
 * address is fetched through [request] concurrently, and only the responses that parse as a feed
 * are returned.
 */
internal class BodyLinks(
    private val response: Response,
    private val request: Request = DefaultRequest()
) : Source {
    /**
     * Returns the feeds reachable from candidate links in the document body.
     *
     * Returns an empty list when the response has no document. Candidate links whose resolved
     * address is blank or cannot be parsed as a [URL] are skipped rather than failing the search.
     */
    override suspend fun find(): List<Feed> {
        val document = response.findDocument() ?: return emptyList()

        val candidates = document.select("a")
            .filter { element -> isCandidate(element) }
            .map { element -> element.absUrl("href") }
            // De-duplicate on the string form so the same address is fetched only once;
            // comparing URL instances directly may trigger host name resolution.
            .distinct()
            .mapNotNull { href -> parseURL(href) }

        return coroutineScope {
            candidates
                .map { url -> async { request.fetch(url = url) } }
                .awaitAll()
                .mapNotNull { fetched ->
                    (fetched.parse() as? Parser.Result.ParsedFeed)?.feed
                }
        }
    }

    /** Returns true when the anchor has a non-blank `href` containing one of [TYPES]. */
    private fun isCandidate(anchor: Element): Boolean {
        val href = anchor.attr("href")
        return href.isNotBlank() &&
            TYPES.any { type -> href.contains(type) }
    }

    /**
     * Parses [href] into a [URL], or returns null when it is blank or malformed.
     *
     * A matching anchor is not guaranteed to resolve to a fetchable address: the resolved value
     * can be empty or use a scheme [URL] rejects (for example `javascript:`). Without this guard
     * a single such link would abort the whole search with a MalformedURLException.
     */
    private fun parseURL(href: String): URL? {
        if (href.isBlank()) return null

        return try {
            URL(href)
        } catch (e: java.net.MalformedURLException) {
            null
        }
    }

    companion object {
        private val TYPES = listOf("feed", "xml", "rss", "atom")
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ import com.jocmp.feedfinder.Request
import com.jocmp.feedfinder.Response
import com.jocmp.feedfinder.parser.Feed
import com.jocmp.feedfinder.parser.Parser
import org.jsoup.nodes.Element
import java.net.URL
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.coroutineScope
import org.jsoup.nodes.Element
import java.net.URL

internal class MetaLinkSource(
internal class MetaLinks(
private val response: Response,
private val request: Request = DefaultRequest()
) : Source {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import com.jocmp.feedfinder.Response
import com.jocmp.feedfinder.parser.Feed
import com.jocmp.feedfinder.parser.Parser.Result.ParsedFeed

internal class XMLSource(private val response: Response): Source {
internal class XML(private val response: Response): Source {
override suspend fun find(): List<Feed> {
val result = response.parse()

Expand Down
12 changes: 12 additions & 0 deletions feedfinder/src/test/java/com/jocmp/feedfinder/TestRequest.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.jocmp.feedfinder

import java.io.File
import java.net.URL

/**
 * Test double for [Request] that serves response bodies from local files instead of the network.
 *
 * @property sites maps a URL string to the path of the file whose text is returned as the body
 * for that URL.
 */
internal class TestRequest(val sites: Map<String, String>) : Request {
    /**
     * Returns a [Response] for [url] whose body is the text of the file registered in [sites].
     *
     * @throws IllegalArgumentException when no file is registered for [url], so a missing stub
     * is reported with the offending URL instead of a bare NullPointerException.
     */
    override suspend fun fetch(url: URL): Response {
        val path = requireNotNull(sites[url.toString()]) {
            "No test site registered for $url"
        }

        return Response(url = url, body = File(path).readText())
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package com.jocmp.feedfinder.sources

import com.jocmp.feedfinder.Response
import com.jocmp.feedfinder.TestRequest
import com.jocmp.feedfinder.testResource
import kotlinx.coroutines.runBlocking
import org.junit.Test
import java.net.URL
import kotlin.test.assertEquals

/**
 * Tests for [BodyLinks]: anchors in the page body are fetched through a [TestRequest] and only
 * the ones that parse as feeds are counted.
 */
class BodyLinksTest {
    val document = """
        <a href="/feed">RSS</a>
        <a href="/xml">RSS</a>
        <a href="/atom">RSS</a>
        <a href="/rss">RSS</a>
    """.trimIndent()

    @Test
    fun `finds candidate links in the document body`() {
        val stubbedSites = mapOf(
            "https://example.com/feed" to testResource("arstechnica_feed.xml"),
            "https://example.com/xml" to testResource("arstechnica_feed.xml"),
            "https://example.com/atom" to testResource("arstechnica_feed.xml"),
            "https://example.com/rss" to testResource("arstechnica_feed.xml"),
        )

        assertEquals(expected = 4, actual = countFeeds(stubbedSites))
    }

    @Test
    fun `should skip HTML links`() {
        // The /rss link is stubbed with an HTML page, so only three links yield feeds.
        val stubbedSites = mapOf(
            "https://example.com/feed" to testResource("arstechnica_feed.xml"),
            "https://example.com/xml" to testResource("arstechnica_feed.xml"),
            "https://example.com/atom" to testResource("arstechnica_feed.xml"),
            "https://example.com/rss" to testResource("arstechnica.html"),
        )

        assertEquals(expected = 3, actual = countFeeds(stubbedSites))
    }

    /** Runs [BodyLinks] over [document] with the given stubbed sites and returns the feed count. */
    private fun countFeeds(sites: Map<String, String>): Int = runBlocking {
        val page = Response(
            url = URL("https://example.com"),
            body = document,
        )

        BodyLinks(page, TestRequest(sites)).find().size
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package com.jocmp.feedfinder.sources

import com.jocmp.feedfinder.Request
import com.jocmp.feedfinder.Response
import com.jocmp.feedfinder.TestRequest
import com.jocmp.feedfinder.testFile
import com.jocmp.feedfinder.testResource
import kotlinx.coroutines.runBlocking
Expand All @@ -11,7 +12,7 @@ import java.net.URL
import kotlin.test.assertEquals
import kotlin.test.assertTrue

class MetaLinkSourceTest {
class MetaLinksTest {
@Test
fun `it finds a single link`() = runBlocking {
val feedURL = "http://feeds.arstechnica.com/arstechnica/index"
Expand All @@ -24,7 +25,7 @@ class MetaLinkSourceTest {
feedURL to testResource("arstechnica_feed.xml")
)

val source = MetaLinkSource(response, TestRequest(sites))
val source = MetaLinks(response, TestRequest(sites))
val feed = source.find().first()

assertTrue(feed.isValid())
Expand All @@ -44,18 +45,10 @@ class MetaLinkSourceTest {
feedURL to testResource("theverge_feed.xml")
)

val source = MetaLinkSource(response, TestRequest(sites))
val source = MetaLinks(response, TestRequest(sites))
val feed = source.find().first()

assertTrue(feed.isValid())
assertEquals(expected = URL(feedURL), actual = feed.feedURL)
}
}

/**
 * Test double for [Request] that serves response bodies from local files instead of the network.
 *
 * @property sites maps a URL string to the path of the file whose text is returned as the body
 * for that URL.
 */
private class TestRequest(val sites: Map<String, String>) : Request {
    /**
     * Returns a [Response] for [url] whose body is the text of the file registered in [sites].
     *
     * @throws IllegalArgumentException when no file is registered for [url], so a missing stub
     * is reported with the offending URL instead of a bare NullPointerException.
     */
    override suspend fun fetch(url: URL): Response {
        val path = requireNotNull(sites[url.toString()]) {
            "No test site registered for $url"
        }

        return Response(url = url, body = File(path).readText())
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,14 @@ import kotlinx.coroutines.runBlocking
import org.junit.Test
import java.io.File
import java.net.URL
import kotlin.math.exp
import kotlin.test.assertEquals
import kotlin.test.assertFalse
import kotlin.test.assertTrue

class XMLSourceTest {
class XMLTest {
@Test
fun `it parses from an XML source`() = runBlocking {
val body = File("src/test/resources/arstechnica_feed.xml").readText()

val feeds = XMLSource(Response(url = URL("https://arstechnica.com"), body = body)).find()
val feeds = XML(Response(url = URL("https://arstechnica.com"), body = body)).find()

assertEquals(expected = 1, actual = feeds.size)
}
Expand Down

0 comments on commit 55f0a55

Please sign in to comment.