-
-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
121 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
41 changes: 41 additions & 0 deletions
41
feedfinder/src/main/java/com/jocmp/feedfinder/sources/BodyLinks.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package com.jocmp.feedfinder.sources | ||
|
||
import com.jocmp.feedfinder.DefaultRequest | ||
import com.jocmp.feedfinder.Request | ||
import com.jocmp.feedfinder.Response | ||
import com.jocmp.feedfinder.parser.Feed | ||
import com.jocmp.feedfinder.parser.Parser | ||
import kotlinx.coroutines.async | ||
import kotlinx.coroutines.awaitAll | ||
import kotlinx.coroutines.coroutineScope | ||
import org.jsoup.nodes.Element | ||
import java.net.URL | ||
|
||
/**
 * [Source] that looks for feeds behind the anchor tags of the response's document.
 *
 * Every `<a>` whose `href` contains one of [TYPES] is treated as a candidate. Each candidate
 * is resolved to an absolute URL, fetched with [request], and kept only when the fetched
 * response parses as a [Parser.Result.ParsedFeed].
 *
 * @property response page whose document is scanned for candidate links
 * @property request fetcher used to download each candidate link
 */
internal class BodyLinks(
    private val response: Response,
    private val request: Request = DefaultRequest()
) : Source {
    /**
     * Fetches all candidate links concurrently and returns the feeds that could be parsed.
     *
     * Returns an empty list when the response has no document. Links that cannot be turned
     * into a valid absolute URL are skipped, and a URL that appears in several anchors is
     * fetched only once. An exception thrown by [Request.fetch] still propagates to the caller.
     */
    override suspend fun find(): List<Feed> {
        val document = response.findDocument() ?: return emptyList()

        val candidates = document.select("a")
            .filter { anchor -> isCandidate(anchor) }
            .map { anchor -> anchor.absUrl("href") }
            // De-duplicate on the string form: java.net.URL equality may resolve hostnames.
            .distinct()
            .mapNotNull { href -> toUrlOrNull(href) }

        return coroutineScope {
            candidates
                .map { url -> async { request.fetch(url = url) } }
                .awaitAll()
                .mapNotNull { fetched ->
                    (fetched.parse() as? Parser.Result.ParsedFeed)?.feed
                }
        }
    }

    /** Returns true when the anchor's raw `href` is non-blank and contains one of [TYPES]. */
    private fun isCandidate(anchor: Element): Boolean {
        val href = anchor.attr("href")
        return href.isNotBlank() &&
            TYPES.any { type -> href.contains(type) }
    }

    /**
     * Parses [href] into a [URL], or returns null when it is not a well-formed URL.
     *
     * jsoup's `absUrl` yields an empty string when a link cannot be made absolute, which
     * the [URL] constructor rejects; such links are dropped instead of failing the search.
     */
    private fun toUrlOrNull(href: String): URL? =
        try {
            URL(href)
        } catch (ignored: java.net.MalformedURLException) {
            null
        }

    companion object {
        /** Substrings that mark an `href` as a possible feed link. */
        private val TYPES = listOf("feed", "xml", "rss", "atom")
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
12 changes: 12 additions & 0 deletions
12
feedfinder/src/test/java/com/jocmp/feedfinder/TestRequest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
package com.jocmp.feedfinder | ||
|
||
import java.io.File | ||
import java.net.URL | ||
|
||
/**
 * Test double for [Request] that serves response bodies from local files instead of the network.
 *
 * @property sites maps a URL's string form to the path of the file whose text becomes the body
 */
internal class TestRequest(val sites: Map<String, String>) : Request {
    /**
     * Reads the file registered for [url] and wraps its text in a [Response].
     *
     * @throws IllegalStateException when [sites] has no entry for [url]
     */
    override suspend fun fetch(url: URL): Response {
        // Fail with the offending URL in the message rather than a bare NullPointerException.
        val path = checkNotNull(sites[url.toString()]) {
            "No test fixture registered for $url"
        }

        return Response(url = url, body = File(path).readText())
    }
}
52 changes: 52 additions & 0 deletions
52
feedfinder/src/test/java/com/jocmp/feedfinder/sources/BodyLinksTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package com.jocmp.feedfinder.sources | ||
|
||
import com.jocmp.feedfinder.Response | ||
import com.jocmp.feedfinder.TestRequest | ||
import com.jocmp.feedfinder.testResource | ||
import kotlinx.coroutines.runBlocking | ||
import org.junit.Test | ||
import java.net.URL | ||
import kotlin.test.assertEquals | ||
|
||
/** Exercises [BodyLinks] against a page containing four feed-like anchors. */
class BodyLinksTest {
    val document = """
        <a href="/feed">RSS</a>
        <a href="/xml">RSS</a>
        <a href="/atom">RSS</a>
        <a href="/rss">RSS</a>
    """.trimIndent()

    @Test
    fun `finds candidate links in the document body`() = runBlocking {
        val feedCount = countFeeds(rssFixture = "arstechnica_feed.xml")

        assertEquals(expected = 4, actual = feedCount)
    }

    @Test
    fun `should skip HTML links`() = runBlocking {
        val feedCount = countFeeds(rssFixture = "arstechnica.html")

        assertEquals(expected = 3, actual = feedCount)
    }

    /**
     * Runs [BodyLinks] over [document] and returns how many feeds it found.
     *
     * The `/feed`, `/xml` and `/atom` links are always backed by the feed fixture;
     * [rssFixture] names the test resource served for the `/rss` link.
     */
    private suspend fun countFeeds(rssFixture: String): Int {
        val page = Response(url = URL("https://example.com"), body = document)

        val feedBacked = listOf("feed", "xml", "atom").associate { path ->
            "https://example.com/$path" to testResource("arstechnica_feed.xml")
        }
        val fixtures = feedBacked + ("https://example.com/rss" to testResource(rssFixture))

        return BodyLinks(page, TestRequest(fixtures)).find().size
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters