Skip to content

Commit

Permalink
Add a scraper for Pillowfort.social
Browse files Browse the repository at this point in the history
Closes #5
  • Loading branch information
SeinopSys committed Jul 21, 2020
1 parent 824375e commit 1f279e1
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 1 deletion.
1 change: 1 addition & 0 deletions lib/philomena/scrapers.ex
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
defmodule Philomena.Scrapers do
@scrapers [
Philomena.Scrapers.Deviantart,
Philomena.Scrapers.Pillowfort,
Philomena.Scrapers.Twitter,
Philomena.Scrapers.Tumblr,
Philomena.Scrapers.Raw
Expand Down
48 changes: 48 additions & 0 deletions lib/philomena/scrapers/pillowfort.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
defmodule Philomena.Scrapers.Pillowfort do
  @moduledoc """
  Scraper for posts hosted on Pillowfort.social.

  A post URL of the form `https://www.pillowfort.social/posts/<id>` is resolved
  through the post's JSON endpoint (`/posts/<id>/json/`), and the picture
  attachments found in the response are returned as scraped images.
  """

  # The dots in the host name are escaped so that they only match a literal "."
  # (an unescaped "." matches any character, which would let look-alike hosts
  # such as "wwwxpillowfortxsocial" pass `can_handle?/2`).
  @url_regex ~r|\Ahttps?://www\.pillowfort\.social/posts/(\d+)/?|

  @doc """
  Returns `true` when `url` is a Pillowfort post URL this scraper understands.

  The parsed URI (first argument) is ignored; only the raw URL string is matched.
  """
  @spec can_handle?(URI.t(), String.t()) :: boolean()
  def can_handle?(_uri, url) do
    String.match?(url, @url_regex)
  end

  @doc """
  Fetches the post behind `url` and extracts its picture attachments.

  Returns a map with the keys `:source_url`, `:author_name`, `:description` and
  `:images`, or `nil` when the post has no media attached.

  Raises under the same conditions as `api_response!/1`.
  """
  @spec scrape(URI.t(), String.t()) :: map() | nil
  def scrape(_uri, url) do
    url
    |> api_response!()
    |> extract_data()
  end

  # Builds the scrape result from a decoded post. Only entries whose
  # "media_type" is "picture" and that carry a truthy "url" become images.
  defp extract_data(%{"media" => [_ | _] = media} = post) do
    images =
      for %{"media_type" => "picture", "url" => image_url} <- media, image_url do
        %{
          url: image_url,
          camo_url: Camo.Image.image_url(image_url)
        }
      end

    %{
      source_url: "https://www.pillowfort.social/posts/#{post["id"]}",
      author_name: post["username"],
      description: HtmlSanitizeEx.strip_tags(post["content"]),
      images: images
    }
  end

  # No media at all (empty list, or a response without a "media" list):
  # there is nothing to scrape, so signal that with nil instead of crashing.
  defp extract_data(_post), do: nil

  @doc """
  Requests the JSON representation of the post referenced by `url` and returns
  the decoded body, with the canonical post URL stored under the `"url"` key.

  Raises `MatchError` when `url` does not match the Pillowfort post pattern.
  `Jason.decode!/1` raises when the response body is not valid JSON.
  """
  @spec api_response!(String.t()) :: map()
  def api_response!(url) do
    [post_id] = Regex.run(@url_regex, url, capture: :all_but_first)

    api_url = "https://www.pillowfort.social/posts/#{post_id}/json/"
    canonical_url = "https://www.pillowfort.social/posts/#{post_id}"

    api_url
    |> Philomena.Http.get!([{"Accept", "application/json"}])
    |> Map.get(:body)
    |> Jason.decode!()
    |> Map.put("url", canonical_url)
  end
end
3 changes: 2 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ defmodule Philomena.MixProject do
{:mint, "~> 1.1"},
{:libcluster, "~> 3.2"},
{:exq, "~> 0.13"},
{:dialyxir, "~> 1.0", only: :dev, runtime: false}
{:dialyxir, "~> 1.0", only: :dev, runtime: false},
{:html_sanitize_ex, "~> 1.3.0-rc3"}
]
end

Expand Down
2 changes: 2 additions & 0 deletions mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"gen_smtp": {:hex, :gen_smtp, "0.15.0", "9f51960c17769b26833b50df0b96123605a8024738b62db747fece14eb2fbfcc", [:rebar3], [], "hexpm", "29bd14a88030980849c7ed2447b8db6d6c9278a28b11a44cafe41b791205440f"},
"gettext": {:hex, :gettext, "0.18.0", "406d6b9e0e3278162c2ae1de0a60270452c553536772167e2d701f028116f870", [:mix], [], "hexpm", "c3f850be6367ebe1a08616c2158affe4a23231c70391050bf359d5f92f66a571"},
"hackney": {:hex, :hackney, "1.16.0", "5096ac8e823e3a441477b2d187e30dd3fff1a82991a806b2003845ce72ce2d84", [:rebar3], [{:certifi, "2.5.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.1", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.0", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.6", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "3bf0bebbd5d3092a3543b783bf065165fa5d3ad4b899b836810e513064134e18"},
"html_sanitize_ex": {:hex, :html_sanitize_ex, "1.3.0", "f005ad692b717691203f940c686208aa3d8ffd9dd4bb3699240096a51fa9564e", [:mix], [{:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm", "abfb393ad888d57700f4d0f119c2643c8a9d98856f9b8a92001be7efad1419d6"},
"httpoison": {:hex, :httpoison, "1.7.0", "abba7d086233c2d8574726227b6c2c4f6e53c4deae7fe5f6de531162ce9929a0", [:mix], [{:hackney, "~> 1.16", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "975cc87c845a103d3d1ea1ccfd68a2700c211a434d8428b10c323dc95dc5b980"},
"idna": {:hex, :idna, "6.0.1", "1d038fb2e7668ce41fbf681d2c45902e52b3cb9e9c77b55334353b222c2ee50c", [:rebar3], [{:unicode_util_compat, "0.5.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "a02c8a1c4fd601215bb0b0324c8a6986749f807ce35f25449ec9e69758708122"},
"inet_cidr": {:hex, :inet_cidr, "1.0.4", "a05744ab7c221ca8e395c926c3919a821eb512e8f36547c062f62c4ca0cf3d6e", [:mix], [], "hexpm", "64a2d30189704ae41ca7dbdd587f5291db5d1dda1414e0774c29ffc81088c1bc"},
Expand All @@ -39,6 +40,7 @@
"mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm", "6cbe761d6a0ca5a31a0931bf4c63204bceb64538e664a8ecf784a9a6f3b875f1"},
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"},
"mint": {:hex, :mint, "1.1.0", "1fd0189edd9e3ffdbd7fcd8bc3835902b987a63ec6c4fd1aa8c2a56e2165f252", [:mix], [{:castore, "~> 0.1.0", [hex: :castore, repo: "hexpm", optional: true]}], "hexpm", "5bfd316c3789340b682d5679a8116bcf2112e332447bdc20c1d62909ee45f48d"},
"mochiweb": {:hex, :mochiweb, "2.20.1", "e4dbd0ed716f076366ecf62ada5755a844e1d95c781e8c77df1d4114be868cdf", [:rebar3], [], "hexpm", "d1aeee7870470d2fa9eae0b3d5ab6c33801aa2d82b10e9dade885c5c921b36aa"},
"neotoma": {:hex, :neotoma, "1.7.3", "d8bd5404b73273989946e4f4f6d529e5c2088f5fa1ca790b4dbe81f4be408e61", [:rebar], [], "hexpm", "2da322b9b1567ffa0706a7f30f6bbbde70835ae44a1050615f4b4a3d436e0f28"},
"nimble_parsec": {:hex, :nimble_parsec, "0.5.3", "def21c10a9ed70ce22754fdeea0810dafd53c2db3219a0cd54cf5526377af1c6", [:mix], [], "hexpm", "589b5af56f4afca65217a1f3eb3fee7e79b09c40c742fddc1c312b3ac0b3399f"},
"parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm", "17ef63abde837ad30680ea7f857dd9e7ced9476cdd7b0394432af4bfc241b960"},
Expand Down

0 comments on commit 1f279e1

Please sign in to comment.