Skip to content

Commit 0b420a1

Browse files
committed
Added summarize_existing
1 parent 9cfc681 commit 0b420a1

File tree

5 files changed

+55
-9
lines changed

5 files changed

+55
-9
lines changed

lib/readability.ex

+3-1
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,8 @@ defmodule Readability do
109109

110110
html_tree = case url do
111111
nil -> html_tree
112-
_ -> html_tree |> Helper.to_absolute(url)
112+
_ ->
113+
html_tree |> Helper.to_absolute(url)
113114
end
114115

115116
article_tree = html_tree |> ArticleBuilder.build(opts)
@@ -119,6 +120,7 @@ defmodule Readability do
119120
article_html: readable_html(article_tree),
120121
article_text: readable_text(article_tree)
121122
}
123+
122124
end
123125

124126
@doc """

lib/readability/helper.ex

+1-1
Original file line number | Diff line number | Diff line change
@@ -140,7 +140,7 @@ defmodule Readability.Helper do
140140
{"img", attrs} ->
141141
attr_map = Enum.into(attrs, %{})
142142

143-
src = (Map.get(attr_map, "rel:bf_image_src") || Map.get(attr_map, "data-src") || Map.get(attr_map, "src"))
143+
src = (Map.get(attr_map, "rel:bf_image_src") || Map.get(attr_map, "data-original") || Map.get(attr_map, "data-src") || Map.get(attr_map, "src"))
144144

145145
if src do
146146
src = URI.merge(url, src) |> to_string

mix.exs

+8
Original file line number | Diff line number | Diff line change
@@ -40,10 +40,18 @@ defmodule Readability.Mixfile do
4040
#
4141
# Type "mix help deps" for more examples and options
4242
defp deps do
43+
<<<<<<< 9cfc6815facb50206a004eae94456543bb37f31a
4344
[{:floki, "~> 0.13.1"},
4445
{:httpoison, "~> 0.11.0"},
4546
{:ex_doc, "~> 0.14", only: :dev},
4647
{:credo, "~> 0.6.1", only: [:dev, :test]},
48+
=======
49+
[{:floki, "~> 0.11.0"},
50+
{:httpoison, "~> 0.11.0"},
51+
{:earmark, "~> 0.1", only: :dev},
52+
{:ex_doc, "~> 0.11", only: :dev},
53+
{:credo, "~> 0.3", only: [:dev, :test]},
54+
>>>>>>> Added summarize_existing
4755
{:dialyxir, "~> 0.3", only: [:dev]}
4856
]
4957
end

mix.lock

+16
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
<<<<<<< 9cfc6815facb50206a004eae94456543bb37f31a
12
%{"bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], []},
23
"certifi": {:hex, :certifi, "0.7.0", "861a57f3808f7eb0c2d1802afeaae0fa5de813b0df0979153cbafcd853ababaf", [:rebar3], []},
34
"credo": {:hex, :credo, "0.6.1", "a941e2591bd2bd2055dc92b810c174650b40b8290459c89a835af9d59ac4a5f8", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, optional: false]}]},
@@ -7,12 +8,27 @@
78
"floki": {:hex, :floki, "0.13.2", "14a4fce7303664b4788e7851a00679c9d8952fa9c8cdd63e11a9e2fa402819b4", [:mix], [{:mochiweb, "~> 2.15", [hex: :mochiweb, optional: false]}]},
89
"hackney": {:hex, :hackney, "1.6.5", "8c025ee397ac94a184b0743c73b33b96465e85f90a02e210e86df6cbafaa5065", [:rebar3], [{:certifi, "0.7.0", [hex: :certifi, optional: false]}, {:idna, "1.2.0", [hex: :idna, optional: false]}, {:metrics, "1.0.1", [hex: :metrics, optional: false]}, {:mimerl, "1.0.2", [hex: :mimerl, optional: false]}, {:ssl_verify_fun, "1.1.1", [hex: :ssl_verify_fun, optional: false]}]},
910
"hound": {:hex, :hound, "1.0.3", "bf1859fcb855bf7a3b84c632ba68f04c43bfeb16efebf0080b6c1efb960c30c6", [:mix], [{:hackney, "~> 1.5", [hex: :hackney, optional: false]}, {:poison, ">= 1.4.0", [hex: :poison, optional: false]}]},
11+
=======
12+
%{"bunt": {:hex, :bunt, "0.1.6", "5d95a6882f73f3b9969fdfd1953798046664e6f77ec4e486e6fafc7caad97c6f", [:mix], []},
13+
"certifi": {:hex, :certifi, "0.7.0", "861a57f3808f7eb0c2d1802afeaae0fa5de813b0df0979153cbafcd853ababaf", [:rebar3], []},
14+
"credo": {:hex, :credo, "0.4.5", "5c5daaf50a2a96068c0f21b6fbd382d206702efa8836a946eeab0b8ac25f5f22", [:mix], [{:bunt, "~> 0.1.6", [hex: :bunt, optional: false]}]},
15+
"dialyxir": {:hex, :dialyxir, "0.3.5", "eaba092549e044c76f83165978979f60110dc58dd5b92fd952bf2312f64e9b14", [:mix], []},
16+
"earmark": {:hex, :earmark, "0.2.1", "ba6d26ceb16106d069b289df66751734802777a3cbb6787026dd800ffeb850f3", [:mix], []},
17+
"ex_doc": {:hex, :ex_doc, "0.12.0", "b774aabfede4af31c0301aece12371cbd25995a21bb3d71d66f5c2fe074c603f", [:mix], [{:earmark, "~> 0.2", [hex: :earmark, optional: false]}]},
18+
"floki": {:hex, :floki, "0.11.0", "b4532ab64d67225f13f5626e4ba1b8cf3ee9d5bd48017075bb975e1522efd32d", [:mix], [{:mochiweb, "~> 2.15", [hex: :mochiweb, optional: false]}]},
19+
"hackney": {:hex, :hackney, "1.6.5", "8c025ee397ac94a184b0743c73b33b96465e85f90a02e210e86df6cbafaa5065", [:rebar3], [{:certifi, "0.7.0", [hex: :certifi, optional: false]}, {:idna, "1.2.0", [hex: :idna, optional: false]}, {:metrics, "1.0.1", [hex: :metrics, optional: false]}, {:mimerl, "1.0.2", [hex: :mimerl, optional: false]}, {:ssl_verify_fun, "1.1.1", [hex: :ssl_verify_fun, optional: false]}]},
20+
>>>>>>> Added summarize_existing
1021
"httpoison": {:hex, :httpoison, "0.11.0", "b9240a9c44fc46fcd8618d17898859ba09a3c1b47210b74316c0ffef10735e76", [:mix], [{:hackney, "~> 1.6.3", [hex: :hackney, optional: false]}]},
1122
"idna": {:hex, :idna, "1.2.0", "ac62ee99da068f43c50dc69acf700e03a62a348360126260e87f2b54eced86b2", [:rebar3], []},
1223
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], []},
1324
"mimerl": {:hex, :mimerl, "1.0.2", "993f9b0e084083405ed8252b99460c4f0563e41729ab42d9074fd5e52439be88", [:rebar3], []},
1425
"mochiweb": {:hex, :mochiweb, "2.15.0", "e1daac474df07651e5d17cc1e642c4069c7850dc4508d3db7263a0651330aacc", [:rebar3], []},
26+
<<<<<<< 9cfc6815facb50206a004eae94456543bb37f31a
1527
"poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], []},
1628
"poolboy": {:hex, :poolboy, "1.5.1", "6b46163901cfd0a1b43d692657ed9d7e599853b3b21b95ae5ae0a777cf9b6ca8", [:rebar], []},
1729
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.1", "28a4d65b7f59893bc2c7de786dec1e1555bd742d336043fe644ae956c3497fbe", [:make, :rebar], []},
1830
"wallaby": {:hex, :wallaby, "0.14.0", "476a97a7d592fbd0e3875337c02afeee1ae059e8247791d99b90286ad97917b0", [:mix], [{:httpoison, "~> 0.9", [hex: :httpoison, optional: false]}, {:poison, ">= 1.4.0", [hex: :poison, optional: false]}, {:poolboy, "~> 1.5", [hex: :poolboy, optional: false]}]}}
31+
=======
32+
"mochiweb_html": {:hex, :mochiweb_html, "2.15.0", "d7402e967d7f9f2912f8befa813c37be62d5eeeddbbcb6fe986c44e01460d497", [:rebar3], []},
33+
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.1", "28a4d65b7f59893bc2c7de786dec1e1555bd742d336043fe644ae956c3497fbe", [:make, :rebar], []}}
34+
>>>>>>> Added summarize_existing

test/readability_test.exs

+27-7
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,7 @@ defmodule ReadabilityTest do
33

44
test "readability for NY Times" do
55
html = TestHelper.read_fixture("nytimes.html")
6-
opts = [clean_conditionally: false]
7-
nytimes = Readability.article(html, opts)
6+
nytimes = Readability.article(html)
87

98
nytimes_html = Readability.readable_html(nytimes)
109
assert nytimes_html =~ ~r/^<div><div class="story-body"><figure id="media-100000004245260" class="media photo lede layout-large-horizontal"><div class="image"><img src="https:\/\/static01.nyt/
@@ -32,8 +31,7 @@ defmodule ReadabilityTest do
3231

3332
test "readability for medium" do
3433
html = TestHelper.read_fixture("medium.html")
35-
opts = [clean_conditionally: false]
36-
medium = Readability.article(html, opts)
34+
medium = Readability.article(html)
3735

3836
medium_html = Readability.readable_html(medium)
3937

@@ -48,8 +46,7 @@ defmodule ReadabilityTest do
4846

4947
test "readability for medium 2" do
5048
html = TestHelper.read_fixture("medium2.html")
51-
opts = [clean_conditionally: false]
52-
medium = Readability.article(html, opts)
49+
medium = Readability.article(html)
5350

5451
medium_html = Readability.readable_html(medium)
5552

@@ -86,6 +83,28 @@ defmodule ReadabilityTest do
8683
assert pubmed_text =~ ~r/with different mechanisms yielded potent antihypertensive efficacy with safety and decreased plasma BNP levels.$/
8784
end
8885

86+
test "summarize_existing for pubmed" do
87+
html = TestHelper.read_fixture("pubmed.html")
88+
summary = Readability.summarize_existing(html)
89+
90+
assert summary.article_html =~ ~r/^<div><div class=""><h4>BACKGROUND AND OBJECTIVES: <\/h4><p><abstracttext>Although strict blood pressure/
91+
assert summary.article_html =~ ~r/different mechanisms yielded potent antihypertensive efficacy with safety and decreased plasma BNP levels.<\/abstracttext><\/p><\/div><\/div>$/
92+
93+
assert summary.article_text =~ ~r/^BACKGROUND AND OBJECTIVES: \nAlthough strict blood pressure/
94+
assert summary.article_text =~ ~r/with different mechanisms yielded potent antihypertensive efficacy with safety and decreased plasma BNP levels.$/
95+
end
96+
97+
test "summarize_existing with url" do
98+
html = TestHelper.read_fixture("pubmed.html")
99+
summary = Readability.summarize_existing(html, [url: "http://test.com"])
100+
101+
assert summary.article_html =~ ~r/^<div><div class=""><h4>BACKGROUND AND OBJECTIVES: <\/h4><p><abstracttext>Although strict blood pressure/
102+
assert summary.article_html =~ ~r/different mechanisms yielded potent antihypertensive efficacy with safety and decreased plasma BNP levels.<\/abstracttext><\/p><\/div><\/div>$/
103+
104+
assert summary.article_text =~ ~r/^BACKGROUND AND OBJECTIVES: \nAlthough strict blood pressure/
105+
assert summary.article_text =~ ~r/with different mechanisms yielded potent antihypertensive efficacy with safety and decreased plasma BNP levels.$/
106+
end
107+
89108
test "readability for tomaz" do
90109
html = TestHelper.read_fixture("tomaz.html")
91110
tomaz = Readability.article(html)
@@ -119,12 +138,12 @@ defmodule ReadabilityTest do
119138
assert html =~ "So I’m not a big fan out too much automation when it comes to social media, but there are times when it can be useful and save you a lot of time"
120139
end
121140

141+
122142
# test "readability for buzzfeed (url)" do
123143
# html = Readability.summarize("https://www.buzzfeed.com/salvadorhernandez/fbi-obtains-passcode-to-iphone-in-new-york-drops-case-agains").article_html
124144
# assert html =~ "In New York, as in San Bernardino, an imminent courtroom battle was averted"
125145
# end
126146

127-
128147
# test "readability for newyorker url" do
129148
# html = Readability.summarize("http://www.newyorker.com/magazine/2017/01/09/the-vertical-farm").article_html
130149

@@ -149,4 +168,5 @@ defmodule ReadabilityTest do
149168
# assert html =~ "deportation and murder at the hands of the German Nazis from 1943. In the far south, a terminal,"
150169
# end
151170

171+
152172
end

0 commit comments

Comments
 (0)