From df22622ab9000cef7c972ebec403acb98feb3b2e Mon Sep 17 00:00:00 2001
From: Sarah Yurick
Date: Tue, 24 Dec 2024 10:43:44 -0800
Subject: [PATCH] skip flaky tests

Signed-off-by: Sarah Yurick
---
 tests/test_download.py | 53 ++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 31 deletions(-)

diff --git a/tests/test_download.py b/tests/test_download.py
index 677853ff..889e3ccc 100644
--- a/tests/test_download.py
+++ b/tests/test_download.py
@@ -82,6 +82,9 @@ def test_resiliparse_extract_text(self):
 
         assert result == expected
 
+    @pytest.mark.skip(
+        reason="Skipping until we figure out how to get this to a non flaky state"
+    )
     def test_common_crawl_urls(self):
         start_snapshot = "2021-04"
         end_snapshot = "2021-10"
@@ -103,6 +106,9 @@ def test_incorrect_snapshot_order(self):
             start_snapshot = "2021-10"
             urls = get_common_crawl_urls(start_snapshot, end_snapshot)
 
+    @pytest.mark.skip(
+        reason="Skipping until we figure out how to get this to a non flaky state"
+    )
     def test_common_crawl_news_urls(self):
         start_snapshot = "2021-04"
         end_snapshot = "2021-10"
@@ -111,22 +117,13 @@ def test_common_crawl_news_urls(self):
 
         assert (
             urls[0]
             == "https://data.commoncrawl.org/crawl-data/CC-NEWS/2021/04/CC-NEWS-20210401004522-01022.warc.gz"
-        ) or (
-            # Flaky test
-            urls[0]
-            == "https://data.commoncrawl.org/crawl-data/CC-NEWS/2021/05/CC-NEWS-20210501004458-01527.warc.gz"
         )
         assert (
             urls[-1]
             == "https://data.commoncrawl.org/crawl-data/CC-NEWS/2021/10/CC-NEWS-20211031225258-00089.warc.gz"
-        ) or (
-            # Flaky test
-            urls[-1]
-            == "https://data.commoncrawl.org/crawl-data/CC-NEWS/2021/09/CC-NEWS-20210930225622-00754.warc.gz"
         )
-        # Flaky test
-        assert len(urls) == 3838 or len(urls) == 3275
+        assert len(urls) == 3838
 
     def test_incorrect_snapshot_order_news(self):
         with pytest.raises(ValueError):
@@ -134,29 +131,23 @@ def test_incorrect_snapshot_order_news(self):
             start_snapshot = "2021-10"
             urls = get_common_crawl_urls(start_snapshot, end_snapshot, news=True)
 
+    @pytest.mark.skip(
+        reason="Skipping until we figure out how to get this to a non flaky state"
+    )
     def test_uneven_common_crawl_range(self):
-        try:
-            start_snapshot = "2021-03"
-            end_snapshot = "2021-11"
-            urls = get_common_crawl_urls(start_snapshot, end_snapshot)
+        start_snapshot = "2021-03"
+        end_snapshot = "2021-11"
+        urls = get_common_crawl_urls(start_snapshot, end_snapshot)
 
-            assert (
-                urls[0]
-                == "https://data.commoncrawl.org/crawl-data/CC-MAIN-2021-10/segments/1614178347293.1/warc/CC-MAIN-20210224165708-20210224195708-00000.warc.gz"
-            )
-            assert (
-                urls[-1]
-                == "https://data.commoncrawl.org/crawl-data/CC-MAIN-2021-04/segments/1610704847953.98/warc/CC-MAIN-20210128134124-20210128164124-00799.warc.gz"
-            )
-            assert len(urls) == 143840
-
-        except Exception as exception_string:
-            # We expect this flaky error
-            if "JSONDecodeError" in exception_string:
-                pass
-            # Else, something else is going on that needs to debugged
-            else:
-                assert False
+        assert (
+            urls[0]
+            == "https://data.commoncrawl.org/crawl-data/CC-MAIN-2021-10/segments/1614178347293.1/warc/CC-MAIN-20210224165708-20210224195708-00000.warc.gz"
+        )
+        assert (
+            urls[-1]
+            == "https://data.commoncrawl.org/crawl-data/CC-MAIN-2021-04/segments/1610704847953.98/warc/CC-MAIN-20210128134124-20210128164124-00799.warc.gz"
+        )
+        assert len(urls) == 143840
 
     def test_no_urls(self):
         with pytest.raises(ValueError):