From 24ec7cb693b5096db3e3fc7a7ce82ac06f0c61f8 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Sun, 13 Dec 2020 22:09:35 -0500 Subject: [PATCH 1/4] add: added _IGTVMapping --- instascrape/core/_mappings.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/instascrape/core/_mappings.py b/instascrape/core/_mappings.py index 36fa67d..04e7452 100644 --- a/instascrape/core/_mappings.py +++ b/instascrape/core/_mappings.py @@ -184,6 +184,10 @@ class _ReelMapping(_PostMapping): } ) + +class _IGTVMapping(_PostMapping): + mapping = _PostMapping.return_mapping().copy() + class _ProfileMapping(_GeneralMapping): """Mapping specific to Instagram profile pages""" From 592e8f543169d1d179456227688b71988f5f326c Mon Sep 17 00:00:00 2001 From: chris-greening Date: Sun, 13 Dec 2020 22:09:48 -0500 Subject: [PATCH 2/4] add: added import of IGTV --- instascrape/scrapers/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/instascrape/scrapers/__init__.py b/instascrape/scrapers/__init__.py index 6893593..4d7583a 100644 --- a/instascrape/scrapers/__init__.py +++ b/instascrape/scrapers/__init__.py @@ -10,3 +10,4 @@ from instascrape.scrapers.comment import * from instascrape.scrapers.location import * from instascrape.scrapers.reel import * +from instascrape.scrapers.igtv import * From 9e19df796e16e68093b682b4882b9ed558e9e866 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Sun, 13 Dec 2020 22:10:01 -0500 Subject: [PATCH 3/4] add: added IGTV scraper --- instascrape/scrapers/igtv.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 instascrape/scrapers/igtv.py diff --git a/instascrape/scrapers/igtv.py b/instascrape/scrapers/igtv.py new file mode 100644 index 0000000..0735091 --- /dev/null +++ b/instascrape/scrapers/igtv.py @@ -0,0 +1,10 @@ +from instascrape.scrapers.post import Post +from instascrape.core._mappings import _IGTVMapping + + +class IGTV(Post): + _Mapping = _IGTVMapping + + @staticmethod + def _url_from_suburl(suburl: str) -> str: + return 
f"https://www.instagram.com/tv/{suburl}/" From 71efe02178a71f1c27066cad9da0c676c2a608d3 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Sun, 13 Dec 2020 22:10:18 -0500 Subject: [PATCH 4/4] add: added IGTV test --- tests/scrapers/test_igtv.py | 78 +++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 tests/scrapers/test_igtv.py diff --git a/tests/scrapers/test_igtv.py b/tests/scrapers/test_igtv.py new file mode 100644 index 0000000..a6927ab --- /dev/null +++ b/tests/scrapers/test_igtv.py @@ -0,0 +1,78 @@ +import csv +import datetime +import json +import re +import os + +import pytest +from bs4 import BeautifulSoup +import requests + +from instascrape import IGTV + + +class TestIGTV: + @pytest.fixture + def url(self): + return "https://www.instagram.com/tv/CIrIIMYl8VQ/" + + @pytest.fixture + def get_request(self, url): + return requests.get(url, headers={"User-Agent": "user-agent: Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57"}) + + @pytest.fixture + def page_instance(self, url): + random_google_igtv = IGTV(url) + random_google_igtv.scrape() + return random_google_igtv + + def test_from_html(self, get_request, page_instance): + igtv_html = get_request.text + igtv_obj = IGTV(igtv_html) + igtv_obj.scrape() + assert igtv_obj.likes == page_instance.likes + + def test_from_soup(self, get_request, page_instance): + igtv_html = get_request.text + igtv_soup = BeautifulSoup(igtv_html, features='lxml') + igtv_obj = IGTV(igtv_soup) + igtv_obj.scrape() + assert igtv_obj.likes == page_instance.likes + + def test_to_dict(self, page_instance): + assert isinstance(page_instance.to_dict(), dict) + + def test_embed(self, page_instance): + html_embed = page_instance.embed() + embed_copied_from_instagram = '
' + assert html_embed == embed_copied_from_instagram + + @pytest.mark.file_io + def test_to_json(self, page_instance, tmpdir): + file = tmpdir.join("data.json") + page_instance.to_json(fp=str(file)) + with open(str(file), "r") as injson: + json_dict = json.load(injson) + assert page_instance['shortcode'] == json_dict['shortcode'] + + @pytest.mark.file_io + def test_to_csv(self, page_instance, tmpdir): + + # write to CSV + file = tmpdir.join("data.csv") + page_instance.to_csv(fp=str(file)) + + # reread the csv + with open(str(file), mode="r") as infile: + reader = csv.reader(infile) + csv_dict = {row[0]: row[1] for row in reader} + + assert page_instance['shortcode'] == csv_dict['shortcode'] + + @pytest.mark.file_io + def test_download_photo(self, page_instance, tmpdir): + + # download photo + file = tmpdir.join("image.jpg") + page_instance.download(fp=str(file)) + assert os.path.exists(file)