From d77bcf7495110297e4ca47c10b9423f2438f9d95 Mon Sep 17 00:00:00 2001 From: tom Date: Mon, 13 May 2024 15:20:49 +0200 Subject: [PATCH 1/2] Scraper for Isis forums --- .../get_all_forum_id.cpython-310.pyc | Bin 0 -> 1184 bytes SeleniumCrawler/IsisForums/config.json | 4 ++ .../IsisForums/forum_id_saved.json | 1 + SeleniumCrawler/IsisForums/forum_scraper.py | 37 ++++++++++++++ .../IsisForums/get_all_forum_id.py | 47 ++++++++++++++++++ 5 files changed, 89 insertions(+) create mode 100644 SeleniumCrawler/IsisForums/__pycache__/get_all_forum_id.cpython-310.pyc create mode 100644 SeleniumCrawler/IsisForums/config.json create mode 100644 SeleniumCrawler/IsisForums/forum_id_saved.json create mode 100644 SeleniumCrawler/IsisForums/forum_scraper.py create mode 100644 SeleniumCrawler/IsisForums/get_all_forum_id.py diff --git a/SeleniumCrawler/IsisForums/__pycache__/get_all_forum_id.cpython-310.pyc b/SeleniumCrawler/IsisForums/__pycache__/get_all_forum_id.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab6c440876b397ef2635e28a51ce79e1f0e9162a GIT binary patch literal 1184 zcmZ`(&u`Q?6t)v*CX;5mm3lxRSlGRwIW$>>KnTI|W7?IF5G(Br`{*|V>lkhpT#J?!jr+M%-HBK7_75f}yy<8Z=OM z-+9|_9E}g0+Bw9Hy9wF|T>GG`@UI|sW)6Do^#uA0)R5X;N6)S`UX5$-3~D+xI&kaG zA+P`u?Y-@O8Jlb2^(z;YP^e-?|yz%jkgp-r!4K@zpMdIG=5Hym(h4A% zNU3pOFm3uXUeJVPEN6vQ(jGSx4o9P8wDE9b`1tofOdn`Ts2=QGX2YA^f zuI~oehxq`ogB=|F=X@e7p}UNKUdW%oY0xw0)vN;?jjc=G^x=l30SXF_cRH+Pv**oidj!!@mV5t;*Q1_Ip?McNC%!hiwJ_1NWL# A*8l(j literal 0 HcmV?d00001 diff --git a/SeleniumCrawler/IsisForums/config.json b/SeleniumCrawler/IsisForums/config.json new file mode 100644 index 00000000..47b375ff --- /dev/null +++ b/SeleniumCrawler/IsisForums/config.json @@ -0,0 +1,4 @@ +{ + "username": "kleintom", + "password": "DZG6XF3icji3N8g((" +} diff --git a/SeleniumCrawler/IsisForums/forum_id_saved.json b/SeleniumCrawler/IsisForums/forum_id_saved.json new file mode 100644 index 00000000..85100016 --- /dev/null +++ b/SeleniumCrawler/IsisForums/forum_id_saved.json @@ -0,0 +1 @@ +[{"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}, {"course_id": "31624", "forum_ids": ["1435490", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "f=81945", "1435490", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407", "1626839", "1438284", "1438285", "1438331", "1438340", "1438348", "1438356", "1438364", "1438375", "1438382", "1438390", "1438399", "1438415", "1438422", "1438407"]}] \ No newline at end of file diff --git a/SeleniumCrawler/IsisForums/forum_scraper.py b/SeleniumCrawler/IsisForums/forum_scraper.py new file mode 100644 index 00000000..638dfecd --- /dev/null +++ b/SeleniumCrawler/IsisForums/forum_scraper.py @@ -0,0 +1,37 @@ +from selenium import webdriver +from selenium.webdriver.common.by import By +import json +from get_all_forum_id import get_all_forum_id + +with open('../config.json') as config_file: + config_data = json.load(config_file) + +# Extract username and password from config data +USERNAME_TOKEN = config_data['username'] +PASSWORD_TOKEN = config_data['password'] + +driver = webdriver.Chrome() + +driver.get("https://isis.tu-berlin.de/login/index.php") + +title = driver.title + +driver.implicitly_wait(0.5) + +tu_login_button = driver.find_element(by=By.ID, value="shibbolethbutton") +tu_login_button.click() + +title_new = driver.title +print(title_new) +username_login = driver.find_element(by=By.ID, value="username") +password_login = driver.find_element(by=By.ID, value="password") + +username_login.send_keys(USERNAME_TOKEN) +password_login.send_keys(PASSWORD_TOKEN) + +final_login_button = driver.find_element(by=By.ID, value="login-button") +final_login_button.click() + +title_second =driver.title + +get_all_forum_id(driver) \ No newline at end of file diff --git a/SeleniumCrawler/IsisForums/get_all_forum_id.py b/SeleniumCrawler/IsisForums/get_all_forum_id.py new file mode 100644 index 00000000..32d3dc45 --- /dev/null +++ b/SeleniumCrawler/IsisForums/get_all_forum_id.py @@ -0,0 +1,47 @@ +from selenium.webdriver.common.by import By +import json + + +def get_all_forum_id(driver): + base_url = "https://isis.tu-berlin.de/course/view.php?id=" + + # Open the course ID JSON file + with open("../../course_id_saved.json", 'r') as f: + course_id_data = json.load(f) + + # Create a dictionary for all forum IDs + forum_id_dict = [] + + # Iterate over each entry in the course ID database + for course_id in course_id_data: + course_url = base_url + course_id + driver.get(course_url) + + # CSS Selector for all forum links + links = driver.find_elements(By.CSS_SELECTOR, "a[href^='https://isis.tu-berlin.de/mod/forum/view']") + + # Create a list for all forum IDs + forum_ids = [] + + # Iterate over each link + for link in links: + link_url = link.get_attribute("href") + print(link_url) + + # Extract the forum ID from the link + forum_id = link_url[-7:] + + # Add the ID to the forum list + forum_ids.append(forum_id) + + # Add the list to the course list + forum_id_dict.append({ + "course_id": course_id, + "forum_ids": forum_ids + }) + + forum_file = "forum_id_saved.json" + + # Save the forum ID JSON file + with open(forum_file, 'w') as f: + json.dump(forum_id_dict, f) From b51ee254e2e0ac3aaca139dfd3285f68a7557807 Mon Sep 17 00:00:00 2001 From: tom Date: Mon, 13 May 2024 15:24:28 +0200 Subject: [PATCH 2/2] Scraper for Isis forums --- SeleniumCrawler/IsisForums/config.json | 4 ---- SeleniumCrawler/IsisForums/forum_scraper.py | 6 +++++- 2 files changed, 5 insertions(+), 5 deletions(-) delete mode 100644 SeleniumCrawler/IsisForums/config.json diff --git a/SeleniumCrawler/IsisForums/config.json b/SeleniumCrawler/IsisForums/config.json deleted file mode 100644 index 47b375ff..00000000 --- a/SeleniumCrawler/IsisForums/config.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "username": "kleintom", - "password": "DZG6XF3icji3N8g((" -} diff --git a/SeleniumCrawler/IsisForums/forum_scraper.py b/SeleniumCrawler/IsisForums/forum_scraper.py index 638dfecd..fa489739 100644 --- a/SeleniumCrawler/IsisForums/forum_scraper.py +++ b/SeleniumCrawler/IsisForums/forum_scraper.py @@ -3,6 +3,10 @@ import json from get_all_forum_id import get_all_forum_id + +# Note that this file is preliminary. +# Eventually, we will run all scraper files with one scraper.py + with open('../config.json') as config_file: config_data = json.load(config_file) @@ -32,6 +36,6 @@ final_login_button = driver.find_element(by=By.ID, value="login-button") final_login_button.click() -title_second =driver.title +title_second = driver.title get_all_forum_id(driver) \ No newline at end of file