-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_active_podcasts.py
82 lines (68 loc) · 2.66 KB
/
check_active_podcasts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from datetime import datetime, timedelta, timezone
from email.utils import parsedate_to_datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Function to read websites from a file
def read_websites_from_file(file_path):
    """Return the non-blank lines of a text file as a list of website URLs.

    Args:
        file_path: Path to a text file holding one URL per line.

    Returns:
        The stripped, non-empty lines in file order. An empty list is
        returned (after printing the error) when the file cannot be opened
        or decoded.
    """
    try:
        # Pin the encoding so the result does not depend on the platform
        # default. "utf-8-sig" additionally drops a leading byte-order mark,
        # which str.strip() would otherwise leave glued to the first URL.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            stripped_lines = (line.strip() for line in file)
            return [line for line in stripped_lines if line]
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: undecodable bytes
        # (UnicodeDecodeError) or an invalid path string.
        print(f"Error reading file: {e}")
        return []
# Function to get the last modified date from headers or meta tags
def get_last_modified(url, timeout=10):
    """Fetch a page and report when it says it was last modified.

    The ``Last-Modified`` response header is preferred. When it is absent,
    the HTML is searched for a ``<meta>`` tag whose ``http-equiv`` or
    ``name`` attribute is ``last-modified`` and whose ``content`` is
    formatted as ``YYYY-MM-DDTHH:MM:SS``.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up on the
            request.

    Returns:
        A naive ``datetime``. A header value that carries a timezone is
        normalised to UTC before the timezone is dropped; a meta-tag value
        is returned exactly as written in the page. ``None`` is returned
        when the request fails, the date cannot be parsed, or the page
        exposes no last-modified information.
    """
    try:
        # Without a timeout a single unresponsive host would stall the
        # whole run indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Check the 'Last-Modified' header first.
        header_value = response.headers.get('Last-Modified')
        if header_value:
            # parsedate_to_datetime understands HTTP/RFC 5322 dates without
            # relying on the process locale for day and month names.
            parsed = parsedate_to_datetime(header_value)
            if parsed.tzinfo is not None:
                # Keep the return value naive so callers can compare it
                # with other naive datetimes.
                parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
            return parsed

        # No header: fall back to meta tags in the HTML.
        soup = BeautifulSoup(response.content, 'html.parser')
        meta_date = (
            soup.find('meta', {'http-equiv': 'last-modified'})
            or soup.find('meta', {'name': 'last-modified'})
        )
        if meta_date and meta_date.get('content'):
            return datetime.strptime(meta_date['content'], "%Y-%m-%dT%H:%M:%S")
    except Exception as e:
        # Deliberately broad: one misbehaving site (network error, bad
        # status, unparsable date) must not abort the check of the others.
        print(f"Error checking {url}: {e}")
    return None
# Function to check all websites and build a table
def check_websites(websites):
    """Check every website and tabulate whether it was updated recently.

    Args:
        websites: Iterable of URLs, each passed to ``get_last_modified``.

    Returns:
        A pandas ``DataFrame`` with one row per website and the columns
        ``Website``, ``Last Checked``, ``Last Updated`` and ``Active``.
        ``Active`` is ``"Yes"`` when the last-modified time falls within the
        past 30 days, ``"No"`` when it is older, and ``"Unknown"`` when no
        time could be determined. The columns are present even when
        ``websites`` is empty.
    """
    columns = ['Website', 'Last Checked', 'Last Updated', 'Active']

    # 'Last Checked' is the local wall-clock time of this run, shared by
    # every row.
    checked_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Last-Modified headers are expressed in GMT, so the cutoff is computed
    # in UTC (kept naive) rather than from local time, which would skew the
    # comparison by the local UTC offset.
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
    thirty_days_ago = utc_now - timedelta(days=30)

    rows = []
    for site in websites:
        last_modified = get_last_modified(site)
        if last_modified is None:
            last_modified_str = "Unknown"
            is_active = "Unknown"
        else:
            if last_modified.tzinfo is not None:
                # Comparing aware with naive datetimes raises TypeError;
                # normalise to naive UTC first.
                last_modified = last_modified.astimezone(timezone.utc).replace(tzinfo=None)
            last_modified_str = last_modified.strftime("%Y-%m-%d %H:%M:%S")
            is_active = "Yes" if last_modified >= thirty_days_ago else "No"
        rows.append({
            'Website': site,
            'Last Checked': checked_at,
            'Last Updated': last_modified_str,
            'Active': is_active,
        })

    # Passing the columns explicitly keeps the header stable for empty input.
    return pd.DataFrame(rows, columns=columns)
# Main function to read websites from file, check them, and save to CSV
def main(file_path='websites.txt', output_csv='podcast_update.csv'):
    """Read the website list, check each site, and save the results as CSV.

    Args:
        file_path: Text file with one website URL per line.
        output_csv: Destination path for the generated CSV report.

    Returns:
        None. When no websites could be read, a message is printed and no
        CSV file is written.
    """
    websites = read_websites_from_file(file_path)
    if not websites:
        print("No websites found or error reading file.")
        return

    df = check_websites(websites)

    # Save the DataFrame to a CSV file, without the index column.
    df.to_csv(output_csv, index=False)
    print(f"Data saved to {output_csv}")


# Run the main function only when executed as a script.
if __name__ == "__main__":
    main()