-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsitemap.py
75 lines (62 loc) · 2.59 KB
/
sitemap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import pymongo
from pymongo import MongoClient
import xml.etree.ElementTree as ET
from datetime import datetime
# NOTE(review): the connection string below embeds credentials in source;
# consider loading it from the environment or a secrets store instead.
db = MongoClient("mongodb://a:[email protected]:27017")["games"]
COLLECTION_NAME = "stories"
SITEMAP_DIR = "sitemaps/"
SITEMAP_INDEX_FILE = SITEMAP_DIR + "sitemap_index.xml"
# Number of URLs written to each sitemap file. Kept well below the
# 50,000-URL-per-sitemap maximum from the Google guidelines.
ITEMS_PER_SITEMAP = 10000

# Collection holding the documents that become sitemap entries.
collection = db[COLLECTION_NAME]

# Fetch only the fields the sitemap needs, newest first. The
# (key, direction) form of sort() is used because passing a dict is only
# accepted by recent PyMongo releases.
documents = collection.find({}, {"_id": 1, "created_at": 1}).sort(
    "created_at", pymongo.DESCENDING
)
# Function to format date
def format_date(date):
    """Return *date* rendered as a ``YYYY-MM-DD`` string.

    *date* must provide a ``strftime`` method (e.g. ``datetime`` or
    ``date`` objects).
    """
    day_pattern = "%Y-%m-%d"
    return date.strftime(day_pattern)
# Function to create a single sitemap
def create_sitemap(documents, file_path):
    """Write a single ``<urlset>`` sitemap file for *documents*.

    Args:
        documents: Iterable of mappings. Each must contain ``_id``, which
            is stringified into the page URL. An optional ``created_at``
            value is treated as an epoch timestamp in milliseconds (it is
            divided by 1000 before conversion) and emitted as ``<lastmod>``.
        file_path: Path of the XML file to write (UTF-8, with XML
            declaration).

    Raises:
        KeyError: If a document has no ``_id`` key.
    """
    # Imported once, under private aliases, so the module-level
    # ``datetime`` name is not shadowed inside this function.
    from datetime import datetime as _datetime, timezone as _timezone

    urlset = ET.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
    for doc in documents:
        url = ET.SubElement(urlset, "url")
        loc = ET.SubElement(url, "loc")
        loc.text = "https://storiez.today/id?id=" + str(doc["_id"])

        # Skip <lastmod> when the timestamp is absent or null.
        if "created_at" not in doc or doc["created_at"] is None:
            continue

        timestamp_seconds = doc["created_at"] / 1000.0
        # The sitemap protocol requires W3C Datetime: a value that includes
        # a time must also carry a timezone designator. Convert in UTC so
        # the output does not depend on the host's local timezone.
        modified = _datetime.fromtimestamp(timestamp_seconds, tz=_timezone.utc)
        lastmod = ET.SubElement(url, "lastmod")
        lastmod.text = modified.isoformat(timespec="seconds")

    tree = ET.ElementTree(urlset)
    tree.write(file_path, encoding="utf-8", xml_declaration=True)
# Create directory for sitemaps
import os

os.makedirs(SITEMAP_DIR, exist_ok=True)

# Paths of the sitemap files written so far, in creation order.
sitemaps = []


def _write_chunk(docs):
    """Write *docs* to the next numbered sitemap file and record its path."""
    file_path = f"{SITEMAP_DIR}sitemap_{len(sitemaps) + 1}.xml"
    create_sitemap(docs, file_path)
    sitemaps.append(file_path)


# Stream the documents, flushing a sitemap file each time a full chunk of
# ITEMS_PER_SITEMAP documents has been collected.
chunk = []
for doc in documents:
    chunk.append(doc)
    if len(chunk) == ITEMS_PER_SITEMAP:
        _write_chunk(chunk)
        chunk = []

# Create a sitemap for any remaining documents
if chunk:
    _write_chunk(chunk)
# Create sitemap index file: one <sitemap><loc> entry per generated file.
sitemapindex = ET.Element("sitemapindex", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
for sitemap in sitemaps:
    sitemap_element = ET.SubElement(sitemapindex, "sitemap")
    loc = ET.SubElement(sitemap_element, "loc")
    # Entries in `sitemaps` are relative paths built from SITEMAP_DIR
    # (e.g. "sitemaps/sitemap_1.xml"), so the host and path need an
    # explicit "/" separator; lstrip guards against a doubled slash.
    loc.text = "https://storiez.today/" + sitemap.lstrip("/")

tree = ET.ElementTree(sitemapindex)
tree.write(SITEMAP_INDEX_FILE, encoding="utf-8", xml_declaration=True)

print(f"Sitemap index created at {SITEMAP_INDEX_FILE}")
print(f"{len(sitemaps)} sitemaps created.")