I recently received a message on LinkedIn about creating sitemaps for large websites with thousands of pages. Free sitemap generators like xml-sitemaps.com and mysitemapgenerator.com only work for a limited number of URLs. If you are doing SEO for a custom website that does not use a CMS, you will need your technical SEO skills: you can't rely on SEO plugins that generate sitemaps automatically, so you'll need a different solution for bigger, custom-built sites.
I know two ways to create sitemaps for large sites: manual creation using CSV and Notepad, and automated creation using Python. Today, I’ll share the Python method, which requires basic Python knowledge.
I’ll share three Python scripts:
- Single File Script: Creates one XML file for a list of URLs (suitable for up to 25,000 URLs).
- Category Script: Divides sitemaps into categories (works for even 1 million URLs).
- Alphabetical Script: Divides sitemaps into 26 files, one for each letter of the alphabet (also works for 1 million URLs).
You can run this code in Google Colab or on your local system with PyCharm!
Script 1:
import pandas as pd
from lxml import etree

# The CSV has no header row -- column A is just a list of URLs (see the
# "REQUIRED CSV" note) -- so read with header=None; the default would
# silently consume the first URL as a column name and drop it.
df = pd.read_csv('list.csv', header=None)

# Root <urlset> element with the standard sitemap namespace.
urlset = etree.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")

# One <url> entry per row of the first column. Blank cells come back from
# pandas as NaN (a float), which lxml rejects as element text -- drop them
# and force everything to str.
for url in df.iloc[:, 0].dropna().astype(str):
    url_element = etree.SubElement(urlset, "url")
    loc = etree.SubElement(url_element, "loc")
    loc.text = url
    changefreq = etree.SubElement(url_element, "changefreq")
    changefreq.text = "weekly"
    priority = etree.SubElement(url_element, "priority")
    priority.text = "1.0"

# Serialize with an XML declaration and save.
sitemap = etree.tostring(urlset, pretty_print=True, xml_declaration=True, encoding='UTF-8')
with open("sitemap.xml", "wb") as file:
    file.write(sitemap)
print("Sitemap generated and saved as sitemap.xml")
REQUIRED CSV
List.csv — just place the list of URLs in column A (this is the list of URLs you want included in the sitemap). If you're wondering where to get your website's list of URLs, export it from your database!
Script 2
import csv
import os
from collections import defaultdict
from xml.etree.ElementTree import Element, SubElement, tostring
from xml.dom import minidom
def prettify(elem):
    """Serialize *elem* and return it as an indented, human-readable XML string."""
    raw = tostring(elem, 'utf-8')
    return minidom.parseString(raw).toprettyxml(indent=" ")
def create_sitemap(urls):
    """Build a sitemap document for *urls* and return it as pretty-printed XML.

    Each URL becomes a <url> entry holding a <loc> plus a fixed 'daily'
    <changefreq> value.
    """
    root = Element('urlset', xmlns='http://www.sitemaps.org/schemas/sitemap/0.9')
    for address in urls:
        entry = SubElement(root, 'url')
        SubElement(entry, 'loc').text = address
        SubElement(entry, 'changefreq').text = 'daily'
    return prettify(root)
def read_urls_from_csv(file_path):
    """Read (category, url) pairs from the first two columns of a CSV file.

    Rows with fewer than two columns are silently skipped.
    """
    pairs = []
    with open(file_path, newline='', encoding='utf-8') as handle:
        for row in csv.reader(handle):
            if len(row) >= 2:
                pairs.append((row[0], row[1]))
    return pairs
def main():
    """Group URLs from list.csv by category and write one sitemap per category."""
    source = 'list.csv'  # column A = category, column B = URL
    grouped = defaultdict(list)
    for category, url in read_urls_from_csv(source):
        grouped[category].append(url)

    # One <category>.xml file per distinct category value.
    for category, members in grouped.items():
        output_name = f'{category}.xml'
        with open(output_name, 'w', encoding='utf-8') as out:
            out.write(create_sitemap(members))
        print(f'Sitemap for {category} saved as {output_name}')


if __name__ == '__main__':
    main()
REQUIRED CSV
List.csv — place the title/value of each category in column A and the corresponding URL in column B (export your database, then in the CSV filter down to the two columns: category and the list of slugs/URLs of your website)!
Script 3
import csv
import os
from collections import defaultdict
from xml.etree.ElementTree import Element, SubElement, tostring
from xml.dom import minidom
def prettify(elem):
    """Return *elem* serialized as an indented XML string."""
    document = minidom.parseString(tostring(elem, 'utf-8'))
    return document.toprettyxml(indent=" ")
def create_sitemap(urls):
    """Return a pretty-printed sitemap XML string covering *urls*.

    Every URL is emitted as a <url> element containing its <loc> and a
    constant 'daily' <changefreq>.
    """
    container = Element('urlset', xmlns='http://www.sitemaps.org/schemas/sitemap/0.9')
    for link in urls:
        node = SubElement(container, 'url')
        SubElement(node, 'loc').text = link
        SubElement(node, 'changefreq').text = 'daily'
    return prettify(container)
def read_urls_from_csv(file_path):
    """Return the first-column value of every non-empty row in *file_path*."""
    urls = []
    with open(file_path, newline='', encoding='utf-8') as handle:
        for fields in csv.reader(handle):
            if fields:  # skip completely blank rows
                urls.append(fields[0])
    return urls
def main():
    """Split URLs from list.csv into per-letter sitemaps (stores-<letter>.xml)."""
    csv_file_path = 'list.csv'  # one URL per row in column A
    urls = read_urls_from_csv(csv_file_path)

    # Bucket each URL by the first character of its last non-empty path
    # segment. Filtering out empty segments fixes an IndexError in the old
    # logic: for inputs like '/' or URLs ending in '//', parts[-2] was the
    # empty string and parts[-2][0] crashed. URLs with no usable segment go
    # into an 'other' bucket instead.
    url_dict = defaultdict(list)
    for url in urls:
        segments = [part for part in url.split('/') if part]
        letter = segments[-1][0].lower() if segments else 'other'
        url_dict[letter].append(url)

    # Write one stores-<letter>.xml file per bucket.
    for letter, members in url_dict.items():
        file_name = f'stores-{letter}.xml'
        with open(file_name, 'w', encoding='utf-8') as file:
            file.write(create_sitemap(members))
        print(f'Sitemap for {letter} saved as {file_name}')


if __name__ == '__main__':
    main()
REQUIRED CSV
List.csv — just place the URLs in column A; the script uses its own logic to split the sitemaps in alphabetical order (just export the list of URLs from your database).