How to Create Manual Sitemaps For Large Custom Websites

I recently received a message on LinkedIn about creating sitemaps for large websites with thousands of pages. Free sitemap generators like xml-sitemaps.com and mysitemapgenerator.com only work for a limited number of URLs. If you are doing SEO for a custom website that does not use a CMS, you will need your technical SEO skills: you can’t take advantage of SEO plugins that create sitemaps automatically for you, so you’ll need a different solution for bigger custom sites.

I know two ways to create sitemaps for large sites: manual creation using CSV and Notepad, and automated creation using Python. Today, I’ll share the Python method, which requires basic Python knowledge.

I’ll share three Python scripts:

  1. Single File Script: Creates one XML file for a list of URLs (suitable for up to 25,000 URLs).
  2. Category Script: Divides sitemaps into categories (works for even 1 million URLs).
  3. Alphabetical Script: Divides sitemaps into 26 files, one for each letter of the alphabet (also works for 1 million URLs).

You can run this code in Google Colab or on your local system with PyCharm!

Script 1:

import pandas as pd
from lxml import etree


# Load the URL list. list.csv is expected to hold only URLs in column A with no
# header row, so header=None stops pandas from consuming the first URL as a
# column name and leaving it out of the sitemap.
df = pd.read_csv('list.csv', header=None)

# Root <urlset> element carrying the sitemap protocol namespace
urlset = etree.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")

# Empty cells come back from pandas as NaN (a float, not a string), so drop
# them instead of passing them on as element text.
for url in df.iloc[:, 0].dropna():  # Loop through the first column containing URLs
    url_element = etree.SubElement(urlset, "url")
    loc = etree.SubElement(url_element, "loc")
    # str() guards against cells pandas parsed as numbers; strip() removes
    # stray whitespace around the URL.
    loc.text = str(url).strip()
    changefreq = etree.SubElement(url_element, "changefreq")
    changefreq.text = "weekly"
    priority = etree.SubElement(url_element, "priority")
    priority.text = "1.0"


# Serialize to bytes with an XML declaration so the file can be written as-is
sitemap = etree.tostring(urlset, pretty_print=True, xml_declaration=True, encoding='UTF-8')

# Save the sitemap (binary mode because tostring() returned encoded bytes)
with open("sitemap.xml", "wb") as file:
    file.write(sitemap)

print("Sitemap generated and saved as sitemap.xml")

REQUIRED CSV

list.csv: just place the list of URLs in column A (these are the URLs that you want to include in the sitemap). If you are wondering where to get your website’s list of URLs, you need to export it from your database!

Script 2

import csv
import os
from collections import defaultdict
from xml.etree.ElementTree import Element, SubElement, tostring
from xml.dom import minidom

def prettify(elem):
    """Serialize *elem* and return it as indented, human-readable XML text."""
    # ElementTree's tostring() adds no indentation of its own, so the bytes
    # are re-parsed with minidom, which can pretty-print.
    raw_xml = tostring(elem, encoding='utf-8')
    dom = minidom.parseString(raw_xml)
    pretty_xml = dom.toprettyxml(indent=" ")
    return pretty_xml

def create_sitemap(urls):
    """Build a sitemap document for *urls* and return it as pretty-printed XML.

    Every URL becomes one ``<url>`` entry containing a ``<loc>`` with the URL
    and a ``<changefreq>`` of ``daily``.
    """
    root = Element('urlset', xmlns='http://www.sitemaps.org/schemas/sitemap/0.9')
    for address in urls:
        entry = SubElement(root, 'url')
        SubElement(entry, 'loc').text = address
        SubElement(entry, 'changefreq').text = 'daily'
    return prettify(root)

def read_urls_from_csv(file_path):
    """Read pairs from the first two columns of a CSV file.

    Args:
        file_path: Path to the CSV file. The caller treats column A as the
            category and column B as the URL.

    Returns:
        A list of ``(row[0], row[1])`` tuples in file order. Rows with fewer
        than two columns (blank lines included) are skipped.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but also removes the byte-order
    # mark that spreadsheet exports often prepend; with plain 'utf-8' the BOM
    # would stay attached to the first cell of the file.
    with open(file_path, newline='', encoding='utf-8-sig') as csvfile:
        reader = csv.reader(csvfile)
        return [(row[0], row[1]) for row in reader if len(row) >= 2]  # Ensure the row has at least two columns

def _safe_file_stem(category):
    """Return *category* with characters that are unsafe in file names replaced by '-'."""
    # Path separators and the characters Windows forbids in file names; left
    # as-is they would make open() fail or write outside the current folder.
    unsafe = '<>:"/\\|?*'
    return ''.join('-' if ch in unsafe or ord(ch) < 32 else ch for ch in category)


def main():
    """Write one ``<category>.xml`` sitemap per category found in ``list.csv``.

    Reads (category, URL) rows from ``list.csv`` in the current directory,
    groups the URLs by category, and saves each group as its own sitemap file.
    Category names are made file-name safe first; names that are already safe
    are used unchanged.
    """
    csv_file_path = 'list.csv'  # Path to your CSV file
    url_data = read_urls_from_csv(csv_file_path)

    # Organize URLs by category. Grouping on the sanitized name means two
    # categories that map to the same file name share one sitemap instead of
    # the second silently overwriting the first.
    url_dict = defaultdict(list)
    for category, url in url_data:
        url_dict[_safe_file_stem(category)].append(url)

    # Create and save sitemaps
    for category, urls in url_dict.items():
        sitemap = create_sitemap(urls)
        file_name = f'{category}.xml'
        with open(file_name, 'w', encoding='utf-8') as file:
            file.write(sitemap)
        print(f'Sitemap for {category} saved as {file_name}')

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()

REQUIRED CSV

list.csv: place the category title/value in column A and each URL next to its category in column B (export your database, then keep two columns in the CSV: the category and the slug/URL of each page of your website)!

Script 3

import csv
import os
from collections import defaultdict
from xml.etree.ElementTree import Element, SubElement, tostring
from xml.dom import minidom

def prettify(elem):
    """Return the Element rendered as indented XML text."""
    # Serialize to UTF-8 bytes, re-parse with minidom, and let minidom indent.
    return minidom.parseString(tostring(elem, 'utf-8')).toprettyxml(indent=" ")

def create_sitemap(urls):
    """Return pretty-printed sitemap XML listing every URL in *urls*.

    Each URL is written as a ``<url>`` entry with a ``<loc>`` child holding
    the URL and a ``<changefreq>`` child set to ``daily``.
    """
    root = Element('urlset', {'xmlns': 'http://www.sitemaps.org/schemas/sitemap/0.9'})
    for page_url in urls:
        entry = SubElement(root, 'url')
        # Children are emitted in this fixed order: location, then frequency.
        for tag, text in (('loc', page_url), ('changefreq', 'daily')):
            SubElement(entry, tag).text = text
    return prettify(root)

def read_urls_from_csv(file_path):
    """Read the first column of a CSV file as a list of URLs.

    Args:
        file_path: Path to the CSV file; only column A is used.

    Returns:
        A list with the first cell of every non-empty row, in file order.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but also removes the byte-order
    # mark that spreadsheet exports often prepend; with plain 'utf-8' the BOM
    # would stay attached to the first URL.
    with open(file_path, newline='', encoding='utf-8-sig') as csvfile:
        reader = csv.reader(csvfile)
        return [row[0] for row in reader if row]  # Ensure the row is not empty

def _bucket_for(url):
    """Return the sitemap bucket for *url*: the lower-cased first character of its last path segment."""
    parts = url.split('/')
    if parts[-1]:  # Ensure the last part is not empty
        segment = parts[-1]
    elif len(parts) > 1:
        # Trailing slash: fall back to the segment before it.
        segment = parts[-2]
    else:
        segment = ''
    # Slicing (not indexing) keeps an empty segment from raising IndexError.
    first = segment[:1].lower()
    # Anything that is not a letter or digit ('?', '*', '%', or nothing at
    # all) goes to the shared 'other' bucket so it never ends up in a file name.
    return first if first.isalnum() else 'other'


def main():
    """Write one ``stores-<bucket>.xml`` sitemap per leading character found in ``list.csv``.

    Reads URLs from column A of ``list.csv`` in the current directory, groups
    them by the first character of their last path segment, and saves each
    group as its own sitemap file. URLs whose segment is empty or starts with
    a non-alphanumeric character are collected in ``stores-other.xml``.
    """
    csv_file_path = 'list.csv'  # Path to your CSV file
    urls = read_urls_from_csv(csv_file_path)

    # Organize URLs by the first letter of the last path segment
    url_dict = defaultdict(list)
    for url in urls:
        url_dict[_bucket_for(url)].append(url)

    # Create and save sitemaps
    for letter, bucket_urls in url_dict.items():
        sitemap = create_sitemap(bucket_urls)
        file_name = f'stores-{letter}.xml'  # Bucket name is joined with a hyphen
        with open(file_name, 'w', encoding='utf-8') as file:
            file.write(sitemap)
        print(f'Sitemap for {letter} saved as {file_name}')

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()

REQUIRED CSV

list.csv: just place the URLs in column A; the script uses its own logic to create the sitemaps in alphabetical order (just export the list of URLs from your database).

Share This Article
Follow:
is a creative writer and a BBA student from Karachi, Pakistan. He is Co-Admin at Mobilemall.pk. He mostly shares ideas about mobile phones, technology, SEO, SEM, PPC, etc.
Exit mobile version