Optimizing Your Next.js Sitemap with next-sitemap: A Complete Guide

Created: 2025-06-23
Updated: 2025-06-27
14 min read

Introduction

A well-configured sitemap is crucial for SEO success, helping search engines discover and index your content efficiently. However, many Next.js websites have poorly optimized sitemaps that include build artifacts, static assets, and other files that shouldn't be indexed.

In this post, I'll walk you through optimizing your Next.js sitemap using next-sitemap, sharing the exact configuration I use for this website and the reasoning behind each decision.

Why Sitemaps Matter for SEO

Before diving into the technical details, let's understand why sitemaps are essential:

  • Content Discovery: Help search engines find all your pages, especially dynamic content
  • Crawl Efficiency: Guide crawlers to prioritize important content
  • Metadata Communication: Provide information about page importance, update frequency, and modification dates
  • Performance: Reduce server load by preventing crawlers from accessing unnecessary files

The Problem with Default Configurations

Most Next.js websites using next-sitemap start with a basic configuration like this:

javascript
1/** @type {import('next-sitemap').IConfig} */ 2module.exports = { 3 siteUrl: 'https://example.com', 4 generateRobotsTxt: true, 5};

While this works, it often results in sitemaps that include:

  • Next.js build artifacts (/_next/static/chunks/...)
  • Image files and static assets
  • API routes that shouldn't be indexed
  • Component files and internal routes

My Optimized Configuration

Here's the complete next-sitemap.config.js configuration I use for this website:

javascript
1/** @type {import('next-sitemap').IConfig} */ 2module.exports = { 3 siteUrl: 'https://www.yiminyang.dev', 4 generateIndexSitemap: false, 5 generateRobotsTxt: true, 6 exclude: [ 7 '/blocked', 8 '/blocked/*', 9 '/api/*', 10 '/_next/*', // Next.js build artifacts 11 '/static/*', // Static assets 12 '*.js', // JavaScript files 13 '*.css', // CSS files 14 '*.map', // Source maps 15 '*.json', // JSON files (manifests, etc.) 16 '*.ico', // Favicon files 17 '*.png', // Image files 18 '*.jpg', // Image files 19 '*.jpeg', // Image files 20 '*.gif', // Image files 21 '*.svg', // SVG files (unless they're pages) 22 '*.webp', // Image files 23 '/playground/games/memory-card-game/memorycardgame', // Component file, not page 24 '/playground/games/whack-a-mole/whack-a-mole', // Component file, not page 25 '/playground/text-transformations/*/[A-Z]*', // Component files (capitalized) 26 '/playground/tools/qr-code-generator/[content]/*', // Dynamic route internals 27 ], 28 robotsTxtOptions: { 29 policies: [ 30 { 31 userAgent: '*', 32 disallow: ['/blocked', '/api', '/_next', '/static'], 33 allow: ['/playground', '/blog', '/talks'], 34 }, 35 ], 36 additionalSitemaps: ['https://www.yiminyang.dev/sitemap.xml'], 37 }, 38 changefreq: 'weekly', 39 priority: 0.7, 40 sitemapSize: 5000, 41 // Custom transformation for specific pages 42 transform: async (config, path) => { 43 const pathPriorities = { 44 main: { paths: ['/', '/about', '/blog', '/talks'], priority: 1.0, changefreq: 'weekly' }, 45 content: { paths: ['/blog/', '/talks/'], priority: 0.8, changefreq: 'monthly' }, 46 playground: { paths: ['/playground/'], priority: 0.6, changefreq: 'monthly' }, 47 }; 48 49 for (const [, { paths, priority, changefreq }] of Object.entries(pathPriorities)) { 50 if ( 51 paths.some((p) => { 52 if (p === '/') { 53 // Special case: root path only matches exactly 54 return path === '/'; 55 } 56 return path === p || (p.endsWith('/') && path.startsWith(p)); 57 }) 58 ) { 59 return { 60 loc: path, 61 changefreq, 
62 priority, 63 lastmod: new Date().toISOString(), 64 }; 65 } 66 } 67 68 return { 69 loc: path, 70 changefreq: config.changefreq, 71 priority: config.priority, 72 lastmod: new Date().toISOString(), 73 }; 74 }, 75 additionalPaths: async () => { 76 const fs = require('fs'); 77 const path = require('path'); 78 const result = []; 79 80 // Helper function to safely process files 81 const safeProcess = async (description, processor) => { 82 try { 83 await processor(); 84 } catch (error) { 85 console.warn(`Could not load ${description} for sitemap:`, error.message); 86 } 87 }; 88 89 // Add blog posts 90 await safeProcess('blog posts', () => { 91 const matter = require('gray-matter'); 92 const postsDir = path.join(process.cwd(), 'public', 'content', 'blog', 'posts'); 93 94 if (!fs.existsSync(postsDir)) return; 95 96 fs.readdirSync(postsDir) 97 .filter((file) => file.endsWith('.mdx')) 98 .forEach((file) => { 99 try { 100 const filePath = path.join(postsDir, file); 101 const { data } = matter(fs.readFileSync(filePath, 'utf8')); 102 103 if (data.publish !== false) { 104 const slug = file.replace(/\.mdx$/, '').toLowerCase(); 105 result.push({ 106 loc: `/blog/${slug}`, 107 changefreq: 'monthly', 108 priority: 0.8, 109 lastmod: data.modifiedDate || data.date || new Date().toISOString(), 110 }); 111 } 112 } catch (error) { 113 console.warn(`Could not process blog post ${file}:`, error.message); 114 } 115 }); 116 }); 117 118 // Add talks with improved regex handling 119 await safeProcess('talks', () => { 120 const talksPath = path.join(process.cwd(), 'lib', 'data', 'talks.ts'); 121 122 if (!fs.existsSync(talksPath)) return; 123 124 const content = fs.readFileSync(talksPath, 'utf8'); 125 const talkMatches = content.match(/{\s*id:\s*\d+,[\s\S]*?}/g) || []; 126 127 talkMatches.forEach((match) => { 128 try { 129 const extractField = (field) => { 130 // Improved regex to handle apostrophes within quoted strings 131 const doubleQuoteMatch = match.match(new 
RegExp(`${field}:\\s*"((?:[^"\\\\]|\\\\.)*)"`)); 132 const singleQuoteMatch = match.match(new RegExp(`${field}:\\s*'((?:[^'\\\\]|\\\\.)*)'`)); 133 const backtickMatch = match.match(new RegExp(`${field}:\\s*\`((?:[^\`\\\\]|\\\\.)*)\``)); 134 135 return doubleQuoteMatch?.[1] || singleQuoteMatch?.[1] || backtickMatch?.[1]; 136 }; 137 138 const title = extractField('title'); 139 if (!title) return; 140 141 const date = extractField('date') || new Date().toISOString(); 142 const slug = 143 extractField('slug') || 144 title 145 .toLowerCase() 146 .replace(/[^\w\s-]/g, '') // Remove special characters except spaces and hyphens 147 .replace(/\s+/g, '-') // Replace spaces with hyphens 148 .replace(/-+/g, '-') // Replace multiple hyphens with single hyphen 149 .trim() 150 .replace(/^-+|-+$/g, ''); // Remove leading/trailing hyphens 151 152 result.push({ 153 loc: `/talks/${slug}`, 154 changefreq: 'monthly', 155 priority: 0.8, 156 lastmod: new Date(date).toISOString(), 157 }); 158 } catch (error) { 159 console.warn('Could not process talk:', error.message); 160 } 161 }); 162 }); 163 164 return result; 165 }, 166};

Key Configuration Decisions

1. Comprehensive Exclusions

The exclude array is crucial for keeping your sitemap clean:

javascript
// Excerpt of the `exclude` option: glob patterns for paths that must not reach the sitemap.
exclude: [
  '/_next/*', // Next.js build artifacts
  '*.js', // JavaScript files
  '*.css', // CSS files
  '*.png', // Image files
  // ... other static assets
];

Why this matters: Without these exclusions, your sitemap might include URLs like:

  • /_next/static/chunks/240a8089e20a3158.js
  • /favicon.ico
  • /apple-icon.png

These files shouldn't be indexed by search engines as they're not content pages.

2. Data-Driven Priority System with Critical Bug Fix

I use a data-driven approach in the transform function that includes a critical fix for root path matching:

javascript
// Excerpt of the `transform` option: the first tier in `pathPriorities` that
// matches `path` decides priority and changefreq; otherwise the config-level
// defaults apply.
transform: async (config, path) => {
  // Entries without a trailing slash match exactly; entries with one match as a prefix.
  const pathPriorities = {
    main: { paths: ['/', '/about', '/blog', '/talks'], priority: 1.0, changefreq: 'weekly' },
    content: { paths: ['/blog/', '/talks/'], priority: 0.8, changefreq: 'monthly' },
    playground: { paths: ['/playground/'], priority: 0.6, changefreq: 'monthly' },
  };

  for (const [, { paths, priority, changefreq }] of Object.entries(pathPriorities)) {
    if (
      paths.some((p) => {
        if (p === '/') {
          // Special case: root path only matches exactly
          return path === '/';
        }
        return path === p || (p.endsWith('/') && path.startsWith(p));
      })
    ) {
      return {
        loc: path,
        changefreq,
        priority,
        lastmod: new Date().toISOString(),
      };
    }
  }

  return {
    loc: path,
    changefreq: config.changefreq,
    priority: config.priority,
    lastmod: new Date().toISOString(),
  };
};

Critical Bug Fix: The special handling for the root path (/) is essential. Without it, the root path would match ALL paths (since every path starts with /), causing incorrect priority assignments. This bug can significantly impact your SEO by giving wrong priorities to your pages.

This data-driven approach reduces code duplication and makes it easy to adjust priorities and change frequencies for different content types.

3. Robust Dynamic Content Handling with Error Recovery

The additionalPaths function uses a sophisticated approach with centralized error handling:

javascript
// Excerpt of the `additionalPaths` option: collects sitemap entries for blog
// posts (MDX front matter) and talks (parsed from lib/data/talks.ts). Each
// source is wrapped so a failure is logged instead of failing the build.
additionalPaths: async () => {
  const fs = require('fs');
  const path = require('path');
  const result = [];

  // Helper function to safely process files
  const safeProcess = async (description, processor) => {
    try {
      await processor();
    } catch (error) {
      console.warn(`Could not load ${description} for sitemap:`, error.message);
    }
  };

  // Add blog posts
  await safeProcess('blog posts', () => {
    const matter = require('gray-matter');
    const postsDir = path.join(process.cwd(), 'public', 'content', 'blog', 'posts');

    if (!fs.existsSync(postsDir)) return;

    fs.readdirSync(postsDir)
      .filter((file) => file.endsWith('.mdx'))
      .forEach((file) => {
        try {
          const filePath = path.join(postsDir, file);
          const { data } = matter(fs.readFileSync(filePath, 'utf8'));

          if (data.publish !== false) {
            const slug = file.replace(/\.mdx$/, '').toLowerCase();
            result.push({
              loc: `/blog/${slug}`,
              changefreq: 'monthly',
              priority: 0.8,
              // Unquoted YAML dates reach us as Date objects, which would be
              // written in Date#toString() form. Normalise to ISO 8601, the
              // same way the talks branch below does.
              lastmod: new Date(data.modifiedDate || data.date || Date.now()).toISOString(),
            });
          }
        } catch (error) {
          console.warn(`Could not process blog post ${file}:`, error.message);
        }
      });
  });

  // Add talks with improved regex handling
  await safeProcess('talks', () => {
    const talksPath = path.join(process.cwd(), 'lib', 'data', 'talks.ts');

    if (!fs.existsSync(talksPath)) return;

    const content = fs.readFileSync(talksPath, 'utf8');
    // Lazy match from "{ id: <n>," to the first "}" that follows it.
    const talkMatches = content.match(/{\s*id:\s*\d+,[\s\S]*?}/g) || [];

    talkMatches.forEach((match) => {
      try {
        const extractField = (field) => {
          // Improved regex to handle apostrophes within quoted strings
          const doubleQuoteMatch = match.match(new RegExp(`${field}:\\s*"((?:[^"\\\\]|\\\\.)*)"`));
          const singleQuoteMatch = match.match(new RegExp(`${field}:\\s*'((?:[^'\\\\]|\\\\.)*)'`));
          const backtickMatch = match.match(new RegExp(`${field}:\\s*\`((?:[^\`\\\\]|\\\\.)*)\``));

          return doubleQuoteMatch?.[1] || singleQuoteMatch?.[1] || backtickMatch?.[1];
        };

        const title = extractField('title');
        if (!title) return;

        const date = extractField('date') || new Date().toISOString();
        const slug =
          extractField('slug') ||
          title
            .toLowerCase()
            .replace(/[^\w\s-]/g, '') // Remove special characters except spaces and hyphens
            .replace(/\s+/g, '-') // Replace spaces with hyphens
            .replace(/-+/g, '-') // Replace multiple hyphens with single hyphen
            .trim()
            .replace(/^-+|-+$/g, ''); // Remove leading/trailing hyphens

        result.push({
          loc: `/talks/${slug}`,
          changefreq: 'monthly',
          priority: 0.8,
          lastmod: new Date(date).toISOString(),
        });
      } catch (error) {
        console.warn('Could not process talk:', error.message);
      }
    });
  });

  return result;
};

Key improvements:

  • safeProcess helper: Centralizes error handling to prevent build failures
  • Improved regex: Handles apostrophes within quoted strings correctly
  • Consistent slug generation: Uses the same logic as the Next.js app to prevent duplicate entries
  • Multiple content sources: Handles both blog posts and talks with appropriate error recovery

4. Enhanced robots.txt

The robotsTxtOptions configuration creates a comprehensive robots.txt:

txt
User-agent: *
Allow: /playground
Allow: /blog
Allow: /talks
Disallow: /blocked
Disallow: /api
Disallow: /_next
Disallow: /static

Sitemap: https://www.yiminyang.dev/sitemap.xml

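This explicitly tells crawlers which paths they may crawl and which to skip. Note that robots.txt controls crawling, not indexing; the sitemap is what advertises the URLs you want indexed.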

Results and Benefits

After implementing this configuration and fixing critical bugs, my website's sitemap went from including unwanted build artifacts to a clean, focused list of 59 relevant URLs:

  • 4 Main pages (priority 1.0)
  • 8 Blog posts (priority 0.8)
  • 29 Talk pages (priority 0.8)
  • 13 Playground tools (priority 0.6)
  • 5 Other pages (legal, contact, etc.)

Critical Bug Fixes Applied:

  • Fixed root path matching that was causing incorrect priority assignments
  • Resolved duplicate entries caused by inconsistent slug generation
  • Improved regex handling for apostrophes in titles

SEO Benefits

  1. Cleaner crawling: Search engines focus on actual content, not build artifacts
  2. Better prioritization: Important pages get higher priority scores (fixed the root path bug)
  3. Accurate metadata: Blog posts and talks carry their real publication dates; all other pages still receive the build timestamp from transform
  4. No duplicate entries: Consistent slug generation prevents confusion
  5. Reduced server load: Fewer unnecessary requests from crawlers

Performance Benefits

  1. Smaller sitemap files: Only relevant URLs are included
  2. Faster generation: Efficient exclusion patterns and error handling
  3. Better caching: Static sitemap generation during build
  4. Robust error recovery: Build doesn't fail if content sources are unavailable

Common Pitfalls to Avoid

1. Including Build Artifacts

Always exclude /_next/* and static assets. These files change with every build and shouldn't be indexed.

2. Ignoring Dynamic Content

Don't forget to handle dynamic routes like [slug] pages. Use additionalPaths to include them.

3. Wrong Priorities

Avoid giving all pages the same priority. Use a tiered system that reflects your content hierarchy.

4. Missing Error Handling

Always wrap dynamic content discovery in try-catch blocks to prevent build failures.

5. CRITICAL: Root Path Matching Bug

One of the most dangerous bugs in sitemap configurations is improper root path handling. Here's the complete context:

The Problem: Without special handling, the root path / will match ALL paths because every path starts with /. Here's what happens:

javascript
1// This logic has a fatal flaw 2const pathPriorities = { 3 main: { paths: ['/', '/about', '/blog'], priority: 1.0 }, 4 content: { paths: ['/blog/', '/talks/'], priority: 0.8 }, 5}; 6 7for (const [, { paths, priority }] of Object.entries(pathPriorities)) { 8 if (paths.some((p) => path.startsWith(p))) { 9 // BUG IS HERE! 10 return { priority }; 11 } 12} 13 14// What happens: 15// path = '/blog/my-post' 16// '/blog/my-post'.startsWith('/') → true (WRONG!) 17// Root path '/' matches everything, so ALL pages get priority 1.0

The Fix: Handle the root path as a special case that only matches exactly:

javascript
1// Special case for root path 2for (const [, { paths, priority, changefreq }] of Object.entries(pathPriorities)) { 3 if ( 4 paths.some((p) => { 5 if (p === '/') { 6 // Special case: root path only matches exactly 7 return path === '/'; 8 } 9 // For other paths, use normal prefix matching 10 return path === p || (p.endsWith('/') && path.startsWith(p)); 11 }) 12 ) { 13 return { 14 loc: path, 15 changefreq, 16 priority, 17 lastmod: new Date().toISOString(), 18 }; 19 } 20} 21 22// Now it works correctly: 23// path = '/' → matches '/' exactly → priority 1.0 ✓ 24// path = '/blog/my-post' → doesn't match '/' → continues to check '/blog/' → priority 0.8 ✓

Why this matters: Without this fix, ALL your pages would get the wrong priority (usually the first one in your list), which can severely impact SEO rankings.

6. Regex Issues with Apostrophes

When parsing dynamic content, simple regex patterns can break on apostrophes:

javascript
// Breaks on "Here's How": the closing class ['"`] accepts ANY quote character,
// so the lazy (.*?) stops at the apostrophe and captures only "Here".
const match = content.match(/title:\s*['"`](.*?)['"`]/);

The fix: Use separate patterns for each quote type:

javascript
// One pattern per quote style: each value may contain any character except its
// own delimiter, plus backslash-escaped characters.
const doubleQuoteMatch = match.match(/title:\s*"((?:[^"\\]|\\.)*)"/);
const singleQuoteMatch = match.match(/title:\s*'((?:[^'\\]|\\.)*)'/);
const backtickMatch = match.match(/title:\s*`((?:[^`\\]|\\.)*)`/);

7. Inconsistent Slug Generation

If your sitemap generates slugs differently than your Next.js app, you'll get duplicate entries:

javascript
// Simple replacement: every run of non-alphanumerics becomes a hyphen
// "Here's How" → "here-s-how"
const naiveSlug = title.toLowerCase().replace(/[^a-z0-9]+/g, '-');

// Match your app's logic
// "Here's How" → "heres-how"
const slug = title
  .toLowerCase()
  .replace(/[^\w\s-]/g, '') // Remove special characters except spaces and hyphens
  .replace(/\s+/g, '-') // Replace spaces with hyphens
  .replace(/-+/g, '-') // Replace multiple hyphens with single hyphen
  .trim()
  .replace(/^-+|-+$/g, ''); // Remove leading/trailing hyphens

Advanced Tips

1. Conditional Content Inclusion

javascript
// Skip posts that are explicitly unpublished or still marked as drafts.
if (data.publish !== false && !data.draft) {
  result.push({
    loc: `/blog/${slug}`,
    // ... rest of config
  });
}

2. Custom Change Frequencies

javascript
1// More frequent updates for time-sensitive content 2if (path.includes('/news/')) { 3 return { 4 changefreq: 'daily', 5 priority: 0.9, 6 }; 7}

Monitoring and Maintenance

1. Google Search Console

Submit your sitemap to Google Search Console and monitor:

  • Index coverage
  • Crawl errors
  • Sitemap processing status

2. Regular Audits

Periodically check your sitemap for:

  • Unwanted URLs
  • Missing important pages
  • Incorrect priorities or dates

3. Automated Testing

Consider adding tests to verify your sitemap configuration:

javascript
// Example test (Jest); run it after the build has generated public/sitemap.xml
const fs = require('fs');

test('sitemap excludes build artifacts', () => {
  const sitemap = fs.readFileSync('public/sitemap.xml', 'utf8');
  expect(sitemap).not.toContain('/_next/');
  expect(sitemap).not.toContain('.js');
});

Debugging and Testing Your Sitemap

Based on the critical bugs I discovered in my own configuration, here's how to properly test your sitemap:

1. Test Priority Assignments

Create a simple test to verify your transform function works correctly:

javascript
// Test your transform function
// Adjust the relative path to wherever this script lives.
const config = require('./next-sitemap.config.js');
const { transform } = config;

const testPaths = ['/', '/blog', '/blog/test-post', '/playground/tool'];

// Await each call in turn: an async callback passed to forEach is never
// awaited, so a rejection would go unhandled.
async function logTransformResults() {
  for (const path of testPaths) {
    const result = await transform(config, path);
    console.log(`${path}: priority ${result.priority}, changefreq ${result.changefreq}`);
  }
}

logTransformResults().catch((error) => {
  console.error('Transform check failed:', error);
  process.exitCode = 1;
});

Expected output:

  • /: priority 1, changefreq weekly (JavaScript prints the number 1.0 as 1)
  • /blog: priority 1, changefreq weekly
  • /blog/test-post: priority 0.8, changefreq monthly
  • /playground/tool: priority 0.6, changefreq monthly

2. Check for Duplicate Entries

javascript
// Check for duplicates in your sitemap
const fs = require('fs');

const sitemap = fs.readFileSync('public/sitemap.xml', 'utf8');
// Each element is a whole "<loc>…</loc>" tag, which is enough for comparing entries.
const urls = sitemap.match(/<loc>(.*?)<\/loc>/g) || [];
const uniqueUrls = new Set(urls);

if (urls.length !== uniqueUrls.size) {
  console.error('Duplicate URLs found in sitemap!');
  // Find duplicates: keep every occurrence after the first
  const duplicates = urls.filter((url, index) => urls.indexOf(url) !== index);
  console.log('Duplicates:', duplicates);
}

3. Validate Content Sources

Test that your dynamic content discovery works:

javascript
// Test your additionalPaths function
// (run this inside an async function or an ES module, where `await` is allowed)
const paths = await additionalPaths();
console.log(`Found ${paths.length} dynamic paths`);
paths.forEach((entry) => {
  console.log(`${entry.loc}: priority ${entry.priority}`);
});

4. Manual Sitemap Inspection

Always manually review your generated public/sitemap.xml:

  1. Check URL count: Does it match your expectations?
  2. Verify priorities: Are main pages getting priority 1.0?
  3. Look for unwanted URLs: Any build artifacts or component files?
  4. Check for duplicates: Same content with different URLs?

5. Unit Testing Your Configuration

Here's a comprehensive test suite for your sitemap config:

javascript
// Jest suite; run it after the build has generated public/sitemap.xml.
// Adjust the relative config path to wherever this test file lives.
const fs = require('fs');
const config = require('./next-sitemap.config.js');
const { transform } = config;

describe('Sitemap Configuration', () => {
  test('excludes build artifacts', () => {
    const sitemap = fs.readFileSync('public/sitemap.xml', 'utf8');
    expect(sitemap).not.toContain('/_next/');
    expect(sitemap).not.toContain('.js');
    expect(sitemap).not.toContain('.css');
  });

  test('includes main pages with correct priority', () => {
    const sitemap = fs.readFileSync('public/sitemap.xml', 'utf8');
    // The number 1.0 stringifies as "1", so accept both spellings.
    expect(sitemap).toMatch(/<priority>1(\.0)?<\/priority>/);
  });

  test('no duplicate URLs', () => {
    const sitemap = fs.readFileSync('public/sitemap.xml', 'utf8');
    const urls = sitemap.match(/<loc>(.*?)<\/loc>/g) || [];
    const uniqueUrls = new Set(urls);
    expect(urls.length).toBe(uniqueUrls.size);
  });

  test('root path gets correct priority', async () => {
    const result = await transform(config, '/');
    expect(result.priority).toBe(1.0);
    expect(result.changefreq).toBe('weekly');
  });

  test('blog posts get correct priority', async () => {
    const result = await transform(config, '/blog/test-post');
    expect(result.priority).toBe(0.8);
    expect(result.changefreq).toBe('monthly');
  });
});

Conclusion

A well-configured sitemap is a powerful SEO tool that helps search engines understand and index your content effectively. By excluding unwanted files, setting appropriate priorities, and handling dynamic content properly, you can significantly improve your website's search engine visibility.

The configuration I've shared has helped this website maintain clean, focused sitemaps that guide search engines to the most important content while avoiding unnecessary crawling of build artifacts and static assets.

Remember to regularly audit your sitemap and adjust the configuration as your website evolves. What works for one site might need tweaking for another, but the principles remain the same: keep it clean, prioritize correctly, and focus on content that matters to your users.

Resources