Optimizing Your Next.js Sitemap with next-sitemap: A Complete Guide
Introduction
A well-configured sitemap is crucial for SEO success, helping search engines discover and index your content efficiently. However, many Next.js websites have poorly optimized sitemaps that include build artifacts, static assets, and other files that shouldn't be indexed.
In this post, I'll walk you through optimizing your Next.js sitemap using `next-sitemap`, sharing the exact configuration I use for this website and the reasoning behind each decision.
Why Sitemaps Matter for SEO
Before diving into the technical details, let's understand why sitemaps are essential:
- Content Discovery: Help search engines find all your pages, especially dynamic content
- Crawl Efficiency: Guide crawlers to prioritize important content
- Metadata Communication: Provide information about page importance, update frequency, and modification dates
- Performance: Reduce server load by preventing crawlers from accessing unnecessary files
The Problem with Default Configurations
Most Next.js websites using `next-sitemap` start with a basic configuration like this:
1/** @type {import('next-sitemap').IConfig} */
2module.exports = {
3 siteUrl: 'https://example.com',
4 generateRobotsTxt: true,
5};
While this works, it often results in sitemaps that include:
- Next.js build artifacts (`/_next/static/chunks/...`)
- Image files and static assets
- API routes that shouldn't be indexed
- Component files and internal routes
My Optimized Configuration
Here's the complete `next-sitemap.config.js` configuration I use for this website:
1/** @type {import('next-sitemap').IConfig} */
2module.exports = {
3 siteUrl: 'https://www.yiminyang.dev',
4 generateIndexSitemap: false,
5 generateRobotsTxt: true,
6 exclude: [
7 '/blocked',
8 '/blocked/*',
9 '/api/*',
10 '/_next/*', // Next.js build artifacts
11 '/static/*', // Static assets
12 '*.js', // JavaScript files
13 '*.css', // CSS files
14 '*.map', // Source maps
15 '*.json', // JSON files (manifests, etc.)
16 '*.ico', // Favicon files
17 '*.png', // Image files
18 '*.jpg', // Image files
19 '*.jpeg', // Image files
20 '*.gif', // Image files
21 '*.svg', // SVG files (unless they're pages)
22 '*.webp', // Image files
23 '/playground/games/memory-card-game/memorycardgame', // Component file, not page
24 '/playground/games/whack-a-mole/whack-a-mole', // Component file, not page
25 '/playground/text-transformations/*/[A-Z]*', // Component files (capitalized)
26 '/playground/tools/qr-code-generator/[content]/*', // Dynamic route internals
27 ],
28 robotsTxtOptions: {
29 policies: [
30 {
31 userAgent: '*',
32 disallow: ['/blocked', '/api', '/_next', '/static'],
33 allow: ['/playground', '/blog', '/talks'],
34 },
35 ],
36 additionalSitemaps: ['https://www.yiminyang.dev/sitemap.xml'],
37 },
38 changefreq: 'weekly',
39 priority: 0.7,
40 sitemapSize: 5000,
41 // Custom transformation for specific pages
42 transform: async (config, path) => {
43 const pathPriorities = {
44 main: { paths: ['/', '/about', '/blog', '/talks'], priority: 1.0, changefreq: 'weekly' },
45 content: { paths: ['/blog/', '/talks/'], priority: 0.8, changefreq: 'monthly' },
46 playground: { paths: ['/playground/'], priority: 0.6, changefreq: 'monthly' },
47 };
48
49 for (const [, { paths, priority, changefreq }] of Object.entries(pathPriorities)) {
50 if (
51 paths.some((p) => {
52 if (p === '/') {
53 // Special case: root path only matches exactly
54 return path === '/';
55 }
56 return path === p || (p.endsWith('/') && path.startsWith(p));
57 })
58 ) {
59 return {
60 loc: path,
61 changefreq,
62 priority,
63 lastmod: new Date().toISOString(),
64 };
65 }
66 }
67
68 return {
69 loc: path,
70 changefreq: config.changefreq,
71 priority: config.priority,
72 lastmod: new Date().toISOString(),
73 };
74 },
75 additionalPaths: async () => {
76 const fs = require('fs');
77 const path = require('path');
78 const result = [];
79
80 // Helper function to safely process files
81 const safeProcess = async (description, processor) => {
82 try {
83 await processor();
84 } catch (error) {
85 console.warn(`Could not load ${description} for sitemap:`, error.message);
86 }
87 };
88
89 // Add blog posts
90 await safeProcess('blog posts', () => {
91 const matter = require('gray-matter');
92 const postsDir = path.join(process.cwd(), 'public', 'content', 'blog', 'posts');
93
94 if (!fs.existsSync(postsDir)) return;
95
96 fs.readdirSync(postsDir)
97 .filter((file) => file.endsWith('.mdx'))
98 .forEach((file) => {
99 try {
100 const filePath = path.join(postsDir, file);
101 const { data } = matter(fs.readFileSync(filePath, 'utf8'));
102
103 if (data.publish !== false) {
104 const slug = file.replace(/\.mdx$/, '').toLowerCase();
105 result.push({
106 loc: `/blog/${slug}`,
107 changefreq: 'monthly',
108 priority: 0.8,
109 lastmod: data.modifiedDate || data.date || new Date().toISOString(),
110 });
111 }
112 } catch (error) {
113 console.warn(`Could not process blog post ${file}:`, error.message);
114 }
115 });
116 });
117
118 // Add talks with improved regex handling
119 await safeProcess('talks', () => {
120 const talksPath = path.join(process.cwd(), 'lib', 'data', 'talks.ts');
121
122 if (!fs.existsSync(talksPath)) return;
123
124 const content = fs.readFileSync(talksPath, 'utf8');
125 const talkMatches = content.match(/{\s*id:\s*\d+,[\s\S]*?}/g) || [];
126
127 talkMatches.forEach((match) => {
128 try {
129 const extractField = (field) => {
130 // Improved regex to handle apostrophes within quoted strings
131 const doubleQuoteMatch = match.match(new RegExp(`${field}:\\s*"((?:[^"\\\\]|\\\\.)*)"`));
132 const singleQuoteMatch = match.match(new RegExp(`${field}:\\s*'((?:[^'\\\\]|\\\\.)*)'`));
133 const backtickMatch = match.match(new RegExp(`${field}:\\s*\`((?:[^\`\\\\]|\\\\.)*)\``));
134
135 return doubleQuoteMatch?.[1] || singleQuoteMatch?.[1] || backtickMatch?.[1];
136 };
137
138 const title = extractField('title');
139 if (!title) return;
140
141 const date = extractField('date') || new Date().toISOString();
142 const slug =
143 extractField('slug') ||
144 title
145 .toLowerCase()
146 .replace(/[^\w\s-]/g, '') // Remove special characters except spaces and hyphens
147 .replace(/\s+/g, '-') // Replace spaces with hyphens
148 .replace(/-+/g, '-') // Replace multiple hyphens with single hyphen
149 .trim()
150 .replace(/^-+|-+$/g, ''); // Remove leading/trailing hyphens
151
152 result.push({
153 loc: `/talks/${slug}`,
154 changefreq: 'monthly',
155 priority: 0.8,
156 lastmod: new Date(date).toISOString(),
157 });
158 } catch (error) {
159 console.warn('Could not process talk:', error.message);
160 }
161 });
162 });
163
164 return result;
165 },
166};
Key Configuration Decisions
1. Comprehensive Exclusions
The `exclude` array is crucial for keeping your sitemap clean:
1exclude: [
2 '/_next/*', // Next.js build artifacts
3 '*.js', // JavaScript files
4 '*.css', // CSS files
5 '*.png', // Image files
6 // ... other static assets
7];
Why this matters: Without these exclusions, your sitemap might include URLs like:
/_next/static/chunks/240a8089e20a3158.js
/favicon.ico
/apple-icon.png
These files shouldn't be indexed by search engines as they're not content pages.
2. Data-Driven Priority System with Critical Bug Fix
I use a data-driven approach in the `transform` function that includes a critical fix for root path matching:
1transform: async (config, path) => {
2 const pathPriorities = {
3 main: { paths: ['/', '/about', '/blog', '/talks'], priority: 1.0, changefreq: 'weekly' },
4 content: { paths: ['/blog/', '/talks/'], priority: 0.8, changefreq: 'monthly' },
5 playground: { paths: ['/playground/'], priority: 0.6, changefreq: 'monthly' },
6 };
7
8 for (const [, { paths, priority, changefreq }] of Object.entries(pathPriorities)) {
9 if (
10 paths.some((p) => {
11 if (p === '/') {
12 // Special case: root path only matches exactly
13 return path === '/';
14 }
15 return path === p || (p.endsWith('/') && path.startsWith(p));
16 })
17 ) {
18 return {
19 loc: path,
20 changefreq,
21 priority,
22 lastmod: new Date().toISOString(),
23 };
24 }
25 }
26
27 return {
28 loc: path,
29 changefreq: config.changefreq,
30 priority: config.priority,
31 lastmod: new Date().toISOString(),
32 };
33};
Critical Bug Fix: The special handling for the root path (`/`) is essential. Without it, the root path would match ALL paths (since every path starts with `/`), causing incorrect priority assignments. This bug can significantly impact your SEO by giving wrong priorities to your pages.
This data-driven approach reduces code duplication and makes it easy to adjust priorities and change frequencies for different content types.
3. Robust Dynamic Content Handling with Error Recovery
The `additionalPaths` function uses a sophisticated approach with centralized error handling:
1additionalPaths: async () => {
2 const fs = require('fs');
3 const path = require('path');
4 const result = [];
5
6 // Helper function to safely process files
7 const safeProcess = async (description, processor) => {
8 try {
9 await processor();
10 } catch (error) {
11 console.warn(`Could not load ${description} for sitemap:`, error.message);
12 }
13 };
14
15 // Add blog posts
16 await safeProcess('blog posts', () => {
17 const matter = require('gray-matter');
18 const postsDir = path.join(process.cwd(), 'public', 'content', 'blog', 'posts');
19
20 if (!fs.existsSync(postsDir)) return;
21
22 fs.readdirSync(postsDir)
23 .filter((file) => file.endsWith('.mdx'))
24 .forEach((file) => {
25 try {
26 const filePath = path.join(postsDir, file);
27 const { data } = matter(fs.readFileSync(filePath, 'utf8'));
28
29 if (data.publish !== false) {
30 const slug = file.replace(/\.mdx$/, '').toLowerCase();
31 result.push({
32 loc: `/blog/${slug}`,
33 changefreq: 'monthly',
34 priority: 0.8,
35 lastmod: data.modifiedDate || data.date || new Date().toISOString(),
36 });
37 }
38 } catch (error) {
39 console.warn(`Could not process blog post ${file}:`, error.message);
40 }
41 });
42 });
43
44 // Add talks with improved regex handling
45 await safeProcess('talks', () => {
46 const talksPath = path.join(process.cwd(), 'lib', 'data', 'talks.ts');
47
48 if (!fs.existsSync(talksPath)) return;
49
50 const content = fs.readFileSync(talksPath, 'utf8');
51 const talkMatches = content.match(/{\s*id:\s*\d+,[\s\S]*?}/g) || [];
52
53 talkMatches.forEach((match) => {
54 try {
55 const extractField = (field) => {
56 // Improved regex to handle apostrophes within quoted strings
57 const doubleQuoteMatch = match.match(new RegExp(`${field}:\\s*"((?:[^"\\\\]|\\\\.)*)"`));
58 const singleQuoteMatch = match.match(new RegExp(`${field}:\\s*'((?:[^'\\\\]|\\\\.)*)'`));
59 const backtickMatch = match.match(new RegExp(`${field}:\\s*\`((?:[^\`\\\\]|\\\\.)*)\``));
60
61 return doubleQuoteMatch?.[1] || singleQuoteMatch?.[1] || backtickMatch?.[1];
62 };
63
64 const title = extractField('title');
65 if (!title) return;
66
67 const date = extractField('date') || new Date().toISOString();
68 const slug =
69 extractField('slug') ||
70 title
71 .toLowerCase()
72 .replace(/[^\w\s-]/g, '') // Remove special characters except spaces and hyphens
73 .replace(/\s+/g, '-') // Replace spaces with hyphens
74 .replace(/-+/g, '-') // Replace multiple hyphens with single hyphen
75 .trim()
76 .replace(/^-+|-+$/g, ''); // Remove leading/trailing hyphens
77
78 result.push({
79 loc: `/talks/${slug}`,
80 changefreq: 'monthly',
81 priority: 0.8,
82 lastmod: new Date(date).toISOString(),
83 });
84 } catch (error) {
85 console.warn('Could not process talk:', error.message);
86 }
87 });
88 });
89
90 return result;
91};
Key improvements:
- `safeProcess` helper: Centralizes error handling to prevent build failures
- Improved regex: Handles apostrophes within quoted strings correctly
- Consistent slug generation: Uses the same logic as the Next.js app to prevent duplicate entries
- Multiple content sources: Handles both blog posts and talks with appropriate error recovery
4. Enhanced robots.txt
The `robotsTxtOptions` configuration creates a comprehensive robots.txt:
1User-agent: *
2Allow: /playground
3Allow: /blog
4Allow: /talks
5Disallow: /blocked
6Disallow: /api
7Disallow: /_next
8Disallow: /static
9
10Sitemap: https://www.yiminyang.dev/sitemap.xml
This explicitly tells crawlers which paths they may crawl and which to avoid.
Results and Benefits
After implementing this configuration and fixing critical bugs, my website's sitemap went from including unwanted build artifacts to a clean, focused list of 59 relevant URLs:
- 4 Main pages (priority 1.0)
- 8 Blog posts (priority 0.8)
- 29 Talk pages (priority 0.8)
- 13 Playground tools (priority 0.6)
- 5 Other pages (legal, contact, etc.)
Critical Bug Fixes Applied:
- Fixed root path matching that was causing incorrect priority assignments
- Resolved duplicate entries caused by inconsistent slug generation
- Improved regex handling for apostrophes in titles
SEO Benefits
- Cleaner crawling: Search engines focus on actual content, not build artifacts
- Better prioritization: Important pages get higher priority scores (fixed the root path bug)
- Accurate metadata: Real publication dates instead of build timestamps
- No duplicate entries: Consistent slug generation prevents confusion
- Reduced server load: Fewer unnecessary requests from crawlers
Performance Benefits
- Smaller sitemap files: Only relevant URLs are included
- Faster generation: Efficient exclusion patterns and error handling
- Better caching: Static sitemap generation during build
- Robust error recovery: Build doesn't fail if content sources are unavailable
Common Pitfalls to Avoid
1. Including Build Artifacts
Always exclude `/_next/*` and static assets. These files change with every build and shouldn't be indexed.
2. Ignoring Dynamic Content
Don't forget to handle dynamic routes like `[slug]` pages. Use `additionalPaths` to include them.
3. Wrong Priorities
Avoid giving all pages the same priority. Use a tiered system that reflects your content hierarchy.
4. Missing Error Handling
Always wrap dynamic content discovery in try-catch blocks to prevent build failures.
5. CRITICAL: Root Path Matching Bug
One of the most dangerous bugs in sitemap configurations is improper root path handling. Here's the complete context:
The Problem: Without special handling, the root path `/` will match ALL paths because every path starts with `/`. Here's what happens:
1// This logic has a fatal flaw
2const pathPriorities = {
3 main: { paths: ['/', '/about', '/blog'], priority: 1.0 },
4 content: { paths: ['/blog/', '/talks/'], priority: 0.8 },
5};
6
7for (const [, { paths, priority }] of Object.entries(pathPriorities)) {
8 if (paths.some((p) => path.startsWith(p))) {
9 // BUG IS HERE!
10 return { priority };
11 }
12}
13
14// What happens:
15// path = '/blog/my-post'
16// '/blog/my-post'.startsWith('/') → true (WRONG!)
17// Root path '/' matches everything, so ALL pages get priority 1.0
The Fix: Handle the root path as a special case that only matches exactly:
1// Special case for root path
2for (const [, { paths, priority, changefreq }] of Object.entries(pathPriorities)) {
3 if (
4 paths.some((p) => {
5 if (p === '/') {
6 // Special case: root path only matches exactly
7 return path === '/';
8 }
9 // For other paths, use normal prefix matching
10 return path === p || (p.endsWith('/') && path.startsWith(p));
11 })
12 ) {
13 return {
14 loc: path,
15 changefreq,
16 priority,
17 lastmod: new Date().toISOString(),
18 };
19 }
20}
21
22// Now it works correctly:
23// path = '/' → matches '/' exactly → priority 1.0 ✓
24// path = '/blog/my-post' → doesn't match '/' → continues to check '/blog/' → priority 0.8 ✓
Why this matters: Without this fix, ALL your pages would get the wrong priority (usually the first one in your list), which can severely impact SEO rankings.
6. Regex Issues with Apostrophes
When parsing dynamic content, simple regex patterns can break on apostrophes:
1// Breaks on "Here's How"
2const match = content.match(/title:\s*['"`](.*?)['"`]/);
The fix: Use separate patterns for each quote type:
1const doubleQuoteMatch = match.match(/title:\s*"((?:[^"\\]|\\.)*)"/);
2const singleQuoteMatch = match.match(/title:\s*'((?:[^'\\]|\\.)*)'/);
3const backtickMatch = match.match(/title:\s*`((?:[^`\\]|\\.)*)`/);
7. Inconsistent Slug Generation
If your sitemap generates slugs differently than your Next.js app, you'll get duplicate entries:
1// Simple replacement
2const slug = title.toLowerCase().replace(/[^a-z0-9]+/g, '-');
3
4// Match your app's logic
5const slug = title
6 .toLowerCase()
7 .replace(/[^\w\s-]/g, '') // Remove special characters except spaces and hyphens
8 .replace(/\s+/g, '-') // Replace spaces with hyphens
9 .replace(/-+/g, '-') // Replace multiple hyphens with single hyphen
10 .trim()
11 .replace(/^-+|-+$/g, ''); // Remove leading/trailing hyphens
Advanced Tips
1. Conditional Content Inclusion
1if (data.publish !== false && !data.draft) {
2 result.push({
3 loc: `/blog/${slug}`,
4 // ... rest of config
5 });
6}
2. Custom Change Frequencies
1// More frequent updates for time-sensitive content
2if (path.includes('/news/')) {
3 return {
4 changefreq: 'daily',
5 priority: 0.9,
6 };
7}
Monitoring and Maintenance
1. Google Search Console
Submit your sitemap to Google Search Console and monitor:
- Index coverage
- Crawl errors
- Sitemap processing status
2. Regular Audits
Periodically check your sitemap for:
- Unwanted URLs
- Missing important pages
- Incorrect priorities or dates
3. Automated Testing
Consider adding tests to verify your sitemap configuration:
1// Example test
2test('sitemap excludes build artifacts', () => {
3 const sitemap = fs.readFileSync('public/sitemap.xml', 'utf8');
4 expect(sitemap).not.toContain('/_next/');
5 expect(sitemap).not.toContain('.js');
6});
Debugging and Testing Your Sitemap
Based on the critical bugs I discovered in my own configuration, here's how to properly test your sitemap:
1. Test Priority Assignments
Create a simple test to verify your transform function works correctly:
1// Test your transform function
2const testPaths = ['/', '/blog', '/blog/test-post', '/playground/tool'];
3testPaths.forEach(async (path) => {
4 const result = await transform(config, path);
5 console.log(`${path}: priority ${result.priority}, changefreq ${result.changefreq}`);
6});
Expected output:
/: priority 1.0, changefreq weekly
/blog: priority 1.0, changefreq weekly
/blog/test-post: priority 0.8, changefreq monthly
/playground/tool: priority 0.6, changefreq monthly
2. Check for Duplicate Entries
1// Check for duplicates in your sitemap
2const sitemap = fs.readFileSync('public/sitemap.xml', 'utf8');
3const urls = sitemap.match(/<loc>(.*?)<\/loc>/g) || [];
4const uniqueUrls = new Set(urls);
5
6if (urls.length !== uniqueUrls.size) {
7 console.error('Duplicate URLs found in sitemap!');
8 // Find duplicates
9 const duplicates = urls.filter((url, index) => urls.indexOf(url) !== index);
10 console.log('Duplicates:', duplicates);
11}
3. Validate Content Sources
Test that your dynamic content discovery works:
1// Test your additionalPaths function
2const paths = await additionalPaths();
3console.log(`Found ${paths.length} dynamic paths`);
4paths.forEach((path) => {
5 console.log(`${path.loc}: priority ${path.priority}`);
6});
4. Manual Sitemap Inspection
Always manually review your generated `public/sitemap.xml`:
- Check URL count: Does it match your expectations?
- Verify priorities: Are main pages getting priority 1.0?
- Look for unwanted URLs: Any build artifacts or component files?
- Check for duplicates: Same content with different URLs?
5. Unit Testing Your Configuration
Here's a comprehensive test suite for your sitemap config:
1describe('Sitemap Configuration', () => {
2 test('excludes build artifacts', () => {
3 const sitemap = fs.readFileSync('public/sitemap.xml', 'utf8');
4 expect(sitemap).not.toContain('/_next/');
5 expect(sitemap).not.toContain('.js');
6 expect(sitemap).not.toContain('.css');
7 });
8
9 test('includes main pages with correct priority', () => {
10 const sitemap = fs.readFileSync('public/sitemap.xml', 'utf8');
11 expect(sitemap).toContain('<priority>1.0</priority>');
12 });
13
14 test('no duplicate URLs', () => {
15 const sitemap = fs.readFileSync('public/sitemap.xml', 'utf8');
16 const urls = sitemap.match(/<loc>(.*?)<\/loc>/g) || [];
17 const uniqueUrls = new Set(urls);
18 expect(urls.length).toBe(uniqueUrls.size);
19 });
20
21 test('root path gets correct priority', async () => {
22 const result = await transform(config, '/');
23 expect(result.priority).toBe(1.0);
24 expect(result.changefreq).toBe('weekly');
25 });
26
27 test('blog posts get correct priority', async () => {
28 const result = await transform(config, '/blog/test-post');
29 expect(result.priority).toBe(0.8);
30 expect(result.changefreq).toBe('monthly');
31 });
32});
Conclusion
A well-configured sitemap is a powerful SEO tool that helps search engines understand and index your content effectively. By excluding unwanted files, setting appropriate priorities, and handling dynamic content properly, you can significantly improve your website's search engine visibility.
The configuration I've shared has helped this website maintain clean, focused sitemaps that guide search engines to the most important content while avoiding unnecessary crawling of build artifacts and static assets.
Remember to regularly audit your sitemap and adjust the configuration as your website evolves. What works for one site might need tweaking for another, but the principles remain the same: keep it clean, prioritize correctly, and focus on content that matters to your users.