We have two sitemap files, sitemapA.xml and sitemapB.xml, and we want to check whether sitemapA.xml contains every URL declared in sitemapB.xml. Any URL from sitemapB.xml that is not found in sitemapA.xml is logged.
Here's a minimal sitemap example:
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>http://www.e.com/</loc>
<lastmod>2006-05-01</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
</urlset>
Approach
We will use the xml2js package to parse the XML.
npm i -D xml2js
This is the Node.js script that will be used to compare the two sitemap files. It reads in each file, parses the XML into JavaScript objects, indexes the URLs of sitemapA.xml using a JavaScript Map (m), and then checks whether every URL in sitemapB.xml is present in that index:
const { promises: fs } = require("fs");
const xml2js = require("xml2js");
// Single xml2js parser instance shared by both parse calls in compare().
// Never reassigned, so it is declared with const.
const parser = new xml2js.Parser(/* options */);
/**
 * Logs every URL listed in ./sitemapB.xml that is not listed in
 * ./sitemapA.xml.
 *
 * Both files are read as UTF-8 and parsed with the module-level `parser`.
 * Each missing URL is written with console.log, once per occurrence in
 * sitemapB.xml, in document order.
 *
 * @returns {Promise<void>} Resolves once the comparison has been logged;
 *   rejects if a file cannot be read or parsed.
 */
async function compare() {
  // The two reads are independent of each other, so issue them together.
  const [xmlA, xmlB] = await Promise.all([
    fs.readFile("./sitemapA.xml", "utf8"),
    fs.readFile("./sitemapB.xml", "utf8"),
  ]);

  // Parsing stays sequential because a single parser instance is reused.
  const jsA = await parser.parseStringPromise(xmlA);
  const jsB = await parser.parseStringPromise(xmlB);

  // Index of the URLs present in sitemapA.xml, keyed by URL.
  const m = new Map();
  for (const loc of extractLocs(jsA)) {
    m.set(loc, loc);
  }

  for (const loc of extractLocs(jsB)) {
    // Use has() rather than a truthiness test on get(): a key whose stored
    // value is falsy (e.g. an empty <loc>) is still present in the index.
    if (!m.has(loc)) {
      console.log(loc);
    }
  }
}

/**
 * Collects the first <loc> value of every <url> entry in a parsed sitemap.
 *
 * @param {object} doc - Parsed sitemap, expected to look like
 *   `{ urlset: { url: [{ loc: [string] }, ...] } }`.
 * @returns {Array} The `loc[0]` values in document order; empty when the
 *   sitemap has no `url` entries.
 */
function extractLocs(doc) {
  // A sitemap without any <url> element has no `url` property after parsing
  // (presumably `urlset` may even be an empty string then - confirm against
  // the parser options in use), so fall back to an empty list.
  const entries = (doc && doc.urlset && doc.urlset.url) || [];
  const locs = [];
  for (const entry of entries) {
    // An entry without a <loc> carries no URL to compare; skip it.
    if (entry && Array.isArray(entry.loc) && entry.loc.length > 0) {
      locs.push(entry.loc[0]);
    }
  }
  return locs;
}
// compare() returns a promise; handle a rejection explicitly (unreadable
// file, malformed XML) instead of leaving the promise floating, and signal
// the failure through the process exit code.
compare().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});