We have two sitemap files, sitemapA.xml and sitemapB.xml, and we want to compare them to see whether sitemapA.xml contains all the URLs declared in sitemapB.xml. If any of those URLs are not in sitemapA.xml, we log them.
Here's a minimal sitemap example:
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>http://www.e.com/</loc>
<lastmod>2006-05-01</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
</urlset>
Approach
We will use the xml2js package to parse the XML.
npm i -D xml2js
This is the Node.js script that compares the two sitemap files. It reads each file, parses the XML into JavaScript objects, indexes the URLs of sitemapA.xml in a JavaScript Map named m, and then checks whether every URL in sitemapB.xml is contained in that index:
const { promises: fs } = require("fs");
const xml2js = require("xml2js");
// Shared xml2js parser instance, created with the library's default options;
// compare() uses it to turn each sitemap's XML text into a JavaScript object.
const parser = new xml2js.Parser(/* options */);
/**
 * Logs every URL declared in sitemapB.xml that is missing from sitemapA.xml.
 *
 * Reads ./sitemapA.xml and ./sitemapB.xml, parses both with the module-level
 * `parser`, and prints each missing <loc> value with console.log, in the
 * order it appears in sitemapB.xml.
 *
 * @returns {Promise<void>} Resolves once all missing URLs have been logged;
 *   rejects if either file cannot be read or parsed.
 */
async function compare() {
  // Collects the first <loc> value of every <url> entry. A sitemap without
  // any <url> children has no `urlset.url` array, and an entry may lack
  // <loc>; both are treated as "no URL" rather than raising a TypeError.
  const locsOf = (sitemap) => {
    const entries = (sitemap && sitemap.urlset && sitemap.urlset.url) || [];
    return entries
      .filter((entry) => entry && Array.isArray(entry.loc) && entry.loc.length > 0)
      .map((entry) => entry.loc[0]);
  };

  // The two reads are independent, so issue them concurrently.
  const [xmlA, xmlB] = await Promise.all([
    fs.readFile('./sitemapA.xml', 'utf8'),
    fs.readFile('./sitemapB.xml', 'utf8'),
  ]);

  // Parsing stays sequential because both calls share one parser instance.
  const jsA = await parser.parseStringPromise(xmlA);
  const jsB = await parser.parseStringPromise(xmlB);

  // A Set with `.has` tests membership directly; a truthiness check on the
  // looked-up value would misreport an empty-string <loc> as missing.
  const known = new Set(locsOf(jsA));
  locsOf(jsB).forEach((loc) => {
    if (!known.has(loc)) {
      console.log(loc);
    }
  });
}
// Run the comparison. Report any read/parse failure explicitly and signal it
// through the exit code instead of leaving the returned promise unhandled.
compare().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});