Error handling for different RSS feeds

This commit is contained in:
Logan Cusano
2023-03-17 23:27:12 -04:00
parent c8b90ca220
commit edef8a4c7d
3 changed files with 115 additions and 40 deletions

View File

@@ -25,6 +25,52 @@ let parser = new Parser({
var feedStorage = new FeedStorage(); var feedStorage = new FeedStorage();
var postStorage = new PostStorage(); var postStorage = new PostStorage();
// Running tally of failures per source, keyed by the source URL.
/*
  runningPostsToRemove = {
    "{SOURCE URL}": {NUMBER OF TIMES IT HAS FAILED}
  }
*/
const runningPostsToRemove = {};

/**
 * Turn the raw SOURCE_FAILURE_LIMIT setting into a non-negative integer.
 *
 * Environment variables are always strings; a value that does not parse
 * (or is negative) would make every numeric comparison against the failure
 * counter false, so it is replaced by the fallback instead.
 *
 * @param {string|undefined} rawLimit The raw setting, usually from process.env
 * @param {number} [fallback=3] The value used when rawLimit is missing or invalid
 * @returns {number} The number of failures tolerated before a source is removed
 */
function parseSourceFailureLimit(rawLimit, fallback = 3) {
  const parsedLimit = Number.parseInt(rawLimit, 10);
  return Number.isInteger(parsedLimit) && parsedLimit >= 0 ? parsedLimit : fallback;
}

const sourceFailureLimit = parseSourceFailureLimit(process.env.SOURCE_FAILURE_LIMIT);
/**
*
* @param {*} sourceURL
*/
exports.removeSource = function removeSource(sourceURL) {
log.INFO("Removing source URL: ", sourceURL);
if (!sourceURL in runningPostsToRemove) {runningPostsToRemove[sourceURL] = 1; return;}
if (runningPostsToRemove[sourceURL] < sourceFailureLimit) {runningPostsToRemove[sourceURL] += 1; return;}
feedStorage.getRecordBy('link', sourceURL, (err, record) => {
if (err) log.ERROR("Error getting record from feedStorage", err);
if (!record) log.ERROR("No source returned from feedStorage");
feedStorage.destroy(record.id, (err, results) => {
if (err) log.ERROR("Error removing ID from results", err);
if (!results) log.WARN("No results from remove entry");
log.DEBUG("Source exceeded the limit of retries and has been removed", sourceURL);
return;
})
})
}
/**
* Unset a source URL from deletion if the source has not already been deleted
* @param {*} sourceURL The source URL to be unset from deletion
* @returns {*}
*/
exports.unsetRemoveSource = function unsetRemoveSource(sourceURL) {
log.INFO("Unsetting source URL from deletion (if not already deleted): ", sourceURL);
if (!sourceURL in runningPostsToRemove) return;
if (runningPostsToRemove[sourceURL] > sourceFailureLimit) return delete runningPostsToRemove[sourceURL];
}
/** /**
* Adds or updates new source url to configured storage * Adds or updates new source url to configured storage
* @constructor * @constructor
@@ -105,29 +151,31 @@ exports.updateFeeds = (client) => {
tempFeedStorage.getAllRecords(async (err, records) => { tempFeedStorage.getAllRecords(async (err, records) => {
// Load the posts from each RSS source // Load the posts from each RSS source
for (const source of records) { for (const source of records) {
sourcePromiseArray.push(new Promise((resolve, reject) => { sourcePromiseArray.push(new Promise((resolve, reject) => {
log.DEBUG('Record title: ', source.title); log.DEBUG('Record title: ', source.title);
log.DEBUG('Record link: ', source.link); log.DEBUG('Record link: ', source.link);
log.DEBUG('Record category: ', source.category); log.DEBUG('Record category: ', source.category);
log.DEBUG('Record guild ID: ', source.guild_id); log.DEBUG('Record guild ID: ', source.guild_id);
log.DEBUG('Record channel ID: ', source.channel_id); log.DEBUG('Record channel ID: ', source.channel_id);
// Parse the RSS feed // Parse the RSS feed
parser.parseURL(source.link, async (err, parsedFeed) => { parser.parseURL(source.link, async (err, parsedFeed) => {
if (err) { if (err) {
log.ERROR("Parser Error: ", source, err); log.ERROR("Parser Error: ", runningPostsToRemove, source, err);
// Call the wrapper to make sure the site isn't just down at the time it checks and is back up the next time
this.removeSource(source.link);
reject; reject;
} }
try { try {
log.DEBUG("Parsed Feed Keys", Object.keys(parsedFeed), parsedFeed?.title); if (parsedFeed?.items){
if (parsedFeed?.items){ this.unsetRemoveSource(source.link);
for (const post of parsedFeed.items){ for (const post of parsedFeed.items){
recordPromiseArray.push(new Promise((recordResolve, recordReject) => { recordPromiseArray.push(new Promise((recordResolve, recordReject) => {
log.DEBUG("Parsed Source Keys", Object.keys(post), post?.title); log.DEBUG("Parsed Source Keys", Object.keys(post), post?.title);
log.VERBOSE("Post from feed: ", post); log.VERBOSE("Post from feed: ", post);
if (!post.title || !post.link || !post.pubDate) return recordReject("Missing information from the post"); if (!post.title || !post.link) return recordReject("Missing information from the post");
if (!post.content || !post['content:encoded']) log.WARN("There is no content for post: ", post.title); if (!post.content || !post['content:encoded']) log.WARN("There is no content for post: ", post.title);
post.postId = post.postId ?? post.guid ?? post.id ?? libUtils.returnHash(post.title, post.link, post.pubDate); post.postId = post.postId ?? post.guid ?? post.id ?? libUtils.returnHash(post.title, post.link, post.pubDate ?? Date.now());
tempPostStorage.getRecordBy('post_guid', post.postId, (err, existingRecord) => { tempPostStorage.getRecordBy('post_guid', post.postId, (err, existingRecord) => {
if (err) throw err; if (err) throw err;
@@ -152,15 +200,19 @@ exports.updateFeeds = (client) => {
log.DEBUG("Saved results: ", saveResults); log.DEBUG("Saved results: ", saveResults);
return recordResolve("Saved results", saveResults); return recordResolve("Saved results", saveResults);
} }
}); });
}) })
}) })
})) }))
} }
} }
else {
this.removeSource(source.link);
}
} }
catch (err) { catch (err) {
log.ERROR("Error Parsing Feed: ", source.link, err); log.ERROR("Error Parsing Feed: ", source.link, err);
this.removeSource(source.link);
throw err; throw err;
} }
Promise.all(recordPromiseArray).then((values) => { Promise.all(recordPromiseArray).then((values) => {

View File

@@ -75,16 +75,21 @@ exports.onError = (error) => {
} }
exports.sendPost = (post, source, channel, callback) => { exports.sendPost = (post, source, channel, callback) => {
log.DEBUG("Sending post from source: ", post, source); log.DEBUG("Sending post from source: ", post, source);
// Reset the content parameter with the encoded parameter const postTitle = String(post.title).substring(0, 150);
post.content = parse(post['content:encoded'] ?? post.content);
const postTitle = post.title;
const postLink = post.link; const postLink = post.link;
// Get the post content and trim it to length or add a placeholder if necessary let postContent;
var postText = String(post.content.text);
if (postText.length >= 300) postText = `${postText.slice(0, 300).substring(0, Math.min(String(post.content.text).length, String(post.content.text).lastIndexOf(" ")))}...`; if (post.content) {
else if (postText.length === 0) postText = `*This post has no content* [Direct Link](${post.link})`; // Reset the content parameter with the encoded parameter
var postContent = postText; post.content = parse(post['content:encoded'] ?? post.content);
// Get the post content and trim it to length or add a placeholder if necessary
var postText = String(post.content.text);
if (postText.length >= 300) postText = `${postText.slice(0, 300).substring(0, Math.min(String(post.content.text).length, String(post.content.text).lastIndexOf(" ")))}...`;
else if (postText.length === 0) postText = `*This post has no content* [Direct Link](${post.link})`;
postContent = postText;
}
// Check for embedded youtube videos and add the first four as links // Check for embedded youtube videos and add the first four as links
const ytVideos = String(post.content).match(youtubeVideoRegex); const ytVideos = String(post.content).match(youtubeVideoRegex);
if (ytVideos) { if (ytVideos) {
@@ -97,23 +102,27 @@ exports.sendPost = (post, source, channel, callback) => {
log.DEBUG("Post content: ", postContent); log.DEBUG("Post content: ", postContent);
const postId = post.postId; const postId = post.postId;
const postPubDate = new Date(post.pubDate).toISOString() ?? new Date().toISOString(); if (!post.pubDate) post.pubDate = Date.now();
const postPubDate = new Date(post.pubDate).toISOString();
var postSourceLink = source.title; var postSourceLink = source.title;
var postImage = post.image ?? undefined; var postImage = post.image ?? undefined;
if (!postImage){ if (!postImage){
const linksInPost = post.content.querySelectorAll("a"); if (post.content){
if (linksInPost) { const linksInPost = post.content.querySelectorAll("a");
log.DEBUG("Found links in post:", linksInPost); if (linksInPost) {
for (const link of linksInPost) { log.DEBUG("Found links in post:", linksInPost);
// Check to see if this link is a youtube video that was already found, if so skip it for (const link of linksInPost) {
if (ytVideos?.includes(link)) continue; // Check to see if this link is a youtube video that was already found, if so skip it
const images = String(link.getAttribute("href")).match(imageRegex); if (ytVideos?.includes(link)) continue;
log.DEBUG("Images found in post:", images); const images = String(link.getAttribute("href")).match(imageRegex);
if (images) { log.DEBUG("Images found in post:", images);
postImage = images[0]; if (images) {
} postImage = images[0];
} }
}
}
} }
} }
@@ -123,15 +132,17 @@ exports.sendPost = (post, source, channel, callback) => {
.setColor(0x0099FF) .setColor(0x0099FF)
.setTitle(postTitle) .setTitle(postTitle)
.setURL(postLink) .setURL(postLink)
.addFields({ name: "Post Content", value: postContent, inline: false }) .addFields({ name: 'Source', value: postSourceLink, inline: true })
.addFields({ name: 'Published', value: postPubDate, inline: true }) .addFields({ name: 'Published', value: postPubDate, inline: true });
.addFields({ name: 'Source', value: postSourceLink, inline: true });
// TODO - If there is more than one image, create a canvas and post the created canvas // TODO - If there is more than one image, create a canvas and post the created canvas
if (postImage) { if (postImage) {
log.DEBUG("Image from post:", postImage); log.DEBUG("Image from post:", postImage);
rssMessage.setImage(postImage); rssMessage.setImage(postImage);
} }
//Add the main content if it's present
if (postContent) rssMessage.addFields({ name: "Post Content", value: postContent, inline: false })
channel.send({ embeds: [rssMessage] }); channel.send({ embeds: [rssMessage] });
@@ -140,7 +151,7 @@ exports.sendPost = (post, source, channel, callback) => {
return callback(undefined, true); return callback(undefined, true);
} }
catch (err){ catch (err){
log.ERROR("Error sending message: ", err); log.ERROR("Error sending message: ", postTitle, postId, postContent, postPubDate, err);
return callback(err, undefined); return callback(err, undefined);
} }
} }
@@ -148,3 +159,15 @@ exports.sendPost = (post, source, channel, callback) => {
exports.returnHash = (...stringsIncluded) => { exports.returnHash = (...stringsIncluded) => {
return crypto.createHash('sha1').update(`${stringsIncluded.join("-<<??//\\\\??>>-")}`).digest("base64"); return crypto.createHash('sha1').update(`${stringsIncluded.join("-<<??//\\\\??>>-")}`).digest("base64");
} }
/**
* Check if a key exists in an array of objects
* @param {*} key The key to search for
* @param {*} array The object to search for the key
* @returns {boolean} If the key exists in the object
*/
exports.checkForKeyInArrayOfObjects = (key, array) => {
return array.filter(function (o) {
return o.hasOwnProperty(key);
}).length > 0;
}

View File

@@ -18,7 +18,7 @@ exports.DebugBuilder = class DebugBuilder {
this.ERROR = (...messageParts) => { this.ERROR = (...messageParts) => {
const error = debug(`${appName}:${fileName}:ERROR`); const error = debug(`${appName}:${fileName}:ERROR`);
error(messageParts); error(messageParts);
if (process.env.EXIT_ON_ERROR) setTimeout(process.exit, process.env.EXIT_ON_ERROR_DELAY ?? 0); if (process.env.EXIT_ON_ERROR && process.env.EXIT_ON_ERROR > 0) setTimeout(process.exit, process.env.EXIT_ON_ERROR_DELAY ?? 0);
} }
} }
} }