Files
drb-server/rss-manager/feedHandler.mjs
Logan Cusano 4e71c7b167 Initial RSS implementation
- Added debug command to trigger RSS feed update from discord
2024-05-22 00:17:36 -04:00

157 lines
5.4 KiB
JavaScript

import { createFeed, getAllFeeds, getFeedByLink, updateFeedByLink, deleteFeedByLink, createPost, getPostByPostId } from '../modules/mongo-wrappers/mongoFeedsWrappers.mjs';
import crypto from 'crypto';
import { sendPost } from '../discordBot/modules/rssWrappers.mjs';
import { DebugBuilder } from "../modules/debugger.mjs";
import UserAgent from "user-agents";
// Build one User-Agent string at module load (user-agents package, filtered to the 'Win32' platform)
// and publish it through process.env so the parser configuration below can read it back.
process.env.USER_AGENT_STRING = new UserAgent({ platform: 'Win32' }).toString();
// Initiate the parser
import Parser from 'rss-parser';
// Single rss-parser instance used by this module; it is configured with the User-Agent generated
// above and an Accept header listing RSS/XHTML/XML content types.
const parser = new Parser({
headers: {
'User-Agent': process.env.USER_AGENT_STRING,
"Accept": "application/rss+xml,application/xhtml+xml,application/xml"
}
});
// Logger for this module.
const log = new DebugBuilder("server", "feedHandler");
// Module-level failure bookkeeping keyed by feed URL. removeSource stores
// { count, timestamp, ignoredAttempts } per source here. It lives only in memory.
const runningPostsToRemove = {};
// removeSource only deletes a source once its recorded failure count has reached this limit.
const sourceFailureLimit = 5;
/**
 * Produces a base64-encoded SHA-1 digest of the given parts.
 * The parts are joined with a fixed separator string before hashing, so the same
 * parts in the same order always yield the same digest.
 * @param {...*} stringsIncluded - Values to hash; each is stringified by Array.prototype.join.
 * @returns {string} Base64 encoded SHA-1 digest.
 */
export const returnHash = (...stringsIncluded) => {
  const partSeparator = "-<<??//\\\\??>>-";
  const hashInput = stringsIncluded.join(partSeparator);
  const sha1 = crypto.createHash('sha1');
  sha1.update(hashInput);
  return sha1.digest("base64");
};
/**
 * Picks the identifier used to de-duplicate a feed item.
 * Prefers an identifier already on the item (postId, guid, id) and otherwise hashes
 * title + link + pubDate. The fallback deliberately contains no time-of-run component,
 * so an item without a pubDate resolves to the same ID on every run.
 * @param {object} post - Parsed feed item.
 * @returns {string} Stable identifier for the item.
 */
const resolvePostId = (post) =>
  post.postId ?? post.guid ?? post.id ?? returnHash(post.title, post.link, post.pubDate ?? "");

/**
 * Validates one feed item, sends it if it has no stored record yet, and stores it afterwards.
 * @param {object} client - Client exposing `channels.cache.get(channelId)`.
 * @param {object} source - Feed record; `channel_id` selects the target channel.
 * @param {object} post - Parsed feed item; `postId` is assigned on it.
 * @throws {Error} When the item lacks a title/link or sendPost returns a falsy result.
 */
const processPost = async (client, source, post) => {
  log.DEBUG("Parsed Source Keys", Object.keys(post), post?.title);
  log.VERBOSE("Post from feed:", post);
  if (!post.title || !post.link) throw new Error("Missing information from the post");
  // Warn only when neither content field is present.
  if (!post.content && !post['content:encoded']) log.WARN("There is no content for post:", post.title);

  post.postId = resolvePostId(post);
  const existingRecord = await getPostByPostId(post.postId);
  log.DEBUG("Existing post record:", existingRecord);
  if (existingRecord) return;

  const channel = client.channels.cache.get(source.channel_id);
  const sendResults = await sendPost(post, source, channel);
  if (!sendResults) throw new Error("No sending results from sending a post");

  log.DEBUG("Saving post to database:", sendResults, post.title, source.channel_id);
  // Persist the post; without this the lookup above never finds a record and the post is re-sent every run.
  await createPost(post);
  log.DEBUG("Saved post:", post);
};

/**
 * Fetches one feed and processes its items oldest-first, one at a time, so they are sent in order.
 * A failing item does not stop the remaining items; the first item error is rethrown afterwards.
 * Any failure is reported to removeSource before being rethrown.
 * @param {object} client - Client passed through to processPost.
 * @param {object} source - Feed record (title, link, category, guild_id, channel_id).
 */
const processSource = async (client, source) => {
  log.DEBUG('Record title:', source.title);
  log.DEBUG('Record link:', source.link);
  log.DEBUG('Record category:', source.category);
  log.DEBUG('Record guild ID:', source.guild_id);
  log.DEBUG('Record channel ID:', source.channel_id);
  try {
    const parsedFeed = await parser.parseURL(source.link);
    if (!parsedFeed?.items) {
      // Route through removeSource (retry/back-off) instead of deleting the feed on the first bad response.
      await removeSource(source.link);
      return;
    }

    const postErrors = [];
    // Copy before reversing so the parser's array is not mutated.
    for (const post of [...parsedFeed.items].reverse()) {
      try {
        await processPost(client, source, post);
      } catch (postError) {
        log.ERROR("Post Error:", post?.title, postError);
        postErrors.push(postError);
      }
    }
    if (postErrors.length > 0) throw postErrors[0];
  } catch (err) {
    log.ERROR("Parser Error:", source, err);
    await removeSource(source.link);
    throw err;
  }
};

/**
 * Loads every stored feed, fetches each one and sends any post that has no stored record yet.
 * All sources are processed to completion before the function settles.
 * @param {object} client - Client exposing `channels.cache.get(channelId)`.
 * @returns {Promise<void>}
 * @throws {Error} When no client is passed, when loading the feeds fails, or (after every source
 *                 has finished) with the error of the first source that failed.
 */
export const updateFeeds = async (client) => {
  if (!client) throw new Error("Client object not passed");
  try {
    const records = await getAllFeeds();
    // allSettled (not all): one failing source must not make this function return while others are mid-send.
    const outcomes = await Promise.allSettled(records.map((source) => processSource(client, source)));
    const firstFailure = outcomes.find((outcome) => outcome.status === "rejected");
    if (firstFailure) throw firstFailure.reason;
    log.DEBUG("All sources finished");
  } catch (error) {
    log.ERROR("Error updating feeds:", error);
    throw error;
  }
};
/**
 * Creates a feed record from the given fields via createFeed and reports the outcome.
 *
 * The callback is invoked exactly once: `(null, record)` on success or `(err, null)` when
 * createFeed fails. It is called outside the try/catch, so an exception thrown by the callback
 * itself propagates to the caller instead of triggering a second callback invocation.
 * When no callback is supplied the function behaves like a plain async function:
 * it resolves with the record or rejects with the createFeed error.
 *
 * @param {string} title - Title/Name of the RSS feed.
 * @param {string} link - URL of RSS feed.
 * @param {string} category - Category of RSS feed.
 * @param {string} guildId - Guild ID of RSS feed (stored as `guild_id`).
 * @param {string} channelId - Channel ID of RSS feed (stored as `channel_id`).
 * @param {function} [callback] - Node-style callback `(err, record)`.
 * @returns {Promise<*>} The callback's return value, or the created record when no callback is given.
 */
export const addSource = async (title, link, category, guildId, channelId, callback) => {
  const hasCallback = typeof callback === "function";
  const feed = {
    title,
    link,
    category,
    guild_id: guildId,
    channel_id: channelId,
  };

  let record;
  try {
    record = await createFeed(feed);
  } catch (err) {
    log.ERROR("Error in create:", err);
    if (!hasCallback) throw err;
    return callback(err, null);
  }

  log.DEBUG("Record ID:", record);
  return hasCallback ? callback(null, record) : record;
};
/**
 * Records a failure for a feed source and deletes the source only after repeated failures.
 *
 * Per-URL state is kept in `runningPostsToRemove`:
 *  - the first failure only creates the entry (count 1);
 *  - a failure arriving within `count * 30s` of the last counted one is ignored (only `ignoredAttempts` grows);
 *  - otherwise the count is incremented until it reaches `sourceFailureLimit`;
 *  - the next counted failure after that deletes the feed via deleteFeedByLink.
 * With a limit of 5 this means a source has to keep failing for at least 7.5 minutes
 * (30 + 60 + 90 + 120 + 150 seconds) before it is deleted.
 *
 * Once the feed has been deleted, or turns out not to exist, its tracking entry is dropped so
 * the map does not grow without bound and a re-added feed starts with a fresh counter.
 * Storage errors are logged and not rethrown.
 *
 * @param {string} sourceURL - The URL of the feed source causing issues.
 * @returns {Promise<void>}
 */
export const removeSource = async (sourceURL) => {
  const backoffStepMs = 30000;
  log.INFO("Removing source URL:", sourceURL);

  const tracker = runningPostsToRemove[sourceURL];
  if (!tracker) {
    runningPostsToRemove[sourceURL] = { count: 1, timestamp: Date.now(), ignoredAttempts: 0 };
    return;
  }

  const backoffDateTimeDifference = Date.now() - tracker.timestamp;
  const backoffWaitTime = tracker.count * backoffStepMs;
  log.DEBUG("Datetime", tracker, backoffDateTimeDifference, backoffWaitTime);

  // Still inside the back-off window: do not count this failure.
  if (backoffDateTimeDifference <= backoffWaitTime) {
    tracker.ignoredAttempts += 1;
    return;
  }

  if (tracker.count < sourceFailureLimit) {
    tracker.count += 1;
    tracker.timestamp = Date.now();
    return;
  }

  try {
    const record = await getFeedByLink(sourceURL);
    if (!record) {
      log.ERROR("No source returned from feedStorage");
      // Nothing left to remove, so there is nothing left to track.
      delete runningPostsToRemove[sourceURL];
      return;
    }
    const results = await deleteFeedByLink(sourceURL);
    if (!results) {
      // Keep the entry so the deletion is attempted again on the next failure.
      log.WARN("No results from remove entry");
      return;
    }
    delete runningPostsToRemove[sourceURL];
    log.DEBUG("Source exceeded the limit of retries and has been removed", sourceURL);
  } catch (err) {
    log.ERROR("Error removing source from feedStorage", err);
  }
};