Initial RSS implementation
- Added debug command to trigger RSS feed update from discord
This commit is contained in:
156
rss-manager/feedHandler.mjs
Normal file
156
rss-manager/feedHandler.mjs
Normal file
@@ -0,0 +1,156 @@
|
||||
import { createFeed, getAllFeeds, getFeedByLink, updateFeedByLink, deleteFeedByLink, createPost, getPostByPostId } from '../modules/mongo-wrappers/mongoFeedsWrappers.mjs';
|
||||
import crypto from 'crypto';
|
||||
import { sendPost } from '../discordBot/modules/rssWrappers.mjs';
|
||||
import { DebugBuilder } from "../modules/debugger.mjs";
|
||||
import UserAgent from "user-agents";
|
||||
|
||||
// Generate a realistic desktop-browser User-Agent once at startup and stash it
// in the environment; some feed hosts reject requests with default/bot UAs.
// NOTE(review): must run BEFORE the parser below is constructed, since the
// parser captures the value at construction time.
process.env.USER_AGENT_STRING = new UserAgent({ platform: 'Win32' }).toString();

// Initiate the parser
import Parser from 'rss-parser';
const parser = new Parser({
    headers: {
        'User-Agent': process.env.USER_AGENT_STRING,
        "Accept": "application/rss+xml,application/xhtml+xml,application/xml"
    }
});

// Module-scoped logger for this file.
const log = new DebugBuilder("server", "feedHandler");
// Per-URL failure/backoff bookkeeping used by removeSource(); keyed by feed
// link, values are { count, timestamp, ignoredAttempts }.
const runningPostsToRemove = {}; // Assuming this is a global state variable
// Number of backoff rounds a failing source survives before deletion.
const sourceFailureLimit = 5; // Define your source failure limit here
|
||||
|
||||
/**
 * Builds a stable, short identifier from any number of strings by joining
 * them with an unlikely delimiter and hashing the result.
 *
 * @param {...string} parts - Values to fold into the hash.
 * @returns {string} Base64-encoded SHA-1 digest of the joined parts.
 */
export const returnHash = (...parts) => {
    const combined = parts.join("-<<??//\\\\??>>-");
    return crypto.createHash("sha1").update(combined).digest("base64");
};
|
||||
|
||||
/**
 * Fetches every stored feed, parses it, and sends any not-yet-seen items to
 * the feed's configured Discord channel, persisting each sent post for dedup.
 *
 * Feeds whose parse result has no `items` array are deleted outright; feeds
 * whose fetch/parse throws are routed through removeSource() (backoff-based
 * removal) and the error is rethrown, so a single failing source rejects the
 * whole update — callers must catch.
 *
 * @param {object} client - Discord client used to resolve channels by ID.
 * @throws {Error} If client is missing, or any source fails to process.
 */
export const updateFeeds = async (client) => {
    if (!client) throw new Error("Client object not passed");

    try {
        const records = await getAllFeeds();

        const sourcePromiseArray = records.map(async (source) => {
            log.DEBUG('Record title:', source.title);
            log.DEBUG('Record link:', source.link);
            log.DEBUG('Record category:', source.category);
            log.DEBUG('Record guild ID:', source.guild_id);
            log.DEBUG('Record channel ID:', source.channel_id);

            try {
                const parsedFeed = await parser.parseURL(source.link);

                if (parsedFeed?.items) {
                    // Reverse so the oldest item is posted first and the
                    // channel reads chronologically.
                    await Promise.all(parsedFeed.items.reverse().map(async (post) => {
                        log.DEBUG("Parsed Source Keys", Object.keys(post), post?.title);
                        log.VERBOSE("Post from feed:", post);

                        if (!post.title || !post.link) throw new Error("Missing information from the post");
                        // FIX: was `||`, which warned whenever EITHER field was
                        // absent (true for nearly every feed); warn only when
                        // the post carries no content in any known field.
                        if (!post.content && !post['content:encoded']) log.WARN("There is no content for post:", post.title);

                        // Stable identifier for dedup: prefer feed-provided
                        // IDs, fall back to hashing the post's identity fields.
                        post.postId = post.postId ?? post.guid ?? post.id ?? returnHash(post.title, post.link, post.pubDate ?? Date.now());

                        const existingRecord = await getPostByPostId(post.postId);
                        log.DEBUG("Existing post record:", existingRecord);

                        if (!existingRecord) {
                            const channel = client.channels.cache.get(source.channel_id);
                            const sendResults = await sendPost(post, source, channel);

                            if (!sendResults) throw new Error("No sending results from sending a post");

                            log.DEBUG("Saving post to database:", sendResults, post.title, source.channel_id);
                            // FIX: this call was commented out; without it no
                            // post is ever recorded, so every post is re-sent
                            // on every refresh cycle.
                            await createPost(post);
                            log.DEBUG("Saved post:", post);
                        }
                    }));
                } else {
                    // Parser returned no items array at all — treat the feed
                    // as invalid and drop it from storage.
                    await deleteFeedByLink(source.link);
                }
            } catch (err) {
                log.ERROR("Parser Error:", source, err);
                await removeSource(source.link);
                // NOTE(review): rethrowing here makes one bad feed reject the
                // entire update run — confirm this fail-fast contract is wanted.
                throw err;
            }
        });

        await Promise.all(sourcePromiseArray);
        log.DEBUG("All sources finished");
    } catch (error) {
        log.ERROR("Error updating feeds:", error);
        throw error;
    }
};
|
||||
|
||||
/**
 * Persists a new RSS feed source in configured storage and reports the result
 * through a Node-style callback.
 * @param {string} title - Title/Name of the RSS feed.
 * @param {string} link - URL of RSS feed.
 * @param {string} category - Category of RSS feed.
 * @param {string} guildId - Guild ID the feed belongs to.
 * @param {string} channelId - Channel ID posts are delivered to.
 * @param {function} callback - Node-style callback invoked as (err, record).
 */
export const addSource = async (title, link, category, guildId, channelId, callback) => {
    // Object-literal construction cannot throw, so build it outside the try.
    const feed = {
        title,
        link,
        category,
        guild_id: guildId,
        channel_id: channelId,
    };

    try {
        const record = await createFeed(feed);
        log.DEBUG("Record ID:", record);
        return callback(null, record);
    } catch (err) {
        log.ERROR("Error in create:", err);
        return callback(err, null);
    }
};
|
||||
|
||||
/**
 * Wrapper for feeds that cause errors. Applies an escalating backoff: each
 * call within the backoff window is ignored, each call after it increments
 * the failure count, and once the count reaches sourceFailureLimit the feed
 * is deleted from storage. By default a source gets well over a day to come
 * back online before deletion.
 * @param {string} sourceURL - The URL of the feed source causing issues.
 */
export const removeSource = async (sourceURL) => {
    log.INFO("Removing source URL:", sourceURL);

    // First failure: start tracking, take no further action yet.
    if (!runningPostsToRemove[sourceURL]) {
        runningPostsToRemove[sourceURL] = { count: 1, timestamp: Date.now(), ignoredAttempts: 0 };
        return;
    }

    const backoffDateTimeDifference = (Date.now() - runningPostsToRemove[sourceURL].timestamp);
    const backoffWaitTime = (runningPostsToRemove[sourceURL].count * 30000);

    log.DEBUG("Datetime", runningPostsToRemove[sourceURL], backoffDateTimeDifference, backoffWaitTime);

    // Still inside the current backoff window — ignore this failure.
    if (backoffDateTimeDifference <= backoffWaitTime) {
        runningPostsToRemove[sourceURL].ignoredAttempts += 1;
        return;
    }

    // Window elapsed but limit not reached — escalate and restart the window.
    if (runningPostsToRemove[sourceURL].count < sourceFailureLimit) {
        runningPostsToRemove[sourceURL].count += 1;
        runningPostsToRemove[sourceURL].timestamp = Date.now();
        return;
    }

    try {
        const record = await getFeedByLink(sourceURL);
        if (!record) {
            log.ERROR("No source returned from feedStorage");
            // FIX: the feed is already gone from storage — clear the stale
            // tracking entry instead of keeping it forever.
            delete runningPostsToRemove[sourceURL];
            return;
        }

        const results = await deleteFeedByLink(sourceURL);
        if (!results) {
            log.WARN("No results from remove entry");
            return;
        }

        // FIX: drop the tracking entry after successful deletion so a
        // re-added feed starts with a clean slate and the map does not grow
        // without bound.
        delete runningPostsToRemove[sourceURL];

        log.DEBUG("Source exceeded the limit of retries and has been removed", sourceURL);
    } catch (err) {
        log.ERROR("Error removing source from feedStorage", err);
    }
};
|
||||
37
rss-manager/rssController.mjs
Normal file
37
rss-manager/rssController.mjs
Normal file
@@ -0,0 +1,37 @@
|
||||
//Will handle updating feeds in all channels
|
||||
|
||||
import { DebugBuilder } from "../modules/debugger.mjs";
|
||||
import { updateFeeds } from "./feedHandler.mjs";
|
||||
import dotenv from 'dotenv';
|
||||
dotenv.config();

// Module-scoped logger for this file.
const log = new DebugBuilder("server", "rssController");

// FIX: environment variables are strings ("300000"), and refreshInterval is
// used in arithmetic (refreshInterval / 4) as well as setTimeout — coerce to
// a number and fall back to 5 minutes for unset/invalid/non-positive values.
const parsedRefreshInterval = Number(process.env.RSS_REFRESH_INTERVAL);
const refreshInterval = Number.isFinite(parsedRefreshInterval) && parsedRefreshInterval > 0
    ? parsedRefreshInterval
    : 300000;
|
||||
|
||||
/**
 * Drives the periodic RSS refresh cycle: after an initial grace period it
 * fetches all feeds once, then loops forever on the configured interval.
 */
export class RSSController {
    /**
     * @param {object} client - Discord client handed to updateFeeds().
     */
    constructor(client) {
        this.client = client;
    }

    /**
     * Starts the refresh loop. Never resolves under normal operation.
     */
    async start() {
        const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

        // Give the rest of the bot one full refresh period to come online
        // before RSS work begins.
        await pause(refreshInterval);

        log.INFO("Starting RSS Controller");
        // Prime the feeds once before entering the infinite loop.
        await updateFeeds(this.client);

        for (;;) {
            // Wait a full interval, run an update, then wait a quarter
            // interval to let everything settle before the next cycle.
            await pause(refreshInterval);
            await this.collectLatestPosts();
            await pause(refreshInterval / 4);
        }
    }

    /**
     * Runs a single feed-refresh pass for the attached client.
     */
    async collectLatestPosts() {
        log.INFO("Updating sources");
        await updateFeeds(this.client);
    }
}
|
||||
Reference in New Issue
Block a user