diff --git a/discordBot/modules/rssWrappers.mjs b/discordBot/modules/rssWrappers.mjs index 0f3e441..9b769e0 100644 --- a/discordBot/modules/rssWrappers.mjs +++ b/discordBot/modules/rssWrappers.mjs @@ -22,59 +22,49 @@ export class EmmeliaEmbedBuilder extends EmbedBuilder { export const sendPost = (post, source, channel) => { log.DEBUG("Sending post from source: ", post, source); + const postTitle = String(post.title).substring(0, 150); const postLink = post.link; - let postContent; - - if (post.content) { - // Reset the content parameter with the encoded parameter - post.content = parse(post['content:encoded'] ?? post.content); - // Get the post content and trim it to length or add a placeholder if necessary - let postText = String(post.content.text); - if (postText.length >= 3800) postText = `${postText.slice(0, 3800).substring(0, Math.min(postText.length, postText.lastIndexOf(" ")))} [...](${post.link})`; - else if (postText.length === 0) postText = `*This post has no content* [Direct Link](${post.link})`; + let postContent = `*This post has no content* [Direct Link](${post.link})`; + + if (post.content || post['content:encoded']) { + const content = post['content:encoded'] ?? post.content; + const parsedContent = parse(content); + let postText = parsedContent.text.trim(); + + if (postText.length >= 3800) { + postText = `${postText.slice(0, 3800).substring(0, postText.lastIndexOf(" "))} [...](${post.link})`; + } else if (postText.length === 0) { + postText = `*This post has no content* [Direct Link](${post.link})`; + } postContent = postText; - } else { - postContent = `*This post has no content* [Direct Link](${post.link})`; - } - // Check for embedded youtube videos and add the first four as links - const ytVideos = String(post.content).match(youtubeVideoRegex); - if (ytVideos) { - for (let ytVideo of ytVideos.slice(0, 4)) { - // If the video is an embed, replace the embed to make it watchable - if (ytVideo.includes("embed")) ytVideo = ytVideo.replace("embed/", "watch?v="); - postContent += `\nEmbeded Video from Post: [YouTube](${ytVideo})`; + // Check for embedded YouTube videos and add the first four as links + const ytVideos = content.match(youtubeVideoRegex); + if (ytVideos) { + ytVideos.slice(0, 4).forEach((ytVideo) => { + if (ytVideo.includes("embed")) ytVideo = ytVideo.replace("embed/", "watch?v="); + postContent += `\nEmbedded Video from Post: [YouTube](${ytVideo})`; + }); + } + + // Extract the first image link if available + const imageLinks = parsedContent.querySelectorAll("a") + .map(link => link.getAttribute("href")) + .filter(href => href && href.match(imageRegex)); + + if (imageLinks.length > 0) { + post.image = imageLinks[0]; } } - log.DEBUG("Post content: ", postContent); const postId = post.postId; - if (!post.pubDate) post.pubDate = Date.now(); - const postPubDate = new Date(post.pubDate).toISOString(); - + const postPubDate = new Date(post.pubDate || Date.now()).toISOString(); const postSourceLink = source.title; - let postImage = post.image ?? undefined; + const postImage = post.image; - if (!postImage) { - if (post.content) { - const linksInPost = post.content.querySelectorAll("a"); - if (linksInPost) { - log.DEBUG("Found links in post:", linksInPost); - for (const link of linksInPost) { - // Check to see if this link is a youtube video that was already found, if so skip it - if (ytVideos?.includes(link)) continue; - const images = String(link.getAttribute("href")).match(imageRegex); - log.DEBUG("Images found in post:", images); - if (images) { - postImage = images[0]; - } - } - } - } - } + log.DEBUG("Post content: ", postContent); - log.DEBUG("Sending an RSS post to discord", postTitle, postId, postContent); try { const rssMessage = new EmmeliaEmbedBuilder() .setColor(0x0099FF) @@ -83,20 +73,16 @@ export const sendPost = (post, source, channel) => { .addFields({ name: 'Source', value: postSourceLink, inline: true }) .addFields({ name: 'Published', value: postPubDate, inline: true }); - // TODO - If there is more than one image, create a canvas and post the created canvas if (postImage) { log.DEBUG("Image from post:", postImage); rssMessage.setImage(postImage); } - // Add the main content if it's present postContent = postContent.slice(0, 4090).trim(); if (postContent) rssMessage.setDescription(postContent); - const channelResponse = rssMessage; - - //const channelResponse = channel.send({ embeds: [rssMessage] }); - + const channelResponse = channel.send({ embeds: [rssMessage] }); + log.DEBUG("Channel send response", channelResponse); return channelResponse; diff --git a/rss-manager/feedHandler.mjs b/rss-manager/feedHandler.mjs index babcdd1..aae743b 100644 --- a/rss-manager/feedHandler.mjs +++ b/rss-manager/feedHandler.mjs @@ -3,11 +3,11 @@ import crypto from 'crypto'; import { sendPost } from '../discordBot/modules/rssWrappers.mjs'; import { DebugBuilder } from "../modules/debugger.mjs"; import UserAgent from "user-agents"; +import Parser from 'rss-parser'; +// Initialize the User-Agent string process.env.USER_AGENT_STRING = new UserAgent({ platform: 'Win32' }).toString(); -// Initiate the parser -import Parser from 'rss-parser'; const parser = new Parser({ headers: { 'User-Agent': process.env.USER_AGENT_STRING, @@ -16,11 +16,11 @@ const parser = new Parser({ }); const log = new DebugBuilder("server", "feedHandler"); -const runningPostsToRemove = {}; // Assuming this is a global state variable -const sourceFailureLimit = 5; // Define your source failure limit here +const runningPostsToRemove = {}; +const sourceFailureLimit = 5; export const returnHash = (...stringsIncluded) => { - return crypto.createHash('sha1').update(`${stringsIncluded.join("-<>-")}`).digest("base64"); + return crypto.createHash('sha1').update(stringsIncluded.join("-<>-")).digest("base64"); }; export const updateFeeds = async (client) => { @@ -30,102 +30,85 @@ export const updateFeeds = async (client) => { const records = await getAllFeeds(); const sourcePromiseArray = records.map(async (source) => { - log.DEBUG('Record title:', source.title); - log.DEBUG('Record link:', source.link); - log.DEBUG('Record category:', source.category); - log.DEBUG('Record guild ID:', source.guild_id); - log.DEBUG('Record channel ID:', source.channel_id); + log.DEBUG('Processing source:', source.title); try { const parsedFeed = await parser.parseURL(source.link); if (parsedFeed?.items) { await Promise.all(parsedFeed.items.reverse().map(async (post) => { - log.DEBUG("Parsed Source Keys", Object.keys(post), post?.title); - log.VERBOSE("Post from feed:", post); + log.DEBUG("Processing post:", post.title); - if (!post.title || !post.link) throw new Error("Missing information from the post"); - if (!post.content || !post['content:encoded']) log.WARN("There is no content for post:", post.title); + if (!post.title || !post.link) throw new Error("Missing title or link in the post"); + if (!post.content && !post['content:encoded']) log.WARN("No content for post:", post.title); post.postId = post.postId ?? post.guid ?? post.id ?? returnHash(post.title, post.link, post.pubDate ?? Date.now()); const existingRecord = await getPostByPostId(post.postId); - log.DEBUG("Existing post record:", existingRecord); - if (!existingRecord) { - const channel = client.channels.cache.get(source.channel_id); + const channel = client.channels.cache.get(source.channel_id); const sendResults = await sendPost(post, source, channel); + if (!sendResults) throw new Error("Failed to send post"); - if (!sendResults) throw new Error("No sending results from sending a post"); + log.DEBUG("Saving post to database:", post.title, source.channel_id); - log.DEBUG("Saving post to database:", sendResults, post.title, source.channel_id); - //await createPost(post); - log.DEBUG("Saved post:", post); + const postToSave = { + title: post.title, + link: post.link, + pubDate: post.pubDate, + author: post.author, + contentSnippet: post.contentSnippet, + id: post.id, + isoDate: post.isoDate, + postId: post.postId + }; + + await createPost(postToSave); + log.DEBUG("Post saved:", postToSave); } })); } else { await deleteFeedByLink(source.link); } } catch (err) { - log.ERROR("Parser Error:", source, err); + log.ERROR("Error processing source:", source.title, err); await removeSource(source.link); throw err; } }); await Promise.all(sourcePromiseArray); - log.DEBUG("All sources finished"); + log.DEBUG("All sources processed"); } catch (error) { log.ERROR("Error updating feeds:", error); throw error; } }; -/** - * Adds or updates new source URL to configured storage. - * @param {string} title - Title/Name of the RSS feed. - * @param {string} link - URL of RSS feed. - * @param {string} category - Category of RSS feed. - * @param {string} guildId - Guild ID of RSS feed. - * @param {string} channelId - Channel ID of RSS feed. - * @param {function} callback - Callback function. - */ export const addSource = async (title, link, category, guildId, channelId, callback) => { try { - const feed = { - title, - link, - category, - guild_id: guildId, - channel_id: channelId - }; + const feed = { title, link, category, guild_id: guildId, channel_id: channelId }; const record = await createFeed(feed); - log.DEBUG("Record ID:", record); - return callback(null, record); + log.DEBUG("Source added:", record); + callback(null, record); } catch (err) { - log.ERROR("Error in create:", err); - return callback(err, null); + log.ERROR("Error adding source:", err); + callback(err, null); } }; -/** - * Wrapper for feeds that cause errors. By default it will wait over a day for the source to come back online before deleting it. - * @param {string} sourceURL - The URL of the feed source causing issues. - */ export const removeSource = async (sourceURL) => { - log.INFO("Removing source URL:", sourceURL); - + log.INFO("Removing source:", sourceURL); + if (!runningPostsToRemove[sourceURL]) { runningPostsToRemove[sourceURL] = { count: 1, timestamp: Date.now(), ignoredAttempts: 0 }; return; } - const backoffDateTimeDifference = (Date.now() - runningPostsToRemove[sourceURL].timestamp); - const backoffWaitTime = (runningPostsToRemove[sourceURL].count * 30000); + const elapsedTime = Date.now() - runningPostsToRemove[sourceURL].timestamp; + const waitTime = runningPostsToRemove[sourceURL].count * 30000; - log.DEBUG("Datetime", runningPostsToRemove[sourceURL], backoffDateTimeDifference, backoffWaitTime); - - if (backoffDateTimeDifference <= backoffWaitTime) { + if (elapsedTime <= waitTime) { runningPostsToRemove[sourceURL].ignoredAttempts += 1; return; } @@ -139,18 +122,18 @@ export const removeSource = async (sourceURL) => { try { const record = await getFeedByLink(sourceURL); if (!record) { - log.ERROR("No source returned from feedStorage"); + log.ERROR("Source not found in storage"); return; } const results = await deleteFeedByLink(sourceURL); if (!results) { - log.WARN("No results from remove entry"); + log.WARN("Failed to remove source"); return; } - log.DEBUG("Source exceeded the limit of retries and has been removed", sourceURL); + log.DEBUG("Source removed after exceeding failure limit:", sourceURL); } catch (err) { - log.ERROR("Error removing source from feedStorage", err); + log.ERROR("Error removing source from storage:", err); } };