Compare commits
3 Commits
feature/#1 ... feature/#1

| Author | SHA1 | Date |
|---|---|---|
| | 77deb3ba2b | |
| | f4475dc9d7 | |
| | c4650a9e99 | |
```diff
@@ -40,7 +40,8 @@ module.exports = {
         .addStringOption(option =>
             option.setName("bot")
                 .setDescription("The bot to disconnect from the server")
-                .setAutocomplete(true)),
+                .setAutocomplete(true)
+                .setRequired(true)),
     example: "leave",
     isPrivileged: false,
     requiresTokens: false,
```
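Making the `bot` option required moves the missing-argument check onto Discord itself, which rejects the invocation before it reaches the handler. For context, a minimal sketch of how the now-required option would typically be read on the handling side; the `execute` shape and the reply text are illustrative assumptions, not part of this diff:

```js
// Sketch only: assumes a discord.js v14-style ChatInputCommandInteraction.
// Because the option is setRequired(true), getString("bot", true) can no
// longer return null here; it throws instead of silently yielding nothing.
async function execute(interaction) {
    const botName = interaction.options.getString("bot", true);
    await interaction.reply(`Disconnecting ${botName} from the server...`); // illustrative reply
}
```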
```diff
@@ -32,17 +32,38 @@ var runningPostsToRemove = [{
 }]
 */
 var runningPostsToRemove = {};
-const sourceFailureLimit = process.env.SOURCE_FAILURE_LIMIT ?? 3;
+const sourceFailureLimit = process.env.SOURCE_FAILURE_LIMIT ?? 15;
 
 /**
+ * Wrapper for feeds that cause errors. By default it will wait over a day for the source to come back online before deleting it.
  *
- * @param {*} sourceURL
+ * @param {string} sourceURL The URL of the feed source causing issues
  */
 exports.removeSource = function removeSource(sourceURL) {
     log.INFO("Removing source URL: ", sourceURL);
-    if (!sourceURL in runningPostsToRemove) {runningPostsToRemove[sourceURL] = 1; return;}
+    // Check to see if this is the first time this source has been attempted
+    if (!Object.keys(runningPostsToRemove).includes(sourceURL)) {
+        runningPostsToRemove[sourceURL] = { count: 1, timestamp: Date.now(), ignoredAttempts: 0 };
+        return;
+    }
+
+    const backoffDateTimeDifference = (Date.now() - new Date(runningPostsToRemove[sourceURL].timestamp));
+    const backoffWaitTime = (runningPostsToRemove[sourceURL].count * 30000);
+
+    log.DEBUG("Datetime", runningPostsToRemove[sourceURL], backoffDateTimeDifference, backoffWaitTime);
+
+    // Check to see if the last error occurred within the backoff period or if we should try again
+    if (backoffDateTimeDifference <= backoffWaitTime) {
+        runningPostsToRemove[sourceURL].ignoredAttempts +=1;
+        return;
+    }
 
-    if (runningPostsToRemove[sourceURL] < sourceFailureLimit) {runningPostsToRemove[sourceURL] += 1; return;}
+    // Increase the retry counter
+    if (runningPostsToRemove[sourceURL].count < sourceFailureLimit) {
+        runningPostsToRemove[sourceURL].count += 1;
+        runningPostsToRemove[sourceURL].timestamp = Date.now();
+        return;
+    }
 
     feedStorage.getRecordBy('link', sourceURL, (err, record) => {
         if (err) log.ERROR("Error getting record from feedStorage", err);
```
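The rewritten `removeSource` turns a bare failure counter into linear backoff: each tracked source keeps a retry `count`, the `timestamp` of its last counted failure, and a tally of errors swallowed while the current window was still open. A standalone sketch of that bookkeeping, runnable with `node`; `shouldDelete` and `tracked` are illustrative names, not part of the module:

```js
// Linear-backoff bookkeeping mirroring the diff above (sketch, not the module itself).
const FAILURE_LIMIT = Number(process.env.SOURCE_FAILURE_LIMIT ?? 15);
const tracked = {};

function shouldDelete(sourceURL, now = Date.now()) {
    const entry = tracked[sourceURL];
    if (!entry) { // first failure: start tracking, never delete yet
        tracked[sourceURL] = { count: 1, timestamp: now, ignoredAttempts: 0 };
        return false;
    }
    if (now - entry.timestamp <= entry.count * 30000) { // still inside the backoff window
        entry.ignoredAttempts += 1; // swallowed, not counted as a new failure
        return false;
    }
    if (entry.count < FAILURE_LIMIT) { // window elapsed: count a real failure, widen the window
        entry.count += 1;
        entry.timestamp = now;
        return false;
    }
    return true; // limit exhausted and window elapsed: caller may delete the feed
}
```

With the defaults shown, the enforced windows alone sum to 30 s × (1 + 2 + ... + 15) = 3600 s, so deletion takes at least an hour of wall-clock time; the "over a day" in the doc comment presumably also reflects how infrequently the poller revisits a failing feed.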
```diff
@@ -62,13 +83,14 @@ exports.removeSource = function removeSource(sourceURL) {
 /**
  * Unset a source URL from deletion if the source has not already been deleted
  * @param {*} sourceURL The source URL to be unset from deletion
- * @returns {*}
  */
 exports.unsetRemoveSource = function unsetRemoveSource(sourceURL) {
     log.INFO("Unsetting source URL from deletion (if not already deleted): ", sourceURL);
-    if (!sourceURL in runningPostsToRemove) return;
+    if (!Object.keys(runningPostsToRemove).includes(sourceURL)) return;
 
-    if (runningPostsToRemove[sourceURL] > sourceFailureLimit) return delete runningPostsToRemove[sourceURL];
+    delete runningPostsToRemove[sourceURL];
+
+    return
 }
 
 /**
```
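Both functions also swap `!sourceURL in runningPostsToRemove` for an explicit key check, and for good reason: unary `!` binds tighter than the `in` operator, so the old expression asked whether the key `"false"` existed in the map. That is never true here, so the early return (and, in `removeSource`, the first-failure initialisation) never ran. A two-line demonstration:

```js
// !x in obj parses as (!x) in obj, coercing the boolean to the key "false".
console.log(!"https://example.com/feed" in { a: 1 });   // false, regardless of contents
console.log(!("https://example.com/feed" in { a: 1 })); // true: the intended membership test
```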
Server/modules/radioRecordingScraper/recordings_spider.py (new file, 40 lines)

```diff
@@ -0,0 +1,40 @@
+import scrapy
+from scrapy.crawler import CrawlerProcess
+
+class RecordingSpider(scrapy.Spider):
+    name = "recording-scraper"
+    start_urls = [
+        'https://radio.vpn.cusano.net/sdr/transmissions',
+    ]
+
+    def parse(self, response):
+        print("ASDASDD")
+        print(response)
+        for row in response.css("tr"):
+            if row.css('td.py-1'):
+                links = row.css('a')
+                rows = row.css('td.py-1')
+                print(row)
+                yield {
+                    'device': rows[0],
+                    'date': rows[1],
+                    'duration': rows[2],
+                    "frequency": rows[3],
+                    "link": links[0].attrib["href"],
+                }
+
+        next_page_url = response.css("a.page-link > a::attr(href)").extract_first()
+        if next_page_url is not None:
+            yield scrapy.Request(response.urljoin(next_page_url))
+
+
+process = CrawlerProcess(
+    settings={
+        "FEEDS": {
+            "items.json": {"format": "json"},
+        },
+    }
+)
+
+process.crawl(RecordingSpider)
+process.start() # the script will block here until the crawling is finished
```
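A few hedged notes on the spider as committed: the yielded fields hold Scrapy `Selector` objects rather than extracted text, so the JSON feed export will likely fail to serialise them; something like `rows[0].css("::text").get()` per cell is presumably the intent. The `print("ASDASDD")` and `print(row)` calls read as leftover debugging, and the pagination selector `a.page-link > a` looks for an anchor nested inside an anchor, which probably never matches, so the crawl would stop after the first page. Finally, `fake-useragent` and `beautifulsoup4` in the requirements below are not imported here, presumably reserved for follow-up work.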
Server/modules/radioRecordingScraper/requirements.txt (new file, 3 lines)

```diff
@@ -0,0 +1,3 @@
+scrapy
+fake-useragent
+beautifulsoup4
```