# DRB-CnC/Server/modules/radioRecordingScraper/recordings_spider.py

import scrapy
from scrapy.crawler import CrawlerProcess

class RecordingSpider(scrapy.Spider):
    """Scrape the transmissions listing page into plain, serialisable items.

    Each table row that contains ``td.py-1`` cells is emitted as one dict with
    the keys ``device``, ``date``, ``duration``, ``frequency`` and ``link``.
    """

    name = "recording-scraper"
    start_urls = [
        'https://radio.vpn.cusano.net/sdr/transmissions',
    ]

    def parse(self, response):
        """Yield one item per data row, then a request for the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            dict: One item per row, mapping the first four ``td.py-1`` cells
                (by position) to ``device``, ``date``, ``duration`` and
                ``frequency`` as whitespace-normalised text, plus ``link``
                holding the first ``href`` found in the row (``None`` when
                the row has no anchor with an ``href``).
            scrapy.Request: A request for the next page when the pagination
                selector matches.
        """
        self.logger.debug("Parsing %s", response)

        for row in response.css("tr"):
            cells = row.css("td.py-1")
            if not cells:
                # Rows without any matching cell carry no item data.
                continue
            if len(cells) < 4:
                # Skip instead of raising IndexError, which would abort the
                # generator and drop the remaining rows and the pagination.
                self.logger.warning(
                    "Skipping row with %d cell(s), expected at least 4",
                    len(cells),
                )
                continue

            # Yield text rather than Selector objects: selectors cannot be
            # serialised by the feed exporter.
            device, date, duration, frequency = (
                cell.xpath("normalize-space(.)").get() for cell in cells[:4]
            )
            yield {
                'device': device,
                'date': date,
                'duration': duration,
                "frequency": frequency,
                "link": row.css("a::attr(href)").get(),
            }

        # NOTE(review): this selector only matches an <a> that is a direct
        # child of ``a.page-link``; confirm it against the real page markup,
        # otherwise pagination is never followed.
        next_page_url = response.css("a.page-link > a::attr(href)").extract_first()
        if next_page_url is not None:
            yield scrapy.Request(response.urljoin(next_page_url))


# Run the spider as a standalone script, exporting every scraped item to
# ``items.json`` as a JSON feed.
# NOTE(review): there is no ``if __name__ == "__main__"`` guard, so importing
# this module also starts the crawl.
process = CrawlerProcess(settings={"FEEDS": {"items.json": {"format": "json"}}})
process.crawl(RecordingSpider)

# Blocks here until the crawling is finished.
process.start()