import scrapy
from scrapy.crawler import CrawlerProcess


class RecordingSpider(scrapy.Spider):
    """Scrape SDR transmission recordings from the listing table,
    following pagination until no next-page link is found.

    Yields dicts with keys: device, date, duration, frequency, link.
    """

    name = "recording-scraper"
    start_urls = [
        'https://radio.vpn.cusano.net/sdr/transmissions',
    ]

    def parse(self, response):
        """Yield one item per data row, then follow the next-page link."""
        for row in response.css("tr"):
            cells = row.css('td.py-1')
            if len(cells) < 4:
                # Header/spacer rows have no td.py-1 cells; also skips
                # malformed rows that would raise IndexError below.
                continue
            links = row.css('a')
            yield {
                # BUG FIX: the original stored raw Selector objects, which
                # are not JSON-serializable, so the feed export was broken.
                # Extract the cell text instead.
                'device': cells[0].css('::text').get(default='').strip(),
                'date': cells[1].css('::text').get(default='').strip(),
                'duration': cells[2].css('::text').get(default='').strip(),
                "frequency": cells[3].css('::text').get(default='').strip(),
                "link": links[0].attrib["href"] if links else None,
            }

        # BUG FIX: the original selector "a.page-link > a::attr(href)"
        # required an <a> nested inside an <a>, which never matches;
        # the href lives on the a.page-link element itself.
        next_page_url = response.css("a.page-link::attr(href)").get()
        if next_page_url is not None:
            yield scrapy.Request(response.urljoin(next_page_url))


if __name__ == "__main__":
    process = CrawlerProcess(
        settings={
            "FEEDS": {
                "items.json": {"format": "json"},
            },
        }
    )
    process.crawl(RecordingSpider)
    process.start()  # blocks here until the crawl is finished