Initial recording scraper

This commit is contained in:
Logan Cusano
2023-06-17 17:33:24 -04:00
parent f4475dc9d7
commit 77deb3ba2b
2 changed files with 43 additions and 0 deletions

View File

@@ -0,0 +1,40 @@
import scrapy
from scrapy.crawler import CrawlerProcess
class RecordingSpider(scrapy.Spider):
    """Scrape SDR transmission recordings from the radio site's listing pages.

    Yields one dict per table row with the recording's device, date,
    duration, frequency, and a link to the audio file, following
    pagination links until exhausted.
    """

    name = "recording-scraper"
    start_urls = [
        'https://radio.vpn.cusano.net/sdr/transmissions',
    ]

    def parse(self, response):
        """Extract recording metadata from each table row, then follow pagination.

        :param response: Response for one transmissions listing page.
        :yields: item dicts (device/date/duration/frequency/link) and a
            Request for the next page when a pagination link exists.
        """
        for row in response.css("tr"):
            cells = row.css('td.py-1')
            # Rows without td.py-1 cells (e.g. the header row) carry no data.
            if not cells:
                continue
            links = row.css('a')
            # Extract the text content — yielding raw Selector objects (as the
            # original did) exports selector reprs, not data, in the JSON feed.
            yield {
                'device': cells[0].css('::text').get(default='').strip(),
                'date': cells[1].css('::text').get(default='').strip(),
                'duration': cells[2].css('::text').get(default='').strip(),
                "frequency": cells[3].css('::text').get(default='').strip(),
                "link": links[0].attrib["href"],
            }
        # The original selector "a.page-link > a::attr(href)" required an <a>
        # nested inside an <a>, which never matches; read the href off the
        # pagination link itself. .get() replaces legacy extract_first().
        next_page_url = response.css("a.page-link::attr(href)").get()
        if next_page_url is not None:
            yield scrapy.Request(response.urljoin(next_page_url))
def main():
    """Run the recording spider, exporting scraped items to items.json."""
    process = CrawlerProcess(
        settings={
            "FEEDS": {
                "items.json": {"format": "json"},
            },
        }
    )
    process.crawl(RecordingSpider)
    process.start()  # blocks here until the crawl is finished


if __name__ == "__main__":
    # Guard the entry point so importing this module (e.g. from tests or
    # another script) does not immediately start a blocking crawl.
    main()

View File

@@ -0,0 +1,3 @@
scrapy
fake-useragent
beautifulsoup4