commit ccc68b64a50473deca2573b98ee7d8cd6c6441fc
Author: Dan Dembinski
Date:   Wed Apr 7 17:39:53 2021 -0400

    Initial Commit. Downloads the latest wsprnet log, unzips the CSV, loads
    the records into pandas, filters sent and received spots by callsign,
    checks whether each record already exists, and adds it to the correct
    table if it doesn't. Also working on adding some post-processing cleanup
    and logging. Also includes a bat file that activates the correct venv
    and then runs the script.

diff --git a/databaseSetup b/databaseSetup
new file mode 100644
index 0000000..7aea2d8
--- /dev/null
+++ b/databaseSetup
@@ -0,0 +1,41 @@
+create table Receive
+(
+    SpotID int
+        constraint Receive_pk
+            primary key,
+    Timestamp TEXT,
+    Reporter TEXT,
+    reporterGrid TEXT,
+    SNR int,
+    Freq float,
+    CallSign TEXT,
+    Power int,
+    Grid TEXT,
+    Drift int,
+    Distance int,
+    Azimuth int,
+    Band int,
+    Version float,
+    Code int
+);
+
+create table Sent
+(
+    SpotID int
+        constraint Sent_pk
+            primary key,
+    Timestamp TEXT,
+    Reporter TEXT,
+    reporterGrid TEXT,
+    SNR int,
+    Freq float,
+    CallSign TEXT,
+    Power int,
+    Grid TEXT,
+    Drift int,
+    Distance int,
+    Azimuth int,
+    Band int,
+    Version float,
+    Code int
+);
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..af3236b
--- /dev/null
+++ b/main.py
@@ -0,0 +1,161 @@
+from bs4 import BeautifulSoup
+import requests
+import zipfile
+from pandas import read_csv, set_option
+from sqlalchemy import create_engine, Column, Integer, String, Float
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+from os import remove
+import logging
+
+logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
+
+callsign = 'KD8TUF'
+linkArray = []
+testing = False  # when True, skip the download and reuse an existing download.zip
+
+set_option('display.max_rows', 3)
+set_option('display.max_columns', None)
+set_option('display.width', None)
+set_option('display.max_colwidth', None)
+
+# The tables themselves are created by the databaseSetup script.
+engine = create_engine('sqlite:///wspr.db')
+engine.connect()
+Base = declarative_base()
+
+
+# Spots where this station heard another station.
+class Receive(Base):
+    __tablename__ = 'Receive'
+    rowId = Column(Integer)
+    SpotID = Column(Integer, primary_key=True, autoincrement=True)
+    Timestamp = Column(String)
+    Reporter = Column(String)
+    reporterGrid = Column(String)
+    SNR = Column(Integer)
+    Freq = Column(Float)
+    CallSign = Column(String)
+    Power = Column(Integer)
+    Grid = Column(String)
+    Drift = Column(Integer)
+    Distance = Column(Integer)
+    Azimuth = Column(Integer)
+    Band = Column(Integer)
+    Version = Column(Float)
+    Code = Column(Integer)
+
+
+# Spots where another station heard this station.
+class Sent(Base):
+    __tablename__ = 'Sent'
+    rowId = Column(Integer)
+    SpotID = Column(Integer, primary_key=True, autoincrement=True)
+    Timestamp = Column(String)
+    Reporter = Column(String)
+    reporterGrid = Column(String)
+    SNR = Column(Integer)
+    Freq = Column(Float)
+    CallSign = Column(String)
+    Power = Column(Integer)
+    Grid = Column(String)
+    Drift = Column(Integer)
+    Distance = Column(Integer)
+    Azimuth = Column(Integer)
+    Band = Column(Integer)
+    Version = Column(Float)
+    Code = Column(Integer)
+
+
+Session = sessionmaker(bind=engine)
+session = Session()
+
+# Original plan of scraping olddb
+# url = 'https://wsprnet.org/olddb'
+# page = requests.get(url)
+# soup = BeautifulSoup(page.content, 'html.parser')
+# evenrow = soup.find_all(id='evenrow')
+# for each in evenrow:
+#     optionvalue = each.find_all('option')
+#     for each in optionvalue:
+#         print(optionvalue[1])
+# # print(evenrow)
+
+if testing is False:
+    url = 'http://wsprnet.org/drupal/downloads'
+    page = requests.get(url)
+    soup = BeautifulSoup(page.content, 'html.parser')
+    results = soup.find_all("div", {"class": "field-item even"})  # currently unused
+
+    # Collect every link on the page; the current daily archive sits at a
+    # fixed offset from the end of the list.
+    links = soup.find_all("a")
+    for link in links:
+        linkArray.append(link.get('href'))
+    ReportDownload = linkArray[-6:-5]
+
+    download = requests.get(ReportDownload[0], stream=True)
+    with open('./download.zip', 'wb') as fd:
+        for chunk in download.iter_content(chunk_size=128):
+            fd.write(chunk)
+    print(ReportDownload[0])
+
+with zipfile.ZipFile('./download.zip', 'r') as archive:
+    archive.extractall('./')
+    filename = archive.namelist()
+
+# The wsprnet dump has no header row, so name the columns on read instead of
+# letting the first spot record be consumed as a header.
+DailyData = read_csv(filename[0], header=None,
+                     names=['SpotID', 'Timestamp', 'Reporter', 'Reporter Grid', 'SNR', 'Freq', 'Call Sign',
+                            'Grid', 'Power', 'Drift', 'Distance', 'Azimuth', 'Band', 'Version', 'Code'])
+
+# Spots where this station was the reporter.
+meSee = DailyData[DailyData['Reporter'] == callsign]
+ReceiveCount = 0
+for index, row in meSee.iterrows():
+    # Skip spots that were already imported on a previous run.
+    if session.query(Receive.SpotID).filter_by(SpotID=row['SpotID']).first() is None:
+        session.add(Receive(
+            SpotID=row['SpotID'],
+            Timestamp=row['Timestamp'],
+            Reporter=row['Reporter'],
+            reporterGrid=row['Reporter Grid'],
+            SNR=row['SNR'],
+            Freq=row['Freq'],
+            CallSign=row['Call Sign'],
+            Power=row['Power'],
+            Grid=row['Grid'],
+            Drift=row['Drift'],
+            Distance=row['Distance'],
+            Azimuth=row['Azimuth'],
+            Band=row['Band'],
+            Version=row['Version'],
+            Code=row['Code'])
+        )
+        ReceiveCount += 1
+
+# Spots where this station was the transmitter.
+saw = DailyData[DailyData['Call Sign'] == callsign]
+SentCount = 0
+for index, row in saw.iterrows():
+    if session.query(Sent.SpotID).filter_by(SpotID=row['SpotID']).first() is None:
+        session.add(Sent(
+            SpotID=row['SpotID'],
+            Timestamp=row['Timestamp'],
+            Reporter=row['Reporter'],
+            reporterGrid=row['Reporter Grid'],
+            SNR=row['SNR'],
+            Freq=row['Freq'],
+            CallSign=row['Call Sign'],
+            Power=row['Power'],
+            Grid=row['Grid'],
+            Drift=row['Drift'],
+            Distance=row['Distance'],
+            Azimuth=row['Azimuth'],
+            Band=row['Band'],
+            Version=row['Version'],
+            Code=row['Code'])
+        )
+        SentCount += 1
+
+
+session.commit()
+
+# Clean up the downloaded archive and the extracted CSV.
+remove('./download.zip')
+remove(filename[0])
+
+logging.info('Import Completed. Imported %i Received and %i Sent', ReceiveCount, SentCount)
diff --git a/run.bat b/run.bat
new file mode 100644
index 0000000..3dcbc5e
--- /dev/null
+++ b/run.bat
@@ -0,0 +1,2 @@
+@echo off
+cmd /k "cd /d C:\Users\ICYN3\Documents\dev\wspr\venv\Scripts & activate & cd /d C:\Users\ICYN3\Documents\dev\wspr & python main.py & exit"
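
A note on the duplicate check: the import loops in main.py issue one SELECT per
CSV row, which adds up against a full daily dump. Below is a minimal sketch of
a batched alternative for the Receive side (the Sent loop would follow the same
pattern). It assumes the Receive model, session, DailyData, and callsign
defined in main.py above; it is a sketch, not part of this commit.

    # Fetch every SpotID already stored in one query instead of one per row.
    existingSpots = {spotId for (spotId,) in session.query(Receive.SpotID).all()}

    meSee = DailyData[DailyData['Reporter'] == callsign]
    newSpots = meSee[~meSee['SpotID'].isin(existingSpots)]

    # bulk_save_objects skips per-object identity bookkeeping, so large
    # imports run faster than repeated session.add() calls.
    session.bulk_save_objects([
        Receive(
            SpotID=row['SpotID'],
            Timestamp=row['Timestamp'],
            Reporter=row['Reporter'],
            reporterGrid=row['Reporter Grid'],
            SNR=row['SNR'],
            Freq=row['Freq'],
            CallSign=row['Call Sign'],
            Power=row['Power'],
            Grid=row['Grid'],
            Drift=row['Drift'],
            Distance=row['Distance'],
            Azimuth=row['Azimuth'],
            Band=row['Band'],
            Version=row['Version'],
            Code=row['Code'],
        )
        for index, row in newSpots.iterrows()
    ])
    session.commit()
    ReceiveCount = len(newSpots)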