Initial commit. Downloads the latest wsprnet log, unzips the CSV, loads the records into pandas, filters sent and received spots by callsign, checks whether each record already exists, and adds it to the correct table if it doesn't. Post-processing cleanup and logging are still in progress. Also includes a bat file that activates the correct venv and then runs the script.

2021-04-07 17:39:53 -04:00
commit ccc68b64a5
3 changed files with 204 additions and 0 deletions
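A quick way to sanity-check an import after a run (not part of the commit itself) is to count the rows in each table with the standard-library sqlite3 module. This is a minimal sketch and assumes wspr.db sits in the current directory, as created by main.py below.

# Minimal sanity check (not part of this commit): count the rows the importer
# has written to each table in wspr.db. Assumes the working directory contains
# wspr.db, the database that main.py below writes to.
import sqlite3

con = sqlite3.connect('wspr.db')
for table in ('Receive', 'Sent'):
    count = con.execute(f'SELECT COUNT(*) FROM {table}').fetchone()[0]
    print(f'{table}: {count} spots')
con.close()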

databaseSetup Normal file

@@ -0,0 +1,41 @@
create table Receive
(
SpotID int
constraint Receive_pk
primary key,
Timestamp TEXT,
Reporter TEXT,
reporterGrid TEXT,
SNR int,
Freq float,
CallSign TEXT,
Power int,
Grid TEXT,
Drift int,
Distance int,
Azimuth int,
Band int,
Version float,
Code int
);
create table Sent
(
SpotID int
constraint Sent_pk
primary key,
Timestamp TEXT,
Reporter TEXT,
reporterGrid TEXT,
SNR int,
Freq float,
CallSign TEXT,
Power int,
Grid TEXT,
Drift int,
Distance int,
Azimuth int,
Band int,
Version float,
Code int
);

main.py Normal file

@@ -0,0 +1,161 @@
from bs4 import BeautifulSoup
import requests
import zipfile
from pandas import read_csv, set_option
from sqlalchemy import create_engine, Column, Integer, String, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from os import remove
import logging
logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
callsign = 'KD8TUF'
linkArray = []
testing = False
set_option('display.max_rows', 3)
set_option('display.max_columns', None)
set_option('display.width', None)
set_option('display.max_colwidth', None)
engine = create_engine('sqlite:///wspr.db')
engine.connect()
Base = declarative_base()
# Spots where this station was the reporter (spots I heard).
class Receive(Base):
    __tablename__ = 'Receive'
    rowId = Column(Integer)
    SpotID = Column(Integer, primary_key=True, autoincrement=True)
    Timestamp = Column(String)
    Reporter = Column(String)
    reporterGrid = Column(String)
    SNR = Column(Integer)
    Freq = Column(Float)
    CallSign = Column(String)
    Power = Column(Integer)
    Grid = Column(String)
    Drift = Column(Integer)
    Distance = Column(Integer)
    Azimuth = Column(Integer)
    Band = Column(Integer)
    Version = Column(Float)
    Code = Column(Integer)


# Spots where this station was the transmitter (spots others heard).
class Sent(Base):
    __tablename__ = 'Sent'
    rowId = Column(Integer)
    SpotID = Column(Integer, primary_key=True, autoincrement=True)
    Timestamp = Column(String)
    Reporter = Column(String)
    reporterGrid = Column(String)
    SNR = Column(Integer)
    Freq = Column(Float)
    CallSign = Column(String)
    Power = Column(Integer)
    Grid = Column(String)
    Drift = Column(Integer)
    Distance = Column(Integer)
    Azimuth = Column(Integer)
    Band = Column(Integer)
    Version = Column(String)
    Code = Column(Integer)
Session = sessionmaker(bind=engine)
session = Session()
# Original plan of scraping olddb
# url = 'https://wsprnet.org/olddb'
# page = requests.get(url)
# soup = BeautifulSoup(page.content, 'html.parser')
# evenrow = soup.find_all(id='evenrow')
# for each in evenrow:
# optionvalue = each.find_all('option')
# for each in optionvalue:
# print(optionvalue[1])
# # print(evenrow)
if testing is False:
    # Scrape the downloads page and take the latest archive link
    # (sixth from the end of the page's link list).
    url = 'http://wsprnet.org/drupal/downloads'
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    results = soup.find_all("div", {"class": "field-item even"})
    links = soup.find_all("a")
    for link in links:
        linkArray.append(link.get('href'))
    ReportDownload = linkArray[-6:-5]
    download = requests.get(ReportDownload[0], stream=True)
    with open('./download.zip', 'wb') as fd:
        for chunk in download.iter_content(chunk_size=128):
            fd.write(chunk)
    print(ReportDownload[0])
with zipfile.ZipFile('./download.zip', 'r') as archive:
    archive.extractall('./')
    filename = archive.namelist()
# The wsprnet archive CSV has no header row, so supply the column names on read
# (renaming after a default read would drop the first spot as a header line).
DailyData = read_csv(filename[0], header=None, names=[
    'SpotID', 'Timestamp', 'Reporter', 'Reporter Grid', 'SNR', 'Freq', 'Call Sign', 'Grid', 'Power',
    'Drift', 'Distance', 'Azimuth', 'Band', 'Version', 'Code'])
# Spots where this station is the reporter go into Receive.
meSee = DailyData[DailyData['Reporter'] == callsign]
ReceiveCount = 0
for index, row in meSee.iterrows():
    # Only add spots that are not already in the table.
    if session.query(Receive.SpotID).filter_by(SpotID=row['SpotID']).first() is None:
        session.add(Receive(
            SpotID=row['SpotID'],
            Timestamp=row['Timestamp'],
            Reporter=row['Reporter'],
            reporterGrid=row['Reporter Grid'],
            SNR=row['SNR'],
            Freq=row['Freq'],
            CallSign=row['Call Sign'],
            Power=row['Power'],
            Grid=row['Grid'],
            Drift=row['Drift'],
            Distance=row['Distance'],
            Azimuth=row['Azimuth'],
            Band=row['Band'],
            Version=row['Version'],
            Code=row['Code'])
        )
        ReceiveCount = ReceiveCount + 1
# Spots where this station is the transmitter go into Sent.
saw = DailyData[DailyData['Call Sign'] == callsign]
SentCount = 0
for index, row in saw.iterrows():
    # print(row['SpotID'])
    # Only add spots that are not already in the table.
    if session.query(Sent.SpotID).filter_by(SpotID=row['SpotID']).first() is None:
        session.add(Sent(
            SpotID=row['SpotID'],
            Timestamp=row['Timestamp'],
            Reporter=row['Reporter'],
            reporterGrid=row['Reporter Grid'],
            SNR=row['SNR'],
            Freq=row['Freq'],
            CallSign=row['Call Sign'],
            Power=row['Power'],
            Grid=row['Grid'],
            Drift=row['Drift'],
            Distance=row['Distance'],
            Azimuth=row['Azimuth'],
            Band=row['Band'],
            Version=row['Version'],
            Code=row['Code'])
        )
        SentCount = SentCount + 1
session.commit()
remove('./download.zip')
remove(filename[0])
logging.info('Import Completed. Imported %i Received and %i Sent', ReceiveCount, SentCount)

run.bat Normal file

@@ -0,0 +1,2 @@
@echo off
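rem Activate the project's venv, switch to the project folder, run the importer, then exit.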
cmd /k "cd /d C:\Users\ICYN3\Documents\dev\wspr\venv\Scripts & activate & cd /d C:\Users\ICYN3\Documents\dev\wspr & python main.py & exit"