Initial commit. Downloads the latest wsprnet log, unzips the CSV, loads the records into pandas, filters sent and received spots by callsign, checks whether each record already exists, and adds it to the correct table if it doesn't. Also working on adding some post-processing cleanup and logging. Also includes a bat file that activates the correct venv and then runs the script.

This commit is contained in:
2021-04-07 17:39:53 -04:00
commit ccc68b64a5
3 changed files with 204 additions and 0 deletions

161
main.py Normal file
View File

@@ -0,0 +1,161 @@
from bs4 import BeautifulSoup
import requests
import zipfile
from pandas import read_csv, set_option
from sqlalchemy import create_engine, Column, Integer, String, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from os import remove
import logging
# Every log record goes to wspr.log, prefixed with its timestamp.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(message)s',
    filename='wspr.log',
)

# Callsign the downloaded spots are filtered by (as reporter and as sender).
callsign = 'KD8TUF'
# Receives the href of every <a> element found on the downloads page.
linkArray = []
# Checked with `testing is False` before the downloads page is fetched.
testing = False

# pandas console display settings.
for _option, _value in (
    ('display.max_rows', 3),
    ('display.max_columns', None),
    ('display.width', None),
    ('display.max_colwidth', None),
):
    set_option(_option, _value)

# SQLite database file `wspr.db`, relative to the working directory.
engine = create_engine('sqlite:///wspr.db')
# Opens a connection immediately; the returned object is not kept.
engine.connect()

# Declarative base class shared by the ORM models below.
Base = declarative_base()
class Receive(Base):
    """ORM model for the ``Receive`` table.

    The import script stores here the spots whose ``Reporter`` column
    equals the configured callsign. The column set mirrors the ``Sent``
    model.
    """

    __tablename__ = 'Receive'

    # Not populated by the import script.
    rowId = Column(Integer)
    # Spot id taken from the CSV; used to detect already-imported rows.
    SpotID = Column(Integer, primary_key=True, autoincrement=True)
    Timestamp = Column(String)
    Reporter = Column(String)
    reporterGrid = Column(String)
    SNR = Column(Integer)
    Freq = Column(Float)
    CallSign = Column(String)
    Power = Column(Integer)
    Grid = Column(String)
    Drift = Column(Integer)
    Distance = Column(Integer)
    Azimuth = Column(Integer)
    Band = Column(Integer)
    # Declared as String (was Float) so it agrees with Sent.Version; both
    # tables are filled from the same CSV "Version" column.
    Version = Column(String)
    Code = Column(Integer)
class Sent(Base):
    """ORM model for the ``Sent`` table.

    The import script stores here the spots whose ``Call Sign`` column
    equals the configured callsign.
    """

    __tablename__ = 'Sent'

    # --- keys ---
    rowId = Column(Integer)  # not populated by the import script
    SpotID = Column(Integer, primary_key=True, autoincrement=True)

    # --- who reported the spot, and when ---
    Timestamp = Column(String)
    Reporter = Column(String)
    reporterGrid = Column(String)

    # --- signal as reported ---
    SNR = Column(Integer)
    Freq = Column(Float)

    # --- transmitting station ---
    CallSign = Column(String)
    Power = Column(Integer)
    Grid = Column(String)

    # --- remaining CSV fields ---
    Drift = Column(Integer)
    Distance = Column(Integer)
    Azimuth = Column(Integer)
    Band = Column(Integer)
    Version = Column(String)
    Code = Column(Integer)
# Session factory bound to the SQLite engine, and the single session the
# rest of the script uses for its existence checks, inserts and commit.
Session = sessionmaker(bind=engine)
session = Session()
# Original plan: scraping olddb
# url = 'https://wsprnet.org/olddb'
# page = requests.get(url)
# soup = BeautifulSoup(page.content, 'html.parser')
# evenrow = soup.find_all(id='evenrow')
# for each in evenrow:
# optionvalue = each.find_all('option')
# for each in optionvalue:
# print(optionvalue[1])
# # print(evenrow)
# Column names of the downloaded spot CSV, in file order.
_CSV_COLUMNS = ['SpotID', 'Timestamp', 'Reporter', 'Reporter Grid', 'SNR', 'Freq', 'Call Sign', 'Grid', 'Power',
                'Drift', 'Distance', 'Azimuth', 'Band', 'Version', 'Code']

# CSV column name -> attribute name on the Receive / Sent models.
_CSV_TO_MODEL = {
    'SpotID': 'SpotID',
    'Timestamp': 'Timestamp',
    'Reporter': 'Reporter',
    'Reporter Grid': 'reporterGrid',
    'SNR': 'SNR',
    'Freq': 'Freq',
    'Call Sign': 'CallSign',
    'Power': 'Power',
    'Grid': 'Grid',
    'Drift': 'Drift',
    'Distance': 'Distance',
    'Azimuth': 'Azimuth',
    'Band': 'Band',
    'Version': 'Version',
    'Code': 'Code',
}

# Seconds to wait for the server before giving up instead of hanging forever.
_HTTP_TIMEOUT = 60
# Bytes written per chunk while streaming the archive to disk.
_DOWNLOAD_CHUNK_SIZE = 64 * 1024


def _download_report(destination):
    """Download the report archive linked from the wsprnet downloads page.

    Every ``href`` found on the page is appended to the module-level
    ``linkArray``; the archive is the sixth link from the end of that list.

    Args:
        destination: Path the archive is written to.

    Returns:
        The URL the archive was downloaded from.

    Raises:
        IndexError: If the page holds too few links to locate the archive.
        requests.HTTPError: If the page or the archive request fails.
    """
    url = 'http://wsprnet.org/drupal/downloads'
    page = requests.get(url, timeout=_HTTP_TIMEOUT)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')
    for link in soup.find_all("a"):
        linkArray.append(link.get('href'))

    ReportDownload = linkArray[-6:-5]
    if not ReportDownload:
        raise IndexError('No report download link found on ' + url)

    # Stream to disk so the archive is never held in memory as a whole.
    with requests.get(ReportDownload[0], stream=True, timeout=_HTTP_TIMEOUT) as download:
        download.raise_for_status()
        with open(destination, 'wb') as fd:
            for chunk in download.iter_content(chunk_size=_DOWNLOAD_CHUNK_SIZE):
                fd.write(chunk)
    return ReportDownload[0]


def _import_spots(model, spots):
    """Add the rows of *spots* that are not yet stored in *model*'s table.

    Args:
        model: ORM class to insert into (``Receive`` or ``Sent``).
        spots: DataFrame whose columns are named as in ``_CSV_COLUMNS``.

    Returns:
        The number of rows added to the session. Nothing is committed here.
    """
    added = 0
    for _, row in spots.iterrows():
        already_stored = session.query(model.SpotID).filter_by(SpotID=row['SpotID']).first()
        if already_stored is not None:
            continue
        session.add(model(**{attribute: row[column] for column, attribute in _CSV_TO_MODEL.items()}))
        added += 1
    return added


# NOTE(review): the original indentation was lost; this assumes the
# `testing` flag only guards the download, so a test run reuses an
# existing ./download.zip. Confirm against the real file.
if testing is False:
    print(_download_report('./download.zip'))

filename = []
try:
    with zipfile.ZipFile('./download.zip', 'r') as archive:
        archive.extractall('./')
        filename = archive.namelist()

    # The CSV has no header row: pass the names explicitly so the first
    # spot is read as data instead of being swallowed as a header.
    DailyData = read_csv(filename[0], header=None, names=_CSV_COLUMNS)

    # Spots reported by this station.
    meSee = DailyData[DailyData['Reporter'] == callsign]
    ReceiveCount = _import_spots(Receive, meSee)

    # Spots of this station's transmissions reported by others.
    saw = DailyData[DailyData['Call Sign'] == callsign]
    SentCount = _import_spots(Sent, saw)

    session.commit()
finally:
    # Clean up the downloaded files even when the import fails part-way.
    remove('./download.zip')
    if filename:
        remove(filename[0])

logging.info('Import Completed. Imported %i Received and %i Sent', ReceiveCount, SentCount)