Cleaned up and added comments. Added requirements file. Now checks if the database/tables exist and creates them if they don't.

2021-04-08 10:25:57 -04:00
parent ccc68b64a5
commit 6e5d503876
3 changed files with 52 additions and 72 deletions


@@ -1,41 +0,0 @@
create table Receive
(
    SpotID int
        constraint Receive_pk
            primary key,
    Timestamp TEXT,
    Reporter TEXT,
    reporterGrid TEXT,
    SNR int,
    Freq float,
    CallSign TEXT,
    Power int,
    Grid TEXT,
    Drift int,
    Distance int,
    Azimuth int,
    Band int,
    Version float,
    Code int
);
create table Sent
(
    SpotID int
        constraint Receive_pk
            primary key,
    Timestamp TEXT,
    Reporter TEXT,
    reporterGrid TEXT,
    SNR int,
    Freq float,
    CallSign TEXT,
    Power int,
    Grid TEXT,
    Drift int,
    Distance int,
    Azimuth int,
    Band int,
    Version float,
    Code int
);
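
The raw DDL above is dropped because the tables are now created from the SQLAlchemy declarative models at startup, as the main.py diff below shows. A minimal sketch of that pattern, abridged to a few columns (the model and column names come from the schema above; this is not the full model in main.py):

from sqlalchemy import create_engine, Column, Integer, Text, Float
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Receive(Base):
    __tablename__ = 'Receive'
    SpotID = Column(Integer, primary_key=True)  # same primary key as the dropped DDL
    Timestamp = Column(Text)
    Reporter = Column(Text)
    SNR = Column(Integer)
    Freq = Column(Float)
    # ...remaining columns omitted here

engine = create_engine('sqlite:///wspr.db')
# checkfirst=True emits CREATE TABLE only when the table is missing,
# so the script works against a fresh or an existing wspr.db.
Receive.__table__.create(bind=engine, checkfirst=True)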

main.py (65 lines changed)

@@ -8,19 +8,15 @@ from sqlalchemy.orm import sessionmaker
 from os import remove
 import logging
+logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
 callsign = 'KD8TUF'
 linkArray = []
-testing = False
-logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
 set_option('display.max_rows', 3)
 set_option('display.max_columns', None)
 set_option('display.width', None)
 set_option('display.max_colwidth', None)
-engine = create_engine('sqlite:///wspr.db')
-engine.connect()
 Base = declarative_base()
@@ -64,46 +60,49 @@ class Sent(Base):
     Code = Column(Integer)
+engine = create_engine('sqlite:///wspr.db')
+engine.connect()
+Receive.__table__.create(bind=engine, checkfirst=True)
+Sent.__table__.create(bind=engine, checkfirst=True)
 Session = sessionmaker(bind=engine)
 session = Session()
-# Original plan of scrapping olddb
-# url = 'https://wsprnet.org/olddb'
-# page = requests.get(url)
-# soup = BeautifulSoup(page.content, 'html.parser')
-# evenrow = soup.find_all(id='evenrow')
-# for each in evenrow:
-# optionvalue = each.find_all('option')
-# for each in optionvalue:
-# print(optionvalue[1])
-# # print(evenrow)
-if testing is False:
-    url = 'http://wsprnet.org/drupal/downloads'
-    page = requests.get(url)
-    soup = BeautifulSoup(page.content, 'html.parser')
-    results = soup.find_all("div", {"class": "field-item even"})
-    links = soup.find_all("a")
-    for link in links:
-        linkArray.append(link.get('href'))
-    ReportDownload = linkArray[-6:-5]
-    download = requests.get(ReportDownload[0], stream=True)
-    with open('./download.zip', 'wb') as fd:
-        for chunk in download.iter_content(chunk_size=128):
-            fd.write(chunk)
-    print(ReportDownload[0])
+url = 'http://wsprnet.org/drupal/downloads'
+page = requests.get(url)
+soup = BeautifulSoup(page.content, 'html.parser')
+results = soup.find_all("div", {"class": "field-item even"})
+#Find all the links on the page. Loop through them and get the href tag. Then just grab the one 6 up from the bottom.
+#This should be the most recent monthly zip archive.
+links = soup.find_all("a")
+for link in links:
+    linkArray.append(link.get('href'))
+ReportDownload = linkArray[-6:-5]
+#Download the latest zip and save it to the working folder.
+download = requests.get(ReportDownload[0], stream=True)
+with open('./download.zip', 'wb') as fd:
+    for chunk in download.iter_content(chunk_size=128):
+        fd.write(chunk)
+# print(ReportDownload[0])
+#unzip the file and grab the CSV filename
 with zipfile.ZipFile('./download.zip', 'r') as zip:
     zip.extractall('./')
     filename = zip.namelist()
+#Load the CSV into Pandas and add in the column names
 DailyData = read_csv(filename[0])
 DailyData.columns = ['SpotID', 'Timestamp', 'Reporter', 'Reporter Grid', 'SNR', 'Freq', 'Call Sign', 'Grid', 'Power',
                      'Drift', 'Distance', 'Azimuth', 'Band', 'Version', 'Code']
+#Filter the Dataframe to show only reports that I received
 meSee = DailyData[DailyData['Reporter'] == callsign]
 ReceiveCount = 0
+#Iterate through each filtered row. Check if the SpotID already exists in the database. If it doesn't add it to the
+#the Recieve table
 for index, row in meSee.iterrows():
     if session.query(Receive.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
         pass
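
For readers following the scraping step: linkArray[-6:-5] is a one-element slice taken six anchors from the bottom of the downloads page, so ReportDownload[0] is the URL of the most recent monthly archive. A self-contained sketch of the same download-and-extract flow, assuming the page layout stays as described in the comments (the helper name is illustrative, not from the repo):

import zipfile
import requests
from bs4 import BeautifulSoup

def fetch_latest_archive(url='http://wsprnet.org/drupal/downloads'):
    # Collect every href on the page, then slice out the sixth-from-last entry.
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')
    hrefs = [a.get('href') for a in soup.find_all('a')]
    report_url = hrefs[-6:-5][0]
    # Stream the zip to disk in small chunks rather than holding it in memory.
    with requests.get(report_url, stream=True) as download, open('./download.zip', 'wb') as fd:
        for chunk in download.iter_content(chunk_size=128):
            fd.write(chunk)
    # Extract the archive and return the contained file names (main.py uses the first entry as the CSV).
    with zipfile.ZipFile('./download.zip', 'r') as zf:
        zf.extractall('./')
        return zf.namelist()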
@@ -127,8 +126,12 @@ for index, row in meSee.iterrows():
         )
         ReceiveCount = ReceiveCount + 1
+#Filter the DataFrame to show only reports of stations that recieved me
 saw = DailyData[DailyData['Call Sign'] == callsign]
 SentCount = 0
+#Iterate through each filtered row. Check if the SpotID already exists in the database. If it doesn't add it to the
+#the Sent table
 for index, row in saw.iterrows():
     # print(row['SpotID'])
     if session.query(Sent.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
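
The loop bodies cut off at the hunk boundaries follow a check-then-insert pattern: query for the SpotID first, and only stage a new record when it is absent. A minimal sketch of that idiom, assuming the Sent model and session defined earlier in main.py (the field list is abridged and the helper name is illustrative):

def add_sent_if_new(session, row):
    # Skip rows whose SpotID is already stored; otherwise stage a new Sent record.
    if session.query(Sent.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
        return False
    session.add(Sent(
        SpotID=row['SpotID'],
        Timestamp=row['Timestamp'],
        Reporter=row['Reporter'],
        # ...remaining columns map from the DataFrame the same way
    ))
    return True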
@@ -153,9 +156,9 @@ for index, row in saw.iterrows():
         )
         SentCount = SentCount+1
+#Commit databse. Cleanup old files and add new record count to log.
 session.commit()
 remove('./download.zip')
 remove(filename[0])
-logging.info('Import Completed. Imported %i Received and %i Sent', ReceiveCount, SentCount)
+logging.info('Downloaded %s. Imported %i Received and %i Sent', ReportDownload[0], ReceiveCount, SentCount)
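
The reworked log line keeps logging's lazy %-style formatting: the URL and counters are passed as arguments and only interpolated when the INFO level is enabled (%i behaves like %d). A small stand-alone illustration with made-up values:

import logging

logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
logging.info('Downloaded %s. Imported %i Received and %i Sent', 'example.zip', 3, 1)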

requirements (new file, 18 lines)

@@ -0,0 +1,18 @@
beautifulsoup4==4.9.3
bs4==0.0.1
certifi==2020.12.5
chardet==4.0.0
greenlet==1.0.0
idna==2.10
importlib-metadata==3.10.0
numpy==1.20.2
pandas==1.2.3
python-dateutil==2.8.1
pytz==2021.1
requests==2.25.1
six==1.15.0
soupsieve==2.2.1
SQLAlchemy==1.4.6
typing-extensions==3.7.4.3
urllib3==1.26.4
zipp==3.4.1
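
Since the new file is named requirements rather than the conventional requirements.txt, tools that auto-detect requirements.txt will not pick it up; installing the pinned versions is still a one-liner:

pip install -r requirements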