From 6e5d5038762eb1c7725cad27a755f0e7b956ffe5 Mon Sep 17 00:00:00 2001
From: Dan Dembinski
Date: Thu, 8 Apr 2021 10:25:57 -0400
Subject: [PATCH] Cleaned up and added comments. Added a requirements file.
 Now checks whether the database tables exist and creates them if they don't.

---
 databaseSetup | 41 --------------------------------------
 main.py       | 65 +++++++++++++++++++++++++++------------------------
 requirements  | 18 ++++++++++++++
 3 files changed, 52 insertions(+), 72 deletions(-)
 delete mode 100644 databaseSetup
 create mode 100644 requirements

diff --git a/databaseSetup b/databaseSetup
deleted file mode 100644
index 7aea2d8..0000000
--- a/databaseSetup
+++ /dev/null
@@ -1,41 +0,0 @@
-create table Receive
-(
-    SpotID int
-        constraint Receive_pk
-            primary key,
-    Timestamp TEXT,
-    Reporter TEXT,
-    reporterGrid TEXT,
-    SNR int,
-    Freq float,
-    CallSign TEXT,
-    Power int,
-    Grid TEXT,
-    Drift int,
-    Distance int,
-    Azimuth int,
-    Band int,
-    Version float,
-    Code int
-);
-
-create table Sent
-(
-    SpotID int
-        constraint Receive_pk
-            primary key,
-    Timestamp TEXT,
-    Reporter TEXT,
-    reporterGrid TEXT,
-    SNR int,
-    Freq float,
-    CallSign TEXT,
-    Power int,
-    Grid TEXT,
-    Drift int,
-    Distance int,
-    Azimuth int,
-    Band int,
-    Version float,
-    Code int
-);
\ No newline at end of file
diff --git a/main.py b/main.py
index af3236b..5951697 100644
--- a/main.py
+++ b/main.py
@@ -8,19 +8,15 @@ from sqlalchemy.orm import sessionmaker
 from os import remove
 import logging
 
-logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
-
 callsign = 'KD8TUF'
 linkArray = []
-testing = False
+logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
 
 set_option('display.max_rows', 3)
 set_option('display.max_columns', None)
 set_option('display.width', None)
 set_option('display.max_colwidth', None)
 
-engine = create_engine('sqlite:///wspr.db')
-engine.connect()
 Base = declarative_base()
 
 
@@ -64,46 +60,49 @@ class Sent(Base):
     Code = Column(Integer)
 
 
+engine = create_engine('sqlite:///wspr.db')
+engine.connect()
+Receive.__table__.create(bind=engine, checkfirst=True)
+Sent.__table__.create(bind=engine, checkfirst=True)
+
 Session = sessionmaker(bind=engine)
 session = Session()
 
-# Original plan of scrapping olddb
-# url = 'https://wsprnet.org/olddb'
-# page = requests.get(url)
-# soup = BeautifulSoup(page.content, 'html.parser')
-# evenrow = soup.find_all(id='evenrow')
-# for each in evenrow:
-#     optionvalue = each.find_all('option')
-#     for each in optionvalue:
-#         print(optionvalue[1])
-# # print(evenrow)
-if testing is False:
-    url = 'http://wsprnet.org/drupal/downloads'
-    page = requests.get(url)
-    soup = BeautifulSoup(page.content, 'html.parser')
-    results = soup.find_all("div", {"class": "field-item even"})
+url = 'http://wsprnet.org/drupal/downloads'
+page = requests.get(url)
+soup = BeautifulSoup(page.content, 'html.parser')
+results = soup.find_all("div", {"class": "field-item even"})
 
-    links = soup.find_all("a")
-    for link in links:
-        linkArray.append(link.get('href'))
-    ReportDownload = linkArray[-6:-5]
+#Find all the links on the page, loop through them to collect each href, then take the sixth link from the end.
+#This should be the most recent monthly zip archive.
+links = soup.find_all("a")
+for link in links:
+    linkArray.append(link.get('href'))
+ReportDownload = linkArray[-6:-5]
 
+#Download the latest zip and save it to the working folder.
+download = requests.get(ReportDownload[0], stream=True)
+with open('./download.zip', 'wb') as fd:
+    for chunk in download.iter_content(chunk_size=128):
+        fd.write(chunk)
+# print(ReportDownload[0])
 
+#Unzip the file and grab the CSV filename.
 with zipfile.ZipFile('./download.zip', 'r') as zip:
     zip.extractall('./')
     filename = zip.namelist()
 
+#Load the CSV into pandas and add the column names.
 DailyData = read_csv(filename[0])
 DailyData.columns = ['SpotID', 'Timestamp', 'Reporter', 'Reporter Grid', 'SNR', 'Freq', 'Call Sign', 'Grid',
                      'Power', 'Drift', 'Distance', 'Azimuth', 'Band', 'Version', 'Code']
 
+#Filter the DataFrame to show only reports that I received.
 meSee = DailyData[DailyData['Reporter'] == callsign]
 ReceiveCount = 0
+
+#Iterate through each filtered row and check whether the SpotID already exists in the database. If it doesn't,
+#add the row to the Receive table.
 for index, row in meSee.iterrows():
     if session.query(Receive.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
         pass
@@ -127,8 +126,12 @@ for index, row in meSee.iterrows():
         )
         ReceiveCount = ReceiveCount + 1
 
+#Filter the DataFrame to show only reports of stations that received me.
 saw = DailyData[DailyData['Call Sign'] == callsign]
 SentCount = 0
+
+#Iterate through each filtered row and check whether the SpotID already exists in the database. If it doesn't,
+#add the row to the Sent table.
 for index, row in saw.iterrows():
     # print(row['SpotID'])
     if session.query(Sent.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
@@ -153,9 +156,9 @@ for index, row in saw.iterrows():
         )
         SentCount = SentCount+1
 
-
+#Commit the database session. Clean up the downloaded files and log the new record counts.
 session.commit()
 remove('./download.zip')
 remove(filename[0])
-logging.info('Import Completed. Imported %i Received and %i Sent', ReceiveCount, SentCount)
+logging.info('Downloaded %s. Imported %i Received and %i Sent', ReportDownload[0], ReceiveCount, SentCount)
diff --git a/requirements b/requirements
new file mode 100644
index 0000000..f89c1d3
--- /dev/null
+++ b/requirements
@@ -0,0 +1,18 @@
+beautifulsoup4==4.9.3
+bs4==0.0.1
+certifi==2020.12.5
+chardet==4.0.0
+greenlet==1.0.0
+idna==2.10
+importlib-metadata==3.10.0
+numpy==1.20.2
+pandas==1.2.3
+python-dateutil==2.8.1
+pytz==2021.1
+requests==2.25.1
+six==1.15.0
+soupsieve==2.2.1
+SQLAlchemy==1.4.6
+typing-extensions==3.7.4.3
+urllib3==1.26.4
+zipp==3.4.1
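
Note on the table-creation change: the per-table create(checkfirst=True) calls
added above are what make repeated runs safe. For reference, a minimal sketch
of the equivalent metadata-level form, assuming SQLAlchemy 1.4 as pinned in
requirements; the trimmed Receive model here is illustrative only, the real
models live in main.py:

    from sqlalchemy import create_engine, Column, Integer, Text
    from sqlalchemy.orm import declarative_base

    Base = declarative_base()

    class Receive(Base):
        __tablename__ = 'Receive'
        SpotID = Column(Integer, primary_key=True)  # trimmed; main.py defines the full column set
        Reporter = Column(Text)

    engine = create_engine('sqlite:///wspr.db')
    # create_all() checks for each table before issuing CREATE TABLE
    # (checkfirst=True is the default), so re-running the importer never
    # fails with "table already exists".
    Base.metadata.create_all(engine)

The one create_all() call covers every model registered on Base, so it scales
a little better than per-table create() calls if more tables are added later.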
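The import loops rely on a query-before-insert check to stay idempotent when
consecutive daily archives overlap. A minimal sketch of that pattern,
continuing from the sketch above (the two-column frame is a hypothetical
stand-in for the filtered meSee/saw DataFrames):

    import pandas as pd
    from sqlalchemy.orm import sessionmaker

    Session = sessionmaker(bind=engine)
    session = Session()

    # Stand-in for the filtered DataFrame (meSee / saw in main.py).
    frame = pd.DataFrame([{'SpotID': 1, 'Reporter': 'KD8TUF'}])

    count = 0
    for index, row in frame.iterrows():
        # Only stage an insert when the SpotID is not already in the table,
        # so re-importing an archive never creates duplicate rows.
        # int() casts pandas' numpy integers to plain Python ints for sqlite.
        if session.query(Receive.SpotID).filter_by(SpotID=int(row['SpotID'])).first() is None:
            session.add(Receive(SpotID=int(row['SpotID']), Reporter=row['Reporter']))
            count += 1
    session.commit()  # one commit at the end, matching main.py

The same loop body serves the Sent table with the filter switched from
Reporter to Call Sign.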