Cleaned up and added comments. Added requirements file. Now checks if the database/tables exist and creates them if they don't.
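For the last point, the table-existence check is SQLAlchemy's checkfirst behaviour (the default for MetaData.create_all). A minimal sketch of the pattern, using a hypothetical spots table rather than this repo's actual models:

from sqlalchemy import MetaData, Table, Column, Integer, Text, create_engine

metadata = MetaData()
# Hypothetical table, for illustration only.
spots = Table('spots', metadata,
              Column('SpotID', Integer, primary_key=True),
              Column('Reporter', Text))

engine = create_engine('sqlite:///example.db')
# checkfirst=True (the default) makes create_all a no-op for tables that
# already exist, so the script can be re-run against the same database.
metadata.create_all(engine, checkfirst=True)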
@@ -1,41 +0,0 @@
create table Receive
(
    SpotID int
        constraint Receive_pk
            primary key,
    Timestamp TEXT,
    Reporter TEXT,
    reporterGrid TEXT,
    SNR int,
    Freq float,
    CallSign TEXT,
    Power int,
    Grid TEXT,
    Drift int,
    Distance int,
    Azimuth int,
    Band int,
    Version float,
    Code int
);

create table Sent
(
    SpotID int
        constraint Receive_pk
            primary key,
    Timestamp TEXT,
    Reporter TEXT,
    reporterGrid TEXT,
    SNR int,
    Freq float,
    CallSign TEXT,
    Power int,
    Grid TEXT,
    Drift int,
    Distance int,
    Azimuth int,
    Band int,
    Version float,
    Code int
);
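The raw DDL above is dropped; main.py now defines the tables as SQLAlchemy declarative models. A hedged sketch of what the declarative equivalent of the Receive table presumably looks like (column names taken from the dropped DDL, attribute casing assumed), together with the checkfirst-style creation call that appears in the diff below:

from sqlalchemy import Column, Integer, Float, Text, create_engine
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Receive(Base):
    # Sketch only: mirrors the dropped CREATE TABLE, not copied from the repo.
    __tablename__ = 'Receive'
    SpotID = Column(Integer, primary_key=True)
    Timestamp = Column(Text)
    Reporter = Column(Text)
    reporterGrid = Column(Text)
    SNR = Column(Integer)
    Freq = Column(Float)
    CallSign = Column(Text)
    Power = Column(Integer)
    Grid = Column(Text)
    Drift = Column(Integer)
    Distance = Column(Integer)
    Azimuth = Column(Integer)
    Band = Column(Integer)
    Version = Column(Float)
    Code = Column(Integer)

engine = create_engine('sqlite:///wspr.db')
# Only emits CREATE TABLE when the table is missing, so repeat runs are safe.
Receive.__table__.create(bind=engine, checkfirst=True)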
main.py
@@ -8,19 +8,15 @@ from sqlalchemy.orm import sessionmaker
from os import remove
import logging

logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)

callsign = 'KD8TUF'
linkArray = []
testing = False

logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
set_option('display.max_rows', 3)
set_option('display.max_columns', None)
set_option('display.width', None)
set_option('display.max_colwidth', None)

engine = create_engine('sqlite:///wspr.db')
engine.connect()
Base = declarative_base()

@@ -64,46 +60,49 @@ class Sent(Base):
Code = Column(Integer)


engine = create_engine('sqlite:///wspr.db')
engine.connect()
Receive.__table__.create(bind=engine, checkfirst=True)
Sent.__table__.create(bind=engine, checkfirst=True)
Session = sessionmaker(bind=engine)
session = Session()

# Original plan of scraping olddb
# url = 'https://wsprnet.org/olddb'
# page = requests.get(url)
# soup = BeautifulSoup(page.content, 'html.parser')
# evenrow = soup.find_all(id='evenrow')
# for each in evenrow:
# optionvalue = each.find_all('option')
# for each in optionvalue:
# print(optionvalue[1])
# # print(evenrow)

if testing is False:
    url = 'http://wsprnet.org/drupal/downloads'
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    results = soup.find_all("div", {"class": "field-item even"})

    # Find all the links on the page. Loop through them and get the href tag. Then just grab the one 6 up from the bottom.
    # This should be the most recent monthly zip archive.
    links = soup.find_all("a")
    for link in links:
        linkArray.append(link.get('href'))
    ReportDownload = linkArray[-6:-5]

    # Download the latest zip and save it to the working folder.
    download = requests.get(ReportDownload[0], stream=True)
    with open('./download.zip', 'wb') as fd:
        for chunk in download.iter_content(chunk_size=128):
            fd.write(chunk)
    print(ReportDownload[0])
    # print(ReportDownload[0])

    # Unzip the file and grab the CSV filename.
    with zipfile.ZipFile('./download.zip', 'r') as zip:
        zip.extractall('./')
        filename = zip.namelist()

# Load the CSV into Pandas and add in the column names.
DailyData = read_csv(filename[0])
DailyData.columns = ['SpotID', 'Timestamp', 'Reporter', 'Reporter Grid', 'SNR', 'Freq', 'Call Sign', 'Grid', 'Power',
                     'Drift', 'Distance', 'Azimuth', 'Band', 'Version', 'Code']

# Filter the DataFrame to show only reports that I received.
meSee = DailyData[DailyData['Reporter'] == callsign]
ReceiveCount = 0

# Iterate through each filtered row. Check if the SpotID already exists in the database. If it doesn't, add it to
# the Receive table.
for index, row in meSee.iterrows():
    if session.query(Receive.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
        pass
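The else branch that actually builds and adds the new Receive row sits in the unchanged lines between this hunk and the next, so it is not shown here. A hypothetical reconstruction of that insert pattern, reusing the names from the hunk above and abbreviating the column list:

# Hypothetical sketch only -- not the repo's actual code.
for index, row in meSee.iterrows():
    if session.query(Receive.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
        pass  # SpotID already in the Receive table; skip it
    else:
        session.add(Receive(SpotID=row['SpotID'],
                            Timestamp=row['Timestamp'],
                            Reporter=row['Reporter'],
                            SNR=row['SNR']))  # remaining columns follow the same pattern
        ReceiveCount = ReceiveCount + 1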
@@ -127,8 +126,12 @@ for index, row in meSee.iterrows():
        )
        ReceiveCount = ReceiveCount + 1

# Filter the DataFrame to show only reports of stations that received me.
saw = DailyData[DailyData['Call Sign'] == callsign]
SentCount = 0

# Iterate through each filtered row. Check if the SpotID already exists in the database. If it doesn't, add it to
# the Sent table.
for index, row in saw.iterrows():
    # print(row['SpotID'])
    if session.query(Sent.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
@@ -153,9 +156,9 @@ for index, row in saw.iterrows():
        )
        SentCount = SentCount+1


# Commit the database. Clean up old files and add the new record count to the log.
session.commit()
remove('./download.zip')
remove(filename[0])

logging.info('Import Completed. Imported %i Received and %i Sent', ReceiveCount, SentCount)
logging.info('Downloaded %s. Imported %i Received and %i Sent', ReportDownload[0], ReceiveCount, SentCount)

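As an illustrative follow-up (not part of this commit), the totals logged above could be cross-checked against the database with the same session and models:

# Illustrative only: count the rows now stored in each table.
received_total = session.query(Receive).count()
sent_total = session.query(Sent).count()
print(received_total, 'received and', sent_total, 'sent spots in wspr.db')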
requirements (new file)
@@ -0,0 +1,18 @@
beautifulsoup4==4.9.3
bs4==0.0.1
certifi==2020.12.5
chardet==4.0.0
greenlet==1.0.0
idna==2.10
importlib-metadata==3.10.0
numpy==1.20.2
pandas==1.2.3
python-dateutil==2.8.1
pytz==2021.1
requests==2.25.1
six==1.15.0
soupsieve==2.2.1
SQLAlchemy==1.4.6
typing-extensions==3.7.4.3
urllib3==1.26.4
zipp==3.4.1