Cleaned up and added comments. Added requirements file. Now checks if the database/tables exist and creates them if they don't.

2021-04-08 10:25:57 -04:00
parent ccc68b64a5
commit 6e5d503876
3 changed files with 52 additions and 72 deletions


@@ -1,41 +0,0 @@
create table Receive
(
    SpotID int
        constraint Receive_pk
            primary key,
    Timestamp TEXT,
    Reporter TEXT,
    reporterGrid TEXT,
    SNR int,
    Freq float,
    CallSign TEXT,
    Power int,
    Grid TEXT,
    Drift int,
    Distance int,
    Azimuth int,
    Band int,
    Version float,
    Code int
);
create table Sent
(
    SpotID int
        constraint Receive_pk
            primary key,
    Timestamp TEXT,
    Reporter TEXT,
    reporterGrid TEXT,
    SNR int,
    Freq float,
    CallSign TEXT,
    Power int,
    Grid TEXT,
    Drift int,
    Distance int,
    Azimuth int,
    Band int,
    Version float,
    Code int
);
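
The raw DDL above is dropped because the tables are now created from the SQLAlchemy declarative models at startup, as the main.py diff below shows. A minimal sketch of that pattern, abridged to a few columns (the model and column names come from the schema above; this is not the full model in main.py):

from sqlalchemy import create_engine, Column, Integer, Text, Float
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Receive(Base):
    __tablename__ = 'Receive'
    SpotID = Column(Integer, primary_key=True)  # same primary key as the dropped DDL
    Timestamp = Column(Text)
    Reporter = Column(Text)
    SNR = Column(Integer)
    Freq = Column(Float)
    # ...remaining columns omitted here

engine = create_engine('sqlite:///wspr.db')
# checkfirst=True emits CREATE TABLE only when the table is missing,
# so the script works against a fresh or an existing wspr.db.
Receive.__table__.create(bind=engine, checkfirst=True)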

main.py (65 lines changed)

@@ -8,19 +8,15 @@ from sqlalchemy.orm import sessionmaker
 from os import remove
 import logging
+logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
 callsign = 'KD8TUF'
 linkArray = []
-testing = False
-logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
 set_option('display.max_rows', 3)
 set_option('display.max_columns', None)
 set_option('display.width', None)
 set_option('display.max_colwidth', None)
-engine = create_engine('sqlite:///wspr.db')
-engine.connect()
 Base = declarative_base()
@@ -64,46 +60,49 @@ class Sent(Base):
     Code = Column(Integer)
+engine = create_engine('sqlite:///wspr.db')
+engine.connect()
+Receive.__table__.create(bind=engine, checkfirst=True)
+Sent.__table__.create(bind=engine, checkfirst=True)
 Session = sessionmaker(bind=engine)
 session = Session()
-# Original plan of scrapping olddb
-# url = 'https://wsprnet.org/olddb'
-# page = requests.get(url)
-# soup = BeautifulSoup(page.content, 'html.parser')
-# evenrow = soup.find_all(id='evenrow')
-# for each in evenrow:
-# optionvalue = each.find_all('option')
-# for each in optionvalue:
-# print(optionvalue[1])
-# # print(evenrow)
-if testing is False:
-    url = 'http://wsprnet.org/drupal/downloads'
-    page = requests.get(url)
-    soup = BeautifulSoup(page.content, 'html.parser')
-    results = soup.find_all("div", {"class": "field-item even"})
-    links = soup.find_all("a")
-    for link in links:
-        linkArray.append(link.get('href'))
-    ReportDownload = linkArray[-6:-5]
-    download = requests.get(ReportDownload[0], stream=True)
-    with open('./download.zip', 'wb') as fd:
-        for chunk in download.iter_content(chunk_size=128):
-            fd.write(chunk)
-    print(ReportDownload[0])
+url = 'http://wsprnet.org/drupal/downloads'
+page = requests.get(url)
+soup = BeautifulSoup(page.content, 'html.parser')
+results = soup.find_all("div", {"class": "field-item even"})
+#Find all the links on the page. Loop through them and get the href tag. Then just grab the one 6 up from the bottom.
+#This should be the most recent monthly zip archive.
+links = soup.find_all("a")
+for link in links:
+    linkArray.append(link.get('href'))
+ReportDownload = linkArray[-6:-5]
+#Download the latest zip and save it to the working folder.
+download = requests.get(ReportDownload[0], stream=True)
+with open('./download.zip', 'wb') as fd:
+    for chunk in download.iter_content(chunk_size=128):
+        fd.write(chunk)
+# print(ReportDownload[0])
+#unzip the file and grab the CSV filename
 with zipfile.ZipFile('./download.zip', 'r') as zip:
     zip.extractall('./')
     filename = zip.namelist()
+#Load the CSV into Pandas and add in the column names
 DailyData = read_csv(filename[0])
 DailyData.columns = ['SpotID', 'Timestamp', 'Reporter', 'Reporter Grid', 'SNR', 'Freq', 'Call Sign', 'Grid', 'Power',
                      'Drift', 'Distance', 'Azimuth', 'Band', 'Version', 'Code']
+#Filter the Dataframe to show only reports that I received
 meSee = DailyData[DailyData['Reporter'] == callsign]
 ReceiveCount = 0
+#Iterate through each filtered row. Check if the SpotID already exists in the database. If it doesn't add it to the
+#the Recieve table
 for index, row in meSee.iterrows():
     if session.query(Receive.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
         pass
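
For readers following the scraping step: linkArray[-6:-5] is a one-element slice taken six anchors from the bottom of the downloads page, so ReportDownload[0] is the URL of the most recent monthly archive. A self-contained sketch of the same download-and-extract flow, assuming the page layout stays as described in the comments (the helper name is illustrative, not from the repo):

import zipfile
import requests
from bs4 import BeautifulSoup

def fetch_latest_archive(url='http://wsprnet.org/drupal/downloads'):
    # Collect every href on the page, then slice out the sixth-from-last entry.
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')
    hrefs = [a.get('href') for a in soup.find_all('a')]
    report_url = hrefs[-6:-5][0]
    # Stream the zip to disk in small chunks rather than holding it in memory.
    with requests.get(report_url, stream=True) as download, open('./download.zip', 'wb') as fd:
        for chunk in download.iter_content(chunk_size=128):
            fd.write(chunk)
    # Extract the archive and return the contained file names (main.py uses the first entry as the CSV).
    with zipfile.ZipFile('./download.zip', 'r') as zf:
        zf.extractall('./')
        return zf.namelist()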
@@ -127,8 +126,12 @@ for index, row in meSee.iterrows():
         )
         ReceiveCount = ReceiveCount + 1
+#Filter the DataFrame to show only reports of stations that recieved me
 saw = DailyData[DailyData['Call Sign'] == callsign]
 SentCount = 0
+#Iterate through each filtered row. Check if the SpotID already exists in the database. If it doesn't add it to the
+#the Sent table
 for index, row in saw.iterrows():
     # print(row['SpotID'])
     if session.query(Sent.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
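
The loop bodies cut off at the hunk boundaries follow a check-then-insert pattern: query for the SpotID first, and only stage a new record when it is absent. A minimal sketch of that idiom, assuming the Sent model and session defined earlier in main.py (the field list is abridged and the helper name is illustrative):

def add_sent_if_new(session, row):
    # Skip rows whose SpotID is already stored; otherwise stage a new Sent record.
    if session.query(Sent.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
        return False
    session.add(Sent(
        SpotID=row['SpotID'],
        Timestamp=row['Timestamp'],
        Reporter=row['Reporter'],
        # ...remaining columns map from the DataFrame the same way
    ))
    return True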
@@ -153,9 +156,9 @@ for index, row in saw.iterrows():
         )
         SentCount = SentCount+1
+#Commit databse. Cleanup old files and add new record count to log.
 session.commit()
 remove('./download.zip')
 remove(filename[0])
-logging.info('Import Completed. Imported %i Received and %i Sent', ReceiveCount, SentCount)
+logging.info('Downloaded %s. Imported %i Received and %i Sent', ReportDownload[0], ReceiveCount, SentCount)
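
The reworked log line keeps logging's lazy %-style formatting: the URL and counters are passed as arguments and only interpolated when the INFO level is enabled (%i behaves like %d). A small stand-alone illustration with made-up values:

import logging

logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
logging.info('Downloaded %s. Imported %i Received and %i Sent', 'example.zip', 3, 1)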

requirements (new file, 18 lines)

@@ -0,0 +1,18 @@
beautifulsoup4==4.9.3
bs4==0.0.1
certifi==2020.12.5
chardet==4.0.0
greenlet==1.0.0
idna==2.10
importlib-metadata==3.10.0
numpy==1.20.2
pandas==1.2.3
python-dateutil==2.8.1
pytz==2021.1
requests==2.25.1
six==1.15.0
soupsieve==2.2.1
SQLAlchemy==1.4.6
typing-extensions==3.7.4.3
urllib3==1.26.4
zipp==3.4.1
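
Since the new file is named requirements rather than the conventional requirements.txt, tools that auto-detect requirements.txt will not pick it up; installing the pinned versions is still a one-liner:

pip install -r requirements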