"""Download the latest monthly WSPR spot archive from wsprnet.org and import
every new spot involving `callsign` into a local SQLite database.

Spots where we are the reporting (receiving) station go into the ``Receive``
table; spots where we are the transmitting station go into the ``Sent`` table.
Already-imported spots (matched on SpotID) are skipped, so the script is safe
to re-run against the same archive.
"""

from bs4 import BeautifulSoup
import requests
import zipfile
from pandas import read_csv, set_option
from sqlalchemy import create_engine, Column, Integer, String, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from os import remove
import logging

callsign = 'KD8TUF'

logging.basicConfig(filename='wspr.log',
                    format='%(asctime)s %(message)s',
                    level=logging.INFO)

# Pandas display options — only affect interactive/debug printing.
set_option('display.max_rows', 3)
set_option('display.max_columns', None)
set_option('display.width', None)
set_option('display.max_colwidth', None)

Base = declarative_base()


class Receive(Base):
    """A WSPR spot in which we were the reporting (receiving) station."""
    __tablename__ = 'Receive'
    rowId = Column(Integer)  # never populated; kept for schema compatibility
    SpotID = Column(Integer, primary_key=True, autoincrement=True)
    Timestamp = Column(String)
    Reporter = Column(String)
    reporterGrid = Column(String)
    SNR = Column(Integer)
    Freq = Column(Float)
    CallSign = Column(String)
    Power = Column(Integer)
    Grid = Column(String)
    Drift = Column(Integer)
    Distance = Column(Integer)
    Azimuth = Column(Integer)
    Band = Column(Integer)
    Version = Column(String)
    Code = Column(Integer)


class Sent(Base):
    """A WSPR spot in which we were the transmitting station."""
    __tablename__ = 'Sent'
    rowId = Column(Integer)  # never populated; kept for schema compatibility
    SpotID = Column(Integer, primary_key=True, autoincrement=True)
    Timestamp = Column(String)
    Reporter = Column(String)
    reporterGrid = Column(String)
    SNR = Column(Integer)
    Freq = Column(Float)
    CallSign = Column(String)
    Power = Column(Integer)
    Grid = Column(String)
    Drift = Column(Integer)
    Distance = Column(Integer)
    Azimuth = Column(Integer)
    Band = Column(Integer)
    Version = Column(String)
    Code = Column(Integer)


engine = create_engine('sqlite:///wspr.db')
engine.connect()
Receive.__table__.create(bind=engine, checkfirst=True)
Sent.__table__.create(bind=engine, checkfirst=True)
Session = sessionmaker(bind=engine)
session = Session()

# Scrape the downloads page. The 6th <a> href from the bottom is (by the
# page's current layout) the most recent monthly zip archive.
# NOTE(review): this position-based selection is fragile — it breaks if
# wsprnet.org changes the page layout; confirm against the live page.
url = 'http://wsprnet.org/drupal/downloads'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')
linkArray = [link.get('href') for link in soup.find_all('a')]
ReportDownload = linkArray[-6]
print(ReportDownload)

# Download the latest zip and save it to the working folder.
# BUG FIX: the original wrapped this in a bare `except: pass`, silently
# swallowing any failure and then crashing later at the unzip step with a
# misleading traceback. Log the real cause and abort instead.
try:
    download = requests.get(ReportDownload, stream=True)
    download.raise_for_status()
    with open('./download.zip', 'wb') as fd:
        for chunk in download.iter_content(chunk_size=8192):
            fd.write(chunk)
except requests.RequestException:
    logging.exception('Failed to download %s', ReportDownload)
    raise

# Unzip the archive and record the name(s) of the CSV file(s) inside it.
with zipfile.ZipFile('./download.zip', 'r') as archive:
    archive.extractall('./')
    filename = archive.namelist()

# wsprnet archives have NO header row.
# BUG FIX: the original read the file with the default header=0 and then
# overwrote `.columns`, which silently discarded the first spot in the file.
CSV_COLUMNS = ['SpotID', 'Timestamp', 'Reporter', 'Reporter Grid', 'SNR',
               'Freq', 'Call Sign', 'Grid', 'Power', 'Drift', 'Distance',
               'Azimuth', 'Band', 'Version', 'Code']
DailyData = read_csv(filename[0], header=None, names=CSV_COLUMNS)


def import_spots(model, frame):
    """Insert every row of `frame` whose SpotID is not already in `model`'s
    table. Returns the number of rows added.

    Existing SpotIDs are preloaded into a set once, instead of issuing one
    SELECT per CSV row as the original did — same dedup semantics, but O(1)
    membership tests over a multi-million-row archive.
    """
    existing = {spot_id for (spot_id,) in session.query(model.SpotID)}
    added = 0
    for _, row in frame.iterrows():
        if row['SpotID'] in existing:
            continue
        session.add(model(
            SpotID=row['SpotID'],
            Timestamp=row['Timestamp'],
            Reporter=row['Reporter'],
            reporterGrid=row['Reporter Grid'],
            SNR=row['SNR'],
            Freq=row['Freq'],
            CallSign=row['Call Sign'],
            Power=row['Power'],
            Grid=row['Grid'],
            Drift=row['Drift'],
            Distance=row['Distance'],
            Azimuth=row['Azimuth'],
            Band=row['Band'],
            Version=row['Version'],
            Code=row['Code'],
        ))
        existing.add(row['SpotID'])  # guard against duplicates within the file
        added += 1
    return added


# Reports that I received, and reports of stations that received me.
ReceiveCount = import_spots(Receive, DailyData[DailyData['Reporter'] == callsign])
SentCount = import_spots(Sent, DailyData[DailyData['Call Sign'] == callsign])

# Commit the database, clean up the downloaded files, and log the counts.
session.commit()
remove('./download.zip')
remove(filename[0])
logging.info('Downloaded %s. Imported %i Received and %i Sent',
             ReportDownload, ReceiveCount, SentCount)