from bs4 import BeautifulSoup
import requests
import zipfile
from pandas import read_csv, set_option
from sqlalchemy import create_engine, Column, Integer, String, Float
from sqlalchemy.orm import declarative_base, sessionmaker
from os import remove
import logging

logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)

callsign = 'KD8TUF'
linkArray = []
testing = False  # when True, skip the download and reuse an existing ./download.zip

# pandas display options (only affect debugging output)
set_option('display.max_rows', 3)
set_option('display.max_columns', None)
set_option('display.width', None)
set_option('display.max_colwidth', None)

engine = create_engine('sqlite:///wspr.db')
Base = declarative_base()


class Receive(Base):
    """Spots where this station was the reporter (stations we heard)."""
    __tablename__ = 'Receive'
    rowId = Column(Integer)
    SpotID = Column(Integer, primary_key=True, autoincrement=True)
    Timestamp = Column(String)
    Reporter = Column(String)
    reporterGrid = Column(String)
    SNR = Column(Integer)
    Freq = Column(Float)
    CallSign = Column(String)
    Power = Column(Integer)
    Grid = Column(String)
    Drift = Column(Integer)
    Distance = Column(Integer)
    Azimuth = Column(Integer)
    Band = Column(Integer)
    Version = Column(String)  # version strings such as '2.3.0' are not numeric
    Code = Column(Integer)


class Sent(Base):
    """Spots where this station was the transmitter (stations that heard us)."""
    __tablename__ = 'Sent'
    rowId = Column(Integer)
    SpotID = Column(Integer, primary_key=True, autoincrement=True)
    Timestamp = Column(String)
    Reporter = Column(String)
    reporterGrid = Column(String)
    SNR = Column(Integer)
    Freq = Column(Float)
    CallSign = Column(String)
    Power = Column(Integer)
    Grid = Column(String)
    Drift = Column(Integer)
    Distance = Column(Integer)
    Azimuth = Column(Integer)
    Band = Column(Integer)
    Version = Column(String)
    Code = Column(Integer)


Base.metadata.create_all(engine)  # create both tables on the first run
Session = sessionmaker(bind=engine)
session = Session()

# Original plan of scraping olddb
# url = 'https://wsprnet.org/olddb'
# page = requests.get(url)
# soup = BeautifulSoup(page.content, 'html.parser')
# evenrow = soup.find_all(id='evenrow')
# for each in evenrow:
#     optionvalue = each.find_all('option')
#     for each in optionvalue:
#         print(optionvalue[1])
# # print(evenrow)

if not testing:
    # Scrape the downloads page and grab the link to the current daily dump.
    url = 'http://wsprnet.org/drupal/downloads'
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    results = soup.find_all("div", {"class": "field-item even"})  # currently unused
    links = soup.find_all("a")
    for link in links:
        linkArray.append(link.get('href'))
    # Position-based: the sixth link from the end points at the daily archive,
    # so this breaks if the page layout changes.
    ReportDownload = linkArray[-6:-5]
    download = requests.get(ReportDownload[0], stream=True)
    with open('./download.zip', 'wb') as fd:
        for chunk in download.iter_content(chunk_size=128):
            fd.write(chunk)
    print(ReportDownload[0])

with zipfile.ZipFile('./download.zip', 'r') as archive:
    archive.extractall('./')
    filename = archive.namelist()

# The WSPR dump has no header row, so supply the column names explicitly
# (reading it without header=None would silently consume the first spot as a header).
DailyData = read_csv(filename[0], header=None, names=[
    'SpotID', 'Timestamp', 'Reporter', 'Reporter Grid', 'SNR', 'Freq',
    'Call Sign', 'Grid', 'Power', 'Drift', 'Distance', 'Azimuth',
    'Band', 'Version', 'Code'])

# Spots this station received, skipping any SpotID already imported.
meSee = DailyData[DailyData['Reporter'] == callsign]
ReceiveCount = 0
for index, row in meSee.iterrows():
    if session.query(Receive.SpotID).filter_by(SpotID=row['SpotID']).first() is None:
        session.add(Receive(
            SpotID=row['SpotID'],
            Timestamp=row['Timestamp'],
            Reporter=row['Reporter'],
            reporterGrid=row['Reporter Grid'],
            SNR=row['SNR'],
            Freq=row['Freq'],
            CallSign=row['Call Sign'],
            Power=row['Power'],
            Grid=row['Grid'],
            Drift=row['Drift'],
            Distance=row['Distance'],
            Azimuth=row['Azimuth'],
            Band=row['Band'],
            Version=row['Version'],
            Code=row['Code']))
        ReceiveCount += 1
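# Possible speed-up (a sketch, not part of the original flow): rather than
# issuing one SELECT per row, the already-imported SpotIDs could be pulled
# into a set once and membership-tested in pandas. `existing` and `new_rows`
# are hypothetical names introduced only for this sketch.
# existing = {spot_id for (spot_id,) in session.query(Receive.SpotID)}
# new_rows = meSee[~meSee['SpotID'].isin(existing)]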
# Spots of this station heard by others, with the same SpotID dedupe.
saw = DailyData[DailyData['Call Sign'] == callsign]
SentCount = 0
for index, row in saw.iterrows():
    # print(row['SpotID'])
    if session.query(Sent.SpotID).filter_by(SpotID=row['SpotID']).first() is None:
        session.add(Sent(
            SpotID=row['SpotID'],
            Timestamp=row['Timestamp'],
            Reporter=row['Reporter'],
            reporterGrid=row['Reporter Grid'],
            SNR=row['SNR'],
            Freq=row['Freq'],
            CallSign=row['Call Sign'],
            Power=row['Power'],
            Grid=row['Grid'],
            Drift=row['Drift'],
            Distance=row['Distance'],
            Azimuth=row['Azimuth'],
            Band=row['Band'],
            Version=row['Version'],
            Code=row['Code']))
        SentCount += 1

session.commit()

# Clean up the downloaded archive and the extracted CSV.
remove('./download.zip')
remove(filename[0])
logging.info('Import Completed. Imported %i Received and %i Sent', ReceiveCount, SentCount)
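# Example (a sketch): querying the imported spots back out of wspr.db, e.g.
# the three most distant stations that heard this callsign. Uses only the
# schema defined above; uncomment to run after an import has completed.
# for spot in session.query(Sent).order_by(Sent.Distance.desc()).limit(3):
#     print(spot.Reporter, spot.reporterGrid, spot.Distance)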