Cleaned up and added comments. Added requirements file. Now checks if the database/tables exist and creates them if they don't.
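The headline change is how the schema is managed: instead of hand-run SQL, main.py now asks SQLAlchemy to create each table only when it is missing. A minimal sketch of that pattern, assuming SQLAlchemy 1.4 as pinned in the new requirements file (the model is trimmed to one column for illustration):

from sqlalchemy import create_engine, Column, Integer
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Receive(Base):
    __tablename__ = 'Receive'
    SpotID = Column(Integer, primary_key=True)

engine = create_engine('sqlite:///wspr.db')
# checkfirst=True probes for the table before emitting CREATE TABLE,
# so re-running the script against an existing wspr.db is a no-op here.
Receive.__table__.create(bind=engine, checkfirst=True)

Because creation is now idempotent, the hand-written schema file below can be deleted.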
@@ -1,41 +0,0 @@
-create table Receive
-(
-    SpotID int
-        constraint Receive_pk
-            primary key,
-    Timestamp TEXT,
-    Reporter TEXT,
-    reporterGrid TEXT,
-    SNR int,
-    Freq float,
-    CallSign TEXT,
-    Power int,
-    Grid TEXT,
-    Drift int,
-    Distance int,
-    Azimuth int,
-    Band int,
-    Version float,
-    Code int
-);
-
-create table Sent
-(
-    SpotID int
-        constraint Receive_pk
-            primary key,
-    Timestamp TEXT,
-    Reporter TEXT,
-    reporterGrid TEXT,
-    SNR int,
-    Freq float,
-    CallSign TEXT,
-    Power int,
-    Grid TEXT,
-    Drift int,
-    Distance int,
-    Azimuth int,
-    Band int,
-    Version float,
-    Code int
-);
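The deleted SQL above maps onto SQLAlchemy models in main.py (only Sent's Code column is visible in the diff below, so the rest is inferred). A hedged transcription of the Receive table, column for column; note the dropped SQL reused the constraint name Receive_pk for the Sent table, a duplication the per-class ORM definitions avoid:

from sqlalchemy import Column, Float, Integer, Text
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Receive(Base):
    __tablename__ = 'Receive'  # table name taken from the dropped SQL
    SpotID = Column(Integer, primary_key=True)  # was: constraint Receive_pk primary key
    Timestamp = Column(Text)
    Reporter = Column(Text)
    reporterGrid = Column(Text)
    SNR = Column(Integer)
    Freq = Column(Float)
    CallSign = Column(Text)
    Power = Column(Integer)
    Grid = Column(Text)
    Drift = Column(Integer)
    Distance = Column(Integer)
    Azimuth = Column(Integer)
    Band = Column(Integer)
    Version = Column(Float)
    Code = Column(Integer)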
main.py (59 lines changed)
@@ -8,19 +8,15 @@ from sqlalchemy.orm import sessionmaker
 from os import remove
 import logging
 
-logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
-
 callsign = 'KD8TUF'
 linkArray = []
-testing = False
 
+logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
 set_option('display.max_rows', 3)
 set_option('display.max_columns', None)
 set_option('display.width', None)
 set_option('display.max_colwidth', None)
 
-engine = create_engine('sqlite:///wspr.db')
-engine.connect()
 Base = declarative_base()
 
 
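This hunk moves the logging setup below the constants and drops the module-level engine wiring (it reappears after the model definitions in the next hunk, where the tables can be created immediately). For reference, a stdlib-only sketch of what that basicConfig call writes; the message values and timestamp are illustrative:

import logging

logging.basicConfig(filename='wspr.log', format='%(asctime)s %(message)s', level=logging.INFO)
logging.info('Imported %i Received and %i Sent', 12, 34)
# wspr.log gains a line roughly like:
# 2021-04-06 21:15:02,123 Imported 12 Received and 34 Sent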
@@ -64,46 +60,49 @@ class Sent(Base):
     Code = Column(Integer)
 
 
+engine = create_engine('sqlite:///wspr.db')
+engine.connect()
+Receive.__table__.create(bind=engine, checkfirst=True)
+Sent.__table__.create(bind=engine, checkfirst=True)
 Session = sessionmaker(bind=engine)
 session = Session()
 
-# Original plan of scrapping olddb
-# url = 'https://wsprnet.org/olddb'
-# page = requests.get(url)
-# soup = BeautifulSoup(page.content, 'html.parser')
-# evenrow = soup.find_all(id='evenrow')
-# for each in evenrow:
-#     optionvalue = each.find_all('option')
-#     for each in optionvalue:
-#         print(optionvalue[1])
-# # print(evenrow)
-if testing is False:
-    url = 'http://wsprnet.org/drupal/downloads'
-    page = requests.get(url)
-    soup = BeautifulSoup(page.content, 'html.parser')
-    results = soup.find_all("div", {"class": "field-item even"})
-links = soup.find_all("a")
-for link in links:
+
+url = 'http://wsprnet.org/drupal/downloads'
+page = requests.get(url)
+soup = BeautifulSoup(page.content, 'html.parser')
+results = soup.find_all("div", {"class": "field-item even"})
+
+#Find all the links on the page. Loop through them and get the href tag. Then just grab the one 6 up from the bottom.
+#This should be the most recent monthly zip archive.
+links = soup.find_all("a")
+for link in links:
     linkArray.append(link.get('href'))
 ReportDownload = linkArray[-6:-5]
 
-download = requests.get(ReportDownload[0], stream=True)
-with open('./download.zip', 'wb') as fd:
+#Download the latest zip and save it to the working folder.
+download = requests.get(ReportDownload[0], stream=True)
+with open('./download.zip', 'wb') as fd:
     for chunk in download.iter_content(chunk_size=128):
         fd.write(chunk)
-print(ReportDownload[0])
+# print(ReportDownload[0])
 
+#Unzip the file and grab the CSV filename
 with zipfile.ZipFile('./download.zip', 'r') as zip:
     zip.extractall('./')
     filename = zip.namelist()
 
+#Load the CSV into Pandas and add in the column names
 DailyData = read_csv(filename[0])
 DailyData.columns = ['SpotID', 'Timestamp', 'Reporter', 'Reporter Grid', 'SNR', 'Freq', 'Call Sign', 'Grid', 'Power',
                      'Drift', 'Distance', 'Azimuth', 'Band', 'Version', 'Code']
 
+#Filter the DataFrame to show only reports that I received
 meSee = DailyData[DailyData['Reporter'] == callsign]
 ReceiveCount = 0
 
+#Iterate through each filtered row. Check if the SpotID already exists in the database. If it doesn't, add it to
+#the Receive table.
 for index, row in meSee.iterrows():
     if session.query(Receive.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
         pass
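The new comments explain the linkArray[-6:-5] trick. A tiny sketch with made-up hrefs shows why a slice is used rather than a plain index: slicing keeps the result a list, so the later ReportDownload[0] lookup still works:

linkArray = ['a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8']  # hypothetical hrefs
ReportDownload = linkArray[-6:-5]
print(ReportDownload)     # ['a3'], a one-element list rather than a bare string
print(ReportDownload[0])  # 'a3', the link six entries from the bottom of the page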
@@ -127,8 +126,12 @@ for index, row in meSee.iterrows():
         )
         ReceiveCount = ReceiveCount + 1
 
+#Filter the DataFrame to show only reports of stations that received me
 saw = DailyData[DailyData['Call Sign'] == callsign]
 SentCount = 0
+
+#Iterate through each filtered row. Check if the SpotID already exists in the database. If it doesn't, add it to
+#the Sent table.
 for index, row in saw.iterrows():
     # print(row['SpotID'])
     if session.query(Sent.SpotID).filter_by(SpotID=row['SpotID']).first() is not None:
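Both import loops share the same insert-if-absent pattern: probe the table by primary key, and add the row only when the probe returns None. A condensed sketch, using Receive and session as defined in main.py (the SpotID value is made up):

spot_id = 2741096754  # hypothetical SpotID read from the daily CSV
if session.query(Receive.SpotID).filter_by(SpotID=spot_id).first() is None:
    session.add(Receive(SpotID=spot_id))  # plus the remaining column values

Nothing is committed until the single session.commit() at the end, so a run that dies midway leaves wspr.db unchanged.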
@@ -153,9 +156,9 @@ for index, row in saw.iterrows():
         )
         SentCount = SentCount+1
 
-
+#Commit database. Clean up old files and add the new record counts to the log.
 session.commit()
 remove('./download.zip')
 remove(filename[0])
 
-logging.info('Import Completed. Imported %i Received and %i Sent', ReceiveCount, SentCount)
+logging.info('Downloaded %s. Imported %i Received and %i Sent', ReportDownload[0], ReceiveCount, SentCount)
requirements (new file, 18 lines)
@@ -0,0 +1,18 @@
+beautifulsoup4==4.9.3
+bs4==0.0.1
+certifi==2020.12.5
+chardet==4.0.0
+greenlet==1.0.0
+idna==2.10
+importlib-metadata==3.10.0
+numpy==1.20.2
+pandas==1.2.3
+python-dateutil==2.8.1
+pytz==2021.1
+requests==2.25.1
+six==1.15.0
+soupsieve==2.2.1
+SQLAlchemy==1.4.6
+typing-extensions==3.7.4.3
+urllib3==1.26.4
+zipp==3.4.1
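One usage note: because the new file is named requirements rather than the conventional requirements.txt, the path must be given explicitly, e.g. pip install -r requirements.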