Initial commit. Two versions are included. The main version accepts a hard-coded qty break. It also includes working mail-option logic, along with a choice of whether to include postage costs. The test version was the original version, but the print qty location was moving too much for the PDF to be parsed easily. I might come back to that. The test version does not include the logic for mailing.

This commit is contained in:
2020-04-15 00:50:29 -04:00
commit cc44202d20
3 changed files with 187 additions and 0 deletions

76
test.py Normal file
View File

@@ -0,0 +1,76 @@
#### The test version attempts to parse both the total cost and the print quantity from the PDFs. There were some issues with the print qty location moving, so I went with the manually entered qty break option to get started.
### This version also lacks the mailing price logic.
from operator import itemgetter
from itertools import groupby
import fitz
import csv
# Input PDF to read. Swap the active line to process "std.pdf" instead.
# doc = fitz.open("std.pdf")
doc = fitz.open("flyers.pdf")

# Page cursor and page total used by the per-page loop that follows.
currentPage = 0
count = doc.pageCount

# Leading label cells of the two output rows. The per-page loop appends one
# more cell to each row for every page it processes.
qtyrow = [
    'Price Input',
    'Type',
    'Comparisons',
    'Options',
    'Fixed Costs',
]
costrow = [
    'Base',
    '',
    '',
    '(none)',
    '\"0\"',
]

# Disabled: interactive prompt for the mailing option, which this version
# does not implement. Kept for reference.
# mailing = (input('Are you mailing: y/N: '))
# if mailing.lower() == 'y':
#     print("Mailing")
#     checkMail = True
# elif mailing.lower() == 'n' or mailing.lower() == "":
#     print("Not Mailing")
#     checkMail = False
# else:
#     print("not a valid choice")
#     exit()
# Rect-like 4-tuples locating the print quantity and the total job cost on a
# page. They are the same for every page, so they are defined once, outside
# the loop.
# To find new rects, print out the words array, copy the output to an editor
# and search for the value you're looking for. If there are several matches,
# estimate which one is right.
Qtyrect = (122.00001525878906, 328.7388916015625, 135.0115966796875, 337.45294189453125)
Totalrect = (559.9500122070312, 593.7672729492188, 584.7210083007812, 602.8164672851562)
# Totalrect = (559.75, 551.71728515625, 584.52099609375, 560.7664794921875)


def _last_line_in_rect(words, rect):
    """Return the text found inside *rect*, or None when nothing is there.

    *words* is the list returned by ``page.getTextWords()``; the first four
    items of each entry are passed to ``fitz.Rect`` and item 4 is the word
    text. Words whose rectangle intersects *rect* are sorted by items
    (3, 0) and grouped by item 3; the words of each group are joined with
    single spaces. When several groups match, the one with the greatest
    item 3 wins, which is what the original inline loops did.
    """
    hits = [w for w in words if fitz.Rect(w[:4]).intersects(rect)]
    hits.sort(key=itemgetter(3, 0))
    text = None
    for _, line_words in groupby(hits, key=itemgetter(3)):
        text = " ".join(w[4] for w in line_words)
    return text


while currentPage < count:
    page = doc[currentPage]
    words = page.getTextWords()

    # Find the print quantity and the total job cost on this page.
    totalqty = _last_line_in_rect(words, Qtyrect)
    totalcost = _last_line_in_rect(words, Totalrect)

    # Fail loudly instead of hitting a NameError on the first page or silently
    # reusing the previous page's value on later pages.
    if totalqty is None:
        raise ValueError(
            'No print quantity found in Qtyrect on page {}'.format(currentPage + 1))
    if totalcost is None:
        raise ValueError(
            'No total cost found in Totalrect on page {}'.format(currentPage + 1))

    # Remove $ and thousands separators from the cost and , from the quantity
    # so values such as "$1,234.56" and "2,500" parse.
    cost = float(totalcost.strip('$').replace(',', ''))
    quantity = int(totalqty.replace(',', ''))
    if quantity == 0:
        raise ValueError(
            'Print quantity is zero on page {}'.format(currentPage + 1))

    # Divide total cost by print quantity to find the per piece price,
    # rounded to 4 decimals.
    perpiece = round(cost / quantity, 4)

    # Add the per piece price and the quantity to the arrays.
    costrow.append('\"' + str(perpiece) + '\"')
    qtyrow.append(totalqty.replace(',', '') + '+ Units')
    # if checkMail is True:
    #     print(words)
    #     Mailrect = ('')
    currentPage = currentPage + 1

# Every page has been read; release the PDF file handle.
doc.close()
# Add the weight sections after all the qty breaks.
qtyrow.extend(['Unit Weight', 'Tare Weight', 'Max Weight'])
# Values are wrapped in literal double quotes, like the per-piece prices.
costrow.extend(['\"' + str(value) + '\"' for value in (.5039, 48, 400)])

# NOTE: this dialect is registered but the writer below does not use it.
csv.register_dialect('unixpwd', delimiter=',', quoting=csv.QUOTE_NONE)

# Write the label row and the value row to pricetable.csv.
# quotechar=None (not "") disables quote handling, so the literal double
# quotes already embedded in the cells are written as-is. An empty string is
# rejected with TypeError on Python 3.11+, while None works on every version
# and produces the same output.
with open('pricetable.csv', 'w', newline='') as f:
    writer = csv.writer(f, quoting=csv.QUOTE_NONE, quotechar=None)
    writer.writerows([qtyrow, costrow])