commit cc44202d20a90c9f6e8fea68592f60bf5c8c044d
Author: Dan Dembinski
Date:   Wed Apr 15 00:50:29 2020 -0400

    Initial commit. Two versions included. Main version accepts a hard coded qty break.
    Also included working mail option logic along with choice whether to include postage costs.
    Test version, was the original version, but the print qty location was moving too much
    for the PDF to parse easily. I might come back to that.
    The test version does not include logic for mailing

diff --git a/main.py b/main.py
new file mode 100644
index 0000000..72e19a4
--- /dev/null
+++ b/main.py
@@ -0,0 +1,129 @@
+from operator import itemgetter
+from itertools import groupby
+import sys
+import fitz
+import csv
+
+doc = fitz.open("std.pdf")
+# doc = fitz.open("flyers.pdf")
+# doc = fitz.open("poster.pdf")
+
+####### IMPORT MAILING RATE SECTION #########
+# Per-piece postage rates and the quantity cutoff between them. Kept as
+# strings because they are echoed to the user; converted where used.
+psFCrate = '.47'
+psSCrate = '.25'
+psFCcutoff = '500'
+
+count = doc.pageCount
+currentPage = 0
+currentBreak = 1
+# The first column is always priced at qty 1; later ones step by `breaks`.
+firstBreak = True
+
+# Leading cells of each CSV row; one cell is appended per quantity break.
+qtyrow = ['Price Input', 'Type', 'Comparisons', 'Options', 'Fixed Costs']
+costrow =['Base','','','(none)','\"0\"']
+mailrow = ['OptionalDataList','FormFilling','Equals','yes','\"0\"']
+
+breaks = int(input("enter price break: "))
+
+mailing = (input('Are you mailing: y/N: '))
+if mailing.lower() == 'y':
+    postage = (input('include postage: Y/n: '))
+    if postage.lower() == 'y' or postage.lower() == '':
+        includePostage = True
+        print("Mailing")
+        print('First Class Postage Rate: '+psFCrate)
+        print('Standard Class Postage Rate: '+psSCrate)
+        print('Minimum Presort Standard Qty: '+psFCcutoff)
+    elif postage.lower() == 'n':
+        includePostage = False
+    else:
+        print('invalid selection')
+        sys.exit(1)
+    checkMail = True
+elif mailing.lower() == 'n' or mailing.lower() == "":
+    print("Not Mailing")
+    checkMail = False
+else:
+    print("not a valid choice")
+    sys.exit(1)
+
+while currentPage < count:
+    page = doc[currentPage]
+
+    words = page.getTextWords()
+
+    # To find new rects, print out the words array, copy the output to an
+    # editor and search for the value you're looking for. If there are
+    # multiple, guesstimate which is right.
+    Qtyrect = (122.00001525878906, 328.7388916015625, 135.0115966796875, 337.45294189453125)
+    Totalrect = (559.75, 551.71728515625, 584.52099609375, 560.7664794921875) # component price, not total price
+    Mailingrect = (555.25, 172.9172821044922, 584.5245971679688, 181.9665069580078) # component price for mailing component
+
+    # Find the total job cost on each page
+    total = [w for w in words if fitz.Rect(w[:4]).intersects(Totalrect)]
+    if not total:
+        # Fail loudly instead of reusing the previous page's cost.
+        sys.exit('no cost found in Totalrect on page ' + str(currentPage + 1))
+    total.sort(key=itemgetter(3, 0))
+    group = groupby(total, key=itemgetter(3))
+    for y1, gwords in group:
+        totalcost = (" ".join(w[4] for w in gwords))
+    # Remove $ and thousands separators from the cost. Divide it by the
+    # quantity break to find per piece price and round 4 decimals
+    perpiece = round(float(totalcost.strip('$').replace(',', ''))/currentBreak,4)
+
+    # Add the per piece and quantity to the arrays
+    costrow.append('\"'+str(perpiece)+'\"')
+    qtyrow.append(str(currentBreak)+'+ Units')
+    if checkMail is True:
+        # The mailing component is read from the page after the print page,
+        # so make sure that page exists before stepping onto it.
+        if currentPage + 1 < count:
+            currentPage = currentPage+1
+            page = doc[currentPage]
+            words = page.getTextWords()
+
+            mailCost = [w for w in words if fitz.Rect(w[:4]).intersects(Mailingrect)]
+            if not mailCost:
+                sys.exit('no cost found in Mailingrect on page ' + str(currentPage + 1))
+            mailCost.sort(key=itemgetter(3, 0))
+            group = groupby(mailCost, key=itemgetter(3))
+            for y1, gwords in group:
+                totalMail = (" ".join(w[4] for w in gwords))
+            mailPerPeice = round(float(totalMail.strip('$').replace(',', ''))/currentBreak,4)
+            if includePostage is True:
+                if currentBreak < int(psFCcutoff):
+                    mailPerPeice = round(mailPerPeice + float(psFCrate), 4)
+                else:
+                    mailPerPeice = round(mailPerPeice + float(psSCrate), 4)
+            mailrow.append('\"' + str(mailPerPeice) + '\"')
+        else:
+            sys.exit('no mailing page follows page ' + str(currentPage + 1))
+    # Quantity breaks run 1, breaks, 2*breaks, ... whether or not mailing
+    # pages are interleaved, so track the first break explicitly.
+    if firstBreak:
+        currentBreak = breaks
+        firstBreak = False
+    else:
+        currentBreak = currentBreak+breaks
+    currentPage = currentPage+1
+
+# Add the weight sections after all the qty breaks
+qtyrow.append('Unit Weight')
+qtyrow.append('Tare Weight')
+qtyrow.append('Max Weight')
+
+costrow.append('\"'+str(.5039)+'\"')
+costrow.append('\"'+str(48)+'\"')
+costrow.append('\"'+str(400)+'\"')
+
+# quotechar=None: nothing is quoted, and Python 3.11+ rejects an empty quotechar
+with open('pricetable.csv', 'w', newline='') as f:
+    writer = csv.writer(f, quoting=csv.QUOTE_NONE, quotechar=None)
+    writer.writerow(qtyrow)
+    writer.writerow(costrow)
+    if checkMail is True:
+        writer.writerow(mailrow)
\ No newline at end of file
diff --git a/required.txt b/required.txt
new file mode 100644
index 0000000..01fb5f0
--- /dev/null
+++ b/required.txt
@@ -0,0 +1 @@
+PyMuPDF==1.16.17
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..a1d2289
--- /dev/null
+++ b/test.py
@@ -0,0 +1,76 @@
+#### Test version attempts to parse both the total cost and print quantity from the PDFs. There were some issues with the print qty location moving. So, I went with the manually entered qty break option to get started
+### This version is also lacking the mailing price logic
+
+from operator import itemgetter
+from itertools import groupby
+import fitz
+import csv
+
+# doc = fitz.open("std.pdf")
+doc = fitz.open("flyers.pdf")
+
+count = doc.pageCount
+currentPage = 0
+
+qtyrow = ['Price Input', 'Type', 'Comparisons', 'Options', 'Fixed Costs']
+costrow =['Base','','','(none)','\"0\"']
+
+# mailing = (input('Are you mailing: y/N: '))
+# if mailing.lower() == 'y':
+#     print("Mailing")
+#     checkMail = True
+# elif mailing.lower() == 'n' or mailing.lower() == "":
+#     print("Not Mailing")
+#     checkMail = False
+# else:
+#     print("not a valid choice")
+#     exit()
+
+while currentPage < count:
+    page = doc[currentPage]
+
+    words = page.getTextWords()
+
+    # To find new rects, print out the words array, copy the output to an editor and search for the value you're looking for. If there are multiple, guesstimate which is right.
+    Qtyrect = (122.00001525878906, 328.7388916015625, 135.0115966796875, 337.45294189453125)
+    Totalrect = (559.9500122070312, 593.7672729492188, 584.7210083007812, 602.8164672851562)
+    # Totalrect = (559.75, 551.71728515625, 584.52099609375, 560.7664794921875)
+
+    # Find the print quantity on each page
+    qty = [w for w in words if fitz.Rect(w[:4]).intersects(Qtyrect)]
+    qty.sort(key=itemgetter(3, 0))
+    group = groupby(qty, key=itemgetter(3))
+    for y1, gwords in group:
+        totalqty = (" ".join(w[4] for w in gwords))
+    # Find the total job cost on each page
+    total = [w for w in words if fitz.Rect(w[:4]).intersects(Totalrect)]
+    total.sort(key=itemgetter(3, 0))
+    group = groupby(total, key=itemgetter(3))
+    for y1, gwords in group:
+        totalcost = (" ".join(w[4] for w in gwords))
+    # Remove $ and , from cost and , from quantity. Divide total cost by print quantity to find per piece price and round 4 decimals
+    perpiece = round(float(totalcost.strip('$').replace(',',''))/int(totalqty.replace(',','')),4)
+
+    # Add the per piece and quantity to the arrays
+    costrow.append('\"'+str(perpiece)+'\"')
+    qtyrow.append(totalqty.replace(',','')+'+ Units')
+    # if checkMail is True:
+    #     print(words)
+    #     Mailrect = ('')
+
+    currentPage = currentPage+1
+
+# Add the weight sections after all the qty breaks
+qtyrow.append('Unit Weight')
+qtyrow.append('Tare Weight')
+qtyrow.append('Max Weight')
+
+costrow.append('\"'+str(.5039)+'\"')
+costrow.append('\"'+str(48)+'\"')
+costrow.append('\"'+str(400)+'\"')
+
+csv.register_dialect('unixpwd', delimiter=',', quoting=csv.QUOTE_NONE)
+with open('pricetable.csv', 'w', newline='') as f:
+    writer = csv.writer(f, quoting=csv.QUOTE_NONE, quotechar=None)
+    writer.writerow(qtyrow)
+    writer.writerow(costrow)
\ No newline at end of file