From 01b32052e3667880b5547d09142aaa0631224316 Mon Sep 17 00:00:00 2001 From: Dan Dembinski Date: Wed, 15 Apr 2020 22:44:16 -0400 Subject: [PATCH] Broke the test file out into its own dev branch. Roughly merged the two files so there's some mailing logic present along with the print qty parsing logic. Mail logic is erroring out right now, but it's a start. --- main.py | 29 ++++++++++++++++------ test.py | 76 --------------------------------------------------------- 2 files changed, 22 insertions(+), 83 deletions(-) delete mode 100644 test.py diff --git a/main.py b/main.py index 8a4de3b..a0c792f 100644 --- a/main.py +++ b/main.py @@ -1,22 +1,25 @@ +#### Test version attempts to parse both the total cost and print quantity from the PDFs. There were some issues with the print qty location moving. So, I went with the manually entered qty break option to get started +### This version is also lacking the mailing price logic + from operator import itemgetter from itertools import groupby import fitz import csv - -doc = fitz.open("std.pdf") -# doc = fitz.open("flyers.pdf") -# doc = fitz.open("poster.pdf") - ####### IMPORT MAILING RATE SECTION ######### psFCrate = '.55' psSCrate = '.44' psFCcutoff = '200' + +# doc = fitz.open("std.pdf") +doc = fitz.open("flyers.pdf") + count = doc.pageCount currentPage = 0 currentBreak = 1 + qtyrow = ['Price Input', 'Type', 'Comparisons', 'Options', 'Fixed Costs'] costrow =['Base','','','(none)','\"0\"'] mailrow = ['OptionalDataList','FormFilling','Equals','yes','\"0\"'] @@ -55,7 +58,12 @@ while currentPage < count: Totalrect = (559.75, 551.71728515625, 584.52099609375, 560.7664794921875) #compontent price not total price Mailingrect = (555.25, 172.9172821044922, 584.5245971679688, 181.9665069580078) #component price for mailing component - +# Find the print quantity on each page + qty = [w for w in words if fitz.Rect(w[:4]).intersects(Qtyrect)] + qty.sort(key=itemgetter(3, 0)) + group = groupby(qty, key=itemgetter(3)) + for y1, gwords in 
group: + totalqty = (" ".join(w[4] for w in gwords)) # Find the total job cost on each page total = [w for w in words if fitz.Rect(w[:4]).intersects(Totalrect)] total.sort(key=itemgetter(3, 0)) @@ -96,6 +104,7 @@ while currentPage < count: currentBreak = currentBreak - 1 currentPage = currentPage+1 + # Add the weight sections after all the qty breaks qtyrow.append('Unit Weight') qtyrow.append('Tare Weight') @@ -116,4 +125,10 @@ with open('pricetable.csv', 'w', newline='') as f: writer.writerow(qtyrow) writer.writerow(costrow) if checkMail is True: - writer.writerow(mailrow) \ No newline at end of file + writer.writerow(mailrow) + + + + + + diff --git a/test.py b/test.py deleted file mode 100644 index a1d2289..0000000 --- a/test.py +++ /dev/null @@ -1,76 +0,0 @@ -#### Test version attempts to parse both the total cost and print quantity from the PDFs. There were some issues with the print qty location moving. So, I went with the manually entered qty break option to get started -### This version is also lacking the mailing price logic - -from operator import itemgetter -from itertools import groupby -import fitz -import csv - -# doc = fitz.open("std.pdf") -doc = fitz.open("flyers.pdf") - -count = doc.pageCount -currentPage = 0 - -qtyrow = ['Price Input', 'Type', 'Comparisons', 'Options', 'Fixed Costs'] -costrow =['Base','','','(none)','\"0\"'] - -# mailing = (input('Are you mailing: y/N: ')) -# if mailing.lower() == 'y': -# print("Mailing") -# checkMail = True -# elif mailing.lower() == 'n' or mailing.lower() == "": -# print("Not Mailing") -# checkMail = False -# else: -# print("not a valid choice") -# exit() - -while currentPage < count: - page = doc[currentPage] - - words = page.getTextWords() - -#to find new rects print out he words array, copy output to editor and search for the value you're looking for. If there're multiple, guestimate which is right. 
- Qtyrect = (122.00001525878906, 328.7388916015625, 135.0115966796875, 337.45294189453125) - Totalrect = (559.9500122070312, 593.7672729492188, 584.7210083007812, 602.8164672851562) - # Totalrect = (559.75, 551.71728515625, 584.52099609375, 560.7664794921875) - -# Find the print quantity on each page - qty = [w for w in words if fitz.Rect(w[:4]).intersects(Qtyrect)] - qty.sort(key=itemgetter(3, 0)) - group = groupby(qty, key=itemgetter(3)) - for y1, gwords in group: - totalqty = (" ".join(w[4] for w in gwords)) -# Find the total job cost on each page - total = [w for w in words if fitz.Rect(w[:4]).intersects(Totalrect)] - total.sort(key=itemgetter(3, 0)) - group = groupby(total, key=itemgetter(3)) - for y1, gwords in group: - totalcost = (" ".join(w[4] for w in gwords)) -# Remove $ from cost and , from quantity. Divide total cost by print quantity to find per piece price and round 4 decimals - perpiece = round(float(totalcost.strip('$'))/int(totalqty.replace(',','')),4) - -# Add the per piece and quantity to the arrays - costrow.append('\"'+str(perpiece)+'\"') - qtyrow.append(totalqty.replace(',','')+'+ Units') - # if checkMail is True: - # print(words) - # Mailrect = ('') - - currentPage = currentPage+1 - -# Add the weight sections after all the qty breaks -qtyrow.append('Unit Weight') -qtyrow.append('Tare Weight') -qtyrow.append('Max Weight') - -costrow.append('\"'+str(.5039)+'\"') -costrow.append('\"'+str(48)+'\"') -costrow.append('\"'+str(400)+'\"') - -csv.register_dialect('unixpwd', delimiter=',', quoting=csv.QUOTE_NONE) -with open('pricetable.csv', 'w', newline='') as f: - writer = csv.writer(f, quoting=csv.QUOTE_NONE, quotechar="") - writer.writerow(qtyrow) - writer.writerow(costrow) \ No newline at end of file