#### Test version: reads the component cost from each PDF page. Parsing the
#### print quantity from the PDF was unreliable because its location moves,
#### so quantity breaks are derived from a manually entered step instead.
"""Build ``pricetable.csv`` from the estimate pages of ``flyers.pdf``.

Each quantity break occupies one PDF page (print cost) or, when mailing is
selected, two consecutive pages (print cost, then mailing cost).  The
quantity for the first break is 1; every later break ``i`` is ``i * step``
where ``step`` is typed in by the operator.  Costs are divided by the break
quantity, rounded to 4 decimals and written as quoted values.
"""
import csv
import sys
from itertools import groupby
from operator import itemgetter

####### IMPORT MAILING RATE SECTION #########
psFCrate = '.55'
psSCrate = '.44'
psFCcutoff = '200'

PDF_PATH = "flyers.pdf"  # alternative input used previously: "std.pdf"
CSV_PATH = 'pricetable.csv'

# To find new rects, print out the words array, copy the output to an editor
# and search for the value you are looking for.  If there are several
# matches, estimate which one is right.
# QTY_RECT is kept for reference only: the quantity is not read from the PDF.
QTY_RECT = (122.00001525878906, 328.7388916015625,
            135.0115966796875, 337.45294189453125)
# Component price, not total price.
TOTAL_RECT = (559.75, 551.71728515625, 584.52099609375, 560.7664794921875)
# Component price for the mailing component.
MAILING_RECT = (555.25, 172.9172821044922,
                584.5245971679688, 181.9665069580078)


def break_quantity(index, step):
    """Return the quantity of break number *index* (0-based).

    The first break is always 1 unit; break ``i`` (``i >= 1``) is
    ``i * step``.
    """
    return 1 if index == 0 else index * step


def parse_money(text):
    """Convert text such as ``'$1,234.50'`` to a float.

    Raises:
        ValueError: if what remains after dropping ``$`` and ``,`` is not a
            number.
    """
    cleaned = text.replace('$', '').replace(',', '').strip()
    try:
        return float(cleaned)
    except ValueError:
        raise ValueError('cannot read an amount from {!r}'.format(text))


def per_piece(amount_text, quantity):
    """Return the amount in *amount_text* divided by *quantity*, 4 decimals."""
    return round(parse_money(amount_text) / quantity, 4)


def add_postage(mail_per_piece, quantity):
    """Add the per-piece postage rate that applies to *quantity*.

    Quantities below ``psFCcutoff`` get ``psFCrate``; all others get
    ``psSCrate``.  The result is rounded to 4 decimals.
    """
    rate = psFCrate if quantity < int(psFCcutoff) else psSCrate
    return round(mail_per_piece + float(rate), 4)


def quoted(value):
    """Return ``str(value)`` wrapped in literal double quotes."""
    return '"' + str(value) + '"'


def last_line_text(words):
    """Join the words of the lowest text line in *words*.

    *words* are tuples whose items 0 and 3 are used as sort keys (x0 and y1)
    and whose item 4 is the word text.  Words sharing the same y1 form one
    line; the line with the largest y1 is returned, words joined by spaces.

    Raises:
        ValueError: if *words* is empty, instead of silently reusing the
            value found on an earlier page.
    """
    ordered = sorted(words, key=itemgetter(3, 0))
    lines = [" ".join(word[4] for word in line)
             for _, line in groupby(ordered, key=itemgetter(3))]
    if not lines:
        raise ValueError('no text found in the expected rectangle')
    return lines[-1]


def text_in_rect(page, rect):
    """Return the lowest line of text on *page* that intersects *rect*."""
    # PyMuPDF is imported lazily so the pure helpers above stay importable
    # on machines without it.
    import fitz

    # NOTE(review): getTextWords is the camelCase PyMuPDF name; newer
    # releases may only offer get_text("words") - confirm for your version.
    words = page.getTextWords()
    return last_line_text(
        [w for w in words if fitz.Rect(w[:4]).intersects(rect)])


def page_per_piece(doc, page_number, rect, quantity):
    """Per-piece price read from *rect* on page *page_number* of *doc*.

    Exits with a message naming the (1-based) page when the amount is
    missing or unreadable.
    """
    try:
        return per_piece(text_in_rect(doc[page_number], rect), quantity)
    except ValueError as err:
        sys.exit('page {}: {}'.format(page_number + 1, err))


def ask_price_break():
    """Prompt for the quantity step between breaks; must be an int >= 1."""
    raw = input("enter price break: ")
    try:
        step = int(raw)
    except ValueError:
        sys.exit('price break must be a whole number, got {!r}'.format(raw))
    if step < 1:
        # 0 would make the second break a division by zero.
        sys.exit('price break must be at least 1')
    return step


def ask_mailing_options():
    """Prompt for mailing/postage and return ``(check_mail, include_postage)``.

    Mailing defaults to no, postage defaults to yes.  Any other answer than
    y / n / empty ends the program.
    """
    mailing = input('Are you mailing: y/N: ').strip().lower()
    if mailing in ('n', ''):
        print("Not Mailing")
        return False, False
    if mailing != 'y':
        sys.exit("not a valid choice")

    postage = input('include postage: Y/n: ').strip().lower()
    if postage == 'n':
        return True, False
    if postage not in ('y', ''):
        sys.exit('invalid selection')
    print("Mailing")
    print('First Class Postage Rate: ' + psFCrate)
    print('Standard Class Postage Rate: ' + psSCrate)
    print('Minimum Presort Standard Qty: ' + psFCcutoff)
    return True, True


def build_rows(doc, step, check_mail, include_postage):
    """Return the CSV rows (quantity, cost and optionally mailing) for *doc*."""
    # NOTE(review): pageCount is the camelCase PyMuPDF name; newer releases
    # may only offer page_count - confirm for your version.
    count = doc.pageCount
    pages_per_break = 2 if check_mail else 1
    if count % pages_per_break:
        # The mailing page of the last break would be past the end of the PDF.
        sys.exit('expected print/mailing page pairs but the PDF has '
                 '{} pages'.format(count))

    qtyrow = ['Price Input', 'Type', 'Comparisons', 'Options', 'Fixed Costs']
    costrow = ['Base', '', '', '(none)', '"0"']
    mailrow = ['OptionalDataList', 'FormFilling', 'Equals', 'yes', '"0"']

    for index, first_page in enumerate(range(0, count, pages_per_break)):
        quantity = break_quantity(index, step)
        costrow.append(
            quoted(page_per_piece(doc, first_page, TOTAL_RECT, quantity)))
        qtyrow.append(str(quantity) + '+ Units')
        if check_mail:
            mail = page_per_piece(doc, first_page + 1, MAILING_RECT, quantity)
            if include_postage:
                mail = add_postage(mail, quantity)
            mailrow.append(quoted(mail))

    # Add the weight sections after all the qty breaks.
    qtyrow.extend(['Unit Weight', 'Tare Weight', 'Max Weight'])
    costrow.extend([quoted(.5039), quoted(48), quoted(400)])
    rows = [qtyrow, costrow]
    if check_mail:
        mailrow.extend(['"0"', '"0"', '"0"'])
        rows.append(mailrow)
    return rows


def write_price_table(path, rows):
    """Write *rows* to *path* as CSV without quoting or escaping.

    Fields already carry their own literal double quotes, so the writer must
    not treat ``"`` as special: ``quotechar=None`` with ``QUOTE_NONE``
    (an empty-string quotechar is rejected by Python 3.11+).
    """
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f, quoting=csv.QUOTE_NONE, quotechar=None)
        writer.writerows(rows)


def main():
    """Open the PDF, ask the operator for options and write the CSV."""
    import fitz  # PyMuPDF, see text_in_rect for why this is local

    doc = fitz.open(PDF_PATH)
    try:
        step = ask_price_break()
        check_mail, include_postage = ask_mailing_options()
        rows = build_rows(doc, step, check_mail, include_postage)
    finally:
        doc.close()
    write_price_table(CSV_PATH, rows)


if __name__ == "__main__":
    main()