"""Build a quantity-break price table (pricetable.csv) from a quote PDF.

The script opens the PDF, asks for the price-break step and whether the job
is being mailed, then reads one price per page from a fixed rectangle:

* not mailing: every page is a cost page for the next quantity break;
* mailing: pages come in pairs, a cost page followed by the page holding the
  mailing component price for the same quantity break.

Each price is divided by the break quantity to get a per-piece price
(rounded to 4 decimals) and the rows are written to ``pricetable.csv``.
"""

import csv
import sys
from itertools import groupby
from operator import itemgetter

import fitz

# Quote to read.  Other quotes this script has been pointed at:
# "flyers.pdf", "poster.pdf".
PDF_PATH = "std.pdf"
CSV_PATH = "pricetable.csv"

####### IMPORT MAILING RATE SECTION #########
# Kept as strings because they are echoed verbatim to the operator.
psFCrate = '.47'      # per-piece postage added below the cutoff quantity
psSCrate = '.25'      # per-piece postage added at or above the cutoff quantity
psFCcutoff = '500'    # quantity at which the standard-class rate starts to apply

# To find new rects, print out the words list for a page, copy the output to
# an editor and search for the value you are looking for.  If there are
# several matches, estimate which one is right.
Qtyrect = (122.00001525878906, 328.7388916015625,
           135.0115966796875, 337.45294189453125)  # not read by the code below
Totalrect = (559.75, 551.71728515625,
             584.52099609375, 560.7664794921875)  # component price, not total price
Mailingrect = (555.25, 172.9172821044922,
               584.5245971679688, 181.9665069580078)  # component price for the mailing component

# Values written into the weight columns of the cost row.
UNIT_WEIGHT = .5039
TARE_WEIGHT = 48
MAX_WEIGHT = 400


def _page_words(page):
    """Return the word tuples of *page*.

    Newer PyMuPDF releases expose this as ``get_text("words")``; older ones
    only have the camelCase ``getTextWords()`` the script was written against.
    """
    if hasattr(page, "get_text"):
        return page.get_text("words")
    return page.getTextWords()


def _last_line_in_rect(words, rect):
    """Return the text of the lowest line of words intersecting *rect*.

    Each word tuple carries its bounding box in ``w[:4]`` and its text in
    ``w[4]``.  Matching words are ordered by bottom edge then left edge and
    grouped by bottom edge; when several lines match, the last one wins.
    Returns ``None`` when nothing intersects the rectangle.
    """
    hits = sorted(
        (w for w in words if fitz.Rect(w[:4]).intersects(rect)),
        key=itemgetter(3, 0),
    )
    text = None
    for _, line_words in groupby(hits, key=itemgetter(3)):
        text = " ".join(w[4] for w in line_words)
    return text


def _parse_amount(text):
    """Convert a price such as ``"$1,234.56"`` to a float.

    Raises ``ValueError`` when the remaining text is not a number.
    """
    return float(text.strip("$").replace(",", ""))


def _price_on_page(doc, page_index, rect, label):
    """Read the price inside *rect* on page *page_index* (0-based).

    Exits with a message naming the page when the rectangle is empty or does
    not contain a number, instead of failing later with a NameError or
    silently reusing the previous page's value.
    """
    text = _last_line_in_rect(_page_words(doc[page_index]), rect)
    if text is None:
        sys.exit(f"no {label} found on page {page_index + 1}")
    try:
        return _parse_amount(text)
    except ValueError:
        sys.exit(f"cannot read {label} {text!r} on page {page_index + 1}")


def _quantity_breaks(step):
    """Yield the break quantities 1, step, 2*step, 3*step, ... without end."""
    yield 1
    qty = step
    while True:
        yield qty
        qty += step


def _ask_step():
    """Prompt for the price-break step and return it as a positive int."""
    raw = input("enter price break: ")
    try:
        step = int(raw)
    except ValueError:
        sys.exit("price break must be a whole number")
    if step < 1:
        # A step of 0 would produce a break quantity of 0 and a division by zero.
        sys.exit("price break must be at least 1")
    return step


def _ask_mailing():
    """Prompt for the mailing options.

    Returns a ``(check_mail, include_postage)`` pair of booleans.  Exits with
    a non-zero status on an unrecognised answer.
    """
    mailing = input('Are you mailing: y/N: ').lower()
    if mailing in ("n", ""):
        print("Not Mailing")
        return False, False
    if mailing != "y":
        sys.exit("not a valid choice")

    postage = input('include postage: Y/n: ').lower()
    if postage in ("y", ""):
        print("Mailing")
        print('First Class Postage Rate: ' + psFCrate)
        print('Standard Class Postage Rate: ' + psSCrate)
        print('Minimum Presort Standard Qty: ' + psFCcutoff)
        return True, True
    if postage == "n":
        return True, False
    sys.exit("invalid selection")


def _build_rows(doc, step, check_mail, include_postage):
    """Read every quantity break from *doc* and return the three CSV rows.

    Returns ``(qtyrow, costrow, mailrow)``; *mailrow* only gains price cells
    when *check_mail* is true.
    """
    qtyrow = ['Price Input', 'Type', 'Comparisons', 'Options', 'Fixed Costs']
    costrow = ['Base', '', '', '(none)', '"0"']
    mailrow = ['OptionalDataList', 'FormFilling', 'Equals', 'yes', '"0"']

    page_count = len(doc)
    pages_per_break = 2 if check_mail else 1
    if check_mail and page_count % 2:
        # Every cost page must be followed by its mailing page.
        sys.exit(
            f"mailing needs two pages per price break, "
            f"but the PDF has {page_count} pages"
        )

    cutoff = int(psFCcutoff)
    cost_pages = range(0, page_count, pages_per_break)
    for page_index, qty in zip(cost_pages, _quantity_breaks(step)):
        # Divide the price by the print quantity for a per-piece price.
        total = _price_on_page(doc, page_index, Totalrect, "total cost")
        costrow.append(f'"{round(total / qty, 4)}"')
        qtyrow.append(f"{qty}+ Units")

        if not check_mail:
            continue
        mail_total = _price_on_page(
            doc, page_index + 1, Mailingrect, "mailing cost"
        )
        mail_per_piece = round(mail_total / qty, 4)
        if include_postage:
            rate = psFCrate if qty < cutoff else psSCrate
            # Round again so float addition noise never reaches the CSV.
            mail_per_piece = round(mail_per_piece + float(rate), 4)
        mailrow.append(f'"{mail_per_piece}"')

    # Add the weight sections after all the quantity breaks.
    qtyrow.extend(['Unit Weight', 'Tare Weight', 'Max Weight'])
    costrow.extend(
        [f'"{UNIT_WEIGHT}"', f'"{TARE_WEIGHT}"', f'"{MAX_WEIGHT}"']
    )
    return qtyrow, costrow, mailrow


def main():
    """Run the interactive conversion from the quote PDF to the CSV file."""
    doc = fitz.open(PDF_PATH)
    try:
        step = _ask_step()
        check_mail, include_postage = _ask_mailing()
        qtyrow, costrow, mailrow = _build_rows(
            doc, step, check_mail, include_postage
        )
    finally:
        doc.close()

    with open(CSV_PATH, 'w', newline='') as f:
        # Cells already carry their own literal double quotes, so quoting is
        # switched off entirely.  quotechar must be None here: an empty
        # string is rejected by the csv module on Python 3.11 and later.
        writer = csv.writer(f, quoting=csv.QUOTE_NONE, quotechar=None)
        writer.writerow(qtyrow)
        writer.writerow(costrow)
        if check_mail:
            writer.writerow(mailrow)


if __name__ == "__main__":
    main()