"""Build ``pricetable.csv`` from a multi-page estimate PDF.

Each page of the PDF is treated as one quantity break: the print quantity and
the total job cost are read from fixed rectangles on the page, the per-piece
price is derived from them, and one column per page is appended to the price
table.

NOTE: this is the test version that parses both values from the PDF. The print
quantity location was found to move between PDFs, so the rectangles below may
need adjusting per document.
TODO: mailing price logic is not implemented in this version (a y/N
"Are you mailing" prompt and a per-page mailing rectangle were sketched but
never finished).
"""
from operator import itemgetter
from itertools import groupby

import csv
import fitz

# Input PDF; "std.pdf" was the alternative sample used during development.
PDF_PATH = "flyers.pdf"
OUTPUT_PATH = "pricetable.csv"

# To find new rects, print out the words list for a page, copy the output to
# an editor and search for the value you are looking for. If there are
# multiple matches, estimate which one is right.
QTY_RECT = (122.00001525878906, 328.7388916015625,
            135.0115966796875, 337.45294189453125)
TOTAL_RECT = (559.9500122070312, 593.7672729492188,
              584.7210083007812, 602.8164672851562)
# Alternative total location seen on another layout:
# (559.75, 551.71728515625, 584.52099609375, 560.7664794921875)


def _page_words(page):
    """Return the word tuples of *page*, supporting old and new PyMuPDF names.

    Newer PyMuPDF releases expose ``get_text("words")``; the legacy
    ``getTextWords()`` spelling is used only when the new one is missing.
    """
    get_text = getattr(page, "get_text", None)
    if get_text is not None:
        return get_text("words")
    return page.getTextWords()


def _text_in_rect(words, rect):
    """Return the text of the words intersecting *rect*, or ``None``.

    Words are sorted by item 3 then item 0 of their tuple and grouped by
    item 3; each group's word strings (item 4) are joined with single spaces.
    When several groups intersect the rectangle, the last group in sort order
    wins. ``None`` is returned when nothing intersects.
    """
    hits = [w for w in words if fitz.Rect(w[:4]).intersects(rect)]
    hits.sort(key=itemgetter(3, 0))
    text = None
    for _, line_words in groupby(hits, key=itemgetter(3)):
        text = " ".join(w[4] for w in line_words)
    return text


def _per_piece_price(cost_text, qty_text):
    """Return total cost divided by quantity, rounded to 4 decimals.

    A surrounding ``$`` and thousands separators are removed from the cost,
    and thousands separators are removed from the quantity.

    Raises:
        ValueError: if either text is not numeric or the quantity is zero.
    """
    cost = float(cost_text.strip("$ ").replace(",", ""))
    quantity = int(qty_text.replace(",", ""))
    if quantity == 0:
        raise ValueError("print quantity is zero")
    return round(cost / quantity, 4)


# Header row and matching cost row; one column is appended per PDF page.
qtyrow = ['Price Input', 'Type', 'Comparisons', 'Options', 'Fixed Costs']
costrow = ['Base', '', '', '(none)', '"0"']

doc = fitz.open(PDF_PATH)
try:
    for page_index in range(len(doc)):
        words = _page_words(doc[page_index])
        qty_text = _text_in_rect(words, QTY_RECT)
        cost_text = _text_in_rect(words, TOTAL_RECT)
        # Fail loudly instead of reusing the previous page's values (or
        # hitting a NameError on the first page) when a rect matches nothing.
        if qty_text is None or cost_text is None:
            missing = "print quantity" if qty_text is None else "total cost"
            raise ValueError(
                f"page {page_index + 1} of {PDF_PATH}: no {missing} found "
                "in the configured rectangle"
            )
        try:
            per_piece = _per_piece_price(cost_text, qty_text)
        except ValueError as err:
            raise ValueError(
                f"page {page_index + 1} of {PDF_PATH}: cannot price "
                f"cost={cost_text!r} qty={qty_text!r}: {err}"
            ) from err

        # Add the per piece price and quantity break to the rows.
        costrow.append(f'"{per_piece}"')
        qtyrow.append(qty_text.replace(',', '') + '+ Units')
finally:
    doc.close()

# Add the weight sections after all the qty breaks.
qtyrow.extend(['Unit Weight', 'Tare Weight', 'Max Weight'])
costrow.extend(['"0.5039"', '"48"', '"400"'])

with open(OUTPUT_PATH, 'w', newline='') as f:
    # The cells already carry their own literal double quotes, so quoting is
    # disabled entirely. quotechar must be None: an empty string is rejected
    # on Python 3.11+.
    writer = csv.writer(f, quoting=csv.QUOTE_NONE, quotechar=None)
    writer.writerow(qtyrow)
    writer.writerow(costrow)