diff --git a/main.py b/main.py index 8a4de3b..a0c792f 100644 --- a/main.py +++ b/main.py @@ -1,22 +1,25 @@ +#### Test version attempts to parse both the total cost and print quantity from the PDFs. There were some issues with the print qty location moving. So, I went with the manually entered qty break option to get started +### This version is also lacking the mailing price logic + from operator import itemgetter from itertools import groupby import fitz import csv - -doc = fitz.open("std.pdf") -# doc = fitz.open("flyers.pdf") -# doc = fitz.open("poster.pdf") - ####### IMPORT MAILING RATE SECTION ######### psFCrate = '.55' psSCrate = '.44' psFCcutoff = '200' + +# doc = fitz.open("std.pdf") +doc = fitz.open("flyers.pdf") + count = doc.pageCount currentPage = 0 currentBreak = 1 + qtyrow = ['Price Input', 'Type', 'Comparisons', 'Options', 'Fixed Costs'] costrow =['Base','','','(none)','\"0\"'] mailrow = ['OptionalDataList','FormFilling','Equals','yes','\"0\"'] @@ -55,7 +58,12 @@ while currentPage < count: Totalrect = (559.75, 551.71728515625, 584.52099609375, 560.7664794921875) #compontent price not total price Mailingrect = (555.25, 172.9172821044922, 584.5245971679688, 181.9665069580078) #component price for mailing component - +# Find the print quantity on each page + qty = [w for w in words if fitz.Rect(w[:4]).intersects(Qtyrect)] + qty.sort(key=itemgetter(3, 0)) + group = groupby(qty, key=itemgetter(3)) + for y1, gwords in group: + totalqty = (" ".join(w[4] for w in gwords)) # Find the total job cost on each page total = [w for w in words if fitz.Rect(w[:4]).intersects(Totalrect)] total.sort(key=itemgetter(3, 0)) @@ -96,6 +104,7 @@ while currentPage < count: currentBreak = currentBreak - 1 currentPage = currentPage+1 + # Add the weight sections after all the qty breaks qtyrow.append('Unit Weight') qtyrow.append('Tare Weight') @@ -116,4 +125,10 @@ with open('pricetable.csv', 'w', newline='') as f: writer.writerow(qtyrow) writer.writerow(costrow) if checkMail is True: - writer.writerow(mailrow) \ No newline at end of file + writer.writerow(mailrow) + + + + + + diff --git a/test.py b/test.py deleted file mode 100644 index a1d2289..0000000 --- a/test.py +++ /dev/null @@ -1,76 +0,0 @@ -#### Test version attempts to parse both the total cost and print quantity from the PDFs. There were some issues with the print qty location moving. So, I went with the manually entered qty break option to get started -### This version is also lacking the mailing price logic - -from operator import itemgetter -from itertools import groupby -import fitz -import csv - -# doc = fitz.open("std.pdf") -doc = fitz.open("flyers.pdf") - -count = doc.pageCount -currentPage = 0 - -qtyrow = ['Price Input', 'Type', 'Comparisons', 'Options', 'Fixed Costs'] -costrow =['Base','','','(none)','\"0\"'] - -# mailing = (input('Are you mailing: y/N: ')) -# if mailing.lower() == 'y': -# print("Mailing") -# checkMail = True -# elif mailing.lower() == 'n' or mailing.lower() == "": -# print("Not Mailing") -# checkMail = False -# else: -# print("not a valid choice") -# exit() - -while currentPage < count: - page = doc[currentPage] - - words = page.getTextWords() - -#to find new rects print out he words array, copy output to editor and search for the value you're looking for. If there're multiple, guestimate which is right. - Qtyrect = (122.00001525878906, 328.7388916015625, 135.0115966796875, 337.45294189453125) - Totalrect = (559.9500122070312, 593.7672729492188, 584.7210083007812, 602.8164672851562) - # Totalrect = (559.75, 551.71728515625, 584.52099609375, 560.7664794921875) - -# Find the print quantity on each page - qty = [w for w in words if fitz.Rect(w[:4]).intersects(Qtyrect)] - qty.sort(key=itemgetter(3, 0)) - group = groupby(qty, key=itemgetter(3)) - for y1, gwords in group: - totalqty = (" ".join(w[4] for w in gwords)) -# Find the total job cost on each page - total = [w for w in words if fitz.Rect(w[:4]).intersects(Totalrect)] - total.sort(key=itemgetter(3, 0)) - group = groupby(total, key=itemgetter(3)) - for y1, gwords in group: - totalcost = (" ".join(w[4] for w in gwords)) -# Remove $ from cost and , from quantity. Divide total cost by print quantity to find per piece price and round 4 decimals - perpiece = round(float(totalcost.strip('$'))/int(totalqty.replace(',','')),4) - -# Add the per piece and quantity to the arrays - costrow.append('\"'+str(perpiece)+'\"') - qtyrow.append(totalqty.replace(',','')+'+ Units') - # if checkMail is True: - # print(words) - # Mailrect = ('') - - currentPage = currentPage+1 - -# Add the weight sections after all the qty breaks -qtyrow.append('Unit Weight') -qtyrow.append('Tare Weight') -qtyrow.append('Max Weight') - -costrow.append('\"'+str(.5039)+'\"') -costrow.append('\"'+str(48)+'\"') -costrow.append('\"'+str(400)+'\"') - -csv.register_dialect('unixpwd', delimiter=',', quoting=csv.QUOTE_NONE) -with open('pricetable.csv', 'w', newline='') as f: - writer = csv.writer(f, quoting=csv.QUOTE_NONE, quotechar="") - writer.writerow(qtyrow) - writer.writerow(costrow) \ No newline at end of file