"""Render the text spans of a PDF as small grayscale PNG images."""

import os

import cv2
import fitz  # PyMuPDF
import numpy as np
from PIL import Image


def _pixmap_to_gray(pix):
    """Return the pixels of a PyMuPDF pixmap as a 2-D ``uint8`` grayscale array.

    Raises:
        ValueError: If the pixmap has a channel count other than 1, 3 or 4.
    """
    # ``pix.samples`` is a ``bytes`` object. ``np.array(bytes)`` would build a
    # 0-d bytes scalar that cannot be reshaped, so the raw buffer has to be
    # interpreted explicitly as unsigned 8-bit values.
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
        pix.height, pix.width, pix.n
    )
    if pix.n == 1:
        # Already grayscale: just drop the trailing channel axis.
        return img[:, :, 0]
    # PyMuPDF stores colour samples in RGB(A) order, not OpenCV's BGR(A);
    # using the BGR codes would apply the luminance weights to the wrong
    # channels.
    if pix.n == 3:
        return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    if pix.n == 4:
        return cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    raise ValueError(f"unsupported number of pixmap channels: {pix.n}")


def save_letter_images_from_pdf(pdf_path, output_folder, size=(28, 28)):
    """Save one grayscale PNG per text span found in a PDF.

    Every span bounding box reported by ``page.get_text("dict")`` is rendered,
    converted to grayscale, resized to ``size`` and written to
    ``letter_page<P>_box<K>.png`` inside ``output_folder``, where ``P`` is the
    1-based page number and ``K`` is the 1-based running index of the span on
    that page.

    NOTE(review): the crop region is the bounding box of a whole span, so an
    image contains a single letter only if the PDF stores each letter as its
    own span -- confirm this holds for the input documents.

    Args:
        pdf_path: Path of the PDF file to read.
        output_folder: Directory that receives the PNG files; it is created
            if it does not exist yet.
        size: ``(width, height)`` of the saved images in pixels.

    Returns:
        The number of images written.
    """
    os.makedirs(output_folder, exist_ok=True)
    saved = 0

    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as doc:
        for page_num, page in enumerate(doc, start=1):
            # Per-page counter so that every span gets its own file name
            # instead of all spans of a page overwriting the same file.
            box_num = 0
            for block in page.get_text("dict")["blocks"]:
                # Blocks without a "lines" entry carry no text spans.
                for line in block.get("lines", ()):
                    for span in line["spans"]:
                        rect = fitz.Rect(span["bbox"])
                        if rect.is_empty:
                            continue

                        pix = page.get_pixmap(clip=rect)
                        if pix.width == 0 or pix.height == 0:
                            # Nothing to resize; cv2.resize rejects empty input.
                            continue

                        gray = _pixmap_to_gray(pix)
                        resized = cv2.resize(
                            gray, tuple(size), interpolation=cv2.INTER_AREA
                        )

                        box_num += 1
                        out_path = os.path.join(
                            output_folder,
                            f"letter_page{page_num}_box{box_num}.png",
                        )
                        Image.fromarray(resized).save(out_path)
                        saved += 1

    return saved


if __name__ == "__main__":
    # Example usage
    pdf_path = "/home/mia/Schule/KISY/schrifterkennung/letters.pdf"
    output_folder = "/home/mia/Schule/KISY/schrifterkennung/out/"
    save_letter_images_from_pdf(pdf_path, output_folder)