# Schrifterkennung/Buchstaben_extrahieren2.py

import os

import cv2
import fitz  # PyMuPDF
import numpy as np
from PIL import Image

def _pixmap_to_gray(pix):
    """Convert a PyMuPDF pixmap into a 2-D ``uint8`` grayscale array.

    Args:
        pix: Pixmap as returned by ``page.get_pixmap``.

    Returns:
        Array of shape ``(pix.height, pix.width)``.
    """
    # ``pix.samples`` is a bytes object; ``np.array`` would wrap it as a single
    # 0-d bytes scalar that cannot be reshaped, so decode it with ``frombuffer``.
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
        pix.height, pix.width, pix.n
    )
    if pix.n == 1:
        # Already grayscale: just drop the channel axis.
        return img[:, :, 0]
    # PyMuPDF delivers channels in RGB(A) order, not OpenCV's default BGR(A).
    if pix.n == 4:
        return cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)


def save_letter_images_from_pdf(pdf_path, output_folder):
    """Save every text span of a PDF as a 28x28 grayscale PNG.

    Each page is scanned via ``page.get_text("dict")``; the bounding box of
    every span is rendered, converted to grayscale, resized to 28x28 pixels
    and written to ``output_folder`` as
    ``letter_page<page>_box<index>.png``, where ``<index>`` counts the saved
    spans of that page starting at 1.

    NOTE(review): the boxes come from spans, not from individual characters.
    According to the PyMuPDF docs a span is a run of identically formatted
    text, so an image may show more than one letter unless the PDF places
    every letter in its own span -- confirm against the input files.

    Args:
        pdf_path: Path of the PDF file to read.
        output_folder: Directory that receives the PNG files; it is created
            if it does not exist yet.
    """
    os.makedirs(output_folder, exist_ok=True)

    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as doc:
        for page_num, page in enumerate(doc, start=1):
            text_boxes = page.get_text("dict")["blocks"]

            # Running index per page so that each span gets its own file
            # instead of all spans overwriting one another.
            box_index = 0

            for block in text_boxes:
                # Blocks without a "lines" entry carry no text spans.
                for line in block.get("lines", ()):
                    for span in line["spans"]:
                        rect = fitz.Rect(span["bbox"])
                        if rect.is_empty:
                            continue

                        # Render only the area covered by the span.
                        pix = page.get_pixmap(clip=rect)
                        if pix.width == 0 or pix.height == 0:
                            # cv2.resize cannot handle empty images.
                            continue

                        img = _pixmap_to_gray(pix)
                        img_resized = cv2.resize(
                            img, (28, 28), interpolation=cv2.INTER_AREA
                        )

                        box_index += 1
                        out_path = os.path.join(
                            output_folder,
                            f"letter_page{page_num}_box{box_index}.png",
                        )
                        Image.fromarray(img_resized).save(out_path)

# Example usage
pdf_path = "/home/mia/Schule/KISY/schrifterkennung/letters.pdf"
output_folder = "/home/mia/Schule/KISY/schrifterkennung/out/"

# Only run the extraction when this file is executed as a script, so that
# importing the module does not read the PDF or write any images.
if __name__ == "__main__":
    save_letter_images_from_pdf(pdf_path, output_folder)