# Schrifterkennung/Buchstaben_extrahieren2.py
#
# 49 lines
# 1.9 KiB
# Python

from pathlib import Path

import cv2
import fitz  # PyMuPDF
import numpy as np
from PIL import Image
def _iter_span_rects(page):
    """Yield the bounding rectangle of every text span on *page*."""
    for block in page.get_text("dict")["blocks"]:
        # Blocks without a "lines" entry carry no text spans; skip them.
        for line in block.get("lines", ()):
            for span in line["spans"]:
                yield fitz.Rect(span["bbox"])


def _pixmap_to_gray(pix):
    """Return the pixels of a PyMuPDF pixmap as a 2-D uint8 grayscale array."""
    # ``pix.samples`` is a bytes object: np.array() would wrap it as a single
    # 0-d string scalar that cannot be reshaped, so read it as a raw buffer.
    flat = np.frombuffer(pix.samples, dtype=np.uint8)
    if pix.n == 1:
        # Already one channel per pixel; nothing to convert.
        return flat.reshape(pix.height, pix.width)
    pixels = flat.reshape(pix.height, pix.width, pix.n)
    # PyMuPDF stores channels in RGB(A) order, not OpenCV's BGR(A) default,
    # so the RGB conversion codes are required for correct luminance weights.
    code = cv2.COLOR_RGBA2GRAY if pix.n == 4 else cv2.COLOR_RGB2GRAY
    return cv2.cvtColor(pixels, code)


def save_letter_images_from_pdf(pdf_path, output_folder):
    """Render every text span of a PDF to a 28x28 grayscale PNG.

    Each span's bounding box is rasterised, converted to grayscale, resized
    to 28x28 pixels and written to
    ``<output_folder>/letter_page<P>_box<K>.png``, where ``P`` is the 1-based
    page number and ``K`` is the 1-based index of the saved span on that page.

    NOTE(review): spans come from ``get_text("dict")``, i.e. they are runs of
    text, so a box may contain more than one character. If strictly one image
    per letter is required, the per-character boxes of ``get_text("rawdict")``
    would be needed instead -- confirm the intended granularity.

    Args:
        pdf_path: Path of the PDF file to read.
        output_folder: Directory that receives the PNG files; it is created
            if it does not exist yet.

    Returns:
        The number of images written.
    """
    out_dir = Path(output_folder)
    out_dir.mkdir(parents=True, exist_ok=True)

    saved = 0
    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as doc:
        for page_num, page in enumerate(doc, start=1):
            box_index = 0
            for rect in _iter_span_rects(page):
                if rect.is_empty:
                    continue
                pix = page.get_pixmap(clip=rect)
                if pix.width == 0 or pix.height == 0:
                    # Degenerate crop: cv2.resize cannot handle empty input.
                    continue

                gray = _pixmap_to_gray(pix)
                thumb = cv2.resize(gray, (28, 28), interpolation=cv2.INTER_AREA)

                # A running per-page index keeps file names unique; the
                # block count is the same for every span on a page and made
                # each image overwrite the previous one.
                box_index += 1
                target = out_dir / f"letter_page{page_num}_box{box_index}.png"
                Image.fromarray(thumb).save(target)
                saved += 1
    return saved
# Example usage: runs only when the file is executed as a script, so that
# importing the module does not trigger an extraction with hard-coded paths.
if __name__ == "__main__":
    pdf_path = "/home/mia/Schule/KISY/schrifterkennung/letters.pdf"
    output_folder = "/home/mia/Schule/KISY/schrifterkennung/out/"
    save_letter_images_from_pdf(pdf_path, output_folder)