49 lines
1.9 KiB
Python
49 lines
1.9 KiB
Python
import fitz # PyMuPDF
|
|
import cv2
|
|
import numpy as np
|
|
from PIL import Image
|
|
|
|
def _pixmap_to_gray(pix):
    """Return the pixels of a PyMuPDF pixmap as a 2-D uint8 grayscale array."""
    # ``pix.samples`` is a bytes object; ``np.array(bytes)`` would wrap it as a
    # single 0-d byte-string element, so the buffer must be reinterpreted.
    samples = np.frombuffer(pix.samples, dtype=np.uint8)

    if pix.n == 1:
        # Already single-channel: nothing to convert.
        return samples.reshape(pix.height, pix.width)

    img = samples.reshape(pix.height, pix.width, pix.n)
    # PyMuPDF stores channels in RGB(A) order, not OpenCV's default BGR(A),
    # so the RGB* conversion codes are needed for correct luminance weights.
    if pix.n == 4:
        return cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)


def save_letter_images_from_pdf(pdf_path, output_folder):
    """Save every text span of a PDF as a 28x28 grayscale PNG.

    Each page is scanned with ``page.get_text("dict")``; for every span found
    in a text block, the span's bounding box is rendered, converted to
    grayscale, resized to 28x28 pixels and written to
    ``<output_folder>/letter_page<P>_box<K>.png``, where ``P`` is the 1-based
    page number and ``K`` is a 1-based running index of the spans on that page.

    NOTE(review): the crop unit is a *span*, which may contain more than one
    character. If the input PDF does not place each letter in its own span,
    per-character boxes would be needed instead -- confirm against the input.

    Args:
        pdf_path: Path of the PDF file to read.
        output_folder: Directory that receives the PNG files; it is created
            if it does not exist yet.
    """
    import os  # local import so this block does not rely on file-level changes

    os.makedirs(output_folder, exist_ok=True)

    doc = fitz.open(pdf_path)
    try:
        for page_number, page in enumerate(doc, start=1):
            blocks = page.get_text("dict")["blocks"]

            # Running per-page index so that every span gets its own file
            # instead of all spans overwriting one another.
            box_index = 0

            for block in blocks:
                # Blocks without a "lines" entry have no spans to render.
                for line in block.get("lines", ()):
                    for span in line["spans"]:
                        rect = fitz.Rect(span["bbox"])
                        if rect.is_empty:
                            continue

                        # Render only the region covered by this span.
                        pix = page.get_pixmap(clip=rect)
                        if pix.width == 0 or pix.height == 0:
                            # cv2.resize cannot handle an empty image.
                            continue

                        gray = _pixmap_to_gray(pix)
                        resized = cv2.resize(
                            gray, (28, 28), interpolation=cv2.INTER_AREA
                        )

                        box_index += 1
                        file_name = f"letter_page{page_number}_box{box_index}.png"
                        Image.fromarray(resized).save(
                            os.path.join(output_folder, file_name)
                        )
    finally:
        # Release the document even if rendering or saving fails.
        doc.close()
|
|
|
|
# Example usage: input PDF and output directory for the extracted images.
pdf_path = "/home/mia/Schule/KISY/schrifterkennung/letters.pdf"
output_folder = "/home/mia/Schule/KISY/schrifterkennung/out/"

# Only run the conversion when this file is executed as a script, so that
# importing the module does not immediately start processing the PDF.
if __name__ == "__main__":
    save_letter_images_from_pdf(pdf_path, output_folder)
|
|
|