Initial (and hopefully last) commit

This commit is contained in:
mia 2026-03-12 10:37:32 +01:00
commit 06374b2608
8 changed files with 326 additions and 0 deletions

5
.gitignore vendored Normal file
View file

@ -0,0 +1,5 @@
*.npy
*.odt
*.pdf
/out/
/.venv/

40
Buchstaben_extrahieren.py Normal file
View file

@ -0,0 +1,40 @@
import cv2
import numpy as np
import os

# --- User parameters ---
output_folder = "letters"                  # where the cropped letter images go
image = cv2.imread("Blockbuchstaben.jpeg") # scanned sheet of block letters
number_rows = 13                           # grid layout of the scanned sheet
number_cols = 8
margin = 15  # border (px) trimmed from each individual letter cell

# BUG FIX: cv2.imread returns None (no exception) when the file is missing,
# which previously surfaced as an opaque cvtColor error below.
if image is None:
    raise FileNotFoundError("Could not read 'Blockbuchstaben.jpeg'")

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
height, width, channels = image.shape
print(height)
print(width)

# Size of one grid cell; integer division drops any remainder at the edges.
letter_width = width // number_cols
letter_height = height // number_rows
print(letter_width)
print(letter_height)

# exist_ok avoids the check-then-create race of exists()+makedirs().
os.makedirs(output_folder, exist_ok=True)

# Crop every cell of the grid (minus the margin on all sides) and save it.
for i in range(number_rows):
    for j in range(number_cols):
        x = j * letter_width
        y = i * letter_height
        letter_img = gray[y + margin:y + letter_height - margin,
                          x + margin:x + letter_width - margin]
        # if i == 6 and j == 8:
        #     cv2.imshow('letter_img', letter_img)
        cv2.imwrite(os.path.join(output_folder, f"letter{i}{j}.png"), letter_img)

# cv2.imshow('BuchstabenRaster', gray)
cv2.waitKey(0)  # no-op unless one of the imshow lines above is re-enabled
cv2.destroyAllWindows()

View file

@ -0,0 +1,49 @@
import fitz  # PyMuPDF
import cv2
import numpy as np
from PIL import Image


def save_letter_images_from_pdf(pdf_path, output_folder):
    """Crop every text span of every PDF page to a 28x28 grayscale PNG.

    pdf_path:      path of the PDF to scan
    output_folder: existing directory the PNGs are written into
    """
    doc = fitz.open(pdf_path)
    for page_num in range(len(doc)):
        page = doc.load_page(page_num)
        # Layout tree: blocks -> lines -> spans; spans carry bounding boxes.
        text_boxes = page.get_text("dict")["blocks"]
        box_index = 0  # running counter so every span gets a unique filename
        for block in text_boxes:
            if "lines" in block:  # image blocks have no "lines" key
                for line in block["lines"]:
                    for span in line["spans"]:
                        # Render only the span's bounding box.
                        rect = fitz.Rect(span["bbox"])
                        pix = page.get_pixmap(clip=rect)
                        # BUG FIX: pix.samples is a bytes object; np.array()
                        # on bytes yields a 0-d '|S' array that cannot be
                        # reshaped. frombuffer gives the intended uint8 view.
                        img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
                            pix.height, pix.width, pix.n)
                        # Collapse to a single grayscale channel.
                        if pix.n == 4:
                            img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
                        else:
                            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                        # Resize to the model's 28x28 input size.
                        img_resized = cv2.resize(img, (28, 28),
                                                 interpolation=cv2.INTER_AREA)
                        # BUG FIX: the old filename used len(text_boxes), which
                        # is constant per page, so every span on a page
                        # overwrote the same file. Use the running counter.
                        img_pil = Image.fromarray(img_resized)
                        img_pil.save(
                            f"{output_folder}/letter_page{page_num + 1}_box{box_index}.png")
                        box_index += 1
    # Close the document
    doc.close()


# Example usage
pdf_path = "/home/mia/Schule/KISY/schrifterkennung/letters.pdf"
output_folder = "/home/mia/Schule/KISY/schrifterkennung/out/"
save_letter_images_from_pdf(pdf_path, output_folder)

113
app.py Normal file
View file

@ -0,0 +1,113 @@
import tensorflow as tf
from tensorflow.keras import layers, models, Sequential
import numpy as np
import matplotlib.pyplot as plt
import os
from PyQt6.QtWidgets import QApplication, QMainWindow, QWidget, QVBoxLayout, QPushButton, QLabel
from PyQt6.QtGui import QPainter, QPen, QImage
from PyQt6.QtCore import Qt, QPoint
import sys
# Paths and model loading. Training happens in make_model.py; this app only
# loads the saved model and runs inference on canvas drawings.
data_dir = "/home/mia/Schule/KISY/schrifterkennung/" # absolute path on purpose — relative paths caused problems here
model_file = "model.keras" # model file written by make_model.py; .keras is the native Keras save format
print("We have done training already so we load this to not waste very precious cpu :)")
model = tf.keras.models.load_model(model_file)
# NOTE(review): leftover evaluation snippet from training experiments —
# val_ds and class_names are not defined anywhere in this file.
#for images, labels in val_ds.take(10):
# preds = model.predict(images)
# print(f"Prediction: {class_names[np.argmax(preds[0])]}")
# print(f"Label: {class_names[labels[0].numpy().astype(int)]}")
# plt.imshow(images[0].numpy().squeeze(), cmap='gray')
# plt.title(f"Pred: {class_names[np.argmax(preds[0])]}")
# plt.show()
#### DISCLAIMER: This was written by AI; I hate GUI stuff ####
class DrawingCanvas(QWidget):
    """Fixed-size widget the user draws letters on with the left mouse button.

    The backing store is an 8-bit grayscale QImage (white background, black
    pen), matching the grayscale data the model was trained on.
    """

    def __init__(self):
        super().__init__()
        self.setFixedSize(320, 320)  # 10x the 32x32 model input, easier to draw on
        self.image = QImage(self.size(), QImage.Format.Format_Grayscale8)
        self.image.fill(Qt.GlobalColor.white)
        self.drawing = False        # True while the left button is held down
        self.last_point = QPoint()  # previous mouse position, for line segments

    def paintEvent(self, event):
        """Blit the backing image onto the widget."""
        painter = QPainter(self)
        painter.drawImage(0, 0, self.image)

    def mousePressEvent(self, event):
        """Start a stroke on left-button press."""
        if event.button() == Qt.MouseButton.LeftButton:
            self.drawing = True
            self.last_point = event.position().toPoint()

    def mouseMoveEvent(self, event):
        """Extend the current stroke while dragging with the left button."""
        if (event.buttons() & Qt.MouseButton.LeftButton) and self.drawing:
            painter = QPainter(self.image)
            # Thick BLACK pen on the white canvas, since the model was trained
            # on dark-stroke grayscale images. (The old comment said "white",
            # but the pen has always been black.)
            painter.setPen(QPen(Qt.GlobalColor.black, 18, Qt.PenStyle.SolidLine,
                                Qt.PenCapStyle.RoundCap))
            painter.drawLine(self.last_point, event.position().toPoint())
            self.last_point = event.position().toPoint()
            self.update()

    def mouseReleaseEvent(self, event):
        """End the stroke. BUG FIX: self.drawing was previously never reset."""
        if event.button() == Qt.MouseButton.LeftButton:
            self.drawing = False

    def clear(self):
        """Wipe the canvas back to solid white."""
        self.image.fill(Qt.GlobalColor.white)
        self.update()
class MainWindow(QMainWindow):
    """Main window: drawing canvas plus a prediction label and two buttons."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Handwriting Recognition")
        main_layout = QVBoxLayout()
        self.canvas = DrawingCanvas()
        # STRING FIX: the original hint was missing its closing parenthesis.
        self.result_label = QLabel("Draw something and click Predict. (Need to fill entire space or model has stroke)")
        self.result_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        predict_btn = QPushButton("Predict")
        predict_btn.clicked.connect(self.predict_image)
        clear_btn = QPushButton("Clear Canvas")
        clear_btn.clicked.connect(self.canvas.clear)
        main_layout.addWidget(self.canvas)
        main_layout.addWidget(self.result_label)
        main_layout.addWidget(predict_btn)
        main_layout.addWidget(clear_btn)
        container = QWidget()
        container.setLayout(main_layout)
        self.setCentralWidget(container)

    def predict_image(self):
        """Run the model on the current canvas and show letter + confidence."""
        # 1. Resize the drawing to 32x32 to match the model input.
        scaled_img = self.canvas.image.scaled(32, 32, Qt.AspectRatioMode.IgnoreAspectRatio,
                                              Qt.TransformationMode.SmoothTransformation)
        # 2. Convert QImage to a numpy array.
        # NOTE(review): this assumes no per-scanline padding; that holds here
        # because a Grayscale8 row of width 32 is already 4-byte aligned —
        # confirm via bytesPerLine() if the input size ever changes.
        ptr = scaled_img.bits()
        ptr.setsize(32 * 32)
        arr = np.frombuffer(ptr, np.uint8).reshape(32, 32, 1)
        # 3. Add batch dimension and predict.
        # Note: We don't manually rescale by 1/255 here because your model has a Rescaling layer built-in!
        img_batch = np.expand_dims(arr, axis=0)
        prediction = model.predict(img_batch, verbose=0)
        print(type(prediction), prediction)
        class_names = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        print([x for x in zip(class_names, prediction[0])])
        result = class_names[np.argmax(prediction)]
        confidence = np.max(prediction) * 100
        self.result_label.setText(f"Prediction: {result} ({confidence:.1f}%)")
def _main():
    """Create the Qt application, show the main window, and run the loop."""
    app = QApplication(sys.argv)
    app.setStyle("Breeze")  # use the system theme so it looks nice on Linux
    window = MainWindow()
    window.show()
    sys.exit(app.exec())


if __name__ == "__main__":
    _main()

View file

@ -0,0 +1,29 @@
import os
from PIL import Image
import numpy as np

# Resize every letter image in BigDataSet to 32x32 grayscale, mirror the
# per-letter folder structure under Scaled/, and dump the whole set as two
# .npy files (images + labels) for make_model.py.
directory = "/home/mia/Schule/KISY/schrifterkennung/out"
letter_folders = os.listdir(f"{directory}/BigDataSet")
print(letter_folders)

labels = []
images = []
for letter in letter_folders:
    # Renamed from `dir`, which shadowed the builtin dir().
    src_dir = f"{directory}/BigDataSet/{letter}"
    os.makedirs(f"{directory}/Scaled/{letter}/", exist_ok=True)
    # enumerate replaces the manual i counter; the old `[:]` slice copy of
    # os.listdir()'s fresh list was a no-op and is dropped.
    for i, filename in enumerate(os.listdir(src_dir)):
        print(filename)
        img = Image.open(os.path.join(src_dir, filename))
        res = img.resize((32, 32))
        res = res.convert("L")  # "L" = 8-bit grayscale
        images.append(res)
        labels.append(letter)  # the label is the folder name
        res.save(f"{directory}/Scaled/{letter}/miakieler_{i}.png")

# PIL images expose __array_interface__, so this stacks to (N, 32, 32) uint8.
X = np.array(images)
Y = np.array(labels)
np.save('images_big.npy', X)
np.save('labels_big.npy', Y)

72
make_model.py Normal file
View file

@ -0,0 +1,72 @@
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
# Trains the letter-classification CNN on the arrays produced by the dataset
# scaler and saves it as model.keras for the GUI app.
# Read the data
X = np.load('images_big.npy') # input images, stacked as (N, 32, 32) uint8
Y = np.load('labels_big.npy') # labels (letters), one folder name per image
# Convert the string labels to numeric indices; unique_labels maps index -> letter
unique_labels, Y_numeric = np.unique(Y, return_inverse=True)
# Preprocessing
X = X.reshape(-1, 32, 32, 1) # reshape to 4D array (N, rows, cols, channels)
Y_categorical = to_categorical(Y_numeric) # one-hot encode the labels
# Split the data into training and test sets (90/10)
X_train, X_test, Y_train, Y_test = train_test_split(
X, Y_categorical, test_size=0.1, random_state=42
)
# # Earlier, smaller model kept for reference
# model = keras.Sequential([
# keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)),
# keras.layers.MaxPooling2D(pool_size=(2, 2)),
# keras.layers.Conv2D(64, (3, 3), activation='relu'),
# keras.layers.MaxPooling2D(pool_size=(2, 2)),
# keras.layers.Flatten(),
# keras.layers.Dense(128, activation='relu'),
# keras.layers.Dense(len(unique_labels), activation='softmax') # number of classes
# ])
# Model definition: two conv/batch-norm/pool stages, then a dense classifier.
model = keras.Sequential([
keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(32, 32, 1)),
keras.layers.BatchNormalization(),
keras.layers.MaxPooling2D(pool_size=(2, 2)),
keras.layers.Conv2D(128, (3, 3), activation='relu'),
keras.layers.BatchNormalization(),
keras.layers.MaxPooling2D(pool_size=(2, 2)),
keras.layers.Flatten(),
keras.layers.Dense(256, activation='relu'),
keras.layers.Dense(len(unique_labels), activation='softmax')
])
# # Plain dense baseline kept for reference (note: images are 32x32 here)
# model = keras.Sequential([
# keras.layers.Input(shape=(32, 32)),
# keras.layers.Flatten(),
# keras.layers.Dense(64, activation='relu'),
# keras.layers.Dense(128, activation='relu'),
# keras.layers.Dense(len(unique_labels), activation='softmax') # number of classes
# ])
optimizer = Adam(learning_rate=0.001)
# Stop when val_loss stalls for 5 epochs and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Compile the model (categorical_crossentropy matches the one-hot labels)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
# model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Show the model summary
model.summary()
# Train the model; the test split doubles as validation data for early stopping
model.fit(X_train, Y_train, epochs=20, batch_size=128, validation_data=(X_test, Y_test), callbacks=[early_stopping])
# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, Y_test)
print(f'Testgenauigkeit: {test_accuracy:.4f}')
# Save in the native Keras format; app.py loads this file.
model.save("model.keras")

BIN
model.keras Normal file

Binary file not shown.

18
shell.nix Normal file
View file

@ -0,0 +1,18 @@
# Development shell: `nix-shell` gives a Python interpreter carrying every
# package the scripts in this repository import.
let
  pkgs = import <nixpkgs> {};
in pkgs.mkShell {
  packages = [
    (pkgs.python3.withPackages (python-pkgs: [
      python-pkgs.pygame
      python-pkgs.matplotlib
      python-pkgs.sklearn-compat
      python-pkgs.pandas
      python-pkgs.opencv-python
      python-pkgs.pytesseract
      python-pkgs.tensorflow
      python-pkgs.keras
      python-pkgs.pyqt6
      # Added: imported by scripts in this repo but missing from the list.
      python-pkgs.numpy   # `import numpy as np` in every script
      python-pkgs.pillow  # `from PIL import Image`
      python-pkgs.pymupdf # `import fitz`
    ]))
  ];
}