commit 06374b2608e9ac9da145ff27eafc7bda5aec5b70
Author: mia
Date:   Thu Mar 12 10:37:32 2026 +0100

    Initial (and hopefully last) commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..25d69d2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*.npy
+*.odt
+*.pdf
+/out/
+/.venv/
diff --git a/Buchstaben_extrahieren.py b/Buchstaben_extrahieren.py
new file mode 100644
index 0000000..28ba3d3
--- /dev/null
+++ b/Buchstaben_extrahieren.py
@@ -0,0 +1,40 @@
+import cv2
+import os
+
+# User parameters
+output_folder = "letters"
+image = cv2.imread("Blockbuchstaben.jpeg")
+number_rows = 13
+number_cols = 8
+margin = 15  # border trimmed off each individual letter
+
+gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+
+height, width, channels = image.shape
+print(height)
+print(width)
+
+# cell size of the letter grid
+letter_width = width // number_cols
+letter_height = height // number_rows
+
+print(letter_width)
+print(letter_height)
+
+if not os.path.exists(output_folder):
+    os.makedirs(output_folder)
+
+for i in range(number_rows):
+    for j in range(number_cols):
+        x = j * letter_width
+        y = i * letter_height
+
+        letter_img = gray[y + margin:y + letter_height - margin, x + margin:x + letter_width - margin]
+        #if i == 6 and j == 8:
+        #    cv2.imshow('letter_img', letter_img)
+        cv2.imwrite(output_folder + '/letter' + str(i) + str(j) + '.png', letter_img)
+
+# only needed together with the cv2.imshow() calls above
+#cv2.imshow('BuchstabenRaster', gray)
+#cv2.waitKey(0)
+#cv2.destroyAllWindows()
diff --git a/Buchstaben_extrahieren2.py b/Buchstaben_extrahieren2.py
new file mode 100644
index 0000000..7d9c755
--- /dev/null
+++ b/Buchstaben_extrahieren2.py
@@ -0,0 +1,49 @@
+import fitz  # PyMuPDF
+import cv2
+import numpy as np
+from PIL import Image
+
+def save_letter_images_from_pdf(pdf_path, output_folder):
+    # Open the PDF file
+    doc = fitz.open(pdf_path)
+
+    for page_num in range(len(doc)):
+        page = doc.load_page(page_num)
+
+        # Get the text blocks
+        text_boxes = page.get_text("dict")["blocks"]
+
+        box_counter = 0  # running index; otherwise every span on a page overwrites the same file
+        for block in text_boxes:
+            if "lines" in block:  # Check if block contains lines
+                for line in block["lines"]:
+                    for span in line["spans"]:
+                        # Bounding box of the text span (a run of characters)
+                        rect = fitz.Rect(span["bbox"])
+
+                        # Render just the span's bounding box
+                        pix = page.get_pixmap(clip=rect)
+
+                        # Convert the raw pixel buffer to a numpy array
+                        img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
+
+                        # PyMuPDF delivers RGB(A), not BGR(A), so convert accordingly
+                        if pix.n == 4:
+                            img = cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
+                        else:
+                            img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+
+                        # Resize to 28x28 pixels
+                        img_resized = cv2.resize(img, (28, 28), interpolation=cv2.INTER_AREA)
+
+                        # Save the image
+                        img_pil = Image.fromarray(img_resized)
+                        img_pil.save(f"{output_folder}/letter_page{page_num + 1}_box{box_counter}.png")
+                        box_counter += 1
+
+    # Close the document
+    doc.close()
+
+# Example usage
+pdf_path = "/home/mia/Schule/KISY/schrifterkennung/letters.pdf"
+output_folder = "/home/mia/Schule/KISY/schrifterkennung/out/"
+save_letter_images_from_pdf(pdf_path, output_folder)

diff --git a/app.py b/app.py
new file mode 100644
index 0000000..256ed73
--- /dev/null
+++ b/app.py
@@ -0,0 +1,113 @@
+import tensorflow as tf
+import numpy as np
+import matplotlib.pyplot as plt  # only needed for the commented-out demo below
+from PyQt6.QtWidgets import QApplication, QMainWindow, QWidget, QVBoxLayout, QPushButton, QLabel
+from PyQt6.QtGui import QPainter, QPen, QImage
+from PyQt6.QtCore import Qt, QPoint
+import sys
+
+data_dir = "/home/mia/Schule/KISY/schrifterkennung/"  # full path on purpose; relative paths caused weird problems on this machine
+
+model_file = "model.keras"  # model save file; ".keras" is the native Keras format
+
+print("Training is already done, so we load the saved model instead of wasting precious CPU :)")
+model = tf.keras.models.load_model(model_file)
+
+# leftover demo from training (val_ds and class_names are not defined in this file):
+#for images, labels in val_ds.take(10):
+#    preds = model.predict(images)
+#    print(f"Prediction: {class_names[np.argmax(preds[0])]}")
+#    print(f"Label: {class_names[labels[0].numpy().astype(int)]}")
+#    plt.imshow(images[0].numpy().squeeze(), cmap='gray')
+#    plt.title(f"Pred: {class_names[np.argmax(preds[0])]}")
+#    plt.show()
+
+#### DISCLAIMER: This was written by AI; I hate GUI stuff ####
+
+class DrawingCanvas(QWidget):
+    def __init__(self):
+        super().__init__()
+        self.setFixedSize(320, 320)  # 10x the 32x32 model input, for easier drawing
+        self.image = QImage(self.size(), QImage.Format.Format_Grayscale8)
+        self.image.fill(Qt.GlobalColor.white)
+        self.drawing = False
+        self.last_point = QPoint()
+
+    def paintEvent(self, event):
+        painter = QPainter(self)
+        painter.drawImage(0, 0, self.image)
+
+    def mousePressEvent(self, event):
+        if event.button() == Qt.MouseButton.LeftButton:
+            self.drawing = True
+            self.last_point = event.position().toPoint()
+
+    def mouseMoveEvent(self, event):
+        if (event.buttons() & Qt.MouseButton.LeftButton) and self.drawing:
+            painter = QPainter(self.image)
+            # Thick black pen on a white canvas, matching the dark-on-light training images
+            painter.setPen(QPen(Qt.GlobalColor.black, 18, Qt.PenStyle.SolidLine, Qt.PenCapStyle.RoundCap))
+            painter.drawLine(self.last_point, event.position().toPoint())
+            self.last_point = event.position().toPoint()
+            self.update()
+
+    def clear(self):
+        self.image.fill(Qt.GlobalColor.white)
+        self.update()
+
+
+class MainWindow(QMainWindow):
+    def __init__(self):
+        super().__init__()
+        self.setWindowTitle("Handwriting Recognition")
+
+        main_layout = QVBoxLayout()
+        self.canvas = DrawingCanvas()
+        self.result_label = QLabel("Draw something and click Predict. (Fill the entire space or the model has a stroke.)")
+        self.result_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
+
+        predict_btn = QPushButton("Predict")
+        predict_btn.clicked.connect(self.predict_image)
+
+        clear_btn = QPushButton("Clear Canvas")
+        clear_btn.clicked.connect(self.canvas.clear)
+
+        main_layout.addWidget(self.canvas)
+        main_layout.addWidget(self.result_label)
+        main_layout.addWidget(predict_btn)
+        main_layout.addWidget(clear_btn)
+
+        container = QWidget()
+        container.setLayout(main_layout)
+        self.setCentralWidget(container)
+
+    def predict_image(self):
+        # 1. Resize the drawing to 32x32 to match the model input
+        scaled_img = self.canvas.image.scaled(32, 32, Qt.AspectRatioMode.IgnoreAspectRatio,
+                                              Qt.TransformationMode.SmoothTransformation)
+
+        # 2. Convert QImage to a numpy array
+        ptr = scaled_img.bits()
+        ptr.setsize(32 * 32)
+        arr = np.frombuffer(ptr, np.uint8).reshape(32, 32, 1)
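+        # QImage pads each scanline to a 4-byte boundary; a width of 32 bytes is
+        # already a multiple of 4, so reading the flat 32*32 buffer above is safe.
+        # (For other canvas sizes the rows would need copying one by one.)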
+
+        # 3. Add batch dimension and predict
+        # Note: no manual 1/255 rescaling here; make_model.py trains on raw 0-255
+        # pixel values (there is no Rescaling layer in the model), so inference must match.
+        img_batch = np.expand_dims(arr, axis=0)
+        prediction = model.predict(img_batch, verbose=0)
+        print(type(prediction), prediction)
+
+        class_names = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
+        print(list(zip(class_names, prediction[0])))
+        result = class_names[np.argmax(prediction[0])]
+        confidence = np.max(prediction) * 100
+        self.result_label.setText(f"Prediction: {result} ({confidence:.1f}%)")
+
+
+if __name__ == "__main__":
+    app = QApplication(sys.argv)
+    app.setStyle("Breeze")  # Use the system theme so it looks nice on Linux
+    window = MainWindow()
+    window.show()
+    sys.exit(app.exec())
diff --git a/convert_from_big_dataset.py b/convert_from_big_dataset.py
new file mode 100644
index 0000000..64733a8
--- /dev/null
+++ b/convert_from_big_dataset.py
@@ -0,0 +1,29 @@
+import os
+from PIL import Image
+import numpy as np
+
+directory = "/home/mia/Schule/KISY/schrifterkennung/out"
+
+files = os.listdir(f"{directory}/BigDataSet")
+print(files)
+labels = []
+images = []
+for letter in files:
+    letter_dir = f"{directory}/BigDataSet/{letter}"  # one folder per letter
+    i = 0
+    os.makedirs(f"{directory}/Scaled/{letter}/", exist_ok=True)
+    for image in os.listdir(letter_dir):
+        print(image)
+        img = Image.open(os.path.join(letter_dir, image))
+        res = img.resize((32, 32))
+        res = res.convert("L")  # grayscale
+        images.append(res)
+        labels.append(letter)  # the label is the folder name
+        res.save(f"{directory}/Scaled/{letter}/miakieler_{i}.png")
+        i += 1
+
+X = np.array(images)
+Y = np.array(labels)
+
+np.save('images_big.npy', X)
+np.save('labels_big.npy', Y)
diff --git a/make_model.py b/make_model.py
new file mode 100644
index 0000000..7876aed
--- /dev/null
+++ b/make_model.py
@@ -0,0 +1,72 @@
+import numpy as np
+import tensorflow as tf
+from tensorflow import keras
+from sklearn.model_selection import train_test_split
+from tensorflow.keras.utils import to_categorical
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import EarlyStopping
+
+# Load the data
+X = np.load('images_big.npy')  # input images
+Y = np.load('labels_big.npy')  # labels (letters)
+
+# Map the labels to numeric values
+unique_labels, Y_numeric = np.unique(Y, return_inverse=True)
+
+# Preprocess the data
+X = X.reshape(-1, 32, 32, 1)  # reshape to a 4D array (N, rows, cols, channels)
+Y_categorical = to_categorical(Y_numeric)  # one-hot encode the labels
+
+# Split the data into training and test sets
+X_train, X_test, Y_train, Y_test = train_test_split(
+    X, Y_categorical, test_size=0.1, random_state=42
+)
+
+# # Build the model
+# model = keras.Sequential([
+#     keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)),
+#     keras.layers.MaxPooling2D(pool_size=(2, 2)),
+#     keras.layers.Conv2D(64, (3, 3), activation='relu'),
+#     keras.layers.MaxPooling2D(pool_size=(2, 2)),
+#     keras.layers.Flatten(),
+#     keras.layers.Dense(128, activation='relu'),
+#     keras.layers.Dense(len(unique_labels), activation='softmax')  # number of classes
+# ])

model = keras.Sequential([
    keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(32, 32, 1)),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(len(unique_labels), activation='softmax')
])

+# model = keras.Sequential([
+#     keras.layers.Input(shape=(32, 32)),  # images are 32x32
+#     keras.layers.Flatten(),  # becomes 1024
+#     keras.layers.Dense(64, activation='relu'),
+#     keras.layers.Dense(128, activation='relu'),
+#     keras.layers.Dense(len(unique_labels), activation='softmax')  # number of classes
+# ])
+
+optimizer = Adam(learning_rate=0.001)
+early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
+# Compile the model
+model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
+# model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
+
+# Show the model summary
+model.summary()
+
+# Train the model
+# Caveat: the test split doubles as the validation split for early stopping,
+# so the final evaluation below is slightly optimistic.
+model.fit(X_train, Y_train, epochs=20, batch_size=128, validation_data=(X_test, Y_test), callbacks=[early_stopping])
+
+# Evaluate the model
+test_loss, test_accuracy = model.evaluate(X_test, Y_test)
+print(f'Test accuracy: {test_accuracy:.4f}')
+model.save("model.keras")

diff --git a/model.keras b/model.keras
new file mode 100644
index 0000000..fc245a6
Binary files /dev/null and b/model.keras differ
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..0de5b83
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,18 @@
+let
+  pkgs = import <nixpkgs> {};
+in pkgs.mkShell {
+  packages = [
+    (pkgs.python3.withPackages (python-pkgs: [
+      python-pkgs.pygame
+      python-pkgs.matplotlib
+      python-pkgs.scikit-learn  # make_model.py imports sklearn
+      python-pkgs.pandas
+      python-pkgs.opencv4       # provides the cv2 module
+      python-pkgs.pytesseract
+      python-pkgs.tensorflow
+      python-pkgs.keras
+      python-pkgs.pyqt6
+      python-pkgs.pymupdf       # fitz, used by Buchstaben_extrahieren2.py
+      python-pkgs.pillow
+      python-pkgs.numpy
+    ]))
+  ];
+}
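+
+# Usage: run `nix-shell` in the repo root to get a Python environment with the
+# packages above, then e.g. `python make_model.py` to train and `python app.py`
+# to start the drawing GUI.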