Initial (and hopefully last) commit
This commit is contained in:
commit
06374b2608
8 changed files with 326 additions and 0 deletions
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
# Generated data sets and exported documents
*.npy
*.odt
*.pdf
# Generated letter images and the local virtualenv
/out/
/.venv/
|
||||||
40
Buchstaben_extrahieren.py
Normal file
40
Buchstaben_extrahieren.py
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
"""Cut a photographed grid of block letters into individual letter images.

Reads ``Blockbuchstaben.jpeg``, converts it to grayscale, slices it into a
``number_rows`` x ``number_cols`` grid and writes one PNG per cell into
``output_folder``.
"""
import cv2
import numpy as np
import os

# --- User parameters ---
output_folder = "letters"
image = cv2.imread("Blockbuchstaben.jpeg")
number_rows = 13
number_cols = 8
margin = 15  # border trimmed from each individual letter cell, in pixels

# BUGFIX: cv2.imread returns None on failure; without this check the
# image.shape access below raises a cryptic AttributeError.
if image is None:
    raise FileNotFoundError("Could not read Blockbuchstaben.jpeg")

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

height, width, channels = image.shape
print(height)
print(width)

# Size of one grid cell (integer division; a few edge pixels may be dropped).
letter_width = width // number_cols
letter_height = height // number_rows
print(letter_width)
print(letter_height)

os.makedirs(output_folder, exist_ok=True)

for i in range(number_rows):
    for j in range(number_cols):
        x = j * letter_width
        y = i * letter_height
        # Crop the cell and trim `margin` pixels on every side so grid
        # lines and neighbouring letters don't bleed into the crop.
        letter_img = gray[y + margin:y + letter_height - margin,
                          x + margin:x + letter_width - margin]
        cv2.imwrite(output_folder + '/letter' + str(i) + str(j) + '.png', letter_img)

cv2.waitKey(0)
cv2.destroyAllWindows()
|
||||||
49
Buchstaben_extrahieren2.py
Normal file
49
Buchstaben_extrahieren2.py
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
import fitz # PyMuPDF
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
def save_letter_images_from_pdf(pdf_path, output_folder):
    """Render every text span of a PDF as a 28x28 grayscale PNG.

    pdf_path: path of the PDF to read.
    output_folder: existing directory the letter images are written into.
    """
    doc = fitz.open(pdf_path)

    for page_num in range(len(doc)):
        page = doc.load_page(page_num)

        # Text structure: blocks -> lines -> spans; each span has a bbox.
        text_boxes = page.get_text("dict")["blocks"]

        # BUGFIX: the file name previously used len(text_boxes), which is
        # constant for the whole page, so every span overwrote the
        # previous image. Use a running per-page counter instead.
        box_idx = 0
        for block in text_boxes:
            if "lines" in block:  # image blocks carry no "lines" key
                for line in block["lines"]:
                    for span in line["spans"]:
                        # Bounding box of this span of text.
                        rect = fitz.Rect(span["bbox"])

                        # Render only the span's bounding box.
                        pix = page.get_pixmap(clip=rect)

                        # BUGFIX: np.array(bytes) yields a 0-d bytes array
                        # that cannot be reshaped; np.frombuffer decodes
                        # the raw sample buffer into uint8 pixels.
                        img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
                            pix.height, pix.width, pix.n)

                        # Collapse to a single grayscale channel.
                        # NOTE(review): PyMuPDF pixmaps are RGB(A), not
                        # BGR(A); for a grayscale conversion the weighting
                        # difference is minor, conversion kept as before.
                        if pix.n == 4:
                            img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
                        else:
                            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                        # Resize to the 28x28 input size used downstream.
                        img_resized = cv2.resize(img, (28, 28),
                                                 interpolation=cv2.INTER_AREA)

                        img_pil = Image.fromarray(img_resized)
                        img_pil.save(
                            f"{output_folder}/letter_page{page_num + 1}_box{box_idx}.png")
                        box_idx += 1

    # Close the document explicitly to release the file handle.
    doc.close()
|
||||||
|
|
||||||
|
# Example usage with hard-coded local paths.
# NOTE(review): output_folder must already exist — the function does not
# create it before saving images there; confirm against the environment.
pdf_path = "/home/mia/Schule/KISY/schrifterkennung/letters.pdf"
output_folder = "/home/mia/Schule/KISY/schrifterkennung/out/"
save_letter_images_from_pdf(pdf_path, output_folder)
|
||||||
|
|
||||||
113
app.py
Normal file
113
app.py
Normal file
|
|
@ -0,0 +1,113 @@
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow.keras import layers, models, Sequential
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import os
|
||||||
|
from PyQt6.QtWidgets import QApplication, QMainWindow, QWidget, QVBoxLayout, QPushButton, QLabel
|
||||||
|
from PyQt6.QtGui import QPainter, QPen, QImage
|
||||||
|
from PyQt6.QtCore import Qt, QPoint
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Project base directory.
# NOTE(review): defined but never used below — kept for reference.
data_dir = "/home/mia/Schule/KISY/schrifterkennung/" # Ignore full path, had some weird problem otherwise

# File the trained model is loaded from (written by make_model.py).
model_file = "model.keras"

print("We have done training already so we load this to not waste very precious cpu :)")
# Load the pre-trained Keras model once at startup instead of retraining.
model = tf.keras.models.load_model(model_file)

# (A commented-out validation-preview loop referencing an undefined
# `val_ds` was removed here.)

#### DISCLAIMER: This was written by AI; I hate GUI stuff ####
|
||||||
|
|
||||||
|
class DrawingCanvas(QWidget):
    """Fixed-size widget the user draws a letter on with the left mouse button.

    The drawing is kept in an off-screen grayscale QImage so it can later be
    downscaled and fed to the model.
    """

    def __init__(self):
        super().__init__()
        self.setFixedSize(320, 320)  # 10x the model input size for easier drawing
        self.image = QImage(self.size(), QImage.Format.Format_Grayscale8)
        self.image.fill(Qt.GlobalColor.white)
        self.drawing = False
        self.last_point = QPoint()

    def paintEvent(self, event):
        # Blit the off-screen image onto the visible widget.
        painter = QPainter(self)
        painter.drawImage(0, 0, self.image)

    def mousePressEvent(self, event):
        if event.button() == Qt.MouseButton.LeftButton:
            self.drawing = True
            self.last_point = event.position().toPoint()

    def mouseMoveEvent(self, event):
        if (event.buttons() & Qt.MouseButton.LeftButton) and self.drawing:
            painter = QPainter(self.image)
            # Thick BLACK pen on the white canvas, matching the grayscale
            # training images (the original comment wrongly said "white").
            painter.setPen(QPen(Qt.GlobalColor.black, 18, Qt.PenStyle.SolidLine, Qt.PenCapStyle.RoundCap))
            painter.drawLine(self.last_point, event.position().toPoint())
            self.last_point = event.position().toPoint()
            self.update()

    def mouseReleaseEvent(self, event):
        # BUGFIX: self.drawing was previously never reset after release.
        if event.button() == Qt.MouseButton.LeftButton:
            self.drawing = False

    def clear(self):
        """Reset the canvas to an all-white image."""
        self.image.fill(Qt.GlobalColor.white)
        self.update()
|
||||||
|
|
||||||
|
|
||||||
|
class MainWindow(QMainWindow):
    """Main window: drawing canvas plus Predict / Clear buttons and a result label."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Handwriting Recognition")

        main_layout = QVBoxLayout()
        self.canvas = DrawingCanvas()
        # BUGFIX: the closing parenthesis was missing in the hint text.
        self.result_label = QLabel("Draw something and click Predict. (Need to fill entire space or model has stroke)")
        self.result_label.setAlignment(Qt.AlignmentFlag.AlignCenter)

        predict_btn = QPushButton("Predict")
        predict_btn.clicked.connect(self.predict_image)

        clear_btn = QPushButton("Clear Canvas")
        clear_btn.clicked.connect(self.canvas.clear)

        main_layout.addWidget(self.canvas)
        main_layout.addWidget(self.result_label)
        main_layout.addWidget(predict_btn)
        main_layout.addWidget(clear_btn)

        container = QWidget()
        container.setLayout(main_layout)
        self.setCentralWidget(container)

    def predict_image(self):
        """Downscale the canvas to 32x32, run the model, display the result."""
        # 1. Resize the drawing to 32x32 to match the model input.
        scaled_img = self.canvas.image.scaled(32, 32, Qt.AspectRatioMode.IgnoreAspectRatio,
                                              Qt.TransformationMode.SmoothTransformation)

        # 2. Convert the QImage into a (32, 32, 1) uint8 numpy array.
        # (32-byte Grayscale8 scanlines are already 4-byte aligned, so the
        # buffer is exactly 32*32 bytes with no row padding.)
        ptr = scaled_img.bits()
        ptr.setsize(32 * 32)
        arr = np.frombuffer(ptr, np.uint8).reshape(32, 32, 1)

        # 3. Add the batch dimension and predict.
        # NOTE(review): the original comment claimed the model has a
        # built-in Rescaling layer, but make_model.py defines none — the
        # model appears to be trained on raw 0-255 values, which matches
        # feeding unscaled pixels here. Confirm against the saved model.
        img_batch = np.expand_dims(arr, axis=0)
        prediction = model.predict(img_batch, verbose=0)
        print(type(prediction), prediction)

        # Map the argmax class id back to a letter (assumes the 26 classes
        # were sorted alphabetically during training).
        class_names = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        print([x for x in zip(class_names, prediction[0])])
        result = class_names[np.argmax(prediction)]
        confidence = np.max(prediction) * 100
        self.result_label.setText(f"Prediction: {result} ({confidence:.1f}%)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Build and run the Qt application.
    qt_app = QApplication(sys.argv)
    qt_app.setStyle("Breeze")  # Use system theme so it looks nice on linux
    main_window = MainWindow()
    main_window.show()
    sys.exit(qt_app.exec())
|
||||||
29
convert_from_big_dataset.py
Normal file
29
convert_from_big_dataset.py
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
"""Resize the raw letter data set to 32x32 grayscale and save it as .npy.

Walks ``<directory>/BigDataSet/<letter>/*``, converts every image to 32x32
grayscale, saves the scaled copies under ``<directory>/Scaled/<letter>/`` and
dumps the images/labels as ``images_big.npy`` / ``labels_big.npy``.
"""
import os
from PIL import Image
import numpy as np

directory = "/home/mia/Schule/KISY/schrifterkennung/out"

# One sub-folder per letter; the folder name is the label.
files = os.listdir(f"{directory}/BigDataSet")
print(files)
labels = []
images = []
for letter in files:
    # Renamed from `dir`, which shadowed the builtin of the same name.
    letter_dir = f"{directory}/BigDataSet/{letter}"
    os.makedirs(f"{directory}/Scaled/{letter}/", exist_ok=True)
    # enumerate replaces the previous hand-rolled `i` counter (and the
    # no-op `[:]` slice on the listing).
    for i, image in enumerate(os.listdir(letter_dir)):
        print(image)
        img = Image.open(os.path.join(letter_dir, image))
        # 32x32 single-channel ("L") images — the format make_model.py expects.
        res = img.resize((32, 32)).convert("L")
        images.append(res)
        labels.append(letter)  # label is the folder name
        res.save(f"{directory}/Scaled/{letter}/miakieler_{i}.png")

X = np.array(images)
Y = np.array(labels)

np.save('images_big.npy', X)
np.save('labels_big.npy', Y)
|
||||||
72
make_model.py
Normal file
72
make_model.py
Normal file
|
|
@ -0,0 +1,72 @@
|
||||||
|
"""Train a CNN letter classifier on the data set produced by
convert_from_big_dataset.py and save it as model.keras."""
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Load the prepared data set.
X = np.load('images_big.npy')   # input images
Y = np.load('labels_big.npy')   # labels (letters)

# Map the string labels onto consecutive integer class ids.
unique_labels, Y_numeric = np.unique(Y, return_inverse=True)

# Preprocessing: 4-D image tensor (N, rows, cols, channels) and
# one-hot encoded labels.
X = X.reshape(-1, 32, 32, 1)
Y_categorical = to_categorical(Y_numeric)

# Hold out 10% of the data for validation.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y_categorical, test_size=0.1, random_state=42
)

# CNN: two conv/batch-norm/pool stages followed by a dense head with one
# softmax unit per letter class.
model = keras.Sequential([
    keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(32, 32, 1)),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(len(unique_labels), activation='softmax'),
])

optimizer = Adam(learning_rate=0.001)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Compile and inspect the model.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train with early stopping on the validation loss.
model.fit(X_train, Y_train, epochs=20, batch_size=128, validation_data=(X_test, Y_test), callbacks=[early_stopping])

# Evaluate on the held-out split and persist the model.
test_loss, test_accuracy = model.evaluate(X_test, Y_test)
print(f'Testgenauigkeit: {test_accuracy:.4f}')
model.save("model.keras")
|
||||||
|
|
||||||
BIN
model.keras
Normal file
BIN
model.keras
Normal file
Binary file not shown.
18
shell.nix
Normal file
18
shell.nix
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Development shell for the handwriting-recognition scripts: a Python 3
# interpreter with the GUI, ML and image-processing packages preinstalled.
let
  pkgs = import <nixpkgs> {};
in pkgs.mkShell {
  packages = [
    (pkgs.python3.withPackages (python-pkgs: [
      python-pkgs.pygame
      python-pkgs.matplotlib
      python-pkgs.sklearn-compat
      python-pkgs.pandas
      python-pkgs.opencv-python
      python-pkgs.pytesseract
      python-pkgs.tensorflow
      python-pkgs.keras
      python-pkgs.pyqt6
    ]))
  ];
}
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue