Initial (and hopefully last) commit
This commit is contained in:
commit
06374b2608
8 changed files with 326 additions and 0 deletions
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
*.npy
|
||||
*.odt
|
||||
*.pdf
|
||||
/out/
|
||||
/.venv/
|
||||
40
Buchstaben_extrahieren.py
Normal file
40
Buchstaben_extrahieren.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
import cv2
import numpy as np
import os

# --- User parameters ---------------------------------------------------------
output_folder = "letters"   # directory that receives one PNG per letter cell
image = cv2.imread("Blockbuchstaben.jpeg")
number_rows = 13            # grid layout of the scanned letter sheet
number_cols = 8
margin = 15                 # border trimmed from every side of each letter cell

# cv2.imread returns None (it does NOT raise) when the file is missing or
# unreadable; fail early with a clear message instead of crashing in cvtColor.
if image is None:
    raise FileNotFoundError("Could not read 'Blockbuchstaben.jpeg'")

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

height, width, channels = image.shape
print(height)
print(width)

# Integer division: leftover pixels on the right/bottom edges are ignored.
letter_width = width // number_cols
letter_height = height // number_rows

print(letter_width)
print(letter_height)

os.makedirs(output_folder, exist_ok=True)

for i in range(number_rows):
    for j in range(number_cols):
        x = j * letter_width
        y = i * letter_height

        # Crop the cell and shave `margin` pixels off every side so grid
        # lines and neighbouring strokes do not leak into the letter image.
        letter_img = gray[y + margin:y + letter_height - margin,
                          x + margin:x + letter_width - margin]
        cv2.imwrite(os.path.join(output_folder, f"letter{i}{j}.png"), letter_img)

# No window is ever shown (the imshow calls were removed), so these are
# effectively no-ops; kept for parity with the original interactive workflow.
cv2.waitKey(0)
cv2.destroyAllWindows()
|
||||
49
Buchstaben_extrahieren2.py
Normal file
49
Buchstaben_extrahieren2.py
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import fitz # PyMuPDF
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
def save_letter_images_from_pdf(pdf_path, output_folder):
    """Extract every text span of a PDF as a 28x28 grayscale PNG.

    pdf_path: path of the input PDF file.
    output_folder: existing directory that receives the PNG files.
    """
    doc = fitz.open(pdf_path)

    for page_num in range(len(doc)):
        page = doc.load_page(page_num)

        # "dict" extraction returns nested blocks -> lines -> spans.
        text_boxes = page.get_text("dict")["blocks"]

        box_idx = 0  # running per-page index so each span gets a unique file
        for block in text_boxes:
            if "lines" in block:  # image blocks carry no "lines" key
                for line in block["lines"]:
                    for span in line["spans"]:
                        # Bounding box of the span in page coordinates.
                        rect = fitz.Rect(span["bbox"])

                        # Render only the clipped region.
                        pix = page.get_pixmap(clip=rect)

                        # BUG FIX: pix.samples is a bytes object; np.array()
                        # on bytes does not yield a uint8 pixel array —
                        # np.frombuffer does.
                        img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
                            pix.height, pix.width, pix.n)

                        # Collapse to a single gray channel.
                        # NOTE(review): PyMuPDF pixmaps are RGB(A), not BGR(A);
                        # for a grayscale result the channel order is harmless.
                        if pix.n == 4:
                            img = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
                        else:
                            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                        # Normalise to the 28x28 model input size.
                        img_resized = cv2.resize(img, (28, 28), interpolation=cv2.INTER_AREA)

                        # BUG FIX: the original file name used len(text_boxes),
                        # which is constant per page, so every span overwrote
                        # the same file.  Use the per-page counter instead.
                        img_pil = Image.fromarray(img_resized)
                        img_pil.save(f"{output_folder}/letter_page{page_num + 1}_box{box_idx}.png")
                        box_idx += 1

    doc.close()
|
||||
|
||||
# Example usage
_project_dir = "/home/mia/Schule/KISY/schrifterkennung"
pdf_path = f"{_project_dir}/letters.pdf"
output_folder = f"{_project_dir}/out/"
save_letter_images_from_pdf(pdf_path, output_folder)
|
||||
|
||||
113
app.py
Normal file
113
app.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
import tensorflow as tf
|
||||
from tensorflow.keras import layers, models, Sequential
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import os
|
||||
from PyQt6.QtWidgets import QApplication, QMainWindow, QWidget, QVBoxLayout, QPushButton, QLabel
|
||||
from PyQt6.QtGui import QPainter, QPen, QImage
|
||||
from PyQt6.QtCore import Qt, QPoint
|
||||
import sys
|
||||
|
||||
# Project root on the author's machine.
# NOTE(review): data_dir is never referenced in the visible code — confirm it
# is still needed before relying on it.
data_dir = "/home/mia/Schule/KISY/schrifterkennung/" # Ignore full path, had some weird problem otherwise

# File the trained network is loaded from; written by make_model.py
# (Keras ".keras" save format).
model_file = "model.keras" # Model save file. No Idea if this is the correct extention but nobody cares, right?

print("We have done training already so we load this to not waste very precious cpu :)")
# Load the already-trained model instead of retraining at startup.
model = tf.keras.models.load_model(model_file)
|
||||
|
||||
#for images, labels in val_ds.take(10):
|
||||
# preds = model.predict(images)
|
||||
# print(f"Prediction: {class_names[np.argmax(preds[0])]}")
|
||||
# print(f"Label: {class_names[labels[0].numpy().astype(int)]}")
|
||||
# plt.imshow(images[0].numpy().squeeze(), cmap='gray')
|
||||
# plt.title(f"Pred: {class_names[np.argmax(preds[0])]}")
|
||||
# plt.show()
|
||||
|
||||
#### DISCLAIMER: This was written by AI; I hate GUI stuff ####
|
||||
|
||||
class DrawingCanvas(QWidget):
    """Fixed-size widget the user draws on, backed by a grayscale QImage."""

    def __init__(self):
        super().__init__()
        self.setFixedSize(320, 320)  # 10x the model input size for easier drawing
        # Off-screen backing store; Grayscale8 matches the model's single
        # input channel.
        self.image = QImage(self.size(), QImage.Format.Format_Grayscale8)
        self.image.fill(Qt.GlobalColor.white)
        self.drawing = False
        self.last_point = QPoint()

    def paintEvent(self, event):
        """Blit the backing image onto the widget."""
        painter = QPainter(self)
        painter.drawImage(0, 0, self.image)

    def mousePressEvent(self, event):
        """Begin a stroke on a left-button press."""
        if event.button() == Qt.MouseButton.LeftButton:
            self.drawing = True
            self.last_point = event.position().toPoint()

    def mouseMoveEvent(self, event):
        """Extend the current stroke while the left button is held."""
        if (event.buttons() & Qt.MouseButton.LeftButton) and self.drawing:
            painter = QPainter(self.image)
            # Thick BLACK pen on the white canvas (the original comment said
            # "white pen", which was wrong — the pen color below is black).
            painter.setPen(QPen(Qt.GlobalColor.black, 18, Qt.PenStyle.SolidLine, Qt.PenCapStyle.RoundCap))
            painter.drawLine(self.last_point, event.position().toPoint())
            # BUG FIX: end the painter explicitly instead of relying on GC to
            # release the paint device before the next paint of self.image.
            painter.end()
            self.last_point = event.position().toPoint()
            self.update()

    def clear(self):
        """Reset the canvas to all white."""
        self.image.fill(Qt.GlobalColor.white)
        self.update()
|
||||
|
||||
|
||||
class MainWindow(QMainWindow):
    """Main window: drawing canvas, result label, predict/clear buttons."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Handwriting Recognition")

        main_layout = QVBoxLayout()
        self.canvas = DrawingCanvas()
        # BUG FIX: the original label text was missing its closing parenthesis.
        self.result_label = QLabel("Draw something and click Predict. (Need to fill entire space or model has stroke)")
        self.result_label.setAlignment(Qt.AlignmentFlag.AlignCenter)

        predict_btn = QPushButton("Predict")
        predict_btn.clicked.connect(self.predict_image)

        clear_btn = QPushButton("Clear Canvas")
        clear_btn.clicked.connect(self.canvas.clear)

        main_layout.addWidget(self.canvas)
        main_layout.addWidget(self.result_label)
        main_layout.addWidget(predict_btn)
        main_layout.addWidget(clear_btn)

        container = QWidget()
        container.setLayout(main_layout)
        self.setCentralWidget(container)

    def predict_image(self):
        """Classify the current canvas drawing and display the result."""
        # 1. Shrink the 320x320 drawing to the 32x32 model input.
        scaled_img = self.canvas.image.scaled(32, 32, Qt.AspectRatioMode.IgnoreAspectRatio,
                                              Qt.TransformationMode.SmoothTransformation)

        # 2. View the QImage pixels as a (32, 32, 1) uint8 array.
        #    (Grayscale8 at width 32 has no row padding, so 32*32 bytes is the
        #    whole buffer.)
        ptr = scaled_img.bits()
        ptr.setsize(32 * 32)
        arr = np.frombuffer(ptr, np.uint8).reshape(32, 32, 1)

        # 3. Add the batch axis and predict.
        # NOTE(review): no /255 rescaling happens here, and make_model.py
        # defines no Rescaling layer either — the model therefore sees raw
        # 0-255 values at both train and inference time; confirm intended.
        img_batch = np.expand_dims(arr, axis=0)
        prediction = model.predict(img_batch, verbose=0)
        print(type(prediction), prediction)

        class_names = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        print([x for x in zip(class_names, prediction[0])])
        result = class_names[np.argmax(prediction)]
        confidence = np.max(prediction) * 100
        self.result_label.setText(f"Prediction: {result} ({confidence:.1f}%)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    qt_app = QApplication(sys.argv)
    qt_app.setStyle("Breeze")  # Use system theme so it looks nice on linux
    main_window = MainWindow()
    main_window.show()
    sys.exit(qt_app.exec())
|
||||
29
convert_from_big_dataset.py
Normal file
29
convert_from_big_dataset.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import os
from PIL import Image
import numpy as np

# Raw dataset root; scaled copies go to <directory>/Scaled/<letter>/ and the
# flattened arrays to images_big.npy / labels_big.npy in the working dir.
directory = "/home/mia/Schule/KISY/schrifterkennung/out"

# One sub-directory per letter class inside BigDataSet.
files = os.listdir(f"{directory}/BigDataSet")
print(files)
labels = []
images = []
for letter in files:
    # Renamed from `dir`, which shadowed the builtin.
    letter_dir = f"{directory}/BigDataSet/{letter}"
    os.makedirs(f"{directory}/Scaled/{letter}/", exist_ok=True)
    for i, image in enumerate(os.listdir(letter_dir)):
        print(image)
        img = Image.open(os.path.join(letter_dir, image))
        # Normalise every sample to 32x32 single channel ("L" = 8-bit gray).
        res = img.resize((32, 32)).convert("L")
        images.append(res)
        labels.append(letter)  # the label is the folder name
        res.save(f"{directory}/Scaled/{letter}/miakieler_{i}.png")

# A list of equally-sized PIL images stacks into an (N, 32, 32) uint8 array.
X = np.array(images)
Y = np.array(labels)

np.save('images_big.npy', X)
np.save('labels_big.npy', Y)
|
||||
72
make_model.py
Normal file
72
make_model.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Load the dataset produced by convert_from_big_dataset.py.
X = np.load('images_big.npy')   # input images, shape (N, 32, 32)
Y = np.load('labels_big.npy')   # labels (letters as strings)

# Map the string labels to integer class indices.
unique_labels, Y_numeric = np.unique(Y, return_inverse=True)

# Preprocessing: add the channel axis and one-hot encode the labels.
X = X.reshape(-1, 32, 32, 1)            # (N, rows, cols, channels)
Y_categorical = to_categorical(Y_numeric)

# Train/test split.
# NOTE(review): the test set below also serves as the early-stopping
# validation data, so the reported "Testgenauigkeit" is optimistically
# biased — consider carving out a separate validation split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y_categorical, test_size=0.1, random_state=42
)

# Small CNN: two conv / batch-norm / max-pool stages and a dense classifier.
# Note: inputs are fed as raw 0-255 values — there is no Rescaling layer.
model = keras.Sequential([
    keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(32, 32, 1)),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(len(unique_labels), activation='softmax')  # one unit per class
])

optimizer = Adam(learning_rate=0.001)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Categorical cross-entropy matches the one-hot encoded labels above.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Show the model architecture.
model.summary()

# Train; early stopping restores the best weights seen on the validation data.
model.fit(X_train, Y_train, epochs=20, batch_size=128,
          validation_data=(X_test, Y_test), callbacks=[early_stopping])

# Evaluate and persist the model for app.py.
test_loss, test_accuracy = model.evaluate(X_test, Y_test)
print(f'Testgenauigkeit: {test_accuracy:.4f}')
model.save("model.keras")
|
||||
|
||||
BIN
model.keras
Normal file
BIN
model.keras
Normal file
Binary file not shown.
18
shell.nix
Normal file
18
shell.nix
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
let
  pkgs = import <nixpkgs> {};
in
pkgs.mkShell {
  # Single Python interpreter bundling every package the scripts import.
  packages = [
    (pkgs.python3.withPackages (ps: [
      ps.pygame
      ps.matplotlib
      ps.sklearn-compat
      ps.pandas
      ps.opencv-python
      ps.pytesseract
      ps.tensorflow
      ps.keras
      ps.pyqt6
    ]))
  ];
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue