diff --git a/README.md b/README.md
index 3b125ce..78aef44 100644
--- a/README.md
+++ b/README.md
@@ -1,24 +1,5 @@
-# CV-piano
-Project source code from https://github.com/Mayuresh1611/Paper-Piano
+# Deployed on JETSON NANO
-## Setting up project
-Python version 3.11 and above
-1. Clone the repository ```git clone https://github.com/Mayuresh1611/Paper-Piano.git```
-2. run command ```pip install -r requirements.txt``` in the command line.
-3. Execute ```run.py``` file
-
-## HOW TO USE
-This is a little trickier part as the project requires you to set up a webcam in a specific angle at a specific height and distance. Also stronger the light, the better the performance.
-#### STUFF YOU WILL REQUIRE
-1. webcam or you can use third-party tools for webcam.
-2. Two A4-sized white paper, horizontally joined together. 2 rectangles need to be drawn at both ends of the paper with a black marker, thicker lines yield better results.
-3. The recommended position for the webcam will be such that it can capture the finger and shadow beneath the finger and should have both boxes we drew on joined paper in the FOV of the camera.
-Just like shown in the demo video.
-4. A light source in front, ie. behind the camera would be preferred. Casting sharp shadows.
-4. Hand with all fingers.
-
-
-# Deployed on JETSON NANO
 It is recommended to burn a new SD card and to configure everything directly in the system environment.
 **Configuration environment:** `JetPack4.6.1, Cuda10.2, Python3.6.9`
 
 ## MediaPipe installation
@@ -59,9 +40,27 @@ After successfully installing `h5py3.1.0`, download the `Jetpack4.6.1` version o
 sudo pip install xxx/xxx/xxx.whl
 ```
-## We need to install opencv-python:
+## Installing opencv-python
 See [Howto-Install-Mediapipe-in-Jetson-Nano](https://github.com/Melvinsajith/How-to-Install-Mediapipe-in-Jetson-Nano). The OpenCV build that ships with the Jetson system causes some problems, so OpenCV needs to be reinstalled here; note that the `remove` command is run after the `install`.
 ```
 sudo apt-get install python3-opencv
 sudo apt-get remove python3-opencv
 ```
+
+After deploying to the `Jetson Nano`, the board does not have enough memory to play mp3 audio from multiple threads, and the program will not run once the `pygame` library is loaded. In this branch the `pygame` library is therefore removed, and mp3 playback is implemented through the board's `GPIO` pins driving a `BY8002` voice module.
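+
+The sketch below only illustrates the idea and is not part of the application code: each key is mapped to a LOW/HIGH combination on five GPIO pins wired to the BY8002 inputs IO1–IO5, as in `src/piano_keys.py`. The BCM pin numbers (5, 6, 13, 19, 26) come from that file; the wiring itself and the module's exact trigger behaviour are assumptions to check against the BY8002 datasheet.
+```
+import time
+import Jetson.GPIO as GPIO
+
+PINS = [5, 6, 13, 19, 26]                      # BCM numbering, wired to BY8002 IO1..IO5
+
+GPIO.setmode(GPIO.BCM)
+GPIO.setup(PINS, GPIO.OUT, initial=GPIO.HIGH)  # idle level is HIGH
+
+def select_track(levels):
+    """Latch one LOW/HIGH combination onto IO1..IO5 to select a track."""
+    for pin, level in zip(PINS, levels):
+        GPIO.output(pin, level)
+
+try:
+    # The combination used for 'music1' in src/piano_keys.py
+    select_track((GPIO.LOW, GPIO.HIGH, GPIO.HIGH, GPIO.HIGH, GPIO.HIGH))
+    time.sleep(1)
+finally:
+    GPIO.cleanup()
+```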
+
+![ca8887bc235ab42e6f3b583cb2bd098.png](https://github.com/cccp421/CV-piano/blob/jetson/ca8887bc235ab42e6f3b583cb2bd098.png)
+# run
+```
+python3 run.py
+```
+
+## Jetson.GPIO User's Guide
+```
+sudo pip install Jetson.GPIO
+```
+### Problems with GPIOs not working
+```
+sudo chmod a+rw /dev/gpiochip0
+sudo chmod a+rw /dev/gpiochip1
+```
 
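+If you are unsure whether the permissions are correct, a quick self-check along the following lines will report a permission error while the `gpiochip` devices are still root-only (a minimal sketch; BCM pin 5 is only an example, use any free pin):
+```
+import Jetson.GPIO as GPIO
+
+try:
+    GPIO.setmode(GPIO.BCM)
+    GPIO.setup(5, GPIO.OUT)   # any free output pin
+    print("GPIO access OK")
+    GPIO.cleanup()
+except Exception as exc:      # typically a permission error before the chmod above
+    print("GPIO access failed:", exc)
+```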
diff --git a/ca8887bc235ab42e6f3b583cb2bd098.png b/ca8887bc235ab42e6f3b583cb2bd098.png
new file mode 100644
index 0000000..677d7e8
Binary files /dev/null and b/ca8887bc235ab42e6f3b583cb2bd098.png differ
diff --git a/models/model.py b/models/model.py
index c2d74ff..7fe3148 100644
--- a/models/model.py
+++ b/models/model.py
@@ -1,21 +1,17 @@
 import cv2
-from tensorflow.keras.models import load_model
+from tensorflow.keras import models
 import numpy as np
 import sys
 sys.path.append("src")
-from src import GLOBAL
-# import GLOBAL
 import os
+TRACKING_BOX_RESOLUTION = (40 , 40)
 
 model_list = os.listdir("models")
-if "touch_detection_model.h5" not in model_list:
-    print("We need to train model on your finger's data")
-else:
-    model = load_model("models/touch_detection_model.h5")
 
 
 def Predict(img):
-    resized_img = cv2.resize(img, GLOBAL.TRACKING_BOX_RESOLUTION)
+    model = models.load_model("models/touch_detection_model.keras")
+    resized_img = cv2.resize(img, TRACKING_BOX_RESOLUTION)
     # Add the batch dimension and normalize pixel values
     data = np.expand_dims(resized_img/255, axis=0)
    # Make the prediction
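Note that `Predict` above now loads the `.keras` model from disk on every call. If that turns out to be too slow on the Nano, a lazily cached loader is one possible alternative (a sketch only, not part of this patch; the path matches the one used in `models/model.py`):

```
from tensorflow.keras import models

_model = None

def get_model(path="models/touch_detection_model.keras"):
    """Load the touch-detection model once and reuse it on later calls."""
    global _model
    if _model is None:
        _model = models.load_model(path)
    return _model
```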
diff --git a/models/touch_detection_model.h5 b/models/touch_detection_model.h5
deleted file mode 100644
index 26a27d3..0000000
Binary files a/models/touch_detection_model.h5 and /dev/null differ
diff --git a/requirements.txt b/requirements.txt
index 628ac03..2bccfb0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,6 @@
 numpy
 matplotlib
 tensorflow
-opencv-python
 mediapipe
 scipy
 pygame
diff --git a/run.py b/run.py
index c6a2b9d..0b4a310 100644
--- a/run.py
+++ b/run.py
@@ -1,112 +1,46 @@
-import cv2
-import mediapipe as mp
-import os
+import os
 import time
-from src import fetch_data , train_model, GLOBAL , piano
-from models import model
-
-FINGER = [GLOBAL.INDEX_FINGER]
+from src import fetch_data, train_model, piano
 
-def RUN(finger):
-    mp_drawing = mp.solutions.drawing_utils
-    mp_hands = mp.solutions.hands #导入了使用 MediaPipe 库进行手部检测和跟踪的工具。
-
-    cap = cv2.VideoCapture(GLOBAL.WEB_CAM)
-    iter = 0
-    with mp_hands.Hands(
-        min_detection_confidence=0.5, #设置了检测和跟踪的置信度阈值。
-        min_tracking_confidence=0.5) as hands:
-        while cap.isOpened():
-            success, image = cap.read()
-            if not success:
-                print("Ignoring empty camera frame.")
-                continue
-
-            image = cv2.cvtColor(image , cv2.COLOR_BGR2RGB)
-            copy_image = image.copy()
-
-            image.flags.writeable = False
-            results = hands.process(image)
-            image.flags.writeable = True
-            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-
-            finger_tracking_frame = None # initializing region of interest
-
-            if results.multi_hand_landmarks:
-                for hand_landmarks in results.multi_hand_landmarks:
-                    mp_drawing.draw_landmarks(
-                        image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
-                    for finger_tip_id in [finger]: # Landmark IDs for all five fingers' tips
-                        finger_tip = hand_landmarks.landmark[finger_tip_id]
-                        height, width, _ = image.shape
-                        tip_x, tip_y, tip_z = int(finger_tip.x * width), int(finger_tip.y * height), finger_tip.z
-
-                        box_size = int(GLOBAL.BOX_SIZE // 2) # Adjust the size of the box as needed
-                        box_color = (0, 255, 0) # Green color
-
-                        # Coordinates of the rectangle
-                        x1, y1 = tip_x - box_size, tip_y - box_size
-                        x2, y2 = tip_x + box_size, tip_y + box_size
-
-                        # Draw a square box around the finger tip
-                        cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)
-
-                        # Crop the region of interest (ROI)
-                        finger_tracking_frame = copy_image[y1:y2, x1:x2]
-            color = (0, 0, 255)
-            if finger_tracking_frame is not None and finger_tracking_frame.shape[0] > 0 and finger_tracking_frame.shape[1] > 0:
-                finger_tracking_frame = cv2.cvtColor(finger_tracking_frame , cv2.COLOR_BGR2RGB)
-                pred = model.Predict(finger_tracking_frame)
-                if pred:
-                    color=(0, 255, 0)
-                else:
-                    if color == (0, 255, 0):
-                        pass
-                    else:
-                        color = (0, 0, 255)
-            image = cv2.circle(image, (250 , 300), 2, color, 20)
-            cv2.imshow('Tocuh tracking', image)
-            key = cv2.waitKey(5)
-            if key == ord('q'):
-                cap.release()
-                cv2.destroyAllWindows()
-                break
+INDEX_FINGER = 8
+MIDDLE_FINGER = 12
+RING_FINGER = 16
+PINKY_FINGER = 20
+FINGER = [INDEX_FINGER]
+UNTOUCH_FOLDER = "src/training_data/untouched"
+TOUCH_FOLDER = "src/training_data/touched"
 
 def fetch_train_data():
-    print("The First window is try window so that you can adjust the finger position, adjust hand position so that box will cover finger tip and finger tip\n1. Window after try will be touch train window\n\t do not lift any finger, move fingers slowly on the paper to get all angles\n2. After this window untouch train window will pop up\n\t lift fingers so that it can take pics of finger tips for training\n\t Then model will be trained and you should see the prediction window for Index finger")
+    print(
+        "The first window is a try window: adjust your hand position so that the box covers the finger tip\n1. The window after try is the touch train window\n\t do not lift any finger, move fingers slowly on the paper to get all angles\n2. After this window the untouch train window will pop up\n\t lift the fingers so that it can take pictures of the finger tips for training\n\t Then the model will be trained and you should see the prediction window for the index finger")
     print("Press Y to move for training stage")
     while 1:
         key = input(">> ")
         if key.lower() == 'y':
            break
     time.sleep(2)
-    fetch_data.Try(FINGER) 
+    fetch_data.Try(FINGER)
     time.sleep(2)
-    fetch_data.Capture(GLOBAL.TOUCH_FOLDER , "touched" , FINGER)
+    fetch_data.Capture(TOUCH_FOLDER, "touched", FINGER)
     time.sleep(2)
-    fetch_data.Capture(GLOBAL.UNTOUCH_FOLDER , "untouched" , FINGER)
+    fetch_data.Capture(UNTOUCH_FOLDER, "untouched", FINGER)
 
-    train_model.start_training() 
+    train_model.start_training()
     print("Model Training Complete")
     time.sleep(3)
-    RUN(GLOBAL.INDEX_FINGER)
 
 print("welcome to Paper Piano")
-# fetch_data.delete_model()
 run = True
-fetch_data.clear_training_data()
-
 while run:
     model_list = os.listdir("models")
-    if "touch_detection_model.h5" not in model_list:
-        print("We need to train model on your finger's data")
+    if "touch_detection_model.keras" not in model_list:
+        # print("We need to train model on your finger's data")
         fetch_data.clear_training_data()
         fetch_train_data()
-        
+
     else:
         print("-------------*MENU*-------------\n[1] Retrain model\n[2] Start Paper Piano\n[3] Exit")
@@ -121,7 +55,8 @@ def fetch_train_data():
             check = False
             print("Adjust paper accordingly until you see mesh of keys and press 'q'")
             time.sleep(3)
-            piano.start_piano(GLOBAL.INDEX_FINGER)
-        elif opt==3:
+            piano.start_piano(INDEX_FINGER)
+        elif opt == 3:
             check = False
             run = False
+
diff --git a/src/GLOBAL.py b/src/GLOBAL.py
deleted file mode 100644
index 5ce56d3..0000000
--- a/src/GLOBAL.py
+++ /dev/null
@@ -1,14 +0,0 @@
-WEB_CAM = 0 # by default 0
-
-BOX_SIZE = 40
-TRACKING_BOX_RESOLUTION = (BOX_SIZE , BOX_SIZE)
-THUMB = 4
-
-UNTOUCH_FOLDER = "src/training_data/untouched"
-TOUCH_FOLDER = "src/training_data/touched"
-SAMPLES = 200
-# FINGER INDEX FOR mediapipe hand mesh
-INDEX_FINGER = 8
-MIDDLE_FINGER = 12
-RING_FINGER = 16
-PINKY_FINGER = 20
diff --git a/src/fetch_data.py b/src/fetch_data.py
index 657ba1b..413a45a 100644
--- a/src/fetch_data.py
+++ b/src/fetch_data.py
@@ -1,94 +1,75 @@
 import cv2
-import numpy as np
 import mediapipe as mp
 import os
-from src import GLOBAL
-
-
+WEB_CAM = 0
+UNTOUCH_FOLDER = "src/training_data/untouched"
+TOUCH_FOLDER = "src/training_data/touched"
 
 # CAPTURE TOUCHED n UNTOUCHED
 # track finger
 # save images
-def Capture(save_folder , finger_state , finger):
+def Capture(save_folder, finger_state, finger, min_detection_confidence=0.5, min_tracking_confidence=0.5):
     mp_drawing = mp.solutions.drawing_utils
     mp_hands = mp.solutions.hands
-
-    cap = cv2.VideoCapture(GLOBAL.WEB_CAM)
+    print('--------------------------------------------------------------------Press "q" to continue.')
+    cap = cv2.VideoCapture(WEB_CAM)
     iter = 0
-    with mp_hands.Hands(
-        min_detection_confidence=0.5,
-        min_tracking_confidence=0.5) as hands:
+    with mp_hands.Hands(min_detection_confidence=min_detection_confidence,
+                        min_tracking_confidence=min_tracking_confidence) as hands:
         while cap.isOpened():
             success, image = cap.read()
             if not success:
                 print("Ignoring empty camera frame.")
                 continue
 
-            image = cv2.cvtColor(image , cv2.COLOR_BGR2RGB)
-            copy_image = image.copy()
-
-            image.flags.writeable = False
-            results = hands.process(image)
-            image.flags.writeable = True
-            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-
-            finger_tracking_frame = None # initializing region of interest
+            # Process image once for color conversion
+            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+            results = hands.process(image_rgb)
 
             if results.multi_hand_landmarks:
                 for hand_landmarks in results.multi_hand_landmarks:
-                    mp_drawing.draw_landmarks(
-                        image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
-                    for finger_tip_id in finger: # Landmark IDs for all five fingers' tips
+                    mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
+                    for finger_tip_id in finger:
                         finger_tip = hand_landmarks.landmark[finger_tip_id]
-                        height, width, _ = image.shape
-                        tip_x, tip_y, tip_z = int(finger_tip.x * width), int(finger_tip.y * height), finger_tip.z
-
-                        box_size = int(GLOBAL.BOX_SIZE // 2) # Adjust the size of the box as needed
-                        box_color = (0, 255, 0) # Green color
-
-                        # Coordinates of the rectangle
-                        x1, y1 = tip_x - box_size, tip_y - box_size
-                        x2, y2 = tip_x + box_size, tip_y + box_size
-
-                        # Draw a square box around the finger tip
-                        cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)
-
-                        # Crop the region of interest (ROI)
-                        finger_tracking_frame = copy_image[y1:y2, x1:x2]
-
-                        if finger_tracking_frame is not None and finger_tracking_frame.shape[0] > 0 and finger_tracking_frame.shape[1] > 0:
-                            finger_tracking_frame = cv2.cvtColor(finger_tracking_frame , cv2.COLOR_BGR2RGB)
+                        tip_x, tip_y = int(finger_tip.x * image.shape[1]), int(finger_tip.y * image.shape[0])
+                        box_size = int(40 / 2)
+                        cv2.rectangle(image, (tip_x - box_size, tip_y - box_size),
+                                      (tip_x + box_size, tip_y + box_size), (0, 255, 0), 2)
+
+                        # Crop and save ROI only if it's valid
+                        roi = image_rgb[tip_y - box_size:tip_y + box_size, tip_x - box_size:tip_x + box_size]
+                        if roi.size > 0:
                            filename = os.path.join(save_folder, f'finger-{finger_state}{iter}.png')
-                            cv2.imwrite(filename, finger_tracking_frame)
-                            print(f'Saved touched image: {filename}')
-                            iter += 1
-                            if iter >= GLOBAL.SAMPLES:
-                                cv2.destroyAllWindows()
-                                return
-
-            cv2.imshow(f'{finger_state} SAVING', image)
-            key = cv2.waitKey(5)
-            if key == ord('q'):
-                cap.release()
-                cv2.destroyAllWindows()
+                            cv2.imwrite(filename, cv2.cvtColor(roi, cv2.COLOR_RGB2BGR))
+                            print(f'Saved image: {filename}')
+                            iter += 1
+                            if iter >= 150:
+                                cap.release()
+                                cv2.destroyAllWindows()
+                                return
+
+            cv2.imshow(f'{finger_state} SAVING', image)
+            if cv2.waitKey(5) == ord('q'):
                break
+    cap.release()
+    cv2.destroyAllWindows()
 
 
 def clear_training_data():
-    for file in os.listdir(GLOBAL.TOUCH_FOLDER):
-        os.remove(f'{GLOBAL.TOUCH_FOLDER}/{file}')
-
-    for file in os.listdir(GLOBAL.UNTOUCH_FOLDER):
-        os.remove(f'{GLOBAL.UNTOUCH_FOLDER}/{file}')
+    for file in os.listdir(TOUCH_FOLDER):
+        os.remove(f'{TOUCH_FOLDER}/{file}')
+
+    for file in os.listdir(UNTOUCH_FOLDER):
+        os.remove(f'{UNTOUCH_FOLDER}/{file}')
     print("TRAINING DATA CLEARED")
 
 
 def delete_model():
     model = os.listdir("models")
-    if "touch_detection_model.h5" in model:
-        model.remove("touch_detection_model.h5")
+    if "touch_detection_model.keras" in model:
+        model.remove("touch_detection_model.keras")
         print("model removed")
     else:
        print("model not present")
@@ -97,7 +78,7 @@ def Try(finger):
     mp_drawing = mp.solutions.drawing_utils
     mp_hands = mp.solutions.hands
 
-    cap = cv2.VideoCapture(GLOBAL.WEB_CAM)
+    cap = cv2.VideoCapture(WEB_CAM)
     with mp_hands.Hands(
         min_detection_confidence=0.5,
         min_tracking_confidence=0.5) as hands:
@@ -112,7 +93,7 @@ def Try(finger):
             results = hands.process(image)
             image.flags.writeable = True
             image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-            
+
 
             if results.multi_hand_landmarks:
                 for hand_landmarks in results.multi_hand_landmarks:
@@ -123,7 +104,7 @@ def Try(finger):
                         height, width, _ = image.shape
                         tip_x, tip_y, tip_z = int(finger_tip.x * width), int(finger_tip.y * height), finger_tip.z
 
-                        box_size = int(GLOBAL.BOX_SIZE // 2) # Adjust the size of the box as needed
+                        box_size = int(40 // 2) # Adjust the size of the box as needed
                         box_color = (0, 255, 0) # Green color
 
                         # Coordinates of the rectangle
@@ -132,7 +113,7 @@ def Try(finger):
                         # Draw a square box around the finger tip
                         cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)
 
-            cv2.imshow('Tocuh tracking', image) 
+            cv2.imshow('Touch tracking', image)
             key = cv2.waitKey(5)
             if key == ord('q'):
                 break
@@ -140,4 +121,4 @@ def Try(finger):
 
     cv2.destroyAllWindows()
 
-    
+
diff --git a/src/mapping.py b/src/mapping.py
index d284804..89e79fc 100644
--- a/src/mapping.py
+++ b/src/mapping.py
@@ -3,7 +3,6 @@
 from scipy.spatial import distance
 
-
 
 """section formula"""
 def get_sections(line , points):
@@ -18,7 +17,7 @@ def get_sections(line , points):
         n = points+1 - m
         x = int((x2*m + x1*n) / (m+n))
         y = int((y2*m + y1*n) / (m+n))
-        
+
         sected_coords.append([x, y])
 
     return sected_coords
@@ -26,10 +25,10 @@ def get_sections(line , points):
 
 """Drawing mesh inside those rectangles"""
 def draw_mesh(verts, image):
     # Define the polygon vertices
-    polygon_vertices = np.array(verts, np.int32)
-    
-    # Reshape the array for OpenCV's fillPoly function
-    polygon_vertices = polygon_vertices.reshape((-1, 1, 2))
+    # polygon_vertices = np.array(verts, np.int32)
+    #
+    # # Reshape the array for OpenCV's fillPoly function
+    # verts = polygon_vertices.reshape((-1, 1, 2))
 
     # Draw the polygon
@@ -39,15 +38,15 @@ def draw_mesh(verts, image):
     # Define the number of rows and columns for the mesh
     rows, cols = 1, 24
 
-    # columns 
+    # columns
     line_ad = get_sections([verts[0] , verts[1]] , cols-1)
     line_bc = get_sections([verts[2] , verts[3]] , cols-1)
 
     # rows are currently not used
-    line_ab = get_sections([verts[0] , verts[2]] , rows-1)
-    line_dc = get_sections([verts[1] , verts[3]] , rows-1)
+    # line_ab = get_sections([verts[0] , verts[2]] , rows-1)
+    # line_dc = get_sections([verts[1] , verts[3]] , rows-1)
 
     line_ad.insert(0 , verts[0])
     line_ad.append(verts[1])
@@ -62,7 +61,7 @@ def draw_mesh(verts, image):
         polygons.append(polygon)
 
-    return cols , polygons 
+    return cols , polygons
 
 
 def draw_over_image(image , cols , polygons):
@@ -71,7 +70,7 @@ def draw_over_image(image , cols , polygons):
         polygon = np.array(polygons[pnt], dtype=np.int32)
         cv2.polylines(image, [polygon], True, colors[1], 2)
 
-    return image 
+    return image
 
 
 def analyse(image):
@@ -79,39 +78,40 @@ def analyse(image):
     # Apply Gaussian blur and adaptive thresholding to obtain binary image
     blur = cv2.GaussianBlur(gray, (7, 7), 1)
-    canny = cv2.Canny(blur , 10, 150)
-    # cv2.imshow("canny" , canny)
-    thresh = cv2.adaptiveThreshold(canny, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 5, 2)
-    
+    canny = cv2.Canny(blur, 20, 150)
+    cv2.imshow("canny" , canny)
+    # thresh = cv2.adaptiveThreshold(canny, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 5, 2)
+    # cv2.imshow("thresh" , thresh)
 
     # Find contours in the binary image
-    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    contours, _ = cv2.findContours(canny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
 
     # Sort contours by area and keep the largest ones
     contours = sorted(contours, key=cv2.contourArea, reverse=True)
     shapes = []
-
     # Loop over the contours
     for contour in contours:
+        # Approximate the contour to a polygon
        peri = cv2.arcLength(contour, True)
         approx = cv2.approxPolyDP(contour, 0.024 * peri, True)
 
         # If the contour has 4 corners (a rectangle)
         if len(approx) == 4:
             shapes.append(approx)
-            for point in approx:
-                x, y = point.ravel()
+            # print('11111')
+            # for point in approx:
+            #     x, y = point.ravel()
 
     # Display the result
     try:
-        count = 1 
+        count = 1
         mesh_coords = []
         rect1 = [list(arr.flatten()) for arr in shapes[0]]
         rect2 = [list(arr.flatten()) for arr in shapes[1]]
 
         rect1.sort()
         rect2.sort()
-        
+
         coords_n_dist = []
 
         for i in rect1:
@@ -122,19 +122,21 @@ def analyse(image):
         mesh_coords.extend(coords_n_dist[0][1])
         for i in coords_n_dist[1:]:
             # threshold of 10 over y values of points
-            if coords_n_dist[0][1][0][1] + 10 < i[1][0][1] and coords_n_dist[0][1][1][1] + 10< i[1][1][1] : 
+            if coords_n_dist[0][1][0][1] + 10 < i[1][0][1] and coords_n_dist[0][1][1][1] + 10< i[1][1][1] :
                 max = i[1]
                 break
         mesh_coords.extend(max)
-        if rect1[0][0] > rect2[3][0]:
-            rect1 , rect2 = rect2 , rect1
-        
-        for i in range(2):
-            for j in range(4):
-                cv2.putText(image,str(j) + str( shapes[i][j][0]) , shapes[i][j][0], cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255), 2)
+        # mesh_coords = tuple(mesh_coords)
+        print("mesh_coords :", mesh_coords)
+        # if rect1[0][0] > rect2[3][0]:
+        #     rect1 , rect2 = rect2 , rect1
+
+        # for i in range(2):
+        #     for j in range(4):
+        #         cv2.putText(image, str(j) + str(shapes[i][j][0]) , shapes[i][j][0], cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255), 2)
     except:
-        # print("could not mesh")
+        print("could not mesh")
         return 0 , []
     else:
         return draw_mesh(mesh_coords, image)
 
-    
+
diff --git a/src/piano.py b/src/piano.py
index 6c6655c..883b592 100644
--- a/src/piano.py
+++ b/src/piano.py
@@ -2,32 +2,34 @@
 import mediapipe as mp
 mp_drawing = mp.solutions.drawing_utils
 mp_hands = mp.solutions.hands
+from src import piano_keys
+
+
 from src.mapping import analyse, draw_over_image
-import os
 from shapely.geometry import Point, Polygon
 import threading
 import queue
-import pygame
-from src import GLOBAL
-
+# import pygame
+# import os
 from models.model import Predict
+WEB_CAM = 0
 
 cols = 0
 polys = []
-snd_list = os.listdir("piano_keys")
-sounds = [f"piano_keys/{sound}" for sound in snd_list]
-
-
-
-def play_sound(sound_path):
-    pygame.mixer.init()
-    pygame.mixer.music.load(sound_path)
-    pygame.mixer.music.play()
-    while pygame.mixer.music.get_busy():
-        continue
-
+# snd_list = os.listdir("piano_keys")
+# sounds = [f"piano_keys/{sound}" for sound in snd_list]
+#
+#
+# def play_sound(sound_path):
+#     pygame.mixer.init()
+#     pygame.mixer.music.load(sound_path)
+#     pygame.mixer.music.play()
+#     while pygame.mixer.music.get_busy():
+#         continue
+# def music():
+#     print(111)
 
 def predict_worker(img_queue, result_queue, stop_event):
     while not stop_event.is_set():
@@ -47,35 +49,38 @@ def start_piano(finger):
     predict_thread = threading.Thread(target=predict_worker, args=(img_queue, result_queue, stop_event))
     predict_thread.start()
 
-    cap = cv2.VideoCapture(GLOBAL.WEB_CAM)
+    cap = cv2.VideoCapture(WEB_CAM)
     with mp_hands.Hands(
         min_detection_confidence=0.5,
-        min_tracking_confidence=0.5) as hands:
+        min_tracking_confidence=0.5, max_num_hands=1) as hands:
         while cap.isOpened():
             success, image = cap.read()
             if not success:
                 print("Ignoring empty camera frame.")
                 continue
 
             image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
+
             cols , polys = analyse(image)
             image = draw_over_image(image , cols , polys)
+
             cv2.imshow('analyse', image)
+
             key = cv2.waitKey(5)
             if key == ord('q'):
                 cap.release()
                 cv2.destroyAllWindows()
                 break
-    
+
     print(cols , polys)
     cnvrt_poly = [Polygon(polygon_coords) for polygon_coords in polys]
    prev = None
-    cap = cv2.VideoCapture(GLOBAL.WEB_CAM)
+    cap = cv2.VideoCapture(WEB_CAM)
     with mp_hands.Hands(
         min_detection_confidence=0.5,
-        min_tracking_confidence=0.5) as hands:
+        min_tracking_confidence=0.5, max_num_hands=1) as hands:
         while cap.isOpened():
-            success, image = cap.read() 
+            success, image = cap.read()
 
             if not success:
                 print("Ignoring empty camera frame.")
@@ -87,21 +92,22 @@ def start_piano(finger):
             results = hands.process(image)
             image.flags.writeable = True
             image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-            
+
             roi = None  # Initialize roi outside the loop
 
             if results.multi_hand_landmarks:
+
                 for hand_landmarks in results.multi_hand_landmarks:
                     mp_drawing.draw_landmarks(
                         image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
-
-
+
+
                     for finger_tip_id in [finger]: # Landmark IDs for all five fingers' tips
                         finger_tip = hand_landmarks.landmark[finger_tip_id]
                         height, width, _ = image.shape
                         tip_x, tip_y, tip_z = int(finger_tip.x * width), int(finger_tip.y * height), finger_tip.z
 
-                        box_size = int(GLOBAL.BOX_SIZE // 2) # Adjust the size of the box as needed
+                        box_size = int(40 // 2) # Adjust the size of the box as needed
                         box_color = (0, 255, 0) # Green color
 
                         # Coordinates of the rectangle
@@ -110,7 +116,7 @@ def start_piano(finger):
                         # Draw a square box around the finger tip
                         cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)
-                        
+
 
                         # Crop the region of interest (ROI)
                         roi = frame[y1:y2, x1:x2]
 
@@ -118,7 +124,7 @@ def start_piano(finger):
             color = (0, 0, 255)
             touched = False
             if roi is not None and roi.shape[0] > 0 and roi.shape[1] > 0:
-                
+
                 img_queue.put(roi)
 
                 try:
@@ -137,34 +143,39 @@ def start_piano(finger):
                 else:
                     color = (0, 0, 255)
             image = cv2.circle(image, (50, 50), 10, color, 20)
-
-
+
+
             point = Point(tip_x , tip_y)
             for poly in cnvrt_poly:
                 is_inside = point.within(poly)
-                
+
                 if is_inside:
-                    
+
                     text = cnvrt_poly.index(poly) + 1
-                    
+
                     # print(text)
                     cv2.putText(image , str(text) , (100 , 50) , cv2.FONT_HERSHEY_SIMPLEX, 2, color, 2)
                     if touched:
                         if text != prev:
-                            sound_path = sounds[text - 1]
-                            sound_thread = threading.Thread(target=play_sound, args=(sound_path,))
+                            sound_path = text
+                            sound_thread = threading.Thread(target=piano_keys.play_music, args=(sound_path,))
+                            # sound_path = sounds[text - 1]
+                            # sound_thread = threading.Thread(target=play_sound, args=(sound_path,))
+                            # sound_thread = threading.Thread(target=music())
                             sound_thread.start()
                             prev = text
+
                     else:
                         prev = None
                     break
 
         # Your remaining code
-        
+
         image = draw_over_image(image , cols , polys)
+        image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
         image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
         cv2.imshow('MediaPipe Hands', image)
-        cv2.imshow('Original', frame)
+        # cv2.imshow('Original', frame)
         key = cv2.waitKey(5)
         if key == ord('q'):
             img_queue.put(None)
@@ -172,5 +183,5 @@ def start_piano(finger):
             predict_thread.join()
             cap.release()
             cv2.destroyAllWindows()
-            break 
+            break
 
diff --git a/src/piano_keys.py b/src/piano_keys.py
new file mode 100644
index 0000000..ad24aa1
--- /dev/null
+++ b/src/piano_keys.py
@@ -0,0 +1,52 @@
+import RPi.GPIO as GPIO
+
+GPIO.cleanup()
+# Set the GPIO numbering mode to BCM
+GPIO.setmode(GPIO.BCM)
+
+# GPIO pins used by the BY8002-16P voice module
+IO1 = 5
+IO2 = 6
+IO3 = 13
+IO4 = 19
+IO5 = 26
+# Configure the BY8002 pins as outputs
+GPIO.setup(IO1, GPIO.OUT)
+GPIO.setup(IO2, GPIO.OUT)
+GPIO.setup(IO3, GPIO.OUT)
+GPIO.setup(IO4, GPIO.OUT)
+GPIO.setup(IO5, GPIO.OUT)
+
+music_dict = {
+    'music1': (GPIO.LOW, GPIO.HIGH, GPIO.HIGH, GPIO.HIGH, GPIO.HIGH),
+    'music2': (GPIO.HIGH, GPIO.LOW, GPIO.HIGH, GPIO.HIGH, GPIO.HIGH),
+    'music3': (GPIO.HIGH, GPIO.HIGH, GPIO.LOW, GPIO.HIGH, GPIO.HIGH),
+    'music4': (GPIO.HIGH, GPIO.HIGH, GPIO.HIGH, GPIO.LOW, GPIO.HIGH),
+    'music5': (GPIO.HIGH, GPIO.HIGH, GPIO.HIGH, GPIO.HIGH, GPIO.LOW),
+    'music6': (GPIO.LOW, GPIO.HIGH, GPIO.HIGH, GPIO.LOW, GPIO.LOW),
+    'music7': (GPIO.HIGH, GPIO.HIGH, GPIO.HIGH, GPIO.LOW, GPIO.LOW),
+    'music8': (GPIO.LOW, GPIO.LOW, GPIO.LOW, GPIO.HIGH, GPIO.LOW),
+    'music9': (GPIO.HIGH, GPIO.LOW, GPIO.LOW, GPIO.HIGH, GPIO.LOW),
+    'music10': (GPIO.LOW, GPIO.HIGH, GPIO.LOW, GPIO.HIGH, GPIO.LOW),
+    'music11': (GPIO.HIGH, GPIO.HIGH, GPIO.LOW, GPIO.HIGH, GPIO.LOW),
+    'music12': (GPIO.LOW, GPIO.LOW, GPIO.HIGH, GPIO.HIGH, GPIO.LOW),
+    'music13': (GPIO.HIGH, GPIO.LOW, GPIO.HIGH, GPIO.HIGH, GPIO.LOW),
+    'music14': (GPIO.LOW, GPIO.HIGH, GPIO.HIGH, GPIO.HIGH, GPIO.LOW),
+    'music15': (GPIO.HIGH, GPIO.LOW, GPIO.LOW, GPIO.LOW, GPIO.LOW),
+    'music16': (GPIO.LOW, GPIO.LOW, GPIO.LOW, GPIO.LOW, GPIO.HIGH),
+    'music17': (GPIO.HIGH, GPIO.LOW, GPIO.LOW, GPIO.LOW, GPIO.HIGH),
+    'music18': (GPIO.LOW, GPIO.HIGH, GPIO.LOW, GPIO.LOW, GPIO.HIGH),
+    'music19': (GPIO.HIGH, GPIO.HIGH, GPIO.LOW, GPIO.LOW, GPIO.HIGH),
+    'music20': (GPIO.LOW, GPIO.LOW, GPIO.HIGH, GPIO.LOW, GPIO.HIGH),
+    'music21': (GPIO.HIGH, GPIO.LOW, GPIO.HIGH, GPIO.LOW, GPIO.HIGH),
+    'music22': (GPIO.LOW, GPIO.HIGH, GPIO.HIGH, GPIO.LOW, GPIO.HIGH),
+    'music23': (GPIO.LOW, GPIO.HIGH, GPIO.LOW, GPIO.LOW, GPIO.LOW),
+    'music24': (GPIO.LOW, GPIO.LOW, GPIO.LOW, GPIO.HIGH, GPIO.HIGH)
+}
+
+
+def play_music(music_number):
+    music_name = f'music{music_number}'
+    states = music_dict[music_name]
+    for io_pin, state in zip((IO1, IO2, IO3, IO4, IO5), states):
+        GPIO.output(io_pin, state)
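For reference, `play_music` only latches one of the 24 level combinations onto IO1–IO5; `src/piano.py` calls it from a worker thread with the 1-based key index. A direct call looks roughly like this (a sketch; releasing the pins on shutdown is assumed to be the caller's responsibility):

```
from src import piano_keys
import RPi.GPIO as GPIO

piano_keys.play_music(1)   # latch the combination for 'music1' onto IO1..IO5
# ... on program exit:
GPIO.cleanup()             # release the pins that piano_keys configured at import time
```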
+ "src/training_data", + image_size=(40,40), + batch_size=total_samples // 5, + validation_split=0.2, # 直接在数据集划分时指定验证集比例 + subset="training", # 指定当前分割为训练集 + seed=SEED # 添加随机种子 + ) + + # 使用数据集的validation_split特性,不再手动分割 + val_data = tf.keras.utils.image_dataset_from_directory( + "src/training_data", + image_size=(40,40), + batch_size=total_samples // 5, + validation_split=0.2, + subset="validation", + seed=SEED # 同样在验证集划分时添加随机种子 + ) + + model = Sequential([ + Conv2D(16, (3,3), padding='same', activation='relu', input_shape=(40,40,3)), # 减少第一层卷积核 + MaxPooling2D(), + Conv2D(8, (3,3), padding='same', activation='relu'), # 减少第二层卷积核 + MaxPooling2D(), + Flatten(), + Dense(64, activation='relu'), # 减少全连接层的神经元数量 + Dense(1, activation='sigmoid') + ]) + + model.compile(optimizer='adam', # 保持adam优化器,因其效率较高 + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), # 使用logits形式损失函数,可能更高效 + metrics=['accuracy']) print(model.summary()) + # 移除模型summary打印,减少训练时的输出 + hist = model.fit(data, epochs=23, validation_data=val_data) # 减少训练轮次以加速训练过程 - hist = model.fit(train, epochs=23, validation_data=val) - - fig = plt.figure() - plt.plot(hist.history['loss'], color='teal', label='loss') - plt.plot(hist.history['val_loss'], color='orange', label='val_loss') - fig.suptitle('Loss', fontsize=20) - plt.legend(loc="upper left") - plt.show() - - fig = plt.figure() - plt.plot(hist.history['accuracy'], color='teal', label='accuracy') - plt.plot(hist.history['val_accuracy'], color='orange', label='val_accuracy') - fig.suptitle('Accuracy', fontsize=20) - plt.legend(loc="upper left") - plt.show() - + # 移除可视化代码 + # 直接保存模型 + model.save('models/touch_detection_model.keras') - model.save('models/touch_detection_model.h5')