

I am building a project that counts people as they appear in front of a camera. For this I created a TFLite model that detects persons, and now I want to add tracking on top of that model using ByteTrack (`BYTETracker`), since it is a very efficient tracker for following objects across frames. I have cloned the official ByteTrack repository into my Colab environment, but I am unable to run it properly.

Whenever I try to use ByteTrack on Colab, I get various errors, for example:

**ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-4-e9fde43dc6b9> in <cell line: 5>()
      3 import tflite_runtime.interpreter as tflite
      4 import yaml
----> 5 from byte_tracker import BYTETracker  # Assuming you have your ByteTrack tracker module
      7 # Load the TFLite model

ModuleNotFoundError: No module named 'byte_tracker'**

Here is my code for the TFLite model + ByteTrack tracker:

import cv2
import numpy as np
import tflite_runtime.interpreter as tflite
import yaml
from byte_tracker import BYTETracker  # Assuming you have your ByteTrack tracker module

# Person-counting pipeline: TFLite detector -> confidence filter + NMS ->
# ByteTrack -> annotated, half-resolution output video.

# Local import: only needed to build the attribute-style args object that
# BYTETracker reads its thresholds from.
from types import SimpleNamespace

CONF_THRESHOLD = 0.5      # Minimum detection confidence; can be adjusted
NMS_IOU_THRESHOLD = 0.45  # IoU above which overlapping boxes are merged

# Load the TFLite model
interpreter = tflite.Interpreter(model_path='/content/gdrive/MyDrive/Project/best_saved_model/best_float32.tflite')
# Tensors must be allocated before set_tensor()/invoke() can be used
interpreter.allocate_tensors()

# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# assumes an NHWC input tensor, i.e. shape == [1, height, width, 3] — TODO confirm
input_shape = input_details[0]['shape']
input_height, input_width = int(input_shape[1]), int(input_shape[2])

# Load ByteTrack configuration from YAML file
with open('/content/gdrive/MyDrive/Project/bytetrack.yaml', 'r') as file:
    bytetrack_config = yaml.safe_load(file) or {}  # empty file -> None

video_path = '/content/gdrive/MyDrive/Project/fourperson.mp4'
output_video_path = "/content/gdrive/MyDrive/Project/tflitevideo.mp4"

# Initialize video capture from a file and fail fast if it cannot be opened,
# before any frame property is read from it
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise SystemExit("Error: Could not open video.")

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if fps <= 0:
    fps = 30.0  # some containers report 0 FPS; the writer needs a positive rate

new_width = frame_width // 2
new_height = frame_height // 2

fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for mp4 format
out = cv2.VideoWriter(output_video_path, fourcc, fps, (new_width, new_height))

# Initialize the ByteTrack tracker using parameters from YAML.
# assumes the official ByteTrack signature BYTETracker(args, frame_rate), where
# args exposes track_thresh / track_buffer / match_thresh / mot20 — TODO confirm.
# 'track_high_thresh' is accepted as a fallback key for Ultralytics-style YAML.
tracker_args = SimpleNamespace(
    track_thresh=bytetrack_config.get(
        'track_thresh', bytetrack_config.get('track_high_thresh', 0.5)),
    track_buffer=bytetrack_config.get('track_buffer', 30),
    match_thresh=bytetrack_config.get('match_thresh', 0.8),
    mot20=bytetrack_config.get('mot20', False),
)
bytetracker = BYTETracker(tracker_args, frame_rate=fps)

# IDs of every track seen so far; its size is the unique person count
unique_ids_seen = set()

try:
    # Process each frame from the video
    while True:
        ret, frame = cap.read()  # Read a frame from the video
        if not ret:
            print("End of video or error reading frame.")
            break

        # Preprocess the input frame. cv2.resize takes (width, height).
        # OpenCV decodes BGR; the model is presumably trained on RGB — confirm.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resized_frame = cv2.resize(rgb_frame, (input_width, input_height))

        # Normalize the input if float32 model
        input_data = np.expand_dims(resized_frame.astype(np.float32) / 255.0, axis=0)

        # Set the tensor and run the interpreter
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Get the output and remove the batch dimension.
        # assumes YOLO layout (5, num_anchors): rows are x_center, y_center,
        # width, height, confidence, with coordinates normalized to [0, 1]
        output_data = np.squeeze(interpreter.get_tensor(output_details[0]['index']))
        x_center, y_center, width, height, confidence = output_data[:5]

        # Only keep boxes with a high enough confidence score
        keep = confidence > CONF_THRESHOLD
        scores = confidence[keep]

        # Convert center coordinates to corner coordinates in original frame pixels
        x1 = (x_center[keep] - width[keep] / 2) * frame_width
        y1 = (y_center[keep] - height[keep] / 2) * frame_height
        x2 = (x_center[keep] + width[keep] / 2) * frame_width
        y2 = (y_center[keep] + height[keep] / 2) * frame_height

        # Format detections as [x1, y1, x2, y2, score]; shape is (N, 5) even
        # when N == 0, which BYTETracker.update relies on (it reads shape[1])
        detections_np = np.stack([x1, y1, x2, y2, scores], axis=1).astype(np.float32)

        if len(detections_np) > 0:
            # The raw head emits many overlapping boxes per person; suppress
            # duplicates so one person does not spawn several tracks.
            # NMSBoxes expects [x, y, w, h] boxes.
            xywh = np.stack([x1, y1, x2 - x1, y2 - y1], axis=1)
            nms_indices = cv2.dnn.NMSBoxes(
                xywh.tolist(), scores.tolist(), CONF_THRESHOLD, NMS_IOU_THRESHOLD)
            # Return shape differs across OpenCV versions: (N, 1), (N,) or ()
            nms_indices = np.asarray(nms_indices, dtype=int).reshape(-1)
            detections_np = detections_np[nms_indices]

        # Update the ByteTrack tracker with the detections for the current frame.
        # Boxes are already in original-frame pixels, so img_info and img_size
        # are passed identically to keep ByteTrack's internal rescale at 1.0.
        online_targets = bytetracker.update(
            detections_np, [frame_height, frame_width], [frame_height, frame_width])

        # Loop through each target to track and draw bounding boxes
        for target in online_targets:
            track_id = target.track_id  # Unique ID of the tracked person
            # Bounding box in the format [x, y, width, height]
            left, top, box_w, box_h = [int(coord) for coord in target.tlwh]

            # Draw the bounding box around the tracked person
            cv2.rectangle(frame, (left, top), (left + box_w, top + box_h), (0, 255, 0), 2)

            # Display the tracking ID near the bounding box
            cv2.putText(frame, f'ID: {track_id}', (left, top - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 255), 2)

            # Add unique ID to the set
            unique_ids_seen.add(track_id)

        # Count the total number of unique persons seen so far
        total_person_count = len(unique_ids_seen)

        # Display the total unique person count in the frame
        cv2.putText(frame, f'Total Unique Persons: {total_person_count}', (50, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Resize and write the frame to the output video
        resized_output_frame = cv2.resize(frame, (new_width, new_height))
        out.write(resized_output_frame)

        # Exit the loop when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and writer objects even if a frame fails,
    # so the output file is finalized and playable
    cap.release()
    out.release()

print("Output video saved at:", output_video_path)

