admin管理员组

文章数量:1290935

I'm trying to do a very primitive reinforced learning model on tensorflow. Although it's relatively small, a single iteration takes ~6-7 seconds.

def build_model():
    model = keras.Sequential([
        layers.Input(shape=(400,)),
        layers.Dense(128, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dense(3)
    ])
    modelpile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="huber")
    return model

class DQNAgent:
    def __init__(self):
        self.model = build_model()
        self.target_model = build_model()
        self.target_model.set_weights(self.model.get_weights())

        self.memory = deque(maxlen=1000)
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.gamma = 0.95
        self.batch_size = 32

    def choose_action(self, state):
        if np.random.rand() < self.epsilon:
            return random.choice([0, 1, 2])
        q_values = self.model.predict(np.array([state]), verbose=0)
        return np.argmax(q_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        states, targets = [], []

        for state, action, reward, next_state, done in batch:
            target = reward
            if not done:
                target += self.gamma * np.max(self.target_model.predict(np.array([next_state]), verbose=0))

            q_values = self.model.predict(np.array([state]), verbose=0)
            q_values[0][action] = target

            states.append(state)
            targets.append(q_values[0])

        self.model.fit(np.array(states), np.array(targets), epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def update_target_model(self):
        self.target_model.set_weights(self.model.get_weights())

After profiling all the given code, I saw that the model.predict() takes that much time to complete: Profiler results

Initially I thought I just need to compute on GPU, but after two days wasted trying to do so, nothing really changed. Does it really take that much time, or I messed smth up in the code?

GPU:Geforce 2060,
CPU:Intel Core i7, 
Windows 11,
Python:3.10
Tensorflow: 2.10

I'm trying to do a very primitive reinforced learning model on tensorflow. Although it's relatively small, a single iteration takes ~6-7 seconds.

def build_model():
    model = keras.Sequential([
        layers.Input(shape=(400,)),
        layers.Dense(128, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dense(3)
    ])
    modelpile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="huber")
    return model

class DQNAgent:
    def __init__(self):
        self.model = build_model()
        self.target_model = build_model()
        self.target_model.set_weights(self.model.get_weights())

        self.memory = deque(maxlen=1000)
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.gamma = 0.95
        self.batch_size = 32

    def choose_action(self, state):
        if np.random.rand() < self.epsilon:
            return random.choice([0, 1, 2])
        q_values = self.model.predict(np.array([state]), verbose=0)
        return np.argmax(q_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        states, targets = [], []

        for state, action, reward, next_state, done in batch:
            target = reward
            if not done:
                target += self.gamma * np.max(self.target_model.predict(np.array([next_state]), verbose=0))

            q_values = self.model.predict(np.array([state]), verbose=0)
            q_values[0][action] = target

            states.append(state)
            targets.append(q_values[0])

        self.model.fit(np.array(states), np.array(targets), epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def update_target_model(self):
        self.target_model.set_weights(self.model.get_weights())

After profiling all the given code, I saw that the model.predict() takes that much time to complete: Profiler results

Initially I thought I just need to compute on GPU, but after two days wasted trying to do so, nothing really changed. Does it really take that much time, or I messed smth up in the code?

GPU:Geforce 2060,
CPU:Intel Core i7, 
Windows 11,
Python:3.10
Tensorflow: 2.10
Share Improve this question edited Feb 13 at 20:24 desertnaut 60.4k32 gold badges152 silver badges179 bronze badges asked Feb 13 at 17:07 ВладиславВладислав 11 bronze badge 1
  • For one, you should not call model.predict in a loop. You can just call the model directly, i.e. model(x) instead of model.predict(x). – xdurch0 Commented Feb 13 at 18:09
Add a comment  | 

1 Answer 1

Reset to default -1

Pretty silly of me, but I think the problem is that the net was learning every step, instead of accumulating it's decisions and then learn them. I overlooked that.

本文标签: pythonTensorflow perfomance issueStack Overflow