# Source code for irlc.ex13.dqn_network

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
class DQNNetwork:
    """A class representing a deep Q network.

    Note that the network is batched: ``s`` is assumed to be a numpy array of
    dimension ``batch_size x n``. The following example shows how you can
    evaluate the Q-values in a given state. An example:

    .. runblock:: pycon

        >>> from irlc.ex13.torch_networks import TorchNetwork
        >>> import gymnasium as gym
        >>> import numpy as np
        >>> env = gym.make("CartPole-v1")
        >>> Q = TorchNetwork(env, trainable=True, learning_rate=0.001) # DQN network requires an env to set network dimensions
        >>> batch_size = 32 # As an example
        >>> states = np.random.rand(batch_size, env.observation_space.shape[0]) # Creates some dummy input
        >>> states.shape # batch_size x n
        >>> qvals = Q(states) # Evaluate Q(s,a)
        >>> qvals.shape # This is a tensor of dimension batch_size x actions
        >>> print(qvals[0,1]) # Get Q(s_0, 1)
        >>> Y = np.random.rand(batch_size, env.action_space.n) # Generate target Q-values (training data)
        >>> Q.fit(states, Y) # Train the Q-network for 1 gradient descent step
    """

    def update_Phi(self, source, tau=0.01):
        r"""Update (adapt) the weights in this network towards those in ``source`` by a small amount.

        For each weight :math:`w_i` in (this) network, and each corresponding
        weight :math:`w'_i` in the ``source`` network, the following Polyak
        update is performed:

        .. math::
            w_i \leftarrow w_i + \tau (w'_i - w_i)

        :param source: Target network to update towards
        :param tau: Update rate (rate of change :math:`\tau`)
        :return: ``None``
        """
        # Abstract: concrete subclasses (e.g. a torch-backed network) implement
        # the actual Polyak/soft parameter copy.
        raise NotImplementedError

    def __call__(self, s):
        """Evaluate the Q-values in the given (batched) state.

        :param s: A matrix of size ``batch_size x n`` where :math:`n` is the state dimension.
        :return: The Q-values as a ``batch_size x d`` dimensional matrix where :math:`d` is the number of actions.
        """
        # Abstract: subclasses implement the forward pass.
        raise NotImplementedError

    def fit(self, s, target):
        r"""Fit the network weights by minimizing

        .. math::
            \frac{1}{B}\sum_{i=1}^B \sum_{a=1}^K \| q_\phi(s_i)_a - y_{i,a} \|^2

        where ``target`` corresponds to :math:`y` and is a ``[batch_size x actions]``
        matrix of target Q-values.

        :param s: Batch of states (``batch_size x n``)
        :param target: Target Q-values :math:`y` (``batch_size x actions``)
        :return: ``None``
        """
        # Abstract: subclasses implement one gradient-descent step on the MSE loss.
        raise NotImplementedError