% Environment setup: mdl is assumed to already hold the Simulink model name
obsInfo = rlNumericSpec([3 1]);   % 3x1 continuous observation vector
actInfo = rlNumericSpec([1 1]);   % scalar continuous action
env = rlSimulinkEnv(mdl, [mdl '/Subsystem2/RL Agent'], obsInfo, actInfo);
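% Optional: randomize initial conditions each episode through a reset
% function (localResetFcn is a hypothetical helper you would write for
% this model; uncomment once it exists)
% env.ResetFcn = @(in) localResetFcn(in);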
% Actor: deterministic policy network mapping observations to actions
% (the 200-unit hidden layer is a placeholder width, matching the critic)
actorLayers = [
    featureInputLayer(prod(obsInfo.Dimension))
    fullyConnectedLayer(200)
    reluLayer
    fullyConnectedLayer(prod(actInfo.Dimension))];
actorNet = dlnetwork(actorLayers);
actor = rlContinuousDeterministicActor(actorNet, obsInfo, actInfo);
% Critic: Q-value network with separate observation and action inputs,
% concatenated and passed through two hidden layers
obsInputLayer = featureInputLayer(prod(obsInfo.Dimension), Name="obsInput");
actInputLayer = featureInputLayer(prod(actInfo.Dimension), Name="actInput");
criticLayers = [
    concatenationLayer(1, 2, Name="concat")
    fullyConnectedLayer(200, Name="fc1")
    reluLayer(Name="relu1")
    fullyConnectedLayer(200, Name="fc2")
    reluLayer(Name="relu2")
    fullyConnectedLayer(1, Name="qValue")];
criticNet = dlnetwork;   % start empty, then assemble the two-input graph
criticNet = addLayers(criticNet, obsInputLayer);
criticNet = addLayers(criticNet, actInputLayer);
criticNet = addLayers(criticNet, criticLayers);
criticNet = connectLayers(criticNet, "obsInput", "concat/in1");
criticNet = connectLayers(criticNet, "actInput", "concat/in2");
criticNet = initialize(criticNet);
critic = rlQValueFunction(criticNet, obsInfo, actInfo, ...
    ObservationInputNames="obsInput", ActionInputNames="actInput");
% Optimizer options: slower actor updates than critic, with gradient clipping
actorOpts = rlOptimizerOptions('LearnRate', 1e-4, 'GradientThreshold', 0.3);
criticOpts = rlOptimizerOptions('LearnRate', 1e-3, 'GradientThreshold', 0.2);
% DDPG agent options (target networks are built into DDPG; their soft
% updates are controlled by TargetSmoothFactor)
agentOpts = rlDDPGAgentOptions( ...
    'MiniBatchSize', 256, ...
    'DiscountFactor', 0.99, ...
    'ExperienceBufferLength', 1e6, ...
    'ActorOptimizerOptions', actorOpts, ...
    'CriticOptimizerOptions', criticOpts, ...
    'TargetSmoothFactor', 1e-3);
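% Optional: shape the Ornstein-Uhlenbeck exploration noise before creating
% the agent (illustrative values, not tuned for this model)
agentOpts.NoiseOptions.StandardDeviation = 0.3;
agentOpts.NoiseOptions.StandardDeviationDecayRate = 1e-5;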
agent = rlDDPGAgent(actor, critic, agentOpts);
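% Quick smoke test: the untrained agent should return a scalar action for
% a single cell-wrapped observation
act0 = getAction(agent, {zeros(obsInfo.Dimension)});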
% Training options (episode budget and save threshold are placeholder values)
trainOpts = rlTrainingOptions( ...
    'MaxEpisodes', 5000, ...
    'MaxStepsPerEpisode', 800, ...
    'StopTrainingCriteria', 'AverageReward', ...
    'StopTrainingValue', 2000, ...
    'SaveAgentCriteria', 'AverageReward', ...
    'SaveAgentValue', 2000);
trainingStats = train(agent, env, trainOpts);
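% Optional: plot the learning curve from the returned training statistics
plot(trainingStats.EpisodeIndex, trainingStats.AverageReward)
xlabel('Episode'); ylabel('Average reward')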
% Simulate the trained agent and keep the logged experience
simOptions = rlSimulationOptions('MaxSteps', 1000);
experience = sim(env, agent, simOptions);
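% Sanity check: total reward over the episode (for Simulink environments
% the logged Reward comes back as a timeseries)
totalReward = sum(experience.Reward.Data);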