From 24d1750230ad0e23cc13bdc05f4a8682d204ff8c Mon Sep 17 00:00:00 2001 From: Illia Achour Date: Fri, 28 Apr 2023 22:17:50 +0200 Subject: [PATCH 1/3] Introduce initial Gymnasium task ecosystem --- .../Gymnasium/GymnasiumControl.cs | 133 ++++++++++++++ .../Gymnasium/GymnasiumControl.resx | 60 +++++++ .../Gymnasium/GymnasiumExperimentUi.cs | 27 +++ .../Gymnasium/GymnasiumExperimentUiFactory.cs | 20 +++ .../SharpNeat.Tasks.Windows.csproj | 6 + .../Gymnasium/GymnasiumEpisode.cs | 166 ++++++++++++++++++ .../Gymnasium/GymnasiumEvaluationScheme.cs | 35 ++++ .../Gymnasium/GymnasiumEvaluator.cs | 32 ++++ .../Gymnasium/GymnasiumExperimentFactory.cs | 40 +++++ .../SharpNeat.Windows.App.csproj | 9 + .../experiments-config/gymnasium.config.json | 34 ++++ .../experiments-descriptions/gymnasium.txt | 1 + .../config/experiments.json | 16 ++ src/SharpNeat.Windows.App/gymnasium/main.py | 131 ++++++++++++++ 14 files changed, 710 insertions(+) create mode 100644 src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs create mode 100644 src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.resx create mode 100644 src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUi.cs create mode 100644 src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUiFactory.cs create mode 100644 src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs create mode 100644 src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs create mode 100644 src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluator.cs create mode 100644 src/SharpNeat.Tasks/Gymnasium/GymnasiumExperimentFactory.cs create mode 100644 src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json create mode 100644 src/SharpNeat.Windows.App/config/experiments-descriptions/gymnasium.txt create mode 100644 src/SharpNeat.Windows.App/gymnasium/main.py diff --git a/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs new file mode 100644 index 00000000..a006e14f --- /dev/null +++ b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs @@ -0,0 +1,133 @@ +using System.Windows.Forms; +using SharpNeat.Evaluation; +using SharpNeat.EvolutionAlgorithm; +using SharpNeat.Experiments; +using SharpNeat.Neat.Genome; +using SharpNeat.Tasks.Gymnasium; +using SharpNeat.Windows; + +namespace SharpNeat.Tasks.Windows.Gymnasium; + +public class GymnasiumControl : GenomeControl +{ + readonly IGenomeDecoder,IBlackBox> _genomeDecoder; + + // The agent used by the simulation thread. + volatile IBlackBox _agent; + readonly bool _initializing = true; + + // Thread for running simulation. + readonly Thread _simThread; + + // Indicates whether a simulation is running. Access is thread synchronised using Interlocked. + volatile bool _terminateSimThread; + + // Event that signals simulation thread to start a simulation. + readonly AutoResetEvent _simStartEvent = new(false); + readonly ManualResetEvent _simNotRunningEvent = new(false); + + /// + /// Constructs a new instance of . + /// + /// Genome decoder. + public GymnasiumControl( + IGenomeDecoder, IBlackBox> genomeDecoder) + { + _genomeDecoder = genomeDecoder ?? throw new ArgumentNullException(nameof(genomeDecoder)); + + try + { + InitializeComponent(); + + // Create background thread for running simulation alongside NEAT algorithm. + _simThread = new(new ThreadStart(SimulationThread)) + { + IsBackground = true + }; + _simThread.Start(); + } + finally + { + _initializing = false; + } + } + + public override void OnGenomeUpdated() + { + base.OnGenomeUpdated(); + + // Get a local reference to avoid possible race conditions on the class field. + IGenome genome = _genome; + + if(genome is null || _terminateSimThread || _initializing) + return; + + // Dispose any existing agent. + var existingAgent = _agent; + Thread.MemoryBarrier(); + existingAgent?.Dispose(); + + // Decode the genome, and store the resulting IBlackBox agent in an instance field. + NeatGenome neatGenome = genome as NeatGenome; + _agent = _genomeDecoder.Decode(neatGenome); + + // Signal simulation thread to start running a one simulation. + _simStartEvent.Set(); + } + + #region Private Methods [Windows.Forms Designer Code] + + private void InitializeComponent() + { + } + + #endregion + + /// + /// Simulate prey capture until thread is terminated. + /// + private void SimulationThread() + { + // Wait to be signalled to start the next trial run. + _simStartEvent.WaitOne(); + + IBlackBox agent = _agent; + + // Clear any prior agent state. + agent.Reset(); + + while (true) + { + // Check if we have been signalled to terminate before starting a simulation run. + if(_terminateSimThread) + break; + + var episode = new GymnasiumEpisode(24, 4, true, true); + episode.Evaluate(agent); + + // Test if the thread should be terminated. + if (_terminateSimThread) + break; + } + + // Signal any thread waiting for this simulation thread to terminate. + _simNotRunningEvent.Set(); + } + + protected override void Dispose(bool disposing) + { + if( disposing ) + { + // Signal the simulation thread to terminate, and wait for it to terminate. + _terminateSimThread = true; + _simStartEvent.Set(); + _simNotRunningEvent.WaitOne(); + + base.Dispose(disposing); + + _agent.Dispose(); + _simStartEvent.Dispose(); + _simNotRunningEvent.Dispose(); + } + } +} diff --git a/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.resx b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.resx new file mode 100644 index 00000000..f298a7be --- /dev/null +++ b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.resx @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + \ No newline at end of file diff --git a/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUi.cs b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUi.cs new file mode 100644 index 00000000..a6f8a4a2 --- /dev/null +++ b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUi.cs @@ -0,0 +1,27 @@ +using SharpNeat.Experiments; +using SharpNeat.Neat.Genome.Double; +using SharpNeat.Windows; +using SharpNeat.Windows.Neat; + +namespace SharpNeat.Tasks.Windows.Gymnasium; + +public sealed class GymnasiumExperimentUi : NeatExperimentUi +{ + readonly INeatExperiment _neatExperiment; + + public GymnasiumExperimentUi( + INeatExperiment neatExperiment) + { + _neatExperiment = neatExperiment ?? throw new ArgumentNullException(nameof(neatExperiment)); + } + + /// + public override GenomeControl CreateTaskControl() + { + var genomeDecoder = NeatGenomeDecoderFactory.CreateGenomeDecoder( + _neatExperiment.IsAcyclic, + _neatExperiment.EnableHardwareAcceleratedNeuralNets); + + return new GymnasiumControl(genomeDecoder); + } +} diff --git a/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUiFactory.cs b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUiFactory.cs new file mode 100644 index 00000000..50e27e38 --- /dev/null +++ b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUiFactory.cs @@ -0,0 +1,20 @@ +using SharpNeat.Experiments; +using SharpNeat.Experiments.ConfigModels; +using SharpNeat.IO; +using SharpNeat.Windows.Experiments; + +namespace SharpNeat.Tasks.Windows.Gymnasium; + +public sealed class GymnasiumExperimentUiFactory : IExperimentUiFactory +{ + /// + public IExperimentUi CreateExperimentUi( + INeatExperiment neatExperiment, + Stream jsonConfigStream) + { + // Load experiment JSON config. + ExperimentConfig experimentConfig = JsonUtils.Deserialize(jsonConfigStream); + + return new GymnasiumExperimentUi(neatExperiment); + } +} diff --git a/src/SharpNeat.Tasks.Windows/SharpNeat.Tasks.Windows.csproj b/src/SharpNeat.Tasks.Windows/SharpNeat.Tasks.Windows.csproj index f9475473..f0de1970 100644 --- a/src/SharpNeat.Tasks.Windows/SharpNeat.Tasks.Windows.csproj +++ b/src/SharpNeat.Tasks.Windows/SharpNeat.Tasks.Windows.csproj @@ -19,4 +19,10 @@ + + + UserControl + + + diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs new file mode 100644 index 00000000..59ec97ed --- /dev/null +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs @@ -0,0 +1,166 @@ +using System.Diagnostics; +using System.Globalization; +using System.IO.Pipes; +using SharpNeat.Evaluation; + +namespace SharpNeat.Tasks.Gymnasium; + +public sealed class GymnasiumEpisode +{ + readonly int _inputCount; + readonly int _outputCount; + readonly bool _isContinious; + readonly bool _test; + + public GymnasiumEpisode(int inputCount, int outputCount, bool isContinious, bool test) + { + _inputCount = inputCount; + _outputCount = outputCount; + _isContinious = isContinious; + _test = test; + } + + public FitnessInfo Evaluate(IBlackBox phenome) + { + var uuid = Guid.NewGuid(); + + var start = new ProcessStartInfo + { + FileName = @"pythonw.exe", + WorkingDirectory = @"./", + Arguments = string.Format(CultureInfo.InvariantCulture, @"gymnasium/main.py -uuid {0} -render {1}", uuid.ToString(), _test), + UseShellExecute = false, + RedirectStandardOutput = false + }; + + var process = Process.Start(start) ?? throw new InvalidOperationException("No proccess resource is started"); + var totalReward = 0.0; + + try + { + var namedPipeClientStream = new NamedPipeClientStream(".", $"gymnasium_pipe_{uuid}", PipeDirection.InOut); + namedPipeClientStream.Connect(10000); + namedPipeClientStream.ReadMode = PipeTransmissionMode.Message; + + // Clear any prior agent state. + phenome.Reset(); + + while (true) + { + // Determine agent sensor input values. + // Reset all inputs. + var inputs = phenome.Inputs.Span; + inputs.Clear(); + + var observationTuple = ReadObservation(namedPipeClientStream, _inputCount); + var observation = observationTuple.observation; + totalReward = observationTuple.reward[0]; + var done = observationTuple.done[0]; + + if (done == 1) + { + break; + } + + observation.CopyTo(phenome.Inputs); + phenome.Activate(); + + // var clampedOutputs = outputs.Select(output => Math.Clamp(output, -1.0, 1.0)).ToArray(); + if (_isContinious) + { + var outputBuffer = new byte[_outputCount * sizeof(float)]; + var outputs = new double[_outputCount]; + phenome.Outputs.CopyTo(outputs); + Buffer.BlockCopy(Array.ConvertAll(outputs, x => (float)x), 0, outputBuffer, 0, outputBuffer.Length); + namedPipeClientStream.Write(outputBuffer, 0, outputBuffer.Length); + } + else + { + int maxSigIndex = ReadMaxSigIndex(phenome); + var outputBuffer = new byte[sizeof(int)]; + Buffer.BlockCopy(new int[] { maxSigIndex }, 0, outputBuffer, 0, outputBuffer.Length); + namedPipeClientStream.Write(outputBuffer, 0, outputBuffer.Length); + } + } + + namedPipeClientStream.Close(); + } + catch + { + if (!_test) + { + throw; + } + } + finally + { + process.WaitForExit(); + } + + var maskedReward = totalReward < 1 ? Math.Pow(Math.E, totalReward - 1) : totalReward; + return new FitnessInfo(maskedReward); + } + + static (double[] observation, double[] reward, int[] done) ReadObservation(NamedPipeClientStream namedPipeClientStream, int count) + { + var count0 = count * sizeof(double); + var count1 = sizeof(double); + var count2 = sizeof(int); + var inputBuffer = new byte[count0 + count1 + count2]; + namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length); + double[] observation = new double[count]; + double[] reward = new double[1]; + int[] done = new int[1]; + var offset1 = count0; + var offset2 = count0 + count1; + Buffer.BlockCopy(inputBuffer, 0, observation, 0, count0); + Buffer.BlockCopy(inputBuffer, offset1, reward, 0, count1); + Buffer.BlockCopy(inputBuffer, offset2, done, 0, count2); + return (observation, reward, done); + } + + static double[] ReadDoubleArray(NamedPipeClientStream namedPipeClientStream, int count) + { + var inputBuffer = new byte[count * sizeof(double)]; + namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length); + double[] values = new double[inputBuffer.Length / sizeof(double)]; + Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(double)); + return values; + } + + static float[] ReadFloatArray(NamedPipeClientStream namedPipeClientStream, int count) + { + var inputBuffer = new byte[count * sizeof(float)]; + namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length); + float[] values = new float[inputBuffer.Length / sizeof(float)]; + Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(float)); + return values; + } + + static int[] ReadIntArray(NamedPipeClientStream namedPipeClientStream, int count) + { + var inputBuffer = new byte[count * sizeof(int)]; + namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length); + int[] values = new int[inputBuffer.Length / sizeof(int)]; + Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(int)); + return values; + } + + int ReadMaxSigIndex(IBlackBox phenome) + { + double maxSig = phenome.Outputs.Span[0]; + int maxSigIdx = 0; + + for (int i = 1; i < _outputCount; i++) + { + double v = phenome.Outputs.Span[i]; + if (v > maxSig) + { + maxSig = v; + maxSigIdx = i; + } + } + + return maxSigIdx; + } +} diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs new file mode 100644 index 00000000..e53ce103 --- /dev/null +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs @@ -0,0 +1,35 @@ +using SharpNeat.Evaluation; + +namespace SharpNeat.Tasks.Gymnasium; +internal class GymnasiumEvaluationScheme : IBlackBoxEvaluationScheme +{ + /// + public int InputCount => 24; + + /// + public int OutputCount => 4; + + /// + public bool IsDeterministic => true; + + /// + public IComparer FitnessComparer => PrimaryFitnessInfoComparer.Singleton; + + /// + public FitnessInfo NullFitness => FitnessInfo.DefaultFitnessInfo; + + /// + public bool EvaluatorsHaveState => false; + + /// + public IPhenomeEvaluator> CreateEvaluator() + { + return new GymnasiumEvaluator(InputCount, OutputCount, true, false); + } + + /// + public bool TestForStopCondition(FitnessInfo fitnessInfo) + { + return (fitnessInfo.PrimaryFitness >= 300.0); + } +} diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluator.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluator.cs new file mode 100644 index 00000000..dae7e638 --- /dev/null +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluator.cs @@ -0,0 +1,32 @@ +using SharpNeat.Evaluation; + +namespace SharpNeat.Tasks.Gymnasium; + +public sealed class GymnasiumEvaluator : IPhenomeEvaluator> +{ + readonly int _inputCount; + readonly int _outputCount; + readonly bool _isContinious; + readonly bool _test; + readonly int _trialsPerEvaluation = 1; + + public GymnasiumEvaluator(int inputCount, int outputCount, bool isContinious, bool test) + { + _inputCount = inputCount; + _outputCount = outputCount; + _isContinious = isContinious; + _test = test; + } + + public FitnessInfo Evaluate(IBlackBox phenome) + { + var finesses = new List(); + for (int i = 0; i < _trialsPerEvaluation; i++) + { + var episode = new GymnasiumEpisode(_inputCount, _outputCount, _isContinious, _test); + finesses.Add(episode.Evaluate(phenome)); + } + + return new FitnessInfo(finesses.Average(finesses => finesses.PrimaryFitness)); + } +} diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumExperimentFactory.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumExperimentFactory.cs new file mode 100644 index 00000000..9ac4b053 --- /dev/null +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumExperimentFactory.cs @@ -0,0 +1,40 @@ +using SharpNeat.Experiments; +using SharpNeat.Experiments.ConfigModels; +using SharpNeat.IO; +using SharpNeat.NeuralNets; + +namespace SharpNeat.Tasks.Gymnasium; + +internal class GymnasiumExperimentFactory : INeatExperimentFactory +{ + /// + public string Id => "gmnasium"; + + /// + public INeatExperiment CreateExperiment(Stream jsonConfigStream) + { + // Load experiment JSON config. + ExperimentConfig experimentConfig = JsonUtils.Deserialize(jsonConfigStream); + + // Create an evaluation scheme object for the Gemnasium task. + var evalScheme = new GymnasiumEvaluationScheme(); + + // Create a NeatExperiment object with the evaluation scheme, + // and assign some default settings (these can be overridden by config). + var experiment = new NeatExperiment(evalScheme, Id) + { + IsAcyclic = true, + ActivationFnName = ActivationFunctionId.LeakyReLU.ToString() + }; + + // Apply configuration to the experiment instance. + experiment.Configure(experimentConfig); + return experiment; + } + + /// + public INeatExperiment CreateExperimentSinglePrecision(Stream jsonConfigStream) + { + throw new NotImplementedException(); + } +} diff --git a/src/SharpNeat.Windows.App/SharpNeat.Windows.App.csproj b/src/SharpNeat.Windows.App/SharpNeat.Windows.App.csproj index 887e0335..d6ac006d 100644 --- a/src/SharpNeat.Windows.App/SharpNeat.Windows.App.csproj +++ b/src/SharpNeat.Windows.App/SharpNeat.Windows.App.csproj @@ -50,6 +50,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest @@ -71,6 +74,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest @@ -86,6 +92,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest diff --git a/src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json b/src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json new file mode 100644 index 00000000..dd243d9a --- /dev/null +++ b/src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json @@ -0,0 +1,34 @@ +{ + "name": "Gymnasium", + "isAcyclic": true, + "cyclesPerActivation": 1, + "activationFnName": "LeakyReLU", + "evolutionAlgorithm": { + "speciesCount": 15, + "elitismProportion": 0.5, + "selectionProportion": 0.5, + "offspringAsexualProportion": 0.5, + "offspringSexualProportion": 0.5, + "interspeciesMatingProportion": 0.01 + }, + "reproductionAsexual": { + "connectionWeightMutationProbability": 0.94, + "addNodeMutationProbability": 0.01, + "addConnectionMutationProbability": 0.025, + "deleteConnectionMutationProbability": 0.025 + }, + "reproductionSexual": { + "secondaryParentGeneProbability": 0.1 + }, + "populationSize": 225, + "initialInterconnectionsProportion": 0.05, + "connectionWeightScale": 5.0, + "complexityRegulationStrategy": { + "strategyName": "relative", + "relativeComplexityCeiling": 30, + "minSimplifcationGenerations": 10 + }, + "degreeOfParallelism": 16, + "enableHardwareAcceleratedNeuralNets": false, + "enableHardwareAcceleratedActivationFunctions": false +} \ No newline at end of file diff --git a/src/SharpNeat.Windows.App/config/experiments-descriptions/gymnasium.txt b/src/SharpNeat.Windows.App/config/experiments-descriptions/gymnasium.txt new file mode 100644 index 00000000..0ef4b5e2 --- /dev/null +++ b/src/SharpNeat.Windows.App/config/experiments-descriptions/gymnasium.txt @@ -0,0 +1 @@ +TBD \ No newline at end of file diff --git a/src/SharpNeat.Windows.App/config/experiments.json b/src/SharpNeat.Windows.App/config/experiments.json index 037f29fa..032dc8ac 100644 --- a/src/SharpNeat.Windows.App/config/experiments.json +++ b/src/SharpNeat.Windows.App/config/experiments.json @@ -143,6 +143,22 @@ "assemblyName": "SharpNeat.Tasks.Windows", "typeName": "SharpNeat.Tasks.Windows.GenerativeFunctionRegression.GenerativeFnRegressionUiFactory" } + }, + + // === Gymnasium === + { + "name": "Gymnasium", + "experimentFactory": { + "assemblyName": "SharpNeat.Tasks", + "typeName": "SharpNeat.Tasks.Gymnasium.GymnasiumExperimentFactory" + }, + "configFile": "config/experiments-config/gymnasium.config.json", + "descriptionFile": "config/experiments-descriptions/gymnasium.txt", + // UI settings. + "experimentUiFactory": { + "assemblyName": "SharpNeat.Tasks.Windows", + "typeName": "SharpNeat.Tasks.Windows.Gymnasium.GymnasiumExperimentUiFactory" + } } ] } diff --git a/src/SharpNeat.Windows.App/gymnasium/main.py b/src/SharpNeat.Windows.App/gymnasium/main.py new file mode 100644 index 00000000..bbef57e7 --- /dev/null +++ b/src/SharpNeat.Windows.App/gymnasium/main.py @@ -0,0 +1,131 @@ +import struct +import time +import traceback + +import gymnasium as gym +from gymnasium import spaces +from gymnasium.wrappers import ClipAction +import win32pipe +import win32file +from argparse import ArgumentParser +import numpy as np +import logging + + +logging.basicConfig(filename='debug.log', encoding='utf-8', level=logging.FATAL) +logging.debug("start") + +parser = ArgumentParser() +parser.add_argument("-uuid", dest="uuid") +parser.add_argument("-render", dest="render") +args = parser.parse_args() +render = args.render == "True" + +pipe = win32pipe.CreateNamedPipe("\\\\.\\pipe\\gymnasium_pipe_" + args.uuid, + win32pipe.PIPE_ACCESS_DUPLEX, + win32pipe.PIPE_TYPE_MESSAGE | win32pipe.PIPE_READMODE_MESSAGE | win32pipe.PIPE_WAIT, + 1, 1024, 1024, 0, None) +logging.debug("Connecting pipe...") +win32pipe.ConnectNamedPipe(pipe, None) +logging.debug("Pipe connected") + +# env = gym.make("BipedalWalker-v3", hardcore=False, render_mode="human" if render else None) +# env = gym.make("LunarLander-v2", render_mode="human" if render else None) +try: + # env = gym.make("LunarLander-v2", enable_wind=True, render_mode="human" if render else None) + env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="human" if render else None) + env = ClipAction(env) + + logging.debug("Environment created") + logging.debug("Environment action size: %s", str(env.action_space.shape[0])) + logging.debug("Environment action type: %s", str(env.action_space.dtype)) + logging.debug("Environment action type size: %s", str(env.action_space.dtype.itemsize)) + logging.debug("Environment action type char: %s", str(env.action_space.dtype.char)) +except Exception as e: + logging.error(e) + + +def run_episode(): + observation, info = env.reset() + + logging.debug("Initial observation:", observation) + send_observation(observation, 0, False) + logging.debug("Initial observation sent") + + total_reward = 0 + total_timesteps = 0 + + while 1: + logging.debug("Starting step") + a = read_action(env.action_space) + logging.debug("Action read:", a) + + total_timesteps += 1 + + observation, reward, terminated, truncated, info = env.step(a) + logging.debug(observation) + + done = terminated or truncated + + # if reward != 0: + # print("reward %0.3f" % reward) + + total_reward += reward + + masked_done = done + + # if render: + # masked_done = False + + send_observation(observation, float(total_reward), masked_done) + logging.debug("Observation sent") + + if render: + env.render() + time.sleep(0.02) + + if done: + logging.debug("Terminated") + if not render: + # pipe.close() + env.close() + break + else: + env.close() + # env.reset() + # print(reward) + # input("Done") + # print("timesteps %i reward %0.2f" % (total_timesteps, total_reward)) + + +def send_observation(observation: np.array, reward: float, done: bool): + win32file.WriteFile(pipe, bytes(observation.astype(float)) + bytes(np.array([reward]).astype(float)) + bytes(np.array([int(done)]))) + + +def read_action(space: spaces.Space): + is_discrete = len(space.shape) == 0 + count = 1 if is_discrete else space.shape[0] + type_char = 'i' if is_discrete else space.dtype.char + item_size = 4 if is_discrete else space.dtype.itemsize + result, action_struct = win32file.ReadFile(pipe, item_size * count) + action_got = struct.unpack(count * type_char, action_struct) + return action_got[0] if is_discrete else action_got + + +def read_int_action(): + result, action_struct = win32file.ReadFile(pipe, 4) + action_got = struct.unpack('i', action_struct)[0] + return action_got + + +def read_float_action(count): + result, action_struct = win32file.ReadFile(pipe, 8 * count) + action_got = struct.unpack('dddd', action_struct) + return action_got + + +try: + run_episode() +except Exception as e: + logging.error(str(e)) + logging.error(traceback.format_exc()) From 36ffaa71ddc14b560eac74fa2fd15771a09cf13c Mon Sep 17 00:00:00 2001 From: Illia Achour Date: Fri, 28 Apr 2023 23:02:06 +0200 Subject: [PATCH 2/3] Clean up code --- src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs | 4 +--- src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs index a006e14f..12cdf118 100644 --- a/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs +++ b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs @@ -1,7 +1,5 @@ -using System.Windows.Forms; -using SharpNeat.Evaluation; +using SharpNeat.Evaluation; using SharpNeat.EvolutionAlgorithm; -using SharpNeat.Experiments; using SharpNeat.Neat.Genome; using SharpNeat.Tasks.Gymnasium; using SharpNeat.Windows; diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs index e53ce103..40555fda 100644 --- a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs @@ -1,6 +1,7 @@ using SharpNeat.Evaluation; namespace SharpNeat.Tasks.Gymnasium; + internal class GymnasiumEvaluationScheme : IBlackBoxEvaluationScheme { /// From 698d4a47c6326965a3005e0a4b6aa164a1957e98 Mon Sep 17 00:00:00 2001 From: Illia Achour Date: Sun, 30 Apr 2023 03:23:09 +0200 Subject: [PATCH 3/3] Refine Gymnasium task --- .../Gymnasium/GymnasiumControl.cs | 4 +- .../Gymnasium/GymnasiumEpisode.cs | 69 +++++++++---------- .../Gymnasium/GymnasiumEvaluationScheme.cs | 2 +- .../Gymnasium/GymnasiumEvaluator.cs | 20 +++--- .../Gymnasium/GymnasiumExperimentFactory.cs | 2 +- .../experiments-config/gymnasium.config.json | 2 +- src/SharpNeat.Windows.App/gymnasium/main.py | 64 ++++++++++------- 7 files changed, 89 insertions(+), 74 deletions(-) diff --git a/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs index 12cdf118..70c195fc 100644 --- a/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs +++ b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs @@ -86,7 +86,7 @@ private void InitializeComponent() /// private void SimulationThread() { - // Wait to be signalled to start the next trial run. + // Wait to be signaled to start the next trial run. _simStartEvent.WaitOne(); IBlackBox agent = _agent; @@ -96,7 +96,7 @@ private void SimulationThread() while (true) { - // Check if we have been signalled to terminate before starting a simulation run. + // Check if we have been signaled to terminate before starting a simulation run. if(_terminateSimThread) break; diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs index 59ec97ed..480cd369 100644 --- a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs @@ -7,16 +7,16 @@ namespace SharpNeat.Tasks.Gymnasium; public sealed class GymnasiumEpisode { - readonly int _inputCount; - readonly int _outputCount; - readonly bool _isContinious; - readonly bool _test; + private readonly int _inputCount; + private readonly int _outputCount; + private readonly bool _isContinuous; + private readonly bool _test; - public GymnasiumEpisode(int inputCount, int outputCount, bool isContinious, bool test) + public GymnasiumEpisode(int inputCount, int outputCount, bool isContinuous, bool test) { _inputCount = inputCount; _outputCount = outputCount; - _isContinious = isContinious; + _isContinuous = isContinuous; _test = test; } @@ -28,12 +28,12 @@ public FitnessInfo Evaluate(IBlackBox phenome) { FileName = @"pythonw.exe", WorkingDirectory = @"./", - Arguments = string.Format(CultureInfo.InvariantCulture, @"gymnasium/main.py -uuid {0} -render {1}", uuid.ToString(), _test), + Arguments = string.Format(CultureInfo.InvariantCulture, @"gymnasium/main.py -uuid {0} -render {1} -test False", uuid.ToString(), _test), UseShellExecute = false, RedirectStandardOutput = false }; - var process = Process.Start(start) ?? throw new InvalidOperationException("No proccess resource is started"); + var process = Process.Start(start) ?? throw new InvalidOperationException("No process resource is started"); var totalReward = 0.0; try @@ -52,10 +52,9 @@ public FitnessInfo Evaluate(IBlackBox phenome) var inputs = phenome.Inputs.Span; inputs.Clear(); - var observationTuple = ReadObservation(namedPipeClientStream, _inputCount); - var observation = observationTuple.observation; - totalReward = observationTuple.reward[0]; - var done = observationTuple.done[0]; + var (observation, rewardArray, doneArray) = ReadObservation(namedPipeClientStream, _inputCount); + totalReward = rewardArray[0]; + var done = doneArray[0]; if (done == 1) { @@ -66,7 +65,7 @@ public FitnessInfo Evaluate(IBlackBox phenome) phenome.Activate(); // var clampedOutputs = outputs.Select(output => Math.Clamp(output, -1.0, 1.0)).ToArray(); - if (_isContinious) + if (_isContinuous) { var outputBuffer = new byte[_outputCount * sizeof(float)]; var outputs = new double[_outputCount]; @@ -76,7 +75,7 @@ public FitnessInfo Evaluate(IBlackBox phenome) } else { - int maxSigIndex = ReadMaxSigIndex(phenome); + var maxSigIndex = ReadMaxSigIndex(phenome); var outputBuffer = new byte[sizeof(int)]; Buffer.BlockCopy(new int[] { maxSigIndex }, 0, outputBuffer, 0, outputBuffer.Length); namedPipeClientStream.Write(outputBuffer, 0, outputBuffer.Length); @@ -101,16 +100,16 @@ public FitnessInfo Evaluate(IBlackBox phenome) return new FitnessInfo(maskedReward); } - static (double[] observation, double[] reward, int[] done) ReadObservation(NamedPipeClientStream namedPipeClientStream, int count) + private static (double[] observation, double[] reward, int[] done) ReadObservation(Stream namedPipeClientStream, int count) { var count0 = count * sizeof(double); - var count1 = sizeof(double); - var count2 = sizeof(int); + const int count1 = sizeof(double); + const int count2 = sizeof(int); var inputBuffer = new byte[count0 + count1 + count2]; namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length); - double[] observation = new double[count]; - double[] reward = new double[1]; - int[] done = new int[1]; + var observation = new double[count]; + var reward = new double[1]; + var done = new int[1]; var offset1 = count0; var offset2 = count0 + count1; Buffer.BlockCopy(inputBuffer, 0, observation, 0, count0); @@ -119,46 +118,44 @@ public FitnessInfo Evaluate(IBlackBox phenome) return (observation, reward, done); } - static double[] ReadDoubleArray(NamedPipeClientStream namedPipeClientStream, int count) + private static double[] ReadDoubleArray(Stream namedPipeClientStream, int count) { var inputBuffer = new byte[count * sizeof(double)]; namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length); - double[] values = new double[inputBuffer.Length / sizeof(double)]; + var values = new double[inputBuffer.Length / sizeof(double)]; Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(double)); return values; } - static float[] ReadFloatArray(NamedPipeClientStream namedPipeClientStream, int count) + private static float[] ReadFloatArray(Stream namedPipeClientStream, int count) { var inputBuffer = new byte[count * sizeof(float)]; namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length); - float[] values = new float[inputBuffer.Length / sizeof(float)]; + var values = new float[inputBuffer.Length / sizeof(float)]; Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(float)); return values; } - static int[] ReadIntArray(NamedPipeClientStream namedPipeClientStream, int count) + private static int[] ReadIntArray(Stream namedPipeClientStream, int count) { var inputBuffer = new byte[count * sizeof(int)]; namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length); - int[] values = new int[inputBuffer.Length / sizeof(int)]; + var values = new int[inputBuffer.Length / sizeof(int)]; Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(int)); return values; } - int ReadMaxSigIndex(IBlackBox phenome) + private int ReadMaxSigIndex(IBlackBox phenome) { - double maxSig = phenome.Outputs.Span[0]; - int maxSigIdx = 0; + var maxSig = phenome.Outputs.Span[0]; + var maxSigIdx = 0; - for (int i = 1; i < _outputCount; i++) + for (var i = 1; i < _outputCount; i++) { - double v = phenome.Outputs.Span[i]; - if (v > maxSig) - { - maxSig = v; - maxSigIdx = i; - } + var v = phenome.Outputs.Span[i]; + if (!(v > maxSig)) continue; + maxSig = v; + maxSigIdx = i; } return maxSigIdx; diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs index 40555fda..6d5f4830 100644 --- a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs @@ -11,7 +11,7 @@ internal class GymnasiumEvaluationScheme : IBlackBoxEvaluationScheme public int OutputCount => 4; /// - public bool IsDeterministic => true; + public bool IsDeterministic => false; /// public IComparer FitnessComparer => PrimaryFitnessInfoComparer.Singleton; diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluator.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluator.cs index dae7e638..d70a1148 100644 --- a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluator.cs +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluator.cs @@ -4,29 +4,29 @@ namespace SharpNeat.Tasks.Gymnasium; public sealed class GymnasiumEvaluator : IPhenomeEvaluator> { - readonly int _inputCount; - readonly int _outputCount; - readonly bool _isContinious; - readonly bool _test; - readonly int _trialsPerEvaluation = 1; + private const int TrialsPerEvaluation = 1; + private readonly int _inputCount; + private readonly int _outputCount; + private readonly bool _isContinuous; + private readonly bool _test; - public GymnasiumEvaluator(int inputCount, int outputCount, bool isContinious, bool test) + public GymnasiumEvaluator(int inputCount, int outputCount, bool isContinuous, bool test) { _inputCount = inputCount; _outputCount = outputCount; - _isContinious = isContinious; + _isContinuous = isContinuous; _test = test; } public FitnessInfo Evaluate(IBlackBox phenome) { var finesses = new List(); - for (int i = 0; i < _trialsPerEvaluation; i++) + for (var i = 0; i < TrialsPerEvaluation; i++) { - var episode = new GymnasiumEpisode(_inputCount, _outputCount, _isContinious, _test); + var episode = new GymnasiumEpisode(_inputCount, _outputCount, _isContinuous, _test); finesses.Add(episode.Evaluate(phenome)); } - return new FitnessInfo(finesses.Average(finesses => finesses.PrimaryFitness)); + return new FitnessInfo(finesses.Average(fitness => fitness.PrimaryFitness)); } } diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumExperimentFactory.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumExperimentFactory.cs index 9ac4b053..7f38b1ec 100644 --- a/src/SharpNeat.Tasks/Gymnasium/GymnasiumExperimentFactory.cs +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumExperimentFactory.cs @@ -8,7 +8,7 @@ namespace SharpNeat.Tasks.Gymnasium; internal class GymnasiumExperimentFactory : INeatExperimentFactory { /// - public string Id => "gmnasium"; + public string Id => "gymnasium"; /// public INeatExperiment CreateExperiment(Stream jsonConfigStream) diff --git a/src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json b/src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json index dd243d9a..a72909d2 100644 --- a/src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json +++ b/src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json @@ -1,6 +1,6 @@ { "name": "Gymnasium", - "isAcyclic": true, + "isAcyclic": false, "cyclesPerActivation": 1, "activationFnName": "LeakyReLU", "evolutionAlgorithm": { diff --git a/src/SharpNeat.Windows.App/gymnasium/main.py b/src/SharpNeat.Windows.App/gymnasium/main.py index bbef57e7..f5fcf83d 100644 --- a/src/SharpNeat.Windows.App/gymnasium/main.py +++ b/src/SharpNeat.Windows.App/gymnasium/main.py @@ -1,3 +1,4 @@ +import collections import struct import time import traceback @@ -11,29 +12,31 @@ import numpy as np import logging - logging.basicConfig(filename='debug.log', encoding='utf-8', level=logging.FATAL) logging.debug("start") parser = ArgumentParser() -parser.add_argument("-uuid", dest="uuid") -parser.add_argument("-render", dest="render") +parser.add_argument("-uuid", dest="uuid", default="test") +parser.add_argument("-render", dest="render", default="False") +parser.add_argument("-test", dest="test", default="True") args = parser.parse_args() render = args.render == "True" +test = args.test == "True" -pipe = win32pipe.CreateNamedPipe("\\\\.\\pipe\\gymnasium_pipe_" + args.uuid, - win32pipe.PIPE_ACCESS_DUPLEX, - win32pipe.PIPE_TYPE_MESSAGE | win32pipe.PIPE_READMODE_MESSAGE | win32pipe.PIPE_WAIT, - 1, 1024, 1024, 0, None) -logging.debug("Connecting pipe...") -win32pipe.ConnectNamedPipe(pipe, None) -logging.debug("Pipe connected") +if not test: + pipe = win32pipe.CreateNamedPipe("\\\\.\\pipe\\gymnasium_pipe_" + args.uuid, + win32pipe.PIPE_ACCESS_DUPLEX, + win32pipe.PIPE_TYPE_MESSAGE | win32pipe.PIPE_READMODE_MESSAGE | win32pipe.PIPE_WAIT, + 1, 1024, 1024, 0, None) + logging.debug("Connecting pipe...") + win32pipe.ConnectNamedPipe(pipe, None) + logging.debug("Pipe connected") # env = gym.make("BipedalWalker-v3", hardcore=False, render_mode="human" if render else None) # env = gym.make("LunarLander-v2", render_mode="human" if render else None) try: # env = gym.make("LunarLander-v2", enable_wind=True, render_mode="human" if render else None) - env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="human" if render else None) + env = gym.make("BipedalWalker-v3", hardcore=False, render_mode="human" if render else None) env = ClipAction(env) logging.debug("Environment created") @@ -48,17 +51,23 @@ def run_episode(): observation, info = env.reset() - logging.debug("Initial observation:", observation) - send_observation(observation, 0, False) - logging.debug("Initial observation sent") + if not test: + logging.debug("Initial observation:", observation) + send_observation(observation, 0, False) + logging.debug("Initial observation sent") + max_reward_history_len = 100 total_reward = 0 total_timesteps = 0 + latest_rewards = collections.deque(maxlen=max_reward_history_len) while 1: logging.debug("Starting step") - a = read_action(env.action_space) - logging.debug("Action read:", a) + + if not test: + a = read_action(env.action_space) + else: + a = env.action_space.sample() total_timesteps += 1 @@ -71,20 +80,28 @@ def run_episode(): # print("reward %0.3f" % reward) total_reward += reward + latest_rewards.append(float(reward)) masked_done = done - # if render: - # masked_done = False + if total_timesteps >= max_reward_history_len: + low_performing = True + for historical_reward in latest_rewards: + if historical_reward > 0: + low_performing = False + break + if low_performing: + masked_done = True - send_observation(observation, float(total_reward), masked_done) - logging.debug("Observation sent") + if not test: + send_observation(observation, float(total_reward), masked_done) + logging.debug("Observation sent") if render: env.render() - time.sleep(0.02) + time.sleep(0.01) - if done: + if masked_done: logging.debug("Terminated") if not render: # pipe.close() @@ -99,7 +116,8 @@ def run_episode(): def send_observation(observation: np.array, reward: float, done: bool): - win32file.WriteFile(pipe, bytes(observation.astype(float)) + bytes(np.array([reward]).astype(float)) + bytes(np.array([int(done)]))) + win32file.WriteFile(pipe, bytes(observation.astype(float)) + bytes(np.array([reward]).astype(float)) + bytes( + np.array([int(done)]))) def read_action(space: spaces.Space):