diff --git a/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs new file mode 100644 index 00000000..70c195fc --- /dev/null +++ b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs @@ -0,0 +1,131 @@ +using SharpNeat.Evaluation; +using SharpNeat.EvolutionAlgorithm; +using SharpNeat.Neat.Genome; +using SharpNeat.Tasks.Gymnasium; +using SharpNeat.Windows; + +namespace SharpNeat.Tasks.Windows.Gymnasium; + +public class GymnasiumControl : GenomeControl +{ + readonly IGenomeDecoder,IBlackBox> _genomeDecoder; + + // The agent used by the simulation thread. + volatile IBlackBox _agent; + readonly bool _initializing = true; + + // Thread for running simulation. + readonly Thread _simThread; + + // Flag set by Dispose to tell the simulation thread to terminate. Declared volatile; no Interlocked operations are used on it. + volatile bool _terminateSimThread; + + // Event that signals simulation thread to start a simulation. + readonly AutoResetEvent _simStartEvent = new(false); + readonly ManualResetEvent _simNotRunningEvent = new(false); + + /// + /// Constructs a new instance of . + /// + /// Genome decoder. + public GymnasiumControl( + IGenomeDecoder, IBlackBox> genomeDecoder) + { + _genomeDecoder = genomeDecoder ?? throw new ArgumentNullException(nameof(genomeDecoder)); + + try + { + InitializeComponent(); + + // Create background thread for running simulation alongside NEAT algorithm. + _simThread = new(new ThreadStart(SimulationThread)) + { + IsBackground = true + }; + _simThread.Start(); + } + finally + { + _initializing = false; + } + } + + public override void OnGenomeUpdated() + { + base.OnGenomeUpdated(); + + // Get a local reference to avoid possible race conditions on the class field. + IGenome genome = _genome; + + if(genome is null || _terminateSimThread || _initializing) + return; + + // Dispose any existing agent. 
+ var existingAgent = _agent; + Thread.MemoryBarrier(); + existingAgent?.Dispose(); + + // Decode the genome, and store the resulting IBlackBox agent in an instance field. + NeatGenome neatGenome = genome as NeatGenome; + _agent = _genomeDecoder.Decode(neatGenome); + + // Signal the simulation thread to start running a simulation. + _simStartEvent.Set(); + } + + #region Private Methods [Windows.Forms Designer Code] + + private void InitializeComponent() + { + } + + #endregion + + /// + /// Runs Gymnasium episodes with the current agent until the thread is terminated. + /// + private void SimulationThread() + { + // Wait to be signaled to start. NOTE(review): this wait and the _agent read below run once, before the loop, so agents decoded by later OnGenomeUpdated calls are not picked up - confirm intent. + _simStartEvent.WaitOne(); + + IBlackBox agent = _agent; + + // Clear any prior agent state. + agent.Reset(); + + while (true) + { + // Check if we have been signaled to terminate before starting a simulation run. + if(_terminateSimThread) + break; + + var episode = new GymnasiumEpisode(24, 4, true, true); + episode.Evaluate(agent); + + // Test if the thread should be terminated. + if (_terminateSimThread) + break; + } + + // Signal any thread waiting for this simulation thread to terminate. + _simNotRunningEvent.Set(); + } + + protected override void Dispose(bool disposing) + { + if( disposing ) + { + // Signal the simulation thread to terminate, and wait for it to terminate. 
+ _terminateSimThread = true; + _simStartEvent.Set(); + _simNotRunningEvent.WaitOne(); + + base.Dispose(disposing); + + _agent.Dispose(); + _simStartEvent.Dispose(); + _simNotRunningEvent.Dispose(); + } + } +} diff --git a/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.resx b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.resx new file mode 100644 index 00000000..f298a7be --- /dev/null +++ b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.resx @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + \ No newline at end of file diff --git a/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUi.cs b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUi.cs new file mode 100644 index 00000000..a6f8a4a2 --- /dev/null +++ b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUi.cs @@ -0,0 +1,27 @@ +using SharpNeat.Experiments; +using SharpNeat.Neat.Genome.Double; +using SharpNeat.Windows; +using SharpNeat.Windows.Neat; + +namespace SharpNeat.Tasks.Windows.Gymnasium; + +public sealed class GymnasiumExperimentUi : NeatExperimentUi +{ + readonly INeatExperiment _neatExperiment; + + public GymnasiumExperimentUi( + INeatExperiment neatExperiment) + { + _neatExperiment = neatExperiment ?? 
throw new ArgumentNullException(nameof(neatExperiment)); + } + + /// + public override GenomeControl CreateTaskControl() + { + var genomeDecoder = NeatGenomeDecoderFactory.CreateGenomeDecoder( + _neatExperiment.IsAcyclic, + _neatExperiment.EnableHardwareAcceleratedNeuralNets); + + return new GymnasiumControl(genomeDecoder); + } +} diff --git a/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUiFactory.cs b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUiFactory.cs new file mode 100644 index 00000000..50e27e38 --- /dev/null +++ b/src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumExperimentUiFactory.cs @@ -0,0 +1,20 @@ +using SharpNeat.Experiments; +using SharpNeat.Experiments.ConfigModels; +using SharpNeat.IO; +using SharpNeat.Windows.Experiments; + +namespace SharpNeat.Tasks.Windows.Gymnasium; + +public sealed class GymnasiumExperimentUiFactory : IExperimentUiFactory +{ + /// + public IExperimentUi CreateExperimentUi( + INeatExperiment neatExperiment, + Stream jsonConfigStream) + { + // Load experiment JSON config. 
+ ExperimentConfig experimentConfig = JsonUtils.Deserialize(jsonConfigStream); + + return new GymnasiumExperimentUi(neatExperiment); + } +} diff --git a/src/SharpNeat.Tasks.Windows/SharpNeat.Tasks.Windows.csproj b/src/SharpNeat.Tasks.Windows/SharpNeat.Tasks.Windows.csproj index f9475473..f0de1970 100644 --- a/src/SharpNeat.Tasks.Windows/SharpNeat.Tasks.Windows.csproj +++ b/src/SharpNeat.Tasks.Windows/SharpNeat.Tasks.Windows.csproj @@ -19,4 +19,10 @@ + + + UserControl + + + diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs new file mode 100644 index 00000000..480cd369 --- /dev/null +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs @@ -0,0 +1,163 @@ +using System.Diagnostics; +using System.Globalization; +using System.IO.Pipes; +using SharpNeat.Evaluation; + +namespace SharpNeat.Tasks.Gymnasium; + +public sealed class GymnasiumEpisode +{ + private readonly int _inputCount; + private readonly int _outputCount; + private readonly bool _isContinuous; + private readonly bool _test; + + public GymnasiumEpisode(int inputCount, int outputCount, bool isContinuous, bool test) + { + _inputCount = inputCount; + _outputCount = outputCount; + _isContinuous = isContinuous; + _test = test; + } + + public FitnessInfo Evaluate(IBlackBox phenome) + { + var uuid = Guid.NewGuid(); + + var start = new ProcessStartInfo + { + FileName = @"pythonw.exe", + WorkingDirectory = @"./", + Arguments = string.Format(CultureInfo.InvariantCulture, @"gymnasium/main.py -uuid {0} -render {1} -test False", uuid.ToString(), _test), + UseShellExecute = false, + RedirectStandardOutput = false + }; + + var process = Process.Start(start) ?? 
throw new InvalidOperationException("No process resource is started"); + var totalReward = 0.0; + + try + { + var namedPipeClientStream = new NamedPipeClientStream(".", $"gymnasium_pipe_{uuid}", PipeDirection.InOut); + namedPipeClientStream.Connect(10000); + namedPipeClientStream.ReadMode = PipeTransmissionMode.Message; + + // Clear any prior agent state. + phenome.Reset(); + + while (true) + { + // Determine agent sensor input values. + // Reset all inputs. + var inputs = phenome.Inputs.Span; + inputs.Clear(); + + var (observation, rewardArray, doneArray) = ReadObservation(namedPipeClientStream, _inputCount); + totalReward = rewardArray[0]; + var done = doneArray[0]; + + if (done == 1) + { + break; + } + + observation.CopyTo(phenome.Inputs); + phenome.Activate(); + + // var clampedOutputs = outputs.Select(output => Math.Clamp(output, -1.0, 1.0)).ToArray(); + if (_isContinuous) + { + var outputBuffer = new byte[_outputCount * sizeof(float)]; + var outputs = new double[_outputCount]; + phenome.Outputs.CopyTo(outputs); + Buffer.BlockCopy(Array.ConvertAll(outputs, x => (float)x), 0, outputBuffer, 0, outputBuffer.Length); + namedPipeClientStream.Write(outputBuffer, 0, outputBuffer.Length); + } + else + { + var maxSigIndex = ReadMaxSigIndex(phenome); + var outputBuffer = new byte[sizeof(int)]; + Buffer.BlockCopy(new int[] { maxSigIndex }, 0, outputBuffer, 0, outputBuffer.Length); + namedPipeClientStream.Write(outputBuffer, 0, outputBuffer.Length); + } + } + + namedPipeClientStream.Close(); + } + catch + { + if (!_test) + { + throw; + } + } + finally + { + process.WaitForExit(); + } + + var maskedReward = totalReward < 1 ? 
Math.Pow(Math.E, totalReward - 1) : totalReward; + return new FitnessInfo(maskedReward); + } + + private static (double[] observation, double[] reward, int[] done) ReadObservation(Stream namedPipeClientStream, int count) + { + var count0 = count * sizeof(double); + const int count1 = sizeof(double); + const int count2 = sizeof(int); + var inputBuffer = new byte[count0 + count1 + count2]; + namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length); + var observation = new double[count]; + var reward = new double[1]; + var done = new int[1]; + var offset1 = count0; + var offset2 = count0 + count1; + Buffer.BlockCopy(inputBuffer, 0, observation, 0, count0); + Buffer.BlockCopy(inputBuffer, offset1, reward, 0, count1); + Buffer.BlockCopy(inputBuffer, offset2, done, 0, count2); + return (observation, reward, done); + } + + private static double[] ReadDoubleArray(Stream namedPipeClientStream, int count) + { + var inputBuffer = new byte[count * sizeof(double)]; + namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length); + var values = new double[inputBuffer.Length / sizeof(double)]; + Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(double)); + return values; + } + + private static float[] ReadFloatArray(Stream namedPipeClientStream, int count) + { + var inputBuffer = new byte[count * sizeof(float)]; + namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length); + var values = new float[inputBuffer.Length / sizeof(float)]; + Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(float)); + return values; + } + + private static int[] ReadIntArray(Stream namedPipeClientStream, int count) + { + var inputBuffer = new byte[count * sizeof(int)]; + namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length); + var values = new int[inputBuffer.Length / sizeof(int)]; + Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(int)); + return values; + } + + private int ReadMaxSigIndex(IBlackBox phenome) + { + var maxSig = 
phenome.Outputs.Span[0]; + var maxSigIdx = 0; + + for (var i = 1; i < _outputCount; i++) + { + var v = phenome.Outputs.Span[i]; + if (!(v > maxSig)) continue; + maxSig = v; + maxSigIdx = i; + } + + return maxSigIdx; + } +} diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs new file mode 100644 index 00000000..6d5f4830 --- /dev/null +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs @@ -0,0 +1,36 @@ +using SharpNeat.Evaluation; + +namespace SharpNeat.Tasks.Gymnasium; + +internal class GymnasiumEvaluationScheme : IBlackBoxEvaluationScheme +{ + /// + public int InputCount => 24; + + /// + public int OutputCount => 4; + + /// + public bool IsDeterministic => false; + + /// + public IComparer FitnessComparer => PrimaryFitnessInfoComparer.Singleton; + + /// + public FitnessInfo NullFitness => FitnessInfo.DefaultFitnessInfo; + + /// + public bool EvaluatorsHaveState => false; + + /// + public IPhenomeEvaluator> CreateEvaluator() + { + return new GymnasiumEvaluator(InputCount, OutputCount, true, false); + } + + /// + public bool TestForStopCondition(FitnessInfo fitnessInfo) + { + return (fitnessInfo.PrimaryFitness >= 300.0); + } +} diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluator.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluator.cs new file mode 100644 index 00000000..d70a1148 --- /dev/null +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluator.cs @@ -0,0 +1,32 @@ +using SharpNeat.Evaluation; + +namespace SharpNeat.Tasks.Gymnasium; + +public sealed class GymnasiumEvaluator : IPhenomeEvaluator> +{ + private const int TrialsPerEvaluation = 1; + private readonly int _inputCount; + private readonly int _outputCount; + private readonly bool _isContinuous; + private readonly bool _test; + + public GymnasiumEvaluator(int inputCount, int outputCount, bool isContinuous, bool test) + { + _inputCount = inputCount; + _outputCount = outputCount; + _isContinuous = 
isContinuous; + _test = test; + } + + public FitnessInfo Evaluate(IBlackBox phenome) + { + var finesses = new List(); + for (var i = 0; i < TrialsPerEvaluation; i++) + { + var episode = new GymnasiumEpisode(_inputCount, _outputCount, _isContinuous, _test); + finesses.Add(episode.Evaluate(phenome)); + } + + return new FitnessInfo(finesses.Average(fitness => fitness.PrimaryFitness)); + } +} diff --git a/src/SharpNeat.Tasks/Gymnasium/GymnasiumExperimentFactory.cs b/src/SharpNeat.Tasks/Gymnasium/GymnasiumExperimentFactory.cs new file mode 100644 index 00000000..7f38b1ec --- /dev/null +++ b/src/SharpNeat.Tasks/Gymnasium/GymnasiumExperimentFactory.cs @@ -0,0 +1,40 @@ +using SharpNeat.Experiments; +using SharpNeat.Experiments.ConfigModels; +using SharpNeat.IO; +using SharpNeat.NeuralNets; + +namespace SharpNeat.Tasks.Gymnasium; + +internal class GymnasiumExperimentFactory : INeatExperimentFactory +{ + /// + public string Id => "gymnasium"; + + /// + public INeatExperiment CreateExperiment(Stream jsonConfigStream) + { + // Load experiment JSON config. + ExperimentConfig experimentConfig = JsonUtils.Deserialize(jsonConfigStream); + + // Create an evaluation scheme object for the Gymnasium task. + var evalScheme = new GymnasiumEvaluationScheme(); + + // Create a NeatExperiment object with the evaluation scheme, + // and assign some default settings (these can be overridden by config). + var experiment = new NeatExperiment(evalScheme, Id) + { + IsAcyclic = true, + ActivationFnName = ActivationFunctionId.LeakyReLU.ToString() + }; + + // Apply configuration to the experiment instance. 
+ experiment.Configure(experimentConfig); + return experiment; + } + + /// + public INeatExperiment CreateExperimentSinglePrecision(Stream jsonConfigStream) + { + throw new NotImplementedException(); + } +} diff --git a/src/SharpNeat.Windows.App/SharpNeat.Windows.App.csproj b/src/SharpNeat.Windows.App/SharpNeat.Windows.App.csproj index 887e0335..d6ac006d 100644 --- a/src/SharpNeat.Windows.App/SharpNeat.Windows.App.csproj +++ b/src/SharpNeat.Windows.App/SharpNeat.Windows.App.csproj @@ -50,6 +50,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest @@ -71,6 +74,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest @@ -86,6 +92,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest diff --git a/src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json b/src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json new file mode 100644 index 00000000..a72909d2 --- /dev/null +++ b/src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json @@ -0,0 +1,34 @@ +{ + "name": "Gymnasium", + "isAcyclic": false, + "cyclesPerActivation": 1, + "activationFnName": "LeakyReLU", + "evolutionAlgorithm": { + "speciesCount": 15, + "elitismProportion": 0.5, + "selectionProportion": 0.5, + "offspringAsexualProportion": 0.5, + "offspringSexualProportion": 0.5, + "interspeciesMatingProportion": 0.01 + }, + "reproductionAsexual": { + "connectionWeightMutationProbability": 0.94, + "addNodeMutationProbability": 0.01, + "addConnectionMutationProbability": 0.025, + "deleteConnectionMutationProbability": 0.025 + }, + "reproductionSexual": { + "secondaryParentGeneProbability": 0.1 + }, + "populationSize": 225, + "initialInterconnectionsProportion": 0.05, + "connectionWeightScale": 5.0, + "complexityRegulationStrategy": { + "strategyName": "relative", + "relativeComplexityCeiling": 30, + "minSimplifcationGenerations": 10 + }, + "degreeOfParallelism": 16, + "enableHardwareAcceleratedNeuralNets": false, + "enableHardwareAcceleratedActivationFunctions": 
false +} \ No newline at end of file diff --git a/src/SharpNeat.Windows.App/config/experiments-descriptions/gymnasium.txt b/src/SharpNeat.Windows.App/config/experiments-descriptions/gymnasium.txt new file mode 100644 index 00000000..0ef4b5e2 --- /dev/null +++ b/src/SharpNeat.Windows.App/config/experiments-descriptions/gymnasium.txt @@ -0,0 +1 @@ +TBD \ No newline at end of file diff --git a/src/SharpNeat.Windows.App/config/experiments.json b/src/SharpNeat.Windows.App/config/experiments.json index 037f29fa..032dc8ac 100644 --- a/src/SharpNeat.Windows.App/config/experiments.json +++ b/src/SharpNeat.Windows.App/config/experiments.json @@ -143,6 +143,22 @@ "assemblyName": "SharpNeat.Tasks.Windows", "typeName": "SharpNeat.Tasks.Windows.GenerativeFunctionRegression.GenerativeFnRegressionUiFactory" } + }, + + // === Gymnasium === + { + "name": "Gymnasium", + "experimentFactory": { + "assemblyName": "SharpNeat.Tasks", + "typeName": "SharpNeat.Tasks.Gymnasium.GymnasiumExperimentFactory" + }, + "configFile": "config/experiments-config/gymnasium.config.json", + "descriptionFile": "config/experiments-descriptions/gymnasium.txt", + // UI settings. 
+ "experimentUiFactory": { + "assemblyName": "SharpNeat.Tasks.Windows", + "typeName": "SharpNeat.Tasks.Windows.Gymnasium.GymnasiumExperimentUiFactory" + } } ] } diff --git a/src/SharpNeat.Windows.App/gymnasium/main.py b/src/SharpNeat.Windows.App/gymnasium/main.py new file mode 100644 index 00000000..f5fcf83d --- /dev/null +++ b/src/SharpNeat.Windows.App/gymnasium/main.py @@ -0,0 +1,149 @@ +import collections +import struct +import time +import traceback + +import gymnasium as gym +from gymnasium import spaces +from gymnasium.wrappers import ClipAction +import win32pipe +import win32file +from argparse import ArgumentParser +import numpy as np +import logging + +logging.basicConfig(filename='debug.log', encoding='utf-8', level=logging.FATAL) +logging.debug("start") + +parser = ArgumentParser() +parser.add_argument("-uuid", dest="uuid", default="test") +parser.add_argument("-render", dest="render", default="False") +parser.add_argument("-test", dest="test", default="True") +args = parser.parse_args() +render = args.render == "True" +test = args.test == "True" + +if not test: + pipe = win32pipe.CreateNamedPipe("\\\\.\\pipe\\gymnasium_pipe_" + args.uuid, + win32pipe.PIPE_ACCESS_DUPLEX, + win32pipe.PIPE_TYPE_MESSAGE | win32pipe.PIPE_READMODE_MESSAGE | win32pipe.PIPE_WAIT, + 1, 1024, 1024, 0, None) + logging.debug("Connecting pipe...") + win32pipe.ConnectNamedPipe(pipe, None) + logging.debug("Pipe connected") + +# env = gym.make("BipedalWalker-v3", hardcore=False, render_mode="human" if render else None) +# env = gym.make("LunarLander-v2", render_mode="human" if render else None) +try: + # env = gym.make("LunarLander-v2", enable_wind=True, render_mode="human" if render else None) + env = gym.make("BipedalWalker-v3", hardcore=False, render_mode="human" if render else None) + env = ClipAction(env) + + logging.debug("Environment created") + logging.debug("Environment action size: %s", str(env.action_space.shape[0])) + logging.debug("Environment action type: %s", 
str(env.action_space.dtype)) + logging.debug("Environment action type size: %s", str(env.action_space.dtype.itemsize)) + logging.debug("Environment action type char: %s", str(env.action_space.dtype.char)) +except Exception as e: + logging.error(e) + + +def run_episode(): + observation, info = env.reset() + + if not test: + logging.debug("Initial observation:", observation) + send_observation(observation, 0, False) + logging.debug("Initial observation sent") + + max_reward_history_len = 100 + total_reward = 0 + total_timesteps = 0 + latest_rewards = collections.deque(maxlen=max_reward_history_len) + + while 1: + logging.debug("Starting step") + + if not test: + a = read_action(env.action_space) + else: + a = env.action_space.sample() + + total_timesteps += 1 + + observation, reward, terminated, truncated, info = env.step(a) + logging.debug(observation) + + done = terminated or truncated + + # if reward != 0: + # print("reward %0.3f" % reward) + + total_reward += reward + latest_rewards.append(float(reward)) + + masked_done = done + + if total_timesteps >= max_reward_history_len: + low_performing = True + for historical_reward in latest_rewards: + if historical_reward > 0: + low_performing = False + break + if low_performing: + masked_done = True + + if not test: + send_observation(observation, float(total_reward), masked_done) + logging.debug("Observation sent") + + if render: + env.render() + time.sleep(0.01) + + if masked_done: + logging.debug("Terminated") + if not render: + # pipe.close() + env.close() + break + else: + env.close() + # env.reset() + # print(reward) + # input("Done") + # print("timesteps %i reward %0.2f" % (total_timesteps, total_reward)) + + +def send_observation(observation: np.array, reward: float, done: bool): + win32file.WriteFile(pipe, bytes(observation.astype(float)) + bytes(np.array([reward]).astype(float)) + bytes( + np.array([int(done)]))) + + +def read_action(space: spaces.Space): + is_discrete = len(space.shape) == 0 + count = 1 if 
is_discrete else space.shape[0] + type_char = 'i' if is_discrete else space.dtype.char + item_size = 4 if is_discrete else space.dtype.itemsize + result, action_struct = win32file.ReadFile(pipe, item_size * count) + action_got = struct.unpack(count * type_char, action_struct) + return action_got[0] if is_discrete else action_got + + +def read_int_action(): + result, action_struct = win32file.ReadFile(pipe, 4) + action_got = struct.unpack('i', action_struct)[0] + return action_got + + +def read_float_action(count): + result, action_struct = win32file.ReadFile(pipe, 8 * count) + action_got = struct.unpack('dddd', action_struct) + return action_got + + +try: + run_episode() +except Exception as e: + logging.error(str(e)) + logging.error(traceback.format_exc())