Merge pull request #37 from huggingface/allow-fixed-shapes
Use fixed shapes
pcuenca authored Aug 2, 2023
2 parents 22d6c6e + e2b1832 commit 2f5ac17
Showing 4 changed files with 43 additions and 26 deletions.
44 changes: 37 additions & 7 deletions src/exporters/coreml/config.py
@@ -181,17 +181,43 @@ def inputs(self) -> "OrderedDict[str, InputDescription]":
         return common_inputs

     @property
-    def inferSequenceLengthFromConfig(self) -> bool:
+    def infer_sequence_length_from_config(self) -> bool:
         """When True, will use the max sequence length from the model configuration."""
         return False

     @property
-    def maxSequenceLength(self) -> int:
-        if self.inferSequenceLengthFromConfig:
+    def max_sequence_length(self) -> int:
+        """
+        Retrieve the max sequence length from the model configuration, or use a hardcoded value (currently 128).
+        This can be subclassed to support custom lengths.
+        """
+        if self.infer_sequence_length_from_config:
             # Alternatives such as `n_positions` are automatically mapped to `max_position_embeddings`
             if hasattr(self._config, "max_position_embeddings"):
                 return self._config.max_position_embeddings
         return 128

+    @property
+    def use_flexible_shapes(self) -> bool:
+        """
+        When True, inputs are allowed to use sequence lengths of `1` up to `maxSequenceLength`.
+        Unfortunately, this currently prevents the model from running on GPU or the Neural Engine.
+        We default to `False`, but this can be overridden in custom configurations.
+        """
+        return False
+
+    @property
+    def input_ids_sequence_length(self) -> Union[Tuple, int]:
+        """
+        Sequence lengths supported for the `input_ids`.
+        - When returning a tuple, flexible shapes will be used. The tuple must contain two items,
+          representing the minimum and maximum possible sequence lengths.
+        - When returning an `int`, a fixed sequence length will be used.
+        """
+        return (1, self.max_sequence_length) if self.use_flexible_shapes else self.max_sequence_length
+

     @property
     def _input_descriptions(self) -> "OrderedDict[str, InputDescription]":
         if self.modality in ["text", "audio"] and self.seq2seq == "decoder":
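
Taken together, the new properties default to a fixed `input_ids` length while leaving an escape hatch for model-specific configurations. A minimal sketch of how a custom config could override them (the class name and import path below are hypothetical; only the property names come from this diff):

from exporters.coreml.config import CoreMLConfig  # assumed module path for CoreMLConfig

class MyTextCoreMLConfig(CoreMLConfig):
    modality = "text"

    @property
    def infer_sequence_length_from_config(self) -> bool:
        # Use the checkpoint's max_position_embeddings instead of the hardcoded 128.
        return True

    @property
    def use_flexible_shapes(self) -> bool:
        # Opt back into (1, max_sequence_length) ranges; note the docstring's warning
        # that this currently keeps the model off the GPU and Neural Engine.
        return True
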
@@ -244,7 +270,7 @@ def _input_descriptions(self) -> "OrderedDict[str, InputDescription]":
                     InputDescription(
                         "input_ids",
                         "Indices of input sequence tokens in the vocabulary",
-                        sequence_length=(1, self.maxSequenceLength),
+                        sequence_length=self.input_ids_sequence_length,
                     )
                 ),
                 (
@@ -268,7 +294,7 @@ def _input_descriptions(self) -> "OrderedDict[str, InputDescription]":
                     InputDescription(
                         "input_ids",
                         "Indices of input sequence tokens in the vocabulary",
-                        sequence_length=(1, self.maxSequenceLength),
+                        sequence_length=self.input_ids_sequence_length,
                     )
                 ),
                 (
@@ -794,7 +820,7 @@ def _generate_dummy_image(

     def _get_max_sequence_length(self, input_desc, default_length):
         if input_desc.sequence_length is None:
-            return default_length
+            return self.max_sequence_length
         elif isinstance(input_desc.sequence_length, tuple):
             sequence_length = input_desc.sequence_length[-1]
             if sequence_length == -1:
@@ -851,8 +877,12 @@ def generate_dummy_inputs(
             sequence_length = self._get_max_sequence_length(input_desc, 64)

             # don't want encoder and decoder to use same sequence length
+            # (unless shapes are fixed)
             if self.seq2seq == "decoder":
-                encoder_sequence_length = sequence_length + 7
+                if isinstance(input_desc.sequence_length, tuple):
+                    encoder_sequence_length = sequence_length + 7
+                else:
+                    encoder_sequence_length = sequence_length

         if self.task == "multiple-choice":
             shape = (batch_size, self._config.num_labels, sequence_length)
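For reference, the resolution logic added above can be summarized in a few lines. The sketch below is a standalone illustration (not library code) of how `input_ids_sequence_length` picks an int or a `(min, max)` tuple, and how `generate_dummy_inputs` only offsets the encoder length when the shape is flexible:

def input_ids_sequence_length(max_sequence_length=128, use_flexible_shapes=False):
    # Tuple -> flexible shapes; int -> fixed shape (the new default).
    return (1, max_sequence_length) if use_flexible_shapes else max_sequence_length

def encoder_dummy_length(sequence_length, declared_length):
    # Mirrors the decoder branch of generate_dummy_inputs: the encoder dummy input
    # only gets a different (longer) length when flexible shapes are in use.
    return sequence_length + 7 if isinstance(declared_length, tuple) else sequence_length

assert input_ids_sequence_length() == 128
assert input_ids_sequence_length(use_flexible_shapes=True) == (1, 128)
assert encoder_dummy_length(64, (1, 128)) == 71   # flexible: encoder differs from decoder
assert encoder_dummy_length(128, 128) == 128      # fixed: both use the same length
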
5 changes: 3 additions & 2 deletions src/exporters/coreml/convert.py
@@ -90,7 +90,7 @@ def get_shape(config, input_desc, dummy_input, axis=-1):
     shape = list(default_shape)

     # Does the input shape need to be flexible?
-    if config.use_past or config.seq2seq:
+    if config.use_past:
         #shape[0] = ct.RangeDim() # batch size #TODO
         shape[axis] = ct.RangeDim()
         default_shape = None
@@ -160,7 +160,8 @@ def get_input_types(
         input_desc = input_descs["encoder_outputs"]
         shape = list(dummy_inputs["encoder_outputs"][0].shape)
         #shape[0] = ct.RangeDim() # batch size #TODO
-        shape[1] = ct.RangeDim()
+        # TODO: only disable if we are using fixed shapes (which could be part of the configuration)
+        # shape[1] = ct.RangeDim()
         input_types.append(
             ct.TensorType(name=input_desc.name, shape=ct.Shape(shape), dtype=np.float32)
         )
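The effect of this change on the Core ML side is that inputs are declared with fully fixed shapes instead of a `ct.RangeDim` on the sequence axis. A rough sketch of the two shape styles using the public coremltools API (names and dtypes here are illustrative, not taken from convert.py):

import coremltools as ct
import numpy as np

batch_size, max_sequence_length = 1, 128

# Fixed shape: what this PR defaults to, and what keeps the model eligible
# for GPU / Neural Engine execution.
fixed_input = ct.TensorType(
    name="input_ids",
    shape=ct.Shape((batch_size, max_sequence_length)),
    dtype=np.int32,
)

# Flexible sequence axis: what use_flexible_shapes=True would correspond to.
flexible_input = ct.TensorType(
    name="input_ids",
    shape=ct.Shape((batch_size, ct.RangeDim(lower_bound=1, upper_bound=max_sequence_length))),
    dtype=np.int32,
)
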
18 changes: 2 additions & 16 deletions src/exporters/coreml/models.py
@@ -178,7 +178,7 @@ def inputs(self) -> OrderedDict[str, InputDescription]:
                     InputDescription(
                         "input_ids",
                         "Indices of input sequence tokens in the vocabulary",
-                        sequence_length=(1, 128),
+                        sequence_length=self.input_ids_sequence_length,
                     )
                 ),
                 (
@@ -201,13 +201,6 @@ class ErnieCoreMLConfig(CoreMLConfig):
 class GPT2CoreMLConfig(CoreMLConfig):
     modality = "text"

-    @property
-    def inputs(self) -> OrderedDict[str, InputDescription]:
-        input_descs = super().inputs
-        # TODO: coremltools blows up and uses infinite RAM with flexible input shape
-        input_descs["input_ids"].sequence_length = 128
-        return input_descs
-
     def patch_pytorch_ops(self):
         def _fill(context, node):
             from coremltools.converters.mil import Builder as mb
@@ -313,13 +306,6 @@ def einsum(context, node):
 class GPTNeoCoreMLConfig(CoreMLConfig):
     modality = "text"

-    @property
-    def inputs(self) -> OrderedDict[str, InputDescription]:
-        input_descs = super().inputs
-        # TODO: coremltools blows up and uses infinite RAM with flexible input shape
-        input_descs["input_ids"].sequence_length = 128
-        return input_descs
-

 class GPTNeoXCoreMLConfig(CoreMLConfig):
     modality = "text"
@@ -443,7 +429,7 @@ def _input_descriptions(self) -> OrderedDict[str, InputDescription]:
                     InputDescription(
                         "input_ids",
                         "Indices of input sequence tokens in the vocabulary",
-                        sequence_length=(1, 128),
+                        sequence_length=self.input_ids_sequence_length,
                     )
                 ),
                 (
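With their `inputs` overrides removed, GPT-2 and GPT-Neo now simply inherit the base class behaviour, i.e. a fixed sequence length of 128 by default. A hypothetical way to confirm this (import path, task name, and constructor arguments are assumed, mirroring the `TextCoreMLConfig(None, task=...)` pattern in the test file):

from transformers import GPT2Config
from exporters.coreml.models import GPT2CoreMLConfig  # assumed module path

coreml_config = GPT2CoreMLConfig(GPT2Config(), task="text-generation")  # task name assumed
seq_len = coreml_config.inputs["input_ids"].sequence_length
print(seq_len)  # expected: 128 (a fixed int, no longer a (1, 128) tuple)
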
2 changes: 1 addition & 1 deletion tests/test_coreml.py
@@ -57,7 +57,7 @@ def test_sequence_length(self):
         self.assertEqual(len(flexible_output), 1)
         self.assertEqual(flexible_output[0]["axis"], 1)
         self.assertEqual(flexible_output[0]["min"], 1)
-        self.assertEqual(flexible_output[0]["max"], config.maxSequenceLength)
+        self.assertEqual(flexible_output[0]["max"], config.max_sequence_length)

         config = TextCoreMLConfig(None, task="text-classification")
         flexible_outputs = config.get_flexible_outputs()
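A quick interactive check in the same spirit as the updated assertion, run from within tests/test_coreml.py where the TextCoreMLConfig helper is defined (arguments mirror the existing test; the exact values assume the helper keeps the base class defaults):

config = TextCoreMLConfig(None, task="text-classification")
print(config.max_sequence_length)            # 128 with the defaults used here
print(hasattr(config, "maxSequenceLength"))  # False: the camelCase spelling no longer exists
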
