8.0b1 Release (#2232)

* 8.0b1 release * auto rerun flaky tests --------- Co-authored-by: Yifan Shen <[email protected]>
apple · Jun 10, 2024 · f391218 · f391218
1 parent 9883e8d
commit f391218
Show file tree

Hide file tree

Showing 325 changed files with 41,191 additions and 6,710 deletions.
diff --git a/NOTICE.txt b/NOTICE.txt
@@ -23,3 +23,26 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
+
+
+
+This project contains content in the files coremltools/optimize/torch/layerwise_compression/_quant.py,
+coremltools/optimize/torch/layerwise_compression/algorithms.py,
+and coremltools/optimize/torch/layerwise_compression/layerwise_compressor.py which are adapted from
+gptq (https://github.com/IST-DASLab/gptq/). It also contains content in the file coremltools/optimize/torch/layerwise_compression/algorithms.py which is adapted from sparsegpt (https://github.com/IST-DASLab/sparsegpt). The license for these follows:
+
+Apache License 2.0
+
+Copyright 2023 IST Austria Distributed Algorithms and Systems Lab
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	 http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/coremlpython/CoreMLPython.h b/coremlpython/CoreMLPython.h
@@ -15,11 +15,33 @@
 
 #import <CoreML/CoreML.h>
 
+
+#ifndef BUILT_WITH_MACOS15_SDK
+#define BUILT_WITH_MACOS15_SDK \
+  !(TARGET_OS_OSX && (!defined(__MAC_15_0) || __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_15_0))
+#endif
+
+// Print BUILT_WITH_MACOS15_SDK value
+#if BUILT_WITH_MACOS15_SDK
+#pragma message ("Building with macOS 15+ SDK")
+#else
+#pragma message ("Building without macOS 15 SDK")
+#endif
+
+
 namespace py = pybind11;
 
 namespace CoreML {
     namespace Python {
 
+
+        struct State {
+#if BUILT_WITH_MACOS15_SDK
+            // MLState must be wrapped in a C++ class for PyBind.
+            MLState* m_state = nil;
+#endif
+        };
+
         class Model {
         private:
             MLModel *m_model = nil;
@@ -35,13 +57,19 @@ namespace CoreML {
             Model(const Model&) = delete;
             Model& operator=(const Model&) = delete;
             ~Model();
-            explicit Model(const std::string& urlStr, const std::string& computeUnits);
+            explicit Model(const std::string& urlStr, const std::string& computeUnits, const std::string& functionName);
             explicit Model(MLModel* m_model, NSURL* compiledUrl, bool deleteCompiledModelOnExit);
 
-            py::dict predict(const py::dict& input) const;
             py::list batchPredict(const py::list& batch) const;
 
             py::str getCompiledModelPath() const;
+
+            py::dict predict(const py::dict& input, State* state=NULL) const;
+
+#if BUILT_WITH_MACOS15_SDK
+            State newState() const;
+#endif
+
         };
     }
 }
diff --git a/coremlpython/CoreMLPython.mm b/coremlpython/CoreMLPython.mm
@@ -42,7 +42,7 @@ bool usingMacOS13OrHigher() {
     }
 }
 
-Model::Model(const std::string& urlStr, const std::string& computeUnits) {
+Model::Model(const std::string& urlStr, const std::string& computeUnits, const std::string& functionName) {
     @autoreleasepool {
         NSError *error = nil;
 
@@ -80,6 +80,12 @@ bool usingMacOS13OrHigher() {
         MLModelConfiguration *configuration = [MLModelConfiguration new];
         setComputeUnit(configuration, computeUnits);
 
+        if (!functionName.empty()) {
+#if BUILT_WITH_MACOS15_SDK
+            configuration.functionName = [NSString stringWithUTF8String:functionName.c_str()];
+#endif
+        }
+
         // Create MLModel
         m_model = [MLModel modelWithContentsOfURL:compiledUrl configuration:configuration error:&error];
         Utils::handleError(error);
@@ -94,13 +100,28 @@ bool usingMacOS13OrHigher() {
 {
 }
 
-py::dict Model::predict(const py::dict& input) const {
+
+py::dict Model::predict(const py::dict& input, State* state) const {
     @autoreleasepool {
         NSError *error = nil;
         MLDictionaryFeatureProvider *inFeatures = Utils::dictToFeatures(input, &error);
         Utils::handleError(error);
-        id<MLFeatureProvider> outFeatures = [m_model predictionFromFeatures:static_cast<MLDictionaryFeatureProvider * _Nonnull>(inFeatures)
-                                                                      error:&error];
+
+        id<MLFeatureProvider> outFeatures;
+#if BUILT_WITH_MACOS15_SDK
+        if (state == NULL) {
+          outFeatures = [m_model predictionFromFeatures:static_cast<MLDictionaryFeatureProvider * _Nonnull>(inFeatures)
+                                                            error:&error];
+        } else {
+           outFeatures = [m_model predictionFromFeatures:static_cast<MLDictionaryFeatureProvider * _Nonnull>(inFeatures)
+                                                        usingState:state->m_state
+                                                             error:&error];
+        }
+#else
+        outFeatures = [m_model predictionFromFeatures:static_cast<MLDictionaryFeatureProvider * _Nonnull>(inFeatures)
+                                                error:&error];
+#endif
+
         Utils::handleError(error);
         return Utils::featuresToDict(outFeatures);
     }
@@ -163,6 +184,15 @@ bool usingMacOS13OrHigher() {
 }
 
 
+#if BUILT_WITH_MACOS15_SDK
+State Model::newState() const {
+    State result;
+    result.m_state = [m_model newState];
+    return result;
+}
+#endif
+
+
 py::bytes Model::autoSetSpecificationVersion(const py::bytes& modelBytes) {
 
     CoreML::Specification::Model model;
@@ -207,14 +237,20 @@ bool usingMacOS13OrHigher() {
     py::module m("libcoremlpython", "CoreML.Framework Python bindings");
 
     py::class_<Model>(m, "_MLModelProxy")
-        .def(py::init<const std::string&, const std::string&>())
+        .def(py::init<const std::string&, const std::string&, const std::string&>())
         .def("predict", &Model::predict)
         .def("batchPredict", &Model::batchPredict)
         .def("get_compiled_model_path", &Model::getCompiledModelPath)
         .def_static("auto_set_specification_version", &Model::autoSetSpecificationVersion)
         .def_static("maximum_supported_specification_version", &Model::maximumSupportedSpecificationVersion)
+#if BUILT_WITH_MACOS15_SDK
+        .def("newState", &Model::newState)
+#endif
         .def_static("compileModel", &Model::compileModel);
 
+
+    py::class_<State>(m, "_State", py::module_local());
+
     return m.ptr();
 }
 

diff --git a/coremltools/__init__.py b/coremltools/__init__.py
@@ -64,6 +64,9 @@
 # New versions for iOS 17.0
 _SPECIFICATION_VERSION_IOS_17 = 8
 
+# New versions for iOS 18.0
+_SPECIFICATION_VERSION_IOS_18 = 9
+
 
 class ComputeUnit(_Enum):
     '''
@@ -82,6 +85,7 @@ class ComputeUnit(_Enum):
     _SPECIFICATION_VERSION_IOS_15: "CoreML5",
     _SPECIFICATION_VERSION_IOS_16: "CoreML6",
     _SPECIFICATION_VERSION_IOS_17: "CoreML7",
+    _SPECIFICATION_VERSION_IOS_18: "CoreML8",
 }
 
 # Default specification version for each backend
@@ -94,7 +98,7 @@ class ComputeUnit(_Enum):
 # expose unified converter in coremltools package level
 from .converters import ClassifierConfig
 from .converters import ColorLayout as colorlayout
-from .converters import EnumeratedShapes, ImageType, RangeDim, Shape, TensorType, convert
+from .converters import EnumeratedShapes, ImageType, RangeDim, Shape, StateType, TensorType, convert
 from .converters.mil._deployment_compatibility import AvailableTarget as target
 from .converters.mil.mil.passes.defs import quantization as transform
 from .converters.mil.mil.passes.defs.quantization import ComputePrecision as precision

diff --git a/coremltools/_deps/__init__.py b/coremltools/_deps/__init__.py
@@ -153,7 +153,7 @@ def __get_sklearn_version(version):
 
 # ---------------------------------------------------------------------------------------
 _HAS_TORCH = True
-_TORCH_MAX_VERSION = "2.2.0"
+_TORCH_MAX_VERSION = "2.3.0"
 _HAS_TORCH_EXPORT_API = False
 try:
     import torch

diff --git a/coremltools/converters/__init__.py b/coremltools/converters/__init__.py
@@ -4,16 +4,15 @@
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
 # expose directories as imports
-from . import libsvm
-from . import sklearn
-from . import xgboost
+from . import libsvm, sklearn, xgboost
 from ._converters_entry import convert
 from .mil import (
     ClassifierConfig,
     ColorLayout,
-    TensorType,
+    EnumeratedShapes,
     ImageType,
     RangeDim,
     Shape,
-    EnumeratedShapes,
+    StateType,
+    TensorType,
 )
diff --git a/coremltools/converters/_converters_entry.py b/coremltools/converters/_converters_entry.py
@@ -29,6 +29,7 @@
     InputType,
     RangeDim,
     Shape,
+    StateType,
     TensorType,
 )
 from coremltools.converters.mil.mil import Program, types
@@ -73,6 +74,7 @@ def convert(
     package_dir=None,
     debug=False,
     pass_pipeline: Optional[PassPipeline] = None,
+    states=None,
 ):
     """
     Convert a TensorFlow or PyTorch model to the Core ML model format as either
@@ -403,7 +405,7 @@ def skip_real_div_ops(op):
         returned.
 
         An enum with the following possible values:
-        
+
         * ``coremltools.ComputeUnit.ALL``: Use all compute units available, including the neural engine.
         * ``coremltools.ComputeUnit.CPU_ONLY``: Limit the model to only use the CPU.
         * ``coremltools.ComputeUnit.CPU_AND_GPU``: Use both the CPU and GPU, but not the neural engine.
@@ -477,6 +479,50 @@ def skip_real_div_ops(op):
 
              mlmodel = ct.convert(model, pass_pipeline=ct.PassPipeline.DEFAULT_PALETTIZATION)
 
+    states:
+        Create a stateful ``mlprogram`` model
+        by providing the ``StateType`` in the ``states`` argument (for details see `MIL Input Types <https://apple.github.io/coremltools/source/coremltools.converters.mil.input_types.html>`_).
+        The stateful model is useful when converting a large language model with KV-Cache.
+        The name of each ``StateType`` must match one of the keys returned by the PyTorch ``named_buffers()`` method of the source traced model.
+
+        The following example converts a torch model with a buffer called ``state_1``.
+
+        .. sourcecode:: python
+
+            class UpdateBufferModel(torch.nn.Module):
+                def __init__(self):
+                    super(UpdateBufferModel, self).__init__()
+                    self.register_buffer(
+                        "state_1", torch.tensor(np.array([0, 0, 0], dtype=np.float32))
+                    )
+
+                def forward(self, x):
+                    # In place update of the model state
+                    self.state_1.add_(x)
+                    return self.state_1
+
+
+            model = UpdateBufferModel()
+            traced_model = torch.jit.trace(model, torch.tensor([1, 2, 3], dtype=torch.float32))
+
+            inputs = [
+                ct.TensorType(shape=(3,)),
+            ]
+            states = [
+                ct.StateType(
+                    wrapped_type=ct.TensorType(
+                        shape=(3,),
+                    ),
+                    name="state_1",
+                ),
+            ]
+            mlmodel = ct.convert(
+                traced_model,
+                inputs=inputs,
+                states=states,
+                minimum_deployment_target=ct.target.iOS18,
+            )
+
     Returns
     -------
 
@@ -526,8 +572,7 @@ def skip_real_div_ops(op):
             >>> results = mlmodel.predict({"input": example_input.numpy()})
             >>> print(results['1651']) # 1651 is the node name given by PyTorch's JIT
 
-    See `Conversion Options <https://apple.github.io/coremltools/docs-guides/source/conversion-options.html>`_ for
-    more advanced options.
+    For more options see `Conversion Options <https://apple.github.io/coremltools/docs-guides/source/conversion-options.html>`_.
     """
     _check_deployment_target(minimum_deployment_target)
     outputs_as_strings, outputs_as_tensor_or_image_types = _validate_outputs_argument(outputs)
@@ -578,6 +623,15 @@ def skip_real_div_ops(op):
         and need_fp16_cast_pass
     )
 
+    # Verify that the inputs do not contain any StateType
+    if states is None:
+        states = []
+    _verify_inputs_doesnot_contains_states(inputs)
+
+    # States can only be passed if the source is PyTorch
+    if len(states) > 0 and exact_source != "pytorch":
+        raise ValueError("'states' can only be passed with pytorch source model.")
+
     mlmodel = mil_convert(
         model,
         convert_from=exact_source,
@@ -592,6 +646,7 @@ def skip_real_div_ops(op):
         specification_version=specification_version,
         main_pipeline=pass_pipeline,
         use_default_fp16_io=use_default_fp16_io,
+        states=states,
     )
 
     if exact_target == "mlprogram" and mlmodel._input_has_infinite_upper_bound():
@@ -658,6 +713,20 @@ def _check_deployment_target(minimum_deployment_target):
         raise TypeError(msg.format(minimum_deployment_target))
 
 
+def _verify_inputs_doesnot_contains_states(
+    inputs: List[InputType],
+) -> None:
+    """
+    Verify that StateType is not present in the inputs.
+    """
+    if inputs is None:
+        return
+
+    for val in inputs:
+        if isinstance(val, StateType):
+            raise ValueError("'inputs' cannot contain an instance of StateType.")
+
+
 def _validate_outputs_argument(outputs):
     """
     - validate properties that the "outputs" argument must satisfy, for instance, it should either be a list
@@ -848,9 +917,9 @@ def _flatten_list(_inputs):
 
     elif exact_source == "pytorch":
         if _HAS_TORCH_EXPORT_API and isinstance(model, ExportedProgram):
-            if model.dialect != "EDGE":
+            if model.dialect not in ("ATEN", "EDGE"):
                 raise NotImplementedError(
-                    f"Conversion for models with only EDGE dialect is supported/tested. Provided Dialect: {model.dialect}"
+                    f"Conversion for models with only ATEN or EDGE dialect is supported/tested. Provided Dialect: {model.dialect}"
                 )
 
             # TODO: rdar://115845792 ([Executorch] Handle user provided inputs/outputs in the convert API)

diff --git a/coremltools/converters/mil/Makefile b/coremltools/converters/mil/Makefile
@@ -8,7 +8,6 @@ SRC_PACKAGES=.
 
 TF_IOS13_TEST=../tensorflow/test
 MIL_TEST="."
-MIL_TEST_INTERNAL="../../../../coremltools-internal/coremltools_internal/converters/mil"
 
 .PHONY: all lint test style checkstyle
 
@@ -26,7 +25,7 @@ lint:
 	${PYTHON} -m pylint -j 0 ${SRC_PACKAGES}
 
 test:
-	${PYTHON} -m pytest -W ignore::DeprecationWarning ${MIL_TEST} ${MIL_TEST_INTERNAL}
+	${PYTHON} -m pytest -W ignore::DeprecationWarning ${MIL_TEST}
 
 test_ref:
 	${PYTHON} -m pytest -W ignore::DeprecationWarning ${TF_IOS13_TEST}
diff --git a/coremltools/converters/mil/__init__.py b/coremltools/converters/mil/__init__.py
@@ -11,6 +11,6 @@
                   get_existing_symbol, get_new_symbol, get_new_variadic_symbol,
                   mil_list, register_op)
 from .input_types import (ClassifierConfig, ColorLayout, EnumeratedShapes,
-                          ImageType, InputType, RangeDim, Shape, TensorType)
+                          ImageType, InputType, RangeDim, Shape, TensorType, StateType)
 from .frontend.tensorflow.tf_op_registry import register_tf_op
 from .frontend.torch import register_torch_op