diff --git a/.file-header b/.file-header
deleted file mode 100644
index 83eef0801ee..00000000000
--- a/.file-header
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright \(c\) (\d{4}|\d{4}-\d{4}) Intel Corporation
-# Licensed under the Apache License, Version 2.0 \(the "License"\);
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml
new file mode 100644
index 00000000000..f4e8bb6c774
--- /dev/null
+++ b/.github/workflows/mypy.yml
@@ -0,0 +1,23 @@
+name: mypy
+
+on:
+ pull_request:
+ types:
+ - opened
+ - reopened
+ - synchronize
+
+jobs:
+ mypy:
+ runs-on: ubuntu-20.04
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v3
+ with:
+ python-version: "3.8.10"
+ - name: Install NNCF
+ run: pip install -e .
+ - name: Install mypy
+ run: pip install mypy==1.8.0
+ - name: Run mypy
+ run: mypy --install-types --config-file=.mypy.ini --non-interactive
diff --git a/.mypy.ini b/.mypy.ini
new file mode 100644
index 00000000000..42b6329f72c
--- /dev/null
+++ b/.mypy.ini
@@ -0,0 +1,9 @@
+[mypy]
+files = nncf/common/sparsity
+follow_imports = silent
+strict = True
+
+# should be removed later
+# mypy recommends the following tool as an autofix:
+# https://github.com/hauntsaninja/no_implicit_optional
+implicit_optional = True
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5066e543001..11dd747ce2e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -26,3 +26,4 @@ repos:
hooks:
- id: markdownlint
args: [--config=.markdownlint.yaml]
+
diff --git a/Makefile b/Makefile
index 205c19101da..3a39add44f8 100644
--- a/Makefile
+++ b/Makefile
@@ -23,6 +23,8 @@ install-pre-commit:
install-onnx-test:
pip install -U pip
pip install -e .[onnx]
+ pip install "git+https://github.com/openvinotoolkit/open_model_zoo.git@37f60eb#egg=omz-tools&subdirectory=tools/model_tools"
+ pip install "git+https://github.com/openvinotoolkit/open_model_zoo.git@37f60eb#egg=accuracy_checker&subdirectory=tools/accuracy_checker"
pip install -r tests/onnx/requirements.txt
pip install -r tests/cross_fw/install/requirements.txt
pip install -r tests/cross_fw/examples/requirements.txt
@@ -50,14 +52,14 @@ test-examples-onnx:
install-openvino-test:
pip install -U pip
pip install -e .[openvino]
+ pip install "git+https://github.com/openvinotoolkit/open_model_zoo.git@37f60eb#egg=omz-tools&subdirectory=tools/model_tools"
+ pip install "git+https://github.com/openvinotoolkit/open_model_zoo.git@37f60eb#egg=accuracy_checker&subdirectory=tools/accuracy_checker"
pip install tensorflow==2.12.0
pip install -r tests/openvino/requirements.txt
pip install -r tests/cross_fw/install/requirements.txt
pip install -r tests/cross_fw/examples/requirements.txt
install-openvino-dev: install-openvino-test install-pre-commit
- pip install -r examples/experimental/openvino/bert/requirements.txt
- pip install -r examples/experimental/openvino/yolo_v5/requirements.txt
pip install -r examples/post_training_quantization/openvino/mobilenet_v2/requirements.txt
pip install -r examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/requirements.txt
pip install -r examples/post_training_quantization/openvino/yolov8/requirements.txt
@@ -81,6 +83,8 @@ test-examples-openvino:
install-tensorflow-test:
pip install -U pip
pip install -e .[tf]
+ pip install "git+https://github.com/openvinotoolkit/open_model_zoo.git@37f60eb#egg=omz-tools&subdirectory=tools/model_tools"
+ pip install "git+https://github.com/openvinotoolkit/open_model_zoo.git@37f60eb#egg=accuracy_checker&subdirectory=tools/accuracy_checker"
pip install -r tests/tensorflow/requirements.txt
pip install -r tests/cross_fw/install/requirements.txt
pip install -r tests/cross_fw/examples/requirements.txt
@@ -105,6 +109,8 @@ test-examples-tensorflow:
install-torch-test:
pip install -U pip
pip install -e .[torch] --index-url https://download.pytorch.org/whl/cu118 --extra-index-url=https://pypi.org/simple # ticket 119128
+ pip install "git+https://github.com/openvinotoolkit/open_model_zoo.git@37f60eb#egg=omz-tools&subdirectory=tools/model_tools"
+ pip install "git+https://github.com/openvinotoolkit/open_model_zoo.git@37f60eb#egg=accuracy_checker&subdirectory=tools/accuracy_checker"
pip install -r tests/torch/requirements.txt --index-url https://download.pytorch.org/whl/cu118 --extra-index-url=https://pypi.org/simple
pip install -r tests/cross_fw/install/requirements.txt
pip install -r tests/cross_fw/examples/requirements.txt
@@ -170,3 +176,12 @@ test-examples:
# Pre commit check
pre-commit:
pre-commit run -a
+
+
+###############################################################################
+# Fuzzing tests
+install-fuzz-test: install-common-test
+ pip install -r tests/cross_fw/sdl/fuzz/requirements.txt
+
+test-fuzz:
+ python tests/cross_fw/sdl/fuzz/quantize_api.py
diff --git a/README.md b/README.md
index 88765c60477..cc201f0a4ff 100644
--- a/README.md
+++ b/README.md
@@ -274,41 +274,45 @@ For a quicker start with NNCF-powered compression, try sample notebooks and scri
A collection of ready-to-run Jupyter* notebooks tutorials and demos are available to explain and display NNCF compression algorithms for optimizing models for inference with the OpenVINO Toolkit.
-| Notebook Tutorial Name | Compression Algorithm | Backend | Domain |
-|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------:|:----------:|:-----------------------------------:|
-| [BERT Quantization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/105-language-quantize-bert) [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/105-language-quantize-bert/105-language-quantize-bert.ipynb) | Post-Training Quantization | OpenVINO | NLP |
-| [MONAI Segmentation Model Quantization](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/110-ct-segmentation-quantize) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=notebooks%2F110-ct-segmentation-quantize%2F110-ct-scan-live-inference.ipynb) | Post-Training Quantization | OpenVINO | Segmentation |
-| [PyTorch Model Quantization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/112-pytorch-post-training-quantization-nncf) | Post-Training Quantization | PyTorch | Image Classification |
-| [TensorFlow Model Quantization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/301-tensorflow-training-openvino) | Post-Training Quantization | Tensorflow | Image Classification |
-| [Migrating from POT to NNCF](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/111-yolov5-quantization-migration) [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/111-yolov5-quantization-migration/111-yolov5-quantization-migration.ipynb) | Post-Training Quantization | OpenVINO | Object detection |
-| [Quantization with Accuracy Control](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/122-quantizing-model-with-accuracy-control) | Post-Training Quantization with Accuracy Control | OpenVINO | Speech-to-Text, Object Detection |
-| [TensorFlow Training-Time Compression](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/301-tensorflow-training-openvino) | Training-Time Compression | Tensorflow | Image Classification |
-| [Joint Pruning, Quantization and Distillation for BERT](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/116-sparsity-optimization) | Joint Pruning, Quantization and Distillation | OpenVINO | NLP |
+| Notebook Tutorial Name | Compression Algorithm | Backend | Domain |
+|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------:|:----------:|:-----------------------------------:|
+| [BERT Quantization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/105-language-quantize-bert) [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/105-language-quantize-bert/105-language-quantize-bert.ipynb) | Post-Training Quantization | OpenVINO | NLP |
+| [MONAI Segmentation Model Quantization](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/110-ct-segmentation-quantize) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=notebooks%2F110-ct-segmentation-quantize%2F110-ct-scan-live-inference.ipynb) | Post-Training Quantization | OpenVINO | Segmentation |
+| [PyTorch Model Quantization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/112-pytorch-post-training-quantization-nncf) | Post-Training Quantization | PyTorch | Image Classification |
+| [TensorFlow Model Quantization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/301-tensorflow-training-openvino) | Post-Training Quantization | Tensorflow | Image Classification |
+| [Migrating from POT to NNCF](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/111-yolov5-quantization-migration) [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/111-yolov5-quantization-migration/111-yolov5-quantization-migration.ipynb) | Post-Training Quantization | OpenVINO | Object detection |
+| [Quantization with Accuracy Control](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/122-quantizing-model-with-accuracy-control) | Post-Training Quantization with Accuracy Control | OpenVINO | Speech-to-Text, Object Detection |
+| [PyTorch Training-Time Compression](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/302-pytorch-quantization-aware-training) | Training-Time Compression | PyTorch | Image Classification |
+| [TensorFlow Training-Time Compression](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/301-tensorflow-training-openvino) | Training-Time Compression | Tensorflow | Image Classification |
+| [Joint Pruning, Quantization and Distillation for BERT](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/116-sparsity-optimization) | Joint Pruning, Quantization and Distillation | OpenVINO | NLP |
Below is a list of notebooks demonstrating OpenVINO conversion and inference together with NNCF compression for models from various domains.
-| Demo Model | Compression Algorithm | Backend | Domain |
-|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:---------:|:-----------------------------------------------------------------:|
-| [YOLOv8](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/230-yolov8-optimization) [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/230-yolov8-optimization/230-yolov8-object-detection.ipynb) | Post-Training Quantization | OpenVINO | Object Detection, KeyPoint Detection, Instance Segmentation |
-| [YOLOv7](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/226-yolov7-optimization) | Post-Training Quantization | OpenVINO | Object Detection |
-| [Segment Anything Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/237-segment-anything) | Post-Training Quantization | OpenVINO | Panoptic Segmentation |
-| [OneFormer](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/249-oneformer-segmentation) | Post-Training Quantization | OpenVINO | Panoptic Segmentation |
-| [InstructPix2Pix](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/231-instruct-pix2pix-image-editing) | Post-Training Quantization | OpenVINO | Image-to-Image |
-| [CLIP](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/228-clip-zero-shot-image-classification) | Post-Training Quantization | OpenVINO | Image-to-Text |
-| [BLIP](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/233-blip-visual-language-processing) | Post-Training Quantization | OpenVINO | Image-to-Text |
-| [Latent Consistency Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/263-latent-consistency-models-image-generation) | Post-Training Quantization | OpenVINO | Text-to-Image |
-| [Würstchen](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/265-wuerstchen-image-generation) | Post-Training Quantization | OpenVINO | Text-to-Image |
-| [ControlNet QR Code Monster](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/264-qrcode-monster) | Post-Training Quantization | OpenVINO | Text-to-Image |
-| [SDXL-turbo](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/271-sdxl-turbo) | Post-Training Quantization | OpenVINO | Text-to-Image, Image-to-Image |
-| [DeepFloyd IF](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/238-deepfloyd-if) | Post-Training Quantization, Weight Compression | OpenVINO | Text-to-Image, Image-to-Image |
-| [ImageBind](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/239-image-bind) | Post-Training Quantization | OpenVINO | Multi-Modal Retrieval |
-| [Distil-Whisper](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/267-distil-whisper-asr) | Post-Training Quantization | OpenVINO | Speech-to-Text |
-| [Whisper](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/227-whisper-subtitles-generation) [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/227-whisper-subtitles-generation/227-whisper-convert.ipynb) | Post-Training Quantization | OpenVINO | Speech-to-Text |
-| [MMS Speech Recognition](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/255-mms-massively-multilingual-speech) | Post-Training Quantization | OpenVINO | Speech-to-Text |
-| [Grammar Error Correction](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/214-grammar-correction) | Post-Training Quantization | OpenVINO | NLP, Grammar Correction |
-| [LLM Instruction Following](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/275-llm-question-answering) | Weight Compression | OpenVINO | NLP, Instruction Following |
-| [Dolly 2.0](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/240-dolly-2-instruction-following) | Weight Compression | OpenVINO | NLP, Instruction Following |
-| [LLM Chat Bots](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) | Weight Compression | OpenVINO | NLP, Chat Bot |
+| Demo Model | Compression Algorithm | Backend | Domain |
+|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------:|:---------:|:--------------------------------------------------------------------:|
+| [YOLOv8](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/230-yolov8-optimization) [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/230-yolov8-optimization/230-yolov8-object-detection.ipynb) | Post-Training Quantization | OpenVINO | Object Detection, KeyPoint Detection, Instance Segmentation |
+| [YOLOv7](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/226-yolov7-optimization) | Post-Training Quantization | OpenVINO | Object Detection |
+| [EfficientSAM](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/274-efficient-sam) | Post-Training Quantization | OpenVINO | Image Segmentation |
+| [Segment Anything Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/237-segment-anything) | Post-Training Quantization | OpenVINO | Image Segmentation |
+| [OneFormer](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/249-oneformer-segmentation) | Post-Training Quantization | OpenVINO | Image Segmentation |
+| [InstructPix2Pix](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/231-instruct-pix2pix-image-editing) | Post-Training Quantization | OpenVINO | Image-to-Image |
+| [CLIP](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/228-clip-zero-shot-image-classification) | Post-Training Quantization | OpenVINO | Image-to-Text |
+| [BLIP](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/233-blip-visual-language-processing) | Post-Training Quantization | OpenVINO | Image-to-Text |
+| [Segmind-VegaRT](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/248-stable-diffusion-xl/248-segmind-vegart.ipynb) | Post-Training Quantization | OpenVINO | Text-to-Image |
+| [Latent Consistency Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/263-latent-consistency-models-image-generation) | Post-Training Quantization | OpenVINO | Text-to-Image |
+| [Würstchen](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/265-wuerstchen-image-generation) | Post-Training Quantization | OpenVINO | Text-to-Image |
+| [ControlNet QR Code Monster](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/264-qrcode-monster) | Post-Training Quantization | OpenVINO | Text-to-Image |
+| [SDXL-turbo](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/271-sdxl-turbo) | Post-Training Quantization | OpenVINO | Text-to-Image, Image-to-Image |
+| [DeepFloyd IF](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/238-deepfloyd-if) | Post-Training Quantization, Weight Compression | OpenVINO | Text-to-Image, Image-to-Image |
+| [ImageBind](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/239-image-bind) | Post-Training Quantization | OpenVINO | Multi-Modal Retrieval |
+| [Distil-Whisper](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/267-distil-whisper-asr) | Post-Training Quantization | OpenVINO | Speech-to-Text |
+| [Whisper](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/227-whisper-subtitles-generation) [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/227-whisper-subtitles-generation/227-whisper-convert.ipynb) | Post-Training Quantization | OpenVINO | Speech-to-Text |
+| [MMS Speech Recognition](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/255-mms-massively-multilingual-speech) | Post-Training Quantization | OpenVINO | Speech-to-Text |
+| [Grammar Error Correction](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/214-grammar-correction) | Post-Training Quantization | OpenVINO | NLP, Grammar Correction |
+| [LLM Instruction Following](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/275-llm-question-answering) | Weight Compression | OpenVINO | NLP, Instruction Following |
+| [Dolly 2.0](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/240-dolly-2-instruction-following) | Weight Compression | OpenVINO | NLP, Instruction Following |
+| [Stable-Zephyr-3b](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/273-stable-zephyr-3b-chatbot) | Weight Compression | OpenVINO | NLP, Chat Bot |
+| [LLM Chat Bots](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) | Weight Compression | OpenVINO | NLP, Chat Bot |
### Post-Training Quantization Examples
@@ -352,13 +356,6 @@ NNCF may be straightforwardly integrated into training/evaluation pipelines of t
NNCF is used as a compression backend within the renowned `transformers` repository in HuggingFace Optimum Intel.
-### Git patches for third-party repository
-
-See [third_party_integration](./third_party_integration) for examples of code modifications (Git patches and base commit IDs are provided) that are necessary to integrate NNCF into the following repositories:
-
-- [huggingface-transformers](third_party_integration/huggingface_transformers/README.md)
-**NOTE**: this patch is deprecated and will be removed from NNCF repository in future releases.
-
## Installation Guide
For detailed installation instructions please refer to the [Installation](./docs/Installation.md) page.
@@ -383,8 +380,6 @@ NNCF is also available via [conda](https://anaconda.org/conda-forge/nncf):
conda install -c conda-forge nncf
```
-You may also use one of the Dockerfiles in the [docker](./docker) directory to build an image with an environment already set up and ready for running NNCF [sample scripts](#demos-tutorials-and-samples).
-
### System requirements
- Ubuntu\* 18.04 or later (64-bit)
diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index 86b10fe2a43..58b37cd0a1a 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -1,5 +1,42 @@
# Release Notes
+## New in Release 2.9.0
+
+Post-training Quantization:
+
+- Features:
+ - (OpenVINO) Added modified AWQ algorithm for 4-bit data-aware weights compression. This algorithm is applied only to the pattern `MatMul->Multiply->MatMul`. For that, the `awq` optional parameter has been added to `nncf.compress_weights()` and can be used to minimize accuracy degradation of compressed models (note that this option increases the compression time).
+ - (ONNX) Introduced support for the ONNX backend in the `nncf.quantize_with_accuracy_control()` method. Users can now perform quantization with accuracy control for `onnx.ModelProto`. By leveraging this feature, users can enhance the accuracy of quantized models while minimizing performance impact.
+ - (ONNX) Added an example based on the YOLOv8n-seg model for demonstrating the usage of quantization with accuracy control for the ONNX backend.
+ - (PT) Added SmoothQuant algorithm for PyTorch backend in `nncf.quantize()`.
+ - (OpenVINO) Added [an example](examples/llm_compression/openvino/tiny_llama_find_hyperparams) with the hyperparameters tuning for the TinyLLama model.
+ - Introduced the `nncf.AdvancedAccuracyRestorerParameters`.
+ - Introduced the `subset_size` option for the `nncf.compress_weights()`.
+ - Introduced `TargetDevice.NPU` as the replacement for `TargetDevice.VPU`.
+- Fixes:
+ - Fixed API Enums serialization/deserialization issue.
+ - Fixed issue with required arguments for `revert_operations_to_floating_point_precision` method.
+- Improvements:
+ - (ONNX) Aligned statistics collection with OpenVINO and PyTorch backends.
+ - Extended `nncf.compress_weights()` with Convolution & Embeddings compression in order to reduce memory footprint.
+- Deprecations/Removals:
+ - (OpenVINO) Removed outdated examples with `nncf.quantize()` for BERT and YOLOv5 models.
+ - (OpenVINO) Removed outdated example with `nncf.quantize_with_accuracy_control()` for SSD MobileNetV1 FPN model.
+ - (PyTorch) Deprecated the `binarization` algorithm.
+ - Removed Post-training Optimization Tool as OpenVINO backend.
+ - Removed Dockerfiles.
+ - `TargetDevice.VPU` was replaced by `TargetDevice.NPU`.
+- Tutorials:
+ - [Post-Training Optimization of Stable Diffusion v2 Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/236-stable-diffusion-v2/236-stable-diffusion-v2-text-to-image.ipynb)
+ - [Post-Training Optimization of DeciDiffusion Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/259-decidiffusion-image-generation/259-decidiffusion-image-generation.ipynb)
+ - [Post-Training Optimization of DepthAnything Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/280-depth-anything/280-depth-anything.ipynb)
+ - [Post-Training Optimization of Stable Diffusion ControlNet Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/235-controlnet-stable-diffusion/235-controlnet-stable-diffusion.ipynb)
+
+Compression-aware training:
+
+- Fixes
+ - (PyTorch) Fixed issue with `NNCFNetworkInterface.get_clean_shallow_copy` missed arguments.
+
## New in Release 2.8.1
Post-training Quantization:
@@ -22,7 +59,7 @@ Post-training Quantization:
- (OpenVINO) Changed default OpenVINO opset from 9 to 13.
- Features:
- (OpenVINO) Added 4-bit data-aware weights compression. For that `dataset` optional parameter has been added to `nncf.compress_weights()` and can be used to minimize accuracy degradation of compressed models (note that this option increases the compression time).
- - (PyTorch) Added support for PyTorch models with shared weights and custom PyTorch modules in `nncf.compress_weights()`. The weights compression algorithm for PyTorch models is now based on tracing the model graph. The `dataset` parameter is now required in `nncf.compress_weights()` for the compression of PyTorch models.
+ - (PyTorch) Added support for PyTorch models with shared weights and custom PyTorch modules in `nncf.compress_weights()`. The weights compression algorithm for PyTorch models is now based on tracing the model graph. The `dataset` parameter is now required in `nncf.compress_weights()` for the compression of PyTorch models.
- (Common) Renamed the `nncf.CompressWeightsMode.INT8` to `nncf.CompressWeightsMode.INT8_ASYM` and introduce `nncf.CompressWeightsMode.INT8_SYM` that can be efficiently used with dynamic 8-bit quantization of activations.
The original `nncf.CompressWeightsMode.INT8` enum value is now deprecated.
- (OpenVINO) Added support for quantizing the ScaledDotProductAttention operation from OpenVINO opset 13.
diff --git a/docker/README.md b/docker/README.md
deleted file mode 100644
index 7a1f4d37465..00000000000
--- a/docker/README.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# Using docker
-
-## Step 1. Install docker
-
-Review the instructions for installation docker [here](https://docs.docker.com/engine/install/ubuntu/) and configure Docker
-to use a proxy server as [here](https://docs.docker.com/network/proxy/#configure-the-docker-client).
-
-## Step 2. Install nvidia-docker
-
-*Skip this step if you don't have GPU.*
-
-Review the instructions for installation docker [here](https://github.com/NVIDIA/nvidia-docker).
-
-## Step 3. Build image
-
-In the project folder run in terminal:
-
-```bash
-sudo docker image build --network=host
-```
-
-Use `--network` to duplicate the network settings of your localhost into context build.
-
-## Step 4. Run container
-
-Run in terminal:
-
-```bash
-sudo docker run \
--it \
---name= \
---runtime=nvidia \
---network=host \
---shm-size=1g \
---ulimit memlock=-1 \
---mount type=bind,source=,target= \
---mount type=bind,source=,target=/home/nncf \
-
- ```
-
-You should not use `--runtime=nvidia` if you want to use `--cpu-only` mode.
-
-Use `--shm-size` to increase the size of the shared memory directory.
-
-Now you have a working container and you can run examples.
diff --git a/docker/onnx/openvinoep/Dockerfile b/docker/onnx/openvinoep/Dockerfile
deleted file mode 100644
index fd5a5d6a3e6..00000000000
--- a/docker/onnx/openvinoep/Dockerfile
+++ /dev/null
@@ -1,57 +0,0 @@
-FROM ubuntu:20.04
-
-ARG PIP_EXTRA_INDEX_URL
-ARG PIP_TRUSTED_HOST
-ARG http_proxy
-ARG https_proxy
-ARG no_proxy
-
-RUN echo "PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}" && \
- echo "PIP_TRUSTED_HOST=${PIP_TRUSTED_HOST}" && \
- echo "http_proxy=${http_proxy}" && \
- echo "https_proxy=${https_proxy}" && \
- echo "no_proxy=${no_proxy}" && \
-
-ARG DEBIAN_FRONTEND=noninteractive
-ARG TZ=Etc/UTC
-
-RUN apt-get update && apt-get -y install --no-install-recommends \
- git=2.25 \
- build-essential=12.8 \
- python3.8-dev=3.8 \
- python3.8-venv=3.8 \
- python3-opencv=4.2 \
- && rm -rf /var/lib/apt/lists/*
-
-# Add user
-ARG BUILD_UID=1001
-ARG BUILD_USER=onnxruntimedev
-
-RUN adduser --uid $BUILD_UID $BUILD_USER && \
- usermod -a -G video,users ${BUILD_USER}
-ENV WORKDIR_PATH /home/${BUILD_USER}
-
-# Copy nncf
-WORKDIR ${WORKDIR_PATH}/nncf
-COPY nncf nncf
-COPY examples examples
-COPY tests tests
-COPY setup.py ./
-COPY README.md ./
-COPY Makefile ./
-
-WORKDIR ${WORKDIR_PATH}
-RUN chown -R ${BUILD_USER}:${BUILD_USER} nncf
-
-USER ${BUILD_USER}
-
-# Create & activate venv
-ENV VIRTUAL_ENV=${WORKDIR_PATH}/venv
-RUN python3 -m venv $VIRTUAL_ENV
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
-
-WORKDIR ${WORKDIR_PATH}/nncf
-ENV PYTHONPATH=$PYTHONPATH:${WORKDIR_PATH}/nncf
-RUN make install-onnx-dev
-
-WORKDIR ${WORKDIR_PATH}
diff --git a/docker/onnx/openvinoep/build.sh b/docker/onnx/openvinoep/build.sh
deleted file mode 100755
index b49c484f9a3..00000000000
--- a/docker/onnx/openvinoep/build.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env bash
-
-SCRIPT_DIR=$(dirname "$0")
-WORK_DIR="${SCRIPT_DIR}/../../../"
-
-cd $WORK_DIR && echo "WORK_DIR=$PWD"
-
-docker build -t onnx_ptq_experimental:dev \
- --build-arg http_proxy=$http_proxy \
- --build-arg https_proxy=$https_proxy \
- --build-arg no_proxy=$no_proxy \
- --build-arg PIP_EXTRA_INDEX_URL=$PIP_EXTRA_INDEX_URL \
- --build-arg PIP_TRUSTED_HOST=$PIP_TRUSTED_HOST \
- -f docker/onnx/openvinoep/Dockerfile \
- .
diff --git a/docker/tensorflow/gpu/Dockerfile b/docker/tensorflow/gpu/Dockerfile
deleted file mode 100644
index 8f44ab59b0c..00000000000
--- a/docker/tensorflow/gpu/Dockerfile
+++ /dev/null
@@ -1,44 +0,0 @@
-FROM nvidia/cuda:11.0.3-cudnn8-runtime-ubuntu20.04
-
-RUN apt-get update \
- && apt-get install -y --no-install-recommends \
- apt-transport-https=2.0 \
- git=2.25 && \
- rm -rf /var/lib/apt/lists/*
-
-RUN apt-get update \
- && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
- build-essential=12.8 \
- libgl1-mesa-glx=21.2 \
- libglib2.0-dev=2.64 \
- wget=1.20 \
- curl=7.68 \
- zip=3.0 \
- unzip=6.0 \
- nano=4.8 \
- openssh-server=1:8.2 \
- openssh-client=1:8.2 \
- sudo=1.8 \
- python3=3.8 \
- python3-dev=3.8 \
- python3-pip=20.0 \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/*
-
-WORKDIR /usr/bin
-RUN ln -s python3.8 python
-
-RUN pip3 install --no-cache-dir --upgrade pip==23.3 \
- && pip3 install --no-cache-dir setuptools==69.0
-
-ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
-ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
-
-# nvidia-container-runtime
-ENV NVIDIA_VISIBLE_DEVICES all
-ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
-
-ENTRYPOINT ["cd /home/nncf \
- && python setup.py install --tf \
- && pip3 install -r examples/tensorflow/requirements.txt \
- && bash"]
diff --git a/docker/torch/cpu/Dockerfile b/docker/torch/cpu/Dockerfile
deleted file mode 100755
index eea028ef8df..00000000000
--- a/docker/torch/cpu/Dockerfile
+++ /dev/null
@@ -1,28 +0,0 @@
-FROM ubuntu:20.04
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
- apt-transport-https=2.0 \
- git=2.25 && \
- rm -rf /var/lib/apt/lists/*
-
-RUN apt-get update && \
- DEBIAN_FRONTEND=noninteractive \
- apt-get install -y --no-install-recommends \
- wget=1.20 \
- curl=7.68 \
- zip=3.0 \
- unzip=6.0 \
- nano=4.8 \
- openssh-server=1:8.2 \
- openssh-client=1:8.2 \
- sudo=1.8 \
- python3=3.8 \
- python3-dev=3.8 \
- python3-pip=20.0 \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/*
-
-ENTRYPOINT ["cd /home/nncf \
- && python setup.py install --torch \
- && pip3 install -r examples/torch/requirements.txt \
- && bash"]
diff --git a/docker/torch/gpu/Dockerfile b/docker/torch/gpu/Dockerfile
deleted file mode 100755
index 26e902f3c89..00000000000
--- a/docker/torch/gpu/Dockerfile
+++ /dev/null
@@ -1,44 +0,0 @@
-FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
- apt-transport-https=2.0 \
- git=2.25 && \
- rm -rf /var/lib/apt/lists/*
-
-# Required for nvidia-docker v1
-RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
- echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
-
-
-RUN apt-get update && \
- DEBIAN_FRONTEND=noninteractive \
- apt-get install -y --no-install-recommends \
- wget=1.20 \
- curl=7.68 \
- zip=3.0 \
- unzip=6.0 \
- nano=4.8 \
- openssh-server=1:8.2 \
- openssh-client=1:8.2 \
- sudo=1.8 \
- python3=3.8 \
- python3-dev=3.8 \
- python3-pip=20.0 \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/*
-
-WORKDIR /usr/bin
-RUN ln -s python3.8 python
-
-
-ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
-ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
-
-# nvidia-container-runtime
-ENV NVIDIA_VISIBLE_DEVICES all
-ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
-
-ENTRYPOINT ["cd /home/nncf \
- && python setup.py install --torch \
- && pip3 install -r examples/torch/requirements.txt \
- && bash"]
diff --git a/docs/ConfigFile.md b/docs/ConfigFile.md
index ed131bf495b..2573509408d 100644
--- a/docs/ConfigFile.md
+++ b/docs/ConfigFile.md
@@ -27,7 +27,7 @@ Below is an example of the NNCF configuration file:
},
]
},
- "target_device": "VPU", // The target device, the specificity of which will be taken into account while compressing in order to obtain the best performance for this type of device. The default "ANY" means compatible quantization supported by any HW. The parameter takes values from the set ('CPU', 'GPU', 'VPU', 'ANY', 'TRIAL', 'CPU_SPR'). Set this value to 'TRIAL' if you are going to use a custom quantization schema.. Optional.
+ "target_device": "NPU", // The target device, the specificity of which will be taken into account while compressing in order to obtain the best performance for this type of device. The default "ANY" means compatible quantization supported by any HW. The parameter takes values from the set ('CPU', 'GPU', 'NPU', 'ANY', 'TRIAL', 'CPU_SPR'). Set this value to 'TRIAL' if you are going to use a custom quantization schema. Optional.
"compression": [ // One or more definitions for the compression algorithms to be applied to the model; either a single JSON object or an array of JSON objects. See README for each compression algorithm for a description of the available config parameters.
{
"algorithm": quantization,
diff --git a/docs/FAQ.md b/docs/FAQ.md
index 39d8a6e0bc3..21ccaa74144 100644
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -151,7 +151,7 @@ class MyInitializingDataLoader(PTInitializingDataLoader):
# in this function, for instance, if your dataloader returns dictionaries where
# the input image is under key `"img"`, and your YOLOv8 model accepts the input
# images as 0-th `forward` positional arg, you would do:
- return dataloader_output["img"], {}
+ return (dataloader_output["img"],), {}
def get_target(self, dataloader_output: Any) -> Any:
# and in this function you should extract the "ground truth" value from your
diff --git a/docs/Installation.md b/docs/Installation.md
index 9f0dbce77f9..3aec19e8e0f 100644
--- a/docs/Installation.md
+++ b/docs/Installation.md
@@ -58,10 +58,6 @@ pip install git+https://github.com/openvinotoolkit/nncf@bd189e2#egg=nncf
Note that in order for this to work for pip versions >= 21.3, your Git version must be at least 2.22.
-## As a Docker image
-
-Use one of the Dockerfiles in the [docker](../docker) directory to build an image with an environment already set up and ready for running NNCF [sample scripts](../README.md#model-compression-samples).
-
## Corresponding versions
The following table lists the recommended corresponding versions of backend packages
@@ -69,7 +65,8 @@ as well as the supported versions of Python:
| NNCF | OpenVINO | PyTorch | ONNX | TensorFlow | Python |
|-----------|------------|----------|----------|------------|--------|
-| `develop` | `2023.3.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` |
+| `develop` | `2024.4.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` |
+| `2.9.0` | `2024.4.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` |
| `2.8.1` | `2023.3.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` |
| `2.8.0` | `2023.3.0` | `2.1.2` | `1.13.1` | `2.12.0` | `3.8` |
| `2.7.0` | `2023.2.0` | `2.1` | `1.13.1` | `2.12.0` | `3.8` |
diff --git a/docs/api/source/conf.py b/docs/api/source/conf.py
index bb529ea22c5..81ef16c0696 100644
--- a/docs/api/source/conf.py
+++ b/docs/api/source/conf.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 Intel Corporation
+# Copyright (c) 2024 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -22,7 +22,7 @@
project = "NNCF"
html_title = "NNCF"
-copyright_ = "2023, Intel Corporation"
+copyright_ = "2024, Intel Corporation"
author = "Intel Corporation"
extensions = ["autoapi.extension", "sphinx.ext.autodoc", "sphinx.ext.linkcode"]
@@ -83,28 +83,30 @@ def collect_api_entities() -> APIInfo:
print(f"{modname}")
for obj_name, obj in inspect.getmembers(module):
objects_module = getattr(obj, "__module__", None)
- if objects_module == modname:
- if inspect.isclass(obj) or inspect.isfunction(obj):
- if hasattr(obj, api.API_MARKER_ATTR):
- marked_object_name = obj._nncf_api_marker
- # Check the actual name of the originally marked object
- # so that the classes derived from base API classes don't
- # all automatically end up in API
- if marked_object_name != obj.__name__:
- continue
- fqn = f"{modname}.{obj_name}"
- if hasattr(obj, api.CANONICAL_ALIAS_ATTR):
- canonical_import_name = getattr(obj, api.CANONICAL_ALIAS_ATTR)
- if canonical_import_name in canonical_imports_seen:
- assert False, f"Duplicate canonical_alias detected: {canonical_import_name}"
- retval.fqn_vs_canonical_name[fqn] = canonical_import_name
- retval.canonical_name_vs_fqn[canonical_import_name] = fqn
- canonical_imports_seen.add(canonical_import_name)
- if canonical_import_name == fqn:
- print(f"\t{obj_name}")
- else:
- print(f"\t{obj_name} -> {canonical_import_name}")
- retval.api_names_vs_obj_dict[fqn] = obj
+ if (
+ objects_module == modname
+ and (inspect.isclass(obj) or inspect.isfunction(obj))
+ and hasattr(obj, api.API_MARKER_ATTR)
+ ):
+ marked_object_name = obj._nncf_api_marker
+ # Check the actual name of the originally marked object
+ # so that the classes derived from base API classes don't
+ # all automatically end up in API
+ if marked_object_name != obj.__name__:
+ continue
+ fqn = f"{modname}.{obj_name}"
+ if hasattr(obj, api.CANONICAL_ALIAS_ATTR):
+ canonical_import_name = getattr(obj, api.CANONICAL_ALIAS_ATTR)
+ if canonical_import_name in canonical_imports_seen:
+ assert False, f"Duplicate canonical_alias detected: {canonical_import_name}"
+ retval.fqn_vs_canonical_name[fqn] = canonical_import_name
+ retval.canonical_name_vs_fqn[canonical_import_name] = fqn
+ canonical_imports_seen.add(canonical_import_name)
+ if canonical_import_name == fqn:
+ print(f"\t{obj_name}")
+ else:
+ print(f"\t{obj_name} -> {canonical_import_name}")
+ retval.api_names_vs_obj_dict[fqn] = obj
print()
skipped_str = "\n".join([f"{k}: {v}" for k, v in skipped_modules.items()])
diff --git a/docs/compression_algorithms/CompressWeights.md b/docs/compression_algorithms/CompressWeights.md
index 83b38785703..927c3671bb3 100644
--- a/docs/compression_algorithms/CompressWeights.md
+++ b/docs/compression_algorithms/CompressWeights.md
@@ -4,14 +4,14 @@
#### The algorithm description
-The Weights Compression algorithm is aimed at compressing the weights of the models and can be used to optimize the model footprint and performance of large models where the size of weights is relatively larger than the size of activations, for example, Large Language Models (LLM). The algorithm compresses weights only for Linear and Embedding layers.
+The Weights Compression algorithm is aimed at compressing the weights of the models and can be used to optimize the model footprint and performance of large models where the size of weights is relatively larger than the size of activations, for example, Large Language Models (LLM). The algorithm compresses weights for Linear, Convolution and Embedding layers.
#### Supported modes
By default, weights are compressed asymmetrically to 8-bit integer data type - "INT8_ASYM" mode.
OpenVINO backend also supports 3 modes of mixed precision weight quantization with a 4-bit data type as a primary precision - INT4_SYM, INT4_ASYM and NF4. The primary precision in case of INT4_SYM mode is unsigned 4-bit integer and weights are quantized to it [symmetrically](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#symmetric-quantization) with a fixed zero point equals to 8. In case of INT4_ASYM mode - also unsigned 4-bit integer, but weight are quantized to it [asymmetrically](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#asymmetric-quantization) with a typical non-fixed zero point. In case of NF4 mode - [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without zero point.
All 4-bit modes have a grouped quantization support, when small group of weights (e.g. 128) in the channel dimension share quantization parameters (scale).
-All embeddings and last linear layers are always compressed to 8-bit integer data type.
+All embeddings, convolutions and last linear layers are always compressed to 8-bit integer data type. To quantize embeddings and last linear layers to 4-bit, use `all_layers=True`.
Percent of the rest layers compressed to 4-bit can be configured by "ratio" parameter. E.g. ratio=0.9 means 90% of layers compressed to the corresponding 4-bit data type and the rest to 8-bit asymmetric integer data type.
#### User guide
@@ -30,7 +30,7 @@ from nncf import compress_weights, CompressWeightsMode
compressed_model = compress_weights(model, mode=CompressWeightsMode.INT8_SYM) # model is openvino.Model object
```
-- Compress weights symmetrically to 4-bit integer data type with group size = 128, except embeddings and last linear layers - they are compressed asymmetrically to 8-bit integer data type.
+- Compress weights symmetrically to 4-bit integer data type with group size = 128, except embeddings, convolutions and last linear layers - they are compressed asymmetrically to 8-bit integer data type.
```python
from nncf import compress_weights, CompressWeightsMode
@@ -39,7 +39,7 @@ compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_SYM) #
- Generally, `INT4_SYM` mode is the fastest mixed-precision mode, but it may lead to a significant accuracy degradation or perplexity increase.
Compressing weights asymmetrically (`INT4_ASYM` mode) is the way to increase accuracy, however in turns it slows down inference a bit.
- If the accuracy or perplexity is still not satisfying, there are 2 more hyper-parameters to tune: `group_size` and `ratio`.
+ If the accuracy or perplexity is still not satisfying, there are 2 more hyper-parameters to tune: `group_size` and `ratio`. Please refer to the [example](https://github.com/openvinotoolkit/nncf/blob/develop/examples/llm_compression/openvino/tiny_llama_find_hyperparams) to see how to automatically tune these parameters.
Lower group size and less ratio of 4-bit layers usually improve accuracy at the sacrifice of inference speed.
Below is the example how to compress weights of 90% of layers to 4-bit integer asymmetrically with the group size 64, and
the rest of layers to 8-bit asymmetric integer data type. The same parametrization is applicable for `INT4_SYM` mode.
@@ -61,6 +61,64 @@ nncf_dataset = nncf.Dataset(data_source, transform_fn)
compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_SYM, ratio=0.8, dataset=nncf_dataset) # model is openvino.Model object
```
+- Accuracy of the 4-bit compressed models can also be improved by using the AWQ algorithm on top of the data-based mixed-precision algorithm. It is capable of equalizing a subset of weights to minimize the difference between the
+original precision and 4-bit.
+Below is an example of how to compress 80% of layers to 4-bit integer with the default data-based mixed-precision algorithm and AWQ.
+It requires setting `awq` to `True` in addition to the data-based mixed-precision algorithm.
+
+```python
+from datasets import load_dataset
+from functools import partial
+from nncf import compress_weights, CompressWeightsMode, Dataset
+from optimum.intel.openvino import OVModelForCausalLM
+from transformers import AutoTokenizer
+
+def transform_func(item, tokenizer, input_shapes):
+ text = item['text']
+ tokens = tokenizer(text)
+
+ res = {'input_ids': np.expand_dims(np.array(tokens['input_ids']), 0),
+ 'attention_mask': np.expand_dims(np.array(tokens['attention_mask']), 0)}
+
+ if 'position_ids' in input_shapes:
+ position_ids = np.cumsum(res['attention_mask'], axis=1) - 1
+ position_ids[res['attention_mask'] == 0] = 1
+ res['position_ids'] = position_ids
+
+ for name, shape in input_shapes.items():
+ if name in res:
+ continue
+ res[name] = np.zeros(shape)
+
+ return res
+
+def get_input_shapes(model, batch_size = 1):
+ inputs = {}
+
+ for val in model.model.inputs:
+ name = val.any_name
+ shape = list(val.partial_shape.get_min_shape())
+ shape[0] = batch_size
+ inputs[name] = shape
+
+ return inputs
+
+# load your model and tokenizer
+model = OVModelForCausalLM.from_pretrained(...)
+tokenizer = AutoTokenizer.from_pretrained(...)
+
+# prepare dataset for compression
+dataset = load_dataset('wikitext', 'wikitext-2-v1', split='train')
+dataset = dataset.filter(lambda example: len(example["text"]) > 80)
+input_shapes = get_input_shapes(model)
+nncf_dataset = Dataset(dataset, partial(transform_func, tokenizer=tokenizer,
+ input_shapes=input_shapes))
+
+model.model = compress_weights(model.model, mode=CompressWeightsMode.INT4_SYM, ratio=0.8, dataset=nncf_dataset, awq=True)
+
+model.save_pretrained(...)
+```
+
+- `NF4` mode can be considered for improving accuracy, but currently models quantized to nf4 should not be faster than models
quantized to 8-bit asymmetric integer. Here's the example how to compress weights to nf4 data type with group size = 128.
Different `group_size` and `ratio` are also supported.
@@ -258,6 +316,11 @@ Here is the word perplexity with data-free and data-aware mixed-precision INT4-I