#                🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
#           This file was automatically generated from src/transformers/models/modernvbert/modular_modernvbert.py.
#               Do NOT edit this file manually as any edits will be overwritten by the generation of
#             the file from the modular. If any change should be done, please apply the change to the
#                          modular_modernvbert.py file directly. One of our CI enforces this.
#                🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# Copyright 2026 Illuin Technology and contributors, and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Literal

from ...configuration_utils import PretrainedConfig
from ..auto import CONFIG_MAPPING, AutoConfig


class ModernVBertConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`ModernVBert`] model. It is used to
    instantiate a ModernVBert model according to the specified arguments and defines the model architecture.
    e.g. [ModernVBERT/modernvbert](https://huggingface.co/ModernVBERT/modernvbert).

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
    See the documentation for [`PretrainedConfig`] for more details.

    Args:
        text_config (`PretrainedConfig` or `dict`, *optional*): Configuration for the text encoder. When `None`, a default `"modernbert"` configuration is created. When a `dict`, it is turned into a configuration object through `CONFIG_MAPPING`, using the dict's `"model_type"` entry if it is a registered model type and `"modernbert"` otherwise. Any other value is stored as is.
        vision_config (`PretrainedConfig` or `dict`, *optional*): Configuration for the vision encoder. When `None`, a default `"siglip_vision_model"` configuration is created. When a `dict`, it is turned into a configuration object through `CONFIG_MAPPING`, using the dict's `"model_type"` entry if it is a registered model type and `"siglip_vision_model"` otherwise. Any other value is stored as is.
        image_token_id (`int | None`, *optional*, defaults to 50407): The token id reserved for image tokens inserted into the text stream.
        pixel_shuffle_factor (`int | None`, *optional*, defaults to 4): Scale factor used by any pixel-shuffle / upsampling operations in the vision head.
        initializer_range (`float | None`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        initializer_cutoff_factor (`float | None`, *optional*, defaults to 2.0): The cutoff factor for the truncated_normal_initializer for initializing all weight matrices.
        classifier_pooling (`Literal["cls", "mean"]`, *optional*, defaults to `"cls"`): The pooling strategy to use for classification tasks.
        classifier_dropout (`float | None`, *optional*, defaults to 0.0): The dropout probability for the classification head.
        classifier_bias (`bool | None`, *optional*, defaults to `False`): Whether to add a bias term to the classification head.

    Raises:
        ValueError: If `classifier_pooling` is neither `"cls"` nor `"mean"`.

    Example:
    ```python
    >>> from transformers import ModernVBertConfig

    >>> # Initializing configuration
    >>> configuration = ModernVBertConfig()

    >>> # Initializing a model from the configuration (model class is implemented in
    >>> # `modernvbert.modeling_modernvbert`)

    >>> from transformers import ModernVBertModel
    >>> model = ModernVBertModel(configuration)

    >>> # Accessing the model configuration
    >>> cfg = model.config
    ```"""

    model_type = "modernvbert"
    sub_configs: dict[str, Any] = {"text_config": AutoConfig, "vision_config": AutoConfig}

    @staticmethod
    def _build_sub_config(config, default_model_type: str):
        """Turn `None` / `dict` sub-config input into a configuration object.

        Args:
            config: `None`, a `dict` of configuration attributes, or an already-built configuration object.
            default_model_type (`str`): Key into `CONFIG_MAPPING` used when `config` is `None`, or when it is a
                `dict` whose `"model_type"` entry is missing or not registered.

        Returns:
            The configuration object. Inputs that are neither `None` nor a `dict` are returned unchanged.
        """
        if config is None:
            return CONFIG_MAPPING[default_model_type]()
        if isinstance(config, dict):
            # Both sub-configs are declared as `AutoConfig`, so honour the model type recorded in the
            # dict instead of always forcing the default class. Unknown / missing types keep the
            # default so that previously accepted inputs still load.
            model_type = config.get("model_type", default_model_type)
            if model_type not in CONFIG_MAPPING:
                model_type = default_model_type
            # The caller's dict is only read (unpacked), never mutated.
            return CONFIG_MAPPING[model_type](**config)
        return config

    def __init__(
        self,
        text_config=None,
        vision_config=None,
        image_token_id: int | None = 50407,
        pixel_shuffle_factor: int | None = 4,
        initializer_range: float | None = 0.02,
        initializer_cutoff_factor: float | None = 2.0,
        classifier_pooling: Literal["cls", "mean"] = "cls",
        classifier_dropout: float | None = 0.0,
        classifier_bias: bool | None = False,
        **kwargs,
    ):
        # Validate before touching any state so an invalid value never yields a half-built config.
        if classifier_pooling not in ["cls", "mean"]:
            raise ValueError(
                f'Invalid value for `classifier_pooling`, should be either "cls" or "mean", but is {classifier_pooling}.'
            )

        self.text_config = self._build_sub_config(text_config, "modernbert")
        self.vision_config = self._build_sub_config(vision_config, "siglip_vision_model")

        self.pixel_shuffle_factor = pixel_shuffle_factor
        self.initializer_range = initializer_range
        self.initializer_cutoff_factor = initializer_cutoff_factor
        self.classifier_pooling = classifier_pooling
        self.classifier_dropout = classifier_dropout
        self.classifier_bias = classifier_bias

        # `image_token_id` is handed to the base class together with the remaining keyword arguments.
        super().__init__(image_token_id=image_token_id, **kwargs)

# Names exported by `from <this module> import *`: only the configuration class.
__all__ = ["ModernVBertConfig"]
