Source code for eodag.api.collection

# -*- coding: utf-8 -*-
# Copyright 2025, CS GROUP - France, https://www.csgroup.eu/
#
# This file is part of EODAG project
#     https://www.github.com/CS-SI/EODAG
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import logging
import re
from collections import UserDict, UserList
from typing import TYPE_CHECKING, Any, Optional, cast

from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
from pydantic import ValidationError as PydanticValidationError
from pydantic import model_validator
from pydantic_core import ErrorDetails, InitErrorDetails, PydanticCustomError
from stac_pydantic.collection import Extent, Provider, SpatialExtent, TimeInterval
from stac_pydantic.links import Links

from eodag.api.product.metadata_mapping import NOT_AVAILABLE
from eodag.types.queryables import CommonStacMetadata
from eodag.types.stac_metadata import create_stac_metadata_model
from eodag.utils import STAC_VERSION
from eodag.utils.env import is_env_var_true
from eodag.utils.exceptions import ValidationError
from eodag.utils.repr import dict_to_html_table

if TYPE_CHECKING:
    from pydantic import ModelWrapValidatorHandler
    from typing_extensions import Self

    from eodag.api.core import EODataAccessGateway
    from eodag.api.search_result import SearchResult
    from eodag.types.queryables import QueryablesDict

# Module-level logger, registered under this module's dotted name.
logger = logging.getLogger("eodag.api.collection")

# Regular expression source (not compiled here) for RFC 3339-style timestamps.
# The calendar date is mandatory; the time-of-day section is optional, and when
# present its UTC offset is optional too. Only the number of digits is checked,
# not the value ranges (e.g. month "13" still matches).
# NOTE(review): not referenced in the visible part of this file — presumably
# imported by other modules; confirm before removing.
RFC3339_PATTERN = (
    # groups 1-3: year, month, day
    r"^(\d{4})-(\d{2})-(\d{2})"
    # groups 4-7: hours, minutes, seconds and optional fractional seconds,
    # introduced by an uppercase "T"
    r"(?:T(\d{2}):(\d{2}):(\d{2})(\.\d+)?"
    # groups 8-11: "Z" or a signed hh:mm offset
    r"(Z|([+-])(\d{2}):(\d{2}))?)?$"
)


class Collection(BaseModel):
    """A class representing a collection.

    A Collection object is used to describe a group of related
    :class:`~eodag.api.product._product.EOProduct` objects.

    Apart from ``id``, badly formatted attributes do not prevent instantiation:
    they are reset to their default value, and unknown attributes are dropped
    (see :meth:`validate_collection`).
    """

    id: str = Field()
    title: Optional[str] = Field(default=None)
    description: str = Field(default=NOT_AVAILABLE)
    extent: Extent = Field(
        default=Extent(
            spatial=SpatialExtent(bbox=[[-180.0, -90.0, 180.0, 90.0]]),  # type: ignore
            temporal=TimeInterval(interval=[[None, None]]),
        ),
        # NOTE(review): the text below says "temporal extent" although the field
        # holds both the spatial and the temporal extent; left untouched because
        # it is a runtime string exposed through the model schema.
        description=(
            "The temporal extent of the collection, following the STAC specification for extent definition (e.g. "
            '{"spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, '
            '"temporal": {"interval": [["2024-06-10T12:00:00Z", None]]}}'
            "), with date/time strings in RFC 3339 format"
        ),
    )
    keywords: Optional[list[str]] = Field(default=None)
    license: str = Field(default="other")
    links: Optional[Links] = Field(default=None)
    providers: Optional[list[Provider]] = Field(default=None)

    # summaries
    constellation: Optional[str] = Field(default=None)
    instruments: Optional[list[str]] = Field(default=None)
    platform: Optional[str] = Field(default=None)
    processing_level: Optional[str] = Field(default=None, alias="processing:level")
    sci_doi: Optional[str] = Field(default=None, alias="sci:doi")
    eodag_sensor_type: Optional[str] = Field(default=None, alias="eodag:sensor_type")

    # eodag-specific attribute
    alias: Optional[str] = Field(
        default=None,
        description="An alias given by a user to use his customized id intead of the internal id of EODAG",
        repr=False,
    )

    # path to external collection metadata file (required by stac-fastapi-eodag)
    eodag_stac_collection: Optional[str] = Field(
        default=None, alias="stacCollection", exclude=True, repr=False
    )

    # Private property to store the eodag internal id value. Not part of the model schema.
    _id: str = PrivateAttr()
    # Gateway used by search() and list_queryables(); only set by create_with_dag().
    _dag: Optional[EODataAccessGateway] = PrivateAttr(default=None)

    model_config = ConfigDict(
        extra="forbid", validate_by_name=True, serialize_by_alias=True
    )

    def model_post_init(self, context: Any) -> None:
        """Post-initialization method to set internal attributes.

        Stores the value of ``id`` in the private ``_id`` attribute.

        :param context: Unused here
        """
        self._id = self.id

    @classmethod
    def create_with_dag(cls, dag: EODataAccessGateway, **kwargs) -> Collection:
        """Create a Collection with a EODataAccessGateway instance.

        :param dag: The gateway instance to use to search products and to list queryables of the
                    collection instance
        :param kwargs: The collection attributes
        :returns: The new collection, bound to ``dag``
        """
        instance = cls(**kwargs)
        instance._dag = dag
        return instance

    @classmethod
    def get_collection_mtd_from_alias(cls, value: str) -> str:
        """Get collection metadata from alias

        Returns ``value`` unchanged when it is not the alias of any model field.

        >>> Collection.get_collection_mtd_from_alias('processing:level')
        'processing_level'

        :param value: A field alias (or any other string)
        :returns: The name of the field declared with this alias, or ``value`` itself
        """
        alias_map = {
            field_info.alias: name
            for name, field_info in cls.model_fields.items()
            if field_info.alias
        }
        return alias_map.get(value, value)

    @model_validator(mode="after")
    def set_id_from_alias(self) -> Self:
        """If an alias exists, use it to update ``id`` attribute

        The previous ``id`` value is kept in the private ``_id`` attribute.

        :returns: The instance itself
        """
        if self.alias is not None:
            self._id = self.id
            self.id = self.alias
        return self

    @model_validator(mode="wrap")
    @classmethod
    def validate_collection(
        cls, values: dict[str, Any] | Self, handler: ModelWrapValidatorHandler[Self]
    ) -> Self:
        """Allow to create a collection instance with bad formatted attributes (except ``id``).

        Set incorrectly formatted attributes to their default value and ignore extra attributes.
        Log a warning about validation errors if ``EODAG_VALIDATE_COLLECTIONS`` environment
        variable is set to ``True``.

        :param values: The raw input (a mapping of attributes, or a model instance); it is
                       cleaned in place
        :param handler: The validation handler provided by pydantic
        :returns: The validated collection
        :raises: :class:`~eodag.utils.exceptions.ValidationError`: If ``id`` is invalid, or if the
                 remaining errors cannot be fixed by resetting or dropping attributes
        """
        errors: list[ErrorDetails] = []
        # top-level parameters already reset or dropped during a previous pass
        handled_params: set[str] = set()
        values_dict: dict[str, Any] = {}

        # iterate over each step of validation where error(s) raise(s)
        while True:
            try:
                handler(values)
            except PydanticValidationError as e:
                tmp_errors = e.errors()

                # raise an error if the id is invalid
                # (model-level errors have an empty "loc", hence the emptiness check)
                if any(
                    error["loc"] and error["loc"][0] == "id" for error in tmp_errors
                ):
                    raise ValidationError.from_error(e) from e

                wrong_params = [
                    error["loc"][0]
                    for error in tmp_errors
                    if error["loc"] and isinstance(error["loc"][0], str)
                ]
                # Nothing new can be cleaned: either no error points to a named
                # attribute, or every faulty attribute was already handled and
                # still fails. Retrying would loop forever, so give up.
                if not set(wrong_params) - handled_params:
                    raise ValidationError.from_error(e) from e

                # convert values to dict if it is a model instance
                values_dict = values if isinstance(values, dict) else values.__dict__

                # set incorrectly formatted attribute(s) to their default and ignore extra attribute(s)
                for wrong_param in wrong_params:
                    field_name = cls.get_collection_mtd_from_alias(wrong_param)
                    if field_name not in cls.model_fields:
                        # pop() rather than del: the key may already be gone
                        values_dict.pop(wrong_param, None)
                    else:
                        values_dict[wrong_param] = cls.model_fields[
                            field_name
                        ].get_default()

                handled_params.update(wrong_params)
                errors.extend(tmp_errors)
            else:
                break

        # log a warning if there were validation errors and the env var is set to True
        if errors and is_env_var_true("EODAG_VALIDATE_COLLECTIONS"):
            # log all errors at once
            error_title = f"collection {values_dict['id']}"
            init_errors: list[InitErrorDetails] = [
                InitErrorDetails(
                    type=PydanticCustomError(error["type"], error["msg"]),
                    loc=error["loc"],
                    input=error["input"],
                )
                for error in errors
            ]
            pydantic_error = PydanticValidationError.from_exception_data(
                title=error_title, line_errors=init_errors
            )
            logger.warning(pydantic_error)

        # Create a fresh instance with the cleaned values
        return handler(values)

    def __str__(self) -> str:
        return f'{type(self).__name__}("{self.id}")'

    def __repr_str__(self, join_str: str) -> str:
        # attributes holding a falsy value (None, empty string, empty list...) are left out
        return join_str.join(
            repr(v) if a is None else f"{a}={v!r}"
            for a, v in self.__repr_args__()
            if v
        )

    def _repr_html_(self, embedded: bool = False) -> str:
        # The line breaks inside the header matter: CollectionsList removes it
        # with a line-based regular expression.
        thead = (
            f"""<thead><tr><td style='text-align: left; color: grey;'>
                {type(self).__name__}("<span style='color: black'>{self.id}</span>")</td></tr></thead>
            """
            if not embedded
            else ""
        )
        tr_style = "style='background-color: transparent;'" if embedded else ""
        col_html_table = dict_to_html_table(
            self.model_dump(exclude={"alias"}), depth=1, brackets=False
        )

        return (
            f"<table>{thead}<tbody>"
            f"<tr {tr_style}><td style='text-align: left;'>"
            f"{col_html_table}</td></tr>"
            "</tbody></table>"
        )

    def _ensure_dag(self) -> EODataAccessGateway:
        """Return the gateway bound to this instance.

        :raises: :class:`RuntimeError`: If the instance is not bound to a gateway
        """
        if self._dag is None:
            raise RuntimeError(
                f"Collection '{self.id}' needs EODataAccessGateway to perform this operation. "
                "Create with: Collection.create_with_dag(dag, id='...')"
            )
        return self._dag

    def _reject_collection_kwarg(self, kwargs: dict[str, Any]) -> None:
        """Refuse a ``collection`` argument, already defined by the instance itself.

        :param kwargs: The keyword arguments received by the calling method
        :raises: :class:`~eodag.utils.exceptions.ValidationError`: If ``collection`` is in ``kwargs``
        """
        collection_search_arg = "collection"
        if collection_search_arg in kwargs:
            raise ValidationError(
                f"{collection_search_arg} should not be set in kwargs since a collection instance is used",
                {collection_search_arg},
            )

    def search(self, **kwargs: Any) -> SearchResult:
        """Look for products of this collection matching criteria using the ``dag`` attribute of the instance.

        :param kwargs: Some other criteria that will be used to do the search,
                       using parameters compatible with the provider
        :returns: A collection of EO products matching the criteria.
        :raises: :class:`RuntimeError`: If the instance is not bound to a gateway
        :raises: :class:`~eodag.utils.exceptions.ValidationError`: If the `collection` argument is set in
                 `kwargs`, since it is already defined by the instance
        """
        dag = self._ensure_dag()
        self._reject_collection_kwarg(kwargs)
        return dag.search(collection=self.id, **kwargs)

    def list_queryables(self, **kwargs: Any) -> QueryablesDict:
        """Fetch the queryable properties for this collection using the ``dag`` attribute of the instance.

        :param kwargs: additional filters for queryables
        :returns: A :class:`~eodag.types.queryables.QueryablesDict` containing the EODAG queryable
                  properties, associating parameters to their annotated type, and an
                  ``additional_properties`` attribute
        :raises: :class:`RuntimeError`: If the instance is not bound to a gateway
        :raises: :class:`~eodag.utils.exceptions.ValidationError`: If the `collection` argument is set in
                 `kwargs`, since it is already defined by the instance
        """
        dag = self._ensure_dag()
        self._reject_collection_kwarg(kwargs)
        return dag.list_queryables(collection=self.id, **kwargs)

    def serialize(self) -> dict[str, Any]:
        """Serialize the Collection instance to a STAC dictionary.

        Attributes that are not top-level STAC collection fields are moved to
        ``summaries``, each as a list (strings are split on commas).

        :returns: A STAC dictionary representation of the Collection instance.
        """
        stac_dict: dict[str, Any] = {
            "stac_version": STAC_VERSION,
            "type": "Collection",
        }
        stac_dict |= self.model_dump(mode="json", exclude_none=True, exclude={"alias"})
        stac_dict.setdefault("links", [])
        stac_dict.setdefault("providers", [])

        not_in_summaries = {
            "stac_version",
            "type",
            "id",
            "title",
            "description",
            "extent",
            "keywords",
            "license",
            "links",
            "providers",
        }

        # move every other item to summaries
        summaries: dict[str, Any] = {}
        for k in [key for key in stac_dict if key not in not_in_summaries]:
            v = stac_dict.pop(k)
            if isinstance(v, list):
                summaries[k] = v
            elif isinstance(v, str):
                summaries[k] = v.split(",")
            else:
                summaries[k] = [v]
        stac_dict["summaries"] = summaries

        # add extensions
        summaries_model = cast(CommonStacMetadata, create_stac_metadata_model())
        summaries_validated = summaries_model.model_construct(
            _fields_set=None, **summaries
        )
        stac_dict["stac_extensions"] = summaries_validated.get_conformance_classes()

        return stac_dict
class CollectionsDict(UserDict[str, Collection]):
    """A UserDict object which values are :class:`~eodag.api.collection.Collection` objects,
    keyed by the private ``_id`` attribute of each collection.

    :param collections: A list of collections
    :cvar data: Mapping between collection ``_id`` and collection
    """

    def __init__(
        self,
        collections: list[Collection],
    ) -> None:
        super().__init__()
        # when two collections share the same ``_id``, the last one wins
        self.data = {collection._id: collection for collection in collections}

    def __str__(self) -> str:
        entries = [f'"{key}": {collection}' for key, collection in self.items()]
        return "{" + ", ".join(entries) + "}"

    def __repr__(self) -> str:
        return self.__str__()
class CollectionsList(UserList[Collection]):
    """An object representing a collection of :class:`~eodag.api.collection.Collection`.

    :param collections: A list of collections
    :cvar data: List of collections
    """

    def __init__(
        self,
        collections: list[Collection],
    ) -> None:
        super().__init__(collections)

    def __str__(self) -> str:
        return f"{type(self).__name__}([{', '.join(str(col) for col in self)}])"

    def __repr__(self) -> str:
        return str(self)

    def _repr_html_(self, embedded: bool = False) -> str:
        """Build the HTML representation of the list.

        :param embedded: When ``True``, the foldable wrapper is omitted and rows get a
                         transparent background
        :returns: The HTML markup
        """
        # mock "thead" tag by reproduicing its style to make "details" and "summary" tags work properly
        mock_thead = (
            f"""<details class='foldable'>
                <summary style='text-align: left; color: grey; font-size: 12px;'>
                    {type(self).__name__}&ensp;({len(self)})
                </summary>
                """
            if not embedded
            else ""
        )
        # Only close the wrapper when it was opened above; a stray "</details>"
        # would otherwise close an enclosing "details" element.
        closing = "</details>" if not embedded else ""
        tr_style = "style='background-color: transparent;'" if embedded else ""
        # strips the header lines of each collection table, already shown in "summary"
        thead_pattern = re.compile(r"(<thead>.*|.*</thead>)")

        rows = "".join(
            f"""<tr {tr_style}><td style='text-align: left;'>
                <details>
                <summary style='color: grey; font-family: monospace;'>
                    {i}&ensp;
                    {type(col).__name__}("<span style='color: black'>{col.id}</span>")
                </summary>
                {thead_pattern.sub("", col._repr_html_())}
                </details>
                </td></tr>
                """
            for i, col in enumerate(self)
        )

        return f"{mock_thead}<table><tbody>" + rows + "</tbody></table>" + closing