diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..78d362b --- /dev/null +++ b/.env.example @@ -0,0 +1,14 @@ +DB_ENGINE=django.db.backends.postgresql +DB_NAME=postgres +DB_USER=postgres +DB_PASSWORD=postgres +DB_HOST=backend_db +DB_PORT=5432 +TELEMETRY_URL=http://otel-collector:4317 +ELASTICSEARCH_INDEX=http://elasticsearch:9200 +ELASTICSEARCH_USERNAME=elastic +ELASTICSEARCH_PASS=changeme +URL_WHITELIST=http://localhost:8000,http://localhost,http://localhost:3000 +DEBUG=True +SECRET_KEY=your-secret-key +REDIS_URL=redis://redis:6379/1 diff --git a/.github/workflows/deploy-to-ecs.yml b/.github/workflows/deploy-to-ecs.yml new file mode 100644 index 0000000..4fe1415 --- /dev/null +++ b/.github/workflows/deploy-to-ecs.yml @@ -0,0 +1,147 @@ +name: Deploy to Amazon ECS + +on: + push: + branches: + - dev + workflow_dispatch: + +env: + AWS_REGION: ${{ secrets.AWS_REGION }} + ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY }} + ECS_CLUSTER: ${{ secrets.ECS_CLUSTER }} + ECS_EXECUTION_ROLE_ARN: ${{ secrets.ECS_EXECUTION_ROLE_ARN }} + APP_NAME: dataspace + APP_PORT: 8000 + DB_ENGINE: django.db.backends.postgresql + DB_PORT: 5432 + DEBUG_MODE: "False" + TELEMETRY_URL: http://otel-collector:4317 + CPU_UNITS: 256 + MEMORY_UNITS: 512 + SSM_PATH_PREFIX: /dataspace + ENVIRONMENT: ${{ secrets.ENVIRONMENT || 'dev' }} + +jobs: + deploy-infrastructure: + name: Deploy Infrastructure + runs-on: ubuntu-latest + environment: development + if: github.event_name == 'workflow_dispatch' || contains(github.event.head_commit.modified, 'aws/cloudformation') + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + + - name: Deploy CloudFormation stack + run: | + aws cloudformation deploy \ + --template-file aws/cloudformation/dataspace-infrastructure.yml \ + --stack-name dataspace-${{ env.ENVIRONMENT }}-infrastructure \ + --parameter-overrides \ + Environment=${{ env.ENVIRONMENT }} \ + VpcId=${{ secrets.VPC_ID }} \ + SubnetIds=${{ secrets.SUBNET_IDS }} \ + DBUsername=${{ secrets.DB_USERNAME }} \ + DBPassword=${{ secrets.DB_PASSWORD }} \ + DBName=${{ secrets.DB_NAME }} \ + ElasticsearchPassword=${{ secrets.ELASTICSEARCH_PASSWORD }} \ + DjangoSecretKey=${{ secrets.DJANGO_SECRET_KEY }} \ + --capabilities CAPABILITY_IAM \ + --no-fail-on-empty-changeset + + deploy-app: + name: Deploy Application + runs-on: ubuntu-latest + environment: development + needs: deploy-infrastructure + if: always() # Run even if infrastructure deployment is skipped + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + + - name: Build, tag, and push image to Amazon ECR + id: build-image + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + IMAGE_TAG: ${{ github.sha }} + run: | + docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . 
+ docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG + echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT + + - name: Download task definition and get EFS ID + run: | + aws ecs describe-task-definition --task-definition dataspace --query taskDefinition > aws/current-task-definition.json + aws ecs describe-task-definition --task-definition dataspace-otel-collector --query taskDefinition > aws/current-otel-task-definition.json + # Get the EFS ID from CloudFormation export + EFS_ID=$(aws cloudformation list-exports --query "Exports[?Name=='dataspace-${{ env.ENVIRONMENT }}-MigrationsFileSystemId'].Value" --output text) + echo "EFS_ID=$EFS_ID" >> $GITHUB_ENV + + - name: Update container image only + id: task-def-app + uses: aws-actions/amazon-ecs-render-task-definition@v1 + with: + task-definition: aws/current-task-definition.json + container-name: dataspace + image: ${{ steps.build-image.outputs.image }} + + - name: Deploy main application ECS task definition + uses: aws-actions/amazon-ecs-deploy-task-definition@v1 + with: + task-definition: ${{ steps.task-def-app.outputs.task-definition }} + service: ${{ secrets.ECS_SERVICE }} + cluster: ${{ env.ECS_CLUSTER }} + wait-for-service-stability: true + + deploy-otel: + name: Deploy OpenTelemetry Collector + runs-on: ubuntu-latest + environment: development + needs: deploy-app + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + + - name: Download current OpenTelemetry task definition + id: download-otel-taskdef + run: | + aws ecs describe-task-definition \ + --task-definition dataspace-otel-collector \ + --query taskDefinition > aws/current-otel-task-definition.json + cat aws/current-otel-task-definition.json + + - name: Deploy OpenTelemetry ECS task definition + uses: aws-actions/amazon-ecs-deploy-task-definition@v1 + with: + task-definition: aws/current-otel-task-definition.json + service: ${{ secrets.ECS_OTEL_SERVICE }} + cluster: ${{ env.ECS_CLUSTER }} + wait-for-service-stability: true diff --git a/.gitignore b/.gitignore index 434ee87..707db78 100644 --- a/.gitignore +++ b/.gitignore @@ -155,3 +155,7 @@ resources/ .env api/migrations/* authorization/migrations/* + + +# AWS files +aws/.env.* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 186db56..25a71ad 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,6 +5,7 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml + exclude: ^aws/cloudformation/.*\.yml$ - id: check-added-large-files - id: debug-statements @@ -20,6 +21,17 @@ repos: - id: isort args: ["--profile", "black"] +- repo: local + hooks: + - id: cloudformation-validate + name: AWS CloudFormation Validation + description: Validates CloudFormation templates using AWS CLI + entry: bash -c 'aws cloudformation validate-template --template-body file://$0 || exit 1' + language: system + files: ^aws/cloudformation/.*\.yml$ + require_serial: true + pass_filenames: true + - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.9.0 hooks: diff --git a/Dockerfile b/Dockerfile index a16a322..30967ee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,10 +12,18 @@ RUN echo 'deb http://archive.debian.org/debian stretch main contrib non-free' >> WORKDIR /code COPY . 
/code/ -RUN pip install psycopg2-binary +RUN pip install psycopg2-binary uvicorn RUN pip install -r requirements.txt -#RUN python manage.py migrate + +# Create healthcheck script +RUN echo '#!/bin/bash\nset -e\npython -c "import sys; import django; django.setup(); sys.exit(0)"' > /code/healthcheck.sh \ + && chmod +x /code/healthcheck.sh EXPOSE 8000 -#CMD ["python", "manage.py", "runserver", "0.0.0.0:8000"] \ No newline at end of file + +# Make entrypoint script executable +RUN chmod +x /code/docker-entrypoint.sh + +ENTRYPOINT ["/code/docker-entrypoint.sh"] +CMD ["uvicorn", "DataSpace.asgi:application", "--host", "0.0.0.0", "--port", "8000"] diff --git a/api/activities/decorators.py b/api/activities/decorators.py index 2d64441..ab7cbf7 100644 --- a/api/activities/decorators.py +++ b/api/activities/decorators.py @@ -39,7 +39,7 @@ def decorator(func: F) -> F: def wrapper(*args: Any, **kwargs: Any) -> Any: # Extract request from args (typically the first or second argument in view functions) request = None - for arg in args: + for arg in list(args) + list(kwargs.values()): if isinstance(arg, HttpRequest): request = arg break diff --git a/api/models/UseCase.py b/api/models/UseCase.py index f085e70..cd4febb 100644 --- a/api/models/UseCase.py +++ b/api/models/UseCase.py @@ -54,6 +54,7 @@ class UseCase(models.Model): ) started_on = models.DateField(blank=True, null=True) completed_on = models.DateField(blank=True, null=True) + platform_url = models.URLField(blank=True, null=True) def save(self, *args: Any, **kwargs: Any) -> None: if self.title and not self.slug: diff --git a/api/schema/dataset_schema.py b/api/schema/dataset_schema.py index 372511a..84c4d73 100644 --- a/api/schema/dataset_schema.py +++ b/api/schema/dataset_schema.py @@ -18,6 +18,7 @@ ResourceChartDetails, ResourceChartImage, Sector, + UseCase, ) from api.models.Dataset import Tag from api.models.DatasetMetadata import DatasetMetadata @@ -31,7 +32,12 @@ from api.types.type_organization import TypeOrganization from api.types.type_resource_chart import TypeResourceChart from api.types.type_resource_chart_image import TypeResourceChartImage -from api.utils.enums import DatasetAccessType, DatasetLicense, DatasetStatus +from api.utils.enums import ( + DatasetAccessType, + DatasetLicense, + DatasetStatus, + UseCaseStatus, +) from api.utils.graphql_telemetry import trace_resolver from authorization.models import DatasetPermission, OrganizationMembership, Role, User from authorization.permissions import ( @@ -469,20 +475,30 @@ def get_publishers(self, info: Info) -> List[Union[TypeOrganization, TypeUser]]: published_datasets = Dataset.objects.filter( status=DatasetStatus.PUBLISHED.value ) + published_ds_organizations = published_datasets.values_list( + "organization_id", flat=True + ) + published_usecases = UseCase.objects.filter( + status=UseCaseStatus.PUBLISHED.value + ) + published_uc_organizations = published_usecases.values_list( + "organization_id", flat=True + ) + published_organizations = set(published_ds_organizations) | set( + published_uc_organizations + ) # Get unique organizations that have published datasets org_publishers = Organization.objects.filter( - id__in=published_datasets.filter(organization__isnull=False).values_list( - "organization_id", flat=True - ) + id__in=published_organizations ).distinct() + published_ds_users = published_datasets.values_list("user_id", flat=True) + published_uc_users = published_usecases.values_list("user_id", flat=True) + published_users = set(published_ds_users) | set(published_uc_users) + 
# Get unique individual users who have published datasets without an organization - individual_publishers = User.objects.filter( - id__in=published_datasets.filter(organization__isnull=True).values_list( - "user_id", flat=True - ) - ).distinct() + individual_publishers = User.objects.filter(id__in=published_users).distinct() # Convert to GraphQL types org_types = [TypeOrganization.from_django(org) for org in org_publishers] @@ -564,6 +580,10 @@ def add_update_dataset_metadata( dataset = Dataset.objects.get(id=dataset_id) except Dataset.DoesNotExist as e: raise DjangoValidationError(f"Dataset with ID {dataset_id} does not exist.") + if dataset.status != DatasetStatus.DRAFT.value: + raise DjangoValidationError( + f"Dataset with ID {dataset_id} is not in draft status." + ) if update_metadata_input.description: dataset.description = update_metadata_input.description @@ -616,11 +636,14 @@ def update_dataset( dataset = Dataset.objects.get(id=dataset_id) except Dataset.DoesNotExist as e: raise ValueError(f"Dataset with ID {dataset_id} does not exist.") - + if dataset.status != DatasetStatus.DRAFT.value: + raise ValueError(f"Dataset with ID {dataset_id} is not in draft status.") + if update_dataset_input.title.strip() == "": + raise ValueError("Title cannot be empty.") if update_dataset_input.title: - dataset.title = update_dataset_input.title + dataset.title = update_dataset_input.title.strip() if update_dataset_input.description: - dataset.description = update_dataset_input.description + dataset.description = update_dataset_input.description.strip() if update_dataset_input.access_type: dataset.access_type = update_dataset_input.access_type if update_dataset_input.license: diff --git a/api/schema/organization_data_schema.py b/api/schema/organization_data_schema.py index 7d5744f..0620b23 100644 --- a/api/schema/organization_data_schema.py +++ b/api/schema/organization_data_schema.py @@ -4,6 +4,7 @@ import strawberry import strawberry_django +from django.db.models import Q from strawberry.types import Info from api.models import Dataset, Organization, Sector, UseCase @@ -58,7 +59,12 @@ def organization_published_use_cases( try: # Get published use cases for this organization queryset = UseCase.objects.filter( - usecaseorganizationrelationship__organization_id=organization_id, + ( + Q(organization__id=organization_id) + | Q( + usecaseorganizationrelationship__organization_id=organization_id + ) + ), status=UseCaseStatus.PUBLISHED.value, ).distinct() return TypeUseCase.from_django_list(queryset) diff --git a/api/schema/organization_schema.py b/api/schema/organization_schema.py index 06d1c86..9507c43 100644 --- a/api/schema/organization_schema.py +++ b/api/schema/organization_schema.py @@ -85,6 +85,15 @@ def organizations( return [TypeOrganization.from_django(org) for org in queryset] + @strawberry_django.field(permission_classes=[IsAuthenticated]) + def all_organizations(self, info: Info) -> List[TypeOrganization]: + """Get all organizations.""" + user = info.context.user + if not user or getattr(user, "is_anonymous", True): + logging.warning("Anonymous user or no user found in context") + return [] + return [TypeOrganization.from_django(org) for org in Organization.objects.all()] + @strawberry_django.field def organization(self, info: Info, id: str) -> Optional[TypeOrganization]: """Get organization by ID.""" diff --git a/api/schema/resource_schema.py b/api/schema/resource_schema.py index 194842e..b502ace 100644 --- a/api/schema/resource_schema.py +++ b/api/schema/resource_schema.py @@ -159,6 
+159,11 @@ def _create_file_resource_schema(resource: Resource) -> None: return +def _reset_file_resource_schema(resource: Resource) -> None: + ResourceSchema.objects.filter(resource=resource).delete() + data_table = index_resource_data(resource) + + def _update_file_resource_schema( resource: Resource, updated_schema: List[SchemaUpdate] ) -> None: @@ -262,6 +267,7 @@ def create_file_resources( file=file, size=file.size, resource=resource ) _validate_file_details_and_update_format(resource) + _create_file_resource_schema(resource) resources.append(TypeResource.from_django(resource)) return resources @@ -351,6 +357,8 @@ def update_file_resource( size=file_resource_input.file.size, resource=resource, ) + _validate_file_details_and_update_format(resource) + _create_file_resource_schema(resource) if file_resource_input.preview_details: _update_resource_preview_details(file_resource_input, resource) diff --git a/api/schema/sector_schema.py b/api/schema/sector_schema.py index d89ab1d..497063e 100644 --- a/api/schema/sector_schema.py +++ b/api/schema/sector_schema.py @@ -1,14 +1,16 @@ import uuid -from typing import List, Optional +from typing import Any, List, Optional import strawberry import strawberry_django from strawberry import auto from strawberry.types import Info from strawberry_django.mutations import mutations +from strawberry_django.pagination import OffsetPaginationInput from api.models import Sector -from api.types.type_sector import TypeSector +from api.types.type_sector import SectorFilter, SectorOrder, TypeSector +from api.utils.enums import DatasetStatus @strawberry.input @@ -40,6 +42,39 @@ def sector(self, info: Info, id: uuid.UUID) -> Optional[TypeSector]: except Sector.DoesNotExist: raise ValueError(f"Sector with ID {id} does not exist.") + @strawberry_django.field( + filters=SectorFilter, + pagination=True, + order=SectorOrder, + ) + def active_sectors( + self, + info: Info, + filters: Optional[SectorFilter] = strawberry.UNSET, + pagination: Optional[OffsetPaginationInput] = strawberry.UNSET, + order: Optional[SectorOrder] = strawberry.UNSET, + ) -> list[TypeSector]: + """Get sectors with published datasets.""" + # Start with base queryset filtering for active sectors + queryset = Sector.objects.filter( + datasets__status=DatasetStatus.PUBLISHED + ).distinct() + + # Apply filters if provided + if filters is not strawberry.UNSET: + queryset = strawberry_django.filters.apply(filters, queryset, info) + + # Apply ordering if provided + if order is not strawberry.UNSET: + queryset = strawberry_django.ordering.apply(order, queryset, info) + + # Apply pagination if provided + if pagination is not strawberry.UNSET: + # Apply pagination to the list + queryset = strawberry_django.pagination.apply(pagination, queryset) + + return [TypeSector.from_django(instance) for instance in queryset] + @strawberry.type class Mutation: diff --git a/api/schema/tags_schema.py b/api/schema/tags_schema.py index 5401d82..5c549cb 100644 --- a/api/schema/tags_schema.py +++ b/api/schema/tags_schema.py @@ -3,7 +3,11 @@ from strawberry.types import Info from api.models import Tag -from api.schema.base_mutation import BaseMutation, MutationResponse +from api.schema.base_mutation import ( + BaseMutation, + DjangoValidationError, + MutationResponse, +) from api.utils.graphql_telemetry import trace_resolver from authorization.permissions import IsAuthenticated @@ -12,7 +16,9 @@ class Mutation: """Mutations for tags.""" - @strawberry_django.mutation(handle_django_errors=False) + @strawberry_django.mutation( + 
handle_django_errors=False, permission_classes=[IsAuthenticated] + ) @trace_resolver( name="delete_tag", attributes={"component": "tag", "operation": "mutation"} ) @@ -36,6 +42,6 @@ def delete_tags(self, info: Info, tag_ids: list[str]) -> MutationResponse[bool]: try: tags = Tag.objects.filter(id__in=tag_ids) except Tag.DoesNotExist: - raise ValueError(f"Tags with IDs {tag_ids} do not exist.") + raise DjangoValidationError(f"Tags with IDs {tag_ids} do not exist.") tags.delete() return MutationResponse.success_response(True) diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py index 6abd010..4e89660 100644 --- a/api/schema/usecase_schema.py +++ b/api/schema/usecase_schema.py @@ -10,6 +10,7 @@ import strawberry_django from django.db import models from strawberry import auto +from strawberry.file_uploads import Upload from strawberry.types import Info from strawberry_django.mutations import mutations from strawberry_django.pagination import OffsetPaginationInput @@ -32,7 +33,11 @@ TypeUseCaseOrganizationRelationship, relationship_type, ) -from api.utils.enums import OrganizationRelationshipType, UseCaseStatus +from api.utils.enums import ( + OrganizationRelationshipType, + UseCaseRunningStatus, + UseCaseStatus, +) from api.utils.graphql_telemetry import trace_resolver from authorization.models import User from authorization.types import TypeUser @@ -59,13 +64,23 @@ class UpdateUseCaseMetadataInput: sectors: List[uuid.UUID] +use_case_running_status = strawberry.enum(UseCaseRunningStatus) # type: ignore + + @strawberry_django.partial(UseCase, fields="__all__", exclude=["datasets"]) class UseCaseInputPartial: """Input type for use case updates.""" id: str - slug: auto - summary: auto + logo: Optional[Upload] = strawberry.field(default=None) + running_status: Optional[use_case_running_status] = UseCaseRunningStatus.INITIATED + title: Optional[str] = None + summary: Optional[str] = None + platform_url: Optional[str] = None + tags: Optional[List[str]] = None + sectors: Optional[List[uuid.UUID]] = None + started_on: Optional[datetime.date] = None + completed_on: Optional[datetime.date] = None @strawberry.type(name="Query") @@ -241,7 +256,7 @@ class Mutation: """Mutations for use cases.""" create_use_case: TypeUseCase = mutations.create(UseCaseInput) - update_use_case: TypeUseCase = mutations.update(UseCaseInputPartial, key_attr="id") + # update_use_case: TypeUseCase = mutations.update(UseCaseInputPartial, key_attr="id") @strawberry_django.mutation( handle_django_errors=True, @@ -326,12 +341,52 @@ def add_update_usecase_metadata( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {usecase_id} does not exist.") + if usecase.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {usecase_id} is not in draft status.") + if update_metadata_input.tags is not None: _update_usecase_tags(usecase, update_metadata_input.tags) _add_update_usecase_metadata(usecase, metadata_input) _update_usecase_sectors(usecase, update_metadata_input.sectors) return TypeUseCase.from_django(usecase) + @strawberry_django.mutation(handle_django_errors=False) + @trace_resolver( + name="update_use_case", + attributes={"component": "usecase", "operation": "mutation"}, + ) + def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase: + usecase_id = data.id + try: + usecase = UseCase.objects.get(id=usecase_id) + except UseCase.DoesNotExist: + raise ValueError(f"UseCase with ID {usecase_id} does not exist.") + + if usecase.status != UseCaseStatus.DRAFT: + raise 
ValueError(f"UseCase with ID {usecase_id} is not in draft status.") + + if data.title is not None: + if data.title.strip() == "": + raise ValueError("Title cannot be empty.") + usecase.title = data.title.strip() + if data.summary is not None: + usecase.summary = data.summary.strip() + if data.platform_url is not None: + usecase.platform_url = data.platform_url.strip() + if data.started_on is not None: + usecase.started_on = data.started_on + if data.completed_on is not None and data.completed_on is not strawberry.UNSET: + usecase.completed_on = data.completed_on + if ( + data.running_status is not None + and data.running_status is not strawberry.UNSET + ): + usecase.running_status = data.running_status + if data.logo is not None and data.logo is not strawberry.UNSET: + usecase.logo = data.logo + usecase.save() + return TypeUseCase.from_django(usecase) + @strawberry_django.mutation( handle_django_errors=False, extensions=[ @@ -371,6 +426,9 @@ def add_dataset_to_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.datasets.add(dataset) use_case.save() return TypeUseCase.from_django(use_case) @@ -384,12 +442,13 @@ def remove_dataset_from_use_case( dataset = Dataset.objects.get(id=dataset_id) except Dataset.DoesNotExist: raise ValueError(f"Dataset with ID {dataset_id} does not exist.") - try: use_case = UseCase.objects.get(id=use_case_id) except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") use_case.datasets.remove(dataset) use_case.save() return TypeUseCase.from_django(use_case) @@ -409,6 +468,9 @@ def update_usecase_datasets( except UseCase.DoesNotExist: raise ValueError(f"Use Case with ID {use_case_id} doesn't exist") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.datasets.set(datasets) use_case.save() return TypeUseCase.from_django(use_case) @@ -487,6 +549,9 @@ def add_contributor_to_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.contributors.add(user) use_case.save() return TypeUseCase.from_django(use_case) @@ -511,6 +576,9 @@ def remove_contributor_from_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.contributors.remove(user) use_case.save() return TypeUseCase.from_django(use_case) @@ -545,6 +613,9 @@ def update_usecase_contributors( except UseCase.DoesNotExist: raise ValueError(f"Use Case with ID {use_case_id} doesn't exist") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + use_case.contributors.set(users) use_case.save() return TypeUseCase.from_django(use_case) @@ -569,6 +640,9 @@ def add_supporting_organization_to_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID 
{use_case_id} is not in draft status.") + # Create or get the relationship relationship, created = UseCaseOrganizationRelationship.objects.get_or_create( usecase=use_case, @@ -621,6 +695,9 @@ def add_partner_organization_to_use_case( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + # Create or get the relationship relationship, created = UseCaseOrganizationRelationship.objects.get_or_create( usecase=use_case, @@ -691,6 +768,9 @@ def update_usecase_organization_relationships( except UseCase.DoesNotExist: raise ValueError(f"UseCase with ID {use_case_id} does not exist.") + if use_case.status != UseCaseStatus.DRAFT: + raise ValueError(f"UseCase with ID {use_case_id} is not in draft status.") + # Clear existing relationships UseCaseOrganizationRelationship.objects.filter(usecase=use_case).delete() diff --git a/api/types/type_organization.py b/api/types/type_organization.py index be51d46..d0c3e77 100644 --- a/api/types/type_organization.py +++ b/api/types/type_organization.py @@ -2,6 +2,7 @@ import strawberry import strawberry_django +from django.db.models import Q from strawberry import Info, auto from api.models import Organization @@ -57,9 +58,9 @@ def published_use_cases_count(self, info: Info) -> int: return 0 use_cases = UseCase.objects.filter( - usecaseorganizationrelationship__organization_id=org_id, # type: ignore + (Q(organization__id=org_id) | Q(usecaseorganizationrelationship__organization_id=org_id)), # type: ignore status=UseCaseStatus.PUBLISHED.value, - ) + ).distinct() return use_cases.count() except Exception: return 0 diff --git a/api/types/type_usecase.py b/api/types/type_usecase.py index 32d3c92..5242da2 100644 --- a/api/types/type_usecase.py +++ b/api/types/type_usecase.py @@ -58,6 +58,9 @@ class TypeUseCase(BaseType): organization: Optional[TypeOrganization] = strawberry.field( description="Organization associated with this use case" ) + platform_url: Optional[str] = strawberry.field( + description="URL of the platform where this use case is published" + ) @strawberry.field( description="Check if this use case is created by an individual user." 
diff --git a/api/utils/data_indexing.py b/api/utils/data_indexing.py index 3f2af76..679a2c8 100644 --- a/api/utils/data_indexing.py +++ b/api/utils/data_indexing.py @@ -163,12 +163,7 @@ def index_resource_data(resource: Resource) -> Optional[ResourceDataTable]: if col in existing_schemas: existing_description = existing_schemas[col]["description"] # Check for None and non-auto-generated descriptions - if ( - existing_description is not None - and not existing_description.startswith( - "Description of column" - ) - ): + if existing_description is not None: description = existing_description logger.info( f"Preserved custom description for column {col}" diff --git a/api/views/paginated_elastic_view.py b/api/views/paginated_elastic_view.py index a44a556..9305175 100644 --- a/api/views/paginated_elastic_view.py +++ b/api/views/paginated_elastic_view.py @@ -91,7 +91,7 @@ def get(self, request: HttpRequest) -> Response: aggregations.pop("metadata") for agg in metadata_aggregations: label: str = agg["key"]["metadata_label"] - value: str = agg["key"]["metadata_value"] + value: str = agg["key"].get("metadata_value", "") if label not in aggregations: aggregations[label] = {} aggregations[label][value] = agg["doc_count"] diff --git a/api/views/search_dataset.py b/api/views/search_dataset.py index 9e8d269..a1231c8 100644 --- a/api/views/search_dataset.py +++ b/api/views/search_dataset.py @@ -169,7 +169,12 @@ def add_aggregations(self, search: Search) -> Search: metadata_bucket = search.aggs.bucket("metadata", "nested", path="metadata") composite_sources = [ - {"metadata": {"terms": {"field": "metadata.label.keyword"}}} + { + "metadata_label": { + "terms": {"field": "metadata.metadata_item.label"} + } + }, + {"metadata_value": {"terms": {"field": "metadata.value"}}}, ] composite_agg = A( "composite", diff --git a/authorization/schema/mutation.py b/authorization/schema/mutation.py index 8810370..52b0f73 100644 --- a/authorization/schema/mutation.py +++ b/authorization/schema/mutation.py @@ -7,7 +7,12 @@ import structlog from strawberry.types import Info -from api.models import Dataset, Organization +from api.models import Dataset +from api.schema.base_mutation import ( + BaseMutation, + DjangoValidationError, + MutationResponse, +) from api.utils.graphql_telemetry import trace_resolver from authorization.models import OrganizationMembership, Role, User from authorization.permissions import IsAuthenticated @@ -95,37 +100,51 @@ def update_user(self, info: Info, input: UpdateUserInput) -> TypeUser: return TypeUser.from_django(user) - @strawberry_django.mutation( + @strawberry.mutation + @BaseMutation.mutation( permission_classes=[IsAuthenticated, HasOrganizationAdminRole], - ) - @trace_resolver( - name="add_user_to_organization", - attributes={"component": "user", "operation": "mutation"}, + trace_name="add_user_to_organization", + trace_attributes={"component": "user", "operation": "mutation"}, + track_activity={ + "verb": "added", + "get_data": lambda result, **kwargs: { + "user_id": str(result.user.id), + "organization_id": str(result.organization.id), + "role_id": str(result.role.id), + }, + }, ) def add_user_to_organization( self, info: Info, input: AddRemoveUserToOrganizationInput - ) -> TypeOrganizationMembership: + ) -> MutationResponse[TypeOrganizationMembership]: """Add a user to an organization with a specific role.""" try: user = User.objects.get(id=input.user_id) organization = info.context.context.get("organization") role = Role.objects.get(id=input.role_id) + # If user trying to change self 
role, should raise error + if user.id == info.context.user.id: + raise DjangoValidationError("You cannot change your own role.") + + # Check if the membership already exists membership, created = OrganizationMembership.objects.get_or_create( user=user, organization=organization, defaults={"role": role} ) - # If the membership exists but the role is different, update it - if not created and membership.role != role: - membership.role = role - membership.save() + # If the membership exists, raise error + if not created: + raise DjangoValidationError( + "User is already a member of this organization." + ) - return TypeOrganizationMembership.from_django(membership) + return MutationResponse.success_response( + TypeOrganizationMembership.from_django(membership) + ) except User.DoesNotExist: - raise ValueError(f"User with ID {input.user_id} does not exist.") + raise DjangoValidationError(f"User with ID {input.user_id} does not exist.") except Role.DoesNotExist: - raise ValueError(f"Role with ID {input.role_id} does not exist.") + raise DjangoValidationError(f"Role with ID {input.role_id} does not exist.") @strawberry.mutation def assign_organization_role( diff --git a/aws/cloudformation/dataspace-infrastructure.yml b/aws/cloudformation/dataspace-infrastructure.yml new file mode 100644 index 0000000..08a4256 --- /dev/null +++ b/aws/cloudformation/dataspace-infrastructure.yml @@ -0,0 +1,382 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: 'DataSpace Application Infrastructure' + +Parameters: + Environment: + Description: Environment name (dev, staging, prod) + Type: String + Default: dev + AllowedValues: + - dev + - staging + - prod + + VpcId: + Description: ID of the VPC + Type: AWS::EC2::VPC::Id + + SubnetIds: + Description: List of subnet IDs for the application + Type: List<AWS::EC2::Subnet::Id> + + DBUsername: + Description: Database username + Type: String + NoEcho: true + + DBPassword: + Description: Database password + Type: String + NoEcho: true + + DBName: + Description: Database name + Type: String + Default: dataspace + + DBInstanceClass: + Description: Database instance class + Type: String + Default: db.t3.small + + ElasticsearchInstanceType: + Description: Elasticsearch instance type + Type: String + Default: t3.small.elasticsearch + + ElasticsearchPassword: + Description: Elasticsearch password + Type: String + NoEcho: true + Default: changeme + + RedisNodeType: + Description: Redis node type + Type: String + Default: cache.t3.small + + DjangoSecretKey: + Description: Django secret key + Type: String + NoEcho: true + +Resources: + # Security Groups + DatabaseSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: Security group for RDS database + VpcId: !Ref VpcId + SecurityGroupIngress: + - IpProtocol: tcp + FromPort: 5432 + ToPort: 5432 + SourceSecurityGroupId: !Ref ECSSecurityGroup + + ECSSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: Security group for ECS tasks + VpcId: !Ref VpcId + SecurityGroupIngress: + - IpProtocol: tcp + FromPort: 8000 + ToPort: 8000 + CidrIp: 0.0.0.0/0 + + ElasticsearchSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: Security group for Elasticsearch + VpcId: !Ref VpcId + SecurityGroupIngress: + - IpProtocol: tcp + FromPort: 443 + ToPort: 443 + SourceSecurityGroupId: !Ref ECSSecurityGroup + + RedisSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: Security group for Redis + VpcId: !Ref VpcId + SecurityGroupIngress: + - IpProtocol: tcp + FromPort: 6379 +
ToPort: 6379 + SourceSecurityGroupId: !Ref ECSSecurityGroup + + # Database + DatabaseSubnetGroup: + Type: AWS::RDS::DBSubnetGroup + Properties: + DBSubnetGroupDescription: Subnet group for DataSpace database + SubnetIds: !Ref SubnetIds + + Database: + Type: AWS::RDS::DBInstance + Properties: + AllocatedStorage: 30 + DBInstanceClass: !Ref DBInstanceClass + Engine: postgres + EngineVersion: '17.4' + MasterUsername: !Ref DBUsername + MasterUserPassword: !Ref DBPassword + DBName: !Ref DBName + VPCSecurityGroups: + - !GetAtt DatabaseSecurityGroup.GroupId + DBSubnetGroupName: !Ref DatabaseSubnetGroup + MultiAZ: false + StorageType: gp2 + Tags: + - Key: Name + Value: !Sub dataspace-${Environment}-db + DeletionPolicy: Snapshot + + # Elasticsearch Domain + ElasticsearchDomain: + Type: AWS::Elasticsearch::Domain + Properties: + DomainName: !Sub dataspace-${Environment} + ElasticsearchVersion: '7.10' + ElasticsearchClusterConfig: + InstanceType: !Ref ElasticsearchInstanceType + InstanceCount: 1 + EBSOptions: + EBSEnabled: true + VolumeType: gp2 + VolumeSize: 10 + AccessPolicies: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:root' + Action: 'es:*' + Resource: !Sub 'arn:aws:es:${AWS::Region}:${AWS::AccountId}:domain/dataspace-${Environment}/*' + AdvancedSecurityOptions: + Enabled: true + InternalUserDatabaseEnabled: true + MasterUserOptions: + MasterUserName: elastic + MasterUserPassword: !Ref ElasticsearchPassword + EncryptionAtRestOptions: + Enabled: true + NodeToNodeEncryptionOptions: + Enabled: true + DomainEndpointOptions: + EnforceHTTPS: true + VPCOptions: + SecurityGroupIds: + - !GetAtt ElasticsearchSecurityGroup.GroupId + SubnetIds: + - !Select [0, !Ref SubnetIds] + # Note: AWS::Elasticsearch::Domain does not support DeletionPolicy: Snapshot + + # Redis Cache + RedisSubnetGroup: + Type: AWS::ElastiCache::SubnetGroup + Properties: + Description: Subnet group for DataSpace Redis + SubnetIds: !Ref SubnetIds + + RedisCluster: + Type: AWS::ElastiCache::CacheCluster + Properties: + CacheNodeType: !Ref RedisNodeType + Engine: redis + NumCacheNodes: 1 + VpcSecurityGroupIds: + - !GetAtt RedisSecurityGroup.GroupId + CacheSubnetGroupName: !Ref RedisSubnetGroup + Tags: + - Key: Name + Value: !Sub dataspace-${Environment}-redis + + # ECS Cluster + ECSCluster: + Type: AWS::ECS::Cluster + Properties: + ClusterName: !Sub dataspace-${Environment}-cluster + CapacityProviders: + - FARGATE + - FARGATE_SPOT + DefaultCapacityProviderStrategy: + - CapacityProvider: FARGATE + Weight: 1 + Tags: + - Key: Name + Value: !Sub dataspace-${Environment}-cluster + + # IAM Roles + ECSTaskExecutionRole: + Type: AWS::IAM::Role + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: ecs-tasks.amazonaws.com + Action: sts:AssumeRole + ManagedPolicyArns: + - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy + Policies: + - PolicyName: SSMParameterAccess + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - ssm:GetParameters + - ssm:GetParameter + Resource: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/dataspace/*' + + # SSM Parameters + DBHostParameter: + Type: AWS::SSM::Parameter + Properties: + Name: /dataspace/DB_HOST + Type: String + Value: !GetAtt Database.Endpoint.Address + Description: Database host + + DBNameParameter: + Type: AWS::SSM::Parameter + Properties: + Name: /dataspace/DB_NAME + Type: String + Value: !Ref DBName + 
Description: Database name + + DBUserParameter: + Type: AWS::SSM::Parameter + Properties: + Name: /dataspace/DB_USER + Type: String + Value: !Ref DBUsername + Description: Database username + + DBPasswordParameter: + Type: AWS::SSM::Parameter + Properties: + Name: /dataspace/DB_PASSWORD + Type: String + Value: !Ref DBPassword + Description: Database password + Tier: Standard + + ElasticsearchIndexParameter: + Type: AWS::SSM::Parameter + Properties: + Name: /dataspace/ELASTICSEARCH_INDEX + Type: String + Value: !GetAtt ElasticsearchDomain.DomainEndpoint + Description: Elasticsearch endpoint + + ElasticsearchUsernameParameter: + Type: AWS::SSM::Parameter + Properties: + Name: /dataspace/ELASTICSEARCH_USERNAME + Type: String + Value: elastic + Description: Elasticsearch username + + ElasticsearchPasswordParameter: + Type: AWS::SSM::Parameter + Properties: + Name: /dataspace/ELASTICSEARCH_PASS + Type: String + Value: !Ref ElasticsearchPassword + Description: Elasticsearch password + Tier: Standard + + RedisHostParameter: + Type: AWS::SSM::Parameter + Properties: + Name: /dataspace/REDIS_HOST + Type: String + Value: !GetAtt RedisCluster.RedisEndpoint.Address + Description: Redis host + + SecretKeyParameter: + Type: AWS::SSM::Parameter + Properties: + Name: /dataspace/SECRET_KEY + Type: String + Value: !Ref DjangoSecretKey + Description: Django secret key + Tier: Standard + + URLWhitelistParameter: + Type: AWS::SSM::Parameter + Properties: + Name: /dataspace/URL_WHITELIST + Type: String + Value: !Sub 'https://dataspace-${Environment}.yourdomain.com' + Description: URL whitelist + + MigrationsFileSystem: + Type: AWS::EFS::FileSystem + Properties: + PerformanceMode: generalPurpose + Encrypted: true + FileSystemTags: + - Key: Name + Value: {"Fn::Sub": "${AWS::StackName}-migrations"} + + MigrationsAccessPoint: + Type: AWS::EFS::AccessPoint + Properties: + FileSystemId: {"Ref": "MigrationsFileSystem"} + PosixUser: + Uid: "1000" + Gid: "1000" + RootDirectory: + Path: "/migrations" + CreationInfo: + OwnerUid: "1000" + OwnerGid: "1000" + Permissions: "755" + + # Create EFS mount targets in the first subnet + MigrationsFileSystemMountTarget: + Type: AWS::EFS::MountTarget + Properties: + FileSystemId: !Ref MigrationsFileSystem + SubnetId: !Select [0, !Ref SubnetIds] + SecurityGroups: + - !Ref ECSSecurityGroup + +Outputs: + ClusterName: + Description: ECS Cluster Name + Value: !Ref ECSCluster + + DatabaseEndpoint: + Description: Database endpoint + Value: !GetAtt Database.Endpoint.Address + + ElasticsearchEndpoint: + Description: Elasticsearch endpoint + Value: !GetAtt ElasticsearchDomain.DomainEndpoint + + RedisEndpoint: + Description: Redis endpoint + Value: {"Fn::GetAtt": ["RedisCluster", "RedisEndpoint.Address"]} + + TaskExecutionRoleArn: + Description: ECS Task Execution Role ARN + Value: {"Fn::GetAtt": ["ECSTaskExecutionRole", "Arn"]} + Export: + Name: {"Fn::Sub": "${AWS::StackName}-ECSTaskExecutionRoleArn"} + + MigrationsFileSystemId: + Description: EFS File System ID for migrations + Value: {"Ref": "MigrationsFileSystem"} + + MigrationsAccessPointId: + Description: EFS Access Point ID for migrations + Value: {"Ref": "MigrationsAccessPoint"} + Export: + Name: {"Fn::Sub": "${AWS::StackName}-MigrationsFileSystemId"} diff --git a/aws/multi-service-architecture.md b/aws/multi-service-architecture.md new file mode 100644 index 0000000..6bfc34c --- /dev/null +++ b/aws/multi-service-architecture.md @@ -0,0 +1,249 @@ +# DataSpace Multi-Service Architecture for AWS ECS + +This document outlines the 
architecture for deploying the DataSpace application and its dependent services to AWS ECS. + +## Architecture Overview + +The DataSpace application consists of several services that work together: + +1. **Backend Application (dataspace)** - The main Django/Python application +2. **PostgreSQL Database (backend_db)** - Database for the application +3. **Elasticsearch** - For search functionality +4. **Redis** - For caching and possibly message queuing +5. **Telemetry Services** - Including OpenTelemetry Collector + +## AWS Services Mapping + +For production deployment on AWS, we use the following mapping: + +| Local Service | AWS Service | Justification | +|---------------|-------------|---------------| +| dataspace (backend) | ECS Fargate | Containerized application, managed by ECS | +| backend_db | Amazon RDS for PostgreSQL | Managed database service with backups, high availability | +| elasticsearch | Amazon Elasticsearch Service | Managed Elasticsearch with scaling and security | +| redis | Amazon ElastiCache for Redis | Managed Redis with high availability | +| otel-collector | ECS Fargate (separate task) | Deployed as a separate container service | + +## Deployment Architecture + +### Infrastructure as Code + +All AWS resources are provisioned using CloudFormation templates located in `aws/cloudformation/`. The main template `dataspace-infrastructure.yml` creates: + +1. **Security Groups** - For RDS, Elasticsearch, Redis, and ECS services +2. **Amazon RDS PostgreSQL** - Managed database with subnet group +3. **Amazon Elasticsearch Service** - Managed Elasticsearch domain with security and access policies +4. **Amazon ElastiCache Redis** - Managed Redis cluster +5. **ECS Cluster** - With Fargate and Fargate Spot capacity providers +6. **IAM Roles** - For ECS task execution with appropriate permissions +7. **SSM Parameters** - For storing sensitive connection information + +### ECS Task Definitions + +The application is deployed using two main ECS task definitions: + +1. **Main Application (`aws/task-definition.json`)** - Deploys the Django application container with: + - Environment variables for configuration + - Secrets from SSM Parameter Store for sensitive data + - Health checks and logging configuration + - Network configuration for service discovery + +2. **OpenTelemetry Collector (`aws/otel-collector-task-definition.json`)** - Deploys the telemetry collector with: + - Port mappings for various telemetry protocols + - Volume mounts for configuration + - Health checks and logging + +### Managed Services Integration + +#### Amazon RDS PostgreSQL + +The PostgreSQL database is provisioned as a managed RDS instance with: + +- Automated backups +- Security group restrictions (only accessible from ECS tasks) +- Credentials stored in SSM Parameter Store +- Connection information injected into the application container as environment variables + +#### Amazon Elasticsearch Service + +Elasticsearch is provisioned as a managed service with: + +- Fine-grained access control +- HTTPS encryption +- Security group restrictions +- Connection information stored in SSM Parameter Store + +#### Amazon ElastiCache Redis + +Redis is provisioned as a managed ElastiCache cluster with: + +- Security group restrictions +- Connection information stored in SSM Parameter Store +- Host and port injected into the application container + +## CI/CD Pipeline + +The deployment is automated using GitHub Actions workflow (`.github/workflows/deploy-to-ecs.yml`) that: + +1. 
**Triggers** on pushes to the `dev` branch or manual workflow dispatch +2. **Deploys Infrastructure** using CloudFormation (conditionally based on changes) +3. **Builds and Pushes** Docker images to Amazon ECR +4. **Deploys Application** using ECS task definitions with environment variable substitution +5. **Deploys OpenTelemetry Collector** as a separate ECS service + +### Idempotent Infrastructure Creation + +The CloudFormation template is designed to be idempotent by: + +1. Using the `--no-fail-on-empty-changeset` flag in CloudFormation deployment +2. Setting appropriate `DeletionPolicy` and `UpdateReplacePolicy` attributes on resources +3. Using conditional resource creation based on environment parameters + +This ensures that: +- If resources already exist, they won't be recreated unnecessarily +- Database and Elasticsearch data is preserved during updates +- Application code can be updated independently of infrastructure + +## Environment Variables and Secrets Management + +The deployment uses a three-tier approach to configuration: + +1. **GitHub Repository Secrets** - For AWS credentials and sensitive parameters +2. **Environment Variables** - For non-sensitive configuration in CI/CD and ECS tasks +3. **AWS SSM Parameter Store** - For service connection information and secrets + +### Required GitHub Secrets + +- `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` - AWS credentials +- `AWS_REGION` - Target AWS region +- `ECR_REPOSITORY` - ECR repository name +- `ECS_CLUSTER` - ECS cluster name +- `ECS_SERVICE` - Main application ECS service name +- `ECS_OTEL_SERVICE` - OpenTelemetry collector ECS service name +- `ECS_EXECUTION_ROLE_ARN` - ECS task execution role ARN +- `VPC_ID` and `SUBNET_IDS` - VPC and subnet IDs +- `DB_USERNAME`, `DB_PASSWORD`, `DB_NAME` - Database credentials +- `ELASTICSEARCH_PASSWORD` - Elasticsearch password +- `DJANGO_SECRET_KEY` - Django secret key +- `ENVIRONMENT` - Deployment environment (dev, staging, prod) + +## Scaling and High Availability + +The architecture supports scaling and high availability through: + +1. **ECS Fargate** - Automatic scaling based on CPU/memory usage +2. **RDS Multi-AZ** - Optional database high availability +3. **ElastiCache Replication** - Optional Redis replication +4. **Elasticsearch Multi-Node** - Optional Elasticsearch cluster scaling + +## Monitoring and Observability + +The deployment includes observability through: + +1. **CloudWatch Logs** - For all ECS services +2. **OpenTelemetry Collector** - For metrics, traces, and logs collection +3. **Health Checks** - For all services to ensure availability + +## Security Considerations + +The deployment implements security best practices: + +1. **IAM Least Privilege** - Task execution role with minimal permissions +2. **Security Groups** - Restrict access between services +3. **Secrets Management** - Sensitive data in SSM Parameter Store +4. **Network Isolation** - Services in private subnets where appropriate +5. 
**HTTPS** - For all external communication + +## Deployment Options + +### Option 1: AWS Managed Services with ECS for the Application + +This approach uses AWS managed services where possible and ECS only for custom application containers: + +- **Backend Application**: ECS Fargate Task/Service +- **Database**: Amazon RDS +- **Elasticsearch**: Amazon Elasticsearch Service +- **Redis**: Amazon ElastiCache +- **Telemetry**: Amazon Elasticsearch Service + ECS for collectors/agents + +### Option 2: ECS for Everything + +This approach deploys everything as containers in ECS: + +- **Backend Application**: ECS Fargate Task/Service +- **Database**: ECS Fargate Task with PostgreSQL container + EBS volume +- **Elasticsearch**: ECS Fargate Task with Elasticsearch container + EBS volume +- **Redis**: ECS Fargate Task with Redis container +- **Telemetry**: ECS Fargate Tasks for all telemetry services + +### Recommended Approach + +We recommend **Option 1** for production workloads because: + +1. Managed services handle backups, high availability, and security patches +2. Reduced operational overhead +3. Better scalability and reliability +4. Separation of concerns + +## Implementation Plan + +### 1. Create AWS Managed Services + +First, create the necessary managed services: + +- **RDS PostgreSQL Instance** +- **ElastiCache Redis Cluster** +- **Amazon Elasticsearch Service Domain(s)** + +### 2. Update Task Definition for Backend Application + +The task definition we've already created focuses on the backend application. It needs to be updated with connection information for the managed services. + +### 3. Create Task Definitions for Custom Services + +For services that don't have AWS managed equivalents (like otel-collector), create separate task definitions. + +### 4. Update CI/CD Pipeline + +Update the GitHub Actions workflow to: + +1. Deploy infrastructure changes if needed (using Terraform or CloudFormation) +2. Deploy application containers to ECS + +## Example: RDS Configuration + +```bash +# Create RDS instance +aws rds create-db-instance \ + --db-instance-identifier dataspace-db \ + --db-instance-class db.t3.small \ + --engine postgres \ + --master-username ${DB_USERNAME} \ + --master-user-password ${DB_PASSWORD} \ + --allocated-storage 20 +``` + +## Example: ElastiCache Configuration + +```bash +# Create ElastiCache cluster +aws elasticache create-cache-cluster \ + --cache-cluster-id dataspace-redis \ + --engine redis \ + --cache-node-type cache.t3.small \ + --num-cache-nodes 1 +``` + +## Example: Amazon Elasticsearch Service + +```bash +# Create Elasticsearch domain +aws es create-elasticsearch-domain \ + --domain-name dataspace-search \ + --elasticsearch-version 7.10 \ + --elasticsearch-cluster-config InstanceType=t3.small.elasticsearch,InstanceCount=1 \ + --ebs-options EBSEnabled=true,VolumeType=gp2,VolumeSize=10 +``` + +## Next Steps + +1. Create CloudFormation or Terraform templates for the infrastructure +2. Update the ECS task definition with connection information for managed services +3. Create separate task definitions for services that need to run in ECS +4. 
Update the CI/CD pipeline to deploy all components
diff --git a/aws/otel-collector-task-definition.json b/aws/otel-collector-task-definition.json
new file mode 100644
index 0000000..cc5e1d3
--- /dev/null
+++ b/aws/otel-collector-task-definition.json
@@ -0,0 +1,81 @@
+{
+  "family": "${APP_NAME}-otel-collector",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "containerDefinitions": [
+    {
+      "name": "${APP_NAME}-otel-collector",
+      "image": "otel/opentelemetry-collector:latest",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": 4317,
+          "hostPort": 4317,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 4318,
+          "hostPort": 4318,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 55680,
+          "hostPort": 55680,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 55681,
+          "hostPort": 55681,
+          "protocol": "tcp"
+        }
+      ],
+      "environment": [
+        { "name": "ENVIRONMENT", "value": "${ENVIRONMENT}" }
+      ],
+      "mountPoints": [
+        {
+          "sourceVolume": "otel-config",
+          "containerPath": "/etc/otel-collector-config.yml",
+          "readOnly": true
+        }
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/${APP_NAME}-otel-collector",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "curl -f http://localhost:13133/ || exit 1"],
+        "interval": 30,
+        "timeout": 5,
+        "retries": 3,
+        "startPeriod": 60
+      }
+    }
+  ],
+  "volumes": [
+    {
+      "name": "otel-config",
+      "dockerVolumeConfiguration": {
+        "scope": "task",
+        "driver": "local",
+        "labels": {
+          "app": "${APP_NAME}",
+          "component": "otel-collector"
+        }
+      }
+    }
+  ],
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "256",
+  "memory": "512",
+  "tags": [
+    { "key": "Environment", "value": "${ENVIRONMENT}" },
+    { "key": "Application", "value": "${APP_NAME}" },
+    { "key": "Component", "value": "otel-collector" },
+    { "key": "ManagedBy", "value": "GitHub-Actions" }
+  ]
+}
diff --git a/aws/otel-collector-task-definition.json.template b/aws/otel-collector-task-definition.json.template
new file mode 100644
index 0000000..966cdee
--- /dev/null
+++ b/aws/otel-collector-task-definition.json.template
@@ -0,0 +1,67 @@
+{
+  "family": "dataspace-otel-collector",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "${OTEL_CPU_UNITS}",
+  "memory": "${OTEL_MEMORY_UNITS}",
+  "containerDefinitions": [
+    {
+      "name": "otel-collector",
+      "image": "otel/opentelemetry-collector:${OTEL_VERSION}",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": 4317,
+          "hostPort": 4317,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 4318,
+          "hostPort": 4318,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 8888,
+          "hostPort": 8888,
+          "protocol": "tcp"
+        },
+        {
+          "containerPort": 8889,
+          "hostPort": 8889,
+          "protocol": "tcp"
+        }
+      ],
+      "environment": [
+        { "name": "OTEL_RESOURCE_ATTRIBUTES", "value": "service.name=dataspace-telemetry,deployment.environment=${ENVIRONMENT}" },
+        { "name": "OTEL_CONFIG", "value": "receivers:\n  otlp:\n    protocols:\n      grpc:\n      http:\n  prometheus:\n    config:\n      scrape_configs:\n        - job_name: 'otel-collector'\n          scrape_interval: 10s\n          static_configs:\n            - targets: ['0.0.0.0:8888']\nexporters:\n  logging:\n    verbosity: detailed\n  prometheus:\n    endpoint: 0.0.0.0:8889\nservice:\n  pipelines:\n    traces:\n      receivers: [otlp]\n      exporters: [logging]\n    metrics:\n      receivers: [otlp, prometheus]\n      exporters: [prometheus, logging]" },
+        { "name": "OTEL_CONFIG_PATH", "value": "/etc/otel/config.yaml" }
+      ],
+      "command": [
+        "--config=env:OTEL_CONFIG"
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/dataspace-otel",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs",
+          "awslogs-create-group": "true"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "curl -f http://localhost:8888/health || exit 1"],
+        "interval": 30,
+        "timeout": 5,
+        "retries": 3,
+        "startPeriod": 60
+      }
+    }
+  ],
+  "volumes": [],
+  "tags": [
+    { "key": "Environment", "value": "${ENVIRONMENT}" },
+    { "key": "Application", "value": "dataspace-telemetry" },
+    { "key": "ManagedBy", "value": "CloudFormation" }
+  ]
+}
diff --git a/aws/redis-task-definition.json b/aws/redis-task-definition.json
new file mode 100644
index 0000000..2a823bd
--- /dev/null
+++ b/aws/redis-task-definition.json
@@ -0,0 +1,53 @@
+{
+  "family": "${APP_NAME}-redis",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "containerDefinitions": [
+    {
+      "name": "${APP_NAME}-redis",
+      "image": "redis:alpine",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": 6379,
+          "hostPort": 6379,
+          "protocol": "tcp"
+        }
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/${APP_NAME}-redis",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "redis-cli ping | grep -q 'PONG'"],
+        "interval": 10,
+        "timeout": 5,
+        "retries": 5,
+        "startPeriod": 10
+      },
+      "mountPoints": [
+        {
+          "sourceVolume": "redis-data",
+          "containerPath": "/data"
+        }
+      ]
+    }
+  ],
+  "volumes": [
+    {
+      "name": "redis-data",
+      "efsVolumeConfiguration": {
+        "fileSystemId": "${EFS_ID}",
+        "rootDirectory": "/redis-data",
+        "transitEncryption": "ENABLED"
+      }
+    }
+  ],
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "512",
+  "memory": "1024"
+}
diff --git a/aws/task-definition.json b/aws/task-definition.json
new file mode 100644
index 0000000..e8b3539
--- /dev/null
+++ b/aws/task-definition.json
@@ -0,0 +1,61 @@
+{
+  "family": "${APP_NAME}",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "containerDefinitions": [
+    {
+      "name": "${APP_NAME}",
+      "image": "${ECR_REPOSITORY_URI}:${IMAGE_TAG}",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": ${APP_PORT},
+          "hostPort": ${APP_PORT},
+          "protocol": "tcp"
+        }
+      ],
+      "environment": [
+        { "name": "DB_ENGINE", "value": "${DB_ENGINE}" },
+        { "name": "DB_PORT", "value": "${DB_PORT}" },
+        { "name": "DEBUG", "value": "${DEBUG_MODE}" },
+        { "name": "TELEMETRY_URL", "value": "${TELEMETRY_URL}" },
+        { "name": "REDIS_PORT", "value": "6379" }
+      ],
+      "secrets": [
+        { "name": "DB_HOST", "valueFrom": "${SSM_PATH_PREFIX}/DB_HOST" },
+        { "name": "DB_NAME", "valueFrom": "${SSM_PATH_PREFIX}/DB_NAME" },
+        { "name": "DB_USER", "valueFrom": "${SSM_PATH_PREFIX}/DB_USER" },
+        { "name": "DB_PASSWORD", "valueFrom": "${SSM_PATH_PREFIX}/DB_PASSWORD" },
+        { "name": "SECRET_KEY", "valueFrom": "${SSM_PATH_PREFIX}/SECRET_KEY" },
+        { "name": "ELASTICSEARCH_INDEX", "valueFrom": "${SSM_PATH_PREFIX}/ELASTICSEARCH_INDEX" },
+        { "name": "ELASTICSEARCH_USERNAME", "valueFrom": "${SSM_PATH_PREFIX}/ELASTICSEARCH_USERNAME" },
+        { "name": "ELASTICSEARCH_PASS", "valueFrom": "${SSM_PATH_PREFIX}/ELASTICSEARCH_PASS" },
+        { "name": "URL_WHITELIST", "valueFrom": "${SSM_PATH_PREFIX}/URL_WHITELIST" },
+        { "name": "REDIS_HOST", "valueFrom": "${SSM_PATH_PREFIX}/REDIS_HOST" }
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/${APP_NAME}",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "curl -f http://localhost:${APP_PORT}/health/ || exit 1"],
+        "interval": 30,
+        "timeout": 5,
+        "retries": 3,
+        "startPeriod": 60
+      }
+    }
+  ],
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "${CPU_UNITS}",
+  "memory": "${MEMORY_UNITS}",
+  "tags": [
+    { "key": "Environment", "value": "${ENVIRONMENT}" },
+    { "key": "Application", "value": "${APP_NAME}" },
+    { "key": "ManagedBy", "value": "GitHub-Actions" }
+  ]
+}
diff --git a/aws/task-definition.json.template b/aws/task-definition.json.template
new file mode 100644
index 0000000..544bff8
--- /dev/null
+++ b/aws/task-definition.json.template
@@ -0,0 +1,75 @@
+{
+  "family": "dataspace",
+  "executionRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "taskRoleArn": "${ECS_EXECUTION_ROLE_ARN}",
+  "networkMode": "awsvpc",
+  "requiresCompatibilities": ["FARGATE"],
+  "cpu": "${CPU_UNITS}",
+  "memory": "${MEMORY_UNITS}",
+  "volumes": [
+    {
+      "name": "migrations-volume"
+    }
+  ],
+  "containerDefinitions": [
+    {
+      "name": "dataspace",
+      "image": "${ECR_REPOSITORY}:${IMAGE_TAG}",
+      "essential": true,
+      "portMappings": [
+        {
+          "containerPort": ${APP_PORT},
+          "hostPort": ${APP_PORT},
+          "protocol": "tcp"
+        }
+      ],
+      "mountPoints": [
+        {
+          "sourceVolume": "migrations-volume",
+          "containerPath": "/code/api/migrations",
+          "readOnly": false
+        }
+      ],
+      "environment": [
+        { "name": "DEBUG", "value": "${DEBUG_MODE}" },
+        { "name": "APP_PORT", "value": "${APP_PORT}" },
+        { "name": "DB_ENGINE", "value": "${DB_ENGINE}" },
+        { "name": "DB_PORT", "value": "${DB_PORT}" },
+        { "name": "TELEMETRY_URL", "value": "${TELEMETRY_URL}" }
+      ],
+      "secrets": [
+        { "name": "DB_HOST", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/DB_HOST" },
+        { "name": "DB_NAME", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/DB_NAME" },
+        { "name": "DB_USER", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/DB_USER" },
+        { "name": "DB_PASSWORD", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/DB_PASSWORD" },
+        { "name": "ELASTICSEARCH_INDEX", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/ELASTICSEARCH_INDEX" },
+        { "name": "ELASTICSEARCH_USERNAME", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/ELASTICSEARCH_USERNAME" },
+        { "name": "ELASTICSEARCH_PASS", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/ELASTICSEARCH_PASS" },
+        { "name": "REDIS_HOST", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/REDIS_HOST" },
+        { "name": "SECRET_KEY", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/SECRET_KEY" },
+        { "name": "URL_WHITELIST", "valueFrom": "arn:aws:ssm:${AWS_REGION}:${AWS_ACCOUNT_ID}:parameter/dataspace/URL_WHITELIST" }
+      ],
+      "logConfiguration": {
+        "logDriver": "awslogs",
+        "options": {
+          "awslogs-group": "/ecs/dataspace",
+          "awslogs-region": "${AWS_REGION}",
+          "awslogs-stream-prefix": "ecs",
+          "awslogs-create-group": "true"
+        }
+      },
+      "healthCheck": {
+        "command": ["CMD-SHELL", "/code/healthcheck.sh && curl -f http://localhost:${APP_PORT}/health/ || exit 1"],
+        "interval": 30,
+        "timeout": 5,
+        "retries": 3,
+        "startPeriod": 60
+      }
+    }
+  ],
+  "tags": [
+    { "key": "Environment", "value": "${ENVIRONMENT}" },
+    { "key": "Application", "value": "dataspace" },
+    { "key": "ManagedBy", "value": "CloudFormation" }
+  ]
+}
diff --git a/docker-compose.yml b/docker-compose.yml
index 3fc5191..607d99b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -22,6 +22,10 @@ services:
       timeout: 10s
       retries: 3
       start_period: 40s
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"
 
   backend_db:
     image: "postgres:14.4"
@@ -42,6 +46,10 @@ services:
       timeout: 5s
       retries: 5
       start_period: 10s
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"
 
   elasticsearch:
     image: docker.elastic.co/elasticsearch/elasticsearch:8.12.2
@@ -74,6 +82,10 @@ services:
       timeout: 10s
       retries: 3
       start_period: 40s
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"
 
   redis:
     image: "redis:alpine"
@@ -90,6 +102,10 @@ services:
      timeout: 5s
      retries: 5
      start_period: 10s
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"
 
   telemetry_elasticsearch:
     image: docker.elastic.co/elasticsearch/elasticsearch:7.16.2
@@ -116,6 +132,10 @@ services:
       test: curl -s http://localhost:9200/_cluster/health | grep -vq '"status":"red"'
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"
 
   kibana:
     image: docker.elastic.co/kibana/kibana:7.16.2
@@ -171,6 +191,10 @@ services:
       test: curl --write-out 'HTTP %{http_code}' --fail --silent --output /dev/null http://localhost:8200/
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"
 
   otel-collector:
     image: otel/opentelemetry-collector:latest
@@ -184,6 +208,10 @@ services:
         condition: service_healthy
     ports:
       - 4317:4317
+    logging:
+      options:
+        max-size: "10m"
+        max-file: "3"
 
 volumes:
   backend_db_data:
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
new file mode 100644
index 0000000..05d482c
--- /dev/null
+++ b/docker-entrypoint.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+set -e
+
+# Wait for database to be ready
+echo "Waiting for database..."
+python << END
+import sys
+import time
+import psycopg2
+import os
+
+# Get database connection details from environment variables
+host = os.environ.get("DB_HOST", "localhost")
+port = os.environ.get("DB_PORT", "5432")
+dbname = os.environ.get("DB_NAME", "postgres")
+user = os.environ.get("DB_USER", "postgres")
+password = os.environ.get("DB_PASSWORD", "postgres")
+
+# Try to connect to the database
+start_time = time.time()
+timeout = 30
+while True:
+    try:
+        conn = psycopg2.connect(
+            host=host,
+            port=port,
+            dbname=dbname,
+            user=user,
+            password=password
+        )
+        conn.close()
+        print("Database is ready!")
+        break
+    except psycopg2.OperationalError as e:
+        if time.time() - start_time > timeout:
+            print(f"Could not connect to database after {timeout} seconds: {e}")
+            sys.exit(1)
+        print("Waiting for database to be ready...")
+        time.sleep(2)
+END
+
+# Ensure migrations directory exists with proper permissions
+echo "Ensuring migrations directory exists..."
+mkdir -p /code/api/migrations
+chmod -R 777 /code/api/migrations
+touch /code/api/migrations/__init__.py
+
+# Run makemigrations first to ensure migration files are created
+echo "Running makemigrations..."
+python manage.py makemigrations --noinput
+
+# Run migrations
+echo "Running migrations..."
+python manage.py migrate --noinput
+
+# Create superuser if needed
+if [ "$DJANGO_SUPERUSER_USERNAME" ] && [ "$DJANGO_SUPERUSER_PASSWORD" ] && [ "$DJANGO_SUPERUSER_EMAIL" ]; then
+    echo "Creating superuser..."
+    python manage.py createsuperuser --noinput
+fi
+
+# Collect static files
+if [ "$COLLECT_STATIC" = "true" ]; then
+    echo "Collecting static files..."
+    python manage.py collectstatic --noinput
+fi
+
+# Start server
+echo "Starting server..."
+exec "$@"