# PersonalizeInfraStack
# /personalize_cdk/personalize_infra_stack.py
import json
from aws_cdk import (
Stack,
RemovalPolicy,
CfnOutput,
aws_s3 as s3,
aws_iam as iam,
aws_personalize as personalize,
)
from constructs import Construct
class PersonalizeInfraStack(Stack):
    """Base infrastructure for an Amazon Personalize workload.

    The stack creates, in this order:

    * an S3 bucket for input data, with a bucket policy that lets the
      Personalize service principal read it;
    * an IAM role assumable by Personalize with the same read access;
    * an Interactions schema in the ECOMMERCE domain;
    * a dataset group in the ECOMMERCE domain;
    * an Interactions dataset that binds the schema to the dataset group.

    The identifiers of these resources are published as CloudFormation
    outputs, and the main constructs are kept as instance attributes
    (``data_bucket``, ``personalize_role``, ``dataset_group``,
    ``interactions_dataset``) so that other code can reference them.
    """

    # Service principal used both in the bucket policy and as the role's
    # trusted entity.
    _PERSONALIZE_PRINCIPAL = "personalize.amazonaws.com"

    # Domain shared by the schema and the dataset group; they must agree.
    _DOMAIN = "ECOMMERCE"

    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        """Create every resource of the stack and publish the outputs.

        Args:
            scope: Parent construct this stack is attached to.
            construct_id: Identifier of this stack within ``scope``.
            **kwargs: Forwarded unchanged to ``Stack.__init__``.
        """
        super().__init__(scope, construct_id, **kwargs)

        data_bucket = self._create_data_bucket()
        personalize_role = self._create_personalize_role(data_bucket)
        interactions_schema = self._create_interactions_schema()
        dataset_group = self._create_dataset_group()
        interactions_dataset = self._create_interactions_dataset(
            dataset_group, interactions_schema
        )
        self._create_outputs(
            data_bucket,
            personalize_role,
            dataset_group,
            interactions_schema,
            interactions_dataset,
        )

        # Store for use in pipeline
        self.data_bucket = data_bucket
        self.personalize_role = personalize_role
        self.dataset_group = dataset_group
        self.interactions_dataset = interactions_dataset

    @staticmethod
    def _s3_read_statement(bucket, principals=None):
        """Return an ALLOW statement for reading ``bucket`` and its objects.

        Args:
            bucket: Bucket whose ARN and object ARNs (``<arn>/*``) become the
                statement's resources.
            principals: Principals for a resource-based policy; leave as
                ``None`` for an identity-based policy attached to a role.

        Returns:
            The ``iam.PolicyStatement`` granting ``s3:GetObject`` and
            ``s3:ListBucket``.
        """
        return iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            principals=principals,
            actions=["s3:GetObject", "s3:ListBucket"],
            resources=[
                bucket.bucket_arn,
                bucket.bucket_arn + "/*",
            ],
        )

    # ===================
    # S3 Bucket for Data
    # ===================
    def _create_data_bucket(self):
        """Create the data bucket and allow Personalize to read from it."""
        # removal_policy=DESTROY together with auto_delete_objects=True makes
        # `cdk destroy` delete the bucket and its contents, which is useful
        # for development and testing.
        bucket = s3.Bucket(
            self,
            "PersonalizeDataBucket",
            bucket_name=f"personalize-data-{self.account}-{self.region}",
            removal_policy=RemovalPolicy.DESTROY,
            auto_delete_objects=True,
            versioned=True,
        )

        # Bucket policy for Personalize.
        # NOTE(review): the statement has no aws:SourceAccount/aws:SourceArn
        # condition; consider whether one is required here.
        bucket.add_to_resource_policy(
            self._s3_read_statement(
                bucket,
                principals=[iam.ServicePrincipal(self._PERSONALIZE_PRINCIPAL)],
            )
        )
        return bucket

    # ===================
    # IAM Role for Personalize
    # ===================
    def _create_personalize_role(self, bucket):
        """Create the role Personalize assumes, with read access to ``bucket``."""
        role = iam.Role(
            self,
            "PersonalizeRole",
            assumed_by=iam.ServicePrincipal(self._PERSONALIZE_PRINCIPAL),
            description="Role for Amazon Personalize to access S3",
        )
        # Add Personalize permissions
        role.add_to_policy(self._s3_read_statement(bucket))
        return role

    # ===================
    # Schema
    # ===================
    def _create_interactions_schema(self):
        """Create the Avro-style Interactions schema for the ECOMMERCE domain."""
        ecommerce_schema = {
            "type": "record",
            "name": "Interactions",
            "namespace": "com.amazonaws.personalize.schema",
            "fields": [
                {"name": "USER_ID", "type": "string"},
                {"name": "ITEM_ID", "type": "string"},
                {"name": "TIMESTAMP", "type": "long"},
                {"name": "EVENT_TYPE", "type": "string"},
            ],
            "version": "1.0",
        }
        return personalize.CfnSchema(
            self,
            "InteractionsSchema",
            name="cdk-interactions-schema",
            # The resource takes the schema as a JSON string, not a dict.
            schema=json.dumps(ecommerce_schema),
            domain=self._DOMAIN,
        )

    # ===================
    # Dataset Group
    # ===================
    def _create_dataset_group(self):
        """Create the ECOMMERCE-domain dataset group."""
        return personalize.CfnDatasetGroup(
            self,
            "DatasetGroup",
            name="cdk-recommendation-dataset-group",
            # Valid domain values are "ECOMMERCE" and "VIDEO_ON_DEMAND".
            # For a custom dataset group, omit `domain` altogether.
            domain=self._DOMAIN,
        )

    # ===================
    # Dataset
    # ===================
    def _create_interactions_dataset(self, dataset_group, schema):
        """Create the Interactions dataset inside ``dataset_group``.

        Args:
            dataset_group: The ``CfnDatasetGroup`` that will own the dataset.
            schema: The ``CfnSchema`` describing the dataset's records.

        Returns:
            The created ``personalize.CfnDataset``.
        """
        dataset = personalize.CfnDataset(
            self,
            "InteractionsDataset",
            dataset_group_arn=dataset_group.attr_dataset_group_arn,
            dataset_type="Interactions",
            name="cdk-interactions-dataset",
            schema_arn=schema.attr_schema_arn,
        )
        # Explicit dependencies, on top of the ARN references above, so the
        # dataset is created after its group and schema.
        dataset.add_dependency(dataset_group)
        dataset.add_dependency(schema)
        return dataset

    # ===================
    # Outputs
    # ===================
    def _create_outputs(self, bucket, role, dataset_group, schema, dataset):
        """Publish the resource identifiers as CloudFormation outputs."""
        CfnOutput(self, "BucketName", value=bucket.bucket_name)
        CfnOutput(self, "RoleArn", value=role.role_arn)
        CfnOutput(
            self, "DatasetGroupArn", value=dataset_group.attr_dataset_group_arn
        )
        CfnOutput(self, "SchemaArn", value=schema.attr_schema_arn)
        CfnOutput(self, "DatasetArn", value=dataset.attr_dataset_arn)