diff --git a/bin/single-node/.env b/bin/single-node/.env index 2c949de0fbe..dafec10cccc 100644 --- a/bin/single-node/.env +++ b/bin/single-node/.env @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +# Public host and ports exposed by the deployment TEXERA_HOST=http://localhost TEXERA_PORT=8080 MINIO_PORT=9000 @@ -27,37 +28,58 @@ TEXERA_SERVICE_LOG_LEVEL=INFO IMAGE_REGISTRY=ghcr.io/apache IMAGE_TAG=latest +# Admin credentials for Texera +USER_SYS_ADMIN_USERNAME=texera +USER_SYS_ADMIN_PASSWORD=texera + +# Postgres root credentials POSTGRES_USER=texera POSTGRES_PASSWORD=password -MINIO_ROOT_USER=texera_minio -MINIO_ROOT_PASSWORD=password +# S3 (MinIO) credentials +STORAGE_S3_AUTH_USERNAME=texera_minio +STORAGE_S3_AUTH_PASSWORD=password +# LakeFS server configuration LAKEFS_INSTALLATION_USER_NAME=texera-admin LAKEFS_INSTALLATION_ACCESS_KEY_ID=AKIAIOSFOLKFSSAMPLES LAKEFS_INSTALLATION_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY LAKEFS_BLOCKSTORE_TYPE=s3 LAKEFS_BLOCKSTORE_S3_FORCE_PATH_STYLE=true LAKEFS_BLOCKSTORE_S3_ENDPOINT=http://texera-minio:9000 -LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID=texera_minio -LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY=password LAKEFS_AUTH_ENCRYPT_SECRET_KEY=random_string_for_lakefs LAKEFS_LOGGING_LEVEL=INFO LAKEFS_STATS_ENABLED=1 LAKEFS_DATABASE_TYPE=postgres LAKEFS_DATABASE_POSTGRES_CONNECTION_STRING=postgres://texera:password@texera-postgres:5432/texera_lakefs?sslmode=disable +# Lakekeeper server configuration +LAKEKEEPER__PG_DATABASE_URL_READ=postgres://texera:password@texera-postgres:5432/texera_lakekeeper +LAKEKEEPER__PG_DATABASE_URL_WRITE=postgres://texera:password@texera-postgres:5432/texera_lakekeeper +LAKEKEEPER__PG_ENCRYPTION_KEY=texera_key +LAKEKEEPER_BASE_URI=http://texera-lakekeeper:8181 + +# Texera storage endpoints STORAGE_S3_ENDPOINT=http://texera-minio:9000 +STORAGE_S3_REGION=us-west-2 STORAGE_LAKEFS_ENDPOINT=http://texera-lakefs:8000/api/v1 STORAGE_JDBC_URL=jdbc:postgresql://texera-postgres:5432/texera_db?currentSchema=texera_db,public STORAGE_JDBC_USERNAME=texera STORAGE_JDBC_PASSWORD=password -FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT=http://file-service:9092/api/dataset/presign-download -FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT=http://file-service:9092/api/dataset/did/upload + +# Iceberg catalog selector (valid values: rest, postgres) +STORAGE_ICEBERG_CATALOG_TYPE=rest + +# Iceberg REST catalog client configuration +STORAGE_ICEBERG_CATALOG_REST_URI=http://texera-lakekeeper:8181/catalog +STORAGE_ICEBERG_CATALOG_REST_WAREHOUSE_NAME=texera +STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET=texera-iceberg + +# Postgres-backed Iceberg catalog STORAGE_ICEBERG_CATALOG_POSTGRES_URI_WITHOUT_SCHEME=texera-postgres:5432/texera_iceberg_catalog STORAGE_ICEBERG_CATALOG_POSTGRES_USERNAME=texera STORAGE_ICEBERG_CATALOG_POSTGRES_PASSWORD=password -# Admin credentials for Texera (used for login and example data loading) -USER_SYS_ADMIN_USERNAME=texera -USER_SYS_ADMIN_PASSWORD=texera \ No newline at end of file +# File service endpoints +FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT=http://file-service:9092/api/dataset/presign-download +FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT=http://file-service:9092/api/dataset/did/upload diff --git a/bin/single-node/docker-compose.yml b/bin/single-node/docker-compose.yml index be9b3e30bd9..bfbccf44f4f 100644 --- a/bin/single-node/docker-compose.yml +++ b/bin/single-node/docker-compose.yml @@ -26,9 +26,37 @@ services: - "${MINIO_PORT:-9000}:9000" env_file: - .env + environment: + - MINIO_ROOT_USER=${STORAGE_S3_AUTH_USERNAME} + - MINIO_ROOT_PASSWORD=${STORAGE_S3_AUTH_PASSWORD} volumes: - minio_data:/data command: server --console-address ":9001" /data + healthcheck: + test: ["CMD", "curl", "-sf", "http://localhost:9000/minio/health/live"] + interval: 5s + timeout: 3s + retries: 10 + + # One-shot init container that creates the Iceberg warehouse bucket on first + # startup. MinIO's server image has no built-in "create bucket on boot" flag, + # so this sidecar uses the official `mc` client to do it idempotently. + minio-init: + image: minio/mc:RELEASE.2025-05-21T01-59-54Z + container_name: texera-minio-init + depends_on: + minio: + condition: service_healthy + env_file: + - .env + restart: "no" + entrypoint: ["/bin/sh", "-c"] + command: + - | + set -e + mc alias set local "$$STORAGE_S3_ENDPOINT" "$$STORAGE_S3_AUTH_USERNAME" "$$STORAGE_S3_AUTH_PASSWORD" + mc mb --ignore-existing "local/$$STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET" + echo "MinIO bucket '$$STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET' is ready." # PostgreSQL with PGroonga extension for full-text search. # Used by lakeFS and Texera's metadata storage. @@ -63,6 +91,8 @@ services: environment: # This port also need to be changed if the port of MinIO service is changed - LAKEFS_BLOCKSTORE_S3_PRE_SIGNED_ENDPOINT=${TEXERA_HOST}:${MINIO_PORT:-9000} + - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID=${STORAGE_S3_AUTH_USERNAME} + - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY=${STORAGE_S3_AUTH_PASSWORD} entrypoint: ["/bin/sh", "-c"] command: - | @@ -75,6 +105,132 @@ services: timeout: 5s retries: 10 + # Lakekeeper migration init container + # This runs once to migrate the database before the lakekeeper server starts + lakekeeper-migrate: + image: vakamo/lakekeeper:v0.11.0 + container_name: texera-lakekeeper-migrate + depends_on: + postgres: + condition: service_healthy + env_file: + - .env + restart: "no" + entrypoint: ["/home/nonroot/lakekeeper"] + command: ["migrate"] + + # Lakekeeper is the Iceberg REST catalog service + lakekeeper: + image: vakamo/lakekeeper:v0.11.0 + container_name: texera-lakekeeper + restart: always + depends_on: + postgres: + condition: service_healthy + minio: + condition: service_started + lakekeeper-migrate: + condition: service_completed_successfully + env_file: + - .env + entrypoint: ["/home/nonroot/lakekeeper"] + command: ["serve"] + healthcheck: + test: ["CMD", "/home/nonroot/lakekeeper", "healthcheck"] + interval: 10s + timeout: 5s + retries: 10 + start_period: 10s + + # One-shot init container that creates the Lakekeeper default project and + # the Iceberg warehouse pointing at the MinIO bucket prepared by minio-init. + lakekeeper-init: + image: alpine:3.19 + container_name: texera-lakekeeper-init + depends_on: + lakekeeper: + condition: service_healthy + minio-init: + condition: service_completed_successfully + env_file: + - .env + restart: "no" + entrypoint: [ "/bin/sh", "-c" ] + command: + - | + set -e + + echo "Installing dependencies..." + apk add --no-cache curl ca-certificates + + check_status() { + if [ "$$1" -ge 200 ] && [ "$$1" -lt 300 ]; then + echo "Created $$2 successfully (HTTP $$1)." + elif [ "$$1" -eq 409 ]; then + echo "$$2 already exists (HTTP 409). Treating as success." + else + echo "Failed to create $$2. HTTP Code: $$1" + echo "ERROR RESPONSE:" + if [ -f /tmp/response.txt ]; then cat /tmp/response.txt; fi + echo "" + exit 1 + fi + } + + echo "Step 1: Initializing Default Project..." + PROJECT_PAYLOAD='{"project-id": "00000000-0000-0000-0000-000000000000", "project-name": "default"}' + + PROJECT_CODE=$$(curl -s -o /tmp/response.txt -w "%{http_code}" \ + -X POST \ + -H "Content-Type: application/json" \ + -d "$$PROJECT_PAYLOAD" \ + "$$LAKEKEEPER_BASE_URI/management/v1/project" || echo "000") + + check_status "$$PROJECT_CODE" "Default Project" + + + echo "Step 2: Initializing Warehouse '$$STORAGE_ICEBERG_CATALOG_REST_WAREHOUSE_NAME'..." + CREATE_PAYLOAD=$$(cat </dev/null; then + echo "Lakekeeper Warehouse already exists (storage profile overlap). Treating as success." + else + check_status "$$WAREHOUSE_CODE" "Lakekeeper Warehouse" + fi + + echo "Initialization sequence completed successfully!" + + # Part2: Specification of Texera's micro-services # FileService provides endpoints for Texera's dataset management file-service: @@ -166,6 +322,10 @@ services: depends_on: workflow-compiling-service: condition: service_started + lakekeeper: + condition: service_healthy + lakekeeper-init: + condition: service_completed_successfully env_file: - .env volumes: