diff --git a/.secrets.baseline b/.secrets.baseline index e0030466f1f..bd2a7f382c2 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -272,6 +272,29 @@ "line_number": 11 } ], + "examples/monitoring/monitoring-quickstart.ipynb": [ + { + "type": "Base64 High Entropy String", + "filename": "examples/monitoring/monitoring-quickstart.ipynb", + "hashed_secret": "8d921d6d629bc22832e5fae42dfc828b8ce5cf47", + "is_verified": false, + "line_number": 606 + }, + { + "type": "Base64 High Entropy String", + "filename": "examples/monitoring/monitoring-quickstart.ipynb", + "hashed_secret": "37b47d0b2461457e316f1b0be0eef0f9599d440d", + "is_verified": false, + "line_number": 780 + }, + { + "type": "Base64 High Entropy String", + "filename": "examples/monitoring/monitoring-quickstart.ipynb", + "hashed_secret": "be6715cc8d40a964c7bd1fd8eff5e840d61ad598", + "is_verified": false, + "line_number": 875 + } + ], "examples/online_store/milvus_tutorial/docker-compose.yml": [ { "type": "Secret Keyword", @@ -934,7 +957,7 @@ "filename": "infra/feast-operator/api/v1/featurestore_types.go", "hashed_secret": "44e17306b837162269a410204daaa5ecee4ec22c", "is_verified": false, - "line_number": 885 + "line_number": 889 } ], "infra/feast-operator/api/v1/zz_generated.deepcopy.go": [ @@ -943,21 +966,21 @@ "filename": "infra/feast-operator/api/v1/zz_generated.deepcopy.go", "hashed_secret": "f914fc9324de1bec1ad13dec94a8ea2ddb41fc87", "is_verified": false, - "line_number": 785 + "line_number": 810 }, { "type": "Secret Keyword", "filename": "infra/feast-operator/api/v1/zz_generated.deepcopy.go", "hashed_secret": "44e17306b837162269a410204daaa5ecee4ec22c", "is_verified": false, - "line_number": 846 + "line_number": 871 }, { "type": "Secret Keyword", "filename": "infra/feast-operator/api/v1/zz_generated.deepcopy.go", "hashed_secret": "c2028031c154bbe86fd69bef740855c74b927dcf", "is_verified": false, - "line_number": 1496 + "line_number": 1516 } ], "infra/feast-operator/api/v1alpha1/featurestore_types.go": [ 
@@ -1140,14 +1163,14 @@ "filename": "infra/feast-operator/internal/controller/services/repo_config.go", "hashed_secret": "44e17306b837162269a410204daaa5ecee4ec22c", "is_verified": false, - "line_number": 129 + "line_number": 133 }, { "type": "Secret Keyword", "filename": "infra/feast-operator/internal/controller/services/repo_config.go", "hashed_secret": "e2fb052132fd6a07a56af2013e0b62a1f510572c", "is_verified": false, - "line_number": 220 + "line_number": 224 } ], "infra/feast-operator/internal/controller/services/services.go": [ diff --git a/Makefile b/Makefile index 799bc9c42fc..1c150abad35 100644 --- a/Makefile +++ b/Makefile @@ -105,7 +105,10 @@ install-python-dependencies-minimal: ## Install minimal Python dependencies usin # Used in github actions/ci install-python-dependencies-ci: ## Install Python CI dependencies using uv pip sync # Create virtualenv if it doesn't exist - uv venv .venv + @if [ ! -d .venv ]; then \ + echo "Creating virtualenv..."; \ + uv venv .venv; \ + fi # Install CPU-only torch first to prevent CUDA dependency issues (Linux only) @if [ "$$(uname -s)" = "Linux" ]; then \ echo "Installing dependencies with torch CPU index for Linux..."; \ diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 1b0b0961d79..ab1d5a80e1b 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -57,6 +57,7 @@ * [MCP - AI Agent Example](../examples/mcp_feature_store/README.md) * [Feast-Powered AI Agent](../examples/agent_feature_store/README.md) * [Demo Notebooks](tutorials/demo-notebooks.md) +* [Feature Quality Monitoring Quickstart](../examples/monitoring/monitoring-quickstart.ipynb) ## How-to Guides @@ -90,6 +91,7 @@ * [Adding or reusing tests](how-to-guides/adding-or-reusing-tests.md) * [Starting Feast servers in TLS(SSL) Mode](how-to-guides/starting-feast-servers-tls-mode.md) * [Importing Features from dbt](how-to-guides/dbt-integration.md) +* [Feature Quality Monitoring](how-to-guides/feature-monitoring.md) ## Reference diff --git 
a/docs/how-to-guides/feature-monitoring.md b/docs/how-to-guides/feature-monitoring.md new file mode 100644 index 00000000000..c79b955ed2e --- /dev/null +++ b/docs/how-to-guides/feature-monitoring.md @@ -0,0 +1,388 @@ +# Feature Quality Monitoring + +## Overview + +Feast's data quality monitoring system computes, stores, and serves statistical metrics for every registered feature. It gives you visibility into feature health — distributions, null rates, percentiles, histograms — across batch data and feature serving logs. + +This guide covers: + +1. [Prerequisites](#1-prerequisites) +2. [Auto-baseline on registration](#2-auto-baseline-on-registration) +3. [Scheduled monitoring with the CLI](#3-scheduled-monitoring-with-the-cli) +4. [Monitoring feature serving logs](#4-monitoring-feature-serving-logs) +5. [Reading metrics via REST API](#5-reading-metrics-via-rest-api) +6. [On-demand exploration (transient compute)](#6-on-demand-exploration) +7. [Integrating with orchestrators](#7-integrating-with-orchestrators) +8. [Supported backends](#8-supported-backends) + +## 1. Prerequisites + +Monitoring works with any supported offline store backend. No additional infrastructure or configuration is needed — monitoring tables are created automatically on first use. + +**Minimum setup:** + +- A Feast project with at least one feature view and a configured offline store +- Feast SDK installed (`pip install feast`) + +**For serving log monitoring:** + +- At least one feature service with `logging_config` set (see [step 4](#4-monitoring-feature-serving-logs)) + +## 2. Auto-baseline on registration + +When you run `feast apply` to register new features, Feast automatically queues baseline metric computation: + +```bash +$ feast apply +Applying changes... +Created feature view 'driver_stats' with 3 features + → Queued baseline metrics computation (DQM job: abc-123) +Done! +``` + +The baseline reads all available source data and stores the resulting statistics with `is_baseline=TRUE`. 
This serves as the reference distribution for future drift detection. + +Baseline computation is: +- **Non-blocking** — `feast apply` returns immediately; computation runs asynchronously +- **Idempotent** — only features without existing baselines are computed; re-running `feast apply` won't recompute existing baselines + +### Disabling auto-baseline + +To skip automatic baseline computation on `feast apply`, set the DQM config in `feature_store.yaml`: + +```yaml +dqm: + auto_baseline: false +``` + +When using the Feast operator, set this in the `FeatureStore` CR: + +```yaml +apiVersion: feast.dev/v1 +kind: FeatureStore +spec: + feastProject: my_project + dqm: + autoBaseline: false +``` + +## 3. Scheduled monitoring with the CLI + +### Auto mode (recommended for production) + +Schedule a single daily job that computes all granularities automatically: + +```bash +feast monitor run +``` + +This detects the latest event timestamp in the source data and computes metrics for 5 time windows: + +| Granularity | Window | +|-------------|--------| +| `daily` | Last 1 day | +| `weekly` | Last 7 days | +| `biweekly` | Last 14 days | +| `monthly` | Last 30 days | +| `quarterly` | Last 90 days | + +No date arguments needed. One scheduled job produces all granularities. 
+ +### Targeting a specific feature view + +```bash +feast monitor run --feature-view driver_stats +``` + +### Explicit date range and granularity + +```bash +feast monitor run \ + --feature-view driver_stats \ + --start-date 2025-01-01 \ + --end-date 2025-01-07 \ + --granularity weekly +``` + +### Setting a manual baseline + +```bash +feast monitor run \ + --feature-view driver_stats \ + --start-date 2025-01-01 \ + --end-date 2025-03-31 \ + --granularity daily \ + --set-baseline +``` + +### CLI reference + +``` +Usage: feast monitor run [OPTIONS] + +Options: + -p, --project TEXT Feast project name (defaults to feature_store.yaml) + -v, --feature-view TEXT Feature view name (omit for all) + -f, --feature-name TEXT Feature name(s), repeatable (omit for all) + --start-date TEXT Start date YYYY-MM-DD (omit for auto-detect) + --end-date TEXT End date YYYY-MM-DD (omit for auto-detect) + -g, --granularity One of: daily, weekly, biweekly, monthly, quarterly + --set-baseline Mark this computation as baseline + --source-type One of: batch, log, all (default: batch) + --help Show this message and exit. +``` + +## 4. Monitoring feature serving logs + +If your feature services have logging configured, you can compute metrics from the actual features served to models in production. 
+ +### Setting up feature service logging + +In your feature definitions: + +```python +from feast import FeatureService, LoggingConfig +from feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source import ( + PostgreSQLLoggingDestination, +) + +driver_service = FeatureService( + name="driver_service", + features=[driver_stats_fv], + logging_config=LoggingConfig( + destination=PostgreSQLLoggingDestination(table_name="feast_driver_logs"), + sample_rate=1.0, + ), +) +``` + +### Computing log metrics + +**Auto mode (all feature services with logging):** + +```bash +feast monitor run --source-type log +``` + +**Specific feature service:** + +```bash +feast monitor run --source-type log --feature-view driver_service +``` + +**Both batch and log in one run:** + +```bash +feast monitor run --source-type all +``` + +Log metrics are stored with `data_source_type="log"` alongside batch metrics in the same monitoring tables. Feature names from the log schema (e.g., `driver_stats__conv_rate`) are automatically normalized back to their original names (`conv_rate`) and associated with the correct feature view — enabling batch-vs-log comparison and drift detection. + +### Via REST API + +```bash +# Compute log metrics +POST /monitoring/compute/log +{ + "project": "my_project", + "feature_service_name": "driver_service", + "granularity": "daily" +} + +# Auto-compute all log metrics +POST /monitoring/auto_compute/log +{ + "project": "my_project" +} +``` + +## 5. Reading metrics via REST API + +All read endpoints support cascading filters: `project` → `feature_service_name` → `feature_view_name` → `feature_name` → `granularity` → `data_source_type`. 
+ +### Per-feature metrics + +``` +GET /monitoring/metrics/features?project=my_project&feature_view_name=driver_stats&granularity=daily +``` + +**Response:** + +```json +[ + { + "project_id": "my_project", + "feature_view_name": "driver_stats", + "feature_name": "conv_rate", + "feature_type": "numeric", + "metric_date": "2025-03-26", + "granularity": "daily", + "data_source_type": "batch", + "row_count": 15000, + "null_count": 12, + "null_rate": 0.0008, + "mean": 0.523, + "stddev": 0.189, + "min_val": 0.001, + "max_val": 0.998, + "p50": 0.51, + "p75": 0.68, + "p90": 0.82, + "p95": 0.89, + "p99": 0.96, + "histogram": { + "bins": [0.0, 0.05, 0.1, "..."], + "counts": [120, 340, 560, "..."], + "bin_width": 0.05 + } + } +] +``` + +### Per-feature-view aggregates + +``` +GET /monitoring/metrics/feature_views?project=my_project&feature_view_name=driver_stats +``` + +### Per-feature-service aggregates + +``` +GET /monitoring/metrics/feature_services?project=my_project&feature_service_name=driver_service +``` + +### Baseline + +``` +GET /monitoring/metrics/baseline?project=my_project&feature_view_name=driver_stats +``` + +### Time-series (for trend charts) + +``` +GET /monitoring/metrics/timeseries?project=my_project&feature_name=conv_rate&granularity=daily&start_date=2025-01-01&end_date=2025-03-31 +``` + +### Filtering batch vs. 
log metrics + +Add `data_source_type=batch` or `data_source_type=log` to any read endpoint: + +``` +GET /monitoring/metrics/features?project=my_project&data_source_type=log +``` + +### Full endpoint reference + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `POST` | `/monitoring/compute` | Submit batch DQM job | +| `POST` | `/monitoring/auto_compute` | Auto-detect dates, all granularities | +| `POST` | `/monitoring/compute/transient` | On-demand compute (not stored) | +| `POST` | `/monitoring/compute/log` | Compute from serving logs | +| `POST` | `/monitoring/auto_compute/log` | Auto-detect log dates, all granularities | +| `GET` | `/monitoring/jobs/{job_id}` | DQM job status | +| `GET` | `/monitoring/metrics/features` | Per-feature metrics | +| `GET` | `/monitoring/metrics/feature_views` | Per-view aggregates | +| `GET` | `/monitoring/metrics/feature_services` | Per-service aggregates | +| `GET` | `/monitoring/metrics/baseline` | Baseline metrics | +| `GET` | `/monitoring/metrics/timeseries` | Time-series data | + +## 6. On-demand exploration + +When you need metrics for an arbitrary date range (e.g., "show me the distribution for Jan 5 to Jan 20"), use the transient compute endpoint. It reads source data for the exact range, computes fresh statistics, and returns them directly without storing. + +```bash +POST /monitoring/compute/transient +{ + "project": "my_project", + "feature_view_name": "driver_stats", + "feature_names": ["conv_rate"], + "start_date": "2025-01-05", + "end_date": "2025-01-20" +} +``` + +This is necessary because pre-computed histograms from different date ranges have different bin edges and cannot be merged losslessly. + +## 7. 
Integrating with orchestrators + +### Airflow + +```python +from airflow.operators.bash import BashOperator + +monitor_task = BashOperator( + task_id="feast_monitor", + bash_command="feast monitor run", + cwd="/path/to/feast/repo", +) +``` + +### Kubeflow Pipelines (KFP) + +```python +from kfp import dsl + +@dsl.component(base_image="feast-image:latest") +def monitor_features(): + import subprocess + subprocess.run(["feast", "monitor", "run"], check=True, cwd="/feast/repo") +``` + +### Cron + +```cron +# Daily at 2:00 AM UTC +0 2 * * * cd /path/to/feast/repo && feast monitor run >> /var/log/feast-monitor.log 2>&1 +``` + +### Monitoring both batch and log in one job + +```bash +feast monitor run --source-type all +``` + +## 8. Supported backends + +Monitoring works natively with all offline stores that serve as compute engines for Feast materialization: + +| Backend | Compute | Storage | +|---------|---------|---------| +| PostgreSQL | SQL push-down | `INSERT ON CONFLICT` | +| Snowflake | SQL push-down | `MERGE` with `VARIANT` JSON | +| BigQuery | SQL push-down | `MERGE` into BQ tables | +| Redshift | SQL push-down | `MERGE` via Data API | +| Spark | SparkSQL push-down | Parquet tables | +| Oracle | SQL via Ibis | `MERGE` from `DUAL` | +| DuckDB | In-memory SQL | Parquet files | +| Dask | PyArrow compute | Parquet files | + +Backends not listed above fall back to Python-based computation — the offline store's `pull_all_from_table_or_query()` returns a PyArrow Table, and metrics are computed using `pyarrow.compute` and `numpy`. 
+ +## What metrics are computed + +**Per-feature (full profile):** + +| Metric | Numeric | Categorical | +|--------|:-------:|:-----------:| +| row_count, null_count, null_rate | Yes | Yes | +| mean, stddev, min_val, max_val | Yes | — | +| p50, p75, p90, p95, p99 | Yes | — | +| histogram (JSONB) | Binned distribution | Top-N values with counts | + +**Per-feature-view and per-feature-service (aggregate summaries):** + +| Metric | Description | +|--------|-------------| +| total_row_count | Total rows in the view | +| total_features | Number of features | +| features_with_nulls | Count of features with any nulls | +| avg_null_rate, max_null_rate | Aggregate null rate statistics | + +## RBAC + +Monitoring respects Feast's existing RBAC: + +- **Compute operations** (`POST /monitoring/compute`, `/monitoring/auto_compute`, `/monitoring/compute/log`, `/monitoring/auto_compute/log`) require `AuthzedAction.UPDATE` +- **Transient compute** (`POST /monitoring/compute/transient`) requires `AuthzedAction.DESCRIBE` +- **Read operations** (`GET /monitoring/metrics/*`) require `AuthzedAction.DESCRIBE` diff --git a/docs/reference/feature-servers/python-feature-server.md b/docs/reference/feature-servers/python-feature-server.md index 654c4b9f938..4802599866d 100644 --- a/docs/reference/feature-servers/python-feature-server.md +++ b/docs/reference/feature-servers/python-feature-server.md @@ -352,11 +352,14 @@ feature_server: push: true # push request counters materialization: true # materialization counters & duration freshness: true # feature freshness gauges + offline_features: true # offline store retrieval counters & latency + audit_logging: false # structured JSON audit logs (see below) ``` Any category set to `false` will emit no metrics and start no background threads (e.g., setting `freshness: false` prevents the registry polling -thread from starting). All categories default to `true`. +thread from starting). All categories default to `true` except +`audit_logging`, which defaults to `false`. 
### Available metrics @@ -375,6 +378,9 @@ thread from starting). All categories default to `true`. | `feast_materialization_result_total` | Counter | `feature_view`, `status` | `materialization` | Materialization runs (success/failure) | | `feast_materialization_duration_seconds` | Histogram | `feature_view` | `materialization` | Materialization duration per feature view | | `feast_feature_freshness_seconds` | Gauge | `feature_view`, `project` | `freshness` | Seconds since last materialization | +| `feast_offline_store_request_total` | Counter | `method`, `status` | `offline_features` | Total offline store retrieval requests | +| `feast_offline_store_request_latency_seconds` | Histogram | `method` | `offline_features` | Latency of offline store retrieval operations | +| `feast_offline_store_row_count` | Histogram | `method` | `offline_features` | Rows returned by offline store retrieval | ### Per-ODFV transformation metrics @@ -405,6 +411,70 @@ The `odfv_name` label lets you filter or group by individual ODFV, and the `mode` label (`python`, `pandas`, `substrait`) lets you compare transformation engines. +### Audit logging + +Feast can emit structured JSON audit log entries for every online and offline +feature retrieval. These are written to the `feast.audit` logger (Python's standard +`logging` module), so you can route them to a dedicated file, SIEM, or log aggregator +independently of application logs. + +Audit logging is **disabled by default**. 
Enable it in `feature_store.yaml`: + +```yaml +feature_server: + type: local + metrics: + enabled: true + audit_logging: true +``` + +**Online audit log** (emitted per `/get-online-features` call): + +```json +{ + "event": "online_feature_request", + "timestamp": "2026-05-11T08:30:00.123456+00:00", + "requestor_id": "user@example.com", + "entity_keys": ["driver_id"], + "entity_count": 3, + "feature_views": ["driver_hourly_stats"], + "feature_count": 3, + "status": "success", + "latency_ms": 12.34 +} +``` + +**Offline audit log** (emitted per `RetrievalJob.to_arrow()` call): + +```json +{ + "event": "offline_feature_retrieval", + "timestamp": "2026-05-11T08:31:00.456789+00:00", + "method": "to_arrow", + "start_time": "2026-05-11T08:30:59.226789+00:00", + "end_time": "2026-05-11T08:31:00.456789+00:00", + "feature_views": ["driver_hourly_stats"], + "feature_count": 3, + "row_count": 500, + "status": "success", + "duration_ms": 1230.0 +} +``` + +The `requestor_id` field in online audit logs is populated from the +security manager's current user when authentication is configured, and +falls back to `"anonymous"` otherwise. + +To route audit logs to a separate file: + +```python +import logging + +handler = logging.FileHandler("/var/log/feast/audit.log") +handler.setFormatter(logging.Formatter("%(message)s")) +logging.getLogger("feast.audit").addHandler(handler) +``` + ### Scraping with Prometheus ```yaml diff --git a/examples/monitoring/monitoring-quickstart.ipynb b/examples/monitoring/monitoring-quickstart.ipynb new file mode 100644 index 00000000000..77101ffff51 --- /dev/null +++ b/examples/monitoring/monitoring-quickstart.ipynb @@ -0,0 +1,1256 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Feature Quality Monitoring — Quickstart\n", + "\n", + "This notebook walks you through Feast's data quality monitoring end-to-end:\n", + "\n", + "1. Set up a feature store with a PostgreSQL offline store\n", + "2. 
Register features and trigger baseline computation\n", + "3. Compute metrics across multiple granularities\n", + "4. Read metrics via the Python SDK and REST API\n", + "5. Set up serving log monitoring\n", + "6. Use on-demand exploration for custom date ranges\n", + "\n", + "**Prerequisites:** A running PostgreSQL instance and `feast[postgres]` installed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Install Feast" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "!uv pip install -q 'feast[postgres]'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Configure the Feature Store\n", + "\n", + "Create a minimal `feature_store.yaml` with a PostgreSQL offline store." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Working directory: /var/folders/cn/z7vz24yj25d8fjqdrs9jbsh00000gn/T/feast_monitoring_demo_kze7m3sk\n" + ] + } + ], + "source": [ + "import os\n", + "import tempfile\n", + "\n", + "REPO_DIR = tempfile.mkdtemp(prefix=\"feast_monitoring_demo_\")\n", + "os.makedirs(REPO_DIR, exist_ok=True)\n", + "print(f\"Working directory: {REPO_DIR}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "feature_store.yaml written.\n" + ] + } + ], + "source": [ + "# Adjust these to match your PostgreSQL instance\n", + "PG_HOST = os.environ.get(\"FEAST_PG_HOST\", \"localhost\")\n", + "PG_PORT = os.environ.get(\"FEAST_PG_PORT\", \"5432\")\n", + "PG_DB = os.environ.get(\"FEAST_PG_DB\", \"feast\")\n", + "PG_USER = os.environ.get(\"FEAST_PG_USER\", \"feast\")\n", + "PG_PASS = os.environ.get(\"FEAST_PG_PASS\", \"feast\")\n", + "\n", + "PG_SSLMODE = os.environ.get(\"FEAST_PG_SSLMODE\", \"disable\")\n", + "\n", + "feature_store_yaml 
= f\"\"\"\n", + "project: monitoring_demo\n", + "registry:\n", + " registry_type: sql\n", + " path: postgresql://{PG_USER}:{PG_PASS}@{PG_HOST}:{PG_PORT}/{PG_DB}?sslmode={PG_SSLMODE}\n", + "provider: local\n", + "offline_store:\n", + " type: postgres\n", + " host: {PG_HOST}\n", + " port: {PG_PORT}\n", + " database: {PG_DB}\n", + " user: {PG_USER}\n", + " password: {PG_PASS}\n", + " sslmode: {PG_SSLMODE}\n", + "online_store:\n", + " type: sqlite\n", + " path: {REPO_DIR}/online_store.db\n", + "entity_key_serialization_version: 3\n", + "\"\"\"\n", + "\n", + "with open(os.path.join(REPO_DIR, \"feature_store.yaml\"), \"w\") as f:\n", + " f.write(feature_store_yaml)\n", + "\n", + "print(\"feature_store.yaml written.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Create Sample Data and Feature Definitions" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample data: 5000 rows, 60 days\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
driver_idevent_timestampconv_rateacc_rateavg_daily_tripsvehicle_typecreated
011482025-02-080.3483070.79439014compact2025-02-08
115392025-02-210.3059450.74904625van2025-02-21
214872025-01-290.7916410.78449217sedan2025-01-29
318212025-01-150.2673080.72622617sedan2025-01-15
414372025-02-120.5446180.72956811suv2025-02-12
\n", + "
" + ], + "text/plain": [ + " driver_id event_timestamp conv_rate acc_rate avg_daily_trips \\\n", + "0 1148 2025-02-08 0.348307 0.794390 14 \n", + "1 1539 2025-02-21 0.305945 0.749046 25 \n", + "2 1487 2025-01-29 0.791641 0.784492 17 \n", + "3 1821 2025-01-15 0.267308 0.726226 17 \n", + "4 1437 2025-02-12 0.544618 0.729568 11 \n", + "\n", + " vehicle_type created \n", + "0 compact 2025-02-08 \n", + "1 van 2025-02-21 \n", + "2 sedan 2025-01-29 \n", + "3 sedan 2025-01-15 \n", + "4 suv 2025-02-12 " + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from datetime import datetime, timedelta\n", + "\n", + "np.random.seed(42)\n", + "\n", + "N_ROWS = 5000\n", + "N_DAYS = 60\n", + "\n", + "base_date = datetime(2025, 1, 1)\n", + "timestamps = [base_date + timedelta(days=int(d)) for d in np.random.randint(0, N_DAYS, N_ROWS)]\n", + "\n", + "df = pd.DataFrame({\n", + " \"driver_id\": np.random.randint(1000, 2000, N_ROWS),\n", + " \"event_timestamp\": timestamps,\n", + " \"conv_rate\": np.clip(np.random.normal(0.5, 0.2, N_ROWS), 0, 1),\n", + " \"acc_rate\": np.clip(np.random.normal(0.7, 0.15, N_ROWS), 0, 1),\n", + " \"avg_daily_trips\": np.random.poisson(20, N_ROWS).astype(\"int32\"),\n", + " \"vehicle_type\": np.random.choice([\"sedan\", \"suv\", \"truck\", \"van\", \"compact\"], N_ROWS),\n", + " \"created\": timestamps,\n", + "})\n", + "\n", + "print(f\"Sample data: {len(df)} rows, {N_DAYS} days\")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "!uv pip install -q 'psycopg2'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded sample data into PostgreSQL table 'driver_stats_source'.\n" + ] + } + ], + "source": [ + "# Load sample data into PostgreSQL'\n", + "from 
sqlalchemy import create_engine\n", + "\n", + "engine = create_engine(f\"postgresql://{PG_USER}:{PG_PASS}@{PG_HOST}:{PG_PORT}/{PG_DB}\")\n", + "df.to_sql(\"driver_stats_source\", engine, if_exists=\"replace\", index=False)\n", + "print(\"Loaded sample data into PostgreSQL table 'driver_stats_source'.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature definitions written.\n" + ] + } + ], + "source": [ + "# Write feature definitions\n", + "definitions = '''\n", + "from datetime import timedelta\n", + "from feast import Entity, FeatureView, FeatureService, Field\n", + "from feast.types import Float32, Int32, String\n", + "from feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source import (\n", + " PostgreSQLSource,\n", + ")\n", + "\n", + "driver = Entity(name=\"driver\", join_keys=[\"driver_id\"])\n", + "\n", + "driver_stats_source = PostgreSQLSource(\n", + " name=\"driver_stats_source\",\n", + " query=\"SELECT * FROM driver_stats_source\",\n", + " timestamp_field=\"event_timestamp\",\n", + " created_timestamp_column=\"created\",\n", + ")\n", + "\n", + "driver_stats_fv = FeatureView(\n", + " name=\"driver_stats\",\n", + " entities=[driver],\n", + " ttl=timedelta(days=365),\n", + " schema=[\n", + " Field(name=\"conv_rate\", dtype=Float32),\n", + " Field(name=\"acc_rate\", dtype=Float32),\n", + " Field(name=\"avg_daily_trips\", dtype=Int32),\n", + " Field(name=\"vehicle_type\", dtype=String),\n", + " ],\n", + " source=driver_stats_source,\n", + ")\n", + "\n", + "driver_service = FeatureService(\n", + " name=\"driver_service\",\n", + " features=[driver_stats_fv],\n", + ")\n", + "'''\n", + "\n", + "with open(os.path.join(REPO_DIR, \"definitions.py\"), \"w\") as f:\n", + " f.write(definitions)\n", + "\n", + "print(\"Feature definitions written.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: 
Apply — Registers Features & Triggers Baseline\n", + "\n", + "Running `feast apply` registers the feature definitions and automatically queues baseline metric computation." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/var/folders/cn/z7vz24yj25d8fjqdrs9jbsh00000gn/T/feast_monitoring_demo_kze7m3sk/definitions.py:9: DeprecationWarning: Entity value_type will be mandatory in the next release. Please specify a value_type for entity 'driver'.\n", + " driver = Entity(name=\"driver\", join_keys=[\"driver_id\"])\n", + "The `path` of the `RegistryConfig` starts with a plain `postgresql` string. We are updating this to `postgresql+psycopg` to ensure that the `psycopg3` driver is used by `sqlalchemy`. If you want to use `psycopg2` pass `postgresql+psycopg2` explicitely to `path`. To silence this warning, pass `postgresql+psycopg` explicitely to `path`.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Features registered. Baseline computation queued.\n" + ] + } + ], + "source": [ + "import sys\n", + "from feast import FeatureStore\n", + "\n", + "sys.path.insert(0, REPO_DIR)\n", + "from definitions import driver, driver_stats_source, driver_stats_fv, driver_service\n", + "\n", + "store = FeatureStore(repo_path=REPO_DIR)\n", + "store.apply([driver, driver_stats_source, driver_stats_fv, driver_service])\n", + "print(\"Features registered. Baseline computation queued.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5: Compute Batch Metrics\n", + "\n", + "### 5a. Auto-compute (recommended for production)\n", + "\n", + "Auto-compute detects the latest event timestamp and generates metrics for all 5 granularities: `daily`, `weekly`, `biweekly`, `monthly`, and `quarterly`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Computed metrics for 20 features\n", + "Granularities: ['biweekly', 'daily', 'monthly', 'quarterly', 'weekly']\n" + ] + } + ], + "source": [ + "from feast.monitoring.monitoring_service import MonitoringService\n", + "\n", + "monitoring = MonitoringService(store)\n", + "\n", + "result = monitoring.auto_compute(\n", + " project=\"monitoring_demo\",\n", + ")\n", + "\n", + "print(f\"Computed metrics for {result.get('computed_features', 'N/A')} features\")\n", + "print(f\"Granularities: {result.get('granularities', [])}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5b. Targeted compute (specific date range)\n", + "\n", + "Compute `weekly` metrics for a specific window." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'status': 'completed', 'granularity': 'weekly', 'computed_features': 4, 'computed_feature_views': 1, 'computed_feature_services': 1, 'metric_dates': ['2025-01-01'], 'duration_ms': 43}\n" + ] + } + ], + "source": [ + "from datetime import date\n", + "\n", + "result = monitoring.compute_metrics(\n", + " project=\"monitoring_demo\",\n", + " feature_view_name=\"driver_stats\",\n", + " start_date=date(2025, 1, 1),\n", + " end_date=date(2025, 1, 7),\n", + " granularity=\"weekly\",\n", + ")\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5c. Set a manual baseline\n", + "\n", + "Use `set_baseline=True` to mark the computed metrics as the reference distribution." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Baseline set.\n" + ] + } + ], + "source": [ + "result = monitoring.compute_metrics(\n", + " project=\"monitoring_demo\",\n", + " feature_view_name=\"driver_stats\",\n", + " start_date=date(2025, 1, 1),\n", + " end_date=date(2025, 2, 28),\n", + " granularity=\"daily\",\n", + " set_baseline=True,\n", + ")\n", + "\n", + "print(\"Baseline set.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 6: Read Metrics\n", + "\n", + "### Per-feature metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date: 2025-01-01 Mean: 0.4989 Null rate: 0.0000 Rows: 4922\n", + "Date: 2025-02-28 Mean: 0.5201 Null rate: 0.0000 Rows: 104\n" + ] + } + ], + "source": [ + "metrics = monitoring.get_feature_metrics(\n", + " project=\"monitoring_demo\",\n", + " feature_view_name=\"driver_stats\",\n", + " feature_name=\"conv_rate\",\n", + " data_source_type=\"batch\",\n", + " granularity=\"daily\",\n", + ")\n", + "\n", + "for m in metrics[:3]:\n", + " print(f\"Date: {m['metric_date']} Mean: {m['mean']:.4f} Null rate: {m['null_rate']:.4f} Rows: {m['row_count']}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Categorical feature metrics\n", + "\n", + "Categorical features (like `vehicle_type`) produce value-count histograms instead of numeric statistics." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date: 2025-01-01 Type: categorical Rows: 4922 Null rate: 0.0000\n", + " Unique values: 5 Other count: 0\n", + " van: 1051\n", + " suv: 1028\n", + " sedan: 970\n", + " truck: 954\n", + " compact: 919\n", + "Date: 2025-02-28 Type: categorical Rows: 104 Null rate: 0.0000\n", + " Unique values: 5 Other count: 0\n", + " compact: 26\n", + " truck: 24\n", + " sedan: 19\n", + " van: 18\n", + " suv: 17\n" + ] + } + ], + "source": [ + "cat_metrics = monitoring.get_feature_metrics(\n", + " project=\"monitoring_demo\",\n", + " feature_view_name=\"driver_stats\",\n", + " feature_name=\"vehicle_type\",\n", + " data_source_type=\"batch\",\n", + " granularity=\"daily\",\n", + ")\n", + "\n", + "for m in cat_metrics[:3]:\n", + " print(f\"Date: {m['metric_date']} Type: {m['feature_type']} \"\n", + " f\"Rows: {m['row_count']} Null rate: {m['null_rate']:.4f}\")\n", + " if m.get(\"histogram\"):\n", + " hist = m[\"histogram\"]\n", + " print(f\" Unique values: {hist['unique_count']} Other count: {hist['other_count']}\")\n", + " for entry in hist[\"values\"]:\n", + " print(f\" {entry['value']}: {entry['count']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAANlNJREFUeJzt3QmcjeX///HPjDFjmca+G7ufnYTKUrLvhUhSiEqRkqTUD1HiS5SQoujrG/mWolWSXWTJvmdrpiKyjK0sM/f/8bl+//t0zmxmXMOZmfN6Ph7HmPvc576v+77PzFzv+1pOkOM4jgAAAACAhWCbFwMAAACAIlgAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gA8IsPPvhAgoKCZOPGjVdd96677jKP1NLtv/zyy9dYwsxNz4ueH2+lSpWSnj17Xvd9Hz582Oxb3wMu3W94eLjcKLw3ACDtESwA4Bq8/fbbPhXjQPXNN9+k2wp6ei5bWvvss8+kS5cuUqZMGcmRI4dUqFBBnn32WTl9+nSi63/xxRdyyy23SLZs2aREiRIyfPhwuXLlis86S5YskV69esn//M//mG3qth955BE5cuRIgu1p8NewFv/RsmXLFB+DlvWxxx6TAgUKSM6cOaVRo0ayadOmBOv997//lQcffFDKly9v9nEtNx1u5L6AQBLi7wIAwNV89913kh6DRf78+W/IHf4bZe/evRIcHJzqyvuUKVNSVYEvWbKk/PXXX5I1a9ZrKGXalE33HxKSef4EaiW5aNGiphKsQWH79u0yefJkcw60wpw9e3bPugsXLpT27dubSvKkSZPMuq+++qocO3ZMpk6d6lnv+eefl5MnT0rnzp1NxfrgwYNmm1999ZVs2bJFChcu7FOG4sWLy+jRo32WaZlSIi4uTtq0aSNbt26V5557zvxs6c+YlvGnn34y+3dpGXVZnTp15MSJE6k+VzdyX0CgyTy/VQFkWqGhof4uQkAICwu7rtvXO+JaqdPrqXfK/cnf+09r8+bNS3A3vVatWtKjRw+ZPXu2aWlwDRo0SKpXr24CuxuuIiIi5LXXXpOnn35aKlasaJZNmDBBGjRo4BM2tQWiYcOGJmBoGPGWK1cuE2yutfxr1qyRTz75RDp16mSW3Xfffaa1RFtT5syZ41n3P//5jxQrVsyUq2rVqul6X0CgoSsUgBT9IdZuACtWrEjw3Lvvvmue27Fjh2fZnj17zB/svHnzmgpc7dq1TdeLxFy8eFEGDhzo6ZLQoUMHOX78+FXHWPz999/mTrRWBnQfRYoUkY4dO8qBAweSPZbffvvNdO8oVKiQqUhXqVJFZsyYkarzoWMRdu7cac6H2+VDy6d3dPX/b7zxRoLXaEVGn/voo498xjjoudJKjVbs8uXLZyp2emzxffjhh6aiqHee9bzef//9Eh0dnaLyrl692txx1fNUtmxZc82SOi7vFpjLly/LiBEjzB1cfa2WTyuaixcvNs/rutoioLy7v3iPo3j99dflzTffNPvV871r165Ex1i49By2aNHCvBf0bvfIkSPFcRzP88uXLzev1a/e4m8zubK5y+K3ZGzevFlatWplroWO92jSpIn8+OOPiY4N+uGHH676vr2REuuio2VSu3fv9izT868PbeHwbrHp27evOc/6s+668847E7Rg6TJ9/3lvM354PHfuXKrLr/vVn0n9GXbpudWfjc8//9z8nnBFRkamumXNX/sCAg0tFgCuSrsNaEXr448/Nncr4/dB1sq5ezdPK9z169c3d/leeOEFU+nS12nXi08//dRT2XH1799f8uTJY+4UauVQK6FPPvmk2W5SYmNjpW3btqYPuFawtTJ+9uxZU+HVgKOV2MT88ccfcvvtt5uKoe5DKxPaLaR3795y5swZGTBgQIrOh5ZRy63n5KWXXjLLtKKifdD12PUO8TPPPOPzGl120003yT333OO
zXCszWqHXLiRaiX3rrbfk1KlTMmvWLM86o0aNkqFDh5p19c6zVmC1C4tW8rQynDt37iTLqt1cmjdvbo5VK9Ja8dNzreW9Gl1fy6X7vPXWW8050sH22rWmWbNm0qdPH/n999/Nedc7u4mZOXOmCUpakdVgoZVSbbVI6rrqHXG9RmPHjpVvv/3W0/dfA0ZqpKRs3vR9e8cdd5hQMXjwYNNNSwOYVtg1QN52223W79sb7ejRo+ardvVx6ftFadj3piFOuzK5zydFQ4M+vLfp2rdvn/l5v3Tpknl/PfroozJs2LAUdXnT/eqYj/iVeH3fTZs2zWy7WrVqkhZu5L6AgOMAQAp07drVKViwoHPlyhXPsiNHjjjBwcHOyJEjPcuaNGniVKtWzfn77789y+Li4px69eo55cuX9yybOXOm3oZ2mjZtap53PfPMM06WLFmc06dPe5Y1bNjQPFwzZswwr50wYUKCcnpvS9cZPny45/vevXs7RYoUcf7880+f19x///1Orly5nAsXLqT4fFSpUsWnTK53333X7Hf37t2eZZcuXXLy58/v9OjRw7NMy6Xr3X333T6v79u3r1m+detW8/3hw4fN+Rg1apTPetu3b3dCQkISLI+vffv2TrZs2ZxffvnFs2zXrl1mm/H/BJQsWdKnjDVq1HDatGmT7Pb79euXYDvq0KFDZnlERIRz7NixRJ/T94BL96vL+vfv73Mtdf+hoaHO8ePHzbJly5aZ9fTr1baZVNkSe2/oedL9HDhwwLPs999/d2666SbnzjvvvKb3rb/p+13LtG/fPs+ycePGmfJHRUUlWL9OnTrO7bffnuw2X3nlFfP6JUuW+Czv1auX8/LLLzuffvqpM2vWLPO+1vXuu+++FJU1Z86cZhvxff3112Y73377bap+DtPLvoBAQ/segBTRGWd0cKd3FxTtUqB3n/U5pQM9ly5dau6sawvCn3/+aR466FG7t/z888+mK5I3vZPt3UVF7xrrnetffvklybJoy4feMdW7xvHFn0LVpXVJfV27du3M/92y6UPLFhMTk+isMKmlx67dhrSFwrVo0SKzn8T6n/fr18/ne/eYdNCtO9uPnmPdrneZdeCsdlFatmxZkmXR86j71tYiHdDrqlSpkjnmq9GWEL2Tr9ftWt17772mtSSl9K6/y21Z0jvg33//vVwvep50vIGeJ211cmn3ugceeMB0JdPWGtv37Y2k4wTef/99MzOU92BkHbSe1Hgafd+6zydm5cqVpmucvhcbN27s85zuS1tvtHvRQw89ZLoUaYuFtlbG706WGN1vUmXyLndauJH7AgINXaEApIh2UdHBmdrVQ/ueK/3/zTffbMY5qP3795tKu3bb0UdiNJxoNymXd4VXafcSpd2BkqLjKHQ6zdTM6qPdh3SKSe3qoI+kymZLK+MaXrRi98orr5hlGjL0mONXxpR3pU9pNy7toqHda5RW6vWcxl/PlVw3Ez1mrSQl9lo9f254SYp2P9KuW3p9taubvge00qgDf1OqdOnSKV5Xj9u7Yq/c95Z7Pq4HPU8XLlww5yQ+DWEa7HQ8i3b5s3nf6rXQAHstdGyN/vylxKpVq0z3Pg2P2o0u/naU9zgCl3ZZ8549ypuOBdJujPo+eO+991JUDg0106dPN6FQu7dpQNSbD940dGbJksXsN6kyeZc7pW7kvgD8g2ABIEX0Dp/e0Z0/f76ZmlHHK+gAVp1JxuX2nddZZ5K6I16uXDmf7/UPfWK8B+ymBbds2mqgM+UkJjUV5uR0797dzDijA7a1r7YOXNfBsSkZBBq/xUXLrct0LEhi5+p6fqicjuHQEKd3n/WOvlYodWD6O++84zPLUHLSupKWVIuUthbcSNfyvtUg/vDDD1/T/vQ9m5LPTdEpVO+++24TALRFMX741lYYpZ9FoQOTvekyHWcQn4YqHaejwUbDqI4VSgl3+24FX38e9PMivB06dMiMMdJyJfb5GO6ylE5b67qR+wLwD4IFgBT
TLk///ve/zaBpnRVGK1FuNyjl3m3Wu+hNmza9buXQu/rr1q0zsxal9LMQ9G6lVoi0ApoWZUuqgqv0zr7uT1sqdNCv3g3XO/2J0RYJ77v62uqjYUIrQO6x6nnWddy79ymlZdCKfWJdmfQzK1JCB1trZVgfOmhXw4YO6naDRXLnIbX0uHVWKO/j1IG0yj0fbstA/A9+S6wLUkrLpudJPwAusXOid+o1EMavhF8LDdvujFqplZLKroZAfe8VLFjQBIDEQqe2MCodhO8dInSg+6+//mq6eHnTbowaKvQOv/7cu8EkJfRaKrcrXI0aNRIcv/tZGFoubWnR94B3ANefc702qX3v38h9AfgHwQJAimmFXCuaeudVg4VWTLwrxVqh0Vl0dDYdHSsQvxKiXU5S098+uX77X3/9tZlLP/7sS1oJT6xCqXeY9XXaRUlnjoo/J31qy6az3yT1qcZ6l7hr165mX3qetNUiqdYQnRJVK24une1J6bSnSvusDxkyxPRt1ylnvY9Nj1XvBus0sInRY9bK7IIFCyQqKsrTfUfLpGMvrkYrld7b1oqqtjh5T3Or50HpuUhudqqU0muqM2O5x6ffa3h0u9/ph+vpcWl/f21Bc2krWnwpLZtuT6+Btsxolys3xGirnF5DnWJXZ4uypT8PqamYp3YGKD0GrSjrtU3qvazdufRzKrQ7oM6c5ba86AfB6XvL/VwHdf78eWndurUZF6VjeZLqjqfjT7RF03vcgl4793Mu3NZLDYVJhXrdr7aw6Jgitww6lkhb/rRrYWo/Y+VG7gvAPwgWAFJMK3ha0Z07d66pdOhnFCRWUdaKmFamdfCmtmJoBW3t2rXmjqh21UiLrkY6Hat+jsD69evNwFktj/bl1i5H8ad0dY0ZM8ZUkLQVQctWuXJlUzHXQdv62vh9spOjnymhlTGtPGllW0OV9xgKLaNWkHV///rXv5LcjnbP0K4reqdZz5GGBx0wrHdc3RYL3YeGC630amVaW170ddotTe8wa9ezpGgg0Wlb9RzpudGpWzW8aAVz27ZtyR6jnh8NinqsGij1LrdWyLwHWOtz6qmnnjIVSK2o6hTA10IHz2pZtduPXiPt/qUB8sUXX/RUlLU7jn4StB6DVoT1/OgnQSc2PiY1ZdNzrHe49b2r50nDoQZkvVOvU9+md/r+0RYCnSpXB5vrw6VTv+r0wK5x48aZ95wGET0fGrQ1wGkrlI4pcXXr1s38fOnnvmgY9f7sCg2ZbrDTnx8N0vrQnwUdS6LvTe0qqe9Pndr1arSCr+MwtGVMP2fD/TRsbWHU97A3DZX6cG8I6M++G2K0RU0f6WVfQMDx97RUADKWxYsXmykZg4KCnOjo6ETX0Sk7u3fv7hQuXNjJmjWrU6xYMadt27bOvHnzEkzbuWHDBp/XJjadaPzpZpVODfvSSy85pUuXNvvQfXXq1MlnutD4U4qqP/74w0xDGhkZ6XmdTpE7bdq0VJ2Ho0ePmqlQdTpS3U9i01Dq9JQ6He+vv/6a4Dl3ulmd+lXLrdvJkyeP8+STTzp//fVXgvV1Gs8GDRqYqTL1UbFiRXMce/fuvWpZV6xY4dSqVctMp1qmTBnnnXfe8ew/uelmX331VefWW291cufO7WTPnt3sU6e31elzXTr9sE4RW6BAAfOecLfpTv+q05vGl9R0s3pcev2aN2/u5MiRwylUqJApZ2xsrM/rderZe++916yj56xPnz7Ojh07EmwzqbIl9d7YtGmT06JFCyc8PNxsu1GjRs6aNWt81knN+/ZG0n0n9UjsvTl//nzn5ptvdsLCwpzixYs7//u//+tzXd33Q1Lb1OdcBw8edDp37uyUKlXKTG2s507fb/o+856S92pOnjxppsjNly+f2YaWO/55Vu57N7FH/GuaHvYFBJIg/cff4QYAMqOaNWuaO/3aNz0+Haegd0f1LmhiHzYGAEBGw+dYAMB1oN2GtmzZYrpEAQAQCBhjAQBetAUhualLQ0NDTSt
EUrS/+k8//STjx483A3W9Z80CACAzI1gAgJc6deok++nJDRs29Pn08fh0cLN+sJx+2NpHH33k+TRfAAAyO8ZYAIAXnclGZ7VJbhpLd7YhAADwD4IFAAAAAGsM3gYAAABgjTEWFuLi4uT33383H1aV2Cf9AgAAABmZdm46e/asFC1aVIKDk2+TIFhY0FARGRnp72IAAAAA11V0dLQUL1482XUIFha0pcI90REREf4uDgAAAJCmzpw5Y26ku/Xe5BAsLLjdnzRUECwAAACQWaWk2z+DtwEAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAIA0Exvn+LsI8JMQf+04M5nw5VY5fsHfpQAAAPCvyPzh8kKHmv4uBvyEYJEGfjtxXqJiYv1dDAAAAMBv6AoFAAAAwBrBAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWDx/wUFBcmCBQv8XQwAAAAgQyJYAAAAALjxwSIuLk7Gjh0r5cqVk7CwMClRooSMGjXKPLd9+3Zp3LixZM+eXfLlyyePPfaYnDt3zvPanj17Svv27eW1116TQoUKSe7cuWXkyJFy5coVee655yRv3rxSvHhxmTlzpuc1hw8fNq0Jc+fOlXr16km2bNmkatWqsmLFCs86sbGx0rt3byldurTZd4UKFWTixIkJyj5jxgypUqWKKXeRIkXkySefNMtLlSplvnbo0MHsy/0eAAAAwHUKFkOGDJExY8bI0KFDZdeuXTJnzhwTEs6fPy8tWrSQPHnyyIYNG+STTz6R77//3lN5dy1dulR+//13WblypUyYMEGGDx8ubdu2Na9bt26dPP7449KnTx/59ddffV6nwePZZ5+VzZs3S926daVdu3Zy4sQJT9jRQKL71DINGzZMXnzxRfn44489r586dar069fPhB0NQF988YUJR0rLqzTQHDlyxPN9fBcvXpQzZ874PAAAAACIBDmO46R05bNnz0qBAgVk8uTJ8sgjj/g8N336dHn++eclOjpacubMaZZ98803JgBokNDwoS0Wy5cvl4MHD0pw8P9lmooVK0rBggVN0HBbH3LlyiXvvfee3H///abFQlsiNMzo9pW2cOiy/v37y+DBgxMtqwaao0ePyrx588z3xYoVk4cfflheffXVxE9EUJDMnz/ftKgk5eWXX5YRI0YkWP7IxIUSFRObwrMIAACQOZUrHCFTHr3D38VAGtIb6Vo3j4mJkYiIiLRrsdi9e7e5a9+kSZNEn6tRo4YnVKj69eub1oS9e/d6lmlXJDdUKA0c1apV83yfJUsW043q2LFjPtvXVgpXSEiI1K5d2+zTNWXKFKlVq5YJPuHh4TJt2jSJiooyz+m2NNwkVu7UttboSXUfGqIAAAAAiISkZmUdv2Ara9asCVoKElumgSSldPzFoEGDZPz48SaA3HTTTTJu3DjTtSqtyq10bIY+AAAAAFi0WJQvX95U0pcsWZLguUqVKsnWrVvNWAvXDz/8YFondDC1rR9//NHzf+0K9dNPP5l9uvvRgd19+/aVmjVrmrETBw4c8KyvQUMHZCdWbpeGG+2GBQAAAOA6BwudkUnHOei4hlmzZpnKu1b433//fenWrZt5vkePHrJjxw5ZtmyZGQPx0EMPme5OtrSrk46B2LNnjxmEferUKenVq5cn8GzcuFEWLVok+/btMwPL4w/A1vER2qLx1ltvyc8//yybNm2SSZMmeZ53g4eOy9BtAwAAALiOs0JppV1nZ9KZl7TFoEuXLmYMQ44cOUzF/uTJk1KnTh3p1KmTGdOgA73Tgg7e1oeO41i9erWZ1Sl//vzmOZ1FqmP
HjqYst912m5ktSlsvvGngefPNN+Xtt9824zx0JioNGC4NHYsXL5bIyEjT6gEAAADgOs0K5Q/urFA6zezNN98s6XGUPLNCAQAAMCtUZnTdZoUCAAAAgMQQLAAAAADc2Olm/UEHVafz3loAAABAwKPFAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACshdhvAsXy5ZTQ7P4uBQAAgH9F5g/3dxHgRwSLNDCwXQ2JiIjwdzEAAAD8LjbOkSzBQf4uBvyArlAAAABIM4SKwEWwAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAA6V5snOPvIuAqQq62Aq5uwpdb5fgFf5cCAAAgc4rMHy4vdKjp72LgKggWaeC3E+clKibW38UAAAAA/IauUAAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIC1gA0Whw8flqCgINmyZYu/iwIAAABkeOkuWNx1110yYMAAfxcDAAAAQEYOFlfjOI5cuXLF38UAAAAAkF6DRc+ePWXFihUyceJE001JHx988IH5unDhQqlVq5aEhYXJ6tWrzbrt27f3eb22dGiLhysuLk7Gjh0r5cqVM68rUaKEjBo1KtF9x8bGSq9evaRixYoSFRV13Y8VAAAAyExCJB3RQLFv3z6pWrWqjBw50izbuXOn+frCCy/I66+/LmXKlJE8efKkaHtDhgyR6dOnyxtvvCENGjSQI0eOyJ49exKsd/HiRenatasZd7Fq1SopUKBAGh8ZAAAAkLmlq2CRK1cuCQ0NlRw5ckjhwoXNMjcIaNBo1qxZird19uxZE1QmT54sPXr0MMvKli1rAoa3c+fOSZs2bUy4WLZsmSlDUnQdfbjOnDmT6mMEAAAAMqN01RUqObVr107V+rt37zYhoEmTJsmupy0V58+fl++++y7ZUKFGjx5t1nEfkZGRqSoTAAAAkFllmGCRM2dOn++Dg4PNQG5vly9f9vw/e/bsKdpu69atZdu2bbJ27doUda2KiYnxPKKjo1NcfgAAACAzS3fBQrtC6UDqq9FxEDpmwpv3Z1KUL1/ehIslS5Yku50nnnhCxowZI3fffbcZOJ4cHQAeERHh8wAAAACQzsZYqFKlSsm6devMQOrw8HAzs1NiGjduLOPGjZNZs2ZJ3bp15cMPP5QdO3ZIzZo1zfPZsmWT559/XgYPHmzCSv369eX48eNmMHjv3r19ttW/f38TZtq2bWtmn4o/DgMAAABABmuxGDRokGTJkkUqV65sWiWSmvq1RYsWMnToUBMc6tSpYwZrd+/e3Wcdff7ZZ5+VYcOGSaVKlaRLly5y7NixRLenU9WOGDHCdI1as2bNdTk2AAAAILMKcuIPVECK6axQOoj7kYkLJSrm6t23AAAAkHrlCkfIlEfv8HcxArq+GxMTc9VhAOmuxQIAAABAxkOwAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYC7HfBIrlyymh2f1dCgAAgMwpMn+4v4uAFCBYpIGB7WpIRESEv4sBAACQacXGOZIlOMjfxUAy6AoFAACAdI9Qkf4RLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAg04iNc/xdhIAV4u8CZAYTvtw
qxy/4uxQAAACBLTJ/uLzQoaa/ixGwCBZp4LcT5yUqJtbfxQAAAAD8hq5QAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrmSJYHD58WIKCgmTLli3+LgoAAAAQkDJFsAAAAADgXwQLAAAAABk3WMybN0+qVasm2bNnl3z58knTpk3l/Pnz5rn33ntPKlWqJNmyZZOKFSvK22+/7fPa9evXS82aNc3ztWvXls2bN/s8HxsbK71795bSpUub7VeoUEEmTpzos07Pnj2lffv28vrrr0uRIkVMGfr16yeXL1++AUcPAAAAZC4h/tjpkSNHpGvXrjJ27Fjp0KGDnD17VlatWiWO48js2bNl2LBhMnnyZBMeNDQ8+uijkjNnTunRo4ecO3dO2rZtK82aNZMPP/xQDh06JE8//bTP9uPi4qR48eLyySefmMCwZs0aeeyxx0yAuO+++zzrLVu2zCzTr/v375cuXbrIzTffbPYHAAAAIAMEiytXrkjHjh2lZMmSZpm2Xqjhw4fL+PHjzXNKWx127dol7777rgkWc+bMMcHh/fffNy0WVapUkV9//VWeeOIJz/azZs0qI0aM8Hyv21i7dq18/PHHPsEiT548JsBkyZLFtIy0adNGlixZkmSwuHjxonm4zpw5cx3ODgAAAJDx+CVY1KhRQ5o0aWLCRIsWLaR58+bSqVMnCQ0NlQMHDphuTN6Vew0huXLlMv/fvXu3VK9e3YQKV926dRPsY8qUKTJjxgyJioqSv/76Sy5dumRaI7xpKNFQ4dLWi+3btydZ7tGjR/sEFgAAAAB+HGOhlfnFixfLwoULpXLlyjJp0iQzDmLHjh3m+enTp5upY92HLv/xxx9TvP25c+fKoEGDTED57rvvzDYefvhhEy68acuGN52yVltDkjJkyBCJiYnxPKKjo1N97AAAAEBm5JcWC7cSX79+ffPQMRXaJeqHH36QokWLysGDB6Vbt26Jvk4Hdf/nP/+Rv//+29NqET906Hbq1asnffv29SzTlhBbYWFh5gEAAAAgHQSLdevWmbEM2gWqYMGC5vvjx4+b0KBdjZ566inT9ally5ZmTMPGjRvl1KlTMnDgQHnggQfkpZdeMl2ltAVBPxxPZ3byVr58eZk1a5YsWrTIjK/QILJhwwbzfwAAAACZJFhERETIypUr5c033zQDoLW1Qgdst2rVyjyfI0cOGTdunDz33HNmNigdizFgwADzXHh4uHz55Zfy+OOPm1mjtCvVv/71L7n33ns92+/Tp4+ZTUpnedKWEZ2BSlsvtOsVAAAAgLQX5Ogcr7gmGoq0ZeWRiQslKibW38UBAAAIaOUKR8iUR+/wdzEyZX1Xxxdr40By+ORtAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAayH2m0CxfDklNLu/SwEAABDYIvOH+7sIAY1gkQYGtqshERER/i4GAABAwIuNcyRLcJC/ixGQ6AoFAACATINQ4T8ECwAAAADWCBYAAAAArBEsAAAAAFgjWAAAAACwRrAAAAAAYI1gAQAAAMAawQIAAACANYIFAAAAAGsECwAAAADWCBYAAADAdRQb50ggCPF3ATKDCV9uleMX/F0KAAAApDeR+cPlhQ41JRAQLNLAbyfOS1RMrL+LAQAAAPgNXaEAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAawQ
LAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALCW4YPFvHnzpFq1apI9e3bJly+fNG3aVM6fPy933XWXDBgwwGfd9u3bS8+ePc3/X3zxRbntttsSbK9GjRoycuTIG1Z+AAAAIDPI0MHiyJEj0rVrV+nVq5fs3r1bli9fLh07dhTHca762m7dusn69evlwIEDnmU7d+6Ubdu2yQMPPJDoay5evChnzpzxeQAAAADIBMHiypUrJkyUKlXKtFz07dtXwsPDr/raKlWqmNaJOXPmeJbNnj3btGKUK1cu0deMHj1acuXK5XlERkam6fEAAAAAGVWGDhYaDJo0aWICRefOnWX69Oly6tSpFL9eWy3cYKGtHB999JFZlpQhQ4ZITEyM5xEdHZ0mxwEAAABkdBk6WGTJkkUWL14sCxculMqVK8ukSZOkQoUKcujQIQkODk7QJery5cs+32s3qr1798qmTZtkzZo1Jih06dIlyf2FhYVJRESEzwMAAABABg8WKigoSOrXry8jRoyQzZs3S2hoqMyfP18KFChgukq5YmNjZceOHT6vLV68uDRs2NB0gdJHs2bNpGDBgn44CgAAACBjC5EMbN26dbJkyRJp3ry5CQT6/fHjx6VSpUqSM2dOGThwoHz99ddStmxZmTBhgpw+fTrBNrTr0/Dhw+XSpUvyxhtv+OU4AAAAgIwuQwcL7Yq0cuVKefPNN80MTSVLlpTx48dLq1atTLenrVu3Svfu3SUkJESeeeYZadSoUYJtdOrUSZ588knTrUqnowUAAACQekFOSuZmRaI0zOjsUI9MXChRMbH+Lg4AAADSmXKFI2TKo3dIRq/v6sRFVxtfnOHHWAAAAADwP4IFAAAAAGsECwAAAADWCBYAAAAArBEsAAAAAFgjWAAAAACwRrAAAAAAYI1gAQAAAMAawQIAAACANYIFAAAAAGsECwAAAADWCBYAAAAArBEsAAAAAFgjWAAAAACwRrAAAAAAYC3EfhMoli+nhGb3dykAAACQ3kTmD5dAQbBIAwPb1ZCIiAh/FwMAAADpUGycI1mCgySzoysUAAAAcB1lCYBQoQgWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAA6VRsnCMZRYi/C5AZTPhyqxy/4O9SAAAAIDOJzB8uL3SoKRkFwSIN/HbivETFxPq7GAAAAIDf0BUKAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAQGAGi2nTpknRokUlLi7OZ/k999wjvXr1kgMHDpj/FypUSMLDw6VOnTry/fff+6xbqlQpee2118z6N910k5QoUcJsFwAAAECABIvOnTvLiRMnZNmyZZ5lJ0+elG+//Va6desm586dk9atW8uSJUtk8+bN0rJlS2nXrp1ERUX5bGf8+PFSu3Zts07fvn3liSeekL179/rhiAAAAICMLUMGizx58kirVq1kzpw5nmXz5s2T/PnzS6NGjaRGjRrSp08fqVq1qpQvX15eeeUVKVu2rHzxxRc+29HwoYGiXLly8vzzz5vXe4eV+C5evChnzpzxeQAAAADIoMFCacvEp59+air7avbs2XL//fdLcHCwabEYNGiQVKpUSXLnzm26Q+3evTtBi0X16tU9/w8KCpLChQvLsWPHktzn6NGjJVeuXJ5HZGTkdTxCAAAAIOPIsMFCuzY5jiNff/21REdHy6pVq0zYUBoq5s+fb8ZQ6PItW7ZItWrV5NKlSz7byJo1q8/3Gi7ij9vwNmTIEImJifE8dL8AAAAAREIkg8qWLZt07NjRtFTs379fKlSoILfccot57ocffpCePXtKhw4
dzPfagnH48GHrfYaFhZkHAAAAgEwSLJS2ULRt21Z27twpDz74oGe5jqv47LPPTKuGtkIMHTo02ZYIAAAAAAHaFUo1btxY8ubNa2ZyeuCBBzzLJ0yYYAZ416tXz4SLFi1aeFozAAAAAKS9DN1ioQO1f//99wTL9TMqli5d6rOsX79+Pt8n1jVKx2IAAAAACLAWCwAAAADpA8ECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGAtxH4TKJYvp4Rm93cpAAAAkJlE5g+XjIRgkQYGtqshERER/i4GAAAAMpnYOEeyBAdJRkBXKAAAACCdypJBQoUiWAAAAACwRrAAAAAAYI1gAQAAAMAawQIAAACANYIFAAAAAGsECwAAAADWCBYAAAAArBEsAAAAAFgjWAAAAACwRrAAAAAAYI1gAQAAAMBaiP0mApfjOObrmTNn/F0UAAAAIM259Vy33pscgoWFEydOmK+RkZH+LgoAAABw3Zw9e1Zy5cqV7DoECwt58+Y1X6Oioq56opH50rsGyujoaImIiPB3cXADce0DF9c+MHHdAxfXXjwtFRoqihYtKldDsLAQHPx/Q1Q0VATyGy6Q6XXn2gcmrn3g4toHJq574OLaS4pvoDN4GwAAAIA1ggUAAAAAawQLC2FhYTJ8+HDzFYGFax+4uPaBi2sfmLjugYtrn3pBTkrmjgIAAACAZNBiAQAAAMAawQIAAACANYIFAAAAAGsEi2s0ZcoUKVWqlGTLlk1uu+02Wb9+vb+LBEujR4+WOnXqyE033SQFCxaU9u3by969e33W+fvvv6Vfv36SL18+CQ8Pl3vvvVf++OMPn3X0AxPbtGkjOXLkMNt57rnn5MqVKzf4aHCtxowZI0FBQTJgwADPMq575vXbb7/Jgw8+aK5t9uzZpVq1arJx40bP8zoMcdiwYVKkSBHzfNOmTeXnn3/22cbJkyelW7duZp773LlzS+/eveXcuXN+OBqkVGxsrAwdOlRKly5trmvZsmXllVdeMdfbxbXPHFauXCnt2rUzH+6mv9sXLFjg83xaXedt27bJHXfcYeqF+qF6Y8eOlYCkg7eROnPnznVCQ0OdGTNmODt37nQeffRRJ3fu3M4ff/zh76LBQosWLZyZM2c6O3bscLZs2eK0bt3aKVGihHPu3DnPOo8//rgTGRnpLFmyxNm4caNz++23O/Xq1fM8f+XKFadq1apO06ZNnc2bNzvffPONkz9/fmfIkCF+Oiqkxvr1651SpUo51atXd55++mnPcq575nTy5EmnZMmSTs+ePZ1169Y5Bw8edBYtWuTs37/fs86YMWOcXLlyOQsWLHC2bt3q3H333U7p0qWdv/76y7NOy5YtnRo1ajg//vijs2rVKqdcuXJO165d/XRUSIlRo0Y5+fLlc7766ivn0KFDzieffOKEh4c7EydO9KzDtc8c9PfxSy+95Hz22WeaGp358+f7PJ8W1zkmJsYpVKiQ061bN1OH+Oijj5zs2bM77777rhNoCBbX4NZbb3X69evn+T42NtYpWrSoM3r0aL+WC2nr2LFj5pfQihUrzPenT592smbNav4AuXbv3m3WWbt2recXWHBwsHP06FHPOlOnTnUiIiKcixcv+uEokFJnz551ypcv7yxevNhp2LChJ1hw3TOv559/3mnQoEGSz8fFxTmFCxd2xo0b51mm74ewsDBTcVC7du0y74UNGzZ41lm4cKETFBTk/Pbbb9f5CHCt2rRp4/Tq1ctnWceOHU3FUHHtM6f4wSKtrvPbb7/t5MmTx+f3vf5+qVChghNo6AqVSpcuXZKffvrJNJW5goODzfdr1671a9mQtmJiYszXvHnzmq963S9fvuxz7StWrCglSpTwXHv9ql0pChUq5FmnRYsWcubMGdm
5c+cNPwaknHZ10q5M3tdXcd0zry+++EJq164tnTt3Nt3XatasKdOnT/c8f+jQITl69KjPtc+VK5fp/up97bVrhG7Hpevr34V169bd4CNCStWrV0+WLFki+/btM99v3bpVVq9eLa1atTLfc+0DQ1pdZ13nzjvvlNDQUJ+/Adqd+tSpUxJIQvxdgIzmzz//NH0zvSsQSr/fs2eP38qFtBUXF2f62NevX1+qVq1qlukvH/2lob9g4l97fc5dJ7H3hvsc0qe5c+fKpk2bZMOGDQme47pnXgcPHpSpU6fKwIED5cUXXzTX/6mnnjLXu0ePHp5rl9i19b72Gkq8hYSEmBsSXPv064UXXjDBX28SZMmSxfxdHzVqlOlHr7j2gSGtrrN+1fE6Sf0NyJMnjwQKggWQxN3rHTt2mDtYyNyio6Pl6aeflsWLF5tBdwisGwh6F/K1114z32uLhf7cv/POOyZYIPP6+OOPZfbs2TJnzhypUqWKbNmyxdxM0gG+XHvg2tEVKpXy589v7m7EnxFGvy9cuLDfyoW08+STT8pXX30ly5Ytk+LFi3uW6/XVrnCnT59O8trr18TeG+5zSH+0q9OxY8fklltuMXeh9LFixQp56623zP/1rhPXPXPSWWAqV67ss6xSpUpmhi/va5fc73v9qu8fbzobmM4iw7VPv3TWNm21uP/++003xoceekieeeYZMzug4toHhrS6zvwN+AfBIpW0ibxWrVqmb6b3XS/9vm7dun4tG+zouC4NFfPnz5elS5cmaNbU6541a1afa6/9J7US4l57/bp9+3afX0J6J1ynqItfgUH60KRJE3PN9I6l+9C72Nolwv0/1z1z0q6O8aeU1j73JUuWNP/X3wFaKfC+9tp9RvtVe197DZ0aUF36+0P/Lmg/baRPFy5cMH3kvelNQ71uimsfGNLqOus6Oq3t5cuXff4GVKhQIaC6QRn+Hj2eUaeb1RkDPvjgAzNbwGOPPWamm/WeEQYZzxNPPGGmnFu+fLlz5MgRz+PChQs+047qFLRLly41047WrVvXPOJPO9q8eXMzZe23337rFChQgGlHMxjvWaEU1z3zTi8cEhJiph79+eefndmzZzs5cuRwPvzwQ5+pKPX3++eff+5s27bNueeeexKdirJmzZpmytrVq1eb2cWYcjR969Gjh1OsWDHPdLM6FalOET148GDPOlz7zDPjn04Drg+t9k6YMMH8/5dffkmz66wzSel0sw899JCZblbrifq7hOlmkWKTJk0yFQ39PAudflbnNkbGpr9wEnvoZ1u49BdN3759zbRy+kujQ4cOJnx4O3z4sNOqVSszh7X+oXr22Wedy5cv++GIkFbBguueeX355ZcmFOrNoooVKzrTpk3zeV6noxw6dKipNOg6TZo0cfbu3euzzokTJ0wlQz8HQacYfvjhh01lBunXmTNnzM+4/h3Pli2bU6ZMGfNZB97ThXLtM4dly5Yl+rddw2VaXmf9DIwGDRqYbWho1cASiIL0H3+3mgAAAADI2BhjAQAAAMAawQIAAACANYIFAAAAAGsECwAAAADWCBYAAAAArBEsAAAAAFgjWAAAAACwRrAAAAAAYI1gAQAAAMAawQIAcEMcPXpU+vfvL2XKlJGwsDCJjIyUdu3ayZIlS25oOYKCgmTBggU3dJ8AEAhC/F0AAEDmd/jwYalfv77kzp1bxo0bJ9WqVZPLly/LokWLpF+/frJnzx5/FxEAYCnIcRzHdiMAACSndevWsm3bNtm7d6/kzJnT57nTp0+bwBEVFWVaNLQFIzg4WFq2bCmTJk2SQoUKmfV69uxp1vVubRgwYIBs2bJFli9fbr6/6667pHr16pItWzZ57733JDQ0VB5//HF5+eWXzfOlSpWSX375xfP6kiVLmtADALBHVygAwHV18uRJ+fbbb03LRPxQoTRUxMXFyT333GPWXbFihSxevFgOHjwoXbp0SfX+/v3vf5v9rFu3TsaOHSsjR44021MbNmw
wX2fOnClHjhzxfA8AsEdXKADAdbV//37RxvGKFSsmuY62Umzfvl0OHTpkxl6oWbNmSZUqVUzlv06dOinen7ZYDB8+3Py/fPnyMnnyZLP9Zs2aSYECBTxhpnDhwtbHBgD4By0WAIDrKiU9bnfv3m0ChRsqVOXKlU0A0OdSQ4OFtyJFisixY8dStQ0AQOoRLAAA15W2GuhMTLYDtHXcRfyQogPA48uaNavP97pv7WoFALi+CBYAgOsqb9680qJFC5kyZYqcP38+wfM6ILtSpUoSHR1tHq5du3aZ57TlQmk3Jh0X4U0HbqeWBo/Y2NhrOhYAQNIIFgCA605DhVbmb731Vvn000/l559/Nl2c3nrrLalbt640bdrUTEHbrVs32bRpk6xfv166d+8uDRs2lNq1a5ttNG7cWDZu3GjGXujrdRzFjh07Ul0WnRlKx1zo52qcOnXqOhwtAAQmggUA4LrTD8XTwNCoUSN59tlnpWrVqmYwtVbwp06darorff7555InTx658847TdDQ1/z3v//1bENbPYYOHSqDBw82g7nPnj1rwkdqjR8/3swSpeM5atasmcZHCgCBi8+xAAAAAGCNFgsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAEFv/D/X0w0X7J7aHAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "try:\n", + " import matplotlib.pyplot as plt\n", + "\n", + " latest_cat = cat_metrics[0] if cat_metrics else None\n", + " if latest_cat and latest_cat.get(\"histogram\"):\n", + " hist = latest_cat[\"histogram\"]\n", + " labels = [e[\"value\"] for e in hist[\"values\"]]\n", + " counts = [e[\"count\"] for e in hist[\"values\"]]\n", + " if hist[\"other_count\"] > 0:\n", + " labels.append(\"(other)\")\n", + " counts.append(hist[\"other_count\"])\n", + "\n", + " fig, ax = plt.subplots(figsize=(8, 4))\n", + " ax.barh(labels, counts, color=\"steelblue\", edgecolor=\"white\")\n", + " ax.set_title(f\"vehicle_type distribution — {latest_cat['metric_date']}\")\n", + " ax.set_xlabel(\"Count\")\n", + " plt.tight_layout()\n", + " plt.show() # pragma: allowlist secret\n", + " else:\n", + " print(\"No categorical histogram data available.\")\n", + "except ImportError:\n", + " print(\"Install matplotlib to visualize: pip install matplotlib\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Feature view aggregates" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date: 2024-12-01 Total rows: 5000 Features w/ nulls: 0 Max null rate: 0.0\n", + "Date: 2025-01-01 Total rows: 4922 Features w/ nulls: 0 Max null rate: 0.0\n", + "Date: 2025-01-01 Total rows: 576 Features w/ nulls: 0 Max null rate: 0.0\n" + ] + } + ], + "source": [ + "view_metrics = monitoring.get_feature_view_metrics(\n", + " project=\"monitoring_demo\",\n", + " feature_view_name=\"driver_stats\",\n", + " data_source_type=\"batch\",\n", + ")\n", + "\n", + "for m in view_metrics[:3]:\n", + " print(f\"Date: {m['metric_date']} Total rows: {m['total_row_count']} \"\n", + " f\"Features w/ nulls: {m['features_with_nulls']} Max null rate: {m.get('max_null_rate', 'N/A')}\")" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "### Feature service aggregates" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Date: 2025-01-01 Total features: 4 Avg null rate: 0.0\n", + "Date: 2025-01-01 Total features: 4 Avg null rate: 0.0\n", + "Date: 2025-02-28 Total features: 4 Avg null rate: 0.0\n" + ] + } + ], + "source": [ + "svc_metrics = monitoring.get_feature_service_metrics(\n", + " project=\"monitoring_demo\",\n", + " feature_service_name=\"driver_service\",\n", + " data_source_type=\"batch\",\n", + ")\n", + "\n", + "for m in svc_metrics[:3]:\n", + " print(f\"Date: {m['metric_date']} Total features: {m['total_features']} \"\n", + " f\"Avg null rate: {m.get('avg_null_rate', 'N/A')}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Baseline metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Baseline mean: 0.4989\n", + "Baseline stddev: 0.1975\n", + "Baseline null_rate: 0.0000\n" + ] + } + ], + "source": [ + "baseline = monitoring.get_baseline(\n", + " project=\"monitoring_demo\",\n", + " feature_view_name=\"driver_stats\",\n", + " feature_name=\"conv_rate\",\n", + " data_source_type=\"batch\",\n", + ")\n", + "\n", + "if baseline:\n", + " print(f\"Baseline mean: {baseline[0]['mean']:.4f}\")\n", + " print(f\"Baseline stddev: {baseline[0]['stddev']:.4f}\")\n", + " print(f\"Baseline null_rate: {baseline[0]['null_rate']:.4f}\")\n", + "else:\n", + " print(\"No baseline found.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 7: Visualize a Feature Distribution\n", + "\n", + "Use the histogram stored in the metrics to plot a distribution." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/opt/homebrew/Cellar/python@3.12/3.12.11/Frameworks/Python.framework/Versions/3.12/lib/python3.12/pty.py:95: DeprecationWarning: This process (pid=12140) is multi-threaded, use of forkpty() may lead to deadlocks in the child.\n", + " pid, fd = os.forkpty()\n" + ] + } + ], + "source": [ + "!uv pip install -q 'matplotlib'" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAAGGCAYAAABmGOKbAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAARL1JREFUeJzt3Qd4FOXa//E7Cb2F3kNRkd4EBDwoUgQRUYr1KKIiKgIqqJQjCIIIBxVQATk2sCHKK6hU6eARUGlKURSlRCEUEQIoLZn3up/3P/vfDQmEMJOZ3f1+rmuNmV12752dze5vnhZjWZYlAAAAAADAcbHO3yUAAAAAAFCEbgAAAAAAXELoBgAAAADAJYRuAAAAAABcQugGAAAAAMAlhG4AAAAAAFxC6AYAAAAAwCWEbgAAAAAAXELoBgAAAADAJYRuAACyYNiwYRITExOyrVKlSnLvvfe6/tg7d+40jz116tTANn3cAgUKSHbRx9d9AAAAzo3QDQCICKtWrTIh8PDhwxJO5s2b59vw6ufanDZz5ky5/fbb5ZJLLpF8+fJJ1apV5YknnsjwePr888/liiuukDx58kiFChVk6NChcubMmZDbLFmyRO6//365/PLLzX3qfT/wwAOyd+/es+7v2muvNScy0l6uv/76TD8HrfXBBx+UEiVKSP78+aVFixayfv36s2730Ucfyd133y1VqlQxj6GPfaGy87EAINzl8LoAAACcCt3PPvusafEtXLiwJzVs27ZNYmNjLzjYTpw48YLCbcWKFeXvv/+WnDlzZqFKZ2rTx8+RI3K+RmiALFu2rAmIGqI3bdokEyZMMPtAw2TevHkDt50/f7507NjRBMhXX33V3Pa5556T/fv3y2uvvRa43YABA+TQoUNy6623mtD566+/mvucM2eObNy4UUqXLh1SQ/ny5WXUqFEh27SmzEhNTZX27dvLd999J0899ZQUL15cJk2aZGpct26deXyb1qjbGjVqJH/88ccF76vsfCwAiASR82kJAIgox48fNy1o4SR37tyu3r+2pGrgyZUrl2lh9ZLXj++0//mf/zmrFbZBgwbSrVs3+eCDD0wLte3JJ5+UOnXqyMKFCwMnHgoVKiTPP/+8PPbYY1KtWjWzbezYsdKsWbOQEzHact28eXMTvjWoB4uPjzehP6v164mnGTNmyC233GK23XbbbaaVXVvhp02bFrjte++9J+XKlTN11apVy9ePBQCRgO7lABAhfv/9d+nevbtpGdPwV7lyZenZs6ecOnUqcBttadNWt6JFi5rurk2aNJG5c+eG3M/y5ctNN9CPP/5YRo4caVrfNGC1atVKtm/fHrhd7969zRjiv/7666xa7rzzTtOKl5KSckHjo7du3Sr//Oc/pUiRIia
sqO+//960XmvXXK1D71e77Aa3mum/1xY3pc/b7pqrY59t77//vglR2mKpz/+OO+6QxMTETNX33//+17TU6eNfeuml8p///Cfd26Ud03369GnT+q4tf/pvixUrZp7XokWLzPV6W21JVsFdioPHbb/44osyfvx487j6uuo+Sm9Md/Br3LZtW3PCQo+F4cOHi2VZZ72++jNY2vs8V232trQt4Bs2bJB27dqZAKrHhh4za9asCbmN3r/+26+++kr69esX6J7cqVMnOXDggHglvW7PWpP64YcfAtt0/+tFW8aDW/ofeeQRs581kNquueaas3o+6DY9/oLvM+2JlWPHjl1w/fq4pUqVks6dOwe26b7VMPzZZ5/JyZMnA9sTEhIuuEeGV48FAJGAlm4AiAB79uyRK6+8MjDOUlvaNITrl2MNxdoyum/fPrnqqqvM748++qgJgO+8847cdNNN5nZ2wLCNHj3afFnWVr0jR47ImDFj5K677pKvv/7aXK/jXzWUaWjXIG/T+589e7YJbXFxcRf0POxuuNpiaAdFDagaJO+77z4TuLds2SKvv/66+amBTgOcfvn/6aef5MMPP5Rx48aZ7q52EFB68mDIkCEmFGiLpYY77RasAUiD4rm6o2vX4TZt2pj70pCpoUhb8zR0nI/eXrsL62Pq65OcnCxr16413ZWvu+46eeihh8xrp89RWwTTM2XKFDlx4oR5XTV0a2DT1u706EkObUnVkyn6ei1YsCAw1ljD94XITG3B9PW4+uqrTeDu37+/6fquJyc0zK5YsUIaN24ccvs+ffqYkytanwZ+PbGgJ3J0DLBfJCUlmZ/28aT0eFENGzYMua2e4NATVPb1GdFArZfg+7TpMawnIPREmR5fPXr0kGeeeSZTwwj0cXWMedqAq8edvl/0vmvXri1OyM7HAoCIYAEAwt4999xjxcbGWt9+++1Z16Wmppqfjz/+uKZY68svvwxcd/ToUaty5cpWpUqVrJSUFLNt2bJl5nbVq1e3Tp48Gbjtyy+/bLZv2rQpcL/lypWzunTpEvJ4H3/8sbndypUrM13/0KFDzb+58847z7rur7/+Omvbhx9+eNZjvPDCC2bbjh07Qm67c+dOKy4uzho5cmTIdn0eOXLkOGt7Wh07drTy5Mlj7dq1K7Bt69at5j7TfoxWrFjR6tatW+D3unXrWu3btz/n/ffq1eus+1H6PHR7oUKFrP3796d73ZQpUwLb9HF1W58+fQLb9DXSx8+VK5d14MCBkNdXf57vPjOqTel2fd2C95M+zi+//BLYtmfPHqtgwYLWNddcE9im96//tnXr1oFjU/Xt29fs08OHD1t+0b17d1PTTz/9dNZxtnv37rNu36hRI6tJkybnvM8RI0aYf79kyZKQ7ffff781bNgw65NPPrHeffdd66abbjK3u+222zJVa/78+c19pDV37lxzPwsWLEj339WsWdNq3rx5ph7Di8cCgEhAfx8ACHPa6vnpp59Khw4dzmp9U3aXYJ0QSlui7G7bSrsAawuqtjRql9lg2rKsLeQ2bcVU2ups36+2TOv9BneH1ZZKHcMZ/DiZ9fDDD5+1LXgCK23xPXjwoGnJVenNlpzerNS6j7SVW/+tfdFWc21VX7ZsWYb/VluOv/jiCzNplk6uZatevbrpwn0+2oKuLcA///yzZFWXLl0CLfaZoa3FNn2N9HdtOV28eLG4RfeTjm/W/aTDAGxlypQxwwW0e7628gfT4y64u7oeX3o/u3btEj/QcclvvfWWmcE8eGIwnUAuo/H7OoTAvj49K1euNMMN9Fhs2bJlyHX6WNrqr702unbtarppa0u3DvNI20U/Pfq4GdUUXLcTsvOxACASELoBIMxpV2kNNOebpEjDjC6DlJYGSPv6YMEhU2lXYPXnn38GtmkXc/2CrcsnKQ3fGsI1jKddwzozdDx2Wjr7s05Opd1tNYBrALVvp93ez0cDrzbManDSfxt80XG1OuP0ufatPr/g0GV
Lb1+mpV26tcu/TjCl3W113LmOUb/YfZIR7e4bHHqVPrYKHt/uNN1POqwgo+NLT3qkHT+fmeMrLX0ttMt3Vi6ZOVZsX375pZkfQU+s6NCE9E4CBY9bDj4pFHySKNiPP/5ohnDo+/TNN9/MVB0a+JV9wkRPnqR9Xva8Cfq4GdUUXHdmZedjAUCkY0w3ACBdGY3HDp6US1ucdfIwbY3TFk0dy63BSMN4VqT3ZV1bBXWmZA2s9erVM63zGuJ07HJGY5uD6W30BIAu85Tec9L7c4uOGf/ll19Mq6W2BGvY0jHnkydPDpkN+1ycDjAZnQzJ7KR32Xl8paW9KLQHRlboLOTpTTyXli6DpfMcaDjWuQ7SLoumrfdK19rWScKC6TbtTZKWnnDQeQF0dnI9KVWwYMFM1Wzfv554Uvo+0PWwg+3YscO8B7Wu9Nb/trdldukxW3Y+FgBEOkI3AIQ5bbHVyas2b9583rWddR3p9Frg7OuzQkPxyy+/bFrbNRTpl3K7+/fF0lbPJUuWmC65OqGULb3u2hmFSZ31W4Octhjbrb4Xsm819Kb3eOnty/ToxGcaFPWiPQE0iOsEa3bozkqPgHOdYNDu/8HPUye1Uvq6BLcoawt8sPS6dWe2Nt1POht+RseXtsCnDahZoS3P9szvFyozQVBPkOjJnJIlS5pwnN4JGT3xo3RCvOCArZPO/fbbb6bbfDCdZV8Dt7YM67Fsh/bMsIdy2MML6tate9bzt9f61rq0hV6PgeAJznTiQ31tLvTYz87HAoBIR+gGgDCnX3p1LK0uiaVBIO24bg2cGp5uuOEGM0P06tWrpWnTpoG1sHW2YQ1kNWrUyNLja6u2zpStM6HrbNnaFdzp1tC0rZ/6PNKy1/ROGyZ1jOygQYNMcNd9FBwk9X61FVFncs/o8TXo6Zj53bt3B7pEa7d0Het9Phq4gu9bQ9xll10W0tU6uO5zzaKeWbr+8yuvvBJ4fvq7zn6ty3fZJ1f0een4Yj1ubJMmTTrrvjJbm96fBktt0ddu7HbA1xnzdWy0ju/XE0MXSwPrhYTWC6Hdp/U56PtJX9uMxtHXrFnTrA6g7xud4d0+Rl977TVzbNnrVtvvL33f6UoCOndAesMUlJ6w0jHSweOk9bWz1/G25w/QEyatW7dO9z70cbVlXucwsGvQuQt0LW2d7+FC15DPzscCgEhH6AaACKBLbGn35ebNm5uWNh1Hq1099UuwTmKlgWngwIFmSS1dR1mXDNMWWA3K2mX0k08+yfJaurp0kAbJp59+2rTmZbVreXo0qGnLsIZ6XfNaJ2jT56k1p6VrcCutQ9fg1qCpAUBbujW8aPDWQKhBU7v36n3MmjXL7C9dFi0jGtb1ZIJO9KVrMevyW7rcmIav843P1hMZumSW1qb7W0+KaFgJnuzMrltfEw1XGuK0/qzQiay0Vu1KrUt0aZd6XdLtX//6VyBEahdnHXOvz0FDou6fOXPmpDu2/UJq032sLaMasHU/abdsXTJMjwl9/fxOW7i1ZVmXO9P3jF5sOp+ALvFme+GFF0wXdA3puj+0l4me3NDeC/YcCUqX2Pvmm2/MuvJ6oiZ4bW49AWOf9NAJAXVte73oe0mHaOixqWuZ6/Gp77Hz0fCrPUy0R4VOiqhLkumJFB02oMdwMD3hohd7PL6eHLADvr7f9OKXxwKAiOD19OkAAGfokla6dFiJEiWs3LlzW5dccolZ8il42S9dzumWW26xChcubJbBuvLKK605c+aE3I+9pNSMGTPOu6SU7emnnzbXXXbZZVmq3V4yzF7WKthvv/1mderUydQcHx9v3XrrrWYpqrRLVtnLMekyZrp8Wtrlw3QppmbNmpnljvRSrVo1s3+2bdt23vpWrFhhNWjQwCyJpft18uTJgZrPtWTYc889Z/ax1p43b17zmLpE2alTpwK3OXPmjFnmS1+3mJiYwH3a+1uXqEoroyXD9Hnpa9ymTRsrX758VqlSpUyd9nJwNt3PutSb3qZ
IkSLWQw89ZG3evPms+8yoNpXe/l+/fr3Vtm1bq0CBAua+W7RoYa1atSrkNvaSYWmXt8toKbPsoo+d0SW9Za5mzZpl1atXz7zXypcvbw0ePDjkdbWPh4zuU6+z/frrr+a41qX79H2p+06PNz3OgpdVO59Dhw6ZZc6KFStm7kPrTm8ZQfvYTe+S9jX1w2MBQLiL0f94HfwBAAAAAIhELBkGAAAAAIBLGNMNAHCNztatl3PRscYZLR8FAAAQ7gjdAADXvPjii2dNrJSWvfYvAABAJGJMNwDANTobtL3WcEZ0tmuddRsAACASEboBAAAAAHAJE6kBAAAAAOASxnSLSGpqquzZs0cKFiwoMTExXpcDAAAAAPA57TR+9OhRKVu2rMTGZtyeTegWMYE7ISHB6zIAAAAAAGEmMTFRypcvn+H1hG4R08Jt76xChQp5XQ4AAAAAwOeSk5NN462dJzNC6NbZ5P5fl3IN3IRuAAAAAEBmnW+IMhOpAQAAAADgEkI3AAAAAAAuIXQDAAAAAOASQjcAAAAAAC4hdAMAAAAA4BJCNwAAAAAALiF0AwAAAADgEkI3AAAAAAAuIXQDAAAAABCJoXvYsGESExMTcqlWrVrg+hMnTkivXr2kWLFiUqBAAenSpYvs27cv5D52794t7du3l3z58knJkiXlqaeekjNnznjwbAAAAAAACJVDPFazZk1ZvHhx4PccOf5/SX379pW5c+fKjBkzJD4+Xnr37i2dO3eWr776ylyfkpJiAnfp0qVl1apVsnfvXrnnnnskZ86c8vzzz3vyfAAAAAAA8E3o1pCtoTmtI0eOyFtvvSXTpk2Tli1bmm1TpkyR6tWry5o1a6RJkyaycOFC2bp1qwntpUqVknr16smIESNkwIABphU9V65cHjwjAAAiW0qqJXGxMV6X4Zs6AADwdej++eefpWzZspInTx5p2rSpjBo1SipUqCDr1q2T06dPS+vWrQO31a7net3q1atN6NaftWvXNoHb1rZtW+nZs6ds2bJF6tev79GzAgAgcmnQHT1rgyQePOZZDQnFC8jATnzOAwD8z9PQ3bhxY5k6dapUrVrVdA1/9tln5eqrr5bNmzdLUlKSaakuXLhwyL/RgK3XKf0ZHLjt6+3rMnLy5ElzsSUnJzv8zAAAiGwauLcn8fkJAICvQ3e7du0C/1+nTh0TwitWrCgff/yx5M2b17XH1dZ0DfgAAAAAAETNkmHaqn355ZfL9u3bzTjvU6dOyeHDh0Nuo7OX22PA9Wfa2czt39MbJ24bNGiQGTNuXxITE115PgAAAACA6Oar0H3s2DH55ZdfpEyZMtKgQQMzC/mSJUsC12/bts0sEaZjv5X+3LRpk+zfvz9wm0WLFkmhQoWkRo0aGT5O7ty5zW2CLwAAAAAARFT38ieffFI6dOhgupTv2bNHhg4dKnFxcXLnnXeaJcK6d+8u/fr1k6JFi5pg3KdPHxO0dRI11aZNGxOuu3btKmPGjDHjuAcPHmzW9tZgDQBAOPHLbNx+qQMAgEjgaej+7bffTMD+448/pESJEtKsWTOzHJj+vxo3bpzExsZKly5dzMRnOjP5pEmTAv9eA/qcOXPMbOUaxvPnzy/dunWT4cOHe/isAADIGmYFBwAg8ngauqdPn37O63UZsYkTJ5pLRrSVfN68eS5UBwBA9mNWcAAAIouvxnQDAAAAABBJCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QA
AAAAAuITQDQAAAACASwjdAAAAAAC4hNANAIh4KamW+IFf6gAAANknRzY+FgAAnoiLjZHRszZI4sFjntWQULyADOxU37PHBwAA3iB0AwCiggbu7UnJXpcBAACiDN3LAQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugEAQETyyxJtfqkDAOANZi8HAAARiaXiAAB+QOgGAAARi6XiAABeo3s5AAAAAAAuIXQDAAAAAOASQjcAAICH/DLRml/qAIBI45sx3aNHj5ZBgwbJY489JuPHjzfbTpw4IU888YRMnz5dTp48KW3btpVJkyZJqVKlAv9u9+7d0rNnT1m2bJkUKFBAunXrJqNGjZIcOXzz1AAAADLEhG8AENl8kUy//fZb+c9//iN16tQJ2d63b1+ZO3euzJgxQ+Lj46V3797SuXNn+eqrr8z1KSkp0r59eyldurSsWrVK9u7dK/fcc4/kzJlTnn/+eY+eDQAAwIVhwjcAiFyedy8/duyY3HXXXfLGG29IkSJFAtuPHDkib731lowdO1ZatmwpDRo0kClTpphwvWbNGnObhQsXytatW+X999+XevXqSbt27WTEiBEyceJEOXXqlIfPCgAAAAAAH4TuXr16mdbq1q1bh2xft26dnD59OmR7tWrVpEKFCrJ69Wrzu/6sXbt2SHdz7YKenJwsW7ZsycZnAQAAAACAz7qX61jt9evXm+7laSUlJUmuXLmkcOHCIds1YOt19m2CA7d9vX1dRnR8uF5sGtIBAAAAAIiYlu7ExEQzadoHH3wgefLkydbH1onWdIy4fUlISMjWxweASOGX2Y79UgcAAIBvWrq1+/j+/fvliiuuCGzTidFWrlwpEyZMkC+++MKMyz58+HBIa/e+ffvMxGlKf37zzTch96vX29dlRGdJ79evX0hLN8EbAC4csy4DAAD4NHS3atVKNm3aFLLtvvvuM+O2BwwYYEKwzkK+ZMkS6dKli7l+27ZtZomwpk2bmt/158iRI014L1mypNm2aNEiKVSokNSoUSPDx86dO7e5AAAuHrMuAwAA+DB0FyxYUGrVqhWyLX/+/FKsWLHA9u7du5sW6aJFi5og3adPHxO0mzRpYq5v06aNCdddu3aVMWPGmHHcgwcPNpOzEaoBAAAAAF7zxTrdGRk3bpzExsaalm6d+ExnJp80aVLg+ri4OJkzZ4707NnThHEN7d26dZPhw4d7WjcAAAAAAL4L3cuXLw/5XSdY0zW39ZKRihUryrx587KhOgAAAAAAwmydbgAAAAAAIhWhGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugEAAAAAcAmhGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugEAAAAAcAmhGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugHAh1JSLfEDv9QBAAAQrnJ4XQAA4GxxsTEyetYGSTx4zLMaEooXkIGd6nv2+AAAAJGA0A0APqWBe3tSstdlAAAA4CLQvRwAAAAAAJcQugEAAAAAcAmhGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugEAAAAAcAmhGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAABwXimplviBX+oAgMzKkelbAgAAIGrFxcbI6FkbJPHgMc9qSCheQAZ2qu/Z4wNAVhC6AQAAkCkauLcnJXtdBgCEFbqXAwAAAADgEkI3AAAAAAAuIXQDAAAAAOASQjcAAAAAAC4hdAMAAAAA4BJCNwAAAAAALiF0AwAAAADgEkI3AAAAAACRGLpfe+01qVOnjhQqVMhcmjZtKvPnzw9cf+LECenVq5cUK1ZMChQoIF26dJF9+/aF3Mfu3bulffv2ki9fPilZsqQ89dRTcubMGQ+eDQAAAAAAPgrd5cu
Xl9GjR8u6detk7dq10rJlS7n55ptly5Yt5vq+ffvK7NmzZcaMGbJixQrZs2ePdO7cOfDvU1JSTOA+deqUrFq1St555x2ZOnWqPPPMMx4+KwAAAAAA/k8O8VCHDh1Cfh85cqRp/V6zZo0J5G+99ZZMmzbNhHE1ZcoUqV69urm+SZMmsnDhQtm6dassXrxYSpUqJfXq1ZMRI0bIgAEDZNiwYZIrVy6PnhkAAAAAAD4a062t1tOnT5fjx4+bbuba+n369Glp3bp14DbVqlWTChUqyOrVq83v+rN27domcNvatm0rycnJgdZyAAAAAACisqVbbdq0yYRsHb+t47ZnzZolNWrUkI0bN5qW6sKFC4fcXgN2UlKS+X/9GRy47evt6zJy8uRJc7FpSAcAAAAAwBct3Zdccon88ccfZ20/fPiwue5CVK1a1QTsr7/+Wnr27CndunUzXcbdNGrUKImPjw9cEhISXH08AAAAAEB0ylLo3rlzp+kOnpa2Hv/+++8XdF/amn3ZZZdJgwYNTBiuW7euvPzyy1K6dGkzQZoG+WA6e7lep/Rn2tnM7d/t26Rn0KBBcuTIkcAlMTHxgmoGAAAAAMDx7uWff/554P+/+OIL00ps0xC+ZMkSqVSpklyM1NRUE941hOfMmdPcpy4VprZt22aWCNPu6Ep/6uRr+/fvN8uFqUWLFpnlx7SLekZy585tLgAAAAAA+CZ0d+zY0fyMiYkx3cCDaUDWwP3SSy9l+v60xbldu3ZmcrSjR4+amcqXL18eCPTdu3eXfv36SdGiRU2Q7tOnjwnaOnO5atOmjQnXXbt2lTFjxphx3IMHDzZrexOqAQAAAABhFbq1FVpVrlxZvv32WylevPhFPbi2UN9zzz2yd+9eE7Lr1KljAvd1111nrh83bpzExsaalm5t/daZySdNmhT493FxcTJnzhwzFlzDeP78+c3JgOHDh19UXQAAAAAAeDZ7+Y4dOxx5cF2H+1zy5MkjEydONJeMVKxYUebNm+dIPQAAAAAA+GLJMB1rrRdtrbZbwG1vv/22E7UBAAAAABB9ofvZZ581XbgbNmwoZcqUMWO8AQAAAACAA6F78uTJMnXqVDOBGQAAAAAAcHCdbl0/+6qrrsrKPwUAAABck5JqiR/4pQ4AYdrS/cADD5jlvYYMGeJ8RQAAAEAWxcXGyOhZGyTx4DHPakgoXkAGdqrv2eMDiIDQfeLECXn99ddl8eLFZpkvXaM72NixY52qDwAAALggGri3JyV7XQYAZD10f//991KvXj3z/5s3bw65jknVAAAAAAC4iNC9bNmyrPwzAAAAAACiSpYmUgMAAAAAAC61dLdo0eKc3ciXLl2albsFAAAAACCiZCl02+O5badPn5aNGzea8d3dunVzqjYAAAAAAKIvdI8bNy7d7cOGDZNjx7xbngEAAAAAgIgd03333XfL22+/7eRdAoCjUlIt8QO/1AEAAAAftnRnZPXq1ZInTx4n7xIAHBUXGyOjZ20wa7h6JaF4ARnYqb5njw8AAACfh+7OnTuH/G5Zluzdu1fWrl0rQ4YMcao2AHCFBu7tSclelwEAAIAokKXQHR8fH/J7bGysVK1aVYYPHy5t2rRxqjYAAAAAAKIvdE+ZMsX5SgAAAAAAiDAXNaZ73bp18sMPP5j/r1mzptSvzxhFAAAAAAAuKnTv379f7rjjDlm+fLkULlzYbDt8+LC0aNFCpk+fLiVKlMjK3QIAAAAAEFGytGRYnz595OjRo7JlyxY5dOiQuWzevFmSk5Pl0Ucfdb5KAAAAAACipaV7wYIFsnjxYqlevXpgW40aNWTixIlMpAYAAAAAwMW0dKempkrOnDnP2q7b9DoAAAAAAJDF0N2yZUt57LHHZM+ePYFtv//+u/Tt21datWrlZH0AAAAAAERX6J4wYYIZv12pUiW59NJLzaVy5cpm26uvvup8lQAAAAAARMuY7oSEBFm/fr0Z1/3jjz+abTq+u3Xr1k7XBwAAAAB
AdLR0L1261EyYpi3aMTExct1115mZzPXSqFEjs1b3l19+6V61AAAAAABEaugeP3689OjRQwoVKnTWdfHx8fLQQw/J2LFjnawPAAAAAIDoCN3fffedXH/99Rler8uFrVu3zom6AAAAAACIrtC9b9++dJcKs+XIkUMOHDjgRF0AAAAAAERX6C5Xrpxs3rw5w+u///57KVOmjBN1AQAAAAAQXaH7hhtukCFDhsiJEyfOuu7vv/+WoUOHyo033uhkfQAAAAAARMeSYYMHD5aZM2fK5ZdfLr1795aqVaua7bps2MSJEyUlJUWefvppt2oFAAAAACByQ3epUqVk1apV0rNnTxk0aJBYlmW26/Jhbdu2NcFbbwMAAAAAAC4wdKuKFSvKvHnz5M8//5Tt27eb4F2lShUpUqSIOxUCAAAAABAtodumIbtRo0bOVgMAAAAAQLROpAYAAAAAADKP0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAQCSG7lGjRkmjRo2kYMGCUrJkSenYsaNs27Yt5DYnTpyQXr16SbFixaRAgQLSpUsX2bdvX8htdu/eLe3bt5d8+fKZ+3nqqafkzJkz2fxsAAAAAADwUehesWKFCdRr1qyRRYsWyenTp6VNmzZy/PjxwG369u0rs2fPlhkzZpjb79mzRzp37hy4PiUlxQTuU6dOyapVq+Sdd96RqVOnyjPPPOPRswIAAAAA4P/kEA8tWLAg5HcNy9pSvW7dOrnmmmvkyJEj8tZbb8m0adOkZcuW5jZTpkyR6tWrm6DepEkTWbhwoWzdulUWL14spUqVknr16smIESNkwIABMmzYMMmVK5dHzw4AAAAAEO18NaZbQ7YqWrSo+anhW1u/W7duHbhNtWrVpEKFCrJ69Wrzu/6sXbu2Cdy2tm3bSnJysmzZsiXbnwMAAAAAAL5o6Q6Wmpoqjz/+uPzjH/+QWrVqmW1JSUmmpbpw4cIht9WArdfZtwkO3Pb19nXpOXnypLnYNKADAAAAABCxLd06tnvz5s0yffr0bJnALT4+PnBJSEhw/TEBAAAAANHHF6G7d+/eMmfOHFm2bJmUL18+sL106dJmgrTDhw+H3F5nL9fr7Nuknc3c/t2+TVqDBg0yXdntS2JiogvPCgAAAAAQ7TwN3ZZlmcA9a9YsWbp0qVSuXDnk+gYNGkjOnDllyZIlgW26pJguEda0aVPzu/7ctGmT7N+/P3AbnQm9UKFCUqNGjXQfN3fu3Ob64AsAAAAAABE1plu7lOvM5J999plZq9seg61dvvPmzWt+du/eXfr162cmV9Nw3KdPHxO0deZypUuMabju2rWrjBkzxtzH4MGDzX1ruAYAAAAAICpD92uvvWZ+XnvttSHbdVmwe++91/z/uHHjJDY2Vrp06WImP9OZySdNmhS4bVxcnOma3rNnTxPG8+fPL926dZPhw4dn87MBAAAAAMBHoVu7l59Pnjx5ZOLEieaSkYoVK8q8efMcrg4AAAAAgAiYSA0AAAAAgEhE6AYAAACyWUrq+Xt8RlMdQCTztHs5AAAAEI3iYmNk9KwNknjwmGc1JBQvIAM71ffs8YFoQegGAAAAPKCBe3tSstdlAHAZ3csBAAAAAHAJoRtARI0J80sdAAAAgKJ7OQBHMDYNAAAAOBuhG4BjGJsGAAAAhKJ7OQAAAAAALiF0AwAAAADgEkI3AAAAAAAuIXQDAAAAAOASQjcAAAAAAC4hdAMAAAAA4BJCNwAAAAAALiF0AwAAAADgEkI3AAAAAAAuIXQDAAAAAOASQjcAAAAAAC4hdAMAAAAA4BJCNwAAAAAALiF0AwAAAADgEkI3AAAAAAAuIXQDPpeSaokf+KUOAAAAIJzk8LoAAOcWFxsjo2dtkMSDxzyrIaF4ARnYqb5njw8AAACEK0I3EAY0cG9PSva6DAAAAAAXiO7lAAAAAAC4hNANAAAAAIBLCN0
AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAgXSmpltcl+KIG4GLkuKh/DQAAACBixcXGyOhZGyTx4DFPHj+heAEZ2Km+J48NOIXQDQAAACBDGri3JyV7XQYQtuheDgAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAARGLoXrlypXTo0EHKli0rMTEx8umnn4Zcb1mWPPPMM1KmTBnJmzevtG7dWn7++eeQ2xw6dEjuuusuKVSokBQuXFi6d+8ux455s44gAAAAAAC+Cd3Hjx+XunXrysSJE9O9fsyYMfLKK6/I5MmT5euvv5b8+fNL27Zt5cSJE4HbaODesmWLLFq0SObMmWOC/IMPPpiNzwIAAAAAgPTlEA+1a9fOXNKjrdzjx4+XwYMHy80332y2vfvuu1KqVCnTIn7HHXfIDz/8IAsWLJBvv/1WGjZsaG7z6quvyg033CAvvviiaUEHAAAAAMArvh3TvWPHDklKSjJdym3x8fHSuHFjWb16tfldf2qXcjtwK719bGysaRnPyMmTJyU5OTnkAgAAAABA1IRuDdxKW7aD6e/2dfqzZMmSIdfnyJFDihYtGrhNekaNGmUCvH1JSEhw5TkAAAAAAKKbb0O3mwYNGiRHjhwJXBITE70uCQAAAAAQgXwbukuXLm1+7tu3L2S7/m5fpz/3798fcv2ZM2fMjOb2bdKTO3duM9t58AUAAAAAgKgJ3ZUrVzbBecmSJYFtOvZax2o3bdrU/K4/Dx8+LOvWrQvcZunSpZKammrGfgMAAAAAELWzl+t62tu3bw+ZPG3jxo1mTHaFChXk8ccfl+eee06qVKliQviQIUPMjOQdO3Y0t69evbpcf/310qNHD7Os2OnTp6V3795mZnNmLgcAAACiQ0qqJXGxMVFfA/zJ09C9du1aadGiReD3fv36mZ/dunWTqVOnSv/+/c1a3rrutrZoN2vWzCwRlidPnsC/+eCDD0zQbtWqlZm1vEuXLmZtbwAAAADRQcPu6FkbJPHgMU8eP6F4ARnYqb4njw3/8zR0X3vttWY97ozExMTI8OHDzSUj2io+bdo0lyoEAAAAEA40cG9PYilg+I9vx3QDAAAAABDuCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN2IWimpGa8RH411AAAAAHBeDhfuEwgLcbExMnrWBkk8eMyzGhKKF5CBnep79vgAAAAA3EXoRlTTwL09KdnrMgAAAABEKLqXAwAAAADgEkI3InaMsl/qAAAAABC96F4OxzFWGgAAAAD+D6EbrmCsNAAAAADQvRwAAAAAANcQugEAAAAAcAmhGwAAAADgqwmJU3xQg1MY0w0AAAAA8M3EyAkRNikyoTuM6NkefQN4zS91AAAAAHAHEyM7h9AdRrw+4xSJZ50AAAAAwE2E7jDDGScAAAAACB9MpAYAAAAAgEsI3QAAAAAQJTNy+6GGaEP3cgAAAACIgjmamJ/JG4RuAAAAAMgmzNEUfeheDgAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuCRiQvfEiROlUqVKkidPHmncuLF88803XpcEAAAAAIhyERG6P/roI+nXr58MHTpU1q9fL3Xr1pW2bdvK/v37vS4NAAAAABDFIiJ0jx07Vnr06CH33Xef1KhRQyZPniz58uWTt99+2+vSAAAAAABRLIeEuVOnTsm6detk0KBBgW2xsbHSunVrWb16dbr/5uTJk+Z
iO3LkiPmZnJwsflcin8ip+DhPHz8z+4k6M4c6nUWdzqJOZ1Gns6jTWdTpLOrM/hrt21Jn9NXpJbtGy7LOebsY63y38Lk9e/ZIuXLlZNWqVdK0adPA9v79+8uKFSvk66+/PuvfDBs2TJ599tlsrhQAAAAAEGkSExOlfPnykdvSnRXaKq5jwG2pqaly6NAhKVasmMTExEik0jMxCQkJ5qAoVKiQ+BV1Oos6nUWdzqJOZ1Gns6jTWdTpLOp0FnVGZ50XS9uvjx49KmXLlj3n7cI+dBcvXlzi4uJk3759Idv199KlS6f7b3Lnzm0uwQoXLizRQg/8cDj4qdNZ1Oks6nQWdTqLOp1Fnc6iTmdRp7OoMzrrvBjx8fGRP5Farly5pEGDBrJkyZKQlmv9Pbi7OQAAAAAA2S3sW7qVdhXv1q2bNGzYUK688koZP368HD9+3MxmDgAAAACAVyIidN9+++1y4MABeeaZZyQpKUnq1asnCxYskFKlSnldmq9ol3pdyzxt13q/oU5nUaezqNNZ1Oks6nQWdTqLOp1Fnc6izuisM7uE/ezlAAAAAAD4VdiP6QYAAAAAwK8I3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNCNsKJrsMM57E9nsT+dxf50FvvTWexPZ7E/ncX+dBb70zmpUbovCd0IC7t27ZLff/9dYmM5ZJ3w888/y6+//sr+dAjHp7PYn85ifzqL/eksPo+cxfHpLI5P5/wc5fsyOp81jO3bt8u4ceOkf//+Mn/+fNm3b5/40caNG6VBgwby5Zdfip/99NNPZq34e++9V959913ZtGmT+NF3330ntWrVki+++EL8jOPTWexPZ7E/ncX+dBafR87i+HQWx6dz2JdhRNfpRvTZtGmTVaRIEatZs2ZW48aNrdy5c1t33nmnNW/ePMtPNm7caOXNm9d64oknzrouNTXV8ostW7ZYhQsXtq6//npzKVWqlNWyZUtrypQplp9s2LDB7M8nn3zS8jOOT2exP53F/nQW+9NZfB45i+PTWRyfzmFfhhdCdxT666+/rBtvvNHq06ePdebMGbNt/vz5Vps2baxrr73WmjlzpuUHP/74o/lwGzZsmPlda/3vf/9r6vv+++8DtXvt1KlTVteuXa0HHngg8MH2zTffmN9r1Khhvfbaa5Yf/PTTT1aOHDms4cOHm99Pnz5tLViwwHr99det5cuXW/v27bP8gOPTWexPZ7E/ncX+dBafR87i+HQWx6dz2Jfhh9AdhfSPb/369a3nnnsuZPvq1autm266yZwtW7NmjeWlEydOWP/85z+tokWLWt9++63Z1qFDB6tmzZpW8eLFrbi4OOupp56yfv31V8tr+sfu6quvtnr16nXWh+AjjzxiNWjQwPrss88sr/849+/f38qTJ481d+5cs61du3Zmf5YtW9Zs1z/eegx4jePTWexPZ7E/ncX+dBafR9F3fP79998cn1F4fLIvww9juqNwxsCTJ09KmTJl5ODBg2ZbSkqK+dmkSRN58sknZffu3fLpp5+abXpixgu5c+eWBx98UFq1amVqqlKliql9ypQpZvyK/nzjjTfkvffe87RO+3Hr1KkjBw4ckD///DNwXdWqVeXhhx+WYsWKycyZMz2tM2fOnNK1a1d56KGHpG/fvlKxYkWz7cMPP5TExERT34YNG2Tq1Kme1snx6Syt6cSJE2GxP3v06OH7/an7juPTOWfOnGF/OkwfOxw+j+6++27ffx6pv/76y/fHZ548eaR79+6+Pz61pnA5Pu+66y5fH5989wxTXqd+eGPChAlWrly5rC+++ML8npKSErhu0qRJVsGCBa39+/dbXtOuJ/ZYlV9++SXkutGjR5uxLH/88YcntQWPkfr444/NeBXtLpN27NSMGTNM1xqvzjIHv7Zbt261HnzwQXOmUf8/2Ntvv23lzJnT2r17t+W1iRMnhsXxuWLFCt8en8F4v198S2cw7bbnx/2Zts4vv/zSl/vz0KFDYbE
/09bp1/2p3TO3b9/u+88jrVO7mvr98+jnn3+2XnjhBd8fn1rnv//9b98fn2mPwWnTpvny+Exbi3bL9+PxGezDDz/05b5Mb16EB32+L7MDoTsKJCYmmvET+kEc/Obr1q2b+bDQcT/BFi5caNWuXTvb/zhnVKd2O5k9e7YZBxL8gacfhHXq1DHdV7LT0aNHz6pFDRkyxIypev/990O+/OrkJtqVJrv/8GVUp37pWbp0aWC/2dd98sknZhzQ4cOHs7XOpKQka+3atea405pt3bt399XxmVGd2p3PT8fnrl27zJcaPXGh47v8uj8zqtNv+1Mnqmnfvr21ePHikO06bs5P+zOjOvWY9dP+XL9+vRUbG2t+Bv9d8tv+zKhOv+3P7777zrr88svN+0j/RtkGDx7sq8+jjOrctm2brz6PtE7tpl2xYsWQIO234zO4zgMHDvj2+NTvGzp5Vs+ePa1Ro0b59vjMqE7tqu2X41NDqR5v+rn5ww8/BLYPHDjQV/syozq3bNnim33pFUJ3hNMzdTqbYaNGjcy4noYNG1q9e/cOjFW67bbbrHz58lnvvPOOtWPHDrNNZ76sW7eu9eeff3paZ/A4lbQtOOrRRx+1OnfubCY6ya6ZOfUMXdu2bc0fE/sPh/3hpnTclH5RGzFihAkSR44cMduqVKkS8sHoRZ3BXxzT21/6uuvkMMGBMjte9+rVq5vjLSYmxrrhhhvMNqVfeO666y7fHJ9p69QvPbbgfevl8al1JiQkWC1atLDi4+PNTw0NSo8/Hffnl/2ZUZ0qvUl/vNif+jh6clJr1MmUggOtfXxqK4PX+/Ncdfrp+NQvgRpc+vXrd9Z1Bw8eNDNC++H4TK/O4H2UXnDxYn9qUChWrJj12GOPpft3+/HHH/fF59H56jx58qQvPo/s2b/1vaStwq+88krI8emXz6O0db766qu+fL/r33kdU37rrbeaGbV1bLz2urLpvvPD8ZlendqDwRb8Hc+r41O/b+h3ZP1epydbmjRpYl5/v73X06vz3nvvPed3+Sc8eK97hdAdwfSskX4Y6JtR//+3334zb0g986VfzIIPeH1zVKhQwYRd/XAM/gLsVZ21atUy4Sa9s2h6llS/YG7evDnb6tQP2WrVqpluMFdddZU5O5deoB03bpw5a6f7VJ9X6dKls3V/ZrZOm3ZL/Ne//mU+vLULUHZ+EStTpox5LfVMrJ5RLl++vPliZtMvCHoW18vjM6M69XhNj1fHp9alx9rTTz9t9pvWoftNu5/Z9MuW1+/3jOqcPn26r/anTSek0WWCOnXqZLVu3TrQxdT+AjFgwABP92dGdWpLg5/2p/5t0aCgPYKCuxvrl7Tgkyza2uTl/syoTv1inl7Y9vL41Peynqiw39v6XtegqKHQNmbMGE8/jzJbp9efR/aSRvp5Y7+f9PNTv4/46fMoozp///33dG/v1fGpQU9b1nUSLaXf7bRr8dixY0Nup93jvTw+M1unfaLCi+NTe4boSf9BgwaZv0F6wnfo0KGmAUB7N/nlvZ7ZOr1+r3uJ0B3BtPumdudatWpVYJueSdLu27pdz+rZvvrqKzP+44MPPjChzS91Vq1aNaRO/QOiy3RUrlzZfPhkFz3TqWO8dLZSreG6664zM0MGB9rgL44a1JYtW2a+oAd/aPuhzuCz3NrdR2+j+zk796d+eXnooYdMl2dt5bD33eTJk81JobRn43WIgRfH5/nq1NAVXKe2QHhxfB4/ftx0f9QxU3oM2DXdcsst1siRI61nn302JNRqF0kv9uf56tQlRYLrXLdunSf7M5j2FtHxkF9//bU5e69n5LUu/bJoj0Hz6vg8X526z7RO/Rvr5fGpf8+bN29uvlzZtNVNW5T0C5nWFNyq6NXn0fnq1B4ZL7/8cuA63YdeHp/6vrHr0RYlncn40ksvNRftNWafZNWeT158HmWmTj1RZNepJza8+DzSk6kaSu0gq/Qzs1ChQqY7bNoT1l693y+0Tq/e70r
/RurJ/+C5Bu677z7zftIeV/q5avPy+Dxfndrd3KYnCL04PleuXGnVq1fP2rNnT8g+095iun68niQIHq7h1b48V516IuDGoMY+PQHkxb70GqE7gukkMPrH9sUXXwzZrkFBzzDrOCQdX+X3OvUspIYcm477TjtJiNs0HOgfZz0RoDTABgdau3uc12thZrbO4KCofyjtL+XZ+eVWP9imTJkSsv3TTz81rcrJycmmxvRa5v1YZ7B58+Zl+/GpS8Z8/vnn5kuWTQOsBgX94qAtIfp+D+5F4IXM1hncrdeL93swHR+pdSnttq0tyeXKlTM1Z9S65Lc6g8fQerE/9eSVjje87LLLrI4dO5qTAvoFTIOLngDSbrsaEqdOnZqtdWWlziuvvNJ67733fHF8ao3690nH6+pJFu0CrRddwkpbnNLrKebHOnV5K5uefM/uzyMNzum1umtd11xzTaBLrNefR5mtM5hXx6f2aNLeALpuuJ5g1b/zOqmX9gzSXmIatuy/V17KTJ3NmjXz9PhctGiRGbuv4/VtGlT1b9H48eNNo5WedPVaZur86KOPPN2XXiN0RzD9A6xjPnQmS3ucbHCLk7aG3nHHHZbXwqXOtIFav6DZgXbmzJmBcT9er4uY2TpnzZpleSn4bKhds34J02EFwUE27UyX2S1c6gweF6ln5HXsoX0s6pdF/RKh3SG1q2w41BkcFL2kLQfaGmfTrttas7bY6WzBfuH3OvWEi4ZXPcHatGlTa+/evYHrdBKqf/zjHybUei0c6rTDnwYwfZ317/szzzwTchvtMaJdTb2cufhC6gxuafSixmD233U92aqt8fba116G7nCp06ZjirXLtp7409ddg6ye+Ldpy7x2f9ZW2XCo0+5J4AUNppUqVTLfk/X9og0l2uNBh2gpDbX6uem1cKnTS4TuCKfjJHRSA50wLe2H2ksvvWRdccUVJth6LVzqTBu+tCb9Q60hQVuXH374Yats2bIhQc3PdfqhpS74C4Ke+dSzzseOHTO/63gfbRnxw6yW4VKnzT4G7bp1SRE/zhDq9zq1Lm1F0q7kXbt2Ne8bnWRHW++0dVaXjfODcKhTTwDOmTPHmj9/fuBvk/1TJ87U+v0QGMKlTv2Sq93htTeDvubB9PXWVrqdO3daXguXOtNrELjkkkus+++/3/Izv9apPcH0pI++xnqSOu0s69qjRHvmec3PddonVnRyNO31qa+zzikTHF5vv/1201PMS+FSp9dyeL1OONyTmpoqtWrVks8++0xatWplfn/kkUekRYsW5voff/xRypcvLzlyeHsYhEudweLi4uTMmTOSL18++fzzz6Vjx45y9913S86cOWXlypVSpkwZCYc6y5Yt63WJEhsbG/j/U6dOydGjR81rPXToUBkzZoysXr1a4uPjxWvhUqetdOnSIXVv2rTJvM9y584tfuLnOvXEtL5/9GfTpk1NjXPnzpV69epJxYoV5d1335VKlSp5XWbY1Jk3b1657rrrTH36t0nZPw8ePGjqDX6feSUc6tTXukKFCvL666/LHXfcYV7vUaNGyaBBg+TkyZOyZMkSKVasmBQqVIg6syAlJcX8Derfv7+MHTtW1q1bJw0aNBC/8XOdBQsWNBf9Tqc1/vDDD3L11Veb6/T7XoECBaRcuXJel+nrOmNiYkxdjRo1kkWLFpn3zPHjx6VatWrmev27n5ycLM2aNfOkvnCr03Nep35cPD3jnrZLsX0W3t6uZ+t0ggNtMdZZDW+++WYz+Ubw2ErqPH+dadm305ZjnSgiO2cHjcQ6dYIabZXTGYx13cngsUFui8Q67V4O2hJfokQJ377ufq9Tx/lq1+20r7Pd0yE7RFKdaVuV9XXXORJ0fGV2iYQ67Z86tEAnK9MJi7Q+bYnXv/PZOUFRJNSZHh02lCtXrpAJ9LJDJNWpQ5q0l532ttPejNoirxOA+e1197rOc9WY3jJv2lNRu27rcmc6eW92CZc6/ShG/+N18EfWbd26VZ5
//nlJSkqSKlWqyI033ijt27cPnAHVs/P2z927d5uzoEuXLpWEhAS56aabAmehqDPzdaY1YcIEefTRR03N9evXp86LqHPVqlXmTGiRIkXM2dIrrriCOi+iTu3dMHPmTPNe0jP2fn3d/V7n6dOnzVn7woULm9/1Y1PP7GeXSKvTNmvWLJkxY4YsX77ctID67XUPhzq1dUlb3f/44w/57bffZP78+aZluXHjxnLppZdS5wXWmZ5///vf5rY1a9akzgus0/4bpK3Hr7zyiuzcudP0vnnsscekevXq1HkBNQbbsWOHvPnmmzJlyhTf/U3yQ51+RegOY9u2bTMfWO3atTNdB/VDTLsNaxgYN25coBtsrly5sv3LV6TXGezAgQOm20x2fXGI5Dr1g+62226TqVOnSo0aNajzIuvctWuXCbN64srPr7tf69QucsHd3O3gkJ0iqc703kfvv/++3H777eYLHHVeXJ1eiOQ6zxVwqTPzddp/j/7++28zdENPDuptqfPCa7TpyVX9dyVKlDCNU9khXOr0Na+b2pE12oVDu7ppF5jgySCee+450z27R48eZy1x5MWsxZFap862vH//fup0sE57luD0lj2hzqzXmZ0TPkVyneHyPgqXOu3XPTuXWYzkOsPldafO6Ksz7fe69LogR2udWXnNw+U7stcrpfiR9zOWIEu0NXjPnj2mi4dNJ4LQ7sM6UdaGDRtk9OjRZrt26ejdu7fpMqNn8qjz4uvs1auXvPzyy9TpYJ2vvvqqOWOfnS0ikV6nvu7Z2XMkkusMl/dRuNRpv+7Z2SIfyXWGy+tOndFXZ9rvddn1tz4c6szKax4u35G9qNPvCN1hyB4RoONI9cu/dt0IfhPcf//9ZtzE7NmzTVcPHW+h2/SSnV8cqJM6L6TO7t27my5y2fWBHA116utOndH1PgqXOsPldQ+XOsPldadO6qTO8KkxnOoMC143tSPrdD1rnQ1QZ1g8evRoSHcYXatV18ScPXu2x1VSp9Oo01nU6SzqdBZ1Oos6nUWdzqLO6KszHGoMpzr9jNAd5pYuXWqWLOrVq5d14MCBwHYdh6ZLbq1atcryA+p0FnU6izqdRZ3Ook5nUaezqNNZ1Bl9dYZDjeFUp18RuiPA559/bt4EnTt3tqZPn27Waxw4cKBZBzMxMdHyC+p0FnU6izqdRZ3Ook5nUaezqNNZ1Bl9dYZDjeFUpx8RuiPEunXrrObNm1sVK1a0Lr30Uuvyyy+31q9fb/kNdTqLOp1Fnc6iTmdRp7Oo01nU6SzqjL46w6HGcKrTb1inO4LoGsyHDh2So0ePSpkyZaR48eLiR9TpLOp0FnU6izqdRZ3Ook5nUaezqDP66gyHGsOpTj8hdAMAAAAA4BLmcgcAAAAAwCWEbgAAAAAAXELoBgAAAADAJYRuAAAAAABcQugGAAAAAMAlhG4AAAAAAFxC6AYAAAAAwCWEbgAAAAAAXELoBgAA6dq5c6fExMTIxo0bvS4FAICwRegGACBK3XvvvSZU25dixYrJ9ddfL99//725PiEhQfbu3Su1atXyulQAAMIWoRsAgCimIVuDtV6WLFkiOXLkkBtvvNFcFxcXJ6VLlzbbAABA1hC6AQCIYrlz5zbBWi/16tWTgQMHSmJiohw4cOCs7uXLly83v2s4b9iwoeTLl0+uuuoq2bZtm9dPAwAA3yJ0AwAA49ixY/L+++/LZZddZrqaZ+Tpp5+Wl156SdauXWtawe+///5srRMAgHBCfzEAAKLYnDlzpECBAub/jx8/LmXKlDHbYmMzPi8/cuRIad68ufl/bRlv3769nDhxQvLkyZNtdQMAEC5o6QYAIIq1aNHCdB/XyzfffCNt27aVdu3aya5duzL8N3Xq1An8v4Z0tX///mypFwCAcEPoBgAgiuXPn990J9dLo0aN5M033zQt3m+88UaG/yZnzpyB/9cx3io1NTVb6gUAINwQugEAQEiI1q7lf//9t9elAAAQERjTDQBAFDt
58qQkJSWZ///zzz9lwoQJZkK1Dh06eF0aAAARgdANAEAUW7BgQWBcdsGCBaVatWoyY8YMufbaa82SYQAA4OLEWJZlXeR9AAAAAACAdDCmGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugEAAAAAcAmhGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugEAAAAAEHf8L6qfN8v/SQz9AAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "try:\n", + " import matplotlib.pyplot as plt\n", + "\n", + " # Get the latest daily metric for conv_rate\n", + " latest = metrics[0] if metrics else None\n", + " if latest and latest.get(\"histogram\"):\n", + " hist = latest[\"histogram\"]\n", + " bins = hist[\"bins\"]\n", + " counts = hist[\"counts\"]\n", + "\n", + " fig, ax = plt.subplots(figsize=(10, 4))\n", + " ax.bar(\n", + " [f\"{bins[i]:.2f}\" for i in range(len(counts))],\n", + " counts,\n", + " color=\"steelblue\",\n", + " edgecolor=\"white\",\n", + " )\n", + " ax.set_title(f\"conv_rate distribution — {latest['metric_date']}\")\n", + " ax.set_xlabel(\"Bin\")\n", + " ax.set_ylabel(\"Count\")\n", + " plt.xticks(rotation=45)\n", + " plt.tight_layout()\n", + " plt.show() # pragma: allowlist secret\n", + " else:\n", + " print(\"No histogram data available.\")\n", + "except ImportError:\n", + " print(\"Install matplotlib to visualize: pip install matplotlib\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 8: Time-Series Trend\n", + "\n", + "Plot how a metric (e.g., `mean`) evolves over time." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2 data points from 2025-01-01 to 2025-02-28\n", + " 2025-01-01: mean=0.4989, null_rate=0.0000\n", + " 2025-02-28: mean=0.5201, null_rate=0.0000\n", + " ...\n" + ] + } + ], + "source": [ + "timeseries = monitoring.get_timeseries(\n", + " project=\"monitoring_demo\",\n", + " feature_view_name=\"driver_stats\",\n", + " feature_name=\"conv_rate\",\n", + " data_source_type=\"batch\",\n", + " granularity=\"daily\",\n", + " start_date=date(2025, 1, 1),\n", + " end_date=date(2025, 3, 1),\n", + ")\n", + "\n", + "if timeseries:\n", + " dates = [t[\"metric_date\"] for t in timeseries]\n", + " means = [t[\"mean\"] for t in timeseries]\n", + " null_rates = [t[\"null_rate\"] for t in timeseries]\n", + "\n", + " print(f\"{len(timeseries)} data points from {dates[0]} to {dates[-1]}\")\n", + " for t in timeseries[:5]:\n", + " print(f\" {t['metric_date']}: mean={t['mean']:.4f}, null_rate={t['null_rate']:.4f}\")\n", + " print(\" ...\")\n", + "else:\n", + " print(\"No time-series data.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABKUAAAJOCAYAAABm7rQwAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAepVJREFUeJzt3Ql4lNX59/F7Jvs2gewsYacKiqyCoK0bLRSrolgBrSAiWlusihuogOBC3RFFsfXvVhdwodatvLVoqxZURK2IQmVfsxGSyb7NvNd9JjMkmQEDSSZ5Jt/Pdc01mWfOTM4kD8jz8z73sbndbrcAAAAAAAAAQWQP5jcDAAAAAAAAFKEUAAAAAAAAgo5QCgAAAAAAAEFHKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0hFIAAAAAAAAIOkIpAAAAAAAABB2hFAAAAJrNnXfeKTabrd6xHj16yOWXXy7txb/+9S/zM9B7AABweIRSAAAATbRmzRoTxhQUFIiVgiPvLTY2Vrp16ybnnnuuPPvss1JRUSFt0RlnnFFv3oe76ecDAABtX3hrTwAAACAUQqkFCxaYaqAOHTqIVTz55JMSHx9vQqi9e/fK//t//0+uuOIKWbx4sbzzzjuSmZl51O95xx13yOzZs1tkvrfffrtceeWVvsfr1q2TJUuWyG233Sb9+vXzHT/ppJNa5PsDAIDmRSgFAADQQElJicTFxUmou+iiiyQlJcX3eN68efLSSy/JlClT5Ne//rV8+umnR/2e4eHh5tYSfv7zn9d7HB0dbUIpPa5VVO399wkAgNWwfA8AADSZVtlMnz5dOnfuLFFRUdKzZ0+55pprpLKy0jdm27ZtJuhISkoyy8VOOeUUeffddwP24nn11Vflnnvuka5du5rg4eyzz5YtW7b4xs2cOdNU+JSWlvrNZfLkyZKRkSE1NTVHtZTtu+++k0suuUQ6duwop512mnnum2++MdVPvXr1MvPQ99VKogMHDtR7/c0332y+1s/tXUK2Y8cO35gXX3xRhg4dKjExMebzT5o0SXbv3i1t0aWXXmqqkT777DN5//33fcc//vhj8/vTZX76O9YqqhtuuEHKysp+tKdUXXoe6POPPPJIwIozfe6VV1455vkf6ffZ2N+FBlwnnniieY8zzzzTnK9dunSR+++/3+/77dmzR8aPH29Cr7S0NPMzaavLHwEAaGuolAIAAE2yb98+GT58uOmndNVVV8nxxx9vQqrXX3/dhEaRkZGSnZ0to0aNMo//8Ic/SHJysjz//PNy3nnnmXEXXHBBvff84x//KHa7XW666SYpLCw0YYCGJRqUqIkTJ8rSpUtNqKVBiZe+/9tvv22CpLCwsKP6HPo+ffv2lXvvvVfcbrc5pqGMhijTpk0zgdTGjRvlT3/6k7nXKiINPy688EL53//+Z4IUDVq8lUepqanmXsO1uXPnysUXX2zCntzcXHnsscfkZz/7mXz11VdtcrnfZZddZj7nP/7xD1910muvvWZ+vho26u/v888/N59DQxl9rrE04Dv11FNNRZYGOHXpsYSEBDn//POb/BkC/T6P5ndx8OBBGTt2rPn96ng9T2+99VYZMGCA/PKXvzRjNJDTwHTXrl3mvNZQ9i9/+Yt88MEHTZ4/AADtghsAAKAJpkyZ4rbb7e5169b5Pedyucz99ddfr6mA++OPP/Y9V1RU5O7Zs6e7R48e7pqaGnPsww8/NOP69evnrqio8I199NFHzfENGzb43rdLly7uCRMm1Pt+r776qhn30UcfNXr+8+fPN6+ZPHmy33OlpaV+x1555RW/7/HAAw+YY9u3b683dseOHe6wsDD3PffcU++4fo7w8HC/48Hi/cy5ubkBnz948KB5/oILLjjiz2LRokVum83m3rlzp99719W9e3f31KlTfY+feuopM+b777/3HausrHSnpKTUG/djXnvtNfM+et782O/zaH4Xp59+unmPF15
4wXdMz8eMjIx659zixYvNOD3vvEpKStx9+vTxmxcAAPDH8j0AAHDMXC6XvPnmm2bXtmHDhvk9713G9d5775lqqrrLqHT5nVZW6TI3XSZVl1YmaYWV109/+lNzr1VL3vfVShh93+LiYt+4FStWmGVWdb9PY/32t7/1O6ZLvLzKy8slLy/PLDtUX3755Y++58qVK83PSCtt9LXem1ZdaRXPhx9+KG2R/m5UUVFRwJ+F9mjSz6HVb1qFpFVGR0N/HrocUiujvLTJur7nb37zm2b5DA1/n0f7u9CfQd256Pmo57D3HFR6/nXq1Mn05vLSpX56XgMAgB/H8j0AAHDMdPmT0+k0/XeOZOfOnTJixAi/494d0/T5uu+hfYvq0r5A3iVVXrqET3eJe+utt0zvIA2nNCS4+uqrj9jT6HC0H1RD+fn5Zle95cuXS05OTr3ndFnhj/nhhx9MaKOhRyARERGHfa3249Lvfyw0QNF+ScfKG/TpUjovXaKmjdD1513399DYn0VdukxOg8yXX35Z7rrrLnNMAyoNFM866yxpDg1/n0f7u9B+Zg3PIz0Ptc+Yl563ffr08Rt33HHHNcMnAAAg9BFKAQCANudw/aC8vYGUViz16NHDNEXXUEp7SWmPHw2rjkXdSiAvrarR5tvayHzQoEGmekarbbTXkN7/GB2jgcXf//73gJ/JW5EUiH5fbbJ9LE4//XTTNP5Yffvtt+ZeAxelTeO1t5SGZNpXSfuGaWNv7R2m/bsa87NoSHf4015U+jm1T5OGXb/73e9ML7Hm0PD3ebS/i8acgwAAoGkIpQAAwDHTZt4Oh8MXYhxO9+7dZfPmzX7HN23a5Hv+WGho9Oijj5pqLV26pyGVd3ldU2k10OrVq02llFYI1a24aehwlVm9e/c2IYZW7fzkJz85qu8/cODAervfHQ1vZdmx0mbdasyYMeZ+w4YNppm7NqfXMMnrWOenNNjT80crpLSKTpuoa4P1ltKU38Xh6Hmr576+b91zINC5DgAA/BFKAQCAY6ZVLePHj5cXX3xRvvjiC7++Ut6L9XHjxpmldmvXrpWRI0f6+hLpDm8aJPXv3/+Yvr9WRenOfBqWrFq1Sq677jppLt5KmYaVMfo5GtKqIaU7ENalO7fNmTPHBFv6M6obXOj7auWR7mR3uGBp9OjREmy6pO7pp582vyfdWe5wPwv9WgPBYxUeHi6TJ0823+/777831VInnXSStJSm/C4OR89r3aFQd+bz7gKp4Zqe1wAA4McRSgEAgCa59957zYW5LhnTBs/aJ2r//v1madYnn3xi+gfNnj1bXnnlFfnlL38pf/jDH0y/Iw2Stm/fLm+88cYxL9kaMmSIWWJ2++23S0VFxTEv3QtEK8B+9rOfmdCrqqrK9DvSz6lzbmjo0KHmXucxadIk059IeyZpdc7dd99twhBt6K4BnvZp0vf461//an5eN910k7QWDVN02Zr2r9KleNps/D//+Y+p0tLfn5cu19PPonPVcfqz0d9bw95SR0urrpYsWWKajN93333SklridzFjxgx5/PHHzedYv369aXquVWba7BwAAPw4QikAANAkGtZ89tlnMnfuXLMUS5fS6TENoLwX5+np6aZ3kPYjeuyxx8xOdloVo32gzjnnnCZ9fw2i7rnnHhNOaUjVnLSK59prr5WlS5eaappf/OIXpidR586d6407+eSTTcPuZcuWmYot7V+kYYdWUGkgp8vFHnnkEVOlozIzM817nXfeedKarrnmGnOvO+GlpKSYvlnPPPOM6dEVFRXlG6chm/6uNFBctGiRGX/BBRfIzJkzTYB1rDTMO+GEE0yl1KWXXiotrbl/F3p+6xJPPUf0vNbH+jn03NfliQAA4Mhsbro1AgAAoJUMHjzYVM5puAMAANqX5tneBAAAADhK2ofs66+/rtc8HQAAtB9USgEAgJBUXFxsbkeiu795m3gjeHTHOu3B9NBDD0leXp5s27bNLAkEAADtC5VSAAA
gJD344IOm8fSRbrt3727tabZL2mB92rRppoG8NsAnkAIAoH2iUgoAAIQkrb7R25GcdtppBCIAAACthFAKAAAAAAAAQcfyPQAAAAAAAARdePC/ZehxuVyyb98+SUhIEJvN1trTAQAAAAAAaDW6KK+oqEg6d+4sdvvh66EIpZqBBlKZmZmtPQ0AAAAAAIA2QzeV6dq162GfJ5RqBloh5f1hOxwOsWq1V25urtka+0gpJgAAAAAAaBmuELk2dzqdpnjHm5ccDqFUM/Au2dNAysqhVHl5uZm/lU98AAAAAACsyhVi1+Y/1uLI+p8QAAAAAAAAlkMoBQAAAAAAgKAjlAIAAAAAAEDQEUoBAAAAAACgfYdSS5culR49ekh0dLSMGDFCPv/888OOfe6550zDrLo3fZ1XVVWV3HrrrTJgwACJi4uTzp07y5QpU2Tfvn313ic/P18uvfRS00SsQ4cOMn36dCkuLm7RzwkAAAAAANBQrrNMvt9XZO7bgzYTSq1YsUJmzZol8+fPly+//FIGDhwoY8aMkZycnMO+RoOk/fv3+247d+70PVdaWmreZ+7cueZ+5cqVsnnzZjnvvPPqvYcGUhs3bpT3339f3nnnHfnoo4/kqquuatHPCgAAAAAAUNeqr3bJ1Mf/JX9893/mXh+HOpvb7XZLG6CVUSeffLI8/vjjvm0QMzMz5dprr5XZs2cHrJS6/vrrpaCgoNHfY926dTJ8+HATXnXr1k2+//576d+/vzk+bNgwM2bVqlUybtw42bNnj6muagyn0ymJiYlSWFhogjIr0p+3BoBpaWkhse0kAAAAAABtXWV1jWzLLpKvt+fJsx9urvec3WaTF/5wpqQ6YsRqGpuThEsbUFlZKevXr5c5c+b4jmkwMnr0aFm7du1hX6fL7Lp3724ClSFDhsi9994rJ5xwwmHH6w9Dl/npMj2l761fewMppd9Tv/dnn30mF1xwQcD3qaioMLe6P2yl89CbFem8NZ+06vwBAAAAAGjLyiurZVtOkWzZ75QtWYWyJcspO3OLxXWYWiE9vvdAsSTHR4nVNDZbaBOhVF5entTU1Eh6enq94/p406ZNAV9z3HHHyTPPPCMnnXSSCZsefPBBGTVqlFmK17VrV7/x5eXlpsfU5MmTfSldVlaWqQyqKzw8XJKSksxzh7No0SJZsGCB3/Hc3FzzfaxITxj9OWowRaUUAAAAAADHrqyyRnYeKJUdeaWyM89zv7+wXALlTwnR4dKlQ7Rsyqrf39puE4msKTtiW6O2qqioyDqh1LEYOXKkuXlpINWvXz956qmn5K677qo3VpueX3zxxSZwefLJJ5v8vbWiS/tf1a2U0qWGqampll6+p1Vk+hkIpQAAAAAAaJyisipT+bQ1SyugPFVQe/NLA45Nio+SPhkO6ZOR6Lnv5JCUhGhzPf7/vt4tS977VlxuTyD1h3EnyvG9/IturKDuRnRtPpRKSUmRsLAwyc7OrndcH2dkZDTqPSIiImTw4MGyZcuWgIGU9pH64IMP6oVG+t4NE8fq6mqzI9+Rvm9UVJS5NaRhjpUDHf1DYPXPAAAAAABASykoqfAET/sL5Yf9ugSvULIKAu+Ul5YY4wug+nZKlN4ZDklOOHxY88sh3WVIrxT5bts+6d+rs6R3iBOramyu0CZCqcjISBk6dKisXr1axo8f76vc0cczZ85s1Hvo8r8NGzaYJuUNA6kffvhBPvzwQ0lOTq73Gq200kbp2s9Kv7/S4Eq/tzZeBwAAAAAA7dOBonJP76f9Tl8AlesM3LKnU8dYE0Bp+KQhlAZQHeKOvhdUqiNG+nVOsGRz82PRJkIppcvhpk6dapqO6w55ixcvlpKSEpk2bZp5fsqUKdKlSxfTz0ktXLhQTjnlFOnTp48Jlh544AFTDXXllVf6AqmLLrpIvvzyS3nnnXdMaOXtE6U9ozQI0+V+Y8eOlRkzZsiyZcvMazQEmzRpUqN33gMAAAAAANalrX40bNLQyRM+eSq
h8osPbXBWV9ekOOmj4VMnh/Q1AVSiJMREBH3eoaDNhFITJ040jcLnzZtnwqNBgwbJqlWrfM3Pd+3aVa/86+DBgyZM0rEdO3Y0lU5r1qyR/v37m+f37t0rb731lvla36surZo644wzzNcvvfSSCaLOPvts8/4TJkyQJUuWBPGTAwAAAACAYAVQ2QVlJnz6IetQAFVYWuk3Vvs6dU2O91Q/dfIsweuVniBxUQRQzcXm1t8ImkQbnScmJprd66zc6Fz7a+luhPSUAgAAAABYncvtlv35pb6ld9774vJqv7Fhdpt0T02obT5eG0ClJUh0ZHBreVwhcm3e2JykzVRKAQAAAAAAHIsal1v2HCg2VU9a/aQBlO6GV1rpH0BFhNmlR5ongPJWQfVMS5DI8LBWmXt7RigFAAAAAAAso7rGJbvyig9VP+13ytZsp1RU1fiNjQy3S690bwNyz0543dMSTDCF1kcoBQAAAAAA2qTK6hrZmVvsW3qnAdS2bKdU1bj8xkZHhJld77w74Ol9ZkqchFl4GVyoI5QCAAAAAACtTiudtufo0jtnbQBVKDtyiqTa5d8KOzYq/FD/pwzPErwuSRpA2Vpl7jg2hFIAAAAAACCoyiqrTcWTBk/eEEororQ5eUMJMRGm8qluD6hOHWPFbiOAsjpCKQAAAAAA0GJKyqtMzydP/ydPI/LdecXiHz+JJMZGmuDJ1wOqU6KkJ8aIjQAqJBFKAQAAAACAZuEsqzS73nkDqB+yCmVffmnAsckJUb6ld94eUHqMAKr9IJQCAAAAAABHraCkorYBee0yvKxCyS4oCzhWq518PaA6JZqG5Enx0UGfM9oWQikAAAAAAHBEB4rKfQGUdye8PGd5wLHa78lb+dSnk8N8rcvygIYIpQAAAAAAgOF2uyXXWV7bgNwTPmkQlV9c4TdWF9l1SY7zC6DioyNaZe6wHkIpAAAAAADaaQCVVVBWpwG5J4AqLK30G2u3iWSmxNc2IPf0geqd7pDYKGIFHDvOHgAAAAAAQpzL7ZZ9+SX1ekBpCFVcXu03Nsxukx6pCb7KJw2ieqY7JDoirFXmjtBFKAUAAAAAQAipcblld16xr/JJg6itWYVSVlnjNzYizC490zSAql2Cl+GQHmkJEhlOAIWWRygFAAAAAIBFVde4ZGeuJ4Dy9oDaluWUimqX39iocLv0SnfUC6C6pSaYYApoDYRSAAAAAABYQGV1jQmgNHzyBlDbs4ukqsY/gNKldr0zHL4eUHqfmRInYXYCKLQdhFIAAAAAALQxFVU1si3b6VmCt9+zBG9HbpFZmtdQXFS4qX7SyidvAKW74tltuj8e0HYRSgEAAAAA0IrKKqtlqzYf9y7B2++UXXnFpjl5QwkxEfWqnzSIyugYSwAFSyKUAgAAAAAgSErKqzy73/kCqELZc6BE/OMnkQ5xkSZ46puR6KuESkuMERsBFEIEoRQAAAAAAC3AWVrp2/3OsxNeoezLLw04NiUh2tOAPONQI/Kk+CgCKIQ0QikAAAAAAJqooKSiTgNyp6mAyi4sCzg2PTHGV/nkXYrXMT4q6HMGWhuhFAAAAAAAjeR2uyW/2BNAbakTQuUVlQcc3zkp1oROdXtAOWIjgz5voC0ilAIAAAAA4DABVE5hma/y6YfanfAOllT4jdVFdl2T43xL7zSE6p3hkPjoiFaZO2AFhFIAAAAAgHZPA6j9B0vr94DaXyjOsiq/sXabSLeUBE/41MnhC6BiIrnEBo4Gf2IAAAAAAO2Ky+2WvQdK6jQg91RClVRU+40Ns9ukR2r9AKpnukOiI8JaZe5AKCGUAgAAAACErBqXS3bnHQqg9H5btlPKKmv8xkaE2aVneoKv/5PeuqfGS2Q4ARTQEgilAAAAAAAhoarGJbtyiw4twasNoCqqXX5jo8Lt0ivDU/nk7QGlAVR4mL1V5g60R4RSAAAAAADLqayukR05Rb7d7zSA2p5TZIKphmIiw6R3nd3vNIDKTIk3S/MAtB5
CKQAAAABAm1ZeVSPbs+s2IHfKjtwiqXG5/cbGR4eb0KlPbQClQVTnpDix2wiggLaGUAoAAAAA0GaUVlTL1mxP5ZO3B9TuvGIJkD+JIybCt/TO04g8UTI6xIiNAAqwBEIpAAAAAECrKC6v8lU+eaugdFe8APmTdIyLkr61u995A6hURzQBFGBhhFIAAAAAgBbnLK2UH0wApdVPThNA7T9YGnBsiiO6TgNyzxK85ITooM8ZQMsilAIAAAAANKuDxRW+pXcmhMpySk5hWcCx6R1ipG+DHlAd4qKCPmcAwUcoBQAAAAA4Jm63Ww4UVdRpQK4BVKE5FkiXpDjP7nedPFVQvTMc4oiJDPq8AbQNhFIAAAAAgEYFUNmFZbUNyA/1gCooqfQbq12eMlPifZVPGkL1TndIXHREq8wdQNtEKAUAAAAAqMfldpt+T57+T54QSgOoorIqv7F2m026p2oApdVPniqoXukOiYnkchPAkbWpvyWWLl0qDzzwgGRlZcnAgQPlsccek+HDhwcc+9xzz8m0adPqHYuKipLy8nLf45UrV8qyZctk/fr1kp+fL1999ZUMGjSo3mvOOOMM+fe//13v2NVXX21eBwAAAAChrsbllr35Jb6ld3q/NcspJRXVfmPD7TbpkZZQ2//JE0L1THNIVERYq8wdgLW1mVBqxYoVMmvWLBMGjRgxQhYvXixjxoyRzZs3S1paWsDXOBwO87xXw61AS0pK5LTTTpOLL75YZsyYcdjvrc8tXLjQ9zg2NrZZPhMAAAAAtCU1Lpfsyi32VT79UBtAlVfV+I2NCLObiqc+Wv1UuxOeVkRFhhNAAQixUOrhhx824ZC3+knDqXfffVeeeeYZmT17dsDXaAiVkZFx2Pe87LLLzP2OHTuO+L01hDrS+wAAAACA1VTVuGRnTtGhXfCynLIt2ymV1S6/sVrp1Ls2gDI9oDISpVtKvISH2Vtl7gDahzYRSlVWVpoldnPmzPEds9vtMnr0aFm7du1hX1dcXCzdu3cXl8slQ4YMkXvvvVdOOOGEo/7+L730krz44osmmDr33HNl7ty5VEsBAAAAsIzK6hrZrgFUnR5QO3KKTDDVUGxkuNn1zhM+eXpAdU2OlzB7/ZUnANAuQqm8vDypqamR9PT0esf18aZNmwK+5rjjjjNVVCeddJIUFhbKgw8+KKNGjZKNGzdK165dG/29L7nkEhNsde7cWb755hu59dZbzZJA7Ud1OBUVFebm5XQ6zb2GY3qzIp237qZh1fkDAAAA7YUutduerbvfeZbg6fK7nXnFpjdUQ/HR4abqScMnbxDVqWOsaU5en14L+L8eQHC5QuTavLHzbxOh1LEYOXKkuXlpINWvXz956qmn5K677mr0+1x11VW+rwcMGCCdOnWSs88+W7Zu3Sq9e/cO+JpFixbJggUL/I7n5ubWa7RutRNGwz09+bVKDQAAAEDrK6uskZ0HSmVnXqnsqL3tLywXd4D8KCE6XHqkxEr3lFjpkRxrvk5JiKzfe7e6RPJyS4L6GQC0v2vzoqIi64RSKSkpEhYWJtnZ2fWO6+PG9nqKiIiQwYMHy5YtW5o0F22yrvR9DhdK6TJDbcpet1IqMzNTUlNTTfN1q574+h8r/QxWPvEBAAAAqyour/I1IN9iqqCcsi+/RALVLyXFRx2qftJKqE4OSUmI9tv8CYC1uELk2jw6Oto6oVRkZKQMHTpUVq9eLePHj/f9IvTxzJkzG/Ueuvxvw4YNMm7cuCbN5euvvzb3WjF1OFFRUebWkJ4wVj5p9MS3+mcAAAAArKCwtNL0f6rbhHz/wdKAY1Md0b7d77w74SUnNO6CD4D12ELg2ryxc28ToZTSyqOpU6fKsGHDZPjw4bJ48WIpKSnx7cY3ZcoU6dKli1k6pxYuXCinnHKK9OnTRwoKCuSBBx6QnTt3ypVXXul7z/z8fNm1a5fs27fPPNZeUUqrr/SmS/RefvllE2QlJyebnlI33HCD/OxnPzO9qgA
AAACgqfKLy03lkyd88gRQOYVlAcdmdIjx7X6nDci1GqpDnP//EAeAUNBmQqmJEyeankzz5s2TrKwsGTRokKxatcrX/FzDpbpJ28GDB2XGjBlmbMeOHU2l1Zo1a6R///6+MW+99ZYv1FKTJk0y9/Pnz5c777zTVGj985//9AVgugRvwoQJcscddwT1swMAAACwPu0Bk1dU7gmfapuQ69f5xYc2SaqrS1JcvR3wNIhKiIkI+rwBoLXY3Po3J5pEe0olJiaaZmRW7imVk5MjaWlpli4RBAAAAIJBL6OyC8rkB9P/qVB+0F5Q+wvNsryG7DaRrsnxvgBK73tlOCQuigAKQGhemzc2J2kzlVIAAAAA0Ba53G7Zn19aJ4DyVEJpY/KG7DabdE+NN5VP3hCqd7pDoiO59AKAhvibEQAAAABq1bjcsvdAsa/5uLcHVGlFtd/YcLtNeqQl1DYg9yy/65WeIJHhYa0ydwCwGkIpAAAAAO1Sjcslu3KLfZVPGkBtzXJKeVWN39jIcLv0Snf4+j/1zUiU7mkJEhFm3eU1ANDaCKUAAAAAhLyqGpfsyCnyVD7pErz9Ttme45TKapff2KiIME/4lHFoCV5mSryEE0ABQLMilAIAAAAQUiqqamR7bQDl2Qmv0ARS1S7/PZ5iI8OlT6f6AVSX5HgJ0+7kAIAWRSgFAAAAwLLKK6tla7az3g54O3OLTXPyhuKjI0wApUvvvEvwOiXFmubkAIDgI5QCAAAAYAklFVWyLcvpa0Ku93sOaADlPzYxNtJX+eTdCS89MUZsBFAA0GYQSgEAAABoc4rKqnz9n7wB1N78koBjkxOizPI73xK8Tg5JSYgmgAKANo5QCgAAAECrKiipMMGTpwG5hlCFklVQFnBsWmKMqX7yVEF5Aqik+OigzxkA0HSEUgAAAACC5kBReW0Dck8IpV/nOssDju3UMbZeANU7wyEd4qKCPmcAQMsglAIAAADQ7NxutwmbPA3IPUvw9Ov84oqA47smxZneT95G5L0zEiUhJiLo8wYABA+hFAAAAIAmB1C63M4XQNX2gSosrfQba7eJZKbE1y698/SA6pWeIHFRBFAA0N4QSgEAAABoNJfbLfvyS2SLLr8zy/A8S/CKy6v9xobZbdI9NeHQEjwNoNISJDqSyxAAAKEUAAAAgMOocbllz4Hi2gooz/K7rVlOKa30D6AiwuzSIy2htv+TwwRQPdMSJDI8rFXmDgBo+wilAAAAAEh1jUt25RX7Kp+0EmprtlMqqmr8xkaG26V3uid48lZBdUtNMMEUAACNRSgFAAAAtDOV1TWyM/dQAKX327OLpKrG5Tc2OiLM7Hrn3QFP7zNT4iTMTgAFAGgaQikAAAAghGml0/Ycp/ygPaBqQ6gdOUVS7XL7jY2NCvctvdMd8PS+S5IGULZWmTsAILQRSgEAAAAhoqyyWrZle8InE0JlFZqKKG1O3lBCTISv8skbRHXqGCt2GwEUACA4CKUAAAAACyoprzI9n8wSPFMB5ZTdecXiHz+JdIiL9C2/8/aASkuMERsBFACgFRFKAQAAAG2cs6zSNB73NCDXnfAKZV9+acCxyQlRvqV33kooPUYABQBoawilAAAAgDakoKSitgF5bRVUVqFkF5QFHJueGHOoB1SnRNOQPCk+OuhzBgDgWBBKAQAAAK3kQFG5b/ndD1meSqg8Z3nAsdrvydcDqpPDfJ0YGxn0OQMA0FwIpQAAAIAW5na7JddZXtuA3FP9pJVQ+cUVfmN1kV2X5LhDPaBqA6j46IhWmTsAAC2FUAoAAABo5gAqq6CsTgNyTwBVWFrpN9ZuE+mWkuALnjSI6pXukNgo/pkOAAh9/NcOAAAAOEYut1v2HijxBU8aRG3NKpTi8mq/sWF2m/RI9QRQ3iqonukOiY4Ia5W5AwDQ2gilAAAAgEaocblkd54ngPI2ItcAqqyyxm9sRJhdeqZpAFXbAyrDIT3SEiQynAAKAAAvQikAAACggeoal+z
MLa4TQBXKtiynVFS7/MZGhdvNkru6AVT31AQJD7O3ytwBALAKQikAAAC0a5XVNSaA0vDJ2wdqe06RVNX4B1AxkWHSW5uPZxzqAZWZEidhdgIoAACOFqEUAAAA2o2KqhrZlu309IDa7+kBtSO3SGpcbr+xcVHhpvpJAyhvDyjdFc9u0/3xAABAq4dSNTU18txzz8nq1aslJydHXK76/0fpgw8+aOq3AAAAAI5aWWW1bM3yBFCeCiin7MorNs3JG0qIiTDBU1+tgqoNojp1jBUbARQAAG03lLruuutMKHXOOefIiSeeyH+4AQAAEHQl5VW+3e88VVCFsudAifjHTyId4iL9Aqi0xBj+HQsAgNVCqeXLl8urr74q48aNa54ZAQAAAEfgLK2UH2qX33mroPYfLA04NiUh2tOAPONQI/Kk+CgCKAAAQiGUioyMlD59+jTPbAAAAIA6CkoqDjUg16V4+wslu7As4Nj0DjGm71PdHlAd46OCPmcAABCkUOrGG2+URx99VB5//HH+jxMAAACOidvtlgNFFb6ld94QKq+oPOD4zkmxvt3vvEGUIzYy6PMGAACtGEp98skn8uGHH8rf//53OeGEEyQiIqLe8ytXrmzqtwAAAECIBVA5hWUNekA55WBJhd9Y/V+eXZPjPOFTnQAqLrr+vzkBAEA7DKU6dOggF1xwQbNMZunSpfLAAw9IVlaWDBw4UB577DEZPnx4wLHaXH3atGn1jkVFRUl5eXm9QGzZsmWyfv16yc/Pl6+++koGDRpU7zU6Xqu9tDdWRUWFjBkzRp544glJT09vls8EAADQ3gMo7ffkW35XWwnlLKvyG2u32aR7arwneOrkWYLXK90hMZFN/icrAABog5r8X/hnn322WSayYsUKmTVrlgmRRowYIYsXLzYB0ebNmyUtLS3gaxwOh3neq+HywZKSEjnttNPk4osvlhkzZgR8jxtuuEHeffddee211yQxMVFmzpwpF154ofznP/9pls8FAADQXrjcbrPjnYZO3gbkW7OcUlJR7Tc23G6THmkJ9QKoHmkOiY4Ia5W5AwCA4Gsz/9vp4YcfNsGRt/pJwykNi5555hmZPXt2wNdoCJWRkXHY97zsssvM/Y4dOwI+X1hYKP/3f/8nL7/8spx11lm+kK1fv37y6aefyimnnNIMnwwAACD01LhcsjuvxLf8Tu+3ZTulrLLGb2xEmF16pnsCKA2f9KYVUZHhBFAAALRnzRJKvf766/Lqq6/Krl27pLKyst5zX3755Y++Xl+jS+zmzJnjO2a322X06NGydu3aw76uuLhYunfvLi6XS4YMGSL33nuv6WvVWPo9q6qqzPfxOv7446Vbt27m+xJKAQAAiFTVuGRXbtGhHlC1AVRFtctvbFS4XXrV2f1ObxpAhYfZW2XuAAAghEOpJUuWyO233y6XX365/O1vfzOVTlu3bpV169bJ73//+0a9R15entTU1Pj1cdLHmzZtCvia4447zlRRnXTSSabi6cEHH5RRo0bJxo0bpWvXro36vtq7KjIy0vTFavh99bnD0d5TevNyOp3mXsMxvVmRzlt7Plh1/gAAoHlUVtfIjpxiT+8n0wPKKdtznFJd4/YbGxMZJr3THbUNyB3m1jU5XsLs/jsy828MAADaz7W5q5Hzb3IopU3B//SnP8nkyZNN8/FbbrlFevXqJfPmzTPNxVvKyJEjzc1LAylddvfUU0/JXXfdJS1p0aJFsmDBAr/jubm59RqtW+2E0XBPT36tUgMAAKFPK512HyiVHXmlsjPPc7/3YJkEyJ8kNjJMeqTESne9JcdKz5RYSUuMMs3JfdxlciCvLKifAQCAUOIKkWvzoqKi4IRSumRPAyEVExPj+8baz0mXvz3++OM/+h4pKSkSFhYm2dnZ9Y7r4yP1jKorIiJCBg8eLFu2bGn03PW9delgQUFBvWqpH/u+usxQm7LXrZTKzMyU1NRU03zdqie+9ujSz2DlEx8AAARWWlFtltz
5dsDLcsruvGJxBQigHDER9aqfdAleRocYv01lAABA83KFyLV5dHR0cEIpDW+0Ikp7O2kvJm0QPnDgQNm+fbtJ9hpDl9ANHTpUVq9eLePHj/f9IvSx7obXGLr8b8OGDTJu3LhGz12/p4ZZ+n0mTJhgjulufhq01a3CaigqKsrcGtITxsonjZ74Vv8MAABApLi8yhM87a/tAZVVKHsPlEigf5l1jIuSvp08S/D6mp3wEiXVEU0ABQBAK7GFwLV5Y+fe5FBKd6176623TJWS9pO64YYbTOPzL774Qi688MJGv49WHk2dOlWGDRsmw4cPl8WLF0tJSYlvN74pU6ZIly5dzNI5tXDhQlOJ1adPH1Pp9MADD8jOnTvlyiuv9L2nhmUaMO3bt88XOHmDNL0lJibK9OnTzfdOSkoyVU7XXnutCaRocg4AAKygsLSyNoDSHfA8VVD7D5YGHJviiPYFT31qm5EnJzTu/2QCAAA0tyaHUtpPytvAShubJycny5o1a+S8886Tq6++utHvM3HiRNOTSXtRaZPxQYMGyapVq3zNzzVcqpu0HTx4UGbMmGHGduzY0VQ96fft37+/b4yGZd5QS02aNMncz58/X+68807z9SOPPGLeVyultHn5mDFjTJ8sAACAtuZgcYWv8smEUFlOySkM3MNJl9uZ3e+0Aqo2hOoQ51/pDQAA0Fps7sauscNhaU8prbrSZmRW7imVk5MjaWlpli4RBAAgFOg/z/KKys3yOw2gvEHUgaJDu//W1SUpzlf5pCFU7wyHOGIigz5vAADQNK4QuTZvbE7S5Eop9fHHH5td77Zu3WqW7ukyu7/85S/Ss2dPOe2005rjWwAAAIRsAJVdWFa7/M7TgFwDqIKSSr+x2uUpMyXeL4CKi4polbkDAAA0RZNDqTfeeMPstHfppZfKV199ZZbAKU3D7r33Xnnvvfea+i0AAABCgsvtNv2eGgZQRWVVfmPtNpt0T42vbUDuaUTeK90hMZHN8v8UAQAAWl2T/1Vz9913y7Jly0wj8uXLl/uOn3rqqeY5AACA9qjG5Za9+SW1vZ88PaA0hCqtqPYbG263SY+0hNoG5J4eUD3TEiQqIqxV5g4AAGCJUEp3tPvZz37md1zXDuqueAAAAKGuxuWSXbnFvsonrYLamuWU8qoav7ERYXZT8dSnU+0SvIxEUxEVGU4ABQAA2pcmh1IZGRmyZcsW6dGjR73jn3zyifTq1aupbw8AANCmVNW4ZGdOUZ0G5E7Zlu2UymrPbsR1aaVT73Rv/yeHCaC6pcRLeJh1G5cCAAC0mVBqxowZct1118kzzzwjNptN9u3bJ2vXrpWbbrpJ5s6d2zyzBAAAaAWV1TWyPafIEz7VBlA7copMMNVQbGS4aTruqX7y9IDqmhwvYXZtTw4AAIBmD6Vmz55ttiw8++yzpbS01Czli4qKMqHUtdde29S3BwAACIryymrZ1iCA2plbZHpDNRQfHeFZfpdxqAdUp6RY05wcAAAAjWNz6z7EzaCystIs4ysuLpb+/ftLfHy8tBdOp9P00NIdBx0Oh1iRBos5OTmSlpYmdjtLCgAAoU2bjW/V5XfaA6p2J7w9B4olQP4kibGRtQ3IPVVQGkSld4gxFeIAAADNyRUi1+aNzUmOuVLqiiuuaNQ4XdYHAADQWorKqmoDKK2A8oRQe/JLAo5Nio8yAZSpgKrtAZXqiCaAAgAAaAHHHEo999xz0r17dxk8eLA0U7EVAABAkxSWVvoqn7QRuS7B23+wNOBYDZu8u995A6jkhOigzxkAAKC9OuZQ6pprrpFXXnlFtm/fLtOmTZPf/OY3kpSU1LyzAwAAOIz84vLa/k9O3054uc7ygGM7dYz1NB+v7f+kDck7xEUFfc4AAABopp5SFRUVsnLlSrNEb82aNXLOOefI9OnT5Re/+EW7KnOnpxQAAC1H/6miYZOpfNrvrF2GVyj5xRUBx3dNiqvXA6p3RqIkxEQEfd4AAADt9drc2dI
9pZTusjd58mRz27lzp1nS97vf/U6qq6tl48aN7arZOQAAaJ4AKrugzARPniV4nh5QuiyvIbtNpGtyvGcJnukD5ZBeGQ6JiyKAAgAAsIImhVJ1aYKn1VH6j8mamprmelsAABCiXG637M8v9VU+eRuRF5dX+Y2122zSPfVQAKVVUL3THRId2Wz/lAEAAECQNelfcnWX733yySfyq1/9Sh5//HEZO3aspcvMAABA86pxuWXvgeJD1U+1TchLK6r9xobbbdIjLeFQBVSnROmZliCR4WGtMncAAAC0sVBKl+ktX75cMjMz5YorrjBNz1NSUpp3dgAAwHJqXC7ZmVt8qAfU/kLZmu2Uiir/SurIcLv0StcG5I7aJXiJ0j0tQSLC+J9bAAAAoe6YQ6lly5ZJt27dpFevXvLvf//b3ALRSioAABCaqmpcsiOnqDaA0j5QTtme45TKapff2OiIMLPrnXcHPA2iuqXGSxjV1QAAAO3SMYdSU6ZMaVc77AEA0N5ppdP22gDKLMPbX2gCqWqX/0a+sVHh9aqf9OsuyRpA8W8HAAAANDGU0p32AABAaCqvrDZL7jwNyD33uiRPm5M3FB8d4at88vaA6tQx1jQnBwAAAA6HLWsAAGjnSiqqZGtt8KTNx7UKandesfjHTyKJsZG+AMrbiDw9MYbqaQAAABw1QikAANoRZ1mlL4Dy7oS3N78k4NjkhKg6/Z80gHJISkI0ARQAAACaBaEUAAAhqqCkwlf55KmCKpSsgrKAY9MSYw5VP9UGUEnx0UGfMwAAANoPQikAAELAgaLy2gbk3j5QhZLnLA84Vvs9eSqgPDvh6RI8XZYHAAAABBOhFAAAFuJ2uyXXWe4LnrQSSr/OL67wG6uL7Lokx/kqn7QKqnd6oiTERLTK3AEAAIC6CKUAAGjDAZQut/MFULU9oApLK/3G2m0imSnxh3pAmQDKIbFR/KceAAAAbRP/UgUAoA1wud2yL79Etux31gmgCqW4vNpvbJjdJt1TE+otv+uV7pDoiLBWmTsAAABwLAilAAAIshqXW/YcKPbtfqcBlO6IV1rpH0BFhNmlR1pCbQNyhwmgeqYlSGQ4ARQAAACsjVAKAIAWVF3jkl153gBKK6CcsjXbKRVVNX5jI8PtZsmdBk/enfC6pSaYYAoAAAAINYRSAAA0k8rqGtmZeyiA0vvt2UVSVePyG6tL7XrXBk/ePlCZKXESZieAAgAAQPtAKAUAwDHQSqftOU75Yb9n+Z2GUDtyiqTa5fYbq83GvZVP3h5QXZI0gNL98QAAAID2iVAKAIAfUVZZLduyNYDyLL/TAEororQ5eUMJMRGHwqfaICqjY6zYbQRQAAAAQF2EUgAA1FFSXuVpPl67A54GUXsOlIh//CTSIS6y3vI7DaHSEmPERgAFAAAA/ChCKQBAu+Usq/RVPnn7QO3LLw04NiUh2rf7nTeISk6IIoACAAAAjhGhFACgXSgoqagNnmqX4WUVSnZBWcCx6YkxfgFUx/iooM8ZAAAACGWEUgCAkOJ2uyW/uDaA0uV3tUvx8pzlAcd36hhbpwG5Q/pmJIojNjLo8wYAAADaG0IpAIClA6hcZ7kvgPIsw3PKwZIKv7G6yK5Lcly9HlC9MxwSHx3RKnMHAAAA2rs2FUotXbpUHnjgAcnKypKBAwfKY489JsOHDw849rnnnpNp06bVOxYVFSXl5eX1Llbmz58vf/7zn6WgoEBOPfVUefLJJ6Vv376+MT169JCdO3fWe59FixbJ7Nmzm/3zAQCOnf6dvv9gqacJuamA8gRRzrIqv7F2m0i3lART+eQNoHqlOyQ2qk39Zw8AAABo19rMv85XrFghs2bNkmXLlsmIESNk8eLFMmbMGNm8ebOkpaUFfI3D4TDPezVsNnv//ffLkiVL5Pnnn5eePXvK3LlzzXt+9913Eh0d7Ru3cOFCmTFjhu9xQkJCi3xGAEDjuNxu2XugxLMDnrcH1P5CKamo9hsbZrdJj1RPAOW
tguqZ7pDoiLBWmTsAAAAAi4VSDz/8sAmGvNVPGk69++678swzzxy2aklDqIyMjMP+H3UNtu644w45//zzzbEXXnhB0tPT5c0335RJkybVC6EO9z4AgJZV43LJ7jxPAOVtRL41q1DKKmv8xkaE2aVnWkKdBuQO6ZGWIJHhBFAAAACA1bSJUKqyslLWr18vc+bM8R2z2+0yevRoWbt27WFfV1xcLN27dxeXyyVDhgyRe++9V0444QTz3Pbt280yQH0Pr8TERFOFpe9ZN5T64x//KHfddZd069ZNLrnkErnhhhskPPzwP5qKigpz83I6neZe56E3K9J5a5Bn1fkDsIbqGpfsyis24dNW04DcKduynVJR7f93T1S43Sy50+BJez9pCNUtJV7Cw+x+Y/m7CwAAAKHAFSLX5o2df5sIpfLy8qSmpsZUMdWljzdt2hTwNccdd5ypojrppJOksLBQHnzwQRk1apRs3LhRunbtagIp73s0fE/vc+oPf/iDCbSSkpJkzZo1Jhjbv3+/qdw6HO05tWDBAr/jubm59XpaWe2E0Z+jnvwaCAJAU1XVuGRPfpnsyCuVnXml5n53fplUu9x+Y6Mj7NItOVZ6pHhu3ZNjpVOHaLM075ByyT9gzb9jAQAAgPZ0bV5UVGSdUOpYjBw50ty8NJDq16+fPPXUU6bqqbG0j5WXBlyRkZFy9dVXm+BJG6cHosFV3ddppVRmZqakpqaaPldWPfF1OaR+Biuf+ABaR3lVjWzPccrW/U75wVRAFcrO3GKpCRBAxUWFm+onXYJn7jMc0jkpTuwN+gICAAAA7Y0rRK7N6/bxbvOhVEpKioSFhUl2dna94/q4sb2eIiIiZPDgwbJlyxbz2Ps6fY9OnTrVe89BgwYd9n10eV91dbXs2LHDVGMFomFVoMBKTxgrnzR64lv9MwBoeWWV1Wbpnaf/kzYgd5oledqcvCFHTISn/1NGoi+E6tQx1m9jCgAAAAChc23e2Lm3iVBKq5OGDh0qq1evlvHjx/vSQX08c+bMRr2HLv/bsGGDjBs3zjzW3fY0mNL38IZQWtH02WefyTXXXHPY9/n666/ND+9wO/4BQHtSUl51aPe72kbkuiuef/wk0jEuSvp20sonTwClPaBSHdEEUAAAAADabiildDnc1KlTZdiwYTJ8+HCzc15JSYlvN74pU6ZIly5dzLI6tXDhQjnllFOkT58+UlBQIA888IDs3LlTrrzySvO8XgRdf/31cvfdd0vfvn1NSDV37lzp3LmzL/jShucaUp155plmBz59rE3Of/Ob30jHjh1b8acBAMHnLK2UH2orn7wB1P6DpQHHpjiiTfjUt3YZngZQSfFRBFAAAAAArBdKTZw40TQKnzdvnmlErtVNq1at8jUq37VrV73yr4MHD8qMGTPMWA2QtNJKG5X379/fN+aWW24xwdZVV11lgqvTTjvNvKd3baMuwVu+fLnceeedZjc9Da40lKrbLwoAQtHB4gpf8LTFVEE5JbuwLODY9A4xngDK1wMqUTrGB+65BwAAAACNZXNrS3c0iS4LTExMNB3yrdzoPCcnxyxbtPK6VQD16V/xB4r8A6i8osC72HVOiq0TQOkyPIc4YiKDPm8AAACgPXKFyLV5Y3OSNlMpBQBoegCVU1hW2//p0BK8gpJKv7G6yK5rcpwnfPIGUBkOiYuOaJW5AwAAAGh/CKUAwKIBlPZ7qhtAaRWUs6zKb6zdZpPuqfG+yicNonqlOyQmkv8EAAAAAGg9XJEAQBvncrtlz4GS2qV3nuqnrVlOKamo9hsbbrdJj7SEOjvgOaRnmkOiIsJaZe4AAAAAcDiEUgDQhtS4XLI7r6S2AsoTQG3LdkpZZY3f2Igwu/RMT/D1f9J7rYiKDCeAAgAAAND2EUoBQCupqnHJrtyiQ0vwagOoimqX31itdOqd7jDL77wBVLeUeAkPs27zQwAAAADtG6EUAARBZXWNbM8p8u1+p0HUjpwiE0w1FBsZLr0
zNIDyNB/XAKprcryE2bU9OQAAAACEBkIpAGhm5VU1puJJAyhvFdTO3CKpcbn9xsZHhx/q/1TbiLxzUpxpTg4AAAAAoYxQCgCaoLSiWrbWC6AKZXdesQTIn8QRE+Hp/+QLoBIlo0OM2AigAAAAALRDhFIA0EjF5VW+5uNb9jvN13sPlEiA/EmS4qMOLb+rDaBSHdEEUAAAAABQi1AKAAIoLK2s7f+kIZQngNp/sDTg2BRHtC946lvbiDw5ITrocwYAAAAAKyGUAtDu5ReX+yqfvD2gcgrLAo7V5Xa+HlC1lVAd4qKCPmcAAAAAsDpCKQDthtvtlryihgFUoRwoqgg4vktSnG/3Ow2hdEc8R0xk0OcNAAAAAKGIUApAyAZQ2YVl9XbA0wCqoKTSb6x2ecpMifdVPnkDqLioiFaZOwAAAAC0B4RSACzP5Xabfk+eBuSHAqiisiq/sXabTbqnxtfugOcJoHqlOyQmkr8OAQAAACCYuAoDYCk1LrfsPVBsgqcfsg6FUKUV1X5jw+026ZGWUKf/U6L0TEuQqIiwVpk7AAAAAOAQQikAbVaNyyW7cmsDqNr+T1uznFJeVeM3NiLMbiqezO53tQGUBlJ6HAAAAADQ9hBKAWgTqmpcsjOnqF7107Zsp1RWu/zGaqVT73RvA3KHCaC6pcRLOAEUAAAAAFgGoRSAoKusrpHtGkDV9oDS+x05RVLtcvuNjY0M9wVP3p3wuiTHS5hd25MDAAAAAKyKUApAiyqvrJat2dp43OkLoHbmFpvm5A3FR0eYAKqvBlCmEXmidEqKNc3JAQAAAAChhVAKQLMpqaiSbaYB+aEAas8BDaD8xybGRtbbAU8DqPQOMWIjgAIAAACAdoFQCsAxKSqrkq1ZnuDJWwW1J78k4Nik+Chf8ORdipfqiCaAAgAAAIB2jFAKwI8qLK309X/SHfD066yCsoBjNWwyDcjrBFDJCdFBnzMAAAAAoG0jlAJQT35xeW0A5fQFULnO8oBjO3WMNc3HNXjSIKp3hkM6xEUFfc4AAAAAAOshlALaKbfbbcImDZ40gPrB3BdKfnFFwPFdk+LMEjxvI/LeGYmSEBMR9HkDAAAAAEIDoRTQTgKo7IIyEzzV7QGly/IasttEuibHe5bg1TYi75XhkLgoAigAAAAAQPMhlAJCjMvtlv35pbXhU2FtBZRTisur/MaG2W3SPTXBswRPA6hOidIrLUGiI/mrAQAAAADQsrjyBCysxuWWPQeKaxuQO00QtTXLKaWV1X5jI8Ls0iOtfgDVMy1BIsPDWmXuAAAAAID2jVAKsIjqGpfsyiv2NR/X6qet2U6pqKrxGxsZbpde6dqA3OHbCa97WoIJpgAAAAAAaAsIpYA2qLK6Rnbm1g+gtuc4pbLa5Tc2OiLM7HrnDZ80iOqWGi9hdgIoAAAAAEDbRSgFtDKtdNqeU+TrAaVL8XbkFEm1y+03NjYq/NDyOw2gOiVKl6Q40xsKAAAAAAArIZQCgqi8stosudPg6YfaHfC0IkqbkzcUHx1RW/1UWwXVKVE6dYwVu40ACgAAAABgfYRSQAspqagyTcdNAFXbiHx3XrH4x08iibGRfgFUemKM2AigAAAAAAAhilAKaAbOskq/AGpvfknAsckJUab3k68HVCeHpCREE0ABAAAAANoVQingKBWUVJjQydOA3NMHKqugLODYtMQY6VvbA8obQCXFRwd9zgAAAAAAtDWEUsARHCgq91U+efpAFUqeszzgWO335KmAOhRC6bI8AAAAAADQxkOppUuXygMPPCBZWVkycOBAeeyxx2T48OEBxz733HMybdq0eseioqKkvPxQYOB2u2X+/Pny5z//WQoKCuTUU0+VJ598Uvr27esbk5+fL9dee628/fbbYrfbZcKECfLoo49KfHx8C35StDV6ruQ6y33Bk6cCyin5xRV+Y3WRXZfkuENL8Do5pHd6oiTERLTK3AEAAAAAsKI2E0qtWLFCZs2aJcuWLZMRI0b
I4sWLZcyYMbJ582ZJS0sL+BqHw2Ge92rYk+f++++XJUuWyPPPPy89e/aUuXPnmvf87rvvJDras4Tq0ksvlf3798v7778vVVVVJui66qqr5OWXX27hT4zWDKB0ud2h/k+eAKqwtNJvrN0mkpkSX6f/U6L0TndIbFSb+aMDAAAAAIAl2dx6hd4GaBB18skny+OPP24eu1wuyczMNFVMs2fPDlgpdf3115sKqED0Y3Xu3FluvPFGuemmm8yxwsJCSU9PN6+dNGmSfP/999K/f39Zt26dDBs2zIxZtWqVjBs3Tvbs2WNe3xhOp1MSExPN+2tQZkX6887JyTEBoFaMhQqX2y378kvqLcHTEKq4vNpvbJjdJt1TEzzL72oDqF7pDomOCGuVuQMAAAAA2hdXiFybNzYnaRPlHpWVlbJ+/XqZM2eO75j+8EePHi1r16497OuKi4ule/fu5pc2ZMgQuffee+WEE04wz23fvt0sA9T38NIfiIZf+p4aSul9hw4dfIGU0vH6vT/77DO54IILAn7fiooKc6v7w1Y6D71Zkc5bgzyrzl/VuNyy50CxJ3yqDaC2ZjulrLLGb2x4mE16pmn45L0lSo+0eIkM9w+grPwzAQAAAABYhysErs1VY+ffJkKpvLw8qampMVVMdenjTZs2BXzNcccdJ88884ycdNJJJnl78MEHZdSoUbJx40bp2rWrCaS879HwPb3P6X3DpYHh4eGSlJTkGxPIokWLZMGCBX7Hc3Nz6/W0stoJoz9HPfmtkMZWu9yy72CZ7MgrlZ15peZ+V36ZVFb7n/iRYTbJTI6VHimeW/eUWOnSIVrCw+p+zgopyPfvHwUAAAAAQLC4LHZtfjhFRUXWCaWOxciRI83NSwOpfv36yVNPPSV33XVXi35vrejS/ld1K6V0qWFqaqqll+9pTy79DG3txK+srpGdubUVULXL8LbnFElVjX8ApUvtetepftL7zJQ4CWtjnwkAAAAAACtdmx8Nbx9vS4RSKSkpEhYWJtnZ2fWO6+OMjIxGvUdERIQMHjxYtmzZYh57X6fv0alTp3rvOWjQIN8YXatZV3V1tdmR70jfV3f501tDesJY+aTRE7+1P0NFVY1sz3F6ekDt12V4hbIjp8hURjWkzcY1dKrbhLxLkgZQ9RveAwAAAABgFbY2cG3eVI2de5sIpSIjI2Xo0KGyevVqGT9+vC8d1MczZ85s1Hvo8r8NGzaYJuVKd9vTYEnfwxtCaUWT9oq65pprzGOttNJG6drPSr+/+uCDD8z31t5TaFllldWy1fR/8gRQGkTtyis2zckbSoiJOBQ+1QZRGR1jxd5gx0UAAAAAAGANbSKUUrocburUqabp+PDhw2Xx4sVSUlIi06ZNM89PmTJFunTpYvo5qYULF8opp5wiffr0McHSAw88IDt37pQrr7zSlyzq7nx333239O3b14RUc+fONTvqeYMvXe43duxYmTFjhixbtkyqqqpMCKZN0Bu78x4ap6S8qrYBuQZQhSaA2nOgRAJt/dghLtIXQHnuHZKWGGN+pwAAAAAAIDS0mVBq4sSJplH4vHnzTJNxrW5atWqVr1H5rl276pV/HTx40IRJOrZjx46m0mnNmjXSv39/35hbbrnFBFtXXXWVCa5OO+0085511za+9NJLJog6++yzzftPmDBBlixZEuRPH1qcpZW+AMosw8sqlH35pQHHpiREH1qCVxtEJSdEEUABAAAAABDibG5t6Y4m0WWBiYmJpkO+lRuda38t3Y3waNatFpRUmODJEz55gqjsgrKAY9MTY2qDp0N9oDrG+/fmAgAAAACgPXId47W5VXOSNlMphbZNs8v8Yk8AZZbf1e6El1dUHnB856TY2v5Ph5bgOWIjgz5vAAAAAADQNhFKwch1lsn3+4rEFp0gaYmxklNY5ql8MgGUpxH5wZIKv9fpIruuyXG+pXcaQPXOcEh8dESrfA4AAAAAAGANhFKQVV/tksXvbKhtOv4/iY4Ik/K
qGr9xdptIt5QE6dPp0PK7XukOiY3iNAIAAAAAAEeHNKGd0wqpR9/1BlIeGkhpANUzzduA3GECqJ7pDhNYAQAAAAAANBWhVDu3N79EXAFa3d89+WQZ2jutNaYEAAAAAADaAeu2ckez6JIUZ6qi6rLbbNItNaG1pgQAAAAAANoBQql2LtURI9edM8AXTOn9deecaI4DAAAAAAC0FJbvQcYO7iaDeybLd9v2Sf9enSW9Q1xrTwkAAAAAAIQ4KqVgaGVUv84JVEgBAAAAAICgIJQCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0NDpvBm6329w7nU6xKpfLJUVFRRIdHS12O1klAAAAAADBFirX5t58xJuXHA6hVDPQE0ZlZma29lQAAAAAAADaTF6SmJh42Odt7h+LrdCoJHPfvn2SkJAgNptNrJpiaqi2e/ducTgcrT0dAAAAAADaHWeIXJtr1KSBVOfOnY9Y8UWlVDPQH3DXrl0lFOhJb+UTHwAAAAAAq3OEwLX5kSqkvKy7QBEAAAAAAACWRSgFAAAAAACAoCOUghEVFSXz58839wAAAAAAIPii2tm1OY3OAQAAAAAAEHRUSgEAAAAAACDoCKUAAAAAAAAQdIRSAAAAAAAACDpCKQAAAAAAAAQdoRQAAAAAAACCjlAKAAAAAAAAQUcoBQAAAAAAgKAjlAIAAAAAAEDQEUoBAAAAAAAg6AilAAAAAAAAEHSEUgAAAAAAAAg6QikAAAAAAAAEXXjwv2Xocblcsm/fPklISBCbzdba0wEAAAAAAGg1brdbioqKpHPnzmK3H74eilCqGWgglZmZ2drTAAAAAAAAaDN2794tXbt2PezzhFLNQCukvD9sh8MhVq32ys3NldTU1COmmAAAAAAAoGW4QuTa3Ol0muIdb15yOIRSzcC7ZE8DKSuHUuXl5Wb+Vj7xAQAAAACwKleIXZv/WIsj639CAAAAAAAAWA6hFAAAAAAAAIKOUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNARSgEAAAAAACDoCKUAAAAAAAAQdIRSAAAAAAAACDpCKQAAAAAAAAQdoRQAAAAAAACCjlAKAAAAAAAAQUcoBQAAAAAAgKAjlAIAAAAAAEDQEUoBAAAAAAAg6AilAAAAAAAAEHSEUgAAAAAAAAg6QikAAAAAAAAEHaEUAAAAAAAAgo5QCgAAAAAAAEFHKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgsF0otXbpUevToIdHR0TJixAj5/PPPjzj+tddek+OPP96MHzBggLz33nuHHfvb3/5WbDabLF68uAVmDgAAAAAAAEuGUitWrJBZs2bJ/Pnz5csvv5SBAwfKmDFjJCcnJ+D4NWvWyOTJk2X69Ony1Vdfyfjx483t22+/9Rv717/+VT799FPp3LlzED4JAAAAAABA+2apUOrhhx+WGTNmyLRp06R///6ybNkyiY2NlWeeeSbg+EcffVTGjh0rN998s/Tr10/uuusuGTJkiDz++OP1xu3du1euvfZaeemllyQiIiJInwYAAAAAAKD9skwoVVlZKevXr5fRo0f7jtntdvN47dq1AV+jx+uOV1pZVXe8y+WSyy67zARXJ5xwQgt+AgAAAAAAAHiFi0Xk5eVJTU2NpKen1zuujzdt2hTwNVlZWQHH63Gv++67T8LDw+UPf/hDo+dSUVFhbl5Op9MXcOnNinTebrfbsvMHAAAAAMDqXCFybd7Y+VsmlGoJWnmlS/y0P5U2OG+sRYsWyYIFC/yO5+bmSnl5uVj1hCksLDQnv1agAQAAAACA4HKFyLV5UVFRaIVSKSkpEhYWJtnZ2fWO6+OMjIyAr9HjRxr/8ccfmybp3bp18z2v1Vg33nij2YFvx44dAd93zpw5puF63UqpzMxMSU1NFYfDIVY98TWY089g5RMfAAAAAACrcoXItXl0dHRohVKRkZEydOhQWb16tdlBz/vL0sczZ84M+Jq
RI0ea56+//nrfsffff98cV9pLKlDPKT2uzdQPJyoqytwa0hPGyieNnvhW/wwAAAAAAFiZLQSuzRs7d8uEUkqrk6ZOnSrDhg2T4cOHm2qmkpISX4A0ZcoU6dKli1lep6677jo5/fTT5aGHHpJzzjlHli9fLl988YX86U9/Ms8nJyebW126+55WUh133HGt8AkBAAAAAADaB0uFUhMnTjR9m+bNm2ealQ8aNEhWrVrla2a+a9euemncqFGj5OWXX5Y77rhDbrvtNunbt6+8+eabcuKJJ7bipwAAAAAAAIDNrd2z0CTaUyoxMdE0I7NyTyntr5WWlmbpEkEAAAAAAKzKFSLX5o3NSaz7CQEAAAAAAGBZhFIAAAAAAAAIOkIpAAAAAAAABB2hFAAAAAAAAIKOUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNARSgEAAAAAACDoCKUAAAAAAAAQdIRSAAAAAAAACDpCKQAAAAAAAAQdoRQAAAAAAACCjlAKAAAAAAAAQUcoBQAAAAAAgKAjlAIAAAAAAEDQEUoBAAAAAAAg6AilAAAAAAAAEHSEUgAAAAAAAAg6QikAAAAAAAAEHaEUAAAAAAAAgo5QCgAAAAAAAEFHKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0hFIAAAAAAAAIOkIpAAAAAAAABB2hFAAAAAAAAILOcqHU0qVLpUePHhIdHS0jRoyQzz///IjjX3vtNTn++OPN+AEDBsh7773ne66qqkpuvfVWczwuLk46d+4sU6ZMkX379gXhkwAAAAAAALRflgqlVqxYIbNmzZL58+fLl19+KQMHDpQxY8ZITk5OwPFr1qyRyZMny/Tp0+Wrr76S8ePHm9u3335rni8tLTXvM3fuXHO/cuVK2bx5s5x33nlB/mQAAAAAAADti83tdrvFIrQy6uSTT5bHH3/cPHa5XJKZmSnXXnutzJ4922/8xIkTpaSkRN555x3fsVNOOUUGDRoky5YtC/g91q1bJ8OHD5edO3dKt27dGjUvp9MpiYmJUlhYKA6HQ6xIf5Ya7qWlpYndbqmsEgAAAACAkOAKkWvzxuYklvmElZWVsn79ehk9erTvmP6C9PHatWsDvkaP1x2vtLLqcOOV/sBsNpt06NChGWcPAAAAAACAusLFIvLy8qSmpkbS09PrHdfHmzZtCviarKysgOP1eCDl5eWmx5Qu+TtSkldRUWFudRNAb6KpNyvSeWvRnFXnDwAAAACA1blC5Nq8sfO3TCjV0rTp+cUXX2x++U8++eQRxy5atEgWLFjgdzw3N9cEW1Y9YbRKTD+/lUsEAQAAAACwKleIXJsXFRWFViiVkpIiYWFhkp2dXe+4Ps7IyAj4Gj3emPHeQEr7SH3wwQc/2hdqzpw5puF63Uop7W2Vmppq6Z5SumxRP4OVT3wAAAAAAKzKFSLX5tHR0aEVSkVGRsrQoUNl9erVZgc97y9LH8+cOTPga0aOHGmev/76633H3n//fXO8YSD1ww8/yIcffijJyck/OpeoqChza0hPGCufNHriW/0zAAAAAABgZbYQuDZv7NwtE0oprU6aOnWqDBs2zOyQt3jxYrO73rRp08zzU6ZMkS5dupjldeq6666T008/XR566CE555xzZPny5fLFF1/In/70J18gddFFF8mXX35pdujTnlXeflNJSUkmCAMAAAAAAEDzs1QoNXHiRNO3ad68eSY8GjRokKxatcrXzHzXrl310rhRo0bJyy+/LHfccYfcdttt0rdvX3nzzTflxBNPNM/v3btX3nrrLfO1vlddWjV1xhlnBPXzAQAAAAAAtBc2t3bPQpNoT6nExETTjMzKPaVycnIkLS3N0iWCAAAAAABYlStErs0bm5NY9xMCAAAAAADAsgilAAAAAAAAEHSEUgAAAAAAALBGKFVQUCBPP/20zJkzR/Lz880x3cFOG4cDAAAAAAAAzb773jfffCOjR48
2Dat27NghM2bMkKSkJFm5cqXZ/e6FF1442rcEAAAAAABAO3PUlVKzZs2Syy+/XH744QeJjo72HR83bpx89NFHzT0/AAAAAAAAhKCjDqXWrVsnV199td/xLl26SFZWVnPNCwAAAAAAACHsqEOpqKgocTqdfsf/97//SWpqanPNCwAAAAAAACHsqEOp8847TxYuXChVVVXmsc1mM72kbr31VpkwYUJLzBEAAAAAAADtPZR66KGHpLi4WNLS0qSsrExOP/106dOnjyQkJMg999zTMrMEAAAAAABA+959T3fde//99+U///mP/Pe//zUB1ZAhQ8yOfAAAAAAAAECLhFIvvPCCTJw4UU499VRz86qsrJTly5fLlClTjvYtAQAAAAAA0M4c9fK9adOmSWFhod/xoqIi8xwAAAAAAADQ7KGU2+02zc0b2rNnj1naBwAAAAAAADTb8r3BgwebMEpvZ599toSHH3ppTU2NbN++XcaOHdvYtwMAAAAAAEA71uhQavz48eb+66+/ljFjxkh8fLzvucjISOnRo4dMmDChZWYJAAAAAACA9hlKzZ8/39xr+KSNzqOjo1tyXgAAAAAAAAhhR7373tSpU1tmJgAAAAAAAGg3jjqU0v5RjzzyiLz66quya9cuqaysrPd8fn5+c84PAAAAAAAAIeiod99bsGCBPPzww2YJX2FhocyaNUsuvPBCsdvtcuedd7bMLAEAAAAAANC+Q6mXXnpJ/vznP8uNN95oduCbPHmyPP300zJv3jz59NNPW2aWAAAAAAAAaN+hVFZWlgwYMMB8rTvwabWU+tWvfiXvvvtu888QAAAAAAAAIeeoQ6muXbvK/v37zde9e/eWf/zjH+brdevWSVRUVPPPEAAAAAAAACHnqEOpCy64QFavXm2+vvbaa2Xu3LnSt29fmTJlilxxxRUtMUcAAAAAAAC09933/vjHP/q+1mbn3bt3lzVr1phg6txzz23u+QEAAAAAACAEHXUo1dApp5xibuqLL76QYcOGNce8AAAAAAAAEMKOevlecXGxlJWV1Tv29ddfmyqpESNGNOfcAAAAAAAA0N5Dqd27d8vIkSMlMTHR3GbNmiWlpaWml5SGUXFxcWYZHwAAAAAAANBsy/duvvlmKS8vl0cffVRWrlxp7j/++GMTSG3dutXsygcAAAAAAAA0a6XURx99JE8++aTMnDlTli9fLm63Wy699FJ5/PHHgxpILV26VHr06CHR0dEmEPv888+POP61116T448/3owfMGCAvPfee/We188xb9486dSpk8TExMjo0aPlhx9+aOFPAQAAAAAA0L41OpTKzs6Wnj17mq/T0tIkNjZWfvnLX0owrVixwiwbnD9/vnz55ZcycOBAGTNmjOTk5AQcr8sJJ0+eLNOnT5evvvpKxo8fb27ffvutb8z9998vS5YskWXLlslnn31mliHqe2pVWLviPCCRezebewAAAAAA0Aqc7eva3ObWUqFGCAsLk6ysLElNTTWPHQ6H/Pe///UFVcGglVEnn3yyqc5SLpdLMjMz5dprr5XZs2f7jZ84caKUlJTIO++84zumOwUOGjTIhFD60Tt37iw33nij3HTTTeb5wsJCSU9Pl+eee04mTZrUqHk5nU7TZ0tfqz8Xy1n/vrjfeVJPBnHbbGL75QyRQWe29qwAAAAAAGg/vv5Q3H//86Fr83N/JzJktFhRY3OSRveU0gDnJz/5idhsNt8ufIMHDxa7vX6xVX5+vrSEyspKWb9+vcyZM8d3TL+3Lrdbu3ZtwNfoca2sqkuroN58803z9fbt203Qpu/hpT80Db/0tYcLpSoqKsyt7g/bG5LpzVKcB8RWG0gpc//enzw3AAAAAAAQNDbvvQZTbz8p7l4DRRzJYjWNzUYaHUo9++yz0pry8vKkpqbGVDHVpY83bdoU8DUaOAUar8e9z3uPHW5MIIsWLZIFCxb4Hc/NzbXcsj8tC0xqXLEcAAAAAAAIEpvbJQe3fi+VXX4
iVlNUVNS8odTUqVObMp+QotVadSuwtFJKlxHq0kbLLd+LDvOUBdYJptw2u7ivWWzJNBYAAAAAAMtxHhDbk9f5XZt36N3Pktfmutlcs4ZSrS0lJcX0tdKG63Xp44yMjICv0eNHGu+912O6+17dMdp36nCioqLMrSFdTthwOWOb1yFV5NzfmbJATWH1pLede43Y0jJbe2YAAAAAALQP0bGBr831mt2CGpuNWCZBiYyMlKFDh8rq1avrrVHUxyNHjgz4Gj1ed7x6//33feO1SbsGU3XHaNWT7sJ3uPcMSUNGi/u6ZZJ/7g3m3qqN1AAAAAAAsKwh7e/a3DKVUkqXzOkywmHDhsnw4cNl8eLFZne9adOmmeenTJkiXbp0MT2f1HXXXSenn366PPTQQ3LOOefI8uXL5YsvvpA//cnTxFubtl9//fVy9913S9++fU1INXfuXLMj3/jx46VdcSR71qlasCwQAAAAAICQ4Ghf1+aWCqUmTpxomonPmzfPNCLXJXarVq3yNSrftWtXvRKxUaNGycsvvyx33HGH3HbbbSZ40p33TjzxRN+YW265xQRbV111lRQUFMhpp51m3rOx6x8BAAAAAABw9GxuN1uvNZUu+UtMTJTCwkLrNTqvsxQyJydH0tLSrNcXCwAAAACAEOAKkWvzxuYkjaqUqrvT3I95+OGHGz0WAAAAAAAA7VOjQqmvvvqqUW+mPZoAAAAAAACAZgmlPvzww8YMAwAAAAAAABrFugsUAQAAAAAAENqVUhdeeGGj33DlypVNmQ8AAAAAAADagUaFUtoxHQAAAAAAAAhqKPXss8822zcEAAAAAAAA6CkFAAAAAACAtlkpVVfPnj3FZrMd9vlt27Y1dU4AAAAAAAAIcUcdSl1//fX1HldVVclXX30lq1atkptvvrk55wYAAAAAAIAQddSh1HXXXRfw+NKlS+WLL75ojjkBAAAAAAAgxDVbT6lf/vKX8sYbbzTX2wEAAAAAACCENVso9frrr0tSUlJzvR0AAAAAAABC2FEv3xs8eHC9Rudut1uysrIkNzdXnnjiieaeHwAAAAAAAELQUYdS48ePr/fYbrdLamqqnHHGGXL88cc359wAAAAAAAAQoo46lJo/f37LzAQAAAAAAADtRrP1lAIAAAAAAACavVJKl+nV7SUViD5fXV3d6G8OAAAAAACA9qnRodRf//rXwz63du1aWbJkibhcruaaFwAAAAAAAEJYo0Op888/3+/Y5s2bZfbs2fL222/LpZdeKgsXLmzu+QEAAAAAACAEHVNPqX379smMGTNkwIABZrne119/Lc8//7x07969+WcIAAAAAACA9h1KFRYWyq233ip9+vSRjRs3yurVq02V1IknnthyMwQAAAAAAED7Xb53//33y3333ScZGRnyyiuvBFzOBwAAAAAAADSGze12uxu7+15MTIyMHj1awsLCDjtu5cqV0t44nU5JTEw0lWQOh0OsSJvU5+TkSFpamvldAwAAAACA4HKFyLV5Y3OSRldKTZkyRWw2W3PNDwAAAAAAAO1Yo0Op5557rmVnAgAAAAAAgHbDurVgAAAAAAAAsCxCKQAAAAAAAASdZUKp/Px8ufTSS02DrA4dOsj06dOluLj4iK8pLy+X3//+95KcnCzx8fEyYcIEyc7O9j3/3//+VyZPniyZmZmmiXu/fv3k0UcfDcKnAQAAAAAAaN8sE0ppILVx40Z5//335Z133pGPPvpIrrrqqiO+5oYbbpC3335bXnvtNfn3v/8t+/btkwsvvND3/Pr1601H+xdffNG89+233y5z5syRxx9/PAifCAAAAAAAoP2yud1ut7Rx33//vfTv31/WrVsnw4YNM8dWrVol48aNkz179kjnzp39XqPbDqampsrLL78sF110kTm2adMmUw21du1aOeWUUwJ+L62s0u/3wQcfNPtWh21ZqGw7CQAAAACAVblC5Nq8sTmJJT6hhki6ZM8bSKnRo0ebX9Bnn30W8DV
aBVVVVWXGeR1//PHSrVs3836Hoz+wpKSkZv4EAAAAAAAAqCtcLCArK8ukhHWFh4eb8EifO9xrIiMjTZhVV3p6+mFfs2bNGlmxYoW8++67R5xPRUWFudVNAL2Jpt6sSOetRXNWnT8AAAAAAFbnCpFr88bOv1VDqdmzZ8t99913xDG6lC4Yvv32Wzn//PNl/vz58otf/OKIYxctWiQLFizwO56bm2uaq1v1hNEqMT35rVwiCAAAAACAVblC5Nq8qKio7YdSN954o1x++eVHHNOrVy/JyMgwayrrqq6uNjvy6XOB6PHKykopKCioVy2lu+81fM13330nZ599tmmcfscdd/zovLUZ+qxZs+pVSukOftrDyso9pWw2m/kMVj7xAQAAAACwKleIXJtHR0e3/VBKf8h6+zEjR4404ZL2iRo6dKg5po3I9Zc1YsSIgK/RcREREbJ69WqZMGGCObZ582bZtWuXeT8v3XXvrLPOkqlTp8o999zTqHlHRUWZW0N6wlj5pNET3+qfAQAAAAAAK7OFwLV5Y+duiU+oO+aNHTtWZsyYIZ9//rn85z//kZkzZ8qkSZN8O+/t3bvXNDLX55V2eZ8+fbqpaPrwww9NoDVt2jQTSHl33tMle2eeeaZZrqfjtNeU3nQZHgAAAAAAANp5o3P10ksvmSBKl9lp4qbVT0uWLPE9rzvtaSVUaWmp79gjjzziG6uNyceMGSNPPPGE7/nXX3/dBFAvvviiuXl1795dduzYEcRPBwAAAAAA0L7Y3No9C02iPaW0MkubkVm5p5T27dJdDq1cIggAAAAAgFW5QuTavLE5iXU/IQAAAAAAACyLUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNARSgEAAAAAACDoCKUAAAAAAAAQdIRSAAAAAAAACDpCKQAAAAAAAAQdoRQAAAAAAACCjlAKAAAAAAAAQUcoBQAAAAAAgKAjlAIAAAAAAEDQEUoBAAAAAAAg6AilAAAAAAAAEHSEUgAAAAAAAAg6QikAAAAAAAAEHaEUAAAAAAAAgo5QCgAAAAAAAEFHKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0hFIAAAAAAAAIOkIpAAAAAAAABB2hFAAAAAAAAIKOUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNBZJpTKz8+XSy+9VBwOh3To0EGmT58uxcXFR3xNeXm5/P73v5fk5GSJj4+XCRMmSHZ2dsCxBw4ckK5du4rNZpOCgoIW+hQAAAAAAACwVCilgdTGjRvl/fffl3feeUc++ugjueqqq474mhtuuEHefvttee211+Tf//637Nu3Ty688MKAYzXkOumkk1po9gAAAAAAALBcKPX999/LqlWr5Omnn5YRI0bIaaedJo899pgsX77cBE2BFBYWyv/93//Jww8/LGeddZYMHTpUnn32WVmzZo18+umn9cY++eSTpjrqpptuCtInAgAAAAAAaN8sEUqtXbvWLNkbNmyY79jo0aPFbrfLZ599FvA169evl6qqKjPO6/jjj5du3bqZ9/P67rvvZOHChfLCCy+Y9wMAAAAAAEDLCxcLyMrKkrS0tHrHwsPDJSkpyTx3uNdERkaaMKuu9PR032sqKipk8uTJ8sADD5iwatu2bY2aj75Ob15Op9Pcu1wuc7Minbfb7bbs/AEAAAAAsDpXiFybN3b+rRpKzZ49W+67774fXbrXUubMmSP9+vWT3/zmN0f1ukWLFsmCBQv8jufm5prm6lY9YXTJo578VIwBAAAAABB8rhC5Ni8qKmr7odSNN94ol19++RHH9OrVSzIyMiQnJ6fe8erqarMjnz4XiB6vrKw0vaLqVkvp7nve13zwwQeyYcMGef31181j/aWrlJQUuf322wMGT94wa9asWfUqpTIzMyU1NdXsDmjVE193HtTPYOUTHwAAAAAAq3KFyLV5dHR02w+l9Iestx8zcuRIEy5pnyh
tWO4NlPSXpY3PA9FxERERsnr1apkwYYI5tnnzZtm1a5d5P/XGG29IWVmZ7zXr1q2TK664Qj7++GPp3bv3YecTFRVlbg3pCWPlk0ZPfKt/BgAAAAAArMwWAtfmjZ27JXpK6RK7sWPHyowZM2TZsmWmgfnMmTNl0qRJ0rlzZzNm7969cvbZZ5uG5cOHD5fExESZPn26qWjS3lNawXTttdeaQOqUU04xr2kYPOXl5fm+X8NeVAAAAAAAAGg+lgil1EsvvWSCKA2eNHHT6qclS5b4ntegSiuhSktLfcceeeQR31htTD5mzBh54oknWukTAAAAAAAAwMvm9jZSwjHTnlJamaXNyKzcU0r7dukuh1YuEQQAAAAAwKpcIXJt3ticxLqfEAAAAAAAAJZFKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0hFIAAAAAAAAIOkIpAAAAAAAABB2hFAAAAAAAAIKOUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNARSgEAAAAAACDoCKUAAAAAAAAQdIRSAAAAAAAACDpCKQAAAAAAAAQdoRQAAAAAAACCjlAKAAAAAAAAQUcoBQAAAAAAgKAjlAIAAAAAAEDQEUoBAAAAAAAg6AilAAAAAAAAEHSEUgAAAAAAAAi68OB/y9DjdrvNvdPpFKtyuVxSVFQk0dHRYreTVQIAAAAAEGyuELk29+Yj3rzkcAilmoGeMCozM7O1pwIAAAAAANBm8pLExMTDPm9z/1hshUYlmfv27ZOEhASx2Wxi1RRTQ7Xdu3eLw+Fo7ekAAAAAANDuOEPk2lyjJg2kOnfufMSKLyqlmoH+gLt27SqhQE96K5/4AAAAAABYnSMErs2PVCHlZd0FigAAAAAAALAsQikAAAAAAAAEHaEUjKioKJk/f765BwAAAAAAwRfVzq7NaXQOAAAAAACAoKNSCgAAAAAAAEFHKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0hFIICjZ5BAAAAAAAdYXXewQ0s+LiYomKipKIiAgTTNlsttaeEgAAAAAA7crWrVvllVdekZKSEjnxxBPl0ksvlbaASim0mO+//14uuOACWbFihVRWVppAioopAAAAAACCZ8OGDTJq1Cj54osv5O2335bHH39c3nvvPWkLqJRCi9i5c6dMmDDBpLFaLRUdHS3nnXeeREZGUjEFAAAAAEAQZGdny8SJE2X69Oly7733Sl5enpx11lmyb98+aQuolEKzq6mpkTfeeEP69Okjn3/+uXTo0MGc/G+99RYVUwAAAAAABMnmzZvNNfjvf/978zglJUUGDhwo//3vf+V3v/uduVZvTYRSaHZhYWEmeZ0yZYo52d99911JT0/3BVMVFRUEUwAAAAAAtLDw8HApLS31LdfT6/KXXnpJ7Ha7qZpavny5XHzxxdJabG6SAbSAqqoq09zcSyukzj//fFM6eNttt5mv9fm//e1v5msAAAAAANC89Br8D3/4g1nF1LdvX/nggw9k5cqVpr2Oev755+Xuu++Wv/71r6YBerDRUwrNQhPW3bt3S2xsrKSlpUnHjh3F5XKZ9LW6utr0knrzzTdl/PjxJpnVJX4ffvihqZw6+eSTpXPnzq39EQAAAAAAsLTS0lJzi4mJMdfhumppyZIlpoeU9n7Ozc2V008/3Te+e/fu5tq9blFJMBFKocm++eYb+fWvf22CJl2apye9dvM/5ZRTfOWCGkxFRUWZyijdke+yyy4zf0A++ugjAikAAAAAAJpo48aNcv3110tWVpZ5fOWVV8rUqVPNNbre9Jpdr8vz8/MlMTHRjPnHP/4hqampptdUayCUQpPoyX7uuefKpEmTTDf/7777TlasWCE/+9nP5IUXXjDHvcGU/gHQIEqT2ISEBBNInXDCCa39EQAAAAAAsLTvv/9ezjzzTHMNrk3NtYfUU089JaNGjTKrk5QGU1u3bpXrrrt
OevToYdru6PW7rmJKTk5ulXnTUwpN8vXXX5uqp7ffftuc1KqsrEzmzZtnSgR1reo555zjW8r3xBNPyMyZM2X9+vUyePDg1p4+AAAAAACWdvDgQRNG9enTR5YuXeo7PnToUBk+fLg8+eSTvmty3XVv9uzZ5rpdVy3dfvvtrVosQqUUmqSwsNCUCHqzTT3Rde3q/fffb07ySy65RL744gvTUE1NnDhRxo4dK7169WrlmQMAAAAAYH179+4Vh8Nhrre9G43pKqWzzz5bDhw4YI7ZbDazemngwIHy2muvSXx8vGm/o8v5WpO9Vb87LO+0006Tn/70pzJnzhyzLlWTVw2m9ITXY4MGDZJXXnnFhFZ6XEsCCaQAAAAAAGgeWumklVLaRsfbPkclJSVJcXGx+Vqv0cPCwsxjDaRUawdSilAKTaIntaaxO3bsMMv1nE6nCaZUly5dzMm+adMm8wfAexwAAAAAADSdtyhkwoQJ5rEWhHivvUtKSsxue166ounOO+80FVNtBcv3cMz0ZNeT/5prrjHN0nRnPV2yp2tStXRQaWVUx44dzUmvfzB0PAAAAAAAaDpvAOW9PtdbdXW1qZbSDca8u+zNnTtX7rnnHtMXWotL2goaneOYadCkJ7O3Ydpdd90l7777rhQUFMh5550nu3fvlnfeeUc+/fRTdtkDAAAAAKAFr82L6yzNU48++qh888030r17d1m0aJF88sknpvl5W8J6KjSKBk+BTvqdO3fKgAED5F//+pdJXu+77z75xS9+IRs2bDDrU9euXUsgBQAAAABAM3O73aYqynttPn78eBM8eenyvWeffdYs22uLgZRi+R5+dHc9LffzNjD3lgZ6T/pTTz1VfvWrX5mG5+r00083N/3DUXctKwAAAAAAODb79u2TdevWSXl5udndfsiQIWapni7T27Ztm5xxxhnyy1/+0ndtrjIyMkyV1HvvvSf9+vWTtojlezis7777TkaNGiW33HKL3HbbbeZY3WDqiiuukIiICFm2bJmvV5R3HSsAAAAAAGi6DRs2yAUXXGD6Nefk5JhjTzzxhJxzzjnmGnzs2LGSkpIiL774Yr3rcX0uKytLOnXqJG0VoRQC2rNnj+kLpeV+eXl5cvPNN8vs2bPrLd2rqqoyoRQAAAAAAGh+W7duNauRfvOb35hrcu3drIGU7qr3/PPPS1xcnFRWVppr87qBVN2CkraM5XvwoyfvG2+8IT179pSZM2fK559/Lvfee695Tv8QEEgBAAAAANCyKisrZenSpWYFk24sptfgHTp0kJNPPtn0dPb2fo6MjPR7rRUCKUUohYAn77hx4yQtLU3OPPNMGTRokCn702793mBK/zBYJXkFAAAAAMBq7Ha79OnTxxSM6DW4t13OWWedJQsXLjQ9oBMSEuq9xmotdQilEJA2TtOTX+m61SuvvNKc2HUrpvRkf/vtt2XkyJFm/SoAAAAAAGge4eHhppdUw55Q3sooba3jDaE2bdokxx9/vKUCKUUoBV8n/71798qBAwdk9OjRJpHVm24vqX8QNHTSxuZKgyk98XXso48+Krt27Wrt6QMAAAAAEDLX5nl5eTJmzBhJT083x73X5rpiyel0SmlpqQmnNISaM2eO3HfffXLw4EFxOByWCqYIpSDffPON/OpXvzJlf//73/9kwIABctVVV5lGavHx8b7G5qmpqTJ9+nQTSOlufLqWde3atW26kz8AAAAAAFa9Np8xY4Zcdtll5trc20JHwygNqGJiYmTBggWm79Snn34qiYmJYjU0BGrnNH2dNGmSXHLJJfLuu++aVFZL/p577jnTOK2oqMgEUt4Galox9d1335k/JJ988okMGzastT8CAAAAAAAheW3+/PPP+67NvT2dNaDSohEtJtGVTB9++KEMHz5crIhQqp3LysqSsrIyc+L36NHDnNgaSGmZ4Jo1a0wJYHl5uTn5tULqxRdflH/84x/mpO/fv39rTx8AAAAAgHZzba5yc3Nlw4Y
N8s4778jnn38uQ4cOFasilGrnvGtQvX2hdJ2qHtMk9vTTTzcJ7bp168xzOu7UU0+Vzz77TIYMGdLKMwcAAAAAoP1dm3fp0kVuvPFGWb9+vQwcOFCszObW8he0WxUVFXLaaadJRkaGvPnmm2apnreBmp4aeoIPHjzYlAxabWtJAAAAAABC7drcOz4qKkqsjkqpdkz7ROlJ/Oyzz8pHH30k11xzjTnuPek1gDrvvPMkJyfHHCeQAgAAAACg9a7N3bV1RaEQSClCqXZM+0TpznonnniiSVtfeeUVmTJlimRnZ/vGbN++XTp27GjGAQAAAACA1rs2d9VuQhYqWL7XjjRcfuctBSwuLjalf19//bVpqta9e3dJSkqS5ORk+dvf/iZr1641W1ECAAAAAICm4dr8ECql2oGtW7fKwYMH6530msLqSb9jxw75yU9+YhqmnX322bJx40YZN26caZyWlpZmOvmH2kkPAAAAAECwcW3uj0qpEPff//7XNEN7+umn5Yorrqj33O7du80ueueff778+c9/NmWA2kzNm9rqYy0jBAAAAAAAx45r88AIpUL8pD/11FNl5syZ8sc//tHv+ccee0y2bdsmDz/8cL2k1nvis9seAAAAAABNw7X54RFKhahNmzaZ0r558+bJ3LlzTbL6r3/9S7Zs2WKap/Xt21dSU1NDOnEFAAAAAKA1cW1+ZOE/8jwsSE/mV1991axNveiii8yxn//853LgwAGzTlWbpPXs2dOksCeddFJrTxcAAAAAgJDDtfmPa38xXDug6erVV18tM2bMMGtWNZXt0KGD2VoyNzdXHnzwQbM+9e677zbd/QEAAAAAQPPi2vzHUSkVotLT082JrV38tUu/ft2vXz/z3AUXXCA7d+6U++67TwoLCyU+Pr61pwsAAAAAQMjh2vzICKVCxL59++TLL7+UyspK6datmwwbNsysS73jjjvMSd67d28zTssGNYnt06ePdOzYUSIjI1t76gAAAAAAhASuzY8OoVQI2LBhg4wfP15SUlJMx/4ePXrILbfcIr/+9a+lU6dOkpGR4evUrye9+uc//yldu3aV2NjYVp49AAAAAADWx7X50aOnlMVt3bpVxo0bZ5qm/eMf/5BVq1bJCSecYO41eW24deSuXbvk5ptvlr/85S/y0EMPSVxcXKvOHwAAAAAAq+Pa/NjY3PqTgSVpOeCcOXNkz5495kT2lvs988wzJo3dvHmz6ebvpetXn3rqKVmzZo288sorMmjQoFacPQAAAAAA1se1+bFj+Z7Ft5fUMj9tkqYnvTd5HTVqlGmQVlVVVW/88OHDpaioSBYuXChdunRptXkDAAAAABAquDY/doRSFhYdHW3Wq/bs2bPecd1iMiIiot6Jv379ehk6dKicffbZrTBTAAAAAABCE9fmx46eUhazf/9+U+qn61I1jfWe9LpG1bs+VbeSPHjwoO818+bNk5///Ody4MABk9gCAAAAAIBjx7V586BSykK++eYbOe+88yQqKkqys7NN9349qceMGSNJSUm+EkG92e12UyZ49913y4MPPigff/xxvTWsAAAAAADg6HFt3nxodG4Rubm58rOf/UwuvPBCmT59uikPnDVrlvnDcPHFF8vvf/97SU1NNWNzcnJk7Nix8pOf/ET++te/muZpWh4IAAAAAACOHdfmzYtKKQud+OXl5ebE79Wrlzm2fPlymT17tqxcudJsH6knf2xsrCkF/Prrr2XTpk3y2WeftetO/gAAAAAANBeuzZsXPaUsQhujVVdXS2lpqXlcVlZm7v/4xz/KmWeeKU8++aRs2bLFHOvYsaP87ne/ky+//JKTHgAAAACAZsK1efNi+Z6F6LaRuhb1gw8+MI8rKirMGlZ18sknS58+feSVV14xjzW51TJCAAAAAADQfLg2bz5USrVRJSUlUlRUJE6n03fsqaeeko0bN8oll1xiHutJrwmt0jWt+hovTnoAAAAAAJqGa/OWRSjVBn333Xdmfer
pp58u/fr1k5deeskc168fffRRef/99+XXv/61KRvUTv7eBmq6dlX/IFD8BgAAAABA03Bt3vJodN4GT3pNVqdMmSLDhg2T9evXy7Rp06R///4yePBgs+2knuC6LvWkk06S448/XiIjI+Xdd9+VTz/9VMLD+ZUCAAAAANAUXJsHBz2l2pD8/HyZPHmyOZk1dfXSZmkDBgyQJUuW+I5p+eDdd99tXqPlgNdcc435wwEAAAAAAI4d1+bBQ3TXhmjJX0FBgVx00UXmscvlMiWAPXv2NCe40gxRbwkJCXLffffVGwcAAAAAAJqGa/Pg4afVhqSnp8uLL74oP/3pT83jmpoac9+lSxffiW2z2czXdZus6TEAAAAAANB0XJsHD6FUG9O3b19fwhoREWG+1vRVm6V5LVq0SJ5++mlfd39OfAAAAAAAmg/X5sHB8r02ShNXPeG9J7U3jZ03b55Zr/rVV1/ROA0AAAAAgBbEtXnLolKqDfP2oNcTPDMzUx588EG5//775YsvvpCBAwe29vQAAAAAAAh5XJu3HOK8NsybwGqp4J///GdxOBzyySefyJAhQ1p7agAAAAAAtAtcm7ccKqUsYMyYMeZ+zZo1MmzYsNaeDgAAAAAA7Q7X5s3P5vbWoaFNKykpkbi4uNaeBgAAAAAA7RbX5s2LUAoAAAAAAABBx/I9AAAAAAAABB2hFAAAAAAAAIKOUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNARSgEAAAAAACDoCKUAAAAAAAAQdIRSAAAAbcjll18uNpvN3CIiIiQ9PV1+/vOfyzPPPCMul6vR7/Pcc89Jhw4dWnSuAAAATUEoBQAA0MaMHTtW9u/fLzt27JC///3vcuaZZ8p1110nv/rVr6S6urq1pwcAANAsCKUAAADamKioKMnIyJAuXbrIkCFD5LbbbpO//e1vJqDSCij18MMPy4ABAyQuLk4yMzPld7/7nRQXF5vn/vWvf8m0adOksLDQV3V15513mucqKirkpptuMu+trx0xYoQZDwAAEGyEUgAAABZw1llnycCBA2XlypXmsd1ulyVLlsjGjRvl+eeflw8++EBuueUW89yoUaNk8eLF4nA4TMWV3jSIUjNnzpS1a9fK8uXL5ZtvvpFf//rXpjLrhx9+aNXPBwAA2h+b2+12t/YkAAAAcKinVEFBgbz55pt+z02aNMkESd99953fc6+//rr89re/lby8PPNYK6quv/56815eu3btkl69epn7zp07+46PHj1ahg8fLvfee2+LfS4AAICGwv2OAAAAoE3S/5eoS/HUP//5T1m0aJFs2rRJnE6n6TVVXl4upaWlEhsbG/D1GzZskJqaGvnJT35S77gu6UtOTg7KZwAAAPAilAIAALCI77//Xnr27GkaoGvT82uuuUbuueceSUpKkk8++USmT58ulZWVhw2ltOdUWFiYrF+/3tzXFR8fH6RPAQAA4EEoBQAAYAHaM0ornW644QYTKrlcLnnooYdMbyn16quv1hsfGRlpqqLqGjx4sDmWk5MjP/3pT4M6fwAAgIYIpQAAANoYXU6XlZVlAqTs7GxZtWqVWaqn1VFTpkyRb7/9VqqqquSxxx6Tc889V/7zn//IsmXL6r1Hjx49TGXU6tWrTYN0rZ7SZXuXXnqpeQ8NtDSkys3NNWNOOukkOeecc1rtMwMAgPaH3fcAAADaGA2hOnXqZIIl3Rnvww8/NDvt/e1vfzPL7jRkevjhh+W+++6TE088UV566SUTWtWlO/Bp4/OJEydKamqq3H///eb4s88+a0KpG2+8UY477jgZP368rFu3Trp169ZKnxYAALRX7L4HAAAAAACAoKNSCgAAAAAAAEFHKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0hFIAAAAAAAAIOkIpAAAAAAAABB2hFAAAAAAAAIKOUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNARSgE
AAAAAAECC7f8D11SrppRFFAIAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "try:\n", + " import matplotlib.pyplot as plt\n", + "\n", + " if timeseries:\n", + " fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 6), sharex=True)\n", + "\n", + " ax1.plot(dates, means, marker=\".\", color=\"steelblue\")\n", + " ax1.set_ylabel(\"Mean\")\n", + " ax1.set_title(\"conv_rate — Daily Trend\")\n", + " ax1.grid(True, alpha=0.3)\n", + "\n", + " ax2.plot(dates, null_rates, marker=\".\", color=\"coral\")\n", + " ax2.set_ylabel(\"Null Rate\")\n", + " ax2.set_xlabel(\"Date\")\n", + " ax2.grid(True, alpha=0.3)\n", + "\n", + " plt.xticks(rotation=45)\n", + " plt.tight_layout()\n", + " plt.show() # pragma: allowlist secret\n", + "except ImportError:\n", + " print(\"Install matplotlib to visualize: pip install matplotlib\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 9: On-Demand Exploration (Transient Compute)\n", + "\n", + "Compute metrics for an arbitrary date range without storing them. Useful for ad-hoc investigation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "conv_rate (numeric):\n", + " rows=905 nulls=0 null_rate=0.0000\n", + " mean=0.5041 stddev=0.1929\n", + " p50=0.4964 p95=0.8221 p99=0.9757\n", + "\n", + "avg_daily_trips (numeric):\n", + " rows=905 nulls=0 null_rate=0.0000\n", + " mean=20.1525 stddev=4.4410\n", + " p50=20.0000 p95=27.0000 p99=31.9600\n", + "\n", + "vehicle_type (categorical):\n", + " rows=905 nulls=0 null_rate=0.0000\n", + " unique_values=5\n", + " van: 194\n", + " sedan: 193\n", + " suv: 186\n", + " truck: 171\n", + " compact: 161\n", + "\n" + ] + } + ], + "source": [ + "transient_result = monitoring.compute_transient(\n", + " project=\"monitoring_demo\",\n", + " feature_view_name=\"driver_stats\",\n", + " feature_names=[\"conv_rate\", \"avg_daily_trips\", \"vehicle_type\"],\n", + " start_date=date(2025, 1, 10),\n", + " end_date=date(2025, 1, 20),\n", + ")\n", + "\n", + "for fm in transient_result.get(\"metrics\", []):\n", + " print(f\"{fm['feature_name']} ({fm['feature_type']}):\")\n", + " print(f\" rows={fm['row_count']} nulls={fm['null_count']} null_rate={fm['null_rate']:.4f}\")\n", + " if fm[\"feature_type\"] == \"numeric\":\n", + " print(f\" mean={fm['mean']:.4f} stddev={fm['stddev']:.4f}\")\n", + " print(f\" p50={fm['p50']:.4f} p95={fm['p95']:.4f} p99={fm['p99']:.4f}\")\n", + " elif fm[\"feature_type\"] == \"categorical\" and fm.get(\"histogram\"):\n", + " hist = fm[\"histogram\"]\n", + " print(f\" unique_values={hist['unique_count']}\")\n", + " for entry in hist[\"values\"]:\n", + " print(f\" {entry['value']}: {entry['count']}\")\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 10: REST API Usage\n", + "\n", + "Once the Feast registry server is running, all monitoring endpoints are available via HTTP.\n", + "\n", + "```bash\n", + "# Start the server\n", + "feast serve_registry\n", + 
"```\n", + "\n", + "### Compute metrics via REST" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'job_id': '077f59c5-c341-4fbb-9adc-b0111fc9228b', 'status': 'completed', 'computed_feature_views': 1, 'computed_features': 20, 'granularities': ['biweekly', 'daily', 'monthly', 'quarterly', 'weekly'], 'duration_ms': 98}\n", + "[{'project_id': 'monitoring_demo', 'feature_view_name': 'driver_stats', 'feature_name': 'conv_rate', 'metric_date': '2025-01-01', 'granularity': 'daily', 'data_source_type': 'batch', 'computed_at': '2026-04-21T13:41:42.687597+05:30', 'is_baseline': True, 'feature_type': 'numeric', 'row_count': 4922, 'null_count': 0, 'null_rate': 0.0, 'mean': 0.4988999058272324, 'stddev': 0.1975387054069251, 'min_val': 0.0, 'max_val': 1.0, 'p50': 0.4998365219303598, 'p75': 0.633892663793526, 'p90': 0.7521919750314627, 'p95': 0.825733080299169, 'p99': 0.9640086762359101, 'histogram': {'bins': [0.0, 0.05, 0.1, 0.15000000000000002, 0.2, 0.25, 0.30000000000000004, 0.35000000000000003, 0.4, 0.45, 0.5, 0.55, 0.6000000000000001, 0.65, 0.7000000000000001, 0.75, 0.8, 0.8500000000000001, 0.9, 0.9500000000000001, 1.0], 'counts': [53, 67, 75, 146, 180, 267, 355, 399, 432, 493, 505, 420, 411, 330, 283, 186, 124, 93, 46, 57], 'bin_width': 0.05}}, {'project_id': 'monitoring_demo', 'feature_view_name': 'driver_stats', 'feature_name': 'conv_rate', 'metric_date': '2025-02-28', 'granularity': 'daily', 'data_source_type': 'batch', 'computed_at': '2026-04-21T19:02:39.068597+05:30', 'is_baseline': False, 'feature_type': 'numeric', 'row_count': 104, 'null_count': 0, 'null_rate': 0.0, 'mean': 0.5201334885346333, 'stddev': 0.21216576270117404, 'min_val': 0.09993354474902831, 'max_val': 1.0, 'p50': 0.5065079886167952, 'p75': 0.6963620898617928, 'p90': 0.7809868206291576, 'p95': 0.8538056054296318, 'p99': 0.9187701931117264, 'histogram': {'bins': [0.09993354474902831, 
0.1449368675115769, 0.18994019027412548, 0.23494351303667405, 0.27994683579922264, 0.32495015856177123, 0.3699534813243198, 0.4149568040868684, 0.459960126849417, 0.5049634496119656, 0.5499667723745142, 0.5949700951370628, 0.6399734178996113, 0.6849767406621599, 0.7299800634247084, 0.774983386187257, 0.8199867089498056, 0.8649900317123542, 0.9099933544749028, 0.9549966772374514, 1.0], 'counts': [4, 1, 6, 7, 5, 5, 7, 6, 11, 5, 4, 10, 5, 8, 9, 3, 4, 2, 1, 1], 'bin_width': 0.045003322762548585}}]\n", + "[{'project_id': 'monitoring_demo', 'feature_view_name': 'driver_stats', 'feature_name': 'conv_rate', 'metric_date': '2025-01-01', 'granularity': 'daily', 'data_source_type': 'batch', 'computed_at': '2026-04-21T13:41:42.687597+05:30', 'is_baseline': True, 'feature_type': 'numeric', 'row_count': 4922, 'null_count': 0, 'null_rate': 0.0, 'mean': 0.4988999058272324, 'stddev': 0.1975387054069251, 'min_val': 0.0, 'max_val': 1.0, 'p50': 0.4998365219303598, 'p75': 0.633892663793526, 'p90': 0.7521919750314627, 'p95': 0.825733080299169, 'p99': 0.9640086762359101, 'histogram': {'bins': [0.0, 0.05, 0.1, 0.15000000000000002, 0.2, 0.25, 0.30000000000000004, 0.35000000000000003, 0.4, 0.45, 0.5, 0.55, 0.6000000000000001, 0.65, 0.7000000000000001, 0.75, 0.8, 0.8500000000000001, 0.9, 0.9500000000000001, 1.0], 'counts': [53, 67, 75, 146, 180, 267, 355, 399, 432, 493, 505, 420, 411, 330, 283, 186, 124, 93, 46, 57], 'bin_width': 0.05}}]\n" + ] + } + ], + "source": [ + "# This cell is for reference — run it when the registry server is up.\n", + "\n", + "import requests\n", + "\n", + "BASE_URL = \"http://localhost:6572/api/v1\"\n", + "\n", + "# Auto-compute all metrics\n", + "resp = requests.post(f\"{BASE_URL}/monitoring/auto_compute\", json={\n", + " \"project\": \"monitoring_demo\",\n", + "})\n", + "print(resp.json())\n", + "\n", + "# Read per-feature metrics\n", + "resp = requests.get(f\"{BASE_URL}/monitoring/metrics/features\", params={\n", + " \"project\": \"monitoring_demo\",\n", + " 
\"feature_view_name\": \"driver_stats\",\n", + " \"feature_name\": \"conv_rate\",\n", + " \"granularity\": \"daily\",\n", + " \"data_source_type\": \"batch\",\n", + "})\n", + "print(resp.json())\n", + "\n", + "# Read baseline\n", + "resp = requests.get(f\"{BASE_URL}/monitoring/metrics/baseline\", params={\n", + " \"project\": \"monitoring_demo\",\n", + " \"feature_view_name\": \"driver_stats\",\n", + " \"feature_name\": \"conv_rate\",\n", + " \"data_source_type\": \"batch\",\n", + "})\n", + "print(resp.json())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 11: Monitoring Feature Serving Logs\n", + "\n", + "If your feature service has logging enabled, you can compute metrics from actual production traffic.\n", + "\n", + "### Define a feature service with logging" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "See the code cell above for the logging config pattern.\n", + "Once applied, log metrics can be computed with:\n", + " CLI: feast monitor run --source-type log\n", + " API: POST /monitoring/compute/log\n", + " SDK: monitoring.compute_log_metrics(project, feature_service_name)\n" + ] + } + ], + "source": [ + "# Example feature service definition with logging\n", + "#\n", + "# from feast import FeatureService, LoggingConfig\n", + "# from feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source import (\n", + "# PostgreSQLLoggingDestination,\n", + "# )\n", + "#\n", + "# driver_service = FeatureService(\n", + "# name=\"driver_service\",\n", + "# features=[driver_stats_fv],\n", + "# logging_config=LoggingConfig(\n", + "# destination=PostgreSQLLoggingDestination(table_name=\"feast_driver_logs\"),\n", + "# sample_rate=1.0,\n", + "# ),\n", + "# )\n", + "print(\"See the code cell above for the logging config pattern.\")\n", + "print(\"Once applied, log metrics can be computed with:\")\n", + "print(\" CLI: 
feast monitor run --source-type log\")\n", + "print(\" API: POST /monitoring/compute/log\")\n", + "print(\" SDK: monitoring.compute_log_metrics(project, feature_service_name)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compute log metrics (SDK)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment when you have a feature service with logging enabled\n", + "#\n", + "# result = monitoring.compute_log_metrics(\n", + "# project=\"monitoring_demo\",\n", + "# feature_service_name=\"driver_service\",\n", + "# granularity=\"daily\",\n", + "# )\n", + "# print(result)\n", + "\n", + "# Or auto-compute all log metrics\n", + "# result = monitoring.auto_compute_log_metrics(project=\"monitoring_demo\")\n", + "# print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read log vs. batch metrics side-by-side" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Uncomment the cell above once log metrics have been computed.\n" + ] + } + ], + "source": [ + "# Compare batch vs. 
log metrics for the same feature\n", + "#\n", + "# batch = monitoring.get_feature_metrics(\n", + "# project=\"monitoring_demo\",\n", + "# feature_view_name=\"driver_stats\",\n", + "# feature_name=\"conv_rate\",\n", + "# data_source_type=\"batch\",\n", + "# granularity=\"daily\",\n", + "# )\n", + "#\n", + "# log = monitoring.get_feature_metrics(\n", + "# project=\"monitoring_demo\",\n", + "# feature_view_name=\"driver_stats\",\n", + "# feature_name=\"conv_rate\",\n", + "# data_source_type=\"log\",\n", + "# granularity=\"daily\",\n", + "# )\n", + "#\n", + "# print(\"Batch metrics:\")\n", + "# for m in batch[:3]:\n", + "# print(f\" {m['metric_date']}: mean={m['mean']:.4f}\")\n", + "#\n", + "# print(\"\\nLog metrics:\")\n", + "# for m in log[:3]:\n", + "# print(f\" {m['metric_date']}: mean={m['mean']:.4f}\")\n", + "\n", + "print(\"Uncomment the cell above once log metrics have been computed.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 12: Scheduling in Production\n", + "\n", + "### Cron (simplest)\n", + "\n", + "```bash\n", + "# Compute all batch + log metrics daily at 2 AM\n", + "0 2 * * * cd /path/to/feast/repo && feast monitor run --source-type all >> /var/log/feast-monitor.log 2>&1\n", + "```\n", + "\n", + "### Airflow\n", + "\n", + "```python\n", + "from airflow.operators.bash import BashOperator\n", + "\n", + "monitor_task = BashOperator(\n", + " task_id=\"feast_monitor\",\n", + " bash_command=\"feast monitor run --source-type all\",\n", + " cwd=\"/path/to/feast/repo\",\n", + ")\n", + "```\n", + "\n", + "### Kubernetes CronJob\n", + "\n", + "```yaml\n", + "apiVersion: batch/v1\n", + "kind: CronJob\n", + "metadata:\n", + " name: feast-monitor\n", + "spec:\n", + " schedule: \"0 2 * * *\"\n", + " jobTemplate:\n", + " spec:\n", + " template:\n", + " spec:\n", + " containers:\n", + " - name: feast-monitor\n", + " image: feast-image:latest\n", + " command: [\"feast\", \"monitor\", \"run\", \"--source-type\", \"all\"]\n", + " 
volumeMounts:\n", + " - name: feast-repo\n", + " mountPath: /feast/repo\n", + " restartPolicy: OnFailure\n", + " volumes:\n", + " - name: feast-repo\n", + " configMap:\n", + " name: feast-repo-config\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "| Capability | CLI | REST API | SDK |\n", + "|-----------|-----|----------|-----|\n", + "| Auto-compute (all granularities) | `feast monitor run` | `POST /monitoring/auto_compute` | `monitoring.auto_compute_metrics()` |\n", + "| Targeted compute | `feast monitor run --feature-view X --granularity daily` | `POST /monitoring/compute` | `monitoring.compute_metrics()` |\n", + "| Set baseline | `feast monitor run --set-baseline` | `POST /monitoring/compute` (with `set_baseline: true`) | `monitoring.compute_metrics(set_baseline=True)` |\n", + "| Log metrics | `feast monitor run --source-type log` | `POST /monitoring/compute/log` | `monitoring.compute_log_metrics()` |\n", + "| On-demand exploration | — | `POST /monitoring/compute/transient` | `monitoring.compute_transient()` |\n", + "| Read metrics | — | `GET /monitoring/metrics/*` | `monitoring.get_feature_metrics()` etc. 
|\n", + "| Read baseline | — | `GET /monitoring/metrics/baseline` | `monitoring.get_baseline()` |\n", + "| Time-series | — | `GET /monitoring/metrics/timeseries` | `monitoring.get_timeseries()` |" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv312", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/infra/feast-operator/api/v1/featurestore_types.go b/infra/feast-operator/api/v1/featurestore_types.go index 81e1dfa14c1..8ea38a3af5f 100644 --- a/infra/feast-operator/api/v1/featurestore_types.go +++ b/infra/feast-operator/api/v1/featurestore_types.go @@ -127,6 +127,9 @@ type FeatureStoreSpec struct { AuthzConfig *AuthzConfig `json:"authz,omitempty"` CronJob *FeastCronJob `json:"cronJob,omitempty"` BatchEngine *BatchEngineConfig `json:"batchEngine,omitempty"` + // DataQualityMonitoring configures Data Quality Monitoring behaviour. + // +optional + DataQualityMonitoring *DataQualityMonitoringConfig `json:"dqm,omitempty"` // Replicas is the desired number of pod replicas. Used by the scale sub-resource. // Mutually exclusive with services.scaling.autoscaling. // +kubebuilder:default=1 @@ -229,6 +232,13 @@ type BatchEngineConfig struct { ConfigMapKey string `json:"configMapKey,omitempty"` } +// DataQualityMonitoringConfig defines the Data Quality Monitoring configuration. +type DataQualityMonitoringConfig struct { + // AutoBaseline controls whether baseline distribution is computed automatically on feast apply. Defaults to true. + // +kubebuilder:default=true + AutoBaseline *bool `json:"autoBaseline,omitempty"` +} + // JobSpec describes how the job execution will look like. 
type JobSpec struct { // PodTemplateAnnotations are annotations to be applied to the CronJob's PodTemplate diff --git a/infra/feast-operator/api/v1/zz_generated.deepcopy.go b/infra/feast-operator/api/v1/zz_generated.deepcopy.go index 6d31e31a9de..a662707e7eb 100644 --- a/infra/feast-operator/api/v1/zz_generated.deepcopy.go +++ b/infra/feast-operator/api/v1/zz_generated.deepcopy.go @@ -165,6 +165,26 @@ func (in *DefaultCtrConfigs) DeepCopy() *DefaultCtrConfigs { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DataQualityMonitoringConfig) DeepCopyInto(out *DataQualityMonitoringConfig) { + *out = *in + if in.AutoBaseline != nil { + in, out := &in.AutoBaseline, &out.AutoBaseline + *out = new(bool) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataQualityMonitoringConfig. +func (in *DataQualityMonitoringConfig) DeepCopy() *DataQualityMonitoringConfig { + if in == nil { + return nil + } + out := new(DataQualityMonitoringConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *FeastCronJob) DeepCopyInto(out *FeastCronJob) { *out = *in @@ -450,6 +470,11 @@ func (in *FeatureStoreSpec) DeepCopyInto(out *FeatureStoreSpec) { *out = new(BatchEngineConfig) (*in).DeepCopyInto(*out) } + if in.DataQualityMonitoring != nil { + in, out := &in.DataQualityMonitoring, &out.DataQualityMonitoring + *out = new(DataQualityMonitoringConfig) + (*in).DeepCopyInto(*out) + } if in.Replicas != nil { in, out := &in.Replicas, &out.Replicas *out = new(int32) diff --git a/infra/feast-operator/config/crd/bases/feast.dev_featurestores.yaml b/infra/feast-operator/config/crd/bases/feast.dev_featurestores.yaml index e1a1adfabe8..8e4f907ccc5 100644 --- a/infra/feast-operator/config/crd/bases/feast.dev_featurestores.yaml +++ b/infra/feast-operator/config/crd/bases/feast.dev_featurestores.yaml @@ -529,6 +529,15 @@ spec: description: The time zone name for the given schedule, see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones. type: string type: object + dqm: + description: DataQualityMonitoring configures Data Quality Monitoring behaviour. + properties: + autoBaseline: + default: true + description: AutoBaseline controls whether baseline distribution + is computed automatically on feast apply. Defaults to true. + type: boolean + type: object feastProject: description: FeastProject is the Feast project id. pattern: ^[A-Za-z0-9][A-Za-z0-9_-]*$ @@ -6487,6 +6496,15 @@ spec: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones. type: string type: object + dqm: + description: DataQualityMonitoring configures Data Quality Monitoring behaviour. + properties: + autoBaseline: + default: true + description: AutoBaseline controls whether baseline distribution + is computed automatically on feast apply. Defaults to true. + type: boolean + type: object feastProject: description: FeastProject is the Feast project id. 
pattern: ^[A-Za-z0-9][A-Za-z0-9_-]*$ diff --git a/infra/feast-operator/config/samples/v1_featurestore_serving.yaml b/infra/feast-operator/config/samples/v1_featurestore_serving.yaml index f60640624c9..412499412e6 100644 --- a/infra/feast-operator/config/samples/v1_featurestore_serving.yaml +++ b/infra/feast-operator/config/samples/v1_featurestore_serving.yaml @@ -26,8 +26,8 @@ spec: push: true # push/write request counters materialization: true # materialization counters and duration histograms freshness: false # feature freshness gauges (can be expensive at scale) - # Example: when a future SDK adds "registry_sync", enable it here - # registry_sync: false + offline_features: true # offline store retrieval counters, latency, row count + audit_logging: false # structured JSON audit logs via the feast.audit logger offlinePushBatching: enabled: true batchSize: 1000 # max rows per offline write batch diff --git a/infra/feast-operator/dist/install.yaml b/infra/feast-operator/dist/install.yaml index c466442b8e8..402bdcbcfcf 100644 --- a/infra/feast-operator/dist/install.yaml +++ b/infra/feast-operator/dist/install.yaml @@ -537,6 +537,15 @@ spec: description: The time zone name for the given schedule, see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones. type: string type: object + dqm: + description: DataQualityMonitoring configures Data Quality Monitoring behaviour. + properties: + autoBaseline: + default: true + description: AutoBaseline controls whether baseline distribution + is computed automatically on feast apply. Defaults to true. + type: boolean + type: object feastProject: description: FeastProject is the Feast project id. pattern: ^[A-Za-z0-9][A-Za-z0-9_-]*$ @@ -6495,6 +6504,15 @@ spec: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones. type: string type: object + dqm: + description: DataQualityMonitoring configures Data Quality Monitoring behaviour. 
+ properties: + autoBaseline: + default: true + description: AutoBaseline controls whether baseline distribution + is computed automatically on feast apply. Defaults to true. + type: boolean + type: object feastProject: description: FeastProject is the Feast project id. pattern: ^[A-Za-z0-9][A-Za-z0-9_-]*$ diff --git a/infra/feast-operator/docs/api/markdown/ref.md b/infra/feast-operator/docs/api/markdown/ref.md index 4f57ff865f5..d0872d596f3 100644 --- a/infra/feast-operator/docs/api/markdown/ref.md +++ b/infra/feast-operator/docs/api/markdown/ref.md @@ -121,6 +121,20 @@ _Appears in:_ | `image` _string_ | | +#### DataQualityMonitoringConfig + + + +DataQualityMonitoringConfig defines the Data Quality Monitoring configuration. + +_Appears in:_ +- [FeatureStoreSpec](#featurestorespec) + +| Field | Description | +| --- | --- | +| `autoBaseline` _boolean_ | AutoBaseline controls whether baseline distribution is computed automatically on feast apply. Defaults to true. | + + #### FeastCronJob @@ -275,6 +289,7 @@ _Appears in:_ | `authz` _[AuthzConfig](#authzconfig)_ | | | `cronJob` _[FeastCronJob](#feastcronjob)_ | | | `batchEngine` _[BatchEngineConfig](#batchengineconfig)_ | | +| `dqm` _[DataQualityMonitoringConfig](#dataqualitymonitoringconfig)_ | DataQualityMonitoring configures Data Quality Monitoring behaviour. | | `replicas` _integer_ | Replicas is the desired number of pod replicas. Used by the scale sub-resource. Mutually exclusive with services.scaling.autoscaling. | | `materialization` _[MaterializationConfig](#materializationconfig)_ | Materialization controls feature materialization behavior (batch size, pull strategy). 
diff --git a/infra/feast-operator/internal/controller/services/repo_config.go b/infra/feast-operator/internal/controller/services/repo_config.go index aa33819c2b4..272f671ecc8 100644 --- a/infra/feast-operator/internal/controller/services/repo_config.go +++ b/infra/feast-operator/internal/controller/services/repo_config.go @@ -106,6 +106,10 @@ func getServiceRepoConfig( } } + if appliedSpec.DataQualityMonitoring != nil { + setRepoConfigDataQualityMonitoring(appliedSpec.DataQualityMonitoring, &repoConfig) + } + return repoConfig, nil } @@ -486,6 +490,15 @@ func coerceStringToYamlType(v string) interface{} { return v } +func setRepoConfigDataQualityMonitoring(dqmConfig *feastdevv1.DataQualityMonitoringConfig, repoConfig *RepoConfig) { + if dqmConfig.AutoBaseline == nil { + return + } + repoConfig.DataQualityMonitoring = &DataQualityMonitoringYamlConfig{ + AutoBaseline: *dqmConfig.AutoBaseline, + } +} + func (feast *FeastServices) getClientFeatureStoreYaml() ([]byte, error) { clientRepo := getClientRepoConfig(feast.Handler.FeatureStore, feast) return yaml.Marshal(clientRepo) diff --git a/infra/feast-operator/internal/controller/services/repo_config_test.go b/infra/feast-operator/internal/controller/services/repo_config_test.go index 9550068d251..01c1a1d2c39 100644 --- a/infra/feast-operator/internal/controller/services/repo_config_test.go +++ b/infra/feast-operator/internal/controller/services/repo_config_test.go @@ -318,6 +318,30 @@ var _ = Describe("Repo Config", func() { Expect(repoConfig.OfflineStore).To(Equal(expectedOfflineConfig)) Expect(repoConfig.OnlineStore).To(Equal(expectedOnlineConfig)) Expect(repoConfig.Registry).To(Equal(expectedRegistryConfig)) + + By("Having DQM config with auto_baseline disabled") + featureStore = minimalFeatureStore() + dqmAutoBaseline := false + featureStore.Spec.DataQualityMonitoring = &feastdevv1.DataQualityMonitoringConfig{ + AutoBaseline: &dqmAutoBaseline, + } + ApplyDefaultsToStatus(featureStore) + repoConfig, err = 
getServiceRepoConfig(featureStore, emptyMockExtractConfigFromSecret, emptyMockExtractConfigFromConfigMap, false) + Expect(err).NotTo(HaveOccurred()) + Expect(repoConfig.DataQualityMonitoring).NotTo(BeNil()) + Expect(repoConfig.DataQualityMonitoring.AutoBaseline).To(BeFalse()) + + fsYaml, marshalErr := yaml.Marshal(repoConfig) + Expect(marshalErr).NotTo(HaveOccurred()) + Expect(string(fsYaml)).To(ContainSubstring("dqm:")) + Expect(string(fsYaml)).To(ContainSubstring("auto_baseline: false")) + + By("Having no DQM config — dqm should be nil") + featureStore = minimalFeatureStore() + ApplyDefaultsToStatus(featureStore) + repoConfig, err = getServiceRepoConfig(featureStore, emptyMockExtractConfigFromSecret, emptyMockExtractConfigFromConfigMap, false) + Expect(err).NotTo(HaveOccurred()) + Expect(repoConfig.DataQualityMonitoring).To(BeNil()) }) It("should set feature_server block with type local and all options", func() { diff --git a/infra/feast-operator/internal/controller/services/services_types.go b/infra/feast-operator/internal/controller/services/services_types.go index a68772806c5..6b70358b166 100644 --- a/infra/feast-operator/internal/controller/services/services_types.go +++ b/infra/feast-operator/internal/controller/services/services_types.go @@ -271,6 +271,7 @@ type RepoConfig struct { FeatureServer *FeatureServerYamlConfig `yaml:"feature_server,omitempty"` Materialization *MaterializationYamlConfig `yaml:"materialization,omitempty"` OpenLineage *OpenLineageYamlConfig `yaml:"openlineage,omitempty"` + DataQualityMonitoring *DataQualityMonitoringYamlConfig `yaml:"dqm,omitempty"` } // FeatureServerYamlConfig maps to the feature_server section of feature_store.yaml. @@ -298,6 +299,11 @@ type MetricsYamlConfig struct { Categories map[string]interface{} `yaml:",inline,omitempty"` } +// DataQualityMonitoringYamlConfig mirrors the Python DqmConfig in feature_store.yaml. 
+type DataQualityMonitoringYamlConfig struct { + AutoBaseline bool `yaml:"auto_baseline"` +} + // MaterializationYamlConfig maps to the materialization section of feature_store.yaml. // ExtraConfig is merged inline so future Feast MaterializationConfig fields appear // at the same YAML level as the typed fields above. diff --git a/sdk/python/feast/api/registry/rest/__init__.py b/sdk/python/feast/api/registry/rest/__init__.py index 14db40d7af6..6cc5a99934a 100644 --- a/sdk/python/feast/api/registry/rest/__init__.py +++ b/sdk/python/feast/api/registry/rest/__init__.py @@ -7,6 +7,7 @@ from feast.api.registry.rest.features import get_feature_router from feast.api.registry.rest.lineage import get_lineage_router from feast.api.registry.rest.metrics import get_metrics_router +from feast.api.registry.rest.monitoring import get_monitoring_router from feast.api.registry.rest.permissions import get_permission_router from feast.api.registry.rest.projects import get_project_router from feast.api.registry.rest.saved_datasets import get_saved_dataset_router @@ -25,3 +26,4 @@ def register_all_routes(app: FastAPI, grpc_handler, server=None): app.include_router(get_saved_dataset_router(grpc_handler)) app.include_router(get_search_router(grpc_handler)) app.include_router(get_metrics_router(grpc_handler, server)) + app.include_router(get_monitoring_router(grpc_handler, server)) diff --git a/sdk/python/feast/api/registry/rest/monitoring.py b/sdk/python/feast/api/registry/rest/monitoring.py new file mode 100644 index 00000000000..9cd7257f667 --- /dev/null +++ b/sdk/python/feast/api/registry/rest/monitoring.py @@ -0,0 +1,376 @@ +import logging +from datetime import date +from typing import Any, Dict, List, Optional + +from fastapi import APIRouter, HTTPException, Query +from pydantic import BaseModel, Field + +from feast.infra.offline_stores.offline_store import OfflineStore +from feast.permissions.action import AuthzedAction +from feast.permissions.security_manager import 
from feast.permissions.security_manager import assert_permissions

logger = logging.getLogger(__name__)

VALID_GRANULARITIES = OfflineStore.MONITORING_VALID_GRANULARITIES


class ComputeMetricsRequest(BaseModel):
    """Request body for ``POST /monitoring/compute``."""

    project: str
    feature_view_name: Optional[str] = None
    feature_names: Optional[List[str]] = None
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    granularity: str = Field("daily")
    set_baseline: bool = False


class AutoComputeRequest(BaseModel):
    """Request body for ``POST /monitoring/auto_compute``."""

    project: str
    feature_view_name: Optional[str] = None


class ComputeLogMetricsRequest(BaseModel):
    """Request body for ``POST /monitoring/compute/log``."""

    project: str
    feature_service_name: str
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    granularity: str = Field("daily")
    set_baseline: bool = False


class AutoComputeLogRequest(BaseModel):
    """Request body for ``POST /monitoring/auto_compute/log``."""

    project: str
    feature_service_name: Optional[str] = None


class ComputeTransientRequest(BaseModel):
    """Request body for ``POST /monitoring/compute/transient``."""

    project: str
    feature_view_name: str
    feature_names: Optional[List[str]] = None
    start_date: Optional[str] = None
    end_date: Optional[str] = None


def _parse_iso_date(value: Optional[str], field_name: str) -> Optional[date]:
    """Parse an optional ISO date string supplied by the client.

    Returns ``None`` for a missing or empty value. A value that
    ``date.fromisoformat`` rejects is a client error, so it is reported as
    HTTP 400 instead of escaping as an unhandled ``ValueError``.
    """
    if not value:
        return None
    try:
        return date.fromisoformat(value)
    except ValueError as e:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid {field_name} '{value}'. "
            f"Expected an ISO 8601 date such as 2025-01-31.",
        ) from e


def _validate_granularity(granularity: str) -> None:
    """Raise HTTP 400 unless ``granularity`` is in ``VALID_GRANULARITIES``."""
    if granularity not in VALID_GRANULARITIES:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid granularity '{granularity}'. "
            f"Must be one of {VALID_GRANULARITIES}",
        )


def _internal_error(operation: str, exc: Exception) -> HTTPException:
    """Log a failed monitoring operation and build the HTTP 500 to raise.

    Must be called from inside an ``except`` block so that
    ``logger.exception`` can attach the active traceback.
    """
    logger.exception("Monitoring %s failed", operation)
    return HTTPException(status_code=500, detail=str(exc))


def get_monitoring_router(grpc_handler, server=None):
    """Build the FastAPI router exposing the monitoring REST endpoints.

    The feature store is taken from ``server.store`` when ``server`` is given,
    otherwise from ``grpc_handler.store``. A single ``MonitoringService`` is
    created lazily on first use and shared by all handlers of this router.

    NOTE(review): permissions are asserted only when a feature view / feature
    service name is supplied; project-wide requests perform no assertion in
    this module. Confirm that is intended.
    NOTE(review): handlers are ``async def`` but call the service
    synchronously; presumably those calls block. Confirm whether plain ``def``
    handlers would be more appropriate.
    """
    router = APIRouter()

    _monitoring_service = None

    def _get_store():
        return server.store if server else grpc_handler.store

    def _get_monitoring_service():
        nonlocal _monitoring_service
        if _monitoring_service is None:
            # Imported lazily, as in the rest of this module's design, so the
            # monitoring package is only loaded when an endpoint is hit.
            from feast.monitoring.monitoring_service import MonitoringService

            _monitoring_service = MonitoringService(_get_store())
        return _monitoring_service

    def _authorize_feature_view(project: str, feature_view_name: str, action) -> None:
        """Look up the feature view in the registry and assert ``action`` on it."""
        fv = _get_store().registry.get_feature_view(
            name=feature_view_name, project=project
        )
        assert_permissions(fv, actions=[action])

    def _authorize_feature_service(
        project: str, feature_service_name: str, action
    ) -> None:
        """Look up the feature service in the registry and assert ``action`` on it."""
        fs = _get_store().registry.get_feature_service(
            name=feature_service_name, project=project
        )
        assert_permissions(fs, actions=[action])

    # ------------------------------------------------------------------ #
    #  DQM Job: submit and track
    # ------------------------------------------------------------------ #

    @router.post("/monitoring/compute", tags=["Monitoring"])
    async def compute_metrics(request: ComputeMetricsRequest):
        """Submit a DQM job to compute and store metrics. Returns job_id."""
        _validate_granularity(request.granularity)

        if request.feature_view_name:
            _authorize_feature_view(
                request.project, request.feature_view_name, AuthzedAction.UPDATE
            )

        svc = _get_monitoring_service()

        # Dates are forwarded as the strings the client sent; only the keys
        # that were actually provided are included.
        params: Dict[str, Any] = {}
        if request.start_date:
            params["start_date"] = request.start_date
        if request.end_date:
            params["end_date"] = request.end_date
        if request.feature_names:
            params["feature_names"] = request.feature_names
        params["granularity"] = request.granularity
        params["set_baseline"] = request.set_baseline

        job_id = svc.submit_job(
            project=request.project,
            job_type="compute",
            feature_view_name=request.feature_view_name,
            parameters=params,
        )

        # Execute synchronously for now; async worker is a future enhancement
        try:
            result = svc.execute_job(job_id)
            return {"job_id": job_id, **result}
        except Exception as e:
            raise _internal_error("compute", e) from e

    @router.post("/monitoring/auto_compute", tags=["Monitoring"])
    async def auto_compute(request: AutoComputeRequest):
        """Auto-detect date ranges and compute all granularities."""
        if request.feature_view_name:
            _authorize_feature_view(
                request.project, request.feature_view_name, AuthzedAction.UPDATE
            )

        svc = _get_monitoring_service()

        job_id = svc.submit_job(
            project=request.project,
            job_type="auto_compute",
            feature_view_name=request.feature_view_name,
        )

        try:
            result = svc.execute_job(job_id)
            return {"job_id": job_id, **result}
        except Exception as e:
            raise _internal_error("auto_compute", e) from e

    # ------------------------------------------------------------------ #
    #  Log source: compute from feature serving logs
    # ------------------------------------------------------------------ #

    @router.post("/monitoring/compute/log", tags=["Monitoring"])
    async def compute_log_metrics(request: ComputeLogMetricsRequest):
        """Compute metrics from feature serving logs for a feature service."""
        _validate_granularity(request.granularity)

        _authorize_feature_service(
            request.project, request.feature_service_name, AuthzedAction.UPDATE
        )

        svc = _get_monitoring_service()

        start_d = _parse_iso_date(request.start_date, "start_date")
        end_d = _parse_iso_date(request.end_date, "end_date")

        try:
            return svc.compute_log_metrics(
                project=request.project,
                feature_service_name=request.feature_service_name,
                start_date=start_d,
                end_date=end_d,
                granularity=request.granularity,
                set_baseline=request.set_baseline,
            )
        except Exception as e:
            raise _internal_error("compute/log", e) from e

    @router.post("/monitoring/auto_compute/log", tags=["Monitoring"])
    async def auto_compute_log(request: AutoComputeLogRequest):
        """Auto-detect date ranges from log data and compute all granularities."""
        if request.feature_service_name:
            _authorize_feature_service(
                request.project, request.feature_service_name, AuthzedAction.UPDATE
            )

        svc = _get_monitoring_service()
        try:
            return svc.auto_compute_log_metrics(
                project=request.project,
                feature_service_name=request.feature_service_name,
            )
        except Exception as e:
            raise _internal_error("auto_compute/log", e) from e

    @router.get("/monitoring/jobs/{job_id}", tags=["Monitoring"])
    async def get_job_status(job_id: str):
        """Return the stored job record, or HTTP 404 if the id is unknown."""
        svc = _get_monitoring_service()
        job = svc.get_job(job_id)
        if job is None:
            raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
        return job

    # ------------------------------------------------------------------ #
    #  Transient compute (not stored)
    # ------------------------------------------------------------------ #

    @router.post("/monitoring/compute/transient", tags=["Monitoring"])
    async def compute_transient(request: ComputeTransientRequest):
        """Compute metrics on-the-fly for an arbitrary date range. Results are
        returned directly and NOT persisted to the monitoring tables."""
        _authorize_feature_view(
            request.project, request.feature_view_name, AuthzedAction.DESCRIBE
        )

        svc = _get_monitoring_service()

        start_d = _parse_iso_date(request.start_date, "start_date")
        end_d = _parse_iso_date(request.end_date, "end_date")

        # Service errors are deliberately not wrapped here, matching the
        # original handler; only malformed dates are turned into HTTP 400.
        return svc.compute_transient(
            project=request.project,
            feature_view_name=request.feature_view_name,
            feature_names=request.feature_names,
            start_date=start_d,
            end_date=end_d,
        )

    # ------------------------------------------------------------------ #
    #  Read endpoints
    # ------------------------------------------------------------------ #

    @router.get("/monitoring/metrics/features", tags=["Monitoring"])
    async def get_feature_metrics(
        project: str = Query(...),
        feature_view_name: Optional[str] = Query(None),
        feature_name: Optional[str] = Query(None),
        feature_service_name: Optional[str] = Query(None),
        granularity: Optional[str] = Query(None),
        data_source_type: Optional[str] = Query(None),
        start_date: Optional[str] = Query(None),
        end_date: Optional[str] = Query(None),
    ):
        """Read stored per-feature metrics, filtered by the given query params."""
        if feature_view_name:
            _authorize_feature_view(project, feature_view_name, AuthzedAction.DESCRIBE)

        svc = _get_monitoring_service()
        return svc.get_feature_metrics(
            project=project,
            feature_service_name=feature_service_name,
            feature_view_name=feature_view_name,
            feature_name=feature_name,
            granularity=granularity,
            data_source_type=data_source_type,
            start_date=_parse_iso_date(start_date, "start_date"),
            end_date=_parse_iso_date(end_date, "end_date"),
        )

    @router.get("/monitoring/metrics/feature_views", tags=["Monitoring"])
    async def get_feature_view_metrics(
        project: str = Query(...),
        feature_view_name: Optional[str] = Query(None),
        feature_service_name: Optional[str] = Query(None),
        granularity: Optional[str] = Query(None),
        data_source_type: Optional[str] = Query(None),
        start_date: Optional[str] = Query(None),
        end_date: Optional[str] = Query(None),
    ):
        """Read stored feature-view-level metrics."""
        if feature_view_name:
            _authorize_feature_view(project, feature_view_name, AuthzedAction.DESCRIBE)

        svc = _get_monitoring_service()
        return svc.get_feature_view_metrics(
            project=project,
            feature_service_name=feature_service_name,
            feature_view_name=feature_view_name,
            granularity=granularity,
            data_source_type=data_source_type,
            start_date=_parse_iso_date(start_date, "start_date"),
            end_date=_parse_iso_date(end_date, "end_date"),
        )

    @router.get("/monitoring/metrics/feature_services", tags=["Monitoring"])
    async def get_feature_service_metrics(
        project: str = Query(...),
        feature_service_name: Optional[str] = Query(None),
        granularity: Optional[str] = Query(None),
        data_source_type: Optional[str] = Query(None),
        start_date: Optional[str] = Query(None),
        end_date: Optional[str] = Query(None),
    ):
        """Read stored feature-service-level metrics."""
        if feature_service_name:
            _authorize_feature_service(
                project, feature_service_name, AuthzedAction.DESCRIBE
            )

        svc = _get_monitoring_service()
        return svc.get_feature_service_metrics(
            project=project,
            feature_service_name=feature_service_name,
            granularity=granularity,
            data_source_type=data_source_type,
            start_date=_parse_iso_date(start_date, "start_date"),
            end_date=_parse_iso_date(end_date, "end_date"),
        )

    @router.get("/monitoring/metrics/baseline", tags=["Monitoring"])
    async def get_baseline(
        project: str = Query(...),
        feature_view_name: Optional[str] = Query(None),
        feature_name: Optional[str] = Query(None),
        data_source_type: Optional[str] = Query(None),
    ):
        """Read the stored baseline metrics."""
        if feature_view_name:
            _authorize_feature_view(project, feature_view_name, AuthzedAction.DESCRIBE)

        svc = _get_monitoring_service()
        return svc.get_baseline(
            project=project,
            feature_view_name=feature_view_name,
            feature_name=feature_name,
            data_source_type=data_source_type,
        )

    @router.get("/monitoring/metrics/timeseries", tags=["Monitoring"])
    async def get_timeseries(
        project: str = Query(...),
        feature_view_name: Optional[str] = Query(None),
        feature_name: Optional[str] = Query(None),
        feature_service_name: Optional[str] = Query(None),
        granularity: Optional[str] = Query(None),
        data_source_type: Optional[str] = Query(None),
        start_date: Optional[str] = Query(None),
        end_date: Optional[str] = Query(None),
    ):
        """Read stored metrics as a time series."""
        if feature_view_name:
            _authorize_feature_view(project, feature_view_name, AuthzedAction.DESCRIBE)

        svc = _get_monitoring_service()
        return svc.get_timeseries(
            project=project,
            feature_view_name=feature_view_name,
            feature_name=feature_name,
            feature_service_name=feature_service_name,
            granularity=granularity,
            data_source_type=data_source_type,
            start_date=_parse_iso_date(start_date, "start_date"),
            end_date=_parse_iso_date(end_date, "end_date"),
        )

    return router
Can be specified multiple times.", +) +@click.option( + "--start-date", + default=None, + help="Start date (YYYY-MM-DD). If omitted, auto-detected from source data.", +) +@click.option( + "--end-date", + default=None, + help="End date (YYYY-MM-DD). If omitted, auto-detected from source data.", +) +@click.option( + "--granularity", + "-g", + default=None, + type=click.Choice(list(VALID_GRANULARITIES)), + help="Metric granularity. If omitted, all granularities are computed (auto mode).", +) +@click.option( + "--set-baseline", + is_flag=True, + default=False, + help="Mark this computation as the baseline for drift detection.", +) +@click.option( + "--source-type", + type=click.Choice(["batch", "log", "all"]), + default="batch", + help="Data source type: 'batch' (offline store), 'log' (serving logs), or 'all'.", +) +@click.pass_context +def monitor_run( + ctx: click.Context, + project: Optional[str], + feature_view: Optional[str], + feature_name: tuple, + start_date: Optional[str], + end_date: Optional[str], + granularity: Optional[str], + set_baseline: bool, + source_type: str, +): + """Compute feature quality metrics. + + Without --start-date/--end-date/--granularity, runs in auto mode: + detects date ranges from source data and computes all granularities. + + Use --source-type log to compute metrics from feature serving logs + (requires feature services with logging configured). 
+ """ + store = create_feature_store(ctx) + + if project is None: + project = store.project + + from feast.monitoring.monitoring_service import MonitoringService + + svc = MonitoringService(store) + + auto_mode = start_date is None and end_date is None and granularity is None + feat_names: Optional[List[str]] = list(feature_name) if feature_name else None + + if source_type in ("batch", "all"): + _run_batch_monitoring( + svc, + project, + feature_view, + feat_names, + start_date, + end_date, + granularity, + set_baseline, + auto_mode, + ) + + if source_type in ("log", "all"): + _run_log_monitoring( + svc, + project, + feature_view, + start_date, + end_date, + granularity, + auto_mode, + ) + + +def _run_batch_monitoring( + svc, + project, + feature_view, + feat_names, + start_date, + end_date, + granularity, + set_baseline, + auto_mode, +): + if auto_mode and not set_baseline: + click.echo("Auto-computing batch metrics for all granularities...") + result = svc.auto_compute( + project=project, + feature_view_name=feature_view, + ) + click.echo(f"Status: {result['status']}") + click.echo(f"Feature views computed: {result['computed_feature_views']}") + click.echo(f"Features computed: {result['computed_features']}") + click.echo(f"Granularities: {', '.join(result['granularities'])}") + click.echo(f"Duration: {result['duration_ms']}ms") + else: + start_d = date.fromisoformat(start_date) if start_date else None + end_d = date.fromisoformat(end_date) if end_date else None + + result = svc.compute_metrics( + project=project, + feature_view_name=feature_view, + feature_names=feat_names, + start_date=start_d, + end_date=end_d, + granularity=granularity or "daily", + set_baseline=set_baseline, + ) + + click.echo(f"Status: {result['status']}") + click.echo(f"Granularity: {result['granularity']}") + click.echo(f"Features computed: {result['computed_features']}") + click.echo(f"Feature views computed: {result['computed_feature_views']}") + click.echo(f"Feature services computed: 
{result['computed_feature_services']}") + click.echo(f"Metric dates: {', '.join(result['metric_dates'])}") + click.echo(f"Duration: {result['duration_ms']}ms") + + if set_baseline: + click.echo("Baseline: SET") + + +def _run_log_monitoring( + svc, project, feature_service_name, start_date, end_date, granularity, auto_mode +): + if auto_mode: + click.echo("Auto-computing log metrics for all granularities...") + result = svc.auto_compute_log_metrics( + project=project, + feature_service_name=feature_service_name, + ) + click.echo(f"Status: {result['status']}") + click.echo(f"Feature services computed: {result['computed_feature_services']}") + click.echo(f"Features computed: {result['computed_features']}") + click.echo(f"Granularities: {', '.join(result['granularities'])}") + click.echo(f"Duration: {result['duration_ms']}ms") + else: + if not feature_service_name: + click.echo( + "Error: --feature-view (as feature service name) is required for log source with explicit dates." + ) + return + + start_d = date.fromisoformat(start_date) if start_date else None + end_d = date.fromisoformat(end_date) if end_date else None + + result = svc.compute_log_metrics( + project=project, + feature_service_name=feature_service_name, + start_date=start_d, + end_date=end_d, + granularity=granularity or "daily", + ) + + click.echo(f"Status: {result['status']}") + click.echo("Source: log") + click.echo(f"Features computed: {result.get('computed_features', 0)}") + click.echo(f"Duration: {result['duration_ms']}ms") diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py index f60eeb9d87d..58cd06f49e2 100644 --- a/sdk/python/feast/feature_server.py +++ b/sdk/python/feast/feature_server.py @@ -148,28 +148,71 @@ class ChatRequest(BaseModel): messages: List[ChatMessage] -def _resolve_feature_counts( +def _parse_feature_info( features: Union[List[str], "feast.FeatureService"], ) -> tuple: - """Return (feature_count, feature_view_count) from the resolved features. 
+ """Return ``(feature_view_names, feature_count)`` from resolved features. ``features`` is either a list of ``"feature_view:feature"`` strings or a ``FeatureService`` with ``feature_view_projections``. + + Returns: + (fv_names, feat_count) where fv_names is a list of unique feature + view name strings and feat_count is the total number of features. """ from feast.feature_service import FeatureService + from feast.utils import _parse_feature_ref if isinstance(features, FeatureService): projections = features.feature_view_projections - fv_count = len(projections) + fv_names = [p.name for p in projections] feat_count = sum(len(p.features) for p in projections) elif isinstance(features, list): feat_count = len(features) - fv_names = {ref.split(":")[0].split("@")[0] for ref in features if ":" in ref} - fv_count = len(fv_names) + fv_names = list({_parse_feature_ref(ref)[0] for ref in features if ":" in ref}) else: + fv_names = [] feat_count = 0 - fv_count = 0 - return str(feat_count), str(fv_count) + return fv_names, feat_count + + +def _resolve_feature_counts( + features: Union[List[str], "feast.FeatureService"], +) -> tuple: + """Return ``(feature_count_str, feature_view_count_str)`` for Prometheus labels.""" + fv_names, feat_count = _parse_feature_info(features) + return str(feat_count), str(len(fv_names)) + + +def _emit_online_audit( + request: GetOnlineFeaturesRequest, + features: Union[List[str], "feast.FeatureService"], + entity_count: int, + status: str, + latency_ms: float, +): + """Best-effort audit log emission for online feature requests.""" + try: + from feast.permissions.security_manager import get_security_manager + + requestor_id = "anonymous" + sm = get_security_manager() + if sm and sm.current_user: + requestor_id = sm.current_user.username or "anonymous" + + fv_names, feat_count = _parse_feature_info(features) + + feast_metrics.emit_online_audit_log( + requestor_id=requestor_id, + entity_keys=list(request.entities.keys()), + entity_count=entity_count, 
+ feature_views=fv_names, + feature_count=feat_count, + status=status, + latency_ms=latency_ms, + ) + except Exception: + logger.warning("Failed to emit online audit log", exc_info=True) async def _get_features( @@ -387,11 +430,22 @@ async def get_online_features(request: GetOnlineFeaturesRequest) -> Any: include_feature_view_version_metadata=request.include_feature_view_version_metadata, ) - if store._get_provider().async_supported.online.read: - response = await store.get_online_features_async(**read_params) # type: ignore - else: - response = await run_in_threadpool( - lambda: store.get_online_features(**read_params) # type: ignore + audit_start_ms = time.monotonic() * 1000 + audit_status = "success" + try: + if store._get_provider().async_supported.online.read: + response = await store.get_online_features_async(**read_params) # type: ignore + else: + response = await run_in_threadpool( + lambda: store.get_online_features(**read_params) # type: ignore + ) + except Exception: + audit_status = "error" + raise + finally: + audit_latency_ms = time.monotonic() * 1000 - audit_start_ms + _emit_online_audit( + request, features, entity_count, audit_status, audit_latency_ms ) response_dict = await run_in_threadpool( diff --git a/sdk/python/feast/infra/feature_servers/base_config.py b/sdk/python/feast/infra/feature_servers/base_config.py index df324dc57d3..14ad2fe505e 100644 --- a/sdk/python/feast/infra/feature_servers/base_config.py +++ b/sdk/python/feast/infra/feature_servers/base_config.py @@ -82,6 +82,17 @@ class MetricsConfig(FeastConfigBaseModel): """Emit per-feature-view freshness gauges (feast_feature_freshness_seconds).""" + offline_features: StrictBool = True + """Emit offline store retrieval metrics + (feast_offline_store_request_total, + feast_offline_store_request_latency_seconds, + feast_offline_store_row_count).""" + + audit_logging: StrictBool = False + """Emit structured JSON audit log entries for online and offline + feature requests via the 
``feast.audit`` logger. Captures requestor + identity, entity keys, feature views, row counts, and latency.""" + class BaseFeatureServerConfig(FeastConfigBaseModel): """Base Feature Server config that should be extended""" diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 3d0f84bb3a5..7e563dde905 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -1,7 +1,8 @@ import contextlib +import json import tempfile import uuid -from datetime import date, datetime, timedelta +from datetime import date, datetime, timedelta, timezone from pathlib import Path from typing import ( Any, @@ -42,6 +43,17 @@ RetrievalMetadata, ) from feast.infra.registry.base_registry import BaseRegistry +from feast.monitoring.monitoring_utils import ( + MON_TABLE_FEATURE, + MON_TABLE_FEATURE_SERVICE, + MON_TABLE_FEATURE_VIEW, + MON_TABLE_JOB, + empty_categorical_metric, + empty_numeric_metric, + monitoring_table_meta, + normalize_monitoring_row, + opt_float, +) from feast.on_demand_feature_view import OnDemandFeatureView from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.saved_dataset import SavedDatasetStorage @@ -477,6 +489,645 @@ def offline_write_batch( def _escape_query_columns(columns: List[str]) -> List[str]: return [f"`{x}`" for x in columns] + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + assert isinstance(data_source, BigQuerySource) + return _bq_compute_monitoring_metrics( + config, + data_source, + feature_columns, + timestamp_field, + start_date=start_date, + end_date=end_date, + 
histogram_bins=histogram_bins, + top_n=top_n, + ) + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + assert isinstance(data_source, BigQuerySource) + return _bq_get_monitoring_max_timestamp(config, data_source, timestamp_field) + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + _bq_ensure_monitoring_tables(config) + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + _bq_save_monitoring_metrics(config, metric_type, metrics) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + return _bq_query_monitoring_metrics( + config, project, metric_type, filters, start_date, end_date + ) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + _bq_clear_monitoring_baseline( + config, project, feature_view_name, feature_name, data_source_type + ) + + +# ------------------------------------------------------------------ # +# BigQuery monitoring metrics (native) +# ------------------------------------------------------------------ # + + +def _bq_monitoring_table_fqn(config: RepoConfig, table_name: str) -> str: + assert 
isinstance(config.offline_store, BigQueryOfflineStoreConfig) + project_id = config.offline_store.project_id + if not project_id: + client = _get_bigquery_client( + project=config.offline_store.billing_project_id, + location=config.offline_store.location, + ) + project_id = client.project + return f"`{project_id}.{config.offline_store.dataset}.{table_name}`" + + +def _bq_scalar_param_type(column: str) -> str: + if column == "is_baseline": + return "BOOL" + if column == "metric_date": + return "DATE" + if column == "computed_at": + return "TIMESTAMP" + if column in { + "row_count", + "null_count", + "total_row_count", + "total_features", + "features_with_nulls", + "total_feature_views", + }: + return "INT64" + if column in { + "null_rate", + "mean", + "stddev", + "min_val", + "max_val", + "p50", + "p75", + "p90", + "p95", + "p99", + "avg_null_rate", + "max_null_rate", + }: + return "FLOAT64" + return "STRING" + + +def _bq_merge_row( + config: RepoConfig, + table_fqn: str, + columns: List[str], + pk_columns: List[str], + row: Dict[str, Any], +) -> None: + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + non_pk = [c for c in columns if c not in pk_columns] + params: List[Any] = [] + using_parts: List[str] = [] + on_parts: List[str] = [] + merge_idx = 0 + for col in columns: + p = f"p{merge_idx}" + merge_idx += 1 + val = row.get(col) + if col == "histogram" and val is not None and not isinstance(val, str): + val = json.dumps(val) + param_type = _bq_scalar_param_type(col) + params.append(bigquery.ScalarQueryParameter(p, param_type, val)) + using_parts.append(f"@{p} AS {col}") + on_parts = [f"T.{col} = S.{col}" for col in pk_columns] + update_set = ", ".join(f"{c} = S.{c}" for c in non_pk) + merge_sql = f""" +MERGE {table_fqn} T +USING (SELECT {", ".join(using_parts)}) S +ON {" AND ".join(on_parts)} +WHEN MATCHED THEN UPDATE 
SET {update_set} +WHEN NOT MATCHED THEN INSERT ({", ".join(columns)}) VALUES ({", ".join(f"S.{c}" for c in columns)}) +""" + job_config = bigquery.QueryJobConfig(query_parameters=params) + client.query(merge_sql, job_config=job_config).result() + + +def _bq_save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], +) -> None: + table_short, columns, pk_columns = monitoring_table_meta(metric_type) + table_fqn = _bq_monitoring_table_fqn(config, table_short) + for row in metrics: + _bq_merge_row(config, table_fqn, columns, pk_columns, row) + + +def _bq_query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, +) -> List[Dict[str, Any]]: + table_short, columns, _ = monitoring_table_meta(metric_type) + table_fqn = _bq_monitoring_table_fqn(config, table_short) + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + params: List[Any] = [] + conditions: List[str] = [] + if project: + params.append( + bigquery.ScalarQueryParameter("project", "STRING", project), + ) + conditions.append("project_id = @project") + if filters: + for key, value in filters.items(): + if value is not None: + conditions.append(f"`{key}` = @{key}") + params.append( + bigquery.ScalarQueryParameter( + key, _bq_scalar_param_type(key), value + ) + ) + if start_date: + conditions.append("metric_date >= @start_date") + params.append(bigquery.ScalarQueryParameter("start_date", "DATE", start_date)) + if end_date: + conditions.append("metric_date <= @end_date") + params.append(bigquery.ScalarQueryParameter("end_date", "DATE", end_date)) + col_list = ", ".join(f"`{c}`" for c in columns) + where_sql = " AND ".join(conditions) if conditions else "TRUE" + order_col = "metric_date" if 
"metric_date" in columns else "job_id" + sql = f"SELECT {col_list} FROM {table_fqn} WHERE {where_sql} ORDER BY `{order_col}` ASC" + job_config = bigquery.QueryJobConfig(query_parameters=params) + job = client.query(sql, job_config=job_config) + job.result() + results: List[Dict[str, Any]] = [] + for r in job: + record = {columns[i]: r[i] for i in range(len(columns))} + results.append(normalize_monitoring_row(record)) + return results + + +def _bq_clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, +) -> None: + table_fqn = _bq_monitoring_table_fqn(config, MON_TABLE_FEATURE) + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + params: List[Any] = [ + bigquery.ScalarQueryParameter("project", "STRING", project), + ] + conditions = ["project_id = @project", "is_baseline = TRUE"] + if feature_view_name: + conditions.append("feature_view_name = @feature_view_name") + params.append( + bigquery.ScalarQueryParameter( + "feature_view_name", "STRING", feature_view_name + ) + ) + if feature_name: + conditions.append("feature_name = @feature_name") + params.append( + bigquery.ScalarQueryParameter("feature_name", "STRING", feature_name) + ) + if data_source_type: + conditions.append("data_source_type = @data_source_type") + params.append( + bigquery.ScalarQueryParameter( + "data_source_type", "STRING", data_source_type + ) + ) + sql = f"UPDATE {table_fqn} SET is_baseline = FALSE WHERE {' AND '.join(conditions)}" + job_config = bigquery.QueryJobConfig(query_parameters=params) + client.query(sql, job_config=job_config).result() + + +def _bq_ensure_monitoring_tables(config: RepoConfig) -> None: + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = 
_get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + ds = config.offline_store.dataset + proj = config.offline_store.project_id or client.project + feature_ddl = f""" +CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{MON_TABLE_FEATURE}` ( + project_id STRING NOT NULL, + feature_view_name STRING NOT NULL, + feature_name STRING NOT NULL, + metric_date DATE NOT NULL, + granularity STRING NOT NULL, + data_source_type STRING NOT NULL, + computed_at TIMESTAMP NOT NULL, + is_baseline BOOL NOT NULL, + feature_type STRING NOT NULL, + row_count INT64, + null_count INT64, + null_rate FLOAT64, + mean FLOAT64, + stddev FLOAT64, + min_val FLOAT64, + max_val FLOAT64, + p50 FLOAT64, + p75 FLOAT64, + p90 FLOAT64, + p95 FLOAT64, + p99 FLOAT64, + histogram STRING +) +PRIMARY KEY (project_id, feature_view_name, feature_name, metric_date, granularity, data_source_type) NOT ENFORCED +""" + view_ddl = f""" +CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{MON_TABLE_FEATURE_VIEW}` ( + project_id STRING NOT NULL, + feature_view_name STRING NOT NULL, + metric_date DATE NOT NULL, + granularity STRING NOT NULL, + data_source_type STRING NOT NULL, + computed_at TIMESTAMP NOT NULL, + is_baseline BOOL NOT NULL, + total_row_count INT64, + total_features INT64, + features_with_nulls INT64, + avg_null_rate FLOAT64, + max_null_rate FLOAT64 +) +PRIMARY KEY (project_id, feature_view_name, metric_date, granularity, data_source_type) NOT ENFORCED +""" + service_ddl = f""" +CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{MON_TABLE_FEATURE_SERVICE}` ( + project_id STRING NOT NULL, + feature_service_name STRING NOT NULL, + metric_date DATE NOT NULL, + granularity STRING NOT NULL, + data_source_type STRING NOT NULL, + computed_at TIMESTAMP NOT NULL, + is_baseline BOOL NOT NULL, + total_feature_views INT64, + total_features INT64, + avg_null_rate FLOAT64, + max_null_rate FLOAT64 +) +PRIMARY KEY (project_id, feature_service_name, metric_date, granularity, data_source_type) NOT ENFORCED +""" + 
job_ddl = f""" +CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{MON_TABLE_JOB}` ( + job_id STRING NOT NULL, + project_id STRING NOT NULL, + feature_view_name STRING, + job_type STRING NOT NULL, + status STRING NOT NULL, + parameters STRING, + metric_date DATE NOT NULL, + started_at TIMESTAMP, + completed_at TIMESTAMP, + error_message STRING, + result_summary STRING +) +PRIMARY KEY (job_id) NOT ENFORCED +""" + for ddl in (feature_ddl, view_ddl, service_ddl, job_ddl): + client.query(ddl).result() + + +def _bq_get_monitoring_max_timestamp( + config: RepoConfig, + data_source: BigQuerySource, + timestamp_field: str, +) -> Optional[datetime]: + from_expression = data_source.get_table_query_string() + ts_col = f"`{timestamp_field}`" + sql = f"SELECT MAX({ts_col}) AS _max_ts FROM {from_expression}" + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + job = client.query(sql) + job.result() + rows = list(job) + if not rows or rows[0][0] is None: + return None + val = rows[0][0] + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + if isinstance(val, date): + return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc) + return val # type: ignore[no-any-return] + + +def _bq_numeric_histogram( + config: RepoConfig, + from_expression: str, + col_name: str, + ts_filter: str, + bins: int, + min_val: float, + max_val: float, +) -> Dict[str, Any]: + q_col = f"`{col_name}`" + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + if min_val == max_val: + sql = ( + f"SELECT COUNT(*) AS cnt FROM {from_expression} AS _src " + f"WHERE {q_col} IS NOT NULL AND {ts_filter}" + ) + job = client.query(sql) + job.result() + hrows = list(job) + cnt = 
int(hrows[0][0]) if hrows else 0 + return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0} + + bin_width = (max_val - min_val) / bins + sql = f""" +SELECT + LEAST( + GREATEST( + CAST(FLOOR((CAST({q_col} AS FLOAT64) - {min_val}) / {bin_width}) AS INT64) + 1, + 1 + ), + {bins} + ) AS bucket, + COUNT(*) AS cnt +FROM {from_expression} AS _src +WHERE {q_col} IS NOT NULL AND {ts_filter} +GROUP BY bucket +ORDER BY bucket +""" + job = client.query(sql) + job.result() + rows = list(job) + counts = [0] * bins + for bucket, cnt in rows: + b = int(bucket) + if 1 <= b <= bins: + counts[b - 1] += int(cnt) + bin_edges = [min_val + i * bin_width for i in range(bins + 1)] + return { + "bins": [float(b) for b in bin_edges], + "counts": counts, + "bin_width": float(bin_width), + } + + +def _bq_numeric_stats( + config: RepoConfig, + from_expression: str, + feature_names: List[str], + ts_filter: str, + histogram_bins: int, +) -> List[Dict[str, Any]]: + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + select_parts: List[str] = ["COUNT(*) AS _row_count"] + for i, col in enumerate(feature_names): + q = f"`{col}`" + c = f"CAST({q} AS FLOAT64)" + select_parts.extend( + [ + f"COUNT({q}) AS c{i}_nn", + f"AVG({c}) AS c{i}_avg", + f"STDDEV_SAMP({c}) AS c{i}_stddev", + f"MIN({c}) AS c{i}_min", + f"MAX({c}) AS c{i}_max", + f"APPROX_QUANTILES({c}, 100)[OFFSET(50)] AS c{i}_p50", + f"APPROX_QUANTILES({c}, 100)[OFFSET(75)] AS c{i}_p75", + f"APPROX_QUANTILES({c}, 100)[OFFSET(90)] AS c{i}_p90", + f"APPROX_QUANTILES({c}, 100)[OFFSET(95)] AS c{i}_p95", + f"APPROX_QUANTILES({c}, 100)[OFFSET(99)] AS c{i}_p99", + ] + ) + query = ( + f"SELECT {', '.join(select_parts)} " + f"FROM {from_expression} AS _src WHERE {ts_filter}" + ) + job = client.query(query) + job.result() + rows = list(job) + if not rows: + return [empty_numeric_metric(n) for n in 
feature_names] + row = rows[0] + row_count = row["_row_count"] or 0 + results: List[Dict[str, Any]] = [] + for i, col in enumerate(feature_names): + base = f"c{i}_" + non_null = row[f"{base}nn"] or 0 + null_count = int(row_count) - int(non_null) + min_v = opt_float(row[f"{base}min"]) + max_v = opt_float(row[f"{base}max"]) + result: Dict[str, Any] = { + "feature_name": col, + "feature_type": "numeric", + "row_count": int(row_count), + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": opt_float(row[f"{base}avg"]), + "stddev": opt_float(row[f"{base}stddev"]), + "min_val": min_v, + "max_val": max_v, + "p50": opt_float(row[f"{base}p50"]), + "p75": opt_float(row[f"{base}p75"]), + "p90": opt_float(row[f"{base}p90"]), + "p95": opt_float(row[f"{base}p95"]), + "p99": opt_float(row[f"{base}p99"]), + "histogram": None, + } + if min_v is not None and max_v is not None and non_null and int(non_null) > 0: + result["histogram"] = _bq_numeric_histogram( + config, + from_expression, + col, + ts_filter, + histogram_bins, + min_v, + max_v, + ) + results.append(result) + return results + + +def _bq_categorical_stats( + config: RepoConfig, + from_expression: str, + col_name: str, + ts_filter: str, + top_n: int, +) -> Dict[str, Any]: + q_col = f"`{col_name}`" + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + query = f""" +WITH filtered AS ( + SELECT * FROM {from_expression} AS _src WHERE {ts_filter} +) +SELECT + (SELECT COUNT(*) FROM filtered) AS row_count, + (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, + (SELECT COUNT(DISTINCT {q_col}) FROM filtered WHERE {q_col} IS NOT NULL) AS unique_count, + CAST({q_col} AS STRING) AS value, + COUNT(*) AS cnt +FROM filtered +WHERE {q_col} IS NOT NULL +GROUP BY CAST({q_col} AS STRING) +ORDER BY cnt DESC +LIMIT {int(top_n)} +""" + job 
= client.query(query) + job.result() + rows = list(job) + if not rows: + return empty_categorical_metric(col_name) + row_count = rows[0]["row_count"] + null_count = rows[0]["null_count"] + unique_count = rows[0]["unique_count"] + top_entries = [{"value": r["value"], "count": r["cnt"]} for r in rows] + top_total = sum(e["count"] for e in top_entries) + other_count = (row_count - null_count) - top_total + return { + "feature_name": col_name, + "feature_type": "categorical", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": { + "values": top_entries, + "other_count": max(other_count, 0), + "unique_count": unique_count, + }, + } + + +def _bq_compute_monitoring_metrics( + config: RepoConfig, + data_source: BigQuerySource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, +) -> List[Dict[str, Any]]: + from_expression = data_source.get_table_query_string() + ts_filter = get_timestamp_filter_sql( + start_date, + end_date, + timestamp_field, + date_partition_column=data_source.date_partition_column, + quote_fields=False, + cast_style="timestamp_func", + ) + numeric_features = [n for n, t in feature_columns if t == "numeric"] + categorical_features = [n for n, t in feature_columns if t == "categorical"] + results: List[Dict[str, Any]] = [] + if numeric_features: + results.extend( + _bq_numeric_stats( + config, + from_expression, + numeric_features, + ts_filter, + histogram_bins, + ) + ) + for col_name in categorical_features: + results.append( + _bq_categorical_stats(config, from_expression, col_name, ts_filter, top_n) + ) + return results + class BigQueryRetrievalJob(RetrievalJob): def __init__( diff --git 
a/sdk/python/feast/infra/offline_stores/contrib/oracle_offline_store/oracle.py b/sdk/python/feast/infra/offline_stores/contrib/oracle_offline_store/oracle.py index 43c37f8ec10..25517fa74c3 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/oracle_offline_store/oracle.py +++ b/sdk/python/feast/infra/offline_stores/contrib/oracle_offline_store/oracle.py @@ -1,6 +1,7 @@ -from datetime import datetime +import json +from datetime import date, datetime, timezone from pathlib import Path -from typing import Any, Callable, List, Literal, Optional, Union +from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union import ibis import pandas as pd @@ -22,7 +23,19 @@ write_logged_features_ibis, ) from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob +from feast.infra.offline_stores.offline_utils import get_timestamp_filter_sql from feast.infra.registry.base_registry import BaseRegistry +from feast.monitoring.monitoring_utils import ( + MON_TABLE_FEATURE, + MON_TABLE_FEATURE_SERVICE, + MON_TABLE_FEATURE_VIEW, + MON_TABLE_JOB, + empty_categorical_metric, + empty_numeric_metric, + monitoring_table_meta, + normalize_monitoring_row, + opt_float, +) from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.utils import compute_non_entity_date_range @@ -179,6 +192,299 @@ def _build_entity_df_from_feature_sources( return pd.concat(entity_dfs, ignore_index=True).drop_duplicates() +# ------------------------------------------------------------------ # +# Oracle monitoring helpers +# ------------------------------------------------------------------ # + + +def _oracle_quote_ident(name: str) -> str: + return f'"{name}"' + + +def _oracle_ts_where(ts_filter: str) -> str: + return f"({ts_filter})" if (ts_filter and ts_filter.strip()) else "1=1" + + +def _oracle_fetchall(con, sql: str): + cur = con.raw_sql(sql) + try: + return cur.fetchall() + finally: + if hasattr(cur, "close"): + cur.close() + + +def _oracle_exec(con, sql: 
def _oracle_numeric_histogram(
    con,
    from_expression: str,
    col_name: str,
    ts_filter: str,
    bins: int,
    min_val: float,
    max_val: float,
) -> Dict[str, Any]:
    """Build an equal-width histogram of one numeric column with one Oracle query.

    Args:
        con: Connection object handed to ``_oracle_fetchall``.
        from_expression: Table name or sub-query placed after ``FROM``.
        col_name: Column to bucket; NULL values are excluded.
        ts_filter: SQL predicate restricting the rows; blank means no filter.
        bins: Number of buckets. Must be >= 1 unless ``min_val == max_val``.
        min_val: Lower edge of the first bucket.
        max_val: Upper edge of the last bucket.

    Returns:
        A dict with ``"bins"`` (the ``bins + 1`` edges), ``"counts"`` (one
        count per bucket) and ``"bin_width"``. When ``min_val == max_val`` a
        single zero-width bucket holding the non-null row count is returned.

    Raises:
        ValueError: If ``bins`` is smaller than 1 and the range is not
            degenerate (previously a ``ZeroDivisionError`` for 0 and a
            silently empty histogram for negative values).
    """
    q_col = _oracle_quote_ident(col_name)
    tw = _oracle_ts_where(ts_filter)

    if min_val == max_val:
        # Degenerate range: every non-null value falls in the same bucket.
        cnt_row = _oracle_fetchall(
            con,
            f"SELECT COUNT(*) FROM {from_expression} _src "
            f"WHERE {q_col} IS NOT NULL AND {tw}",
        )
        cnt = (cnt_row[0][0] if cnt_row else 0) or 0
        return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0}

    if bins < 1:
        raise ValueError(f"bins must be a positive integer, got {bins!r}")

    upper = max_val + (max_val - min_val) * 1e-10
    bin_width = (max_val - min_val) / bins
    # Guard the SQL division in case the width underflowed to zero.
    bw = bin_width if bin_width != 0 else 1e-300

    # LEAST/GREATEST clamp the computed index into 1..bins so that the value
    # equal to max_val lands in the last bucket instead of bucket bins + 1.
    query = (
        f"SELECT bucket, COUNT(*) AS cnt FROM ("
        f" SELECT "
        f" CASE WHEN {q_col} IS NULL THEN NULL "
        f" WHEN {min_val} = {upper} THEN 1 "
        f" ELSE LEAST(GREATEST("
        f" FLOOR((CAST({q_col} AS NUMBER) - {min_val}) / {bw}) + 1, "
        f" 1), {bins}) "
        f" END AS bucket "
        f" FROM {from_expression} _src "
        f" WHERE {q_col} IS NOT NULL AND {tw}"
        f") sub "
        f"WHERE bucket IS NOT NULL "
        f"GROUP BY bucket ORDER BY bucket"
    )

    rows = _oracle_fetchall(con, query)
    counts = [0] * bins
    for bucket, cnt in rows:
        b = int(bucket)
        if 1 <= b <= bins:
            counts[b - 1] = cnt

    bin_edges = [min_val + i * bin_width for i in range(bins + 1)]
    return {
        "bins": [float(b) for b in bin_edges],
        "counts": counts,
        "bin_width": float(bin_width),
    }
GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY {c})", + ] + ) + + tw = _oracle_ts_where(ts_filter) + query = f"SELECT {', '.join(select_parts)} FROM {from_expression} _src WHERE {tw}" + + row = (_oracle_fetchall(con, query) or [None])[0] + + if row is None: + return [empty_numeric_metric(n) for n in feature_names] + + row_count = row[0] + results: List[Dict[str, Any]] = [] + + for i, col in enumerate(feature_names): + base = 1 + i * 10 + non_null = row[base] or 0 + null_count = row_count - non_null + + min_val = opt_float(row[base + 3]) + max_val = opt_float(row[base + 4]) + + result: Dict[str, Any] = { + "feature_name": col, + "feature_type": "numeric", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": opt_float(row[base + 1]), + "stddev": opt_float(row[base + 2]), + "min_val": min_val, + "max_val": max_val, + "p50": opt_float(row[base + 5]), + "p75": opt_float(row[base + 6]), + "p90": opt_float(row[base + 7]), + "p95": opt_float(row[base + 8]), + "p99": opt_float(row[base + 9]), + "histogram": None, + } + + if min_val is not None and max_val is not None and non_null > 0: + result["histogram"] = _oracle_numeric_histogram( + con, + from_expression, + col, + ts_filter, + histogram_bins, + min_val, + max_val, + ) + + results.append(result) + + return results + + +def _oracle_categorical_stats( + con, + from_expression: str, + col_name: str, + ts_filter: str, + top_n: int, +) -> Dict[str, Any]: + q_col = _oracle_quote_ident(col_name) + + tw = _oracle_ts_where(ts_filter) + query = ( + f"WITH filtered AS (" + f" SELECT * FROM {from_expression} _src WHERE {tw}" + f") " + f"SELECT " + f" (SELECT COUNT(*) FROM filtered) AS row_count, " + f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, " 
+ f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered " + f" WHERE {q_col} IS NOT NULL) AS unique_count, " + f" TO_CHAR({q_col}) AS value, COUNT(*) AS cnt " + f"FROM filtered WHERE {q_col} IS NOT NULL " + f"GROUP BY {q_col} " + f"ORDER BY cnt DESC " + f"FETCH FIRST {int(top_n)} ROWS ONLY" + ) + + rows = _oracle_fetchall(con, query) + + if not rows: + return empty_categorical_metric(col_name) + + row_count = rows[0][0] + null_count = rows[0][1] + unique_count = rows[0][2] + + top_entries = [{"value": r[3], "count": r[4]} for r in rows] + top_total = sum(e["count"] for e in top_entries) + other_count = (row_count - null_count) - top_total + + return { + "feature_name": col_name, + "feature_type": "categorical", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": { + "values": top_entries, + "other_count": max(other_count, 0), + "unique_count": unique_count, + }, + } + + +def _oracle_escape_literal(val: Any) -> str: + if val is None: + return "NULL" + if isinstance(val, bool): + return "1" if val else "0" + if isinstance(val, (int, float)) and not isinstance(val, bool): + return str(val) + if isinstance(val, datetime): + s = val.isoformat(sep=" ", timespec="seconds") + return f"TIMESTAMP '{s}'" + if isinstance(val, date): + return f"DATE '{val.isoformat()}'" + s = str(val).replace("'", "''") + return f"'{s}'" + + +def _oracle_merge_metric_row( + con, table: str, columns: List[str], pk_cols: List[str], row: Dict[str, Any] +) -> None: + def qid(c: str) -> str: + return f'"{c}"' + + non_pk = [c for c in columns if c not in pk_cols] + vals = [] + for c in columns: + v = row.get(c) + if c == "histogram" and v is not None and not isinstance(v, str): + v = json.dumps(v) + vals.append(_oracle_escape_literal(v)) + + join_cond = " AND ".join(f"t.{qid(c)} = 
def _oracle_try_execute_ddl(con, ddl: str) -> None:
    """Execute a DDL statement, tolerating ORA-00955 (name already in use).

    The statement is stripped, its single quotes are doubled, and it is run
    through ``EXECUTE IMMEDIATE`` inside an anonymous PL/SQL block whose
    exception handler re-raises every error except SQLCODE -955.
    """
    statement = ddl.strip().replace("'", "''")
    block_lines = [
        "BEGIN",
        f" EXECUTE IMMEDIATE '{statement}';",
        "EXCEPTION",
        " WHEN OTHERS THEN",
        " IF SQLCODE != -955 THEN RAISE;",
        " END IF;",
        "END;",
    ]
    _oracle_exec(con, "\n".join(block_lines))
if numeric_features: + results.extend( + _oracle_numeric_stats( + con, + from_expression, + numeric_features, + ts_filter, + histogram_bins, + ) + ) + + for col_name in categorical_features: + results.append( + _oracle_categorical_stats( + con, + from_expression, + col_name, + ts_filter, + top_n, + ) + ) + + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, OracleOfflineStoreConfig) + assert isinstance(data_source, OracleSource) + + from_expression = data_source.get_table_query_string() + ts_col = _oracle_quote_ident(timestamp_field) + + con = get_ibis_connection(config) + rows = _oracle_fetchall( + con, + f"SELECT MAX({ts_col}) FROM {from_expression} _src", + ) + row = rows[0] if rows else None + + if row is None or row[0] is None: + return None + val = row[0] + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + if isinstance(val, date): + return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc) + return None + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, OracleOfflineStoreConfig) + con = get_ibis_connection(config) + + _oracle_try_execute_ddl( + con, + f""" + CREATE TABLE {MON_TABLE_FEATURE} ( + project_id VARCHAR2(255) NOT NULL, + feature_view_name VARCHAR2(255) NOT NULL, + feature_name VARCHAR2(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR2(20) DEFAULT 'daily' NOT NULL, + data_source_type VARCHAR2(50) DEFAULT 'batch' NOT NULL, + computed_at TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL, + is_baseline NUMBER(1) DEFAULT 0 NOT NULL, + feature_type VARCHAR2(50) NOT NULL, + row_count NUMBER, + null_count NUMBER, + null_rate NUMBER, + mean NUMBER, + stddev NUMBER, + min_val NUMBER, + max_val NUMBER, + p50 NUMBER, + p75 NUMBER, + p90 NUMBER, + p95 
NUMBER, + p99 NUMBER, + histogram VARCHAR2(4000), + CONSTRAINT pk_fm PRIMARY KEY (project_id, feature_view_name, + feature_name, metric_date, granularity, data_source_type) + ) + """, + ) + + _oracle_try_execute_ddl( + con, + f""" + CREATE TABLE {MON_TABLE_FEATURE_VIEW} ( + project_id VARCHAR2(255) NOT NULL, + feature_view_name VARCHAR2(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR2(20) DEFAULT 'daily' NOT NULL, + data_source_type VARCHAR2(50) DEFAULT 'batch' NOT NULL, + computed_at TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL, + is_baseline NUMBER(1) DEFAULT 0 NOT NULL, + total_row_count NUMBER, + total_features NUMBER, + features_with_nulls NUMBER, + avg_null_rate NUMBER, + max_null_rate NUMBER, + CONSTRAINT pk_fvm PRIMARY KEY (project_id, feature_view_name, + metric_date, granularity, data_source_type) + ) + """, + ) + + _oracle_try_execute_ddl( + con, + f""" + CREATE TABLE {MON_TABLE_FEATURE_SERVICE} ( + project_id VARCHAR2(255) NOT NULL, + feature_service_name VARCHAR2(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR2(20) DEFAULT 'daily' NOT NULL, + data_source_type VARCHAR2(50) DEFAULT 'batch' NOT NULL, + computed_at TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL, + is_baseline NUMBER(1) DEFAULT 0 NOT NULL, + total_feature_views NUMBER, + total_features NUMBER, + avg_null_rate NUMBER, + max_null_rate NUMBER, + CONSTRAINT pk_fsm PRIMARY KEY (project_id, feature_service_name, + metric_date, granularity, data_source_type) + ) + """, + ) + + _oracle_try_execute_ddl( + con, + f""" + CREATE TABLE {MON_TABLE_JOB} ( + job_id VARCHAR2(36) NOT NULL, + project_id VARCHAR2(255) NOT NULL, + feature_view_name VARCHAR2(255), + job_type VARCHAR2(50) NOT NULL, + status VARCHAR2(20) DEFAULT 'pending' NOT NULL, + parameters VARCHAR2(4000), + metric_date DATE NOT NULL, + started_at TIMESTAMP WITH TIME ZONE, + completed_at TIMESTAMP WITH TIME ZONE, + error_message VARCHAR2(4000), + result_summary VARCHAR2(4000), + CONSTRAINT 
pk_fmj PRIMARY KEY (job_id) + ) + """, + ) + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, OracleOfflineStoreConfig) + + table, columns, pk_columns = monitoring_table_meta(metric_type) + con = get_ibis_connection(config) + for row in metrics: + _oracle_merge_metric_row(con, table, columns, pk_columns, row) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, OracleOfflineStoreConfig) + + table, columns, _ = monitoring_table_meta(metric_type) + + conditions: list = [] + if project: + conditions.append( + f"{_oracle_quote_ident('project_id')} = {_oracle_escape_literal(project)}" + ) + if filters: + for key, value in filters.items(): + if value is not None: + conditions.append( + f"{_oracle_quote_ident(key)} = {_oracle_escape_literal(value)}" + ) + if start_date: + conditions.append( + f"{_oracle_quote_ident('metric_date')} >= {_oracle_escape_literal(start_date)}" + ) + if end_date: + conditions.append( + f"{_oracle_quote_ident('metric_date')} <= {_oracle_escape_literal(end_date)}" + ) + + where_sql = " AND ".join(conditions) if conditions else "1=1" + col_list = ", ".join(_oracle_quote_ident(c) for c in columns) + order_col = "metric_date" if "metric_date" in columns else "job_id" + + con = get_ibis_connection(config) + rows = _oracle_fetchall( + con, + f"SELECT {col_list} FROM {table} WHERE {where_sql} " + f"ORDER BY {_oracle_quote_ident(order_col)} ASC", + ) + + results = [] + for row in rows: + record = dict(zip(columns, row)) + results.append(normalize_monitoring_row(record)) + + return results + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + 
project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, OracleOfflineStoreConfig) + + conditions = [ + f"{_oracle_quote_ident('project_id')} = {_oracle_escape_literal(project)}" + ] + if feature_view_name: + conditions.append( + f"{_oracle_quote_ident('feature_view_name')} = " + f"{_oracle_escape_literal(feature_view_name)}" + ) + if feature_name: + conditions.append( + f"{_oracle_quote_ident('feature_name')} = " + f"{_oracle_escape_literal(feature_name)}" + ) + if data_source_type: + conditions.append( + f"{_oracle_quote_ident('data_source_type')} = " + f"{_oracle_escape_literal(data_source_type)}" + ) + conditions.append(f"{_oracle_quote_ident('is_baseline')} = 1") + + where_sql = " AND ".join(conditions) + con = get_ibis_connection(config) + _oracle_exec( + con, + f"UPDATE {MON_TABLE_FEATURE} SET {_oracle_quote_ident('is_baseline')} = 0 " + f"WHERE {where_sql}", + ) diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py index 50e48208647..b9fccecba79 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py @@ -1,6 +1,6 @@ import contextlib from dataclasses import asdict -from datetime import datetime, timezone +from datetime import date, datetime, timezone from enum import Enum from typing import ( Any, @@ -42,6 +42,17 @@ get_query_schema, ) from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig +from feast.monitoring.monitoring_utils import ( + MON_TABLE_FEATURE, + MON_TABLE_FEATURE_SERVICE, + MON_TABLE_FEATURE_VIEW, + MON_TABLE_JOB, + empty_categorical_metric, + empty_numeric_metric, + monitoring_table_meta, + normalize_monitoring_row, + opt_float, +) from 
feast.on_demand_feature_view import OnDemandFeatureView from feast.repo_config import RepoConfig from feast.saved_dataset import SavedDatasetStorage @@ -289,6 +300,260 @@ def pull_all_from_table_or_query( on_demand_feature_views=None, ) + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) + assert isinstance(data_source, PostgreSQLSource) + + from_expression = data_source.get_table_query_string() + ts_filter = get_timestamp_filter_sql( + start_date, + end_date, + timestamp_field, + tz=timezone.utc, + cast_style="timestamptz", + date_time_separator=" ", + ) + + numeric_features = [n for n, t in feature_columns if t == "numeric"] + categorical_features = [n for n, t in feature_columns if t == "categorical"] + results: List[Dict[str, Any]] = [] + + with _get_conn(config.offline_store) as conn: + conn.read_only = True + + if numeric_features: + results.extend( + _sql_numeric_stats( + conn, + from_expression, + numeric_features, + ts_filter, + histogram_bins, + ) + ) + + for col_name in categorical_features: + results.append( + _sql_categorical_stats( + conn, + from_expression, + col_name, + ts_filter, + top_n, + ) + ) + + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) + assert isinstance(data_source, PostgreSQLSource) + + from_expression = data_source.get_table_query_string_with_alias("max_ts_alias") + + with _get_conn(config.offline_store) as conn: + conn.read_only = True + with conn.cursor() as cur: + cur.execute(f'SELECT 
MAX("{timestamp_field}") FROM {from_expression}') + row = cur.fetchone() + + if row is None or row[0] is None: + return None + val = row[0] + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc) + + # ------------------------------------------------------------------ # + # Monitoring metrics storage (native PostgreSQL) + # ------------------------------------------------------------------ # + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) + with _get_conn(config.offline_store) as conn, conn.cursor() as cur: + cur.execute(f""" + CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE} ( + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255) NOT NULL, + feature_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + feature_type VARCHAR(50) NOT NULL, + row_count BIGINT, + null_count BIGINT, + null_rate DOUBLE PRECISION, + mean DOUBLE PRECISION, + stddev DOUBLE PRECISION, + min_val DOUBLE PRECISION, + max_val DOUBLE PRECISION, + p50 DOUBLE PRECISION, + p75 DOUBLE PRECISION, + p90 DOUBLE PRECISION, + p95 DOUBLE PRECISION, + p99 DOUBLE PRECISION, + histogram JSONB, + PRIMARY KEY (project_id, feature_view_name, feature_name, + metric_date, granularity, data_source_type) + ); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_project + ON {MON_TABLE_FEATURE} (project_id); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_view + ON {MON_TABLE_FEATURE} (project_id, feature_view_name); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_date + ON {MON_TABLE_FEATURE} (metric_date); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_granularity + ON 
{MON_TABLE_FEATURE} (granularity); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_baseline + ON {MON_TABLE_FEATURE} (project_id, feature_view_name, feature_name) + WHERE is_baseline = TRUE; + """) + + cur.execute(f""" + CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_VIEW} ( + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + total_row_count BIGINT, + total_features INTEGER, + features_with_nulls INTEGER, + avg_null_rate DOUBLE PRECISION, + max_null_rate DOUBLE PRECISION, + PRIMARY KEY (project_id, feature_view_name, metric_date, + granularity, data_source_type) + ); + """) + + cur.execute(f""" + CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_SERVICE} ( + project_id VARCHAR(255) NOT NULL, + feature_service_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + total_feature_views INTEGER, + total_features INTEGER, + avg_null_rate DOUBLE PRECISION, + max_null_rate DOUBLE PRECISION, + PRIMARY KEY (project_id, feature_service_name, metric_date, + granularity, data_source_type) + ); + """) + + cur.execute(f""" + CREATE TABLE IF NOT EXISTS {MON_TABLE_JOB} ( + job_id VARCHAR(36) PRIMARY KEY, + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255), + job_type VARCHAR(50) NOT NULL, + status VARCHAR(20) NOT NULL DEFAULT 'pending', + parameters TEXT, + metric_date DATE NOT NULL, + started_at TIMESTAMPTZ, + completed_at TIMESTAMPTZ, + error_message TEXT, + result_summary TEXT + ); + CREATE INDEX IF NOT EXISTS idx_fm_jobs_status + ON {MON_TABLE_JOB} (status); + CREATE INDEX IF NOT EXISTS 
idx_fm_jobs_project + ON {MON_TABLE_JOB} (project_id); + """) + conn.commit() + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) + + table, columns, pk_columns = monitoring_table_meta(metric_type) + _mon_upsert(config.offline_store, table, columns, pk_columns, metrics) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional["date"] = None, + end_date: Optional["date"] = None, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) + + _, columns, _ = monitoring_table_meta(metric_type) + return _mon_query( + config.offline_store, + metric_type, + columns, + project, + filters, + start_date, + end_date, + ) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) + + conditions = [sql.SQL("project_id = %s")] + params: list = [project] + + if feature_view_name: + conditions.append(sql.SQL("feature_view_name = %s")) + params.append(feature_view_name) + if feature_name: + conditions.append(sql.SQL("feature_name = %s")) + params.append(feature_name) + if data_source_type: + conditions.append(sql.SQL("data_source_type = %s")) + params.append(data_source_type) + + conditions.append(sql.SQL("is_baseline = TRUE")) + + query = sql.SQL("UPDATE {} SET is_baseline = FALSE WHERE {}").format( + sql.Identifier(MON_TABLE_FEATURE), + sql.SQL(" AND ").join(conditions), + ) + + with _get_conn(config.offline_store) as conn, conn.cursor() as cur: + cur.execute(query, params) + conn.commit() + class 
PostgreSQLRetrievalJob(RetrievalJob): def __init__( @@ -782,3 +1047,295 @@ def _get_entity_schema( {% endfor %} {% endif %} """ + + +# ------------------------------------------------------------------ # +# Monitoring SQL push-down helpers +# ------------------------------------------------------------------ # + + +def _sql_numeric_stats( + conn, + from_expression: str, + feature_names: List[str], + ts_filter: str, + histogram_bins: int, +) -> List[Dict[str, Any]]: + """Batch-compute numeric statistics via one SQL query, then histograms.""" + # 11 aggregate columns per feature (non_null, mean..p99) + 1 row_count + select_parts = ["COUNT(*)"] + for col in feature_names: + q = f'"{col}"' + c = f"{q}::float8" + select_parts.extend( + [ + f"COUNT({q})", + f"AVG({c})", + f"STDDEV_SAMP({c})", + f"MIN({c})", + f"MAX({c})", + f"PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY {c})", + ] + ) + + query = ( + f"SELECT {', '.join(select_parts)} " + f"FROM {from_expression} AS _src WHERE {ts_filter}" + ) + + with conn.cursor() as cur: + cur.execute(query) + row = cur.fetchone() + + if row is None: + return [empty_numeric_metric(n) for n in feature_names] + + row_count = row[0] + results: List[Dict[str, Any]] = [] + + for i, col in enumerate(feature_names): + base = 1 + i * 10 + non_null = row[base] or 0 + null_count = row_count - non_null + + min_val = opt_float(row[base + 3]) + max_val = opt_float(row[base + 4]) + + result: Dict[str, Any] = { + "feature_name": col, + "feature_type": "numeric", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": opt_float(row[base + 1]), + "stddev": opt_float(row[base + 2]), + "min_val": min_val, + "max_val": max_val, + "p50": opt_float(row[base + 5]), 
+ "p75": opt_float(row[base + 6]), + "p90": opt_float(row[base + 7]), + "p95": opt_float(row[base + 8]), + "p99": opt_float(row[base + 9]), + "histogram": None, + } + + if min_val is not None and max_val is not None and non_null > 0: + result["histogram"] = _sql_numeric_histogram( + conn, + from_expression, + col, + ts_filter, + histogram_bins, + min_val, + max_val, + ) + + results.append(result) + + return results + + +def _sql_numeric_histogram( + conn, + from_expression: str, + col_name: str, + ts_filter: str, + bins: int, + min_val: float, + max_val: float, +) -> Dict[str, Any]: + q_col = f'"{col_name}"' + + if min_val == max_val: + with conn.cursor() as cur: + cur.execute( + f"SELECT COUNT(*) FROM {from_expression} AS _src " + f"WHERE {q_col} IS NOT NULL AND {ts_filter}" + ) + cnt = (cur.fetchone() or (0,))[0] + return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0} + + upper = max_val + (max_val - min_val) * 1e-10 + bin_width = (max_val - min_val) / bins + + query = ( + f"SELECT width_bucket({q_col}::float8, {min_val}, {upper}, {bins}) AS bucket, " + f"COUNT(*) AS cnt " + f"FROM {from_expression} AS _src " + f"WHERE {q_col} IS NOT NULL AND {ts_filter} " + f"GROUP BY bucket ORDER BY bucket" + ) + + with conn.cursor() as cur: + cur.execute(query) + rows = cur.fetchall() + + counts = [0] * bins + for bucket, cnt in rows: + if 1 <= bucket <= bins: + counts[bucket - 1] = cnt + + bin_edges = [min_val + i * bin_width for i in range(bins + 1)] + return { + "bins": [float(b) for b in bin_edges], + "counts": counts, + "bin_width": float(bin_width), + } + + +def _sql_categorical_stats( + conn, + from_expression: str, + col_name: str, + ts_filter: str, + top_n: int, +) -> Dict[str, Any]: + q_col = f'"{col_name}"' + + query = ( + f"WITH filtered AS (" + f" SELECT * FROM {from_expression} AS _src WHERE {ts_filter}" + f") " + f"SELECT " + f" (SELECT COUNT(*) FROM filtered) AS row_count, " + f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, " 
def _mon_upsert(
    pg_config: PostgreSQLConfig,
    table: str,
    columns: List[str],
    pk_columns: List[str],
    rows: List[Dict[str, Any]],
) -> None:
    """Insert monitoring rows, overwriting existing rows with the same key.

    Args:
        pg_config: Connection settings handed to ``_get_conn``.
        table: Target table name.
        columns: Every column to write; values are read from each row with
            ``row.get(column)``, so missing keys are written as NULL.
        pk_columns: Columns forming the conflict target.
        rows: Metric dicts to store. All rows are written on one connection
            and committed together.

    A ``histogram`` value is serialised to JSON unless it is already a
    string, matching the Oracle helper and avoiding double encoding.
    """
    import json as _json

    non_pk = [c for c in columns if c not in pk_columns]
    col_ids = sql.SQL(", ").join(sql.Identifier(c) for c in columns)
    placeholders = sql.SQL(", ").join(sql.Placeholder() for _ in columns)
    pk_ids = sql.SQL(", ").join(sql.Identifier(c) for c in pk_columns)

    if non_pk:
        update_clause = sql.SQL(", ").join(
            sql.SQL("{} = EXCLUDED.{}").format(sql.Identifier(c), sql.Identifier(c))
            for c in non_pk
        )
        conflict_action = sql.SQL("DO UPDATE SET {}").format(update_clause)
    else:
        # Every column is part of the key: there is nothing to overwrite, and
        # an empty SET list would be a syntax error.
        conflict_action = sql.SQL("DO NOTHING")

    query = sql.SQL("INSERT INTO {} ({}) VALUES ({}) ON CONFLICT ({}) {}").format(
        sql.Identifier(table), col_ids, placeholders, pk_ids, conflict_action
    )

    with _get_conn(pg_config) as conn, conn.cursor() as cur:
        for row in rows:
            values = []
            for col in columns:
                val = row.get(col)
                if col == "histogram" and val is not None and not isinstance(val, str):
                    val = _json.dumps(val)
                values.append(val)
            cur.execute(query, values)
        conn.commit()
a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py @@ -1,9 +1,11 @@ +import json import os import tempfile import uuid import warnings from dataclasses import asdict, dataclass -from datetime import datetime, timezone +from datetime import date, datetime, timezone +from datetime import time as dt_time from typing import ( TYPE_CHECKING, Any, @@ -50,6 +52,17 @@ ) from feast.infra.offline_stores.offline_utils import get_timestamp_filter_sql from feast.infra.registry.base_registry import BaseRegistry +from feast.monitoring.monitoring_utils import ( + MON_TABLE_FEATURE, + MON_TABLE_FEATURE_SERVICE, + MON_TABLE_FEATURE_VIEW, + MON_TABLE_JOB, + empty_categorical_metric, + empty_numeric_metric, + monitoring_table_meta, + normalize_monitoring_row, + opt_float, +) from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.saved_dataset import SavedDatasetStorage from feast.type_map import spark_schema_to_np_dtypes @@ -423,6 +436,494 @@ def pull_all_from_table_or_query( config=config, ) + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, SparkOfflineStoreConfig) + assert isinstance(data_source, SparkSource) + + spark_session = get_spark_session_or_start_new_with_repoconfig( + store_config=config.offline_store + ) + from_expression = data_source.get_table_query_string() + ts_filter = get_timestamp_filter_sql( + start_date, + end_date, + timestamp_field, + tz=timezone.utc, + quote_fields=False, + ) + ts_clause = ts_filter if ts_filter else "1=1" + + numeric_features = [n for n, t in feature_columns if t == "numeric"] + categorical_features = [n for n, 
t in feature_columns if t == "categorical"] + results: List[Dict[str, Any]] = [] + + if numeric_features: + results.extend( + _spark_sql_numeric_stats( + spark_session, + from_expression, + numeric_features, + ts_clause, + histogram_bins, + ) + ) + + for col_name in categorical_features: + results.append( + _spark_sql_categorical_stats( + spark_session, + from_expression, + col_name, + ts_clause, + top_n, + ) + ) + + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, SparkOfflineStoreConfig) + assert isinstance(data_source, SparkSource) + + spark_session = get_spark_session_or_start_new_with_repoconfig( + store_config=config.offline_store + ) + from_expression = data_source.get_table_query_string() + q_ts = f"`{timestamp_field}`" + sql = f"SELECT MAX({q_ts}) AS max_ts FROM {from_expression} AS _src" + row = spark_session.sql(sql).collect() + if not row or row[0][0] is None: + return None + val = row[0][0] + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + if isinstance(val, date): + return datetime.combine(val, dt_time.min, tzinfo=timezone.utc) + return pandas.to_datetime(val, utc=True).to_pydatetime() + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, SparkOfflineStoreConfig) + spark_session = get_spark_session_or_start_new_with_repoconfig( + store_config=config.offline_store + ) + for stmt in _SPARK_MONITORING_DDL_STATEMENTS: + spark_session.sql(stmt) + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, SparkOfflineStoreConfig) + table, columns, pk_columns = monitoring_table_meta(metric_type) + pdf_new = pd.DataFrame([{c: m.get(c) for c in 
columns} for m in metrics]) + pdf_new = _spark_normalize_histogram_column(pdf_new) + + spark_session = get_spark_session_or_start_new_with_repoconfig( + store_config=config.offline_store + ) + if spark_session.catalog.tableExists(table): + pdf_old = spark_session.table(table).toPandas() + pdf_merged = _spark_pandas_upsert(pdf_old, pdf_new, pk_columns) + else: + pdf_merged = pdf_new + + spark_session.createDataFrame(pdf_merged).write.mode("overwrite").saveAsTable( + table + ) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, SparkOfflineStoreConfig) + table, columns, _ = monitoring_table_meta(metric_type) + spark_session = get_spark_session_or_start_new_with_repoconfig( + store_config=config.offline_store + ) + if not spark_session.catalog.tableExists(table): + return [] + + from pyspark.sql import functions as F + + df = spark_session.table(table) + if project: + df = df.filter(F.col("project_id") == project) + if filters: + for key, value in filters.items(): + if value is not None: + df = df.filter(F.col(key) == value) + if start_date is not None and "metric_date" in df.columns: + df = df.filter(F.col("metric_date") >= F.lit(start_date)) + if end_date is not None and "metric_date" in df.columns: + df = df.filter(F.col("metric_date") <= F.lit(end_date)) + + order_col = "metric_date" if "metric_date" in df.columns else "job_id" + rows = df.orderBy(order_col).collect() + return _spark_rows_to_metric_dicts(rows, columns) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, SparkOfflineStoreConfig) + spark_session = 
get_spark_session_or_start_new_with_repoconfig( + store_config=config.offline_store + ) + if not spark_session.catalog.tableExists(MON_TABLE_FEATURE): + return + + pdf = spark_session.table(MON_TABLE_FEATURE).toPandas() + if pdf is None: + return + mask = (pdf["project_id"] == project) & (pdf["is_baseline"] == True) # noqa: E712 + if feature_view_name is not None: + mask &= pdf["feature_view_name"] == feature_view_name + if feature_name is not None: + mask &= pdf["feature_name"] == feature_name + if data_source_type is not None: + mask &= pdf["data_source_type"] == data_source_type + + pdf.loc[mask, "is_baseline"] = False + spark_session.createDataFrame(pdf).write.mode("overwrite").saveAsTable( + MON_TABLE_FEATURE + ) + + +_SPARK_MONITORING_DDL_STATEMENTS = [ + f""" +CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE} ( + project_id STRING NOT NULL, + feature_view_name STRING NOT NULL, + feature_name STRING NOT NULL, + metric_date DATE NOT NULL, + granularity STRING NOT NULL, + data_source_type STRING NOT NULL, + computed_at TIMESTAMP NOT NULL, + is_baseline BOOLEAN NOT NULL, + feature_type STRING NOT NULL, + row_count BIGINT, + null_count BIGINT, + null_rate DOUBLE, + mean DOUBLE, + stddev DOUBLE, + min_val DOUBLE, + max_val DOUBLE, + p50 DOUBLE, + p75 DOUBLE, + p90 DOUBLE, + p95 DOUBLE, + p99 DOUBLE, + histogram STRING +) USING PARQUET +""", + f""" +CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_VIEW} ( + project_id STRING NOT NULL, + feature_view_name STRING NOT NULL, + metric_date DATE NOT NULL, + granularity STRING NOT NULL, + data_source_type STRING NOT NULL, + computed_at TIMESTAMP NOT NULL, + is_baseline BOOLEAN NOT NULL, + total_row_count BIGINT, + total_features INT, + features_with_nulls INT, + avg_null_rate DOUBLE, + max_null_rate DOUBLE +) USING PARQUET +""", + f""" +CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_SERVICE} ( + project_id STRING NOT NULL, + feature_service_name STRING NOT NULL, + metric_date DATE NOT NULL, + granularity STRING NOT NULL, + 
data_source_type STRING NOT NULL, + computed_at TIMESTAMP NOT NULL, + is_baseline BOOLEAN NOT NULL, + total_feature_views INT, + total_features INT, + avg_null_rate DOUBLE, + max_null_rate DOUBLE +) USING PARQUET +""", + f""" +CREATE TABLE IF NOT EXISTS {MON_TABLE_JOB} ( + job_id STRING NOT NULL, + project_id STRING NOT NULL, + feature_view_name STRING, + job_type STRING NOT NULL, + status STRING NOT NULL, + parameters STRING, + metric_date DATE NOT NULL, + started_at TIMESTAMP, + completed_at TIMESTAMP, + error_message STRING, + result_summary STRING +) USING PARQUET +""", +] + + +def _spark_normalize_histogram_column(pdf: pd.DataFrame) -> pd.DataFrame: + if "histogram" not in pdf.columns: + return pdf + out = pdf.copy() + + def _ser(x: Any) -> Any: + if x is None: + return None + if isinstance(x, str): + return x + return json.dumps(x) + + out["histogram"] = out["histogram"].map(_ser) + return out + + +def _spark_pandas_upsert( + pdf_old: pd.DataFrame, + pdf_new: pd.DataFrame, + pk_columns: List[str], +) -> pd.DataFrame: + if pdf_old.empty: + return pdf_new + old_idx = pdf_old.set_index(pk_columns) + new_idx = pdf_new.set_index(pk_columns) + kept = old_idx.loc[~old_idx.index.isin(new_idx.index)] + kept_df = kept.reset_index() + return pd.concat([kept_df, pdf_new], ignore_index=True) + + +def _spark_sql_numeric_stats( + spark_session: SparkSession, + from_expression: str, + feature_names: List[str], + ts_clause: str, + histogram_bins: int, +) -> List[Dict[str, Any]]: + select_parts = ["COUNT(*)"] + for col in feature_names: + q = f"`{col}`" + c = f"CAST({q} AS DOUBLE)" + select_parts.extend( + [ + f"COUNT({q})", + f"AVG({c})", + f"STDDEV_SAMP({c})", + f"MIN({c})", + f"MAX({c})", + f"PERCENTILE_APPROX({c}, 0.50)", + f"PERCENTILE_APPROX({c}, 0.75)", + f"PERCENTILE_APPROX({c}, 0.90)", + f"PERCENTILE_APPROX({c}, 0.95)", + f"PERCENTILE_APPROX({c}, 0.99)", + ] + ) + + query = ( + f"SELECT {', '.join(select_parts)} " + f"FROM {from_expression} AS _src WHERE {ts_clause}" 
+ ) + rows = spark_session.sql(query).collect() + if not rows or rows[0] is None: + return [empty_numeric_metric(n) for n in feature_names] + + row = rows[0] + row_count = int(row[0] or 0) + results: List[Dict[str, Any]] = [] + + for i, col in enumerate(feature_names): + base = 1 + i * 10 + non_null = int(row[base] or 0) + null_count = row_count - non_null + + min_val = opt_float(row[base + 3]) + max_val = opt_float(row[base + 4]) + + result: Dict[str, Any] = { + "feature_name": col, + "feature_type": "numeric", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": opt_float(row[base + 1]), + "stddev": opt_float(row[base + 2]), + "min_val": min_val, + "max_val": max_val, + "p50": opt_float(row[base + 5]), + "p75": opt_float(row[base + 6]), + "p90": opt_float(row[base + 7]), + "p95": opt_float(row[base + 8]), + "p99": opt_float(row[base + 9]), + "histogram": None, + } + + if min_val is not None and max_val is not None and non_null > 0: + result["histogram"] = _spark_sql_numeric_histogram( + spark_session, + from_expression, + col, + ts_clause, + histogram_bins, + min_val, + max_val, + ) + + results.append(result) + + return results + + +def _spark_sql_numeric_histogram( + spark_session: SparkSession, + from_expression: str, + col_name: str, + ts_clause: str, + bins: int, + min_val: float, + max_val: float, +) -> Dict[str, Any]: + q_col = f"`{col_name}`" + + if min_val == max_val: + sql = ( + f"SELECT COUNT(*) FROM {from_expression} AS _src " + f"WHERE {q_col} IS NOT NULL AND {ts_clause}" + ) + cnt = int(spark_session.sql(sql).collect()[0][0] or 0) + return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0} + + bin_width = (max_val - min_val) / bins + cast_col = f"CAST({q_col} AS DOUBLE)" + inner = ( + f"CASE WHEN {min_val} = {max_val} THEN 1 " + f"ELSE LEAST(GREATEST(FLOOR(({cast_col} - {min_val}) / {bin_width}) + 1, 1), {bins}) " + f"END AS bucket" + ) + + query = ( + f"SELECT 
bucket, COUNT(*) AS cnt FROM (" + f" SELECT {inner} " + f" FROM {from_expression} AS _src " + f" WHERE {q_col} IS NOT NULL AND {ts_clause}" + f") AS _b WHERE bucket IS NOT NULL " + f"GROUP BY bucket ORDER BY bucket" + ) + hrows = spark_session.sql(query).collect() + counts = [0] * bins + for hr in hrows: + bucket = int(hr[0] or 0) + cnt = int(hr[1] or 0) + if 1 <= bucket <= bins: + counts[bucket - 1] = cnt + + bin_edges = [min_val + i * bin_width for i in range(bins + 1)] + return { + "bins": [float(b) for b in bin_edges], + "counts": counts, + "bin_width": float(bin_width), + } + + +def _spark_sql_categorical_stats( + spark_session: SparkSession, + from_expression: str, + col_name: str, + ts_clause: str, + top_n: int, +) -> Dict[str, Any]: + q_col = f"`{col_name}`" + + query = ( + f"WITH filtered AS (" + f" SELECT * FROM {from_expression} AS _src WHERE {ts_clause}" + f") " + f"SELECT " + f" (SELECT COUNT(*) FROM filtered) AS row_count, " + f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, " + f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered " + f" WHERE {q_col} IS NOT NULL) AS unique_count, " + f" CAST({q_col} AS STRING) AS value, COUNT(*) AS cnt " + f"FROM filtered WHERE {q_col} IS NOT NULL " + f"GROUP BY {q_col} ORDER BY cnt DESC LIMIT {int(top_n)}" + ) + + rows = spark_session.sql(query).collect() + if not rows: + return empty_categorical_metric(col_name) + + row_count = int(rows[0][0] or 0) + null_count = int(rows[0][1] or 0) + unique_count = int(rows[0][2] or 0) + + top_entries = [{"value": r[3], "count": int(r[4] or 0)} for r in rows] + top_total = sum(e["count"] for e in top_entries) + other_count = (row_count - null_count) - top_total + + return { + "feature_name": col_name, + "feature_type": "categorical", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, 
+ "p95": None, + "p99": None, + "histogram": { + "values": top_entries, + "other_count": max(other_count, 0), + "unique_count": unique_count, + }, + } + + +def _spark_rows_to_metric_dicts( + rows: List[Any], + columns: List[str], +) -> List[Dict[str, Any]]: + out: List[Dict[str, Any]] = [] + for r in rows: + d = r.asDict() + rec = {c: d.get(c) for c in columns} + out.append(normalize_monitoring_row(rec)) + return out + class SparkRetrievalJob(RetrievalJob): def __init__( diff --git a/sdk/python/feast/infra/offline_stores/dask.py b/sdk/python/feast/infra/offline_stores/dask.py index 3e640ce5af0..cc7b9dbe4df 100644 --- a/sdk/python/feast/infra/offline_stores/dask.py +++ b/sdk/python/feast/infra/offline_stores/dask.py @@ -1,15 +1,18 @@ +import json import os import uuid -from datetime import datetime, timezone +from datetime import date, datetime, timezone from pathlib import Path from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union import dask import dask.dataframe as dd +import numpy as np import pandas as pd import pyarrow +import pyarrow.compute as pc import pyarrow.dataset -import pyarrow.parquet +import pyarrow.parquet as pq import pytz from feast.data_source import DataSource @@ -34,6 +37,18 @@ get_pyarrow_schema_from_batch_source, ) from feast.infra.registry.base_registry import BaseRegistry +from feast.monitoring.monitoring_utils import ( + FEATURE_METRICS_COLUMNS, + FEATURE_METRICS_PK, + FEATURE_SERVICE_METRICS_COLUMNS, + FEATURE_SERVICE_METRICS_PK, + FEATURE_VIEW_METRICS_COLUMNS, + FEATURE_VIEW_METRICS_PK, + JOB_COLUMNS, + JOB_PK, + normalize_monitoring_row, + opt_float, +) from feast.on_demand_feature_view import OnDemandFeatureView from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.saved_dataset import SavedDatasetStorage @@ -582,6 +597,398 @@ def offline_write_batch( writer.write_table(new_table) writer.close() + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: 
DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, DaskOfflineStoreConfig) + assert isinstance(data_source, FileSource) + + table = _dask_read_batch_arrow(data_source, config.repo_path) + table = _dask_filter_arrow_by_timestamp( + table, timestamp_field, start_date, end_date + ) + + results: List[Dict[str, Any]] = [] + for name, ftype in feature_columns: + if name not in table.column_names: + continue + col = table[name] + if ftype == "numeric": + m = _dask_compute_numeric_metrics(col, histogram_bins) + elif ftype == "categorical": + m = _dask_compute_categorical_metrics(col, top_n) + else: + continue + m["feature_name"] = name + results.append(m) + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, DaskOfflineStoreConfig) + assert isinstance(data_source, FileSource) + + absolute_path = FileSource.get_uri_for_file_path( + repo_path=config.repo_path, + uri=data_source.file_options.uri, + ) + filesystem, path = FileSource.create_filesystem_and_path( + str(absolute_path), data_source.file_options.s3_endpoint_override + ) + try: + t = pq.read_table(path, filesystem=filesystem, columns=[timestamp_field]) + except Exception: + return None + if t.num_rows == 0: + return None + arr = t[timestamp_field] + mx = pc.max(arr) # type: ignore[attr-defined] + val = mx.as_py() + if val is None: + return None + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + if isinstance(val, date): + return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc) + return None + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert 
isinstance(config.offline_store, DaskOfflineStoreConfig) + base = os.path.join(_dask_monitoring_base(config), _DASK_MON_DIR) + os.makedirs(base, exist_ok=True) + + tables = [ + (_DASK_FEATURE_METRICS_FILE, FEATURE_METRICS_COLUMNS), + (_DASK_VIEW_METRICS_FILE, FEATURE_VIEW_METRICS_COLUMNS), + (_DASK_SERVICE_METRICS_FILE, FEATURE_SERVICE_METRICS_COLUMNS), + (_DASK_JOB_FILE, JOB_COLUMNS), + ] + for fname, columns in tables: + fpath = _dask_monitoring_path(config, fname) + if not os.path.isfile(fpath): + os.makedirs(os.path.dirname(fpath), exist_ok=True) + pd.DataFrame(columns=columns).to_parquet(fpath, index=False) + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, DaskOfflineStoreConfig) + + fname, columns, pk = _dask_mon_table_meta(metric_type) + path = _dask_monitoring_path(config, fname) + _dask_parquet_upsert(path, columns, pk, metrics) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, DaskOfflineStoreConfig) + + fname, columns, _ = _dask_mon_table_meta(metric_type) + path = _dask_monitoring_path(config, fname) + return _dask_parquet_query( + path, columns, project, filters, start_date, end_date + ) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, DaskOfflineStoreConfig) + + path = _dask_monitoring_path(config, _DASK_FEATURE_METRICS_FILE) + tab = _dask_read_parquet_if_exists(path) + if tab is None or tab.num_rows == 0: + return + + df = tab.to_pandas() + 
mask = df["project_id"] == project + if feature_view_name is not None: + mask = mask & (df["feature_view_name"] == feature_view_name) + if feature_name is not None: + mask = mask & (df["feature_name"] == feature_name) + if data_source_type is not None: + mask = mask & (df["data_source_type"] == data_source_type) + mask = mask & (df["is_baseline"].isin([True, 1])) + df.loc[mask, "is_baseline"] = False + pq.write_table(pyarrow.Table.from_pandas(df, preserve_index=False), path) + + +_DASK_MON_DIR = "feast_monitoring" +_DASK_FEATURE_METRICS_FILE = "feature_metrics.parquet" +_DASK_VIEW_METRICS_FILE = "feature_view_metrics.parquet" +_DASK_SERVICE_METRICS_FILE = "feature_service_metrics.parquet" +_DASK_JOB_FILE = "jobs.parquet" + + +def _dask_monitoring_base(config: RepoConfig) -> str: + base = config.repo_path + return str(base) if base else "." + + +def _dask_monitoring_path(config: RepoConfig, filename: str) -> str: + return os.path.join(_dask_monitoring_base(config), _DASK_MON_DIR, filename) + + +def _dask_mon_table_meta(metric_type: str): + if metric_type == "feature": + return _DASK_FEATURE_METRICS_FILE, FEATURE_METRICS_COLUMNS, FEATURE_METRICS_PK + if metric_type == "feature_view": + return ( + _DASK_VIEW_METRICS_FILE, + FEATURE_VIEW_METRICS_COLUMNS, + FEATURE_VIEW_METRICS_PK, + ) + if metric_type == "feature_service": + return ( + _DASK_SERVICE_METRICS_FILE, + FEATURE_SERVICE_METRICS_COLUMNS, + FEATURE_SERVICE_METRICS_PK, + ) + if metric_type == "job": + return _DASK_JOB_FILE, JOB_COLUMNS, JOB_PK + raise ValueError(f"Unknown metric_type '{metric_type}'") + + +def _dask_read_parquet_if_exists(path: str) -> Optional[pyarrow.Table]: + if not os.path.isfile(path): + return None + return pq.read_table(path) + + +def _dask_read_batch_arrow( + data_source: FileSource, repo_path: Optional[Path] +) -> pyarrow.Table: + absolute_path = FileSource.get_uri_for_file_path( + repo_path=repo_path, + uri=data_source.file_options.uri, + ) + filesystem, path = 
FileSource.create_filesystem_and_path( + str(absolute_path), data_source.file_options.s3_endpoint_override + ) + return pq.read_table(path, filesystem=filesystem) + + +def _dask_filter_arrow_by_timestamp( + table: pyarrow.Table, + timestamp_field: str, + start_date: Optional[datetime], + end_date: Optional[datetime], +) -> pyarrow.Table: + if start_date is None and end_date is None: + return table + arr = table[timestamp_field] + mask = None + if start_date is not None: + mask = pc.greater_equal(arr, pyarrow.scalar(start_date)) # type: ignore[attr-defined] + if end_date is not None: + m2 = pc.less_equal(arr, pyarrow.scalar(end_date)) # type: ignore[attr-defined] + mask = m2 if mask is None else pc.and_(mask, m2) # type: ignore[attr-defined] + return table.filter(mask) + + +def _dask_compute_numeric_metrics( + column: pyarrow.ChunkedArray, histogram_bins: int +) -> Dict[str, Any]: + total = len(column) + null_count = column.null_count + result: Dict[str, Any] = { + "feature_type": "numeric", + "row_count": total, + "null_count": null_count, + "null_rate": null_count / total if total > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": None, + } + + valid = pc.drop_null(column) # type: ignore[attr-defined] + if len(valid) == 0: + return result + + float_array = pc.cast(valid, pyarrow.float64()) + result["mean"] = opt_float(pc.mean(float_array).as_py()) # type: ignore[attr-defined] + result["stddev"] = opt_float(pc.stddev(float_array, ddof=1).as_py()) # type: ignore[attr-defined] + + min_max = pc.min_max(float_array) # type: ignore[attr-defined] + result["min_val"] = min_max["min"].as_py() + result["max_val"] = min_max["max"].as_py() + + quantiles = pc.quantile(float_array, q=[0.50, 0.75, 0.90, 0.95, 0.99]) # type: ignore[attr-defined] + q_values = quantiles.to_pylist() + result["p50"] = q_values[0] + result["p75"] = q_values[1] + result["p90"] = 
q_values[2] + result["p95"] = q_values[3] + result["p99"] = q_values[4] + + np_array = float_array.to_numpy() + counts, bin_edges = np.histogram(np_array, bins=histogram_bins) + result["histogram"] = { + "bins": bin_edges.tolist(), + "counts": counts.tolist(), + "bin_width": float(bin_edges[1] - bin_edges[0]) if len(bin_edges) > 1 else 0, + } + + return result + + +def _dask_compute_categorical_metrics( + column: pyarrow.ChunkedArray, top_n: int +) -> Dict[str, Any]: + total = len(column) + null_count = column.null_count + result: Dict[str, Any] = { + "feature_type": "categorical", + "row_count": total, + "null_count": null_count, + "null_rate": null_count / total if total > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": None, + } + + valid = pc.drop_null(column) # type: ignore[attr-defined] + if len(valid) == 0: + return result + + value_counts = pc.value_counts(valid) # type: ignore[attr-defined] + entries = [ + {"value": vc["values"].as_py(), "count": vc["counts"].as_py()} + for vc in value_counts + ] + entries.sort(key=lambda x: x["count"], reverse=True) + + unique_count = len(entries) + top_entries = entries[:top_n] + other_count = sum(e["count"] for e in entries[top_n:]) + + result["histogram"] = { + "values": top_entries, + "other_count": other_count, + "unique_count": unique_count, + } + + return result + + +def _dask_parquet_upsert( + path: str, + columns: List[str], + pk_cols: List[str], + new_rows: List[Dict[str, Any]], +) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) + + prepared: List[Dict[str, Any]] = [] + for row in new_rows: + r = dict(row) + if ( + "histogram" in r + and r["histogram"] is not None + and not isinstance(r["histogram"], str) + ): + r["histogram"] = json.dumps(r["histogram"]) + prepared.append(r) + + new_df = pd.DataFrame(prepared, columns=columns) + existing = _dask_read_parquet_if_exists(path) 
+ if existing is not None: + old_df = existing.to_pandas() + combined = pd.concat([old_df, new_df], ignore_index=True) + else: + combined = new_df + + combined = combined.drop_duplicates(subset=pk_cols, keep="last") + table = pyarrow.Table.from_pandas(combined, preserve_index=False) + pq.write_table(table, path) + + +def _dask_parquet_query( + path: str, + columns: List[str], + project: str, + filters: Optional[Dict[str, Any]], + start_date: Optional[date], + end_date: Optional[date], +) -> List[Dict[str, Any]]: + tab = _dask_read_parquet_if_exists(path) + if tab is None or tab.num_rows == 0: + return [] + + df = tab.to_pandas() + if project: + df = df[df["project_id"] == project] + if filters: + for key, value in filters.items(): + if value is not None: + df = df[df[key] == value] + if "metric_date" in df.columns: + if start_date is not None: + df = df[df["metric_date"] >= start_date] + if end_date is not None: + df = df[df["metric_date"] <= end_date] + df = df.sort_values("metric_date", ascending=True) + else: + df = df.sort_values("job_id", ascending=True) + + results = [] + for _, row in df.iterrows(): + record = {c: row.get(c) for c in columns} + normalize_monitoring_row(record) + for key in ("metric_date", "computed_at"): + val = record.get(key) + if ( + val is not None + and not isinstance(val, str) + and hasattr(val, "isoformat") + ): + record[key] = val.isoformat() + results.append(record) + + return results + def _get_entity_df_event_timestamp_range( entity_df: Union[pd.DataFrame, str], diff --git a/sdk/python/feast/infra/offline_stores/duckdb.py b/sdk/python/feast/infra/offline_stores/duckdb.py index e0a69e53c56..7701e306ed3 100644 --- a/sdk/python/feast/infra/offline_stores/duckdb.py +++ b/sdk/python/feast/infra/offline_stores/duckdb.py @@ -1,11 +1,15 @@ +import json import os -from datetime import datetime +from datetime import date, datetime, timezone from pathlib import Path -from typing import Any, Callable, List, Optional, Union +from typing import 
Any, Callable, Dict, List, Optional, Tuple, Union +import duckdb import ibis import pandas as pd import pyarrow +import pyarrow as pa +import pyarrow.parquet as pq from ibis.expr.types import Table from pydantic import StrictStr @@ -23,7 +27,22 @@ write_logged_features_ibis, ) from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob +from feast.infra.offline_stores.offline_utils import get_timestamp_filter_sql from feast.infra.registry.base_registry import BaseRegistry +from feast.monitoring.monitoring_utils import ( + FEATURE_METRICS_COLUMNS, + FEATURE_METRICS_PK, + FEATURE_SERVICE_METRICS_COLUMNS, + FEATURE_SERVICE_METRICS_PK, + FEATURE_VIEW_METRICS_COLUMNS, + FEATURE_VIEW_METRICS_PK, + JOB_COLUMNS, + JOB_PK, + empty_categorical_metric, + empty_numeric_metric, + normalize_monitoring_row, + opt_float, +) from feast.repo_config import FeastConfigBaseModel, RepoConfig @@ -113,6 +132,336 @@ def _write_data_source( ) +# ------------------------------------------------------------------ # +# DuckDB monitoring (Parquet-backed) +# ------------------------------------------------------------------ # + +MONITORING_DIR = "feast_monitoring" +FEATURE_METRICS_FILE = "feature_metrics.parquet" +VIEW_METRICS_FILE = "feature_view_metrics.parquet" +SERVICE_METRICS_FILE = "feature_service_metrics.parquet" +JOB_METRICS_FILE = "jobs.parquet" + + +def _duckdb_monitoring_base(config: RepoConfig) -> str: + base = config.repo_path + return str(base) if base else "." 
+ + +def _duckdb_monitoring_path(config: RepoConfig, filename: str) -> str: + return os.path.join(_duckdb_monitoring_base(config), MONITORING_DIR, filename) + + +def _duckdb_parquet_from_expression(config: RepoConfig, data_source: FileSource) -> str: + absolute_path = FileSource.get_uri_for_file_path( + repo_path=_duckdb_monitoring_base(config), + uri=data_source.file_options.uri, + ) + return str(absolute_path).replace("'", "''") + + +def _duckdb_quote_ident(name: str) -> str: + return f'"{name}"' + + +def _duckdb_ts_where(ts_filter: str) -> str: + return f"({ts_filter})" if (ts_filter and ts_filter.strip()) else "1=1" + + +def _duckdb_numeric_stats( + conn: duckdb.DuckDBPyConnection, + from_expr: str, + feature_names: List[str], + ts_filter: str, + histogram_bins: int, +) -> List[Dict[str, Any]]: + select_parts = ["COUNT(*)"] + for col in feature_names: + q = _duckdb_quote_ident(col) + c = f"CAST({q} AS DOUBLE)" + select_parts.extend( + [ + f"COUNT({q})", + f"AVG({c})", + f"STDDEV_SAMP({c})", + f"MIN({c})", + f"MAX({c})", + f"PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY {c})", + ] + ) + + tw = _duckdb_ts_where(ts_filter) + query = f"SELECT {', '.join(select_parts)} FROM {from_expr} AS _src WHERE {tw}" + row = conn.execute(query).fetchone() + + if row is None: + return [empty_numeric_metric(n) for n in feature_names] + + row_count = row[0] + results: List[Dict[str, Any]] = [] + + for i, col in enumerate(feature_names): + base = 1 + i * 10 + non_null = row[base] or 0 + null_count = row_count - non_null + + min_val = opt_float(row[base + 3]) + max_val = opt_float(row[base + 4]) + + result: Dict[str, Any] = { + "feature_name": col, + "feature_type": "numeric", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / 
row_count if row_count > 0 else 0.0, + "mean": opt_float(row[base + 1]), + "stddev": opt_float(row[base + 2]), + "min_val": min_val, + "max_val": max_val, + "p50": opt_float(row[base + 5]), + "p75": opt_float(row[base + 6]), + "p90": opt_float(row[base + 7]), + "p95": opt_float(row[base + 8]), + "p99": opt_float(row[base + 9]), + "histogram": None, + } + + if min_val is not None and max_val is not None and non_null > 0: + result["histogram"] = _duckdb_numeric_histogram( + conn, + from_expr, + col, + ts_filter, + histogram_bins, + min_val, + max_val, + ) + + results.append(result) + + return results + + +def _duckdb_numeric_histogram( + conn: duckdb.DuckDBPyConnection, + from_expr: str, + col_name: str, + ts_filter: str, + bins: int, + min_val: float, + max_val: float, +) -> Dict[str, Any]: + q_col = _duckdb_quote_ident(col_name) + + tw = _duckdb_ts_where(ts_filter) + if min_val == max_val: + row = conn.execute( + f"SELECT COUNT(*) FROM {from_expr} AS _src " + f"WHERE {q_col} IS NOT NULL AND {tw}" + ).fetchone() + cnt = row[0] if row else 0 + return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0} + + bin_width = (max_val - min_val) / bins + + query = ( + f"SELECT LEAST(FLOOR((CAST({q_col} AS DOUBLE) - {min_val}) / {bin_width}) + 1, {bins}) AS bucket, " + f"COUNT(*) AS cnt " + f"FROM {from_expr} AS _src " + f"WHERE {q_col} IS NOT NULL AND {tw} " + f"GROUP BY bucket ORDER BY bucket" + ) + rows = conn.execute(query).fetchall() + + counts = [0] * bins + for bucket, cnt in rows: + b = int(bucket) + if 1 <= b <= bins: + counts[b - 1] = cnt + + bin_edges = [min_val + i * bin_width for i in range(bins + 1)] + return { + "bins": [float(b) for b in bin_edges], + "counts": counts, + "bin_width": float(bin_width), + } + + +def _duckdb_categorical_stats( + conn: duckdb.DuckDBPyConnection, + from_expr: str, + col_name: str, + ts_filter: str, + top_n: int, +) -> Dict[str, Any]: + q_col = _duckdb_quote_ident(col_name) + + tw = _duckdb_ts_where(ts_filter) + query = ( 
+ f"WITH filtered AS (" + f" SELECT * FROM {from_expr} AS _src WHERE {tw}" + f") " + f"SELECT " + f" (SELECT COUNT(*) FROM filtered) AS row_count, " + f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, " + f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered " + f" WHERE {q_col} IS NOT NULL) AS unique_count, " + f" CAST({q_col} AS VARCHAR) AS value, COUNT(*) AS cnt " + f"FROM filtered WHERE {q_col} IS NOT NULL " + f"GROUP BY {q_col} " + f"ORDER BY cnt DESC LIMIT {int(top_n)}" + ) + + rows = conn.execute(query).fetchall() + + if not rows: + return empty_categorical_metric(col_name) + + row_count = rows[0][0] + null_count = rows[0][1] + unique_count = rows[0][2] + + top_entries = [{"value": r[3], "count": r[4]} for r in rows] + top_total = sum(e["count"] for e in top_entries) + other_count = (row_count - null_count) - top_total + + return { + "feature_name": col_name, + "feature_type": "categorical", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": { + "values": top_entries, + "other_count": max(other_count, 0), + "unique_count": unique_count, + }, + } + + +def _duckdb_mon_table_meta(metric_type: str): + if metric_type == "feature": + return FEATURE_METRICS_FILE, FEATURE_METRICS_COLUMNS, FEATURE_METRICS_PK + if metric_type == "feature_view": + return VIEW_METRICS_FILE, FEATURE_VIEW_METRICS_COLUMNS, FEATURE_VIEW_METRICS_PK + if metric_type == "feature_service": + return ( + SERVICE_METRICS_FILE, + FEATURE_SERVICE_METRICS_COLUMNS, + FEATURE_SERVICE_METRICS_PK, + ) + if metric_type == "job": + return JOB_METRICS_FILE, JOB_COLUMNS, JOB_PK + raise ValueError(f"Unknown metric_type '{metric_type}'") + + +def _duckdb_read_parquet_if_exists(path: str) -> Optional[pa.Table]: + if not os.path.isfile(path): + return None + return 
pq.read_table(path) + + +def _duckdb_parquet_upsert( + path: str, + columns: List[str], + pk_cols: List[str], + new_rows: List[Dict[str, Any]], +) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) + + prepared: List[Dict[str, Any]] = [] + for row in new_rows: + r = dict(row) + if ( + "histogram" in r + and r["histogram"] is not None + and not isinstance(r["histogram"], str) + ): + r["histogram"] = json.dumps(r["histogram"]) + prepared.append(r) + + new_df = pd.DataFrame(prepared, columns=columns) + existing = _duckdb_read_parquet_if_exists(path) + if existing is not None: + old_df = existing.to_pandas() + combined = pd.concat([old_df, new_df], ignore_index=True) + else: + combined = new_df + + combined = combined.drop_duplicates(subset=pk_cols, keep="last") + table = pa.Table.from_pandas(combined, preserve_index=False) + pq.write_table(table, path) + + +def _duckdb_parquet_query( + path: str, + columns: List[str], + project: str, + filters: Optional[Dict[str, Any]], + start_date: Optional[date], + end_date: Optional[date], +) -> List[Dict[str, Any]]: + tab = _duckdb_read_parquet_if_exists(path) + if tab is None or tab.num_rows == 0: + return [] + + df = tab.to_pandas() + if project: + df = df[df["project_id"] == project] + if filters: + for key, value in filters.items(): + if value is not None: + df = df[df[key] == value] + if "metric_date" in df.columns: + if start_date is not None: + df = df[df["metric_date"] >= start_date] + if end_date is not None: + df = df[df["metric_date"] <= end_date] + df = df.sort_values("metric_date", ascending=True) + else: + df = df.sort_values("job_id", ascending=True) + + results = [] + for _, row in df.iterrows(): + record = {c: row.get(c) for c in columns} + normalize_monitoring_row(record) + for key in ("metric_date", "computed_at"): + val = record.get(key) + if ( + val is not None + and not isinstance(val, str) + and hasattr(val, "isoformat") + ): + record[key] = val.isoformat() + results.append(record) + + return 
results + + +def _duckdb_sql_from_expression(config: RepoConfig, data_source: FileSource) -> str: + p = _duckdb_parquet_from_expression(config, data_source) + if isinstance(data_source.file_format, ParquetFormat): + return f"read_parquet('{p}')" + if isinstance(data_source.file_format, DeltaFormat): + return f"delta_scan('{p}')" + raise NotImplementedError( + "DuckDB monitoring compute supports Parquet and Delta file sources only." + ) + + class DuckDBOfflineStoreConfig(FeastConfigBaseModel): type: StrictStr = "duckdb" # """ Offline store type selector""" @@ -229,3 +578,157 @@ def write_logged_features( logging_config=logging_config, registry=registry, ) + + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, DuckDBOfflineStoreConfig) + assert isinstance(data_source, FileSource) + + from_expr = _duckdb_sql_from_expression(config, data_source) + ts_filter = get_timestamp_filter_sql( + start_date, + end_date, + timestamp_field, + tz=timezone.utc, + cast_style="timestamp", + date_time_separator=" ", + ) + + numeric_features = [n for n, t in feature_columns if t == "numeric"] + categorical_features = [n for n, t in feature_columns if t == "categorical"] + results: List[Dict[str, Any]] = [] + + conn = duckdb.connect() + if numeric_features: + results.extend( + _duckdb_numeric_stats( + conn, + from_expr, + numeric_features, + ts_filter, + histogram_bins, + ) + ) + for col_name in categorical_features: + results.append( + _duckdb_categorical_stats( + conn, + from_expr, + col_name, + ts_filter, + top_n, + ) + ) + conn.close() + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + 
) -> Optional[datetime]: + assert isinstance(config.offline_store, DuckDBOfflineStoreConfig) + assert isinstance(data_source, FileSource) + + from_expr = _duckdb_sql_from_expression(config, data_source) + ts_col = _duckdb_quote_ident(timestamp_field) + conn = duckdb.connect() + row = conn.execute( + f"SELECT MAX({ts_col}) AS m FROM {from_expr} AS _src" + ).fetchone() + conn.close() + + if row is None or row[0] is None: + return None + val = row[0] + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + if isinstance(val, date): + return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc) + return None + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, DuckDBOfflineStoreConfig) + base = os.path.join(_duckdb_monitoring_base(config), MONITORING_DIR) + os.makedirs(base, exist_ok=True) + + tables = [ + (FEATURE_METRICS_FILE, FEATURE_METRICS_COLUMNS), + (VIEW_METRICS_FILE, FEATURE_VIEW_METRICS_COLUMNS), + (SERVICE_METRICS_FILE, FEATURE_SERVICE_METRICS_COLUMNS), + (JOB_METRICS_FILE, JOB_COLUMNS), + ] + for fname, columns in tables: + path = _duckdb_monitoring_path(config, fname) + if not os.path.isfile(path): + os.makedirs(os.path.dirname(path), exist_ok=True) + pd.DataFrame(columns=columns).to_parquet(path, index=False) + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, DuckDBOfflineStoreConfig) + + fname, columns, pk = _duckdb_mon_table_meta(metric_type) + path = _duckdb_monitoring_path(config, fname) + _duckdb_parquet_upsert(path, columns, pk, metrics) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, 
Any]]: + assert isinstance(config.offline_store, DuckDBOfflineStoreConfig) + + fname, columns, _ = _duckdb_mon_table_meta(metric_type) + path = _duckdb_monitoring_path(config, fname) + return _duckdb_parquet_query( + path, columns, project, filters, start_date, end_date + ) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, DuckDBOfflineStoreConfig) + + path = _duckdb_monitoring_path(config, FEATURE_METRICS_FILE) + tab = _duckdb_read_parquet_if_exists(path) + if tab is None or tab.num_rows == 0: + return + + df = tab.to_pandas() + mask = df["project_id"] == project + if feature_view_name is not None: + mask = mask & (df["feature_view_name"] == feature_view_name) + if feature_name is not None: + mask = mask & (df["feature_name"] == feature_name) + if data_source_type is not None: + mask = mask & (df["data_source_type"] == data_source_type) + mask = mask & (df["is_baseline"].isin([True, 1])) + df.loc[mask, "is_baseline"] = False + pq.write_table(pa.Table.from_pandas(df, preserve_index=False), path) diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py index 4ae0c680c3b..816b8f454af 100644 --- a/sdk/python/feast/infra/offline_stores/offline_store.py +++ b/sdk/python/feast/infra/offline_stores/offline_store.py @@ -11,9 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import logging +import time import warnings from abc import ABC -from datetime import datetime +from datetime import date, datetime, timedelta, timezone from pathlib import Path from typing import ( TYPE_CHECKING, @@ -70,6 +72,21 @@ def __init__( self.max_event_timestamp = max_event_timestamp +def _extract_retrieval_metadata(job: "RetrievalJob") -> tuple: + """Return ``(feature_view_names, feature_count)`` from a RetrievalJob's metadata.""" + try: + meta = job.metadata + if meta: + feature_count = len(meta.features) + feature_views = list( + {ref.split(":")[0] for ref in meta.features if ":" in ref} + ) + return feature_views, feature_count + except (NotImplementedError, AttributeError): + pass + return [], 0 + + class RetrievalJob(ABC): """A RetrievalJob manages the execution of a query to retrieve data from the offline store.""" @@ -152,7 +169,51 @@ def to_arrow( validation_reference (optional): The validation to apply against the retrieved dataframe. timeout (optional): The query timeout if applicable. 
""" - features_table = self._to_arrow_internal(timeout=timeout) + start_wall = time.monotonic() + status_label = "success" + row_count = 0 + try: + features_table = self._to_arrow_internal(timeout=timeout) + row_count = features_table.num_rows + except Exception: + status_label = "error" + raise + finally: + try: + from feast import metrics as feast_metrics + + elapsed = time.monotonic() - start_wall + + if feast_metrics._config.offline_features: + feast_metrics.offline_store_request_total.labels( + method="to_arrow", status=status_label + ).inc() + feast_metrics.offline_store_request_latency_seconds.labels( + method="to_arrow" + ).observe(elapsed) + feast_metrics.offline_store_row_count.labels( + method="to_arrow" + ).observe(row_count) + + if feast_metrics._config.audit_logging: + feature_views, feature_count = _extract_retrieval_metadata(self) + end_dt = datetime.now(tz=timezone.utc) + start_dt = end_dt - timedelta(seconds=elapsed) + feast_metrics.emit_offline_audit_log( + method="to_arrow", + feature_views=feature_views, + feature_count=feature_count, + row_count=row_count, + status=status_label, + start_time=start_dt.isoformat(), + end_time=end_dt.isoformat(), + duration_ms=elapsed * 1000, + ) + except Exception: + logging.getLogger(__name__).debug( + "Failed to record offline store metrics", exc_info=True + ) + if self.on_demand_feature_views: # Build a mapping of ODFV name to requested feature names # This ensures we only return the features that were explicitly requested @@ -559,3 +620,137 @@ def get_table_column_names_and_types_from_data_source( data_source: DataSource object """ return data_source.get_table_column_names_and_types(config=config) + + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, 
Any]]: + """ + Compute monitoring metrics (stats, percentiles, histograms) directly + in the offline store using its native compute engine. + + Backends that don't support this should leave it unimplemented; + the monitoring service will fall back to Python-based computation. + + Args: + config: The config for the current feature store. + data_source: The data source to compute metrics from. + feature_columns: List of (feature_name, feature_type) where + feature_type is "numeric" or "categorical". + timestamp_field: Column used for time-range filtering. + start_date: Start of the time range. + end_date: End of the time range. + histogram_bins: Number of bins for numeric histograms. + top_n: Number of top values for categorical histograms. + + Returns: + A list of metric dicts, one per feature, matching the format + produced by MetricsCalculator.compute_all. + """ + raise NotImplementedError + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + """ + Return the maximum event timestamp from the data source. + + Used by the monitoring service to determine date ranges for + auto-compute. Backends that don't support this should leave it + unimplemented; the caller will fall back to a full-table scan. + + Args: + config: The config for the current feature store. + data_source: The data source to query. + timestamp_field: The timestamp column name. + + Returns: + The maximum timestamp, or None if no data exists. 
+ """ + raise NotImplementedError + + # ------------------------------------------------------------------ # + # Monitoring metrics storage (native) + # ------------------------------------------------------------------ # + + MONITORING_VALID_GRANULARITIES = ( + "daily", + "weekly", + "biweekly", + "monthly", + "quarterly", + ) + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + """Create the monitoring metrics tables if they do not exist. + + Backends that don't support native monitoring storage should + leave this unimplemented; the monitoring service will raise an + error indicating the backend lacks storage support. + """ + raise NotImplementedError + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + """Persist monitoring metrics (upsert semantics). + + Args: + config: The config for the current feature store. + metric_type: One of "feature", "feature_view", "feature_service". + metrics: List of metric dicts to upsert. + """ + raise NotImplementedError + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + """Read monitoring metrics with optional filtering. + + Args: + config: The config for the current feature store. + project: Feast project name. + metric_type: One of "feature", "feature_view", "feature_service". + filters: Column-value pairs for WHERE clauses. + start_date: Inclusive lower bound on metric_date. + end_date: Inclusive upper bound on metric_date. + + Returns: + List of metric dicts ordered by metric_date ascending. 
+ """ + raise NotImplementedError + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + """Set is_baseline=FALSE for matching feature metric rows. + + Used to ensure only one baseline exists per feature before + writing a new baseline. + """ + raise NotImplementedError diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py index 900dfcfab80..ec708ccf798 100644 --- a/sdk/python/feast/infra/offline_stores/redshift.py +++ b/sdk/python/feast/infra/offline_stores/redshift.py @@ -1,6 +1,7 @@ import contextlib +import json import uuid -from datetime import datetime, timezone +from datetime import date, datetime, timezone from pathlib import Path from typing import ( Any, @@ -40,6 +41,17 @@ ) from feast.infra.registry.base_registry import BaseRegistry from feast.infra.utils import aws_utils +from feast.monitoring.monitoring_utils import ( + MON_TABLE_FEATURE, + MON_TABLE_FEATURE_SERVICE, + MON_TABLE_FEATURE_VIEW, + MON_TABLE_JOB, + empty_categorical_metric, + empty_numeric_metric, + monitoring_table_meta, + normalize_monitoring_row, + opt_float, +) from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.saved_dataset import SavedDatasetStorage @@ -378,6 +390,584 @@ def offline_write_batch( fail_if_exists=False, ) + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) + assert isinstance(data_source, RedshiftSource) + + from_expression = data_source.get_table_query_string() + ts_filter = 
get_timestamp_filter_sql( + start_date, + end_date, + timestamp_field, + tz=timezone.utc, + ) + ts_clause = ts_filter if ts_filter else "1=1" + + numeric_features = [n for n, t in feature_columns if t == "numeric"] + categorical_features = [n for n, t in feature_columns if t == "categorical"] + results: List[Dict[str, Any]] = [] + + if numeric_features: + results.extend( + _redshift_sql_numeric_stats( + config, + from_expression, + numeric_features, + ts_clause, + histogram_bins, + ) + ) + + for col_name in categorical_features: + results.append( + _redshift_sql_categorical_stats( + config, from_expression, col_name, ts_clause, top_n + ) + ) + + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) + assert isinstance(data_source, RedshiftSource) + + from_expression = data_source.get_table_query_string() + q_ts = f'"{timestamp_field}"' + sql = f"SELECT MAX({q_ts}) AS max_ts FROM {from_expression} AS _src" + rows = _redshift_execute_fetch_rows(config, sql) + if not rows or not rows[0]: + return None + val = _redshift_field_value(rows[0][0]) + if val is None: + return None + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + return parser.parse(str(val)) + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) + for stmt in _REDSHIFT_MONITORING_DDL_STATEMENTS: + _redshift_execute_statement(config, stmt) + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) + table, columns, pk_columns = monitoring_table_meta(metric_type) + for row in metrics: + 
_redshift_merge_metric_row(config, table, columns, pk_columns, row) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) + _, columns, _ = monitoring_table_meta(metric_type) + return _redshift_mon_query( + config, metric_type, columns, project, filters, start_date, end_date + ) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) + parts = [ + f"project_id = {_redshift_sql_literal(project)}", + "is_baseline = TRUE", + ] + if feature_view_name is not None: + parts.append( + f"feature_view_name = {_redshift_sql_literal(feature_view_name)}" + ) + if feature_name is not None: + parts.append(f"feature_name = {_redshift_sql_literal(feature_name)}") + if data_source_type is not None: + parts.append( + f"data_source_type = {_redshift_sql_literal(data_source_type)}" + ) + where_sql = " AND ".join(parts) + sql = f"UPDATE {MON_TABLE_FEATURE} SET is_baseline = FALSE WHERE {where_sql}" + _redshift_execute_statement(config, sql) + + +_REDSHIFT_MONITORING_DDL_STATEMENTS = [ + f""" +CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE} ( + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255) NOT NULL, + feature_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + feature_type VARCHAR(50) NOT NULL, + row_count BIGINT, + null_count BIGINT, + null_rate 
DOUBLE PRECISION, + mean DOUBLE PRECISION, + stddev DOUBLE PRECISION, + min_val DOUBLE PRECISION, + max_val DOUBLE PRECISION, + p50 DOUBLE PRECISION, + p75 DOUBLE PRECISION, + p90 DOUBLE PRECISION, + p95 DOUBLE PRECISION, + p99 DOUBLE PRECISION, + histogram VARCHAR(65535), + PRIMARY KEY (project_id, feature_view_name, feature_name, + metric_date, granularity, data_source_type) +); +""", + f""" +CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_VIEW} ( + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + total_row_count BIGINT, + total_features INTEGER, + features_with_nulls INTEGER, + avg_null_rate DOUBLE PRECISION, + max_null_rate DOUBLE PRECISION, + PRIMARY KEY (project_id, feature_view_name, metric_date, + granularity, data_source_type) +); +""", + f""" +CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_SERVICE} ( + project_id VARCHAR(255) NOT NULL, + feature_service_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + total_feature_views INTEGER, + total_features INTEGER, + avg_null_rate DOUBLE PRECISION, + max_null_rate DOUBLE PRECISION, + PRIMARY KEY (project_id, feature_service_name, metric_date, + granularity, data_source_type) +); +""", + f""" +CREATE TABLE IF NOT EXISTS {MON_TABLE_JOB} ( + job_id VARCHAR(36) NOT NULL, + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255), + job_type VARCHAR(50) NOT NULL, + status VARCHAR(20) NOT NULL DEFAULT 'pending', + parameters VARCHAR(65535), + metric_date DATE NOT NULL, + started_at TIMESTAMPTZ, + completed_at 
TIMESTAMPTZ, + error_message VARCHAR(65535), + result_summary VARCHAR(65535), + PRIMARY KEY (job_id) +); +""", +] + + +def _redshift_execute_statement(config: RepoConfig, sql: str) -> str: + client = aws_utils.get_redshift_data_client(config.offline_store.region) + return aws_utils.execute_redshift_statement( + client, + config.offline_store.cluster_id, + config.offline_store.workgroup, + config.offline_store.database, + config.offline_store.user, + sql, + ) + + +def _redshift_get_statement_pages( + client: Any, statement_id: str +) -> Tuple[List[Dict[str, Any]], List[List[Dict[str, Any]]]]: + column_metadata: List[Dict[str, Any]] = [] + all_records: List[List[Dict[str, Any]]] = [] + next_token: Optional[str] = None + while True: + kwargs: Dict[str, Any] = {"Id": statement_id} + if next_token: + kwargs["NextToken"] = next_token + resp = client.get_statement_result(**kwargs) + if not column_metadata: + column_metadata = resp.get("ColumnMetadata", []) + all_records.extend(resp.get("Records", [])) + next_token = resp.get("NextToken") + if not next_token: + break + return column_metadata, all_records + + +def _redshift_execute_fetch_rows( + config: RepoConfig, sql: str +) -> List[List[Dict[str, Any]]]: + client = aws_utils.get_redshift_data_client(config.offline_store.region) + sid = aws_utils.execute_redshift_statement( + client, + config.offline_store.cluster_id, + config.offline_store.workgroup, + config.offline_store.database, + config.offline_store.user, + sql, + ) + _, records = _redshift_get_statement_pages(client, sid) + return records + + +def _redshift_field_value(field: Dict[str, Any]) -> Any: + if field.get("isNull"): + return None + if "stringValue" in field: + return field["stringValue"] + if "longValue" in field: + return field["longValue"] + if "doubleValue" in field: + return field["doubleValue"] + if "booleanValue" in field: + return field["booleanValue"] + return None + + +def _redshift_sql_literal(val: Any) -> str: + if val is None: + return "NULL" 
+ if isinstance(val, bool): + return "TRUE" if val else "FALSE" + if isinstance(val, (int, float)) and not isinstance(val, bool): + return str(val) + if isinstance(val, date) and not isinstance(val, datetime): + return f"DATE '{val.isoformat()}'" + if isinstance(val, datetime): + s = val.isoformat(sep=" ", timespec="seconds") + return f"TIMESTAMP '{s}'" + s = str(val).replace("'", "''") + return f"'{s}'" + + +def _redshift_merge_metric_row( + config: RepoConfig, + table: str, + columns: List[str], + pk_columns: List[str], + row: Dict[str, Any], +) -> None: + non_pk = [c for c in columns if c not in pk_columns] + client = aws_utils.get_redshift_data_client(config.offline_store.region) + + select_parts = ", ".join( + f"{_redshift_sql_literal_for_column(c, row.get(c))} AS {c}" for c in columns + ) + on_clause = " AND ".join(f"t.{c} = s.{c}" for c in pk_columns) + update_set = ", ".join(f"{c} = s.{c}" for c in non_pk) + insert_cols = ", ".join(columns) + insert_vals = ", ".join(f"s.{c}" for c in columns) + + merge_sql = f""" +MERGE INTO {table} AS t +USING (SELECT {select_parts}) AS s +ON {on_clause} +WHEN MATCHED THEN UPDATE SET {update_set} +WHEN NOT MATCHED THEN INSERT ({insert_cols}) VALUES ({insert_vals}) +""".strip() + aws_utils.execute_redshift_statement( + client, + config.offline_store.cluster_id, + config.offline_store.workgroup, + config.offline_store.database, + config.offline_store.user, + merge_sql, + ) + + +def _redshift_sql_literal_for_column(column: str, val: Any) -> str: + if val is None: + return "NULL" + if column == "histogram" and val is not None: + dumped = json.dumps(val).replace("'", "''") + return f"'{dumped}'" + return _redshift_sql_literal(val) + + +def _redshift_mon_query( + config: RepoConfig, + metric_type: str, + columns: List[str], + project: str, + filters: Optional[Dict[str, Any]], + start_date: Optional[date], + end_date: Optional[date], +) -> List[Dict[str, Any]]: + table, _, _ = monitoring_table_meta(metric_type) + conditions: list 
= [] + if project: + conditions.append(f"project_id = {_redshift_sql_literal(project)}") + if filters: + for key, value in filters.items(): + if value is not None: + conditions.append(f'"{key}" = {_redshift_sql_literal(value)}') + if start_date: + conditions.append(f"metric_date >= DATE '{start_date.isoformat()}'") + if end_date: + conditions.append(f"metric_date <= DATE '{end_date.isoformat()}'") + where_sql = " AND ".join(conditions) if conditions else "TRUE" + col_sql = ", ".join(f'"{c}"' for c in columns) + order_col = "metric_date" if "metric_date" in columns else "job_id" + sql = ( + f'SELECT {col_sql} FROM "{table}" WHERE {where_sql} ORDER BY "{order_col}" ASC' + ) + + client = aws_utils.get_redshift_data_client(config.offline_store.region) + sid = aws_utils.execute_redshift_statement( + client, + config.offline_store.cluster_id, + config.offline_store.workgroup, + config.offline_store.database, + config.offline_store.user, + sql, + ) + meta, rows = _redshift_get_statement_pages(client, sid) + col_names = [c["name"] for c in meta] + out: List[Dict[str, Any]] = [] + for rec in rows: + record = {col_names[i]: _redshift_field_value(rec[i]) for i in range(len(rec))} + out.append(normalize_monitoring_row(record)) + return out + + +def _redshift_sql_numeric_stats( + config: RepoConfig, + from_expression: str, + feature_names: List[str], + ts_clause: str, + histogram_bins: int, +) -> List[Dict[str, Any]]: + select_parts = ["COUNT(*)"] + for col in feature_names: + q = f'"{col}"' + c = f"CAST({q} AS DOUBLE PRECISION)" + select_parts.extend( + [ + f"COUNT({q})", + f"AVG({c})", + f"STDDEV_SAMP({c})", + f"MIN({c})", + f"MAX({c})", + f"PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY {c})", + ] + ) + + query = ( + f"SELECT {', '.join(select_parts)} " + 
f"FROM {from_expression} AS _src WHERE {ts_clause}" + ) + rows = _redshift_execute_fetch_rows(config, query) + if not rows or not rows[0]: + return [empty_numeric_metric(n) for n in feature_names] + + row = rows[0] + row_count = int(_redshift_field_value(row[0]) or 0) + results: List[Dict[str, Any]] = [] + + for i, col in enumerate(feature_names): + base = 1 + i * 10 + non_null = int(_redshift_field_value(row[base]) or 0) + null_count = row_count - non_null + + min_val = opt_float(_redshift_field_value(row[base + 3])) + max_val = opt_float(_redshift_field_value(row[base + 4])) + + result: Dict[str, Any] = { + "feature_name": col, + "feature_type": "numeric", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": opt_float(_redshift_field_value(row[base + 1])), + "stddev": opt_float(_redshift_field_value(row[base + 2])), + "min_val": min_val, + "max_val": max_val, + "p50": opt_float(_redshift_field_value(row[base + 5])), + "p75": opt_float(_redshift_field_value(row[base + 6])), + "p90": opt_float(_redshift_field_value(row[base + 7])), + "p95": opt_float(_redshift_field_value(row[base + 8])), + "p99": opt_float(_redshift_field_value(row[base + 9])), + "histogram": None, + } + + if min_val is not None and max_val is not None and non_null > 0: + result["histogram"] = _redshift_sql_numeric_histogram( + config, + from_expression, + col, + ts_clause, + histogram_bins, + min_val, + max_val, + ) + + results.append(result) + + return results + + +def _redshift_sql_numeric_histogram( + config: RepoConfig, + from_expression: str, + col_name: str, + ts_clause: str, + bins: int, + min_val: float, + max_val: float, +) -> Dict[str, Any]: + q_col = f'"{col_name}"' + + if min_val == max_val: + sql = ( + f"SELECT COUNT(*) FROM {from_expression} AS _src " + f"WHERE {q_col} IS NOT NULL AND {ts_clause}" + ) + r = _redshift_execute_fetch_rows(config, sql) + cnt = int(_redshift_field_value(r[0][0]) or 0) if r and 
r[0] else 0 + return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0} + + bin_width = (max_val - min_val) / bins + cast_col = f"CAST({q_col} AS DOUBLE PRECISION)" + + inner = ( + f"CASE WHEN {min_val} = {max_val} THEN 1 " + f"ELSE LEAST(GREATEST(FLOOR(({cast_col} - {min_val}) / {bin_width}) + 1, 1), {bins}) " + f"END AS bucket" + ) + + query = ( + f"SELECT bucket, COUNT(*) AS cnt FROM (" + f" SELECT {inner} " + f" FROM {from_expression} AS _src " + f" WHERE {q_col} IS NOT NULL AND {ts_clause}" + f") AS _b WHERE bucket IS NOT NULL " + f"GROUP BY bucket ORDER BY bucket" + ) + hrows = _redshift_execute_fetch_rows(config, query) + counts = [0] * bins + for hr in hrows: + bucket = int(_redshift_field_value(hr[0]) or 0) + cnt = int(_redshift_field_value(hr[1]) or 0) + if 1 <= bucket <= bins: + counts[bucket - 1] = cnt + + bin_edges = [min_val + i * bin_width for i in range(bins + 1)] + return { + "bins": [float(b) for b in bin_edges], + "counts": counts, + "bin_width": float(bin_width), + } + + +def _redshift_sql_categorical_stats( + config: RepoConfig, + from_expression: str, + col_name: str, + ts_clause: str, + top_n: int, +) -> Dict[str, Any]: + q_col = f'"{col_name}"' + + query = ( + f"WITH filtered AS (" + f" SELECT * FROM {from_expression} AS _src WHERE {ts_clause}" + f") " + f"SELECT " + f" (SELECT COUNT(*) FROM filtered) AS row_count, " + f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, " + f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered " + f" WHERE {q_col} IS NOT NULL) AS unique_count, " + f" CAST({q_col} AS VARCHAR(65535)) AS value, COUNT(*) AS cnt " + f"FROM filtered WHERE {q_col} IS NOT NULL " + f"GROUP BY {q_col} ORDER BY cnt DESC LIMIT {int(top_n)}" + ) + + rows = _redshift_execute_fetch_rows(config, query) + if not rows: + return empty_categorical_metric(col_name) + + row_count = int(_redshift_field_value(rows[0][0]) or 0) + null_count = int(_redshift_field_value(rows[0][1]) or 0) + unique_count = 
int(_redshift_field_value(rows[0][2]) or 0) + + top_entries = [ + { + "value": _redshift_field_value(r[3]), + "count": int(_redshift_field_value(r[4]) or 0), + } + for r in rows + ] + top_total = sum(e["count"] for e in top_entries) + other_count = (row_count - null_count) - top_total + + return { + "feature_name": col_name, + "feature_type": "categorical", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": { + "values": top_entries, + "other_count": max(other_count, 0), + "unique_count": unique_count, + }, + } + class RedshiftRetrievalJob(RetrievalJob): def __init__( diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index 7226c908d13..a369d23bec2 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -3,7 +3,8 @@ import os import uuid import warnings -from datetime import datetime, timezone +from datetime import date, datetime, timezone +from decimal import Decimal from functools import reduce from pathlib import Path from typing import ( @@ -50,6 +51,17 @@ write_pandas, write_parquet, ) +from feast.monitoring.monitoring_utils import ( + MON_TABLE_FEATURE, + MON_TABLE_FEATURE_SERVICE, + MON_TABLE_FEATURE_VIEW, + MON_TABLE_JOB, + empty_categorical_metric, + empty_numeric_metric, + monitoring_table_meta, + normalize_monitoring_row, + opt_float, +) from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.saved_dataset import SavedDatasetStorage from feast.types import ( @@ -421,6 +433,249 @@ def offline_write_batch( auto_create_table=True, ) + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + 
timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + assert isinstance(data_source, SnowflakeSource) + + from_expression = data_source.get_table_query_string() + from_expression = _qualify_snowflake_from_expression( + config, data_source, from_expression + ) + ts_filter = get_timestamp_filter_sql( + start_date, end_date, timestamp_field, tz=timezone.utc + ) + + numeric_features = [n for n, t in feature_columns if t == "numeric"] + categorical_features = [n for n, t in feature_columns if t == "categorical"] + results: List[Dict[str, Any]] = [] + + with GetSnowflakeConnection(config.offline_store) as conn: + if numeric_features: + results.extend( + _snowflake_sql_numeric_stats( + conn, + from_expression, + numeric_features, + ts_filter, + histogram_bins, + ) + ) + + for col_name in categorical_features: + results.append( + _snowflake_sql_categorical_stats( + conn, from_expression, col_name, ts_filter, top_n + ) + ) + + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + assert isinstance(data_source, SnowflakeSource) + + from_expression = data_source.get_table_query_string() + from_expression = _qualify_snowflake_from_expression( + config, data_source, from_expression + ) + + with GetSnowflakeConnection(config.offline_store) as conn: + cursor = execute_snowflake_statement( + conn, + f'SELECT MAX("{timestamp_field}") FROM {from_expression} AS _src', + ) + row = cursor.fetchone() + + if row is None or row[0] is None: + return None + val = row[0] + if isinstance(val, pd.Timestamp): + val = val.to_pydatetime() + if isinstance(val, datetime): + return val if val.tzinfo else 
val.replace(tzinfo=timezone.utc) + return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc) + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + fq_feature = _snowflake_monitoring_table_fqn(config, MON_TABLE_FEATURE) + fq_view = _snowflake_monitoring_table_fqn(config, MON_TABLE_FEATURE_VIEW) + fq_service = _snowflake_monitoring_table_fqn(config, MON_TABLE_FEATURE_SERVICE) + + ddl_feature = f""" + CREATE TABLE IF NOT EXISTS {fq_feature} ( + "project_id" VARCHAR(255) NOT NULL, + "feature_view_name" VARCHAR(255) NOT NULL, + "feature_name" VARCHAR(255) NOT NULL, + "metric_date" DATE NOT NULL, + "granularity" VARCHAR(20) NOT NULL DEFAULT 'daily', + "data_source_type" VARCHAR(50) NOT NULL DEFAULT 'batch', + "computed_at" TIMESTAMP_TZ NOT NULL DEFAULT CURRENT_TIMESTAMP(), + "is_baseline" BOOLEAN NOT NULL DEFAULT FALSE, + "feature_type" VARCHAR(50) NOT NULL, + "row_count" BIGINT, + "null_count" BIGINT, + "null_rate" DOUBLE, + "mean" DOUBLE, + "stddev" DOUBLE, + "min_val" DOUBLE, + "max_val" DOUBLE, + "p50" DOUBLE, + "p75" DOUBLE, + "p90" DOUBLE, + "p95" DOUBLE, + "p99" DOUBLE, + "histogram" VARIANT, + PRIMARY KEY ("project_id", "feature_view_name", "feature_name", + "metric_date", "granularity", "data_source_type") + ) + """ + ddl_view = f""" + CREATE TABLE IF NOT EXISTS {fq_view} ( + "project_id" VARCHAR(255) NOT NULL, + "feature_view_name" VARCHAR(255) NOT NULL, + "metric_date" DATE NOT NULL, + "granularity" VARCHAR(20) NOT NULL DEFAULT 'daily', + "data_source_type" VARCHAR(50) NOT NULL DEFAULT 'batch', + "computed_at" TIMESTAMP_TZ NOT NULL DEFAULT CURRENT_TIMESTAMP(), + "is_baseline" BOOLEAN NOT NULL DEFAULT FALSE, + "total_row_count" BIGINT, + "total_features" INTEGER, + "features_with_nulls" INTEGER, + "avg_null_rate" DOUBLE, + "max_null_rate" DOUBLE, + PRIMARY KEY ("project_id", "feature_view_name", "metric_date", + "granularity", "data_source_type") + ) 
+ """ + ddl_service = f""" + CREATE TABLE IF NOT EXISTS {fq_service} ( + "project_id" VARCHAR(255) NOT NULL, + "feature_service_name" VARCHAR(255) NOT NULL, + "metric_date" DATE NOT NULL, + "granularity" VARCHAR(20) NOT NULL DEFAULT 'daily', + "data_source_type" VARCHAR(50) NOT NULL DEFAULT 'batch', + "computed_at" TIMESTAMP_TZ NOT NULL DEFAULT CURRENT_TIMESTAMP(), + "is_baseline" BOOLEAN NOT NULL DEFAULT FALSE, + "total_feature_views" INTEGER, + "total_features" INTEGER, + "avg_null_rate" DOUBLE, + "max_null_rate" DOUBLE, + PRIMARY KEY ("project_id", "feature_service_name", "metric_date", + "granularity", "data_source_type") + ) + """ + + fq_job = _snowflake_monitoring_table_fqn(config, MON_TABLE_JOB) + ddl_job = f""" + CREATE TABLE IF NOT EXISTS {fq_job} ( + "job_id" VARCHAR(36) NOT NULL, + "project_id" VARCHAR(255) NOT NULL, + "feature_view_name" VARCHAR(255), + "job_type" VARCHAR(50) NOT NULL, + "status" VARCHAR(20) NOT NULL DEFAULT 'pending', + "parameters" VARCHAR, + "metric_date" DATE NOT NULL, + "started_at" TIMESTAMP_TZ, + "completed_at" TIMESTAMP_TZ, + "error_message" VARCHAR, + "result_summary" VARCHAR, + PRIMARY KEY ("job_id") + ) + """ + + with GetSnowflakeConnection(config.offline_store) as conn: + execute_snowflake_statement(conn, ddl_feature) + execute_snowflake_statement(conn, ddl_view) + execute_snowflake_statement(conn, ddl_service) + execute_snowflake_statement(conn, ddl_job) + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + table, columns, pk_columns = monitoring_table_meta(metric_type) + _snowflake_mon_merge_upsert( + config.offline_store, table, columns, pk_columns, metrics + ) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = 
None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + _, columns, _ = monitoring_table_meta(metric_type) + return _snowflake_mon_query( + config.offline_store, + metric_type, + columns, + project, + filters, + start_date, + end_date, + ) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + fq_table = _snowflake_monitoring_table_fqn(config, MON_TABLE_FEATURE) + conditions = [f'"project_id" = {_snowflake_sql_literal(project)}'] + if feature_view_name: + conditions.append( + f'"feature_view_name" = {_snowflake_sql_literal(feature_view_name)}' + ) + if feature_name: + conditions.append( + f'"feature_name" = {_snowflake_sql_literal(feature_name)}' + ) + if data_source_type: + conditions.append( + f'"data_source_type" = {_snowflake_sql_literal(data_source_type)}' + ) + conditions.append('"is_baseline" = TRUE') + + sql = f'UPDATE {fq_table} SET "is_baseline" = FALSE WHERE ' + " AND ".join( + conditions + ) + + with GetSnowflakeConnection(config.offline_store) as conn: + execute_snowflake_statement(conn, sql) + class SnowflakeRetrievalJob(RetrievalJob): def __init__( @@ -640,6 +895,335 @@ def _get_file_names_from_copy_into(self, cursor, native_export_path) -> List[str ] +# ------------------------------------------------------------------ # +# Snowflake monitoring SQL push-down & storage helpers +# ------------------------------------------------------------------ # + + +def _escape_snowflake_sql_string(value: str) -> str: + return value.replace("'", "''") + + +def _snowflake_sql_literal(val: Any) -> str: + if val is None: + return "NULL" + if isinstance(val, bool): + return "TRUE" if val else "FALSE" + if isinstance(val, (int, float)) and 
not isinstance(val, bool): + if isinstance(val, float) and (np.isnan(val) or np.isinf(val)): + return "NULL" + return str(val) + if isinstance(val, Decimal): + return str(val) + if isinstance(val, date) and not isinstance(val, datetime): + return f"DATE '{val.isoformat()}'" + if isinstance(val, datetime): + dt = val if val.tzinfo else val.replace(tzinfo=timezone.utc) + return f"TIMESTAMP_TZ '{dt.isoformat()}'" + if isinstance(val, str): + return f"'{_escape_snowflake_sql_string(val)}'" + return f"'{_escape_snowflake_sql_string(str(val))}'" + + +def _qualify_snowflake_from_expression( + config: RepoConfig, + data_source: SnowflakeSource, + from_expression: str, +) -> str: + if not data_source.database and not data_source.schema and data_source.table: + return ( + f'"{config.offline_store.database}"."{config.offline_store.schema_}".' + f"{from_expression}" + ) + if not data_source.database and data_source.schema and data_source.table: + return f'"{config.offline_store.database}".{from_expression}' + return from_expression + + +def _snowflake_monitoring_table_fqn( + config: RepoConfig, + table_name: str, +) -> str: + os = config.offline_store + assert isinstance(os, SnowflakeOfflineStoreConfig) + return f'"{os.database}"."{os.schema_}"."{table_name}"' + + +def _snowflake_sql_numeric_histogram( + conn: SnowflakeConnection, + from_expression: str, + col_name: str, + ts_filter: str, + bins: int, + min_val: float, + max_val: float, +) -> Dict[str, Any]: + q_col = f'"{col_name}"' + + if min_val == max_val: + cursor = execute_snowflake_statement( + conn, + f"SELECT COUNT(*) FROM {from_expression} AS _src " + f"WHERE {q_col} IS NOT NULL AND {ts_filter}", + ) + row = cursor.fetchone() + cnt = (row or (0,))[0] + return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0} + + upper = max_val + (max_val - min_val) * 1e-10 + bin_width = (max_val - min_val) / bins + + query = ( + f"SELECT WIDTH_BUCKET(CAST({q_col} AS DOUBLE), {min_val}, {upper}, {bins}) " + f"AS bucket, 
def _snowflake_sql_numeric_stats(
    conn: SnowflakeConnection,
    from_expression: str,
    feature_names: List[str],
    ts_filter: str,
    histogram_bins: int,
) -> List[Dict[str, Any]]:
    """Compute summary statistics for several numeric columns in one query.

    A single aggregate SELECT returns the total row count followed by ten
    aggregates per feature (non-null count, mean, sample stddev, min, max and
    five percentiles). A histogram is then fetched per feature whenever that
    feature has at least one non-null value and both bounds are known.

    Returns one metric dict per entry of ``feature_names``, in the same order.
    """

    def _aggregates_for(name: str) -> List[str]:
        # Ten expressions per column; the decoding loop below relies on
        # exactly this order.
        quoted = f'"{name}"'
        as_double = f"CAST({quoted} AS DOUBLE)"
        percentiles = [
            f"PERCENTILE_CONT({fraction}) WITHIN GROUP (ORDER BY {as_double})"
            for fraction in ("0.50", "0.75", "0.90", "0.95", "0.99")
        ]
        return [
            f"COUNT({quoted})",
            f"AVG({as_double})",
            f"STDDEV_SAMP({as_double})",
            f"MIN({as_double})",
            f"MAX({as_double})",
            *percentiles,
        ]

    projections: List[str] = ["COUNT(*)"]
    for name in feature_names:
        projections += _aggregates_for(name)

    sql = (
        "SELECT "
        + ", ".join(projections)
        + f" FROM {from_expression} AS _src WHERE {ts_filter}"
    )
    summary = execute_snowflake_statement(conn, sql).fetchone()

    if summary is None:
        return [empty_numeric_metric(name) for name in feature_names]

    total_rows = summary[0]
    metrics: List[Dict[str, Any]] = []

    for position, name in enumerate(feature_names):
        # Column 0 is COUNT(*); each feature owns the next block of ten.
        offset = 1 + position * 10
        present = summary[offset] or 0
        missing = total_rows - present

        lowest = opt_float(summary[offset + 3])
        highest = opt_float(summary[offset + 4])

        entry: Dict[str, Any] = {
            "feature_name": name,
            "feature_type": "numeric",
            "row_count": total_rows,
            "null_count": missing,
            "null_rate": missing / total_rows if total_rows > 0 else 0.0,
            "mean": opt_float(summary[offset + 1]),
            "stddev": opt_float(summary[offset + 2]),
            "min_val": lowest,
            "max_val": highest,
            "p50": opt_float(summary[offset + 5]),
            "p75": opt_float(summary[offset + 6]),
            "p90": opt_float(summary[offset + 7]),
            "p95": opt_float(summary[offset + 8]),
            "p99": opt_float(summary[offset + 9]),
            "histogram": None,
        }

        has_bounds = lowest is not None and highest is not None
        if has_bounds and present > 0:
            entry["histogram"] = _snowflake_sql_numeric_histogram(
                conn,
                from_expression,
                name,
                ts_filter,
                histogram_bins,
                lowest,
                highest,
            )

        metrics.append(entry)

    return metrics
def _snowflake_mon_query(
    offline_store: SnowflakeOfflineStoreConfig,
    metric_type: str,
    columns: List[str],
    project: str,
    filters: Optional[Dict[str, Any]] = None,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
) -> List[Dict[str, Any]]:
    """Read monitoring rows of ``metric_type`` from Snowflake.

    Args:
        offline_store: Snowflake offline store config (database / schema).
        metric_type: Key understood by ``monitoring_table_meta``.
        columns: Columns to select; also used as the keys of each result dict.
        project: Project id to filter on; an empty value disables the filter.
        filters: Optional ``column -> value`` equality filters. Entries whose
            value is ``None`` are ignored.
        start_date: Inclusive lower bound on ``metric_date``.
        end_date: Inclusive upper bound on ``metric_date``.

    Returns:
        Rows ordered by ``metric_date`` (or ``job_id`` when ``columns`` has no
        ``metric_date``), each passed through ``normalize_monitoring_row``.
    """

    def _quote_ident(name: str) -> str:
        # Filter keys are spliced into the statement as identifiers; double
        # any embedded quote so a key cannot break out of its quoting.
        return '"' + str(name).replace('"', '""') + '"'

    table, _, _ = monitoring_table_meta(metric_type)
    fq = f'"{offline_store.database}"."{offline_store.schema_}"."{table}"'

    conditions: List[str] = []
    if project:
        conditions.append(f'"project_id" = {_snowflake_sql_literal(project)}')
    if filters:
        for key, value in filters.items():
            if value is not None:
                conditions.append(
                    f"{_quote_ident(key)} = {_snowflake_sql_literal(value)}"
                )

    if start_date is not None:
        conditions.append(f'"metric_date" >= {_snowflake_sql_literal(start_date)}')
    if end_date is not None:
        conditions.append(f'"metric_date" <= {_snowflake_sql_literal(end_date)}')

    col_list = ", ".join(f'"{c}"' for c in columns)
    where_clause = " AND ".join(conditions) if conditions else "TRUE"
    order_col = "metric_date" if "metric_date" in columns else "job_id"
    sql = f'SELECT {col_list} FROM {fq} WHERE {where_clause} ORDER BY "{order_col}" ASC'

    with GetSnowflakeConnection(offline_store) as conn:
        cursor = execute_snowflake_statement(conn, sql)
        rows = cursor.fetchall()

    # Positional tuples are paired with the requested column names.
    return [normalize_monitoring_row(dict(zip(columns, row))) for row in rows]
push=getattr(metrics_config, "push", True), materialization=getattr(metrics_config, "materialization", True), freshness=getattr(metrics_config, "freshness", True), + offline_features=getattr(metrics_config, "offline_features", True), + audit_logging=getattr(metrics_config, "audit_logging", False), ) @@ -260,6 +267,33 @@ def build_metrics_flags(metrics_config: Optional[object] = None) -> _MetricsFlag multiprocess_mode="max", ) +# --------------------------------------------------------------------------- +# Offline store retrieval metrics +# --------------------------------------------------------------------------- +offline_store_request_total = Counter( + "feast_offline_store_request_total", + "Total offline store retrieval requests", + ["method", "status"], +) +offline_store_request_latency_seconds = Histogram( + "feast_offline_store_request_latency_seconds", + "Latency of offline store retrieval operations in seconds", + ["method"], + buckets=(0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0, 600.0), +) +offline_store_row_count = Histogram( + "feast_offline_store_row_count", + "Number of rows returned by offline store retrieval", + ["method"], + buckets=(100, 1000, 10000, 100000, 500000, 1000000, 5000000), +) + +# --------------------------------------------------------------------------- +# Audit logger — separate from the main feast logger so operators can +# route SOX-style audit entries to a dedicated sink. 
+# --------------------------------------------------------------------------- +audit_logger = logging.getLogger("feast.audit") + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -388,6 +422,72 @@ def track_materialization( ) +def emit_online_audit_log( + *, + requestor_id: str, + entity_keys: List[str], + entity_count: int, + feature_views: List[str], + feature_count: int, + status: str, + latency_ms: float, +): + """Emit a structured JSON audit log entry for an online feature request.""" + if not _config.audit_logging: + return + audit_logger.info( + _json_dumps( + { + "event": "online_feature_request", + "timestamp": datetime.now(tz=timezone.utc).isoformat(), + "requestor_id": requestor_id, + "entity_keys": entity_keys, + "entity_count": entity_count, + "feature_views": feature_views, + "feature_count": feature_count, + "status": status, + "latency_ms": round(latency_ms, 2), + } + ) + ) + + +def emit_offline_audit_log( + *, + method: str, + feature_views: List[str], + feature_count: int, + row_count: int, + status: str, + start_time: str, + end_time: str, + duration_ms: float, +): + """Emit a structured JSON audit log entry for an offline feature retrieval.""" + if not _config.audit_logging: + return + audit_logger.info( + _json_dumps( + { + "event": "offline_feature_retrieval", + "timestamp": datetime.now(tz=timezone.utc).isoformat(), + "method": method, + "start_time": start_time, + "end_time": end_time, + "feature_views": feature_views, + "feature_count": feature_count, + "row_count": row_count, + "status": status, + "duration_ms": round(duration_ms, 2), + } + ) + ) + + +def _json_dumps(obj: dict) -> str: + return json.dumps(obj, separators=(",", ":")) + + def update_feature_freshness( store: "FeatureStore", ) -> None: @@ -507,6 +607,8 @@ def start_metrics_server( push=True, materialization=True, freshness=True, + offline_features=True, + 
import json
import logging
import uuid
from datetime import date, datetime, timezone
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)

JOB_STATUS_PENDING = "pending"
JOB_STATUS_RUNNING = "running"
JOB_STATUS_COMPLETED = "completed"
JOB_STATUS_FAILED = "failed"


class DQMJobManager:
    """DQM job manager that persists jobs via the offline store abstraction.

    Jobs are stored as rows of the ``"job"`` monitoring metric type through
    the offline store's ``save_monitoring_metrics`` /
    ``query_monitoring_metrics`` methods. ``parameters`` and
    ``result_summary`` are persisted as JSON text and decoded on read.
    """

    # Columns that hold JSON text in storage but dicts in memory.
    _JSON_FIELDS = ("parameters", "result_summary")

    def __init__(self, offline_store, config):
        self._offline_store = offline_store
        self._config = config

    @staticmethod
    def _to_json_text(value: Any) -> Optional[str]:
        """Encode ``value`` as JSON text; ``None`` and strings pass through."""
        if value is None or isinstance(value, str):
            return value
        # default=str keeps summaries containing dates/datetimes serializable.
        return json.dumps(value, default=str)

    def ensure_table(self) -> None:
        """Create the monitoring tables in the offline store if needed."""
        self._offline_store.ensure_monitoring_tables(self._config)

    def submit(
        self,
        project: str,
        job_type: str,
        feature_view_name: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Persist a new job in ``pending`` state and return its id (UUID4)."""
        job_id = str(uuid.uuid4())
        now = datetime.now(timezone.utc)
        row = {
            "job_id": job_id,
            "project_id": project,
            "feature_view_name": feature_view_name,
            "job_type": job_type,
            "status": JOB_STATUS_PENDING,
            "parameters": json.dumps(parameters) if parameters else None,
            "metric_date": now.date(),
            "started_at": None,
            "completed_at": None,
            "error_message": None,
            "result_summary": None,
        }
        self._offline_store.save_monitoring_metrics(self._config, "job", [row])
        return job_id

    def get_job(self, job_id: str) -> Optional[Dict[str, Any]]:
        """Fetch a job by id, or ``None`` if it does not exist.

        JSON text in ``parameters`` / ``result_summary`` is decoded when it
        parses; otherwise the raw string is kept.
        """
        rows = self._offline_store.query_monitoring_metrics(
            config=self._config,
            project="",  # empty project: look the job up by id only
            metric_type="job",
            filters={"job_id": job_id},
        )
        if not rows:
            return None
        record = rows[0]
        for key in self._JSON_FIELDS:
            val = record.get(key)
            if isinstance(val, str):
                try:
                    record[key] = json.loads(val)
                except (json.JSONDecodeError, TypeError):
                    pass
        return record

    def update_status(
        self,
        job_id: str,
        status: str,
        error_message: Optional[str] = None,
        result_summary: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Set a job's status and stamp ``started_at`` / ``completed_at``.

        The job is read, modified and written back as a whole row. Unknown
        job ids are logged and ignored.
        """
        job = self.get_job(job_id)
        if job is None:
            logger.warning("DQM job '%s' not found; status not updated", job_id)
            return

        now = datetime.now(timezone.utc)
        job["status"] = status

        if status == JOB_STATUS_RUNNING:
            job["started_at"] = now
        elif status in (JOB_STATUS_COMPLETED, JOB_STATUS_FAILED):
            job["completed_at"] = now

        if error_message is not None:
            job["error_message"] = error_message
        if result_summary is not None:
            job["result_summary"] = result_summary

        # get_job() decoded the JSON columns; encode every one of them again
        # before writing, otherwise a previously stored result_summary would
        # be handed to the offline store as a dict.
        for key in self._JSON_FIELDS:
            if key in job:
                job[key] = self._to_json_text(job[key])

        if isinstance(job.get("metric_date"), str):
            job["metric_date"] = date.fromisoformat(job["metric_date"])

        self._offline_store.save_monitoring_metrics(self._config, "job", [job])

    def execute_job(self, job_id: str, monitoring_service) -> Dict[str, Any]:
        """Execute a DQM job synchronously. Manages status transitions.

        Raises:
            ValueError: If the job does not exist or has an unknown type.
            Exception: Whatever the monitoring service raises; the job is
                marked ``failed`` before the exception propagates.
        """
        job = self.get_job(job_id)
        if job is None:
            raise ValueError(f"Failed to find DQM job '{job_id}'")

        self.update_status(job_id, JOB_STATUS_RUNNING)

        try:
            params = job.get("parameters") or {}
            job_type = job["job_type"]
            project = job["project_id"]

            if job_type == "auto_compute":
                result = monitoring_service.auto_compute(
                    project=project,
                    feature_view_name=job.get("feature_view_name"),
                )
            elif job_type == "baseline":
                result = monitoring_service.compute_baseline(
                    project=project,
                    feature_view_name=job.get("feature_view_name"),
                    feature_names=params.get("feature_names"),
                )
            elif job_type == "compute":
                result = monitoring_service.compute_metrics(
                    project=project,
                    feature_view_name=job.get("feature_view_name"),
                    feature_names=params.get("feature_names"),
                    start_date=date.fromisoformat(params["start_date"])
                    if params.get("start_date")
                    else None,
                    end_date=date.fromisoformat(params["end_date"])
                    if params.get("end_date")
                    else None,
                    granularity=params.get("granularity", "daily"),
                )
            else:
                raise ValueError(f"Unknown job type '{job_type}'")

            self.update_status(job_id, JOB_STATUS_COMPLETED, result_summary=result)
            return result

        except Exception as e:
            # Recording the failure must not hide the error that caused it.
            try:
                self.update_status(job_id, JOB_STATUS_FAILED, error_message=str(e))
            except Exception:
                logger.exception(
                    "Could not record failure status for DQM job '%s'", job_id
                )
            raise
class MetricsCalculator:
    """Compute per-feature data-quality metrics from PyArrow columns.

    Numeric features get summary statistics, percentiles and an equal-width
    histogram; categorical features get a top-N value frequency table.
    """

    def __init__(self, histogram_bins: int = 20, top_n: int = 10):
        self.histogram_bins = histogram_bins
        self.top_n = top_n

    @staticmethod
    def classify_feature(dtype) -> Optional[str]:
        """Classify a Feast dtype as ``"numeric"``, ``"categorical"`` or ``None``.

        When ``dtype`` exposes a truthy ``base_type`` attribute, that base
        type is classified instead of ``dtype`` itself.
        """
        primitive = dtype
        if hasattr(dtype, "base_type"):
            primitive = dtype.base_type if dtype.base_type else dtype

        if isinstance(primitive, PrimitiveFeastType):
            if primitive in _NUMERIC_TYPES:
                return "numeric"
            if primitive in _CATEGORICAL_TYPES:
                return "categorical"
        return None

    @staticmethod
    def classify_feature_arrow(arrow_type: pa.DataType) -> Optional[str]:
        """Classify a PyArrow data type as numeric or categorical."""
        if (
            pa.types.is_integer(arrow_type)
            or pa.types.is_floating(arrow_type)
            or pa.types.is_decimal(arrow_type)
        ):
            return "numeric"
        if (
            pa.types.is_string(arrow_type)
            or pa.types.is_large_string(arrow_type)
            or pa.types.is_boolean(arrow_type)
        ):
            return "categorical"
        return None

    def compute_numeric(self, array: pa.Array) -> Dict:
        """Compute numeric metrics for one column.

        Nulls are counted and then dropped. Every statistic is passed through
        ``_safe_float`` so NaN / Inf never reach the result, and the histogram
        is built from finite values only.
        """
        total = len(array)
        null_count = array.null_count
        result = {
            "feature_type": "numeric",
            "row_count": total,
            "null_count": null_count,
            "null_rate": null_count / total if total > 0 else 0.0,
            "mean": None,
            "stddev": None,
            "min_val": None,
            "max_val": None,
            "p50": None,
            "p75": None,
            "p90": None,
            "p95": None,
            "p99": None,
            "histogram": None,
        }

        valid = pc.drop_null(array)  # type: ignore[attr-defined]
        if len(valid) == 0:
            return result

        float_array = pc.cast(valid, pa.float64())
        result["mean"] = _safe_float(pc.mean(float_array).as_py())  # type: ignore[attr-defined]
        result["stddev"] = _safe_float(pc.stddev(float_array, ddof=1).as_py())  # type: ignore[attr-defined]

        min_max = pc.min_max(float_array)  # type: ignore[attr-defined]
        result["min_val"] = _safe_float(min_max["min"].as_py())
        result["max_val"] = _safe_float(min_max["max"].as_py())

        quantiles = pc.quantile(float_array, q=[0.50, 0.75, 0.90, 0.95, 0.99])  # type: ignore[attr-defined]
        for key, value in zip(
            ("p50", "p75", "p90", "p95", "p99"), quantiles.to_pylist()
        ):
            result[key] = _safe_float(value)

        # np.histogram raises ValueError when the auto-detected range is not
        # finite, so NaN / Inf values are excluded from the histogram input.
        np_array = float_array.to_numpy()
        finite = np_array[np.isfinite(np_array)]
        if finite.size > 0:
            counts, bin_edges = np.histogram(finite, bins=self.histogram_bins)
            result["histogram"] = {
                "bins": bin_edges.tolist(),
                "counts": counts.tolist(),
                "bin_width": float(bin_edges[1] - bin_edges[0])
                if len(bin_edges) > 1
                else 0.0,
            }

        return result

    def compute_categorical(self, array: pa.Array) -> Dict:
        """Compute categorical metrics for one column.

        The ``histogram`` entry holds the ``top_n`` most frequent values, the
        combined count of all remaining values and the number of distinct
        non-null values. It stays ``None`` when the column has no non-null
        value.
        """
        total = len(array)
        null_count = array.null_count
        result = {
            "feature_type": "categorical",
            "row_count": total,
            "null_count": null_count,
            "null_rate": null_count / total if total > 0 else 0.0,
            "mean": None,
            "stddev": None,
            "min_val": None,
            "max_val": None,
            "p50": None,
            "p75": None,
            "p90": None,
            "p95": None,
            "p99": None,
            "histogram": None,
        }

        valid = pc.drop_null(array)  # type: ignore[attr-defined]
        if len(valid) == 0:
            return result

        value_counts = pc.value_counts(valid)  # type: ignore[attr-defined]
        entries = [
            {"value": vc["values"].as_py(), "count": vc["counts"].as_py()}
            for vc in value_counts
        ]
        entries.sort(key=lambda x: x["count"], reverse=True)

        unique_count = len(entries)
        top_entries = entries[: self.top_n]
        other_count = sum(e["count"] for e in entries[self.top_n :])

        result["histogram"] = {
            "values": top_entries,
            "other_count": other_count,
            "unique_count": unique_count,
        }

        return result

    def compute_all(
        self,
        table: pa.Table,
        feature_fields: List[Tuple[str, str]],
    ) -> List[Dict]:
        """Compute metrics for every ``(name, kind)`` pair found in ``table``.

        Columns missing from the table are skipped with a warning; kinds
        other than ``"numeric"`` / ``"categorical"`` are skipped silently.
        Each returned dict carries the column name under ``feature_name``.
        """
        results = []
        for name, ftype in feature_fields:
            if name not in table.column_names:
                logger.warning("Column '%s' not found in arrow table, skipping", name)
                continue
            column = table.column(name)
            if ftype == "numeric":
                metrics = self.compute_numeric(column)
            elif ftype == "categorical":
                metrics = self.compute_categorical(column)
            else:
                continue
            metrics["feature_name"] = name
            results.append(metrics)
        return results
def auto_compute(
    self,
    project: Optional[str] = None,
    feature_view_name: Optional[str] = None,
) -> Dict[str, Any]:
    """Detect date ranges from source data and compute all granularities.

    For every resolved feature view, the newest timestamp in its batch source
    anchors one window per entry in ``GRANULARITY_WINDOWS``. Per-feature and
    per-view metrics are saved for each window. Feature-service roll-ups are
    refreshed once per granularity after all views have been processed.

    Args:
        project: Project to compute for; defaults to the store's project.
        feature_view_name: Restrict the run to a single feature view.

    Returns:
        Summary dict with ``status``, ``computed_feature_views``,
        ``computed_features``, ``granularities`` and ``duration_ms``.
    """
    start_time = time.time()
    self._ensure_monitoring_tables()
    if project is None:
        project = self._store.config.project

    feature_views = self._resolve_feature_views(project, feature_view_name)
    total_features = 0
    total_views = 0
    granularities_computed: set = set()
    # granularity -> metric dates written by any view in this run.
    pending_service_dates: Dict[str, set] = {}

    for fv in feature_views:
        try:
            feature_fields = self._classify_fields(fv)
            if not feature_fields:
                continue

            max_ts = self._get_max_timestamp(fv)
            if max_ts is None:
                logger.warning(
                    "No data found for feature view '%s', skipping", fv.name
                )
                continue

            now = datetime.now(timezone.utc)

            for granularity, window in GRANULARITY_WINDOWS.items():
                window_start = max_ts - window
                metrics_list = self._compute_feature_metrics(
                    fv,
                    feature_fields,
                    window_start,
                    max_ts,
                )
                self._save_computed_metrics(
                    project=project,
                    feature_view=fv,
                    metrics_list=metrics_list,
                    metric_date=window_start.date(),
                    granularity=granularity,
                    set_baseline=False,
                    now=now,
                )
                pending_service_dates.setdefault(granularity, set()).add(
                    window_start.date()
                )
                total_features += len(metrics_list)
                granularities_computed.add(granularity)
            total_views += 1
        except Exception:
            logger.exception(
                "Failed to auto-compute metrics for feature view '%s'", fv.name
            )

    # Roll up to feature services only after every view has been saved.
    # Doing this inside the loop above re-aggregated every service for each
    # (view, granularity) pair, from whatever subset of views had been
    # written so far.
    for granularity, metric_dates in pending_service_dates.items():
        try:
            self._compute_feature_service_metrics(
                project=project,
                granularity=granularity,
                metric_dates=sorted(metric_dates),
                set_baseline=False,
            )
        except Exception:
            logger.exception(
                "Failed to compute feature service metrics for granularity '%s'",
                granularity,
            )

    duration_ms = int((time.time() - start_time) * 1000)

    return {
        "status": "completed",
        "computed_feature_views": total_views,
        "computed_features": total_features,
        "granularities": sorted(granularities_computed),
        "duration_ms": duration_ms,
    }
" + f"Must be one of {VALID_GRANULARITIES}" + ) + + start_time = time.time() + start_dt, end_dt = self._to_date_range(start_date, end_date) + + if project is None: + project = self._store.config.project + + fs = self._store.registry.get_feature_service( + name=feature_service_name, project=project + ) + log_source = self._resolve_log_source(fs) + if log_source is None: + return { + "status": "skipped", + "reason": f"Feature service '{feature_service_name}' has no logging configured", + "duration_ms": int((time.time() - start_time) * 1000), + } + + data_source, ts_field, feature_fields, log_col_map = log_source + metrics_list = self._compute_from_source( + data_source, + ts_field, + feature_fields, + start_dt, + end_dt, + ) + + now = datetime.now(timezone.utc) + metric_date = start_dt.date() + + self._save_log_metrics( + project=project, + feature_service_name=feature_service_name, + log_col_map=log_col_map, + metrics_list=metrics_list, + metric_date=metric_date, + granularity=granularity, + set_baseline=set_baseline, + now=now, + ) + + duration_ms = int((time.time() - start_time) * 1000) + return { + "status": "completed", + "data_source_type": "log", + "feature_service_name": feature_service_name, + "granularity": granularity, + "computed_features": len(metrics_list), + "metric_date": metric_date.isoformat(), + "duration_ms": duration_ms, + } + + def auto_compute_log_metrics( + self, + project: Optional[str] = None, + feature_service_name: Optional[str] = None, + ) -> Dict[str, Any]: + """Auto-detect date ranges from log data and compute all granularities.""" + start_time = time.time() + self._ensure_monitoring_tables() + if project is None: + project = self._store.config.project + + if feature_service_name: + services = [ + self._store.registry.get_feature_service( + name=feature_service_name, project=project + ) + ] + else: + services = self._store.registry.list_feature_services(project=project) + + total_features = 0 + total_services = 0 + 
def compute_baseline(
    self,
    project: Optional[str] = None,
    feature_view_name: Optional[str] = None,
    feature_names: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Compute baseline metrics over the full history of each batch source.

    Only features that do not already have a baseline row are computed, over
    the widest window (``_EPOCH`` to ``_FAR_FUTURE``), and stored as "daily"
    metrics flagged ``is_baseline``.

    NOTE(review): the save step is invoked with ``set_baseline=True``, which
    in ``_save_computed_metrics`` clears the baseline for the whole feature
    view before writing. Confirm this does not drop baselines of features
    that were skipped here because they already had one.

    Args:
        project: Project to compute for; defaults to the store's project.
        feature_view_name: Restrict the run to a single feature view.
        feature_names: Restrict the run to these feature names.

    Returns:
        Summary dict with ``status``, ``computed_features``,
        ``computed_feature_views``, ``is_baseline`` and ``duration_ms``.
    """
    started_at = time.time()
    self._ensure_monitoring_tables()
    if project is None:
        project = self._store.config.project

    feature_total = 0
    view_total = 0

    for fv in self._resolve_feature_views(project, feature_view_name):
        try:
            missing_fields = self._get_features_without_baseline(
                project, fv, feature_names
            )
            if not missing_fields:
                logger.info(
                    "All features in '%s' already have baselines, skipping",
                    fv.name,
                )
                continue

            classified = self._classify_fields(fv, fields=missing_fields)
            if not classified:
                continue

            baseline_rows = self._compute_feature_metrics(
                fv,
                classified,
                _EPOCH,
                _FAR_FUTURE,
            )

            computed_at = datetime.now(timezone.utc)
            self._save_computed_metrics(
                project=project,
                feature_view=fv,
                metrics_list=baseline_rows,
                metric_date=date.today(),
                granularity="daily",
                set_baseline=True,
                now=computed_at,
            )

            feature_total += len(baseline_rows)
            view_total += 1
        except Exception:
            logger.exception(
                "Failed to compute baseline for feature view '%s'", fv.name
            )

    elapsed_ms = int((time.time() - started_at) * 1000)

    return {
        "status": "completed",
        "computed_features": feature_total,
        "computed_feature_views": view_total,
        "is_baseline": True,
        "duration_ms": elapsed_ms,
    }
def compute_transient(
    self,
    project: str,
    feature_view_name: str,
    feature_names: Optional[List[str]] = None,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
) -> Dict[str, Any]:
    """Compute metrics on-the-fly for an arbitrary date range without persisting.

    Args:
        project: Project that owns the feature view.
        feature_view_name: Feature view whose batch source is profiled.
        feature_names: Optional subset of features to profile.
        start_date: First day of the window; defaulted by ``_to_date_range``.
        end_date: Last day of the window; defaulted by ``_to_date_range``.

    Returns:
        Dict with ``status``, ``feature_view_name``, ``start_date``,
        ``end_date`` (ISO dates of the window that was queried), ``metrics``
        and ``duration_ms``. Each metric row is tagged with the view name
        and the same window.
    """
    start_time = time.time()
    start_dt, end_dt = self._to_date_range(start_date, end_date)
    # Report the window that is actually queried. Re-deriving the defaults
    # here used to disagree with _to_date_range when only end_date was
    # given: the response said "today - 1" while the query used
    # "end_date - 1".
    window_start = start_dt.date().isoformat()
    window_end = end_dt.date().isoformat()

    fv = self._store.registry.get_feature_view(
        name=feature_view_name, project=project
    )

    feature_fields = self._classify_fields(fv, feature_names=feature_names)
    if feature_fields:
        metrics_list = self._compute_feature_metrics(
            fv,
            feature_fields,
            start_dt,
            end_dt,
        )
        for m in metrics_list:
            m["feature_view_name"] = feature_view_name
            m["start_date"] = window_start
            m["end_date"] = window_end
    else:
        # Nothing classifiable: skip the source query entirely.
        metrics_list = []

    return {
        "status": "completed",
        "feature_view_name": feature_view_name,
        "start_date": window_start,
        "end_date": window_end,
        "metrics": metrics_list,
        "duration_ms": int((time.time() - start_time) * 1000),
    }
def get_feature_metrics(
    self,
    project: str,
    feature_service_name: Optional[str] = None,
    feature_view_name: Optional[str] = None,
    feature_name: Optional[str] = None,
    granularity: Optional[str] = None,
    data_source_type: Optional[str] = None,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
) -> List[Dict[str, Any]]:
    """Read stored per-feature metrics.

    Without ``feature_service_name`` a single query is issued with the given
    filters. With it, one query is issued per feature view handed back by
    ``_get_metrics_by_service``; in that mode the ``feature_view_name``
    argument is replaced by each of those view names in turn.
    """
    base_filters = {
        "feature_view_name": feature_view_name,
        "feature_name": feature_name,
        "granularity": granularity,
        "data_source_type": data_source_type,
    }
    if not feature_service_name:
        return self._query("feature", project, base_filters, start_date, end_date)

    def query_view(fv_name):
        per_view = dict(base_filters)
        per_view["feature_view_name"] = fv_name
        return self._query("feature", project, per_view, start_date, end_date)

    return self._get_metrics_by_service(project, feature_service_name, query_view)


def get_feature_view_metrics(
    self,
    project: str,
    feature_service_name: Optional[str] = None,
    feature_view_name: Optional[str] = None,
    granularity: Optional[str] = None,
    data_source_type: Optional[str] = None,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
) -> List[Dict[str, Any]]:
    """Read stored per-feature-view aggregate metrics.

    Mirrors ``get_feature_metrics``: one query when no service is named,
    otherwise one query per feature view of the service.
    """
    base_filters = {
        "feature_view_name": feature_view_name,
        "granularity": granularity,
        "data_source_type": data_source_type,
    }
    if not feature_service_name:
        return self._query(
            "feature_view", project, base_filters, start_date, end_date
        )

    def query_view(fv_name):
        per_view = dict(base_filters)
        per_view["feature_view_name"] = fv_name
        return self._query("feature_view", project, per_view, start_date, end_date)

    return self._get_metrics_by_service(project, feature_service_name, query_view)
def get_baseline(
    self,
    project: str,
    feature_view_name: Optional[str] = None,
    feature_name: Optional[str] = None,
    data_source_type: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """Read the per-feature metric rows that are flagged as baseline."""
    baseline_filters = dict(
        feature_view_name=feature_view_name,
        feature_name=feature_name,
        data_source_type=data_source_type,
        is_baseline=True,
    )
    return self._query("feature", project, baseline_filters)


def get_timeseries(
    self,
    project: str,
    feature_view_name: Optional[str] = None,
    feature_name: Optional[str] = None,
    feature_service_name: Optional[str] = None,
    granularity: Optional[str] = None,
    data_source_type: Optional[str] = None,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
) -> List[Dict[str, Any]]:
    """Return per-feature metrics; a thin alias of ``get_feature_metrics``."""
    forwarded = {
        "project": project,
        "feature_service_name": feature_service_name,
        "feature_view_name": feature_view_name,
        "feature_name": feature_name,
        "granularity": granularity,
        "data_source_type": data_source_type,
        "start_date": start_date,
        "end_date": end_date,
    }
    return self.get_feature_metrics(**forwarded)
+ """ + if project is None: + project = self._store.config.project + + if feature_views is None: + feature_views = self._store.registry.list_feature_views(project=project) + + job_ids = [] + for fv in feature_views: + new_features = self._get_features_without_baseline(project, fv) + if not new_features: + continue + + feature_names = [f.name for f in new_features] + job_id = self.job_manager.submit( + project=project, + job_type="baseline", + feature_view_name=fv.name, + parameters={"feature_names": feature_names}, + ) + job_ids.append(job_id) + logger.info( + "Queued baseline computation for '%s' features %s (job: %s)", + fv.name, + feature_names, + job_id, + ) + + return job_ids + + # ------------------------------------------------------------------ # + # Private: compute engine dispatch (SQL push-down → Python fallback) + # ------------------------------------------------------------------ # + + def _compute_feature_metrics( + self, + feature_view, + feature_fields: List[Tuple[str, str]], + start_dt: datetime, + end_dt: datetime, + ) -> List[Dict[str, Any]]: + """Compute metrics from a feature view's batch source.""" + return self._compute_from_source( + feature_view.batch_source, + feature_view.batch_source.timestamp_field, + feature_fields, + start_dt, + end_dt, + ) + + def _get_max_timestamp(self, feature_view) -> Optional[datetime]: + """Query the batch source for MAX(event_timestamp).""" + return self._get_max_timestamp_for_source( + feature_view.batch_source, + feature_view.batch_source.timestamp_field, + ) + + # ------------------------------------------------------------------ # + # Private: shared helpers (DRY) + # ------------------------------------------------------------------ # + + @staticmethod + def _to_date_range( + start_date: Optional[date], end_date: Optional[date] + ) -> Tuple[datetime, datetime]: + today = date.today() + if end_date is None: + end_date = today + if start_date is None: + start_date = end_date - timedelta(days=1) + start_dt 
= datetime( + start_date.year, start_date.month, start_date.day, tzinfo=timezone.utc + ) + end_dt = datetime( + end_date.year, end_date.month, end_date.day, 23, 59, 59, tzinfo=timezone.utc + ) + return start_dt, end_dt + + @staticmethod + def _classify_fields( + feature_view, + feature_names=None, + fields=None, + ) -> List[Tuple[str, str]]: + """Extract and classify features as numeric/categorical. + + Args: + feature_view: FeatureView to extract fields from (used if fields is None). + feature_names: Optional filter list of feature names. + fields: Optional pre-selected Field objects (e.g., from idempotency check). + """ + if fields is None: + fields = feature_view.features + if feature_names: + fields = [f for f in fields if f.name in feature_names] + + result = [] + for field in fields: + ftype = MetricsCalculator.classify_feature(field.dtype) + if ftype is None: + logger.warning( + "Unsupported dtype '%s' for feature '%s', skipping", + field.dtype, + field.name, + ) + continue + result.append((field.name, ftype)) + return result + + def _save_computed_metrics( + self, + project: str, + feature_view, + metrics_list: List[Dict[str, Any]], + metric_date: date, + granularity: str, + set_baseline: bool, + now: datetime, + ) -> None: + if not metrics_list: + return + + offline_store = self._get_offline_store() + config = self._store.config + + if set_baseline: + offline_store.clear_monitoring_baseline( + config=config, + project=project, + feature_view_name=feature_view.name, + ) + + for m in metrics_list: + m["project_id"] = project + m["feature_view_name"] = feature_view.name + m["metric_date"] = metric_date + m["granularity"] = granularity + m["data_source_type"] = "batch" + m["computed_at"] = now + m["is_baseline"] = set_baseline + + offline_store.save_monitoring_metrics(config, "feature", metrics_list) + + view_metric = { + "project_id": project, + "feature_view_name": feature_view.name, + "metric_date": metric_date, + "granularity": granularity, + 
"data_source_type": "batch", + "computed_at": now, + "is_baseline": set_baseline, + **build_view_aggregate(metrics_list), + } + offline_store.save_monitoring_metrics(config, "feature_view", [view_metric]) + + def _resolve_join_key_columns(self, feature_view) -> List[str]: + config = self._store.config + return ( + [ + entity.name + for entity in self._store.registry.list_entities(project=config.project) + if entity.name in (feature_view.entities or []) + ] + or feature_view.entities + or [] + ) + + def _get_metrics_by_service( + self, project: str, feature_service_name: str, query_fn + ): + fs = self._store.registry.get_feature_service( + name=feature_service_name, project=project + ) + fv_names = [proj.name for proj in fs.feature_view_projections] + results = [] + for fv_name in fv_names: + results.extend(query_fn(fv_name)) + return results + + def _resolve_feature_views(self, project: str, feature_view_name: Optional[str]): + if feature_view_name: + fv = self._store.registry.get_feature_view( + name=feature_view_name, project=project + ) + return [fv] + return self._store.registry.list_feature_views(project=project) + + def _get_features_without_baseline(self, project, feature_view, feature_names=None): + existing = self.get_baseline( + project=project, + feature_view_name=feature_view.name, + ) + existing_names = {m["feature_name"] for m in existing} + + fields = feature_view.features + if feature_names: + fields = [f for f in fields if f.name in feature_names] + + return [f for f in fields if f.name not in existing_names] + + def _compute_for_feature_view( + self, + project: str, + feature_view, + feature_names: Optional[List[str]], + start_dt: datetime, + end_dt: datetime, + granularity: str, + set_baseline: bool, + ) -> Dict[str, Any]: + feature_fields = self._classify_fields( + feature_view, feature_names=feature_names + ) + if not feature_fields: + return {"feature_count": 0, "dates": set()} + + metrics_list = self._compute_feature_metrics( + feature_view, 
def _resolve_log_source(self, feature_service):
    """Resolve log data source for a feature service.

    Returns ``(data_source, timestamp_field, feature_fields, log_col_map)``,
    or ``None`` when the service has no logging config, its destination
    cannot be converted to a data source, or no classifiable feature column
    is found in the log schema.

    ``feature_fields`` uses the raw log column names. ``log_col_map`` maps
    each raw log column to ``(feature_view_name, feature_name)`` so callers
    can store metrics under the originating view and feature.
    """
    if not feature_service.logging_config:
        return None

    destination = feature_service.logging_config.destination
    try:
        data_source = destination.to_data_source()
    except NotImplementedError:
        logger.warning(
            "Logging destination for '%s' does not support to_data_source()",
            feature_service.name,
        )
        return None

    logging_source = FeatureServiceLoggingSource(
        feature_service,
        self._store.config.project,
    )
    schema = logging_source.get_schema(self._store.registry)

    skip_cols = {
        LOG_TIMESTAMP_FIELD,
        "__log_date",
        "__request_id",
    }
    entity_columns = set()
    view_feature_names: dict = {}
    for proj in feature_service.feature_view_projections:
        view_alias = proj.name_to_use()
        try:
            fv = self._store.registry.get_feature_view(
                name=proj.name, project=self._store.config.project
            )
            for ec in fv.entity_columns:
                entity_columns.add(ec.name)
        except Exception:
            # Best effort: keep going, but surface the failure. Without the
            # view its entity columns cannot be excluded below and may end
            # up being profiled as features.
            logger.warning(
                "Could not load feature view '%s' for feature service '%s'; "
                "its entity columns will not be excluded from log metrics",
                proj.name,
                feature_service.name,
                exc_info=True,
            )
        for feat in proj.features:
            log_col = f"{view_alias}__{feat.name}"
            view_feature_names[log_col] = (proj.name, feat.name)

    feature_fields = []
    log_col_map: dict = {}
    for field in schema:
        if field.name in skip_cols or field.name in entity_columns:
            continue
        # Per-feature companion columns are bookkeeping, not feature values.
        if field.name.endswith(("__timestamp", "__status")):
            continue
        ftype = MetricsCalculator.classify_feature_arrow(field.type)
        if ftype is not None:
            feature_fields.append((field.name, ftype))
            if field.name in view_feature_names:
                log_col_map[field.name] = view_feature_names[field.name]

    if not feature_fields:
        return None

    return data_source, LOG_TIMESTAMP_FIELD, feature_fields, log_col_map
+ """ + offline_store = self._get_offline_store() + try: + return offline_store.get_monitoring_max_timestamp( + config=self._store.config, + data_source=data_source, + timestamp_field=ts_field, + ) + except NotImplementedError: + pass + + import pyarrow.compute as pc + + retrieval_job = offline_store.pull_all_from_table_or_query( + config=self._store.config, + data_source=data_source, + join_key_columns=[], + feature_name_columns=[], + timestamp_field=ts_field, + start_date=_EPOCH, + end_date=_FAR_FUTURE, + ) + + table = retrieval_job.to_arrow() + if ts_field not in table.column_names or len(table) == 0: + return None + + max_val = pc.max(table.column(ts_field)).as_py() + if max_val is None: + return None + + if isinstance(max_val, datetime): + return max_val if max_val.tzinfo else max_val.replace(tzinfo=timezone.utc) + return datetime.combine(max_val, datetime.min.time(), tzinfo=timezone.utc) + + def _compute_from_source( + self, + data_source, + ts_field: str, + feature_fields: List[Tuple[str, str]], + start_dt: datetime, + end_dt: datetime, + ) -> List[Dict[str, Any]]: + """Compute metrics from an arbitrary data source (batch or log). + + Prefers SQL push-down; falls back to Python-based computation. 
+ """ + offline_store = self._get_offline_store() + try: + return offline_store.compute_monitoring_metrics( + config=self._store.config, + data_source=data_source, + feature_columns=feature_fields, + timestamp_field=ts_field, + start_date=start_dt, + end_date=end_dt, + histogram_bins=self._calculator.histogram_bins, + top_n=self._calculator.top_n, + ) + except NotImplementedError: + logger.debug( + "Offline store does not support compute_monitoring_metrics, " + "falling back to Python-based computation for log source" + ) + retrieval_job = offline_store.pull_all_from_table_or_query( + config=self._store.config, + data_source=data_source, + join_key_columns=[], + feature_name_columns=[name for name, _ in feature_fields], + timestamp_field=ts_field, + start_date=start_dt, + end_date=end_dt, + ) + arrow_table = retrieval_job.to_arrow() + return self._calculator.compute_all(arrow_table, feature_fields) + + def _save_log_metrics( + self, + project: str, + feature_service_name: str, + log_col_map: Dict[str, Tuple[str, str]], + metrics_list: List[Dict[str, Any]], + metric_date: date, + granularity: str, + set_baseline: bool, + now: datetime, + ) -> None: + """Save log-sourced metrics tagged with data_source_type='log'. + + Normalizes log column names (``driver_stats__conv_rate``) back to + their originating ``feature_view_name`` and ``feature_name`` so + that drift detection can join batch and log metrics on the same + feature identity. 
+ """ + if not metrics_list: + return + + offline_store = self._get_offline_store() + config = self._store.config + + for m in metrics_list: + log_col = m.get("feature_name", "") + view_name, feat_name = log_col_map.get( + log_col, (feature_service_name, log_col) + ) + m["project_id"] = project + m["feature_view_name"] = view_name + m["feature_name"] = feat_name + m["metric_date"] = metric_date + m["granularity"] = granularity + m["data_source_type"] = "log" + m["computed_at"] = now + m["is_baseline"] = set_baseline + + offline_store.save_monitoring_metrics(config, "feature", metrics_list) + + # --- per-feature-view aggregates (grouped by originating view) --- + by_view: Dict[str, List[Dict[str, Any]]] = defaultdict(list) + for m in metrics_list: + by_view[m["feature_view_name"]].append(m) + + view_metrics = [ + { + "project_id": project, + "feature_view_name": vname, + "metric_date": metric_date, + "granularity": granularity, + "data_source_type": "log", + "computed_at": now, + "is_baseline": set_baseline, + **build_view_aggregate(vmetrics), + } + for vname, vmetrics in by_view.items() + ] + offline_store.save_monitoring_metrics(config, "feature_view", view_metrics) + + # --- feature service aggregate --- + svc_agg = build_view_aggregate(metrics_list) + svc_metric = { + "project_id": project, + "feature_service_name": feature_service_name, + "metric_date": metric_date, + "granularity": granularity, + "data_source_type": "log", + "computed_at": now, + "is_baseline": set_baseline, + "total_feature_views": len(by_view), + "total_features": svc_agg["total_features"], + "avg_null_rate": svc_agg["avg_null_rate"], + "max_null_rate": svc_agg["max_null_rate"], + } + offline_store.save_monitoring_metrics(config, "feature_service", [svc_metric]) + + def _read_batch_source(self, feature_view, feature_fields, start_dt, end_dt): + config = self._store.config + data_source = feature_view.batch_source + offline_store = self._get_offline_store() + + retrieval_job = 
offline_store.pull_all_from_table_or_query( + config=config, + data_source=data_source, + join_key_columns=self._resolve_join_key_columns(feature_view), + feature_name_columns=[name for name, _ in feature_fields], + timestamp_field=data_source.timestamp_field, + created_timestamp_column=data_source.created_timestamp_column, + start_date=start_dt, + end_date=end_dt, + ) + + return retrieval_job.to_arrow() + + def _compute_feature_service_metrics( + self, + project: str, + granularity: str, + metric_dates: List[date], + set_baseline: bool, + ) -> int: + if not metric_dates: + return 0 + + feature_services = self._store.registry.list_feature_services(project=project) + if not feature_services: + return 0 + + offline_store = self._get_offline_store() + config = self._store.config + now = datetime.now(timezone.utc) + count = 0 + + for fs in feature_services: + try: + fv_names = {proj.name for proj in fs.feature_view_projections} + + for metric_date in metric_dates: + fv_metrics = offline_store.query_monitoring_metrics( + config=config, + project=project, + metric_type="feature_view", + filters={ + "granularity": granularity, + "data_source_type": "batch", + }, + start_date=metric_date, + end_date=metric_date, + ) + + relevant = [ + m for m in fv_metrics if m.get("feature_view_name") in fv_names + ] + if not relevant: + continue + + null_rates = [ + m["avg_null_rate"] + for m in relevant + if m.get("avg_null_rate") is not None + ] + + service_metric = { + "project_id": project, + "feature_service_name": fs.name, + "metric_date": metric_date + if isinstance(metric_date, date) + else date.fromisoformat(str(metric_date)), + "granularity": granularity, + "data_source_type": "batch", + "computed_at": now, + "is_baseline": set_baseline, + "total_feature_views": len(relevant), + "total_features": sum( + m.get("total_features", 0) for m in relevant + ), + "avg_null_rate": ( + sum(null_rates) / len(null_rates) if null_rates else 0.0 + ), + "max_null_rate": max(null_rates) if 
null_rates else 0.0, + } + offline_store.save_monitoring_metrics( + config, + "feature_service", + [service_metric], + ) + count += 1 + except Exception: + logger.exception("Failed to compute service metrics for '%s'", fs.name) + + return count diff --git a/sdk/python/feast/monitoring/monitoring_utils.py b/sdk/python/feast/monitoring/monitoring_utils.py new file mode 100644 index 00000000000..67d545d8786 --- /dev/null +++ b/sdk/python/feast/monitoring/monitoring_utils.py @@ -0,0 +1,272 @@ +"""Shared constants and helpers for monitoring across all offline store backends. + +Every backend needs the same table names, column lists, primary keys, +empty-metric templates, and result-row normalization. Centralizing them +here avoids ~8x duplication and prevents column-list drift. +""" + +import json +import math +from datetime import date, datetime +from typing import Any, Dict, List, Optional, Tuple + +# ------------------------------------------------------------------ # +# Table names +# ------------------------------------------------------------------ # + +MON_TABLE_FEATURE = "feast_monitoring_feature_metrics" +MON_TABLE_FEATURE_VIEW = "feast_monitoring_feature_view_metrics" +MON_TABLE_FEATURE_SERVICE = "feast_monitoring_feature_service_metrics" +MON_TABLE_JOB = "feast_monitoring_jobs" + +# ------------------------------------------------------------------ # +# Column definitions — (ordered, used by INSERT / SELECT / Parquet) +# ------------------------------------------------------------------ # + +FEATURE_METRICS_COLUMNS: List[str] = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "feature_type", + "row_count", + "null_count", + "null_rate", + "mean", + "stddev", + "min_val", + "max_val", + "p50", + "p75", + "p90", + "p95", + "p99", + "histogram", +] + +FEATURE_METRICS_PK: List[str] = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + 
"granularity", + "data_source_type", +] + +FEATURE_VIEW_METRICS_COLUMNS: List[str] = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_row_count", + "total_features", + "features_with_nulls", + "avg_null_rate", + "max_null_rate", +] + +FEATURE_VIEW_METRICS_PK: List[str] = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", +] + +FEATURE_SERVICE_METRICS_COLUMNS: List[str] = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_feature_views", + "total_features", + "avg_null_rate", + "max_null_rate", +] + +FEATURE_SERVICE_METRICS_PK: List[str] = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", +] + +JOB_COLUMNS: List[str] = [ + "job_id", + "project_id", + "feature_view_name", + "job_type", + "status", + "parameters", + "metric_date", + "started_at", + "completed_at", + "error_message", + "result_summary", +] + +JOB_PK: List[str] = [ + "job_id", +] + + +def monitoring_table_meta( + metric_type: str, +) -> Tuple[str, List[str], List[str]]: + """Return (table_name, columns, pk_columns) for a metric type. + + Raises ValueError for unknown metric types. 
+ """ + if metric_type == "feature": + return MON_TABLE_FEATURE, FEATURE_METRICS_COLUMNS, FEATURE_METRICS_PK + if metric_type == "feature_view": + return ( + MON_TABLE_FEATURE_VIEW, + FEATURE_VIEW_METRICS_COLUMNS, + FEATURE_VIEW_METRICS_PK, + ) + if metric_type == "feature_service": + return ( + MON_TABLE_FEATURE_SERVICE, + FEATURE_SERVICE_METRICS_COLUMNS, + FEATURE_SERVICE_METRICS_PK, + ) + if metric_type == "job": + return MON_TABLE_JOB, JOB_COLUMNS, JOB_PK + raise ValueError(f"Unknown monitoring metric_type: '{metric_type}'") + + +# ------------------------------------------------------------------ # +# Tiny helpers duplicated across backends +# ------------------------------------------------------------------ # + + +def opt_float(val: Any) -> Optional[float]: + """Safely cast a value to float, returning None for None/NaN/Inf.""" + if val is None: + return None + f = float(val) + if math.isnan(f) or math.isinf(f): + return None + return f + + +def empty_numeric_metric(feature_name: str) -> Dict[str, Any]: + """Return a metric dict with all-None stats for a numeric feature.""" + return { + "feature_name": feature_name, + "feature_type": "numeric", + "row_count": 0, + "null_count": 0, + "null_rate": 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": None, + } + + +def empty_categorical_metric(feature_name: str) -> Dict[str, Any]: + """Return a metric dict with all-None stats for a categorical feature.""" + return { + "feature_name": feature_name, + "feature_type": "categorical", + "row_count": 0, + "null_count": 0, + "null_rate": 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": None, + } + + +# ------------------------------------------------------------------ # +# Result-row normalization (used after SQL fetch or Parquet read) +# 
------------------------------------------------------------------ # + + +def normalize_monitoring_row(record: Dict[str, Any]) -> Dict[str, Any]: + """Normalize a monitoring metric dict for JSON serialization. + + - Replaces float NaN / Inf with None (not JSON-serializable). + - Parses ``histogram`` from JSON string if needed. + - Converts ``metric_date`` / ``computed_at`` to ISO strings. + - Normalizes ``is_baseline`` to Python bool. + """ + import math + + for key, val in record.items(): + if isinstance(val, float) and (math.isnan(val) or math.isinf(val)): + record[key] = None + + hist = record.get("histogram") + if isinstance(hist, str): + try: + record["histogram"] = json.loads(hist) + except (json.JSONDecodeError, TypeError): + pass + + for key in ("metric_date", "computed_at"): + val = record.get(key) + if isinstance(val, (date, datetime)): + record[key] = val.isoformat() + + baseline = record.get("is_baseline") + if baseline is not None: + record["is_baseline"] = bool(baseline) + + return record + + +# ------------------------------------------------------------------ # +# View-level aggregate builder (shared by batch + log save paths) +# ------------------------------------------------------------------ # + + +def build_view_aggregate( + metrics_list: List[Dict[str, Any]], +) -> Dict[str, Any]: + """Compute view-level aggregate stats from per-feature metrics. + + Returns a dict with keys: total_row_count, total_features, + features_with_nulls, avg_null_rate, max_null_rate. 
+ """ + null_rates = [ + m["null_rate"] for m in metrics_list if m.get("null_rate") is not None + ] + return { + "total_row_count": max( + (m["row_count"] for m in metrics_list if m.get("row_count") is not None), + default=0, + ), + "total_features": len(metrics_list), + "features_with_nulls": sum( + 1 for m in metrics_list if (m.get("null_count") or 0) > 0 + ), + "avg_null_rate": sum(null_rates) / len(null_rates) if null_rates else 0.0, + "max_null_rate": max(null_rates) if null_rates else 0.0, + } diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index 3fbcb9ec498..a13f481e577 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -231,6 +231,13 @@ class MaterializationConfig(BaseModel): Supported compute engines: local, spark, ray. """ +class DqmConfig(FeastConfigBaseModel): + """Data Quality Monitoring (DQM) configuration.""" + + auto_baseline: StrictBool = True + """Whether baseline distribution is computed automatically on ``feast apply``.""" + + class OpenLineageConfig(FeastBaseModel): """Configuration for OpenLineage integration. @@ -355,6 +362,9 @@ class RepoConfig(FeastBaseModel): openlineage_config: Optional[OpenLineageConfig] = Field(None, alias="openlineage") """ Configuration for OpenLineage data lineage integration (optional). """ + dqm_config: Optional[DqmConfig] = Field(None, alias="dqm") + """ DqmConfig: Data Quality Monitoring configuration (optional). 
""" + def __init__(self, **data: Any): super().__init__(**data) diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py index 28fe86602ad..767688193f0 100644 --- a/sdk/python/feast/repo_operations.py +++ b/sdk/python/feast/repo_operations.py @@ -1,594 +1,621 @@ -import base64 -import importlib -import json -import logging -import os -import random -import re -import sys -import tempfile -from importlib.abc import Loader -from importlib.machinery import ModuleSpec -from pathlib import Path -from typing import List, Optional, Set, Union - -import click -from click.exceptions import BadParameter - -from feast import PushSource -from feast.batch_feature_view import BatchFeatureView -from feast.constants import FEATURE_STORE_YAML_ENV_NAME -from feast.data_source import DataSource, KafkaSource, KinesisSource -from feast.diff.registry_diff import extract_objects_for_keep_delete_update_add -from feast.entity import Entity -from feast.feature_service import FeatureService -from feast.feature_store import FeatureStore -from feast.feature_view import DUMMY_ENTITY, FeatureView -from feast.file_utils import replace_str_in_file -from feast.infra.registry.base_registry import BaseRegistry -from feast.infra.registry.registry import FEAST_OBJECT_TYPES, FeastObjectType, Registry -from feast.names import adjectives, animals -from feast.on_demand_feature_view import OnDemandFeatureView -from feast.permissions.permission import Permission -from feast.project import Project -from feast.repo_config import RepoConfig -from feast.repo_contents import RepoContents -from feast.stream_feature_view import StreamFeatureView - -logger = logging.getLogger(__name__) - - -def py_path_to_module(path: Path) -> str: - return ( - str(path.relative_to(os.getcwd()))[: -len(".py")] - .replace("./", "") - .replace("/", ".") - .replace("\\", ".") - ) - - -def read_feastignore(repo_root: Path) -> List[str]: - """Read .feastignore in the repo root directory (if exists) and 
return the list of user-defined ignore paths""" - feast_ignore = repo_root / ".feastignore" - if not feast_ignore.is_file(): - return [] - lines = feast_ignore.read_text().strip().split("\n") - ignore_paths = [] - for line in lines: - # Remove everything after the first occurance of "#" symbol (comments) - if line.find("#") >= 0: - line = line[: line.find("#")] - # Strip leading or ending whitespaces - line = line.strip() - # Add this processed line to ignore_paths if it's not empty - if len(line) > 0: - ignore_paths.append(line) - return ignore_paths - - -def get_ignore_files(repo_root: Path, ignore_paths: List[str]) -> Set[Path]: - """Get all ignore files that match any of the user-defined ignore paths""" - ignore_files = set() - for ignore_path in set(ignore_paths): - # ignore_path may contains matchers (* or **). Use glob() to match user-defined path to actual paths - for matched_path in repo_root.glob(ignore_path): - if matched_path.is_file(): - # If the matched path is a file, add that to ignore_files set - ignore_files.add(matched_path.resolve()) - else: - # Otherwise, list all Python files in that directory and add all of them to ignore_files set - ignore_files |= { - sub_path.resolve() - for sub_path in matched_path.glob("**/*.py") - if sub_path.is_file() - } - return ignore_files - - -def get_repo_files(repo_root: Path) -> List[Path]: - """Get the list of all repo files, ignoring undesired files & directories specified in .feastignore""" - # Read ignore paths from .feastignore and create a set of all files that match any of these paths - ignore_paths = read_feastignore(repo_root) + [ - ".git", - ".feastignore", - ".venv", - "**/.ipynb_checkpoints", - "**/.pytest_cache", - "**/__pycache__", - ] - ignore_files = get_ignore_files(repo_root, ignore_paths) - - # List all Python files in the root directory (recursively) - repo_files = { - p.resolve() - for p in repo_root.glob("**/*.py") - if p.is_file() and "__init__.py" != p.name - } - # Ignore all files that 
match any of the ignore paths in .feastignore - repo_files -= ignore_files - - # Sort repo_files to read them in the same order every time - return sorted(repo_files) - - -def parse_repo(repo_root: Path) -> RepoContents: - """ - Collects unique Feast object definitions from the given feature repo. - - Specifically, if an object foo has already been added, bar will still be added if - (bar == foo), but not if (bar is foo). This ensures that import statements will - not result in duplicates, but defining two equal objects will. - """ - res = RepoContents( - projects=[], - data_sources=[], - entities=[], - feature_views=[], - feature_services=[], - on_demand_feature_views=[], - stream_feature_views=[], - permissions=[], - ) - - for repo_file in get_repo_files(repo_root): - module_path = py_path_to_module(repo_file) - module = importlib.import_module(module_path) - - for attr_name in dir(module): - obj = getattr(module, attr_name) - - if isinstance(obj, DataSource) and not any( - (obj is ds) for ds in res.data_sources - ): - res.data_sources.append(obj) - - # Handle batch sources defined within stream sources. - if ( - isinstance(obj, PushSource) - or isinstance(obj, KafkaSource) - or isinstance(obj, KinesisSource) - ): - batch_source = obj.batch_source - - if batch_source and not any( - (batch_source is ds) for ds in res.data_sources - ): - res.data_sources.append(batch_source) - if ( - isinstance(obj, FeatureView) - and not any((obj is fv) for fv in res.feature_views) - and not isinstance(obj, StreamFeatureView) - and not isinstance(obj, BatchFeatureView) - ): - res.feature_views.append(obj) - - # Handle batch sources defined with feature views. - batch_source = obj.batch_source - if batch_source is not None and not any( - (batch_source is ds) for ds in res.data_sources - ): - res.data_sources.append(batch_source) - - # Handle stream sources defined with feature views. 
- if obj.stream_source: - stream_source = obj.stream_source - if not any((stream_source is ds) for ds in res.data_sources): - res.data_sources.append(stream_source) - elif isinstance(obj, StreamFeatureView) and not any( - (obj is sfv) for sfv in res.stream_feature_views - ): - res.stream_feature_views.append(obj) - - # Handle batch sources defined with feature views. - batch_source = obj.batch_source - if batch_source is not None and not any( - (batch_source is ds) for ds in res.data_sources - ): - res.data_sources.append(batch_source) - assert obj.stream_source - stream_source = obj.stream_source - if not any((stream_source is ds) for ds in res.data_sources): - res.data_sources.append(stream_source) - elif isinstance(obj, BatchFeatureView) and not any( - (obj is bfv) for bfv in res.feature_views - ): - res.feature_views.append(obj) - - # Handle batch sources defined with feature views. - batch_source = obj.batch_source - if batch_source is not None and not any( - (batch_source is ds) for ds in res.data_sources - ): - res.data_sources.append(batch_source) - elif isinstance(obj, Entity) and not any( - (obj is entity) for entity in res.entities - ): - res.entities.append(obj) - elif isinstance(obj, FeatureService) and not any( - (obj is fs) for fs in res.feature_services - ): - res.feature_services.append(obj) - elif isinstance(obj, OnDemandFeatureView) and not any( - (obj is odfv) for odfv in res.on_demand_feature_views - ): - res.on_demand_feature_views.append(obj) - elif isinstance(obj, Permission) and not any( - (obj is p) for p in res.permissions - ): - res.permissions.append(obj) - elif isinstance(obj, Project) and not any((obj is p) for p in res.projects): - res.projects.append(obj) - - res.entities.append(DUMMY_ENTITY) - return res - - -def plan( - repo_config: RepoConfig, - repo_path: Path, - skip_source_validation: bool, - skip_feature_view_validation: bool = False, -): - os.chdir(repo_path) - repo = _get_repo_contents(repo_path, repo_config.project, 
repo_config) - for project in repo.projects: - repo_config.project = project.name - store, registry = _get_store_and_registry(repo_config) - # TODO: When we support multiple projects in a single repo, we should filter repo contents by project - if not skip_source_validation: - provider = store._get_provider() - data_sources = [ - t.batch_source for t in repo.feature_views if t.batch_source is not None - ] - # Make sure the data source used by this feature view is supported by Feast - for data_source in data_sources: - provider.validate_data_source(store.config, data_source) - - registry_diff, infra_diff, _ = store.plan( - repo, skip_feature_view_validation=skip_feature_view_validation - ) - click.echo(registry_diff.to_string()) - click.echo(infra_diff.to_string()) - - -def _get_repo_contents( - repo_path, - project_name: Optional[str] = None, - repo_config: Optional[RepoConfig] = None, -): - sys.dont_write_bytecode = True - repo = parse_repo(repo_path) - - if len(repo.projects) < 1: - if project_name: - print( - f"No project found in the repository. Using project name {project_name} defined in feature_store.yaml" - ) - project_description = ( - repo_config.project_description if repo_config else None - ) - repo.projects.append( - Project(name=project_name, description=project_description or "") - ) - else: - print( - "No project found in the repository. Either define Project in repository or define a project in feature_store.yaml" - ) - sys.exit(1) - elif len(repo.projects) == 1: - if repo.projects[0].name != project_name: - print( - "Project object name should match with the project name defined in feature_store.yaml" - ) - sys.exit(1) - else: - print( - "Multiple projects found in the repository. 
Currently no support for multiple projects" - ) - sys.exit(1) - - return repo - - -def _get_store_and_registry(repo_config): - store = FeatureStore(config=repo_config) - registry = store.registry - return store, registry - - -def extract_objects_for_apply_delete(project, registry, repo): - # TODO(achals): This code path should be refactored to handle added & kept entities separately. - ( - _, - objs_to_delete, - objs_to_update, - objs_to_add, - ) = extract_objects_for_keep_delete_update_add(registry, project, repo) - - all_to_apply: List[ - Union[ - Entity, - FeatureView, - OnDemandFeatureView, - StreamFeatureView, - FeatureService, - ] - ] = [] - for object_type in FEAST_OBJECT_TYPES: - to_apply = set(objs_to_add[object_type]).union(objs_to_update[object_type]) - all_to_apply.extend(to_apply) - - all_to_delete: List[ - Union[ - Entity, - FeatureView, - OnDemandFeatureView, - StreamFeatureView, - FeatureService, - ] - ] = [] - for object_type in FEAST_OBJECT_TYPES: - all_to_delete.extend(objs_to_delete[object_type]) - - return ( - all_to_apply, - all_to_delete, - set(objs_to_add[FeastObjectType.FEATURE_VIEW]).union( - set(objs_to_update[FeastObjectType.FEATURE_VIEW]) - ), - objs_to_delete[FeastObjectType.FEATURE_VIEW], - ) - - -def apply_total_with_repo_instance( - store: FeatureStore, - project_name: str, - registry: BaseRegistry, - repo: RepoContents, - skip_source_validation: bool, - skip_feature_view_validation: bool = False, - no_promote: bool = False, -): - if not skip_source_validation: - provider = store._get_provider() - data_sources = [ - t.batch_source for t in repo.feature_views if t.batch_source is not None - ] - # Make sure the data source used by this feature view is supported by Feast - for data_source in data_sources: - provider.validate_data_source(store.config, data_source) - - # For each object in the registry, determine whether it should be kept or deleted. 
- ( - all_to_apply, - all_to_delete, - views_to_keep, - views_to_delete, - ) = extract_objects_for_apply_delete(project_name, registry, repo) - - try: - if store._should_use_plan(): - # Planning phase - compute diffs first without progress bars - registry_diff, infra_diff, new_infra = store.plan( - repo, - skip_feature_view_validation=skip_feature_view_validation, - ) - click.echo(registry_diff.to_string()) - - # Only show progress bars if there are actual infrastructure changes - progress_ctx = None - if len(infra_diff.infra_object_diffs) > 0: - from feast.diff.apply_progress import ApplyProgressContext - - progress_ctx = ApplyProgressContext() - progress_ctx.start_overall_progress() - - # Apply phase - store._apply_diffs( - registry_diff, - infra_diff, - new_infra, - progress_ctx=progress_ctx, - no_promote=no_promote, - ) - click.echo(infra_diff.to_string()) - else: - # Legacy apply path - no progress bars for legacy path - store.apply( - all_to_apply, - objects_to_delete=all_to_delete, - partial=False, - skip_feature_view_validation=skip_feature_view_validation, - no_promote=no_promote, - ) - log_infra_changes(views_to_keep, views_to_delete) - finally: - # Cleanup is handled in the new _apply_diffs method - pass - - -def log_infra_changes( - views_to_keep: Set[FeatureView], views_to_delete: Set[FeatureView] -): - from colorama import Fore, Style - - for view in views_to_keep: - click.echo( - f"Deploying infrastructure for {Style.BRIGHT + Fore.GREEN}{view.name}{Style.RESET_ALL}" - ) - for view in views_to_delete: - click.echo( - f"Removing infrastructure for {Style.BRIGHT + Fore.RED}{view.name}{Style.RESET_ALL}" - ) - - -def create_feature_store( - ctx: click.Context, -) -> FeatureStore: - repo = ctx.obj["CHDIR"] - # If we received a base64 encoded version of feature_store.yaml, use that - config_base64 = os.getenv(FEATURE_STORE_YAML_ENV_NAME) - if config_base64: - print("Received base64 encoded feature_store.yaml") - config_bytes = 
base64.b64decode(config_base64) - # Create a new unique directory for writing feature_store.yaml - repo_path = Path(tempfile.mkdtemp()) - with open(repo_path / "feature_store.yaml", "wb") as f: - f.write(config_bytes) - return FeatureStore(repo_path=str(repo_path.resolve())) - else: - fs_yaml_file = ctx.obj["FS_YAML_FILE"] - cli_check_repo(repo, fs_yaml_file) - return FeatureStore(repo_path=str(repo), fs_yaml_file=fs_yaml_file) - - -def apply_total( - repo_config: RepoConfig, - repo_path: Path, - skip_source_validation: bool, - skip_feature_view_validation: bool = False, - no_promote: bool = False, -): - os.chdir(repo_path) - repo = _get_repo_contents(repo_path, repo_config.project, repo_config) - for project in repo.projects: - repo_config.project = project.name - store, registry = _get_store_and_registry(repo_config) - if not is_valid_name(project.name): - print( - f"{project.name} is not valid. Project name should only have " - f"alphanumerical values, underscores, and hyphens but not start with an underscore or hyphen." - ) - sys.exit(1) - # TODO: When we support multiple projects in a single repo, we should filter repo contents by project. Currently there is no way to associate Feast objects to project. - print(f"Applying changes for project {project.name}") - apply_total_with_repo_instance( - store, - project.name, - registry, - repo, - skip_source_validation, - skip_feature_view_validation, - no_promote=no_promote, - ) - - -def teardown(repo_config: RepoConfig, repo_path: Optional[str]): - # Cannot pass in both repo_path and repo_config to FeatureStore. 
- feature_store = FeatureStore(repo_path=repo_path, config=repo_config) - feature_store.teardown() - - -def registry_dump(repo_config: RepoConfig, repo_path: Path) -> str: - """For debugging only: output contents of the metadata registry""" - registry_config = repo_config.registry - project = repo_config.project - registry = Registry( - project, - registry_config=registry_config, - repo_path=repo_path, - auth_config=repo_config.auth_config, - ) - registry_dict = registry.to_dict(project=project) - return json.dumps(registry_dict, indent=2, sort_keys=True) - - -def cli_check_repo(repo_path: Path, fs_yaml_file: Path): - sys.path.append(str(repo_path)) - if not fs_yaml_file.exists(): - print( - f"Can't find feature repo configuration file at {fs_yaml_file}. " - "Make sure you're running feast from an initialized feast repository." - ) - sys.exit(1) - - -def init_repo(repo_name: str, template: str, repo_path: Optional[str] = None): - import os - from pathlib import Path - from shutil import copytree - - from colorama import Fore, Style - - # Validate project name - if not is_valid_name(repo_name): - raise BadParameter( - message="Name should be alphanumeric values, underscores, and hyphens but not start with an underscore or hyphen", - param_hint="PROJECT_DIRECTORY", - ) - - # Determine where to create the repository - if repo_path: - # User specified a custom path - target_path = Path(repo_path).resolve() - target_path.mkdir(parents=True, exist_ok=True) - display_path = repo_path - else: - # Default behavior: create subdirectory with project name - target_path = Path(os.path.join(Path.cwd(), repo_name)) - target_path.mkdir(exist_ok=True) - display_path = repo_name - - repo_config_path = target_path / "feature_store.yaml" - - if repo_config_path.exists(): - print( - f"The directory {Style.BRIGHT + Fore.GREEN}{display_path}{Style.RESET_ALL} contains an existing feature " - f"store repository that may cause a conflict" - ) - print() - sys.exit(1) - - # Copy template 
directory - template_path = str(Path(Path(__file__).parent / "templates" / template).absolute()) - if not os.path.exists(template_path): - raise IOError(f"Could not find template {template}") - copytree(template_path, str(target_path), dirs_exist_ok=True) - - # Rename gitignore files back to .gitignore - for gitignore_path in target_path.rglob("gitignore"): - gitignore_path.rename(gitignore_path.with_name(".gitignore")) - - # Seed the repository - bootstrap_path = target_path / "bootstrap.py" - if os.path.exists(bootstrap_path): - import importlib.util - - spec = importlib.util.spec_from_file_location("bootstrap", str(bootstrap_path)) - assert isinstance(spec, ModuleSpec) - bootstrap = importlib.util.module_from_spec(spec) - assert isinstance(spec.loader, Loader) - spec.loader.exec_module(bootstrap) - bootstrap.bootstrap() # type: ignore - os.remove(bootstrap_path) - - # Template the feature_store.yaml file - feature_store_yaml_path = target_path / "feature_repo" / "feature_store.yaml" - replace_str_in_file( - feature_store_yaml_path, "project: my_project", f"project: {repo_name}" - ) - - # Remove the __pycache__ folder if it exists - import shutil - - shutil.rmtree(target_path / "__pycache__", ignore_errors=True) - - import click - - click.echo() - click.echo( - f"Creating a new Feast repository in {Style.BRIGHT + Fore.GREEN}{target_path}{Style.RESET_ALL}." 
- ) - click.echo() - - -def is_valid_name(name: str) -> bool: - """A name should be alphanumeric values, underscores, and hyphens but not start with an underscore""" - return ( - not name.startswith(("_", "-")) and re.compile(r"[^\w-]+").search(name) is None - ) - - -def generate_project_name() -> str: - """Generates a unique project name""" - return f"{random.choice(adjectives)}_{random.choice(animals)}" +import base64 +import importlib +import json +import logging +import os +import random +import re +import sys +import tempfile +from importlib.abc import Loader +from importlib.machinery import ModuleSpec +from pathlib import Path +from typing import List, Optional, Set, Union + +import click +from click.exceptions import BadParameter + +from feast import PushSource +from feast.batch_feature_view import BatchFeatureView +from feast.constants import FEATURE_STORE_YAML_ENV_NAME +from feast.data_source import DataSource, KafkaSource, KinesisSource +from feast.diff.registry_diff import extract_objects_for_keep_delete_update_add +from feast.entity import Entity +from feast.feature_service import FeatureService +from feast.feature_store import FeatureStore +from feast.feature_view import DUMMY_ENTITY, FeatureView +from feast.file_utils import replace_str_in_file +from feast.infra.registry.base_registry import BaseRegistry +from feast.infra.registry.registry import FEAST_OBJECT_TYPES, FeastObjectType, Registry +from feast.names import adjectives, animals +from feast.on_demand_feature_view import OnDemandFeatureView +from feast.permissions.permission import Permission +from feast.project import Project +from feast.repo_config import RepoConfig +from feast.repo_contents import RepoContents +from feast.stream_feature_view import StreamFeatureView + +logger = logging.getLogger(__name__) + + +def py_path_to_module(path: Path) -> str: + return ( + str(path.relative_to(os.getcwd()))[: -len(".py")] + .replace("./", "") + .replace("/", ".") + .replace("\\", ".") + ) + + +def 
read_feastignore(repo_root: Path) -> List[str]: + """Read .feastignore in the repo root directory (if exists) and return the list of user-defined ignore paths""" + feast_ignore = repo_root / ".feastignore" + if not feast_ignore.is_file(): + return [] + lines = feast_ignore.read_text().strip().split("\n") + ignore_paths = [] + for line in lines: + # Remove everything after the first occurance of "#" symbol (comments) + if line.find("#") >= 0: + line = line[: line.find("#")] + # Strip leading or ending whitespaces + line = line.strip() + # Add this processed line to ignore_paths if it's not empty + if len(line) > 0: + ignore_paths.append(line) + return ignore_paths + + +def get_ignore_files(repo_root: Path, ignore_paths: List[str]) -> Set[Path]: + """Get all ignore files that match any of the user-defined ignore paths""" + ignore_files = set() + for ignore_path in set(ignore_paths): + # ignore_path may contains matchers (* or **). Use glob() to match user-defined path to actual paths + for matched_path in repo_root.glob(ignore_path): + if matched_path.is_file(): + # If the matched path is a file, add that to ignore_files set + ignore_files.add(matched_path.resolve()) + else: + # Otherwise, list all Python files in that directory and add all of them to ignore_files set + ignore_files |= { + sub_path.resolve() + for sub_path in matched_path.glob("**/*.py") + if sub_path.is_file() + } + return ignore_files + + +def get_repo_files(repo_root: Path) -> List[Path]: + """Get the list of all repo files, ignoring undesired files & directories specified in .feastignore""" + # Read ignore paths from .feastignore and create a set of all files that match any of these paths + ignore_paths = read_feastignore(repo_root) + [ + ".git", + ".feastignore", + ".venv", + "**/.ipynb_checkpoints", + "**/.pytest_cache", + "**/__pycache__", + ] + ignore_files = get_ignore_files(repo_root, ignore_paths) + + # List all Python files in the root directory (recursively) + repo_files = { + 
p.resolve() + for p in repo_root.glob("**/*.py") + if p.is_file() and "__init__.py" != p.name + } + # Ignore all files that match any of the ignore paths in .feastignore + repo_files -= ignore_files + + # Sort repo_files to read them in the same order every time + return sorted(repo_files) + + +def parse_repo(repo_root: Path) -> RepoContents: + """ + Collects unique Feast object definitions from the given feature repo. + + Specifically, if an object foo has already been added, bar will still be added if + (bar == foo), but not if (bar is foo). This ensures that import statements will + not result in duplicates, but defining two equal objects will. + """ + res = RepoContents( + projects=[], + data_sources=[], + entities=[], + feature_views=[], + feature_services=[], + on_demand_feature_views=[], + stream_feature_views=[], + permissions=[], + ) + + for repo_file in get_repo_files(repo_root): + module_path = py_path_to_module(repo_file) + module = importlib.import_module(module_path) + + for attr_name in dir(module): + obj = getattr(module, attr_name) + + if isinstance(obj, DataSource) and not any( + (obj is ds) for ds in res.data_sources + ): + res.data_sources.append(obj) + + # Handle batch sources defined within stream sources. + if ( + isinstance(obj, PushSource) + or isinstance(obj, KafkaSource) + or isinstance(obj, KinesisSource) + ): + batch_source = obj.batch_source + + if batch_source and not any( + (batch_source is ds) for ds in res.data_sources + ): + res.data_sources.append(batch_source) + if ( + isinstance(obj, FeatureView) + and not any((obj is fv) for fv in res.feature_views) + and not isinstance(obj, StreamFeatureView) + and not isinstance(obj, BatchFeatureView) + ): + res.feature_views.append(obj) + + # Handle batch sources defined with feature views. 
+ batch_source = obj.batch_source + if batch_source is not None and not any( + (batch_source is ds) for ds in res.data_sources + ): + res.data_sources.append(batch_source) + + # Handle stream sources defined with feature views. + if obj.stream_source: + stream_source = obj.stream_source + if not any((stream_source is ds) for ds in res.data_sources): + res.data_sources.append(stream_source) + elif isinstance(obj, StreamFeatureView) and not any( + (obj is sfv) for sfv in res.stream_feature_views + ): + res.stream_feature_views.append(obj) + + # Handle batch sources defined with feature views. + batch_source = obj.batch_source + if batch_source is not None and not any( + (batch_source is ds) for ds in res.data_sources + ): + res.data_sources.append(batch_source) + assert obj.stream_source + stream_source = obj.stream_source + if not any((stream_source is ds) for ds in res.data_sources): + res.data_sources.append(stream_source) + elif isinstance(obj, BatchFeatureView) and not any( + (obj is bfv) for bfv in res.feature_views + ): + res.feature_views.append(obj) + + # Handle batch sources defined with feature views. 
+ batch_source = obj.batch_source + if batch_source is not None and not any( + (batch_source is ds) for ds in res.data_sources + ): + res.data_sources.append(batch_source) + elif isinstance(obj, Entity) and not any( + (obj is entity) for entity in res.entities + ): + res.entities.append(obj) + elif isinstance(obj, FeatureService) and not any( + (obj is fs) for fs in res.feature_services + ): + res.feature_services.append(obj) + elif isinstance(obj, OnDemandFeatureView) and not any( + (obj is odfv) for odfv in res.on_demand_feature_views + ): + res.on_demand_feature_views.append(obj) + elif isinstance(obj, Permission) and not any( + (obj is p) for p in res.permissions + ): + res.permissions.append(obj) + elif isinstance(obj, Project) and not any((obj is p) for p in res.projects): + res.projects.append(obj) + + res.entities.append(DUMMY_ENTITY) + return res + + +def plan( + repo_config: RepoConfig, + repo_path: Path, + skip_source_validation: bool, + skip_feature_view_validation: bool = False, +): + os.chdir(repo_path) + repo = _get_repo_contents(repo_path, repo_config.project, repo_config) + for project in repo.projects: + repo_config.project = project.name + store, registry = _get_store_and_registry(repo_config) + # TODO: When we support multiple projects in a single repo, we should filter repo contents by project + if not skip_source_validation: + provider = store._get_provider() + data_sources = [ + t.batch_source for t in repo.feature_views if t.batch_source is not None + ] + # Make sure the data source used by this feature view is supported by Feast + for data_source in data_sources: + provider.validate_data_source(store.config, data_source) + + registry_diff, infra_diff, _ = store.plan( + repo, skip_feature_view_validation=skip_feature_view_validation + ) + click.echo(registry_diff.to_string()) + click.echo(infra_diff.to_string()) + + +def _get_repo_contents( + repo_path, + project_name: Optional[str] = None, + repo_config: Optional[RepoConfig] = None, +): + 
sys.dont_write_bytecode = True + repo = parse_repo(repo_path) + + if len(repo.projects) < 1: + if project_name: + print( + f"No project found in the repository. Using project name {project_name} defined in feature_store.yaml" + ) + project_description = ( + repo_config.project_description if repo_config else None + ) + repo.projects.append( + Project(name=project_name, description=project_description or "") + ) + else: + print( + "No project found in the repository. Either define Project in repository or define a project in feature_store.yaml" + ) + sys.exit(1) + elif len(repo.projects) == 1: + if repo.projects[0].name != project_name: + print( + "Project object name should match with the project name defined in feature_store.yaml" + ) + sys.exit(1) + else: + print( + "Multiple projects found in the repository. Currently no support for multiple projects" + ) + sys.exit(1) + + return repo + + +def _get_store_and_registry(repo_config): + store = FeatureStore(config=repo_config) + registry = store.registry + return store, registry + + +def extract_objects_for_apply_delete(project, registry, repo): + # TODO(achals): This code path should be refactored to handle added & kept entities separately. 
+ ( + _, + objs_to_delete, + objs_to_update, + objs_to_add, + ) = extract_objects_for_keep_delete_update_add(registry, project, repo) + + all_to_apply: List[ + Union[ + Entity, + FeatureView, + OnDemandFeatureView, + StreamFeatureView, + FeatureService, + ] + ] = [] + for object_type in FEAST_OBJECT_TYPES: + to_apply = set(objs_to_add[object_type]).union(objs_to_update[object_type]) + all_to_apply.extend(to_apply) + + all_to_delete: List[ + Union[ + Entity, + FeatureView, + OnDemandFeatureView, + StreamFeatureView, + FeatureService, + ] + ] = [] + for object_type in FEAST_OBJECT_TYPES: + all_to_delete.extend(objs_to_delete[object_type]) + + return ( + all_to_apply, + all_to_delete, + set(objs_to_add[FeastObjectType.FEATURE_VIEW]).union( + set(objs_to_update[FeastObjectType.FEATURE_VIEW]) + ), + objs_to_delete[FeastObjectType.FEATURE_VIEW], + ) + + +def apply_total_with_repo_instance( + store: FeatureStore, + project_name: str, + registry: BaseRegistry, + repo: RepoContents, + skip_source_validation: bool, + skip_feature_view_validation: bool = False, + no_promote: bool = False, +): + if not skip_source_validation: + provider = store._get_provider() + data_sources = [ + t.batch_source for t in repo.feature_views if t.batch_source is not None + ] + # Make sure the data source used by this feature view is supported by Feast + for data_source in data_sources: + provider.validate_data_source(store.config, data_source) + + # For each object in the registry, determine whether it should be kept or deleted. 
+ ( + all_to_apply, + all_to_delete, + views_to_keep, + views_to_delete, + ) = extract_objects_for_apply_delete(project_name, registry, repo) + + try: + if store._should_use_plan(): + # Planning phase - compute diffs first without progress bars + registry_diff, infra_diff, new_infra = store.plan( + repo, + skip_feature_view_validation=skip_feature_view_validation, + ) + click.echo(registry_diff.to_string()) + + # Only show progress bars if there are actual infrastructure changes + progress_ctx = None + if len(infra_diff.infra_object_diffs) > 0: + from feast.diff.apply_progress import ApplyProgressContext + + progress_ctx = ApplyProgressContext() + progress_ctx.start_overall_progress() + + # Apply phase + store._apply_diffs( + registry_diff, + infra_diff, + new_infra, + progress_ctx=progress_ctx, + no_promote=no_promote, + ) + click.echo(infra_diff.to_string()) + else: + # Legacy apply path - no progress bars for legacy path + store.apply( + all_to_apply, + objects_to_delete=all_to_delete, + partial=False, + skip_feature_view_validation=skip_feature_view_validation, + no_promote=no_promote, + ) + log_infra_changes(views_to_keep, views_to_delete) + finally: + # Cleanup is handled in the new _apply_diffs method + pass + + _submit_baseline_jobs_if_needed(store, project_name, repo) + + +def _submit_baseline_jobs_if_needed(store, project_name, repo): + """Submit async baseline DQM jobs for new features after feast apply.""" + dqm = store.config.dqm_config + if dqm is not None and not dqm.auto_baseline: + return + + try: + from feast.monitoring.monitoring_service import MonitoringService + + svc = MonitoringService(store) + feature_views = list(repo.feature_views) + if not feature_views: + return + + job_ids = svc.submit_baseline_for_new_features( + project=project_name, feature_views=feature_views + ) + for job_id in job_ids: + click.echo(f" → Queued baseline metrics computation (DQM job: {job_id})") + except Exception: + logging.getLogger(__name__).debug( + "Monitoring 
baseline submission skipped (non-critical)", exc_info=True + ) + + +def log_infra_changes( + views_to_keep: Set[FeatureView], views_to_delete: Set[FeatureView] +): + from colorama import Fore, Style + + for view in views_to_keep: + click.echo( + f"Deploying infrastructure for {Style.BRIGHT + Fore.GREEN}{view.name}{Style.RESET_ALL}" + ) + for view in views_to_delete: + click.echo( + f"Removing infrastructure for {Style.BRIGHT + Fore.RED}{view.name}{Style.RESET_ALL}" + ) + + +def create_feature_store( + ctx: click.Context, +) -> FeatureStore: + repo = ctx.obj["CHDIR"] + # If we received a base64 encoded version of feature_store.yaml, use that + config_base64 = os.getenv(FEATURE_STORE_YAML_ENV_NAME) + if config_base64: + print("Received base64 encoded feature_store.yaml") + config_bytes = base64.b64decode(config_base64) + # Create a new unique directory for writing feature_store.yaml + repo_path = Path(tempfile.mkdtemp()) + with open(repo_path / "feature_store.yaml", "wb") as f: + f.write(config_bytes) + return FeatureStore(repo_path=str(repo_path.resolve())) + else: + fs_yaml_file = ctx.obj["FS_YAML_FILE"] + cli_check_repo(repo, fs_yaml_file) + return FeatureStore(repo_path=str(repo), fs_yaml_file=fs_yaml_file) + + +def apply_total( + repo_config: RepoConfig, + repo_path: Path, + skip_source_validation: bool, + skip_feature_view_validation: bool = False, + no_promote: bool = False, +): + os.chdir(repo_path) + repo = _get_repo_contents(repo_path, repo_config.project, repo_config) + for project in repo.projects: + repo_config.project = project.name + store, registry = _get_store_and_registry(repo_config) + if not is_valid_name(project.name): + print( + f"{project.name} is not valid. Project name should only have " + f"alphanumerical values, underscores, and hyphens but not start with an underscore or hyphen." + ) + sys.exit(1) + # TODO: When we support multiple projects in a single repo, we should filter repo contents by project. 
Currently there is no way to associate Feast objects to project. + print(f"Applying changes for project {project.name}") + apply_total_with_repo_instance( + store, + project.name, + registry, + repo, + skip_source_validation, + skip_feature_view_validation, + no_promote=no_promote, + ) + + +def teardown(repo_config: RepoConfig, repo_path: Optional[str]): + # Cannot pass in both repo_path and repo_config to FeatureStore. + feature_store = FeatureStore(repo_path=repo_path, config=repo_config) + feature_store.teardown() + + +def registry_dump(repo_config: RepoConfig, repo_path: Path) -> str: + """For debugging only: output contents of the metadata registry""" + registry_config = repo_config.registry + project = repo_config.project + registry = Registry( + project, + registry_config=registry_config, + repo_path=repo_path, + auth_config=repo_config.auth_config, + ) + registry_dict = registry.to_dict(project=project) + return json.dumps(registry_dict, indent=2, sort_keys=True) + + +def cli_check_repo(repo_path: Path, fs_yaml_file: Path): + sys.path.append(str(repo_path)) + if not fs_yaml_file.exists(): + print( + f"Can't find feature repo configuration file at {fs_yaml_file}. " + "Make sure you're running feast from an initialized feast repository." 
+ ) + sys.exit(1) + + +def init_repo(repo_name: str, template: str, repo_path: Optional[str] = None): + import os + from pathlib import Path + from shutil import copytree + + from colorama import Fore, Style + + # Validate project name + if not is_valid_name(repo_name): + raise BadParameter( + message="Name should be alphanumeric values, underscores, and hyphens but not start with an underscore or hyphen", + param_hint="PROJECT_DIRECTORY", + ) + + # Determine where to create the repository + if repo_path: + # User specified a custom path + target_path = Path(repo_path).resolve() + target_path.mkdir(parents=True, exist_ok=True) + display_path = repo_path + else: + # Default behavior: create subdirectory with project name + target_path = Path(os.path.join(Path.cwd(), repo_name)) + target_path.mkdir(exist_ok=True) + display_path = repo_name + + repo_config_path = target_path / "feature_store.yaml" + + if repo_config_path.exists(): + print( + f"The directory {Style.BRIGHT + Fore.GREEN}{display_path}{Style.RESET_ALL} contains an existing feature " + f"store repository that may cause a conflict" + ) + print() + sys.exit(1) + + # Copy template directory + template_path = str(Path(Path(__file__).parent / "templates" / template).absolute()) + if not os.path.exists(template_path): + raise IOError(f"Could not find template {template}") + copytree(template_path, str(target_path), dirs_exist_ok=True) + + # Rename gitignore files back to .gitignore + for gitignore_path in target_path.rglob("gitignore"): + gitignore_path.rename(gitignore_path.with_name(".gitignore")) + + # Seed the repository + bootstrap_path = target_path / "bootstrap.py" + if os.path.exists(bootstrap_path): + import importlib.util + + spec = importlib.util.spec_from_file_location("bootstrap", str(bootstrap_path)) + assert isinstance(spec, ModuleSpec) + bootstrap = importlib.util.module_from_spec(spec) + assert isinstance(spec.loader, Loader) + spec.loader.exec_module(bootstrap) + bootstrap.bootstrap() # type: 
ignore + os.remove(bootstrap_path) + + # Template the feature_store.yaml file + feature_store_yaml_path = target_path / "feature_repo" / "feature_store.yaml" + replace_str_in_file( + feature_store_yaml_path, "project: my_project", f"project: {repo_name}" + ) + + # Remove the __pycache__ folder if it exists + import shutil + + shutil.rmtree(target_path / "__pycache__", ignore_errors=True) + + import click + + click.echo() + click.echo( + f"Creating a new Feast repository in {Style.BRIGHT + Fore.GREEN}{target_path}{Style.RESET_ALL}." + ) + click.echo() + + +def is_valid_name(name: str) -> bool: + """A name should be alphanumeric values, underscores, and hyphens but not start with an underscore""" + return ( + not name.startswith(("_", "-")) and re.compile(r"[^\w-]+").search(name) is None + ) + + +def generate_project_name() -> str: + """Generates a unique project name""" + return f"{random.choice(adjectives)}_{random.choice(animals)}" diff --git a/sdk/python/tests/integration/monitoring/__init__.py b/sdk/python/tests/integration/monitoring/__init__.py new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/sdk/python/tests/integration/monitoring/__init__.py @@ -0,0 +1 @@ + diff --git a/sdk/python/tests/integration/monitoring/test_monitoring_integration.py b/sdk/python/tests/integration/monitoring/test_monitoring_integration.py new file mode 100644 index 00000000000..213244e1010 --- /dev/null +++ b/sdk/python/tests/integration/monitoring/test_monitoring_integration.py @@ -0,0 +1,1039 @@ +"""Integration tests for the monitoring feature. 
+ +Tests cover: +- Auto-compute (all granularities from source timestamps) +- Compute baseline (idempotent) +- Transient compute +- DQM job lifecycle +- CLI commands +- REST API endpoints +- RBAC enforcement +- Compute engine dispatch (SQL push-down vs Python fallback) +- Log source monitoring (feature serving logs) +""" + +from datetime import date, datetime, timezone +from unittest.mock import MagicMock, patch + +import pyarrow as pa +import pytest +from click.testing import CliRunner + +from feast.monitoring.monitoring_service import VALID_GRANULARITIES, MonitoringService +from feast.types import PrimitiveFeastType + +# ------------------------------------------------------------------ # +# Shared helpers +# ------------------------------------------------------------------ # + + +def _make_feature_field(name, dtype): + field = MagicMock() + field.name = name + field.dtype = dtype + return field + + +def _make_feature_view(name, features, entities=None, batch_source=None): + fv = MagicMock() + fv.name = name + fv.features = features + fv.entities = entities or [] + if batch_source is None: + batch_source = MagicMock() + batch_source.timestamp_field = "event_timestamp" + batch_source.created_timestamp_column = "" + fv.batch_source = batch_source + return fv + + +def _make_feature_service(name, fv_names, logging_config=None, feature_map=None): + """Create a mock FeatureService. + + Args: + feature_map: optional dict mapping view_name -> list of feature names. + Used to build realistic projections with features and name_to_use(). 
+ """ + fs = MagicMock() + fs.name = name + fs.feature_view_projections = [MagicMock(name=n) for n in fv_names] + for proj, n in zip(fs.feature_view_projections, fv_names): + proj.name = n + proj.name_to_use.return_value = n + if feature_map and n in feature_map: + feats = [] + for fname in feature_map[n]: + f = MagicMock() + f.name = fname + feats.append(f) + proj.features = feats + else: + proj.features = [] + fs.logging_config = logging_config + return fs + + +def _make_logging_config_with_source(log_table_schema): + """Create a mock LoggingConfig whose destination.to_data_source() returns a DataSource.""" + logging_config = MagicMock() + mock_data_source = MagicMock() + mock_data_source.timestamp_field = "__log_timestamp" + mock_data_source.created_timestamp_column = "" + logging_config.destination.to_data_source.return_value = mock_data_source + return logging_config, mock_data_source + + +def _make_mock_store(feature_views, feature_services=None): + """Create a mock FeatureStore with offline store that uses Python fallback.""" + store = MagicMock() + store.project = "test_project" + store.config.project = "test_project" + store.config.offline_store = MagicMock() + + store.registry.list_feature_views.return_value = feature_views + store.registry.list_entities.return_value = [] + store.registry.list_feature_services.return_value = feature_services or [] + + if feature_views: + store.registry.get_feature_view.return_value = feature_views[0] + + if feature_services: + store.registry.get_feature_service.return_value = feature_services[0] + + arrow_table = pa.table( + { + "conv_rate": [0.1, 0.5, 0.9, 0.3, 0.7], + "acc_rate": [0.8, 0.6, 0.4, 0.9, 0.2], + "city": ["NYC", "LA", "NYC", "SF", "LA"], + "event_timestamp": [ + datetime(2025, 3, 25, tzinfo=timezone.utc), + datetime(2025, 3, 26, tzinfo=timezone.utc), + datetime(2025, 3, 26, tzinfo=timezone.utc), + datetime(2025, 3, 27, tzinfo=timezone.utc), + datetime(2025, 3, 27, tzinfo=timezone.utc), + ], + } + ) + + 
mock_retrieval = MagicMock() + mock_retrieval.to_arrow.return_value = arrow_table + + provider = MagicMock() + provider.offline_store.pull_all_from_table_or_query.return_value = mock_retrieval + provider.offline_store.compute_monitoring_metrics.side_effect = NotImplementedError + provider.offline_store.get_monitoring_max_timestamp.side_effect = ( + NotImplementedError + ) + + job_store = {} + + def _mock_save(config, metric_type, metrics): + if metric_type == "job": + for m in metrics: + job_store[m["job_id"]] = dict(m) + + def _mock_query(config, project, metric_type, filters=None, **kwargs): + if metric_type == "job": + results = list(job_store.values()) + if filters: + for key, value in filters.items(): + if value is not None: + results = [r for r in results if r.get(key) == value] + return results + return [] + + provider.offline_store.ensure_monitoring_tables.return_value = None + provider.offline_store.save_monitoring_metrics.side_effect = _mock_save + provider.offline_store.query_monitoring_metrics.side_effect = _mock_query + provider.offline_store.clear_monitoring_baseline.return_value = None + + store._get_provider.return_value = provider + + return store + + +# ------------------------------------------------------------------ # +# Test: Auto-compute +# ------------------------------------------------------------------ # + + +class TestAutoCompute: + def test_auto_compute_all_granularities(self): + fv = _make_feature_view( + "driver_stats", + [_make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)], + ) + store = _make_mock_store([fv]) + svc = MonitoringService(store) + + result = svc.auto_compute(project="test_project") + + assert result["status"] == "completed" + assert result["computed_feature_views"] == 1 + assert len(result["granularities"]) == len(VALID_GRANULARITIES) + for g in VALID_GRANULARITIES: + assert g in result["granularities"] + + provider = store._get_provider.return_value + 
provider.offline_store.save_monitoring_metrics.assert_called() + + def test_auto_compute_specific_view(self): + fv = _make_feature_view( + "driver_stats", + [_make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)], + ) + store = _make_mock_store([fv]) + svc = MonitoringService(store) + + result = svc.auto_compute( + project="test_project", + feature_view_name="driver_stats", + ) + + assert result["status"] == "completed" + assert result["computed_feature_views"] == 1 + + +# ------------------------------------------------------------------ # +# Test: Compute baseline +# ------------------------------------------------------------------ # + + +class TestComputeBaseline: + def test_compute_baseline_for_new_features(self): + fv = _make_feature_view( + "driver_stats", + [ + _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64), + _make_feature_field("city", PrimitiveFeastType.STRING), + ], + ) + store = _make_mock_store([fv]) + svc = MonitoringService(store) + + result = svc.compute_baseline(project="test_project") + + assert result["status"] == "completed" + assert result["is_baseline"] is True + assert result["computed_features"] == 2 + + provider = store._get_provider.return_value + provider.offline_store.clear_monitoring_baseline.assert_called() + provider.offline_store.save_monitoring_metrics.assert_called() + + def test_baseline_idempotent_skips_existing(self): + fv = _make_feature_view( + "driver_stats", + [ + _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64), + _make_feature_field("acc_rate", PrimitiveFeastType.FLOAT64), + ], + ) + store = _make_mock_store([fv]) + + # Simulate conv_rate already has baseline via query_monitoring_metrics + provider = store._get_provider.return_value + existing_baseline = { + "project_id": "test_project", + "feature_view_name": "driver_stats", + "feature_name": "conv_rate", + "metric_date": "2025-01-01", + "granularity": "daily", + "data_source_type": "batch", + "computed_at": 
datetime.now(timezone.utc).isoformat(), + "is_baseline": True, + "feature_type": "numeric", + "row_count": 100, + "null_count": 0, + "null_rate": 0.0, + "mean": 5.0, + "stddev": 1.0, + "min_val": 0.0, + "max_val": 10.0, + "p50": 5.0, + "p75": 7.5, + "p90": 9.0, + "p95": 9.5, + "p99": 9.9, + "histogram": None, + } + + original_side_effect = ( + provider.offline_store.query_monitoring_metrics.side_effect + ) + + def _query_with_baseline(config, project, metric_type, filters=None, **kwargs): + if metric_type == "feature" and filters and filters.get("is_baseline"): + return [existing_baseline] + if original_side_effect: + return original_side_effect( + config, project, metric_type, filters=filters, **kwargs + ) + return [] + + provider.offline_store.query_monitoring_metrics.side_effect = ( + _query_with_baseline + ) + + svc = MonitoringService(store) + result = svc.compute_baseline(project="test_project") + + # Only acc_rate should be computed (conv_rate already has baseline) + assert result["computed_features"] == 1 + + +# ------------------------------------------------------------------ # +# Test: Transient compute +# ------------------------------------------------------------------ # + + +class TestTransientCompute: + def test_transient_returns_metrics_without_saving(self): + fv = _make_feature_view( + "driver_stats", + [ + _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64), + _make_feature_field("city", PrimitiveFeastType.STRING), + ], + ) + store = _make_mock_store([fv]) + svc = MonitoringService(store) + + result = svc.compute_transient( + project="test_project", + feature_view_name="driver_stats", + start_date=date(2025, 1, 1), + end_date=date(2025, 1, 15), + ) + + assert result["status"] == "completed" + assert result["start_date"] == "2025-01-01" + assert result["end_date"] == "2025-01-15" + assert len(result["metrics"]) == 2 + + # Transient should NOT call save + provider = store._get_provider.return_value + 
provider.offline_store.save_monitoring_metrics.assert_not_called() + + def test_transient_empty_features(self): + fv = _make_feature_view( + "fv", + [_make_feature_field("ts", PrimitiveFeastType.UNIX_TIMESTAMP)], + ) + store = _make_mock_store([fv]) + svc = MonitoringService(store) + + result = svc.compute_transient( + project="test_project", + feature_view_name="fv", + ) + assert result["metrics"] == [] + + +# ------------------------------------------------------------------ # +# Test: DQM Job Manager +# ------------------------------------------------------------------ # + + +class TestDQMJobManager: + def _make_manager(self): + from feast.monitoring.dqm_job_manager import DQMJobManager + + stored = {} + + def mock_save(config, metric_type, metrics): + for m in metrics: + stored[m["job_id"]] = dict(m) + + def mock_query(config, project, metric_type, filters=None, **kwargs): + results = list(stored.values()) + if filters: + for key, value in filters.items(): + if value is not None: + results = [r for r in results if r.get(key) == value] + return results + + offline_store = MagicMock() + offline_store.save_monitoring_metrics.side_effect = mock_save + offline_store.query_monitoring_metrics.side_effect = mock_query + return DQMJobManager(offline_store, MagicMock()) + + def test_submit_and_get_job(self): + mgr = self._make_manager() + job_id = mgr.submit( + project="test_project", + job_type="auto_compute", + feature_view_name="driver_stats", + ) + + assert job_id is not None + assert len(job_id) == 36 # UUID format + + job = mgr.get_job(job_id) + assert job is not None + assert job["project_id"] == "test_project" + assert job["job_type"] == "auto_compute" + assert job["status"] == "pending" + + def test_update_status(self): + from feast.monitoring.dqm_job_manager import JOB_STATUS_RUNNING + + mgr = self._make_manager() + job_id = mgr.submit( + project="test_project", + job_type="compute", + ) + mgr.update_status(job_id, JOB_STATUS_RUNNING) + + job = 
mgr.get_job(job_id) + assert job["status"] == JOB_STATUS_RUNNING + assert job["started_at"] is not None + + +# ------------------------------------------------------------------ # +# Test: CLI +# ------------------------------------------------------------------ # + + +class TestComputeMetricsCLI: + def test_help(self): + from feast.cli.monitor import monitor_cmd + + runner = CliRunner() + result = runner.invoke(monitor_cmd, ["run", "--help"]) + assert result.exit_code == 0 + assert "--granularity" in result.output + assert "--set-baseline" in result.output + assert "--feature-view" in result.output + + @patch("feast.cli.monitor.create_feature_store") + @patch("feast.monitoring.monitoring_service.MonitoringService.auto_compute") + def test_run_auto_mode(self, mock_auto, mock_create_store): + from feast.cli.monitor import monitor_cmd + + mock_store = MagicMock() + mock_store.project = "proj" + mock_create_store.return_value = mock_store + + mock_auto.return_value = { + "status": "completed", + "computed_feature_views": 2, + "computed_features": 5, + "granularities": list(VALID_GRANULARITIES), + "duration_ms": 1200, + } + + runner = CliRunner() + result = runner.invoke(monitor_cmd, ["run"]) + + assert result.exit_code == 0 + assert "Auto-computing" in result.output + assert "Features computed: 5" in result.output + mock_auto.assert_called_once() + + @patch("feast.cli.monitor.create_feature_store") + @patch("feast.monitoring.monitoring_service.MonitoringService.compute_metrics") + def test_run_explicit_granularity(self, mock_compute, mock_create_store): + from feast.cli.monitor import monitor_cmd + + mock_store = MagicMock() + mock_store.project = "proj" + mock_create_store.return_value = mock_store + + mock_compute.return_value = { + "status": "completed", + "granularity": "weekly", + "computed_features": 3, + "computed_feature_views": 1, + "computed_feature_services": 1, + "metric_dates": ["2025-01-01"], + "duration_ms": 500, + } + + runner = CliRunner() + result = 
runner.invoke( + monitor_cmd, + [ + "run", + "--granularity", + "weekly", + "--start-date", + "2025-01-01", + "--end-date", + "2025-01-07", + ], + ) + + assert result.exit_code == 0 + assert "Granularity: weekly" in result.output + + +# ------------------------------------------------------------------ # +# Test: REST API +# ------------------------------------------------------------------ # + + +class TestRESTEndpoints: + @pytest.fixture + def app(self): + from fastapi import FastAPI + from fastapi.testclient import TestClient + + from feast.api.registry.rest.monitoring import get_monitoring_router + + mock_handler = MagicMock() + mock_server = MagicMock() + + fv = _make_feature_view( + "driver_stats", + [_make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)], + ) + mock_server.store = _make_mock_store([fv]) + + app = FastAPI() + app.include_router(get_monitoring_router(mock_handler, mock_server)) + + return TestClient(app), mock_server + + @patch("feast.api.registry.rest.monitoring.assert_permissions") + def test_auto_compute_endpoint(self, mock_perms, app): + client, _ = app + + response = client.post( + "/monitoring/auto_compute", + json={"project": "test_project"}, + ) + + assert response.status_code == 200 + data = response.json() + assert data["status"] == "completed" + assert "job_id" in data + + @patch("feast.api.registry.rest.monitoring.assert_permissions") + def test_transient_compute_endpoint(self, mock_perms, app): + client, _ = app + + response = client.post( + "/monitoring/compute/transient", + json={ + "project": "test_project", + "feature_view_name": "driver_stats", + "start_date": "2025-01-05", + "end_date": "2025-01-20", + }, + ) + + assert response.status_code == 200 + data = response.json() + assert data["status"] == "completed" + assert len(data["metrics"]) >= 1 + + @patch("feast.api.registry.rest.monitoring.assert_permissions") + def test_get_metrics_with_granularity(self, mock_perms, app): + client, _ = app + + response = client.get( 
+ "/monitoring/metrics/features", + params={"project": "test_project", "granularity": "weekly"}, + ) + + assert response.status_code == 200 + + @patch("feast.api.registry.rest.monitoring.assert_permissions") + def test_get_timeseries(self, mock_perms, app): + client, _ = app + + response = client.get( + "/monitoring/metrics/timeseries", + params={ + "project": "test_project", + "feature_view_name": "driver_stats", + "granularity": "daily", + }, + ) + + assert response.status_code == 200 + + +# ------------------------------------------------------------------ # +# Test: RBAC enforcement +# ------------------------------------------------------------------ # + + +class TestRBACEnforcement: + @pytest.fixture + def app(self): + from fastapi import FastAPI + from fastapi.testclient import TestClient + + from feast.api.registry.rest.monitoring import get_monitoring_router + + mock_handler = MagicMock() + mock_server = MagicMock() + + fv = _make_feature_view( + "driver_stats", + [_make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)], + ) + mock_server.store = _make_mock_store([fv]) + + app = FastAPI() + app.include_router(get_monitoring_router(mock_handler, mock_server)) + + return TestClient(app), mock_server + + @patch("feast.api.registry.rest.monitoring.assert_permissions") + def test_compute_requires_update(self, mock_perms, app): + client, _ = app + + from feast.permissions.action import AuthzedAction + + client.post( + "/monitoring/compute", + json={ + "project": "test_project", + "feature_view_name": "driver_stats", + }, + ) + + mock_perms.assert_called() + call_args = mock_perms.call_args + assert AuthzedAction.UPDATE in call_args.kwargs.get( + "actions", call_args[1].get("actions", []) + ) + + @patch("feast.api.registry.rest.monitoring.assert_permissions") + def test_transient_requires_describe(self, mock_perms, app): + client, _ = app + + from feast.permissions.action import AuthzedAction + + client.post( + "/monitoring/compute/transient", + json={ + 
"project": "test_project", + "feature_view_name": "driver_stats", + }, + ) + + mock_perms.assert_called() + call_args = mock_perms.call_args + assert AuthzedAction.DESCRIBE in call_args.kwargs.get( + "actions", call_args[1].get("actions", []) + ) + + @patch("feast.api.registry.rest.monitoring.assert_permissions") + def test_read_requires_describe(self, mock_perms, app): + client, _ = app + + from feast.permissions.action import AuthzedAction + + client.get( + "/monitoring/metrics/features", + params={"project": "test_project", "feature_view_name": "driver_stats"}, + ) + + mock_perms.assert_called() + call_args = mock_perms.call_args + assert AuthzedAction.DESCRIBE in call_args.kwargs.get( + "actions", call_args[1].get("actions", []) + ) + + +# ------------------------------------------------------------------ # +# Test: SQL push-down dispatch +# ------------------------------------------------------------------ # + + +class TestComputeEngineDispatch: + """Verify that MonitoringService prefers SQL push-down and falls back + to Python-based computation when the offline store doesn't support it.""" + + def _make_store_with_pushdown(self, pushdown_result): + """Create a mock store where the offline store supports push-down.""" + fv = _make_feature_view( + "driver_stats", + [ + _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64), + _make_feature_field("city", PrimitiveFeastType.STRING), + ], + ) + store = _make_mock_store([fv]) + provider = store._get_provider.return_value + provider.offline_store.compute_monitoring_metrics.side_effect = None + provider.offline_store.compute_monitoring_metrics.return_value = pushdown_result + provider.offline_store.get_monitoring_max_timestamp.side_effect = None + provider.offline_store.get_monitoring_max_timestamp.return_value = datetime( + 2025, 3, 27, tzinfo=timezone.utc + ) + return store, fv + + def test_uses_sql_pushdown_when_available(self): + """When the offline store supports compute_monitoring_metrics, + 
pull_all_from_table_or_query should NOT be called.""" + sql_result = [ + { + "feature_name": "conv_rate", + "feature_type": "numeric", + "row_count": 100, + "null_count": 2, + "null_rate": 0.02, + "mean": 0.5, + "stddev": 0.2, + "min_val": 0.0, + "max_val": 1.0, + "p50": 0.5, + "p75": 0.75, + "p90": 0.9, + "p95": 0.95, + "p99": 0.99, + "histogram": { + "bins": [0.0, 0.5, 1.0], + "counts": [50, 50], + "bin_width": 0.5, + }, + }, + ] + store, _ = self._make_store_with_pushdown(sql_result) + svc = MonitoringService(store) + + result = svc.compute_transient( + project="test_project", + feature_view_name="driver_stats", + feature_names=["conv_rate"], + start_date=date(2025, 1, 1), + end_date=date(2025, 1, 15), + ) + + assert result["status"] == "completed" + assert len(result["metrics"]) == 1 + assert result["metrics"][0]["mean"] == 0.5 + + provider = store._get_provider.return_value + provider.offline_store.compute_monitoring_metrics.assert_called_once() + provider.offline_store.pull_all_from_table_or_query.assert_not_called() + + def test_falls_back_to_python_when_not_supported(self): + """When compute_monitoring_metrics raises NotImplementedError, + the service falls back to pulling data + Python compute.""" + fv = _make_feature_view( + "driver_stats", + [_make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)], + ) + store = _make_mock_store([fv]) + + svc = MonitoringService(store) + result = svc.compute_transient( + project="test_project", + feature_view_name="driver_stats", + start_date=date(2025, 1, 1), + end_date=date(2025, 1, 15), + ) + + assert result["status"] == "completed" + assert len(result["metrics"]) == 1 + assert result["metrics"][0]["feature_name"] == "conv_rate" + + provider = store._get_provider.return_value + provider.offline_store.pull_all_from_table_or_query.assert_called() + + def test_auto_compute_uses_pushdown_for_max_timestamp(self): + fv = _make_feature_view( + "driver_stats", + [_make_feature_field("conv_rate", 
PrimitiveFeastType.FLOAT64)], + ) + store = _make_mock_store([fv]) + provider = store._get_provider.return_value + + provider.offline_store.get_monitoring_max_timestamp.side_effect = None + provider.offline_store.get_monitoring_max_timestamp.return_value = datetime( + 2025, 3, 27, tzinfo=timezone.utc + ) + provider.offline_store.compute_monitoring_metrics.side_effect = None + provider.offline_store.compute_monitoring_metrics.return_value = [ + { + "feature_name": "conv_rate", + "feature_type": "numeric", + "row_count": 5, + "null_count": 0, + "null_rate": 0.0, + "mean": 0.5, + "stddev": 0.2, + "min_val": 0.1, + "max_val": 0.9, + "p50": 0.5, + "p75": 0.7, + "p90": 0.9, + "p95": 0.9, + "p99": 0.9, + "histogram": None, + }, + ] + + svc = MonitoringService(store) + result = svc.auto_compute(project="test_project") + + assert result["status"] == "completed" + provider.offline_store.get_monitoring_max_timestamp.assert_called() + provider.offline_store.compute_monitoring_metrics.assert_called() + provider.offline_store.pull_all_from_table_or_query.assert_not_called() + + +# ------------------------------------------------------------------ # +# Test: Native storage dispatch +# ------------------------------------------------------------------ # + + +class TestNativeStorageDispatch: + """Verify that MonitoringService uses OfflineStore for all storage + operations (save, query, clear_baseline, ensure_tables).""" + + def test_save_goes_through_offline_store(self): + fv = _make_feature_view( + "driver_stats", + [_make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)], + ) + store = _make_mock_store([fv]) + svc = MonitoringService(store) + + svc.compute_metrics( + project="test_project", + granularity="daily", + ) + + provider = store._get_provider.return_value + provider.offline_store.ensure_monitoring_tables.assert_called() + provider.offline_store.save_monitoring_metrics.assert_called() + + save_calls = provider.offline_store.save_monitoring_metrics.call_args_list + 
metric_types_saved = {c[0][1] for c in save_calls} + assert "feature" in metric_types_saved + assert "feature_view" in metric_types_saved + + def test_query_goes_through_offline_store(self): + fv = _make_feature_view( + "driver_stats", + [_make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)], + ) + store = _make_mock_store([fv]) + svc = MonitoringService(store) + + svc.get_feature_metrics(project="test_project", granularity="daily") + + provider = store._get_provider.return_value + provider.offline_store.query_monitoring_metrics.assert_called() + call_args = provider.offline_store.query_monitoring_metrics.call_args + assert call_args[1]["metric_type"] == "feature" + + def test_baseline_clear_goes_through_offline_store(self): + fv = _make_feature_view( + "driver_stats", + [_make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)], + ) + store = _make_mock_store([fv]) + svc = MonitoringService(store) + + svc.compute_baseline(project="test_project") + + provider = store._get_provider.return_value + provider.offline_store.clear_monitoring_baseline.assert_called() + + def test_transient_does_not_save(self): + fv = _make_feature_view( + "driver_stats", + [_make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)], + ) + store = _make_mock_store([fv]) + svc = MonitoringService(store) + + svc.compute_transient( + project="test_project", + feature_view_name="driver_stats", + ) + + provider = store._get_provider.return_value + provider.offline_store.save_monitoring_metrics.assert_not_called() + + +# ------------------------------------------------------------------ # +# Test: Log source monitoring +# ------------------------------------------------------------------ # + + +class TestLogSourceMonitoring: + """Verify that monitoring can compute metrics from feature serving logs.""" + + # Realistic log column names follow the {view}__{feature} convention + # produced by FeatureServiceLoggingSource.get_schema(). 
+ _LOG_SCHEMA = pa.schema( + [ + ("driver_id", pa.int64()), + ("driver_stats__conv_rate", pa.float64()), + ("driver_stats__conv_rate__timestamp", pa.timestamp("us", tz="UTC")), + ("driver_stats__conv_rate__status", pa.int32()), + ("driver_stats__city", pa.utf8()), + ("driver_stats__city__timestamp", pa.timestamp("us", tz="UTC")), + ("driver_stats__city__status", pa.int32()), + ("__log_timestamp", pa.timestamp("us", tz="UTC")), + ("__log_date", pa.date32()), + ("__request_id", pa.utf8()), + ] + ) + + def _make_log_store(self): + """Create a mock store with a feature service that has logging configured.""" + fv = _make_feature_view( + "driver_stats", + [ + _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64), + _make_feature_field("city", PrimitiveFeastType.STRING), + ], + ) + + logging_config, log_data_source = _make_logging_config_with_source( + self._LOG_SCHEMA + ) + + fs = _make_feature_service( + "driver_service", + ["driver_stats"], + logging_config=logging_config, + feature_map={"driver_stats": ["conv_rate", "city"]}, + ) + store = _make_mock_store([fv], feature_services=[fs]) + + log_arrow_table = pa.table( + { + "driver_stats__conv_rate": [0.1, 0.5, 0.9, 0.3, 0.7], + "driver_stats__city": ["NYC", "LA", "NYC", "SF", "LA"], + "__log_timestamp": [ + datetime(2025, 3, 25, tzinfo=timezone.utc), + datetime(2025, 3, 26, tzinfo=timezone.utc), + datetime(2025, 3, 26, tzinfo=timezone.utc), + datetime(2025, 3, 27, tzinfo=timezone.utc), + datetime(2025, 3, 27, tzinfo=timezone.utc), + ], + } + ) + + mock_log_retrieval = MagicMock() + mock_log_retrieval.to_arrow.return_value = log_arrow_table + + provider = store._get_provider.return_value + provider.offline_store.pull_all_from_table_or_query.return_value = ( + mock_log_retrieval + ) + + entity_col = MagicMock() + entity_col.name = "driver_id" + fv.entity_columns = [entity_col] + + return store, fs + + def test_compute_log_metrics(self): + store, fs = self._make_log_store() + svc = MonitoringService(store) + + 
with patch( + "feast.monitoring.monitoring_service.FeatureServiceLoggingSource" + ) as mock_cls: + mock_instance = MagicMock() + mock_instance.get_schema.return_value = self._LOG_SCHEMA + mock_cls.return_value = mock_instance + + result = svc.compute_log_metrics( + project="test_project", + feature_service_name="driver_service", + start_date=date(2025, 3, 25), + end_date=date(2025, 3, 27), + granularity="daily", + ) + + assert result["status"] == "completed" + assert result["data_source_type"] == "log" + assert result["computed_features"] == 2 + + provider = store._get_provider.return_value + provider.offline_store.save_monitoring_metrics.assert_called() + + save_calls = provider.offline_store.save_monitoring_metrics.call_args_list + feature_calls = [c for c in save_calls if c[0][1] == "feature"] + assert len(feature_calls) >= 1 + saved_metrics = feature_calls[0][0][2] + assert all(m["data_source_type"] == "log" for m in saved_metrics) + # Feature names normalized: driver_stats__conv_rate -> conv_rate + saved_names = {m["feature_name"] for m in saved_metrics} + assert saved_names == {"conv_rate", "city"} + # Feature view name is the actual view, not the service + assert all(m["feature_view_name"] == "driver_stats" for m in saved_metrics) + + # Feature service aggregate saved to the service table + svc_calls = [c for c in save_calls if c[0][1] == "feature_service"] + assert len(svc_calls) >= 1 + svc_metric = svc_calls[0][0][2][0] + assert svc_metric["feature_service_name"] == "driver_service" + assert svc_metric["data_source_type"] == "log" + assert svc_metric["total_features"] == 2 + + def test_compute_log_metrics_no_logging_config(self): + fv = _make_feature_view( + "driver_stats", + [_make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)], + ) + fs = _make_feature_service("no_log_service", ["driver_stats"]) + fs.logging_config = None + store = _make_mock_store([fv], feature_services=[fs]) + svc = MonitoringService(store) + + result = 
svc.compute_log_metrics( + project="test_project", + feature_service_name="no_log_service", + ) + + assert result["status"] == "skipped" + assert "no logging configured" in result["reason"] + + def test_auto_compute_log_metrics(self): + store, fs = self._make_log_store() + svc = MonitoringService(store) + + with patch( + "feast.monitoring.monitoring_service.FeatureServiceLoggingSource" + ) as mock_cls: + mock_instance = MagicMock() + mock_instance.get_schema.return_value = self._LOG_SCHEMA + mock_cls.return_value = mock_instance + + result = svc.auto_compute_log_metrics(project="test_project") + + assert result["status"] == "completed" + assert result["data_source_type"] == "log" + assert result["computed_feature_services"] == 1 + assert len(result["granularities"]) == len(VALID_GRANULARITIES) + + def test_log_metrics_tagged_differently_from_batch(self): + """Log metrics should have data_source_type='log', batch should have 'batch'.""" + store, fs = self._make_log_store() + svc = MonitoringService(store) + + with patch( + "feast.monitoring.monitoring_service.FeatureServiceLoggingSource" + ) as mock_cls: + mock_instance = MagicMock() + mock_instance.get_schema.return_value = self._LOG_SCHEMA + mock_cls.return_value = mock_instance + + svc.compute_log_metrics( + project="test_project", + feature_service_name="driver_service", + granularity="daily", + ) + + provider = store._get_provider.return_value + save_calls = provider.offline_store.save_monitoring_metrics.call_args_list + feature_calls = [c for c in save_calls if c[0][1] == "feature"] + for call in feature_calls: + for m in call[0][2]: + assert m["data_source_type"] == "log" + assert m["feature_view_name"] == "driver_stats" + assert m["feature_name"] in ("conv_rate", "city") diff --git a/sdk/python/tests/unit/monitoring/__init__.py b/sdk/python/tests/unit/monitoring/__init__.py new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/sdk/python/tests/unit/monitoring/__init__.py @@ -0,0 +1 @@ + 
diff --git a/sdk/python/tests/unit/monitoring/test_compute_correctness.py b/sdk/python/tests/unit/monitoring/test_compute_correctness.py new file mode 100644 index 00000000000..9f9005380df --- /dev/null +++ b/sdk/python/tests/unit/monitoring/test_compute_correctness.py @@ -0,0 +1,1831 @@ +""" +Compute correctness tests for monitoring metric calculations. + +Verifies that each offline store backend's SQL/compute helpers produce +mathematically correct results for a known golden dataset. + +- DuckDB and Dask tests run fully in-memory with zero external dependencies. +- PostgreSQL tests require a live Postgres instance (skipped if unavailable). +- Snowflake, BigQuery, Redshift, Spark, Oracle tests require their respective + backends (skipped if unavailable). +""" + +import statistics +from datetime import datetime, timezone +from typing import Any, Dict + +import pyarrow as pa +import pytest + +# --------------------------------------------------------------------------- +# Golden dataset: known values with hand-computable statistics +# --------------------------------------------------------------------------- + +NUMERIC_VALUES = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] +NUMERIC_WITH_NULLS = [1.0, None, 3.0, None, 5.0, None, 7.0, None, 9.0, None] +CATEGORICAL_VALUES = ["a", "b", "a", "c", "a", "b", "d", "a", "b", "c"] + +ROW_COUNT = len(NUMERIC_VALUES) +NON_NULL_VALUES = [v for v in NUMERIC_WITH_NULLS if v is not None] + + +def _expected_numeric_stats(): + """Hand-computed expected values for NUMERIC_VALUES = [1..10].""" + vals = NUMERIC_VALUES + return { + "row_count": 10, + "null_count": 0, + "null_rate": 0.0, + "mean": 5.5, + "stddev": statistics.stdev(vals), # sample stddev ≈ 3.0277 + "min_val": 1.0, + "max_val": 10.0, + "p50": 5.5, + "p75": 7.75, + "p90": 9.1, + "p95": 9.55, + "p99": 9.91, + } + + +def _expected_numeric_with_nulls_stats(): + """Hand-computed expected values for NUMERIC_WITH_NULLS.""" + vals = NON_NULL_VALUES # [1, 3, 5, 7, 9] + return 
{ + "row_count": 10, + "null_count": 5, + "null_rate": 0.5, + "mean": 5.0, + "stddev": statistics.stdev(vals), # sample stddev ≈ 3.1623 + "min_val": 1.0, + "max_val": 9.0, + } + + +def _expected_categorical_stats(): + """Expected values for CATEGORICAL_VALUES.""" + return { + "row_count": 10, + "null_count": 0, + "null_rate": 0.0, + "unique_count": 4, + "top_value": "a", + "top_count": 4, + } + + +# --------------------------------------------------------------------------- +# Shared assertions: validate any backend's numeric/categorical result +# --------------------------------------------------------------------------- + + +def assert_numeric_correctness( + result: Dict[str, Any], expected: Dict, label: str, approx_percentiles: bool = False +): + """Assert that a numeric result from any backend matches expected values.""" + assert result["feature_type"] == "numeric", f"{label}: wrong feature_type" + assert result["row_count"] == expected["row_count"], f"{label}: wrong row_count" + assert result["null_count"] == expected["null_count"], f"{label}: wrong null_count" + assert result["null_rate"] == pytest.approx(expected["null_rate"], abs=1e-6), ( + f"{label}: wrong null_rate" + ) + assert result["mean"] == pytest.approx(expected["mean"], abs=1e-4), ( + f"{label}: wrong mean" + ) + assert result["stddev"] == pytest.approx(expected["stddev"], abs=0.05), ( + f"{label}: wrong stddev" + ) + assert result["min_val"] == pytest.approx(expected["min_val"], abs=1e-6), ( + f"{label}: wrong min_val" + ) + assert result["max_val"] == pytest.approx(expected["max_val"], abs=1e-6), ( + f"{label}: wrong max_val" + ) + + if "p50" in expected and not approx_percentiles: + assert result["p50"] == pytest.approx(expected["p50"], abs=0.5), ( + f"{label}: wrong p50" + ) + assert result["p75"] == pytest.approx(expected["p75"], abs=0.5), ( + f"{label}: wrong p75" + ) + assert result["p90"] == pytest.approx(expected["p90"], abs=0.5), ( + f"{label}: wrong p90" + ) + + # Percentile ordering is 
always enforced + assert result["p50"] <= result["p75"], f"{label}: p50 > p75" + assert result["p75"] <= result["p90"], f"{label}: p75 > p90" + assert result["p90"] <= result["p95"], f"{label}: p90 > p95" + assert result["p95"] <= result["p99"], f"{label}: p95 > p99" + assert result["p50"] >= result["min_val"], f"{label}: p50 < min" + assert result["p99"] <= result["max_val"], f"{label}: p99 > max" + + +def assert_histogram_correctness( + result: Dict[str, Any], label: str, expected_bins: int = 5 +): + """Assert that a numeric histogram has consistent structure and totals.""" + hist = result.get("histogram") + assert hist is not None, f"{label}: histogram is None" + assert len(hist["counts"]) == expected_bins, f"{label}: wrong number of bins" + assert len(hist["bins"]) == expected_bins + 1, f"{label}: wrong number of bin edges" + total = sum(hist["counts"]) + non_null = result["row_count"] - result["null_count"] + assert total == non_null, f"{label}: histogram total {total} != non_null {non_null}" + assert hist["bins"][0] <= result["min_val"], f"{label}: first bin edge > min_val" + assert hist["bins"][-1] >= result["max_val"], f"{label}: last bin edge < max_val" + + +def assert_categorical_correctness(result: Dict[str, Any], expected: Dict, label: str): + """Assert that a categorical result from any backend matches expected values.""" + assert result["feature_type"] == "categorical", f"{label}: wrong feature_type" + assert result["row_count"] == expected["row_count"], f"{label}: wrong row_count" + assert result["null_count"] == expected["null_count"], f"{label}: wrong null_count" + assert result["null_rate"] == pytest.approx(expected["null_rate"], abs=1e-6), ( + f"{label}: wrong null_rate" + ) + + hist = result["histogram"] + assert hist is not None, f"{label}: histogram is None" + assert hist["unique_count"] == expected["unique_count"], ( + f"{label}: wrong unique_count" + ) + + top_entry = hist["values"][0] + assert top_entry["value"] == expected["top_value"], 
f"{label}: wrong top value" + assert top_entry["count"] == expected["top_count"], f"{label}: wrong top count" + + total = sum(e["count"] for e in hist["values"]) + hist["other_count"] + expected_total = expected["row_count"] - expected["null_count"] + assert total == expected_total, ( + f"{label}: categorical total {total} != expected {expected_total}" + ) + + +# =================================================================== +# DuckDB compute correctness tests (fully in-memory, no external deps) +# =================================================================== + + +class TestDuckDBComputeCorrectness: + """Test DuckDB SQL helper functions produce correct metric values.""" + + @pytest.fixture(autouse=True) + def setup_duckdb(self): + duckdb = pytest.importorskip("duckdb") + self.conn = duckdb.connect() + + self.conn.execute(""" + CREATE TABLE test_data ( + event_timestamp TIMESTAMP, + numeric_col DOUBLE, + numeric_with_nulls DOUBLE, + categorical_col VARCHAR + ) + """) + + ts = datetime(2025, 1, 15, 12, 0, 0) + for i in range(ROW_COUNT): + n_val = NUMERIC_VALUES[i] + n_null = NUMERIC_WITH_NULLS[i] + c_val = CATEGORICAL_VALUES[i] + n_null_sql = f"{n_null}" if n_null is not None else "NULL" + self.conn.execute( + f"INSERT INTO test_data VALUES " + f"(TIMESTAMP '{ts.strftime('%Y-%m-%d %H:%M:%S')}', " + f"{n_val}, {n_null_sql}, '{c_val}')" + ) + yield + self.conn.close() + + def test_numeric_stats_basic(self): + from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats + + results = _duckdb_numeric_stats( + self.conn, + "test_data", + ["numeric_col"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 1 + result = results[0] + expected = _expected_numeric_stats() + assert_numeric_correctness(result, expected, "duckdb_numeric") + assert_histogram_correctness(result, "duckdb_numeric", expected_bins=5) + + def test_numeric_stats_with_nulls(self): + from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats + + results = 
_duckdb_numeric_stats( + self.conn, + "test_data", + ["numeric_with_nulls"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 1 + result = results[0] + expected = _expected_numeric_with_nulls_stats() + assert_numeric_correctness(result, expected, "duckdb_numeric_nulls") + assert_histogram_correctness(result, "duckdb_numeric_nulls", expected_bins=5) + + def test_numeric_multiple_features(self): + from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats + + results = _duckdb_numeric_stats( + self.conn, + "test_data", + ["numeric_col", "numeric_with_nulls"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 2 + assert results[0]["feature_name"] == "numeric_col" + assert results[1]["feature_name"] == "numeric_with_nulls" + assert results[0]["mean"] == pytest.approx(5.5, abs=1e-4) + assert results[1]["mean"] == pytest.approx(5.0, abs=1e-4) + assert results[0]["null_count"] == 0 + assert results[1]["null_count"] == 5 + + def test_categorical_stats(self): + from feast.infra.offline_stores.duckdb import _duckdb_categorical_stats + + result = _duckdb_categorical_stats( + self.conn, + "test_data", + "categorical_col", + "1=1", + top_n=10, + ) + + expected = _expected_categorical_stats() + assert_categorical_correctness(result, expected, "duckdb_categorical") + + def test_categorical_top_n_truncation(self): + from feast.infra.offline_stores.duckdb import _duckdb_categorical_stats + + result = _duckdb_categorical_stats( + self.conn, + "test_data", + "categorical_col", + "1=1", + top_n=2, + ) + + assert len(result["histogram"]["values"]) == 2 + assert result["histogram"]["other_count"] > 0 + total = ( + sum(e["count"] for e in result["histogram"]["values"]) + + result["histogram"]["other_count"] + ) + assert total == 10 + + def test_histogram_bin_edges_cover_range(self): + from feast.infra.offline_stores.duckdb import _duckdb_numeric_histogram + + hist = _duckdb_numeric_histogram( + self.conn, + "test_data", + "numeric_col", + "1=1", + bins=5, + 
min_val=1.0, + max_val=10.0, + ) + + assert hist["bins"][0] == pytest.approx(1.0, abs=1e-6) + assert hist["bins"][-1] == pytest.approx(10.0, abs=0.1) + assert sum(hist["counts"]) == 10 + assert hist["bin_width"] == pytest.approx(1.8, abs=0.01) + + def test_histogram_single_value(self): + from feast.infra.offline_stores.duckdb import _duckdb_numeric_histogram + + self.conn.execute(""" + CREATE TABLE single_val (event_timestamp TIMESTAMP, v DOUBLE) + """) + self.conn.execute( + "INSERT INTO single_val VALUES (TIMESTAMP '2025-01-15 12:00:00', 42.0)" + ) + + hist = _duckdb_numeric_histogram( + self.conn, + "single_val", + "v", + "1=1", + bins=5, + min_val=42.0, + max_val=42.0, + ) + + assert hist["counts"] == [1] + assert hist["bin_width"] == 0.0 + + def test_empty_table(self): + from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats + + self.conn.execute(""" + CREATE TABLE empty_tbl (event_timestamp TIMESTAMP, v DOUBLE) + """) + results = _duckdb_numeric_stats( + self.conn, + "empty_tbl", + ["v"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 1 + assert results[0]["row_count"] == 0 + assert results[0]["mean"] is None + assert results[0]["histogram"] is None + + def test_stddev_with_single_row(self): + from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats + + self.conn.execute(""" + CREATE TABLE one_row (event_timestamp TIMESTAMP, v DOUBLE) + """) + self.conn.execute( + "INSERT INTO one_row VALUES (TIMESTAMP '2025-01-15 12:00:00', 7.0)" + ) + results = _duckdb_numeric_stats( + self.conn, + "one_row", + ["v"], + "1=1", + histogram_bins=5, + ) + + assert results[0]["mean"] == pytest.approx(7.0) + assert results[0]["min_val"] == 7.0 + assert results[0]["max_val"] == 7.0 + # STDDEV_SAMP of a single value is NULL + assert results[0]["stddev"] is None + + def test_large_dataset_percentiles(self): + from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats + + self.conn.execute(""" + CREATE TABLE large_tbl (event_timestamp 
TIMESTAMP, v DOUBLE) + """) + for i in range(1, 1001): + self.conn.execute( + f"INSERT INTO large_tbl VALUES " + f"(TIMESTAMP '2025-01-15 12:00:00', {float(i)})" + ) + + results = _duckdb_numeric_stats( + self.conn, + "large_tbl", + ["v"], + "1=1", + histogram_bins=10, + ) + + r = results[0] + assert r["mean"] == pytest.approx(500.5, abs=0.1) + assert r["min_val"] == 1.0 + assert r["max_val"] == 1000.0 + assert r["p50"] == pytest.approx(500.5, abs=5.0) + assert r["p90"] == pytest.approx(900.0, abs=10.0) + assert r["p99"] == pytest.approx(990.0, abs=10.0) + assert_histogram_correctness(r, "duckdb_large", expected_bins=10) + + +# =================================================================== +# Dask (PyArrow) compute correctness tests (no external deps) +# =================================================================== + + +class TestDaskComputeCorrectness: + """Test Dask/PyArrow compute helpers produce correct metric values.""" + + def test_numeric_stats_basic(self): + from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics + + col = pa.chunked_array([pa.array(NUMERIC_VALUES, type=pa.float64())]) + result = _dask_compute_numeric_metrics(col, histogram_bins=5) + + expected = _expected_numeric_stats() + result["feature_name"] = "test" + assert_numeric_correctness(result, expected, "dask_numeric") + assert_histogram_correctness(result, "dask_numeric", expected_bins=5) + + def test_numeric_stats_with_nulls(self): + from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics + + col = pa.chunked_array([pa.array(NUMERIC_WITH_NULLS, type=pa.float64())]) + result = _dask_compute_numeric_metrics(col, histogram_bins=5) + result["feature_name"] = "test" + + expected = _expected_numeric_with_nulls_stats() + assert_numeric_correctness(result, expected, "dask_numeric_nulls") + assert_histogram_correctness(result, "dask_numeric_nulls", expected_bins=5) + + def test_numeric_all_nulls(self): + from feast.infra.offline_stores.dask import 
_dask_compute_numeric_metrics + + col = pa.chunked_array([pa.array([None, None, None], type=pa.float64())]) + result = _dask_compute_numeric_metrics(col, histogram_bins=5) + + assert result["row_count"] == 3 + assert result["null_count"] == 3 + assert result["mean"] is None + assert result["histogram"] is None + + def test_numeric_empty(self): + from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics + + col = pa.chunked_array([pa.array([], type=pa.float64())]) + result = _dask_compute_numeric_metrics(col, histogram_bins=5) + + assert result["row_count"] == 0 + assert result["mean"] is None + + def test_numeric_single_value(self): + from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics + + col = pa.chunked_array([pa.array([42.0], type=pa.float64())]) + result = _dask_compute_numeric_metrics(col, histogram_bins=5) + + assert result["mean"] == pytest.approx(42.0) + assert result["min_val"] == 42.0 + assert result["max_val"] == 42.0 + assert result["stddev"] is None # STDDEV_SAMP of single value + + def test_numeric_large_dataset_percentiles(self): + from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics + + vals = list(range(1, 1001)) + col = pa.chunked_array([pa.array(vals, type=pa.float64())]) + result = _dask_compute_numeric_metrics(col, histogram_bins=10) + + assert result["mean"] == pytest.approx(500.5, abs=0.1) + assert result["p50"] == pytest.approx(500.5, abs=5.0) + assert result["p90"] == pytest.approx(900.0, abs=10.0) + assert result["p99"] == pytest.approx(990.0, abs=10.0) + result["feature_name"] = "test" + assert_histogram_correctness(result, "dask_large", expected_bins=10) + + def test_categorical_stats_basic(self): + from feast.infra.offline_stores.dask import _dask_compute_categorical_metrics + + col = pa.chunked_array([pa.array(CATEGORICAL_VALUES, type=pa.string())]) + result = _dask_compute_categorical_metrics(col, top_n=10) + result["feature_name"] = "test" + + expected = 
_expected_categorical_stats() + assert_categorical_correctness(result, expected, "dask_categorical") + + def test_categorical_with_nulls(self): + from feast.infra.offline_stores.dask import _dask_compute_categorical_metrics + + vals = ["a", None, "b", None, "a", "c"] + col = pa.chunked_array([pa.array(vals, type=pa.string())]) + result = _dask_compute_categorical_metrics(col, top_n=10) + + assert result["row_count"] == 6 + assert result["null_count"] == 2 + assert result["null_rate"] == pytest.approx(1 / 3, abs=1e-4) + assert result["histogram"]["unique_count"] == 3 + + def test_categorical_top_n_truncation(self): + from feast.infra.offline_stores.dask import _dask_compute_categorical_metrics + + col = pa.chunked_array([pa.array(CATEGORICAL_VALUES, type=pa.string())]) + result = _dask_compute_categorical_metrics(col, top_n=2) + + assert len(result["histogram"]["values"]) == 2 + assert result["histogram"]["other_count"] > 0 + total = ( + sum(e["count"] for e in result["histogram"]["values"]) + + result["histogram"]["other_count"] + ) + assert total == 10 + + def test_categorical_all_nulls(self): + from feast.infra.offline_stores.dask import _dask_compute_categorical_metrics + + col = pa.chunked_array([pa.array([None, None], type=pa.string())]) + result = _dask_compute_categorical_metrics(col, top_n=10) + + assert result["null_count"] == 2 + assert result["histogram"] is None + + +# =================================================================== +# PostgreSQL compute correctness tests (requires live Postgres) +# =================================================================== + + +def _pg_available(): + try: + import psycopg # noqa: F401 + + return True + except ImportError: + return False + + +@pytest.mark.skipif(not _pg_available(), reason="psycopg not installed") +class TestPostgresComputeCorrectness: + """Test PostgreSQL SQL helpers produce correct metric values. 
+ + Requires env vars: FEAST_PG_HOST, FEAST_PG_PORT, FEAST_PG_DB, + FEAST_PG_USER, FEAST_PG_PASS (or a local Postgres at localhost:5432). + """ + + @pytest.fixture(autouse=True) + def setup_pg(self): + import os + + import psycopg + + host = os.environ.get("FEAST_PG_HOST", "localhost") + port = os.environ.get("FEAST_PG_PORT", "5432") + db = os.environ.get("FEAST_PG_DB", "feast") + user = os.environ.get("FEAST_PG_USER", "feast") + password = os.environ.get("FEAST_PG_PASS", "feast") + + try: + self.conn = psycopg.connect( + f"host={host} port={port} dbname={db} user={user} password={password}", + autocommit=True, + ) + except psycopg.OperationalError: + pytest.skip("PostgreSQL not reachable") + + self.conn.execute("DROP TABLE IF EXISTS feast_test_monitoring_correctness") + self.conn.execute(""" + CREATE TABLE feast_test_monitoring_correctness ( + event_timestamp TIMESTAMPTZ, + numeric_col DOUBLE PRECISION, + numeric_with_nulls DOUBLE PRECISION, + categorical_col TEXT + ) + """) + + ts = datetime(2025, 1, 15, 12, 0, 0, tzinfo=timezone.utc) + for i in range(ROW_COUNT): + n_val = NUMERIC_VALUES[i] + n_null = NUMERIC_WITH_NULLS[i] + c_val = CATEGORICAL_VALUES[i] + self.conn.execute( + "INSERT INTO feast_test_monitoring_correctness VALUES (%s, %s, %s, %s)", + (ts, n_val, n_null, c_val), + ) + yield + self.conn.execute("DROP TABLE IF EXISTS feast_test_monitoring_correctness") + self.conn.close() + + def test_numeric_stats(self): + from feast.infra.offline_stores.contrib.postgres_offline_store.postgres import ( + _sql_numeric_stats, + ) + + results = _sql_numeric_stats( + self.conn, + "feast_test_monitoring_correctness", + ["numeric_col"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 1 + expected = _expected_numeric_stats() + assert_numeric_correctness(results[0], expected, "pg_numeric") + assert_histogram_correctness(results[0], "pg_numeric", expected_bins=5) + + def test_numeric_stats_with_nulls(self): + from 
feast.infra.offline_stores.contrib.postgres_offline_store.postgres import ( + _sql_numeric_stats, + ) + + results = _sql_numeric_stats( + self.conn, + "feast_test_monitoring_correctness", + ["numeric_with_nulls"], + "1=1", + histogram_bins=5, + ) + + expected = _expected_numeric_with_nulls_stats() + assert_numeric_correctness(results[0], expected, "pg_numeric_nulls") + + def test_numeric_multiple_features(self): + from feast.infra.offline_stores.contrib.postgres_offline_store.postgres import ( + _sql_numeric_stats, + ) + + results = _sql_numeric_stats( + self.conn, + "feast_test_monitoring_correctness", + ["numeric_col", "numeric_with_nulls"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 2 + assert results[0]["mean"] == pytest.approx(5.5, abs=1e-4) + assert results[1]["mean"] == pytest.approx(5.0, abs=1e-4) + + def test_categorical_stats(self): + from feast.infra.offline_stores.contrib.postgres_offline_store.postgres import ( + _sql_categorical_stats, + ) + + result = _sql_categorical_stats( + self.conn, + "feast_test_monitoring_correctness", + "categorical_col", + "1=1", + top_n=10, + ) + + expected = _expected_categorical_stats() + assert_categorical_correctness(result, expected, "pg_categorical") + + +# =================================================================== +# Snowflake compute correctness (mocked connection, real parsing) +# =================================================================== + + +def _snowflake_importable(): + try: + from feast.infra.offline_stores.snowflake import ( + _snowflake_sql_numeric_stats, # noqa: F401 + ) + + return True + except (ImportError, Exception): + return False + + +@pytest.mark.skipif(not _snowflake_importable(), reason="Snowflake deps not installed") +class TestSnowflakeComputeCorrectness: + """Tests Snowflake result parsing with mocked cursor. + + The cursor returns exactly the row format Snowflake would produce. 
+ This validates column indexing, opt_float, null_count math, and + histogram assembly without needing a live Snowflake account. + """ + + def _make_mock_cursor(self, fetchone_val=None, fetchall_val=None): + from unittest.mock import MagicMock + + cursor = MagicMock() + cursor.fetchone.return_value = fetchone_val + cursor.fetchall.return_value = fetchall_val or [] + return cursor + + def test_numeric_stats(self): + from unittest.mock import MagicMock, patch + + from feast.infra.offline_stores.snowflake import ( + _snowflake_sql_numeric_stats, + ) + + vals = NUMERIC_VALUES + mean_v = statistics.mean(vals) + stddev_v = statistics.stdev(vals) + # row: COUNT(*), then per-feature: nn, avg, stddev, min, max, p50..p99 + stats_row = ( + 10, + 10, + mean_v, + stddev_v, + 1.0, + 10.0, + 5.5, + 7.75, + 9.1, + 9.55, + 9.91, + ) + stats_cursor = self._make_mock_cursor(fetchone_val=stats_row) + hist_row_data = [(1, 2), (2, 2), (3, 2), (4, 2), (5, 2)] + hist_cursor = self._make_mock_cursor(fetchall_val=hist_row_data) + + call_count = [0] + + def mock_execute(conn, query): + call_count[0] += 1 + return stats_cursor if call_count[0] == 1 else hist_cursor + + with patch( + "feast.infra.offline_stores.snowflake.execute_snowflake_statement", + side_effect=mock_execute, + ): + results = _snowflake_sql_numeric_stats( + MagicMock(), + "test_table", + ["numeric_col"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 1 + r = results[0] + expected = _expected_numeric_stats() + assert_numeric_correctness(r, expected, "snowflake_numeric") + assert r["histogram"] is not None + assert sum(r["histogram"]["counts"]) == 10 + + def test_numeric_stats_with_nulls(self): + from unittest.mock import MagicMock, patch + + from feast.infra.offline_stores.snowflake import ( + _snowflake_sql_numeric_stats, + ) + + vals = NON_NULL_VALUES + stats_row = ( + 10, + 5, + statistics.mean(vals), + statistics.stdev(vals), + 1.0, + 9.0, + 5.0, + 7.0, + 8.6, + 8.8, + 8.96, + ) + stats_cursor = 
self._make_mock_cursor(fetchone_val=stats_row) + hist_cursor = self._make_mock_cursor( + fetchall_val=[(1, 1), (2, 1), (3, 1), (4, 1), (5, 1)] + ) + + call_count = [0] + + def mock_execute(conn, query): + call_count[0] += 1 + return stats_cursor if call_count[0] == 1 else hist_cursor + + with patch( + "feast.infra.offline_stores.snowflake.execute_snowflake_statement", + side_effect=mock_execute, + ): + results = _snowflake_sql_numeric_stats( + MagicMock(), + "t", + ["col"], + "1=1", + histogram_bins=5, + ) + + r = results[0] + assert r["null_count"] == 5 + assert r["null_rate"] == pytest.approx(0.5) + assert r["mean"] == pytest.approx(5.0, abs=1e-4) + + def test_categorical_stats(self): + from unittest.mock import MagicMock, patch + + from feast.infra.offline_stores.snowflake import ( + _snowflake_sql_categorical_stats, + ) + + rows = [ + (10, 0, 4, "a", 4), + (10, 0, 4, "b", 3), + (10, 0, 4, "c", 2), + (10, 0, 4, "d", 1), + ] + cursor = self._make_mock_cursor(fetchall_val=rows) + + with patch( + "feast.infra.offline_stores.snowflake.execute_snowflake_statement", + return_value=cursor, + ): + result = _snowflake_sql_categorical_stats( + MagicMock(), + "t", + "cat_col", + "1=1", + top_n=10, + ) + + expected = _expected_categorical_stats() + assert_categorical_correctness(result, expected, "snowflake_categorical") + + def test_empty_result(self): + from unittest.mock import MagicMock, patch + + from feast.infra.offline_stores.snowflake import ( + _snowflake_sql_numeric_stats, + ) + + cursor = self._make_mock_cursor(fetchone_val=None) + with patch( + "feast.infra.offline_stores.snowflake.execute_snowflake_statement", + return_value=cursor, + ): + results = _snowflake_sql_numeric_stats( + MagicMock(), + "t", + ["col"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 1 + assert results[0]["mean"] is None + assert results[0]["row_count"] == 0 + + +# =================================================================== +# BigQuery compute correctness (mocked 
client, real parsing) +# =================================================================== + + +class TestBigQueryComputeCorrectness: + """Tests BigQuery result parsing with mocked client. + + BigQuery results use dict-like row access (row["column_name"]). + """ + + def _make_mock_bq_row(self, data: dict): + """Create an object supporting both dict-key and index access.""" + + class BQRow: + def __init__(self, d): + self._data = d + self._keys = list(d.keys()) + + def __getitem__(self, key): + if isinstance(key, int): + return self._data[self._keys[key]] + return self._data[key] + + return BQRow(data) + + def _make_mock_job(self, rows): + from unittest.mock import MagicMock + + job = MagicMock() + job.result.return_value = None + job.__iter__ = lambda self_: iter(rows) + return job + + def test_numeric_stats(self): + from unittest.mock import MagicMock, patch + + from feast.infra.offline_stores.bigquery import _bq_numeric_stats + + vals = NUMERIC_VALUES + row_data = { + "_row_count": 10, + "c0_nn": 10, + "c0_avg": statistics.mean(vals), + "c0_stddev": statistics.stdev(vals), + "c0_min": 1.0, + "c0_max": 10.0, + "c0_p50": 5.5, + "c0_p75": 7.75, + "c0_p90": 9.1, + "c0_p95": 9.55, + "c0_p99": 9.91, + } + stats_row = self._make_mock_bq_row(row_data) + stats_job = self._make_mock_job([stats_row]) + + hist_rows = [ + self._make_mock_bq_row({"bucket": i + 1, "cnt": 2}) for i in range(5) + ] + hist_job = self._make_mock_job(hist_rows) + + call_count = [0] + + def mock_query(sql, *args, **kwargs): + call_count[0] += 1 + return stats_job if call_count[0] == 1 else hist_job + + mock_config = MagicMock() + mock_config.offline_store.billing_project_id = "proj" + mock_config.offline_store.project_id = "proj" + mock_config.offline_store.location = "US" + + with patch( + "feast.infra.offline_stores.bigquery._get_bigquery_client" + ) as mock_client: + mock_client.return_value.query = mock_query + results = _bq_numeric_stats( + mock_config, + "test_table", + ["numeric_col"], + 
"1=1", + histogram_bins=5, + ) + + assert len(results) == 1 + r = results[0] + expected = _expected_numeric_stats() + assert_numeric_correctness(r, expected, "bq_numeric") + assert r["histogram"] is not None + assert sum(r["histogram"]["counts"]) == 10 + + def test_numeric_stats_with_nulls(self): + from unittest.mock import MagicMock, patch + + from feast.infra.offline_stores.bigquery import _bq_numeric_stats + + vals = NON_NULL_VALUES + row_data = { + "_row_count": 10, + "c0_nn": 5, + "c0_avg": statistics.mean(vals), + "c0_stddev": statistics.stdev(vals), + "c0_min": 1.0, + "c0_max": 9.0, + "c0_p50": 5.0, + "c0_p75": 7.0, + "c0_p90": 8.6, + "c0_p95": 8.8, + "c0_p99": 8.96, + } + stats_row = self._make_mock_bq_row(row_data) + stats_job = self._make_mock_job([stats_row]) + hist_job = self._make_mock_job( + [self._make_mock_bq_row({"bucket": i + 1, "cnt": 1}) for i in range(5)] + ) + + call_count = [0] + + def mock_query(sql, *args, **kwargs): + call_count[0] += 1 + return stats_job if call_count[0] == 1 else hist_job + + mock_config = MagicMock() + mock_config.offline_store.billing_project_id = "proj" + mock_config.offline_store.project_id = "proj" + mock_config.offline_store.location = "US" + + with patch( + "feast.infra.offline_stores.bigquery._get_bigquery_client" + ) as mock_client: + mock_client.return_value.query = mock_query + results = _bq_numeric_stats( + mock_config, + "t", + ["col"], + "1=1", + histogram_bins=5, + ) + + r = results[0] + assert r["null_count"] == 5 + assert r["null_rate"] == pytest.approx(0.5) + assert r["mean"] == pytest.approx(5.0, abs=1e-4) + + def test_categorical_stats(self): + from unittest.mock import MagicMock, patch + + from feast.infra.offline_stores.bigquery import _bq_categorical_stats + + rows = [ + self._make_mock_bq_row( + { + "row_count": 10, + "null_count": 0, + "unique_count": 4, + "value": "a", + "cnt": 4, + } + ), + self._make_mock_bq_row( + { + "row_count": 10, + "null_count": 0, + "unique_count": 4, + "value": "b", + 
"cnt": 3, + } + ), + self._make_mock_bq_row( + { + "row_count": 10, + "null_count": 0, + "unique_count": 4, + "value": "c", + "cnt": 2, + } + ), + self._make_mock_bq_row( + { + "row_count": 10, + "null_count": 0, + "unique_count": 4, + "value": "d", + "cnt": 1, + } + ), + ] + job = self._make_mock_job(rows) + + mock_config = MagicMock() + mock_config.offline_store.billing_project_id = "proj" + mock_config.offline_store.project_id = "proj" + mock_config.offline_store.location = "US" + + with patch( + "feast.infra.offline_stores.bigquery._get_bigquery_client" + ) as mock_client: + mock_client.return_value.query.return_value = job + result = _bq_categorical_stats( + mock_config, + "t", + "cat_col", + "1=1", + top_n=10, + ) + + expected = _expected_categorical_stats() + assert_categorical_correctness(result, expected, "bq_categorical") + + def test_multiple_features(self): + from unittest.mock import MagicMock, patch + + from feast.infra.offline_stores.bigquery import _bq_numeric_stats + + row_data = { + "_row_count": 10, + "c0_nn": 10, + "c0_avg": 5.5, + "c0_stddev": 3.03, + "c0_min": 1.0, + "c0_max": 10.0, + "c0_p50": 5.5, + "c0_p75": 7.75, + "c0_p90": 9.1, + "c0_p95": 9.55, + "c0_p99": 9.91, + "c1_nn": 5, + "c1_avg": 5.0, + "c1_stddev": 3.16, + "c1_min": 1.0, + "c1_max": 9.0, + "c1_p50": 5.0, + "c1_p75": 7.0, + "c1_p90": 8.6, + "c1_p95": 8.8, + "c1_p99": 8.96, + } + stats_job = self._make_mock_job([self._make_mock_bq_row(row_data)]) + hist_job = self._make_mock_job( + [self._make_mock_bq_row({"bucket": i + 1, "cnt": 2}) for i in range(5)] + ) + + call_count = [0] + + def mock_query(sql, *args, **kwargs): + call_count[0] += 1 + return stats_job if call_count[0] == 1 else hist_job + + mock_config = MagicMock() + mock_config.offline_store.billing_project_id = "p" + mock_config.offline_store.project_id = "p" + mock_config.offline_store.location = "US" + + with patch( + "feast.infra.offline_stores.bigquery._get_bigquery_client" + ) as mock_client: + 
mock_client.return_value.query = mock_query + results = _bq_numeric_stats( + mock_config, + "t", + ["col_a", "col_b"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 2 + assert results[0]["mean"] == pytest.approx(5.5, abs=1e-2) + assert results[1]["mean"] == pytest.approx(5.0, abs=1e-2) + assert results[0]["null_count"] == 0 + assert results[1]["null_count"] == 5 + + +# =================================================================== +# Redshift compute correctness (mocked Data API, real parsing) +# =================================================================== + + +class TestRedshiftComputeCorrectness: + """Tests Redshift result parsing with mocked _redshift_execute_fetch_rows. + + Redshift Data API returns rows as lists of field dicts, e.g. + [{"longValue": 10}, {"doubleValue": 5.5}, ...]. + """ + + def _long(self, v): + return {"longValue": v} + + def _double(self, v): + return {"doubleValue": v} + + def _string(self, v): + return {"stringValue": v} + + def _null(self): + return {"isNull": True} + + def test_numeric_stats(self): + from unittest.mock import patch + + from feast.infra.offline_stores.redshift import ( + _redshift_sql_numeric_stats, + ) + + vals = NUMERIC_VALUES + row = [ + self._long(10), # COUNT(*) + self._long(10), # COUNT(col) + self._double(statistics.mean(vals)), # AVG + self._double(statistics.stdev(vals)), # STDDEV_SAMP + self._double(1.0), # MIN + self._double(10.0), # MAX + self._double(5.5), # p50 + self._double(7.75), # p75 + self._double(9.1), # p90 + self._double(9.55), # p95 + self._double(9.91), # p99 + ] + hist_rows = [[self._long(i + 1), self._long(2)] for i in range(5)] + + call_count = [0] + + def mock_fetch(config, sql): + call_count[0] += 1 + return [row] if call_count[0] == 1 else hist_rows + + with patch( + "feast.infra.offline_stores.redshift._redshift_execute_fetch_rows", + side_effect=mock_fetch, + ): + from unittest.mock import MagicMock + + results = _redshift_sql_numeric_stats( + MagicMock(), + 
"test_table", + ["numeric_col"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 1 + r = results[0] + expected = _expected_numeric_stats() + assert_numeric_correctness(r, expected, "redshift_numeric") + assert r["histogram"] is not None + assert sum(r["histogram"]["counts"]) == 10 + + def test_numeric_stats_with_nulls(self): + from unittest.mock import MagicMock, patch + + from feast.infra.offline_stores.redshift import ( + _redshift_sql_numeric_stats, + ) + + vals = NON_NULL_VALUES + row = [ + self._long(10), + self._long(5), + self._double(statistics.mean(vals)), + self._double(statistics.stdev(vals)), + self._double(1.0), + self._double(9.0), + self._double(5.0), + self._double(7.0), + self._double(8.6), + self._double(8.8), + self._double(8.96), + ] + hist_rows = [[self._long(i + 1), self._long(1)] for i in range(5)] + + call_count = [0] + + def mock_fetch(config, sql): + call_count[0] += 1 + return [row] if call_count[0] == 1 else hist_rows + + with patch( + "feast.infra.offline_stores.redshift._redshift_execute_fetch_rows", + side_effect=mock_fetch, + ): + results = _redshift_sql_numeric_stats( + MagicMock(), + "t", + ["col"], + "1=1", + histogram_bins=5, + ) + + r = results[0] + assert r["null_count"] == 5 + assert r["null_rate"] == pytest.approx(0.5) + assert r["mean"] == pytest.approx(5.0, abs=1e-4) + + def test_categorical_stats(self): + from unittest.mock import MagicMock, patch + + from feast.infra.offline_stores.redshift import ( + _redshift_sql_categorical_stats, + ) + + rows = [ + [ + self._long(10), + self._long(0), + self._long(4), + self._string("a"), + self._long(4), + ], + [ + self._long(10), + self._long(0), + self._long(4), + self._string("b"), + self._long(3), + ], + [ + self._long(10), + self._long(0), + self._long(4), + self._string("c"), + self._long(2), + ], + [ + self._long(10), + self._long(0), + self._long(4), + self._string("d"), + self._long(1), + ], + ] + + with patch( + 
"feast.infra.offline_stores.redshift._redshift_execute_fetch_rows", + return_value=rows, + ): + result = _redshift_sql_categorical_stats( + MagicMock(), + "t", + "cat_col", + "1=1", + top_n=10, + ) + + expected = _expected_categorical_stats() + assert_categorical_correctness(result, expected, "redshift_categorical") + + def test_empty_result(self): + from unittest.mock import MagicMock, patch + + from feast.infra.offline_stores.redshift import ( + _redshift_sql_numeric_stats, + ) + + with patch( + "feast.infra.offline_stores.redshift._redshift_execute_fetch_rows", + return_value=[], + ): + results = _redshift_sql_numeric_stats( + MagicMock(), + "t", + ["col"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 1 + assert results[0]["mean"] is None + assert results[0]["row_count"] == 0 + + +# =================================================================== +# Spark compute correctness tests (requires SparkSession) +# =================================================================== + + +def _spark_available(): + try: + from pyspark.sql import SparkSession # noqa: F401 + + return True + except ImportError: + return False + + +@pytest.mark.skipif(not _spark_available(), reason="PySpark not installed") +class TestSparkComputeCorrectness: + """Test Spark SQL helpers produce correct metric values. + + Uses a local SparkSession — no external cluster required. 
+ """ + + @pytest.fixture(autouse=True) + def setup_spark(self): + from pyspark.sql import SparkSession + from pyspark.sql.types import ( + DoubleType, + StringType, + StructField, + StructType, + TimestampType, + ) + + try: + self.spark = ( + SparkSession.builder.master("local[1]") + .appName("feast_monitoring_test") + .config("spark.ui.enabled", "false") + .config("spark.driver.bindAddress", "127.0.0.1") + .getOrCreate() + ) + except Exception as e: + pytest.skip(f"SparkSession unavailable: {e}") + + schema = StructType( + [ + StructField("event_timestamp", TimestampType(), False), + StructField("numeric_col", DoubleType(), True), + StructField("numeric_with_nulls", DoubleType(), True), + StructField("categorical_col", StringType(), True), + ] + ) + + ts = datetime(2025, 1, 15, 12, 0, 0) + rows = [ + (ts, NUMERIC_VALUES[i], NUMERIC_WITH_NULLS[i], CATEGORICAL_VALUES[i]) + for i in range(ROW_COUNT) + ] + df = self.spark.createDataFrame(rows, schema) + df.createOrReplaceTempView("feast_test_monitoring") + + yield + self.spark.sql("DROP VIEW IF EXISTS feast_test_monitoring") + + def test_numeric_stats(self): + from feast.infra.offline_stores.contrib.spark_offline_store.spark import ( + _spark_sql_numeric_stats, + ) + + results = _spark_sql_numeric_stats( + self.spark, + "feast_test_monitoring", + ["numeric_col"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 1 + expected = _expected_numeric_stats() + assert_numeric_correctness( + results[0], + expected, + "spark_numeric", + approx_percentiles=True, + ) + assert results[0]["histogram"] is not None + assert sum(results[0]["histogram"]["counts"]) == 10 + + def test_numeric_stats_with_nulls(self): + from feast.infra.offline_stores.contrib.spark_offline_store.spark import ( + _spark_sql_numeric_stats, + ) + + results = _spark_sql_numeric_stats( + self.spark, + "feast_test_monitoring", + ["numeric_with_nulls"], + "1=1", + histogram_bins=5, + ) + + expected = _expected_numeric_with_nulls_stats() + 
assert_numeric_correctness( + results[0], + expected, + "spark_numeric_nulls", + approx_percentiles=True, + ) + + def test_categorical_stats(self): + from feast.infra.offline_stores.contrib.spark_offline_store.spark import ( + _spark_sql_categorical_stats, + ) + + result = _spark_sql_categorical_stats( + self.spark, + "feast_test_monitoring", + "categorical_col", + "1=1", + top_n=10, + ) + + expected = _expected_categorical_stats() + assert_categorical_correctness(result, expected, "spark_categorical") + + def test_numeric_multiple_features(self): + from feast.infra.offline_stores.contrib.spark_offline_store.spark import ( + _spark_sql_numeric_stats, + ) + + results = _spark_sql_numeric_stats( + self.spark, + "feast_test_monitoring", + ["numeric_col", "numeric_with_nulls"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 2 + assert results[0]["mean"] == pytest.approx(5.5, abs=1e-4) + assert results[1]["mean"] == pytest.approx(5.0, abs=1e-4) + + def test_categorical_top_n_truncation(self): + from feast.infra.offline_stores.contrib.spark_offline_store.spark import ( + _spark_sql_categorical_stats, + ) + + result = _spark_sql_categorical_stats( + self.spark, + "feast_test_monitoring", + "categorical_col", + "1=1", + top_n=2, + ) + + assert len(result["histogram"]["values"]) == 2 + assert result["histogram"]["other_count"] > 0 + total = ( + sum(e["count"] for e in result["histogram"]["values"]) + + result["histogram"]["other_count"] + ) + assert total == 10 + + +# =================================================================== +# Oracle compute correctness (mocked Ibis connection, real parsing) +# =================================================================== + + +def _oracle_importable(): + try: + from feast.infra.offline_stores.contrib.oracle_offline_store.oracle import ( + _oracle_numeric_stats, # noqa: F401 + ) + + return True + except ImportError: + return False + + +@pytest.mark.skipif(not _oracle_importable(), reason="Oracle deps not 
installed") +class TestOracleComputeCorrectness: + """Tests Oracle result parsing with mocked Ibis connection. + + _oracle_fetchall returns list of tuples (positional indexing). + """ + + def test_numeric_stats(self): + from unittest.mock import patch + + from feast.infra.offline_stores.contrib.oracle_offline_store.oracle import ( + _oracle_numeric_stats, + ) + + vals = NUMERIC_VALUES + row = ( + 10, + 10, + statistics.mean(vals), + statistics.stdev(vals), + 1.0, + 10.0, + 5.5, + 7.75, + 9.1, + 9.55, + 9.91, + ) + hist_rows = [(i + 1, 2) for i in range(5)] + + call_count = [0] + + def mock_fetchall(con, sql): + call_count[0] += 1 + return [row] if call_count[0] == 1 else hist_rows + + with patch( + "feast.infra.offline_stores.contrib.oracle_offline_store.oracle._oracle_fetchall", + side_effect=mock_fetchall, + ): + results = _oracle_numeric_stats( + None, + "test_table", + ["numeric_col"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 1 + r = results[0] + expected = _expected_numeric_stats() + assert_numeric_correctness(r, expected, "oracle_numeric") + assert r["histogram"] is not None + assert sum(r["histogram"]["counts"]) == 10 + + def test_numeric_stats_with_nulls(self): + from unittest.mock import patch + + from feast.infra.offline_stores.contrib.oracle_offline_store.oracle import ( + _oracle_numeric_stats, + ) + + vals = NON_NULL_VALUES + row = ( + 10, + 5, + statistics.mean(vals), + statistics.stdev(vals), + 1.0, + 9.0, + 5.0, + 7.0, + 8.6, + 8.8, + 8.96, + ) + hist_rows = [(i + 1, 1) for i in range(5)] + + call_count = [0] + + def mock_fetchall(con, sql): + call_count[0] += 1 + return [row] if call_count[0] == 1 else hist_rows + + with patch( + "feast.infra.offline_stores.contrib.oracle_offline_store.oracle._oracle_fetchall", + side_effect=mock_fetchall, + ): + results = _oracle_numeric_stats( + None, + "t", + ["col"], + "1=1", + histogram_bins=5, + ) + + r = results[0] + assert r["null_count"] == 5 + assert r["null_rate"] == pytest.approx(0.5) 
+ assert r["mean"] == pytest.approx(5.0, abs=1e-4) + + def test_categorical_stats(self): + from unittest.mock import patch + + from feast.infra.offline_stores.contrib.oracle_offline_store.oracle import ( + _oracle_categorical_stats, + ) + + rows = [ + (10, 0, 4, "a", 4), + (10, 0, 4, "b", 3), + (10, 0, 4, "c", 2), + (10, 0, 4, "d", 1), + ] + + with patch( + "feast.infra.offline_stores.contrib.oracle_offline_store.oracle._oracle_fetchall", + return_value=rows, + ): + result = _oracle_categorical_stats( + None, + "t", + "cat_col", + "1=1", + top_n=10, + ) + + expected = _expected_categorical_stats() + assert_categorical_correctness(result, expected, "oracle_categorical") + + def test_empty_result(self): + from unittest.mock import patch + + from feast.infra.offline_stores.contrib.oracle_offline_store.oracle import ( + _oracle_numeric_stats, + ) + + with patch( + "feast.infra.offline_stores.contrib.oracle_offline_store.oracle._oracle_fetchall", + return_value=[None], + ): + results = _oracle_numeric_stats( + None, + "t", + ["col"], + "1=1", + histogram_bins=5, + ) + + assert len(results) == 1 + assert results[0]["mean"] is None + assert results[0]["row_count"] == 0 + + def test_multiple_features(self): + from unittest.mock import patch + + from feast.infra.offline_stores.contrib.oracle_offline_store.oracle import ( + _oracle_numeric_stats, + ) + + row = ( + 10, + # Feature 0: numeric_col + 10, + 5.5, + 3.03, + 1.0, + 10.0, + 5.5, + 7.75, + 9.1, + 9.55, + 9.91, + # Feature 1: numeric_with_nulls + 5, + 5.0, + 3.16, + 1.0, + 9.0, + 5.0, + 7.0, + 8.6, + 8.8, + 8.96, + ) + hist_rows = [(i + 1, 2) for i in range(5)] + + call_count = [0] + + def mock_fetchall(con, sql): + call_count[0] += 1 + return [row] if call_count[0] == 1 else hist_rows + + with patch( + "feast.infra.offline_stores.contrib.oracle_offline_store.oracle._oracle_fetchall", + side_effect=mock_fetchall, + ): + results = _oracle_numeric_stats( + None, + "t", + ["col_a", "col_b"], + "1=1", + histogram_bins=5, + 
) + + assert len(results) == 2 + assert results[0]["mean"] == pytest.approx(5.5, abs=1e-2) + assert results[1]["mean"] == pytest.approx(5.0, abs=1e-2) + assert results[0]["null_count"] == 0 + assert results[1]["null_count"] == 5 + + +# =================================================================== +# Cross-backend consistency: MetricsCalculator vs DuckDB vs Dask +# =================================================================== + + +class TestCrossBackendConsistency: + """Verify that DuckDB, Dask, and MetricsCalculator produce + consistent results for the same dataset.""" + + def test_numeric_mean_matches_across_backends(self): + duckdb = pytest.importorskip("duckdb") + from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics + from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats + from feast.monitoring.metrics_calculator import MetricsCalculator + + calc = MetricsCalculator(histogram_bins=5, top_n=10) + arr = pa.array(NUMERIC_VALUES, type=pa.float64()) + pyarrow_result = calc.compute_numeric(arr) + + col = pa.chunked_array([arr]) + dask_result = _dask_compute_numeric_metrics(col, histogram_bins=5) + + conn = duckdb.connect() + conn.execute("CREATE TABLE consistency_test (v DOUBLE)") + for v in NUMERIC_VALUES: + conn.execute(f"INSERT INTO consistency_test VALUES ({v})") + + duckdb_results = _duckdb_numeric_stats( + conn, + "consistency_test", + ["v"], + "1=1", + histogram_bins=5, + ) + conn.close() + + duckdb_result = duckdb_results[0] + + assert pyarrow_result["mean"] == pytest.approx(dask_result["mean"], abs=1e-6) + assert pyarrow_result["mean"] == pytest.approx(duckdb_result["mean"], abs=1e-6) + assert dask_result["mean"] == pytest.approx(duckdb_result["mean"], abs=1e-6) + + assert pyarrow_result["stddev"] == pytest.approx( + dask_result["stddev"], abs=0.01 + ) + assert pyarrow_result["stddev"] == pytest.approx( + duckdb_result["stddev"], abs=0.01 + ) + + assert pyarrow_result["min_val"] == dask_result["min_val"] + assert 
pyarrow_result["min_val"] == duckdb_result["min_val"] + assert pyarrow_result["max_val"] == dask_result["max_val"] + assert pyarrow_result["max_val"] == duckdb_result["max_val"] + + def test_categorical_unique_count_matches(self): + duckdb = pytest.importorskip("duckdb") + from feast.infra.offline_stores.dask import ( + _dask_compute_categorical_metrics, + ) + from feast.infra.offline_stores.duckdb import _duckdb_categorical_stats + from feast.monitoring.metrics_calculator import MetricsCalculator + + calc = MetricsCalculator(histogram_bins=5, top_n=10) + arr = pa.array(CATEGORICAL_VALUES, type=pa.string()) + pyarrow_result = calc.compute_categorical(arr) + + col = pa.chunked_array([arr]) + dask_result = _dask_compute_categorical_metrics(col, top_n=10) + + conn = duckdb.connect() + conn.execute("CREATE TABLE cat_consistency (v VARCHAR)") + for v in CATEGORICAL_VALUES: + conn.execute(f"INSERT INTO cat_consistency VALUES ('{v}')") + + duckdb_result = _duckdb_categorical_stats( + conn, + "cat_consistency", + "v", + "1=1", + top_n=10, + ) + conn.close() + + assert ( + pyarrow_result["histogram"]["unique_count"] + == dask_result["histogram"]["unique_count"] + == duckdb_result["histogram"]["unique_count"] + == 4 + ) + + pyarrow_top = pyarrow_result["histogram"]["values"][0] + dask_top = dask_result["histogram"]["values"][0] + duckdb_top = duckdb_result["histogram"]["values"][0] + assert pyarrow_top["value"] == dask_top["value"] == duckdb_top["value"] == "a" + assert pyarrow_top["count"] == dask_top["count"] == duckdb_top["count"] == 4 + + def test_null_rate_matches_across_backends(self): + duckdb = pytest.importorskip("duckdb") + from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics + from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats + from feast.monitoring.metrics_calculator import MetricsCalculator + + calc = MetricsCalculator(histogram_bins=5, top_n=10) + arr = pa.array(NUMERIC_WITH_NULLS, type=pa.float64()) + pyarrow_result = 
calc.compute_numeric(arr) + + col = pa.chunked_array([arr]) + dask_result = _dask_compute_numeric_metrics(col, histogram_bins=5) + + conn = duckdb.connect() + conn.execute("CREATE TABLE null_consistency (v DOUBLE)") + for v in NUMERIC_WITH_NULLS: + val = f"{v}" if v is not None else "NULL" + conn.execute(f"INSERT INTO null_consistency VALUES ({val})") + + duckdb_results = _duckdb_numeric_stats( + conn, + "null_consistency", + ["v"], + "1=1", + histogram_bins=5, + ) + conn.close() + + assert pyarrow_result["null_rate"] == pytest.approx(0.5, abs=1e-6) + assert dask_result["null_rate"] == pytest.approx(0.5, abs=1e-6) + assert duckdb_results[0]["null_rate"] == pytest.approx(0.5, abs=1e-6) diff --git a/sdk/python/tests/unit/monitoring/test_metrics_calculator.py b/sdk/python/tests/unit/monitoring/test_metrics_calculator.py new file mode 100644 index 00000000000..8124531d765 --- /dev/null +++ b/sdk/python/tests/unit/monitoring/test_metrics_calculator.py @@ -0,0 +1,289 @@ +import json +import math + +import pyarrow as pa +import pytest + +from feast.monitoring.metrics_calculator import MetricsCalculator +from feast.monitoring.monitoring_utils import opt_float +from feast.types import PrimitiveFeastType + + +def _make_calc(bins=20, top_n=10): + return MetricsCalculator(histogram_bins=bins, top_n=top_n) + + +class TestClassifyFeature: + @pytest.mark.parametrize( + "dtype, expected", + [ + (PrimitiveFeastType.INT32, "numeric"), + (PrimitiveFeastType.INT64, "numeric"), + (PrimitiveFeastType.FLOAT32, "numeric"), + (PrimitiveFeastType.FLOAT64, "numeric"), + (PrimitiveFeastType.STRING, "categorical"), + (PrimitiveFeastType.BOOL, "categorical"), + (PrimitiveFeastType.BYTES, None), + (PrimitiveFeastType.UNIX_TIMESTAMP, None), + ], + ) + def test_classification(self, dtype, expected): + assert MetricsCalculator.classify_feature(dtype) == expected + + +class TestComputeNumeric: + def test_basic_stats(self): + calc = _make_calc() + arr = pa.array([1.0, 2.0, 3.0, 4.0, 5.0]) + result = 
calc.compute_numeric(arr) + + assert result["feature_type"] == "numeric" + assert result["row_count"] == 5 + assert result["null_count"] == 0 + assert result["null_rate"] == 0.0 + assert result["mean"] == pytest.approx(3.0) + assert result["min_val"] == 1.0 + assert result["max_val"] == 5.0 + assert result["p50"] is not None + assert result["histogram"] is not None + assert "bins" in result["histogram"] + assert "counts" in result["histogram"] + + def test_with_nulls(self): + calc = _make_calc() + arr = pa.array([1.0, None, 3.0, None, 5.0]) + result = calc.compute_numeric(arr) + + assert result["row_count"] == 5 + assert result["null_count"] == 2 + assert result["null_rate"] == pytest.approx(0.4) + assert result["mean"] == pytest.approx(3.0) + + def test_all_nulls(self): + calc = _make_calc() + arr = pa.array([None, None, None], type=pa.float64()) + result = calc.compute_numeric(arr) + + assert result["null_count"] == 3 + assert result["mean"] is None + assert result["histogram"] is None + + def test_empty_array(self): + calc = _make_calc() + arr = pa.array([], type=pa.float64()) + result = calc.compute_numeric(arr) + + assert result["row_count"] == 0 + assert result["null_rate"] == 0.0 + + def test_single_value(self): + calc = _make_calc() + arr = pa.array([42.0]) + result = calc.compute_numeric(arr) + + assert result["mean"] == 42.0 + assert result["min_val"] == 42.0 + assert result["max_val"] == 42.0 + assert result["stddev"] is None # STDDEV_SAMP of 1 value is NaN → None + + def test_histogram_bin_count(self): + calc = _make_calc(bins=5) + arr = pa.array(list(range(100)), type=pa.float64()) + result = calc.compute_numeric(arr) + + assert len(result["histogram"]["counts"]) == 5 + assert len(result["histogram"]["bins"]) == 6 + + def test_percentiles_order(self): + calc = _make_calc() + arr = pa.array(list(range(1000)), type=pa.float64()) + result = calc.compute_numeric(arr) + + assert result["p50"] <= result["p75"] + assert result["p75"] <= result["p90"] + assert 
result["p90"] <= result["p95"] + assert result["p95"] <= result["p99"] + + +class TestComputeCategorical: + def test_basic(self): + calc = _make_calc() + arr = pa.array(["a", "b", "a", "c", "a", "b"]) + result = calc.compute_categorical(arr) + + assert result["feature_type"] == "categorical" + assert result["row_count"] == 6 + assert result["null_count"] == 0 + assert result["histogram"] is not None + assert result["histogram"]["unique_count"] == 3 + + top_values = {v["value"] for v in result["histogram"]["values"]} + assert "a" in top_values + + def test_with_nulls(self): + calc = _make_calc() + arr = pa.array(["a", None, "b", None]) + result = calc.compute_categorical(arr) + + assert result["null_count"] == 2 + assert result["null_rate"] == 0.5 + + def test_high_cardinality(self): + calc = _make_calc(top_n=3) + arr = pa.array([f"val_{i}" for i in range(100)]) + result = calc.compute_categorical(arr) + + assert len(result["histogram"]["values"]) == 3 + assert result["histogram"]["unique_count"] == 100 + assert result["histogram"]["other_count"] == 97 + + def test_all_nulls(self): + calc = _make_calc() + arr = pa.array([None, None], type=pa.string()) + result = calc.compute_categorical(arr) + + assert result["null_count"] == 2 + assert result["histogram"] is None + + +class TestComputeAll: + def test_mixed_features(self): + calc = _make_calc() + table = pa.table( + { + "age": [25, 30, 35, 40], + "city": ["NYC", "LA", "NYC", "SF"], + } + ) + fields = [("age", "numeric"), ("city", "categorical")] + results = calc.compute_all(table, fields) + + assert len(results) == 2 + assert results[0]["feature_name"] == "age" + assert results[0]["feature_type"] == "numeric" + assert results[1]["feature_name"] == "city" + assert results[1]["feature_type"] == "categorical" + + def test_missing_column_skipped(self): + calc = _make_calc() + table = pa.table({"age": [25, 30]}) + fields = [("age", "numeric"), ("missing_col", "numeric")] + results = calc.compute_all(table, fields) + + 
assert len(results) == 1 + assert results[0]["feature_name"] == "age" + + +class TestNaNSanitization: + """Verify that NaN/Inf values never leak into metric results.""" + + def test_opt_float_none(self): + assert opt_float(None) is None + + def test_opt_float_normal(self): + assert opt_float(3.14) == pytest.approx(3.14) + + def test_opt_float_nan(self): + assert opt_float(float("nan")) is None + + def test_opt_float_inf(self): + assert opt_float(float("inf")) is None + + def test_opt_float_neg_inf(self): + assert opt_float(float("-inf")) is None + + def test_opt_float_zero(self): + assert opt_float(0) == 0.0 + + def test_opt_float_integer(self): + assert opt_float(42) == 42.0 + + def test_single_value_stddev_is_none_not_nan(self): + """pc.stddev(ddof=1) on a single value returns NaN; we must convert to None.""" + calc = _make_calc() + arr = pa.array([7.0]) + result = calc.compute_numeric(arr) + + assert result["stddev"] is None + assert result["mean"] == pytest.approx(7.0) + + def test_two_values_stddev_is_valid(self): + calc = _make_calc() + arr = pa.array([4.0, 6.0]) + result = calc.compute_numeric(arr) + + assert result["stddev"] is not None + assert result["stddev"] == pytest.approx(math.sqrt(2.0)) + + def test_all_numeric_results_json_serializable(self): + """Every field in a numeric result must be JSON-serializable (no NaN/Inf).""" + calc = _make_calc(bins=5) + for test_data in [ + [42.0], # single value + [1.0, 2.0], # two values + [1.0, None, 3.0], # with nulls + list(range(100)), # many values + ]: + arr = pa.array(test_data, type=pa.float64()) + result = calc.compute_numeric(arr) + json.dumps(result) # raises ValueError if NaN/Inf present + + def test_all_categorical_results_json_serializable(self): + calc = _make_calc() + for test_data in [ + ["a", "b", "a"], + ["x", None, "y"], + [None, None], + ]: + arr = pa.array(test_data, type=pa.string()) + result = calc.compute_categorical(arr) + json.dumps(result) + + def test_sanitize_floats_cleans_nan(self): + 
from feast.monitoring.monitoring_service import _sanitize_floats + + row = { + "feature_name": "test", + "mean": float("nan"), + "stddev": float("inf"), + "null_rate": float("-inf"), + "min_val": 1.0, + "max_val": 10.0, + "p50": 5.0, + "p75": None, + "row_count": 100, + } + result = _sanitize_floats(row) + + assert result["mean"] is None + assert result["stddev"] is None + assert result["null_rate"] is None + assert result["min_val"] == 1.0 + assert result["max_val"] == 10.0 + assert result["p50"] == 5.0 + assert result["p75"] is None + assert result["row_count"] == 100 # non-float fields untouched + assert result["feature_name"] == "test" + json.dumps(result) + + def test_sanitize_floats_preserves_valid_values(self): + from feast.monitoring.monitoring_service import _sanitize_floats + + row = { + "mean": 5.5, + "stddev": 2.3, + "null_rate": 0.0, + "min_val": 0.0, + "max_val": 10.0, + "p50": 5.0, + "p75": 7.5, + "p90": 9.0, + "p95": 9.5, + "p99": 9.9, + "avg_null_rate": 0.05, + "max_null_rate": 0.1, + } + result = _sanitize_floats(row) + + for key, val in row.items(): + assert result[key] == val diff --git a/sdk/python/tests/unit/test_metrics.py b/sdk/python/tests/unit/test_metrics.py index bffde73dd91..abf2a35e389 100644 --- a/sdk/python/tests/unit/test_metrics.py +++ b/sdk/python/tests/unit/test_metrics.py @@ -18,9 +18,14 @@ import pytest from feast.metrics import ( + emit_offline_audit_log, + emit_online_audit_log, feature_freshness_seconds, materialization_duration_seconds, materialization_result_total, + offline_store_request_latency_seconds, + offline_store_request_total, + offline_store_row_count, online_features_entity_count, online_features_request_count, online_features_status_total, @@ -42,13 +47,11 @@ ) -@pytest.fixture(autouse=True) -def _enable_metrics(): - """Enable all metric categories for each test, then restore.""" +def _all_enabled_flags(): + """Return a _MetricsFlags with every category enabled.""" import feast.metrics as m - original = 
m._config - m._config = m._MetricsFlags( + return m._MetricsFlags( enabled=True, resource=True, request=True, @@ -56,7 +59,18 @@ def _enable_metrics(): push=True, materialization=True, freshness=True, + offline_features=True, + audit_logging=True, ) + + +@pytest.fixture(autouse=True) +def _enable_metrics(): + """Enable all metric categories for each test, then restore.""" + import feast.metrics as m + + original = m._config + m._config = _all_enabled_flags() yield m._config = original @@ -1081,3 +1095,640 @@ def test_separate_from_read_transform_metric(self): assert abs(read_delta - 0.01) < 0.001 assert abs(write_delta - 0.05) < 0.001 + + +class TestOfflineStoreMetrics: + """Tests for the offline store Prometheus metrics (RED pattern).""" + + def test_request_total_increments_on_success(self): + before = offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + + offline_store_request_total.labels(method="to_arrow", status="success").inc() + + assert ( + offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + == before + 1 + ) + + def test_request_total_increments_on_error(self): + before = offline_store_request_total.labels( + method="to_arrow", status="error" + )._value.get() + + offline_store_request_total.labels(method="to_arrow", status="error").inc() + + assert ( + offline_store_request_total.labels( + method="to_arrow", status="error" + )._value.get() + == before + 1 + ) + + def test_latency_histogram_records(self): + before_sum = offline_store_request_latency_seconds.labels( + method="to_arrow" + )._sum.get() + + offline_store_request_latency_seconds.labels(method="to_arrow").observe(2.5) + + after_sum = offline_store_request_latency_seconds.labels( + method="to_arrow" + )._sum.get() + assert pytest.approx(after_sum - before_sum, abs=0.01) == 2.5 + + def test_row_count_histogram_records(self): + before_sum = offline_store_row_count.labels(method="to_arrow")._sum.get() + + 
offline_store_row_count.labels(method="to_arrow").observe(1000) + + after_sum = offline_store_row_count.labels(method="to_arrow")._sum.get() + assert pytest.approx(after_sum - before_sum, abs=1) == 1000 + + def test_different_methods_tracked_independently(self): + before_a = offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + before_b = offline_store_request_total.labels( + method="other", status="success" + )._value.get() + + offline_store_request_total.labels(method="to_arrow", status="success").inc() + + assert ( + offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + == before_a + 1 + ) + assert ( + offline_store_request_total.labels( + method="other", status="success" + )._value.get() + == before_b + ) + + +class TestEmitAuditLogs: + """Tests for structured JSON audit log emission.""" + + def test_emit_online_audit_log_writes_json(self): + import json + import logging + + _audit_logger = logging.getLogger("feast.audit") + with patch.object(_audit_logger, "info") as mock_info: + emit_online_audit_log( + requestor_id="user@example.com", + entity_keys=["driver_id", "customer_id"], + entity_count=10, + feature_views=["driver_fv", "order_fv"], + feature_count=5, + status="success", + latency_ms=42.0, + ) + + mock_info.assert_called_once() + logged_json = mock_info.call_args[0][0] + record = json.loads(logged_json) + + assert record["event"] == "online_feature_request" + assert record["requestor_id"] == "user@example.com" + assert record["entity_keys"] == ["driver_id", "customer_id"] + assert record["entity_count"] == 10 + assert record["feature_views"] == ["driver_fv", "order_fv"] + assert record["feature_count"] == 5 + assert record["status"] == "success" + assert record["latency_ms"] == pytest.approx(42.0) + assert "timestamp" in record + + def test_emit_online_audit_log_noop_when_disabled(self): + import logging + + import feast.metrics as m + + m._config = 
m._MetricsFlags(enabled=True, audit_logging=False) + _audit_logger = logging.getLogger("feast.audit") + with patch.object(_audit_logger, "info") as mock_info: + emit_online_audit_log( + requestor_id="user@example.com", + entity_keys=["driver_id"], + entity_count=1, + feature_views=["driver_fv"], + feature_count=1, + status="success", + latency_ms=10.0, + ) + mock_info.assert_not_called() + + def test_emit_offline_audit_log_writes_json(self): + import json + import logging + + _audit_logger = logging.getLogger("feast.audit") + with patch.object(_audit_logger, "info") as mock_info: + emit_offline_audit_log( + method="to_arrow", + feature_views=["driver_fv"], + feature_count=3, + row_count=500, + status="success", + start_time="2026-04-27T12:00:00+00:00", + end_time="2026-04-27T12:00:01+00:00", + duration_ms=1230.0, + ) + + mock_info.assert_called_once() + logged_json = mock_info.call_args[0][0] + record = json.loads(logged_json) + + assert record["event"] == "offline_feature_retrieval" + assert "timestamp" in record + assert record["method"] == "to_arrow" + assert record["feature_views"] == ["driver_fv"] + assert record["feature_count"] == 3 + assert record["row_count"] == 500 + assert record["status"] == "success" + assert record["duration_ms"] == pytest.approx(1230.0) + assert record["start_time"] == "2026-04-27T12:00:00+00:00" + assert record["end_time"] == "2026-04-27T12:00:01+00:00" + + def test_emit_offline_audit_log_noop_when_disabled(self): + import logging + + import feast.metrics as m + + m._config = m._MetricsFlags(enabled=True, audit_logging=False) + _audit_logger = logging.getLogger("feast.audit") + with patch.object(_audit_logger, "info") as mock_info: + emit_offline_audit_log( + method="to_arrow", + feature_views=["fv"], + feature_count=1, + row_count=10, + status="success", + start_time="t0", + end_time="t1", + duration_ms=500.0, + ) + mock_info.assert_not_called() + + def test_emit_online_audit_log_with_error_status(self): + import json + import 
logging + + _audit_logger = logging.getLogger("feast.audit") + with patch.object(_audit_logger, "info") as mock_info: + emit_online_audit_log( + requestor_id="unknown", + entity_keys=[], + entity_count=0, + feature_views=[], + feature_count=0, + status="error", + latency_ms=1.0, + ) + + record = json.loads(mock_info.call_args[0][0]) + assert record["status"] == "error" + + +class TestBuildMetricsFlagsOfflineAndAudit: + """Tests for the new offline_features and audit_logging flags.""" + + def test_no_config_defaults_for_new_flags(self): + from feast.metrics import build_metrics_flags + + flags = build_metrics_flags(None) + assert flags.offline_features is True + assert flags.audit_logging is False + + def test_explicit_enable(self): + from types import SimpleNamespace + + from feast.metrics import build_metrics_flags + + mc = SimpleNamespace( + enabled=True, + resource=True, + request=True, + online_features=True, + push=True, + materialization=True, + freshness=True, + offline_features=True, + audit_logging=True, + ) + flags = build_metrics_flags(mc) + assert flags.offline_features is True + assert flags.audit_logging is True + + def test_explicit_disable(self): + from types import SimpleNamespace + + from feast.metrics import build_metrics_flags + + mc = SimpleNamespace( + enabled=True, + resource=True, + request=True, + online_features=True, + push=True, + materialization=True, + freshness=True, + offline_features=False, + audit_logging=False, + ) + flags = build_metrics_flags(mc) + assert flags.offline_features is False + assert flags.audit_logging is False + + def test_missing_new_attrs_fall_back_to_defaults(self): + from types import SimpleNamespace + + from feast.metrics import build_metrics_flags + + mc = SimpleNamespace( + enabled=True, + resource=True, + request=True, + online_features=True, + push=True, + materialization=True, + freshness=True, + ) + flags = build_metrics_flags(mc) + assert flags.offline_features is True + assert flags.audit_logging is 
False + + +class TestExtractRetrievalMetadata: + """Tests for _extract_retrieval_metadata helper.""" + + def test_extracts_feature_views_and_count(self): + from feast.infra.offline_stores.offline_store import ( + RetrievalMetadata, + _extract_retrieval_metadata, + ) + + job = MagicMock() + job.metadata = RetrievalMetadata( + features=[ + "driver_fv:conv_rate", + "driver_fv:acc_rate", + "vehicle_fv:mileage", + ], + keys=["driver_id"], + ) + + fv_names, feat_count = _extract_retrieval_metadata(job) + assert feat_count == 3 + assert set(fv_names) == {"driver_fv", "vehicle_fv"} + + def test_returns_empty_when_no_metadata(self): + from feast.infra.offline_stores.offline_store import ( + _extract_retrieval_metadata, + ) + + job = MagicMock() + job.metadata = None + + fv_names, feat_count = _extract_retrieval_metadata(job) + assert fv_names == [] + assert feat_count == 0 + + def test_handles_not_implemented_metadata(self): + from feast.infra.offline_stores.offline_store import ( + _extract_retrieval_metadata, + ) + + job = MagicMock() + type(job).metadata = property( + lambda self: (_ for _ in ()).throw(NotImplementedError()) + ) + + fv_names, feat_count = _extract_retrieval_metadata(job) + assert fv_names == [] + assert feat_count == 0 + + +class TestRetrievalJobToArrowInstrumentation: + """Tests for the metrics/audit instrumentation in RetrievalJob.to_arrow().""" + + def _make_job( + self, table, on_demand_fvs=None, metadata=None, raise_on_internal=None + ): + """Create a concrete RetrievalJob subclass for testing.""" + from feast.infra.offline_stores.offline_store import RetrievalJob + + class _TestJob(RetrievalJob): + def __init__(self): + self._table = table + self._odfvs = on_demand_fvs or [] + self._metadata = metadata + self._raise = raise_on_internal + + def _to_arrow_internal(self, timeout=None): + if self._raise: + raise self._raise + return self._table + + @property + def full_feature_names(self): + return False + + @property + def 
on_demand_feature_views(self): + return self._odfvs + + @property + def metadata(self): + return self._metadata + + return _TestJob() + + def test_success_increments_counter_and_records_latency(self): + import pyarrow as pa + + table = pa.table({"col": [1, 2, 3]}) + job = self._make_job(table) + + before_count = offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + before_latency = offline_store_request_latency_seconds.labels( + method="to_arrow" + )._sum.get() + + result = job.to_arrow() + + assert result.num_rows == 3 + assert ( + offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + == before_count + 1 + ) + assert ( + offline_store_request_latency_seconds.labels(method="to_arrow")._sum.get() + > before_latency + ) + + def test_error_increments_error_counter(self): + job = self._make_job(None, raise_on_internal=RuntimeError("query failed")) + + before_error = offline_store_request_total.labels( + method="to_arrow", status="error" + )._value.get() + + with pytest.raises(RuntimeError, match="query failed"): + job.to_arrow() + + assert ( + offline_store_request_total.labels( + method="to_arrow", status="error" + )._value.get() + == before_error + 1 + ) + + def test_row_count_recorded_on_success(self): + import pyarrow as pa + + table = pa.table({"a": list(range(500))}) + job = self._make_job(table) + + before_sum = offline_store_row_count.labels(method="to_arrow")._sum.get() + + job.to_arrow() + + assert ( + offline_store_row_count.labels(method="to_arrow")._sum.get() + >= before_sum + 500 + ) + + def test_row_count_recorded_when_zero(self): + import pyarrow as pa + + table = pa.table({"a": pa.array([], type=pa.int64())}) + job = self._make_job(table) + + hist = offline_store_row_count.labels(method="to_arrow") + before_bucket = hist._buckets[0].get() + + job.to_arrow() + + assert hist._buckets[0].get() == before_bucket + 1 + + def 
test_metrics_skipped_when_offline_features_disabled(self): + import pyarrow as pa + + import feast.metrics as m + + m._config = m._MetricsFlags( + enabled=True, offline_features=False, audit_logging=False + ) + + table = pa.table({"col": [1, 2]}) + job = self._make_job(table) + + before_count = offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + + job.to_arrow() + + assert ( + offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + == before_count + ) + + def test_audit_log_emitted_on_success(self): + import pyarrow as pa + + from feast.infra.offline_stores.offline_store import RetrievalMetadata + + meta = RetrievalMetadata( + features=["driver_fv:conv_rate", "driver_fv:acc_rate"], + keys=["driver_id"], + ) + table = pa.table({"col": [1, 2, 3]}) + job = self._make_job(table, metadata=meta) + + with patch("feast.metrics.emit_offline_audit_log") as mock_audit: + job.to_arrow() + + mock_audit.assert_called_once() + call_kwargs = mock_audit.call_args[1] + assert call_kwargs["method"] == "to_arrow" + assert call_kwargs["status"] == "success" + assert call_kwargs["row_count"] == 3 + assert call_kwargs["feature_count"] == 2 + assert set(call_kwargs["feature_views"]) == {"driver_fv"} + + def test_audit_log_skipped_when_disabled(self): + import pyarrow as pa + + import feast.metrics as m + + m._config = m._MetricsFlags( + enabled=True, offline_features=True, audit_logging=False + ) + + table = pa.table({"col": [1]}) + job = self._make_job(table) + + with patch("feast.metrics.emit_offline_audit_log") as mock_audit: + job.to_arrow() + mock_audit.assert_not_called() + + def test_instrumentation_failure_does_not_mask_query_error(self): + """If metrics code itself throws, the original query error still propagates.""" + import pyarrow as pa + + table = pa.table({"col": [1]}) + job = self._make_job(table) + + with patch( + "feast.metrics._config", + new_callable=lambda: property( + lambda self: (_ for _ 
in ()).throw(RuntimeError("metrics broken")) + ), + ): + result = job.to_arrow() + assert result.num_rows == 1 + + +class TestParseFeatureInfo: + """Tests for _parse_feature_info in feature_server.""" + + def test_feature_ref_list(self): + from feast.feature_server import _parse_feature_info + + refs = ["driver_fv:conv_rate", "driver_fv:acc_rate", "vehicle_fv:mileage"] + fv_names, feat_count = _parse_feature_info(refs) + assert feat_count == 3 + assert set(fv_names) == {"driver_fv", "vehicle_fv"} + + def test_empty_list(self): + from feast.feature_server import _parse_feature_info + + fv_names, feat_count = _parse_feature_info([]) + assert fv_names == [] + assert feat_count == 0 + + def test_feature_service(self): + from feast.feature_server import _parse_feature_info + + proj1 = MagicMock() + proj1.name = "driver_fv" + proj1.features = [MagicMock(), MagicMock()] + proj2 = MagicMock() + proj2.name = "order_fv" + proj2.features = [MagicMock()] + + fs_svc = MagicMock() + fs_svc.feature_view_projections = [proj1, proj2] + + from feast.feature_service import FeatureService + + fs_svc.__class__ = FeatureService + + fv_names, feat_count = _parse_feature_info(fs_svc) + assert feat_count == 3 + assert fv_names == ["driver_fv", "order_fv"] + + def test_strips_version_suffix(self): + from feast.feature_server import _parse_feature_info + + refs = ["driver_fv@v2:conv_rate"] + fv_names, feat_count = _parse_feature_info(refs) + assert feat_count == 1 + assert fv_names == ["driver_fv"] + + +class TestEmitOnlineAudit: + """Tests for the _emit_online_audit helper in feature_server.""" + + def test_emits_audit_log_with_anonymous_user(self): + from feast.feature_server import GetOnlineFeaturesRequest, _emit_online_audit + + request = GetOnlineFeaturesRequest( + entities={"driver_id": [1, 2]}, + features=["driver_fv:conv_rate"], + ) + + with ( + patch("feast.feature_server.feast_metrics") as mock_metrics, + patch( + "feast.permissions.security_manager.get_security_manager", + 
return_value=None, + ), + ): + _emit_online_audit( + request=request, + features=request.features, + entity_count=2, + status="success", + latency_ms=15.0, + ) + + mock_metrics.emit_online_audit_log.assert_called_once() + kwargs = mock_metrics.emit_online_audit_log.call_args[1] + assert kwargs["requestor_id"] == "anonymous" + assert kwargs["entity_keys"] == ["driver_id"] + assert kwargs["entity_count"] == 2 + assert kwargs["status"] == "success" + + def test_emits_audit_log_with_authenticated_user(self): + from feast.feature_server import GetOnlineFeaturesRequest, _emit_online_audit + + request = GetOnlineFeaturesRequest( + entities={"driver_id": [1]}, + features=["driver_fv:conv_rate"], + ) + + mock_sm = MagicMock() + mock_sm.current_user.username = "jdoe" + + with ( + patch("feast.feature_server.feast_metrics") as mock_metrics, + patch( + "feast.permissions.security_manager.get_security_manager", + return_value=mock_sm, + ), + ): + _emit_online_audit( + request=request, + features=request.features, + entity_count=1, + status="success", + latency_ms=10.0, + ) + + kwargs = mock_metrics.emit_online_audit_log.call_args[1] + assert kwargs["requestor_id"] == "jdoe" + + def test_does_not_raise_on_failure(self): + from feast.feature_server import GetOnlineFeaturesRequest, _emit_online_audit + + request = GetOnlineFeaturesRequest( + entities={"driver_id": [1]}, + features=["driver_fv:conv_rate"], + ) + + with patch( + "feast.permissions.security_manager.get_security_manager", + side_effect=RuntimeError("auth broken"), + ): + _emit_online_audit( + request=request, + features=request.features, + entity_count=1, + status="error", + latency_ms=5.0, + ) diff --git a/ui/package-lock.json b/ui/package-lock.json index 1c6ce720e02..2552b4367bc 100644 --- a/ui/package-lock.json +++ b/ui/package-lock.json @@ -1,12 +1,12 @@ { "name": "@feast-dev/feast-ui", - "version": "0.57.0", + "version": "0.63.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": 
"@feast-dev/feast-ui", - "version": "0.57.0", + "version": "0.63.0", "license": "Apache-2.0", "dependencies": { "@elastic/datemath": "^5.0.3", @@ -163,7 +163,6 @@ "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -860,7 +859,6 @@ "integrity": "sha512-p9OkPbZ5G7UT1MofwYFigGebnrzGJacoBSQM0/6bi/PUMVE+qlWDD/OalvQKbwgQzU6dl0xAv6r4X7Jme0RYxA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/helper-plugin-utils": "^7.27.1" }, @@ -1802,7 +1800,6 @@ "integrity": "sha512-2KH4LWGSrJIkVf5tSiBFYuXDAoWRq2MMwgivCf+93dd0GQi8RXLjKA/0EvRnVV5G0hrHczsquXuD01L8s6dmBw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/helper-annotate-as-pure": "^7.27.1", "@babel/helper-module-imports": "^7.27.1", @@ -2753,7 +2750,6 @@ "resolved": "https://registry.npmjs.org/@emotion/css/-/css-11.13.5.tgz", "integrity": "sha512-wQdD0Xhkn3Qy2VNcIzbLP9MR8TafI0MJb7BEAXKp+w4+XqErksWR4OXomuDzPsN4InLdGhVe6EYcn2ZIUCpB8w==", "license": "MIT", - "peer": true, "dependencies": { "@emotion/babel-plugin": "^11.13.5", "@emotion/cache": "^11.13.5", @@ -2794,7 +2790,6 @@ "resolved": "https://registry.npmjs.org/@emotion/react/-/react-11.14.0.tgz", "integrity": "sha512-O000MLDBDdk/EohJPFUqvnp4qnHeYkVP5B0xEG0D/L7cOKP9kefu2DXn8dj74cQfsEzUqh+sr1RzFqiL1o+PpA==", "license": "MIT", - "peer": true, "dependencies": { "@babel/runtime": "^7.18.3", "@emotion/babel-plugin": "^11.13.5", @@ -4725,7 +4720,6 @@ "integrity": "sha512-8QqtOQT5ACVlmsvKOJNEaWmRPmcojMOzCz4Hs2BGG/toAp/K38LcsMRyLp349glq5AzJbCEeimEoxaX6v/fLrA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/core": "^7.21.3", "@svgr/babel-preset": "8.1.0", @@ -4834,7 +4828,6 @@ "integrity": "sha512-o4PXJQidqJl82ckFaXUeoAW+XysPLauYI43Abki5hABd853iMhitooc6znOnczgbTYmEP6U6/y1ZyKAIsvMKGg==", "dev": true, "license": 
"MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.10.4", "@babel/runtime": "^7.12.5", @@ -5524,7 +5517,6 @@ "integrity": "sha512-promo4eFwuiW+TfGxhi+0x3czqTYJkG8qB17ZUJiVF10Xm7NLVRSLUsfRTU/6h1e24VvRnXCx+hG7li58lkzog==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@types/linkify-it": "^5", "@types/mdurl": "^2" @@ -5632,7 +5624,6 @@ "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.27.tgz", "integrity": "sha512-cisd7gxkzjBKU2GgdYrTdtQx1SORymWyaAFhaxQPK9bYO9ot3Y5OikQRvY0VYQtvwjeQnizCINJAenh/V7MK2w==", "license": "MIT", - "peer": true, "dependencies": { "@types/prop-types": "*", "csstype": "^3.2.2" @@ -5643,7 +5634,6 @@ "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.7.tgz", "integrity": "sha512-MEe3UeoENYVFXzoXEWsvcpg6ZvlrFNlOQ7EOsvhI3CfAXwzPfO8Qwuxd40nepsYKqyyVQnTdEfv68q91yLcKrQ==", "license": "MIT", - "peer": true, "peerDependencies": { "@types/react": "^18.0.0" } @@ -5833,7 +5823,6 @@ "integrity": "sha512-TiZzBSJja/LbhNPvk6yc0JrX9XqhQ0hdh6M2svYfsHGejaKFIAGd9MQ+ERIMzLGlN/kZoYIgdxFV0PuljTKXag==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/regexpp": "^4.4.0", "@typescript-eslint/scope-manager": "5.62.0", @@ -5889,7 +5878,6 @@ "integrity": "sha512-VlJEV0fOQ7BExOsHYAGrgbEiZoi8D+Bl2+f6V2RrXerRSylnp+ZBHmPvaIa8cz0Ajx7WO7Z5RqfgYg7ED1nRhA==", "dev": true, "license": "BSD-2-Clause", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "5.62.0", "@typescript-eslint/types": "5.62.0", @@ -6289,7 +6277,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -6387,7 +6374,6 @@ "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.1", 
"fast-json-stable-stringify": "^2.0.0", @@ -7398,7 +7384,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.25", "caniuse-lite": "^1.0.30001754", @@ -8677,7 +8662,6 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", - "peer": true, "engines": { "node": ">=12" } @@ -9612,7 +9596,6 @@ "deprecated": "This version is no longer supported. Please see https://eslint.org/version-support for other options.", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -12242,7 +12225,6 @@ "integrity": "sha512-bc4NBHqOqSfRW7POMkHd51LvClaeMXpm8dx0e8oE2GORbq5aRK7Bxl4FyzVLdGtLmvLKL7BTDBG5ACQm4HWjTA==", "devOptional": true, "license": "MIT", - "peer": true, "funding": { "type": "opencollective", "url": "https://opencollective.com/immer" @@ -13208,7 +13190,6 @@ "integrity": "sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@jest/core": "^29.7.0", "@jest/types": "^29.6.3", @@ -15825,7 +15806,6 @@ "resolved": "https://registry.npmjs.org/moment/-/moment-2.30.1.tgz", "integrity": "sha512-uEmtNhbDOrWPFS+hdjFCBfy9f2YoyzRpwcl+DqpC6taX21FzsTLQVbMV/W7PzNSX6x/bhC1zA3c2UQ5NzH6how==", "license": "MIT", - "peer": true, "engines": { "node": "*" } @@ -16735,7 +16715,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", @@ -17885,7 +17864,6 @@ "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" @@ -18162,7 +18140,6 @@ "integrity": 
"sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==", "hasInstallScript": true, "license": "BSD-3-Clause", - "peer": true, "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", @@ -18556,7 +18533,6 @@ "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", "license": "MIT", - "peer": true, "dependencies": { "loose-envify": "^1.1.0" }, @@ -18791,7 +18767,6 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", "license": "MIT", - "peer": true, "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" @@ -18977,7 +18952,6 @@ "integrity": "sha512-F27qZr8uUqwhWZboondsPx8tnC3Ct3SxZA3V5WyEvujRyyNv0VYPhoBg1gZ8/MV5tubQp76Trw8lTv9hzRBa+A==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -19049,7 +19023,6 @@ "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-6.30.2.tgz", "integrity": "sha512-l2OwHn3UUnEVUqc6/1VMmR1cvZryZ3j3NzapC2eUXO1dB0sYp5mvwdjiXhpUbRb21eFow3qSxpP8Yv6oAU824Q==", "license": "MIT", - "peer": true, "dependencies": { "@remix-run/router": "1.23.1", "react-router": "6.30.2" @@ -19225,7 +19198,6 @@ "resolved": "https://registry.npmjs.org/redux/-/redux-4.2.1.tgz", "integrity": "sha512-LAUYz4lc+Do8/g7aeRa8JkyDErK6ekstQaqWQrNRW//MY1TvCEpMtpTWvlQ+FPbWCx+Xixu/6SHt5N0HR+SB4w==", "license": "MIT", - "peer": true, "dependencies": { "@babel/runtime": "^7.9.2" } @@ -19685,7 +19657,6 @@ "integrity": "sha512-fS6iqSPZDs3dr/y7Od6y5nha8dW1YnbgtsyotCVvoFGKbERG++CVRFv1meyGDE1SNItQA8BrnCw7ScdAhRJ3XQ==", "dev": true, "license": "MIT", - "peer": true, "bin": { "rollup": "dist/bin/rollup" }, @@ -20207,7 +20178,6 @@ "integrity": 
"sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -21874,7 +21844,6 @@ "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==", "dev": true, "license": "(MIT OR CC0-1.0)", - "peer": true, "engines": { "node": ">=10" }, @@ -21989,7 +21958,6 @@ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.3.tgz", "integrity": "sha512-84MVSjMEHP+FQRPy3pX9sTVV/INIex71s9TL2Gm5FG/WG1SqXeKyZ0k7/blY/4FdOzI12CBy1vGc4og/eus0fw==", "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -22591,7 +22559,6 @@ "integrity": "sha512-HU1JOuV1OavsZ+mfigY0j8d1TgQgbZ6M+J75zDkpEAwYeXjWSqrGJtgnPblJjd/mAyTNQ7ygw0MiKOn6etz8yw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.8", @@ -22672,7 +22639,6 @@ "integrity": "sha512-0XavAZbNJ5sDrCbkpWL8mia0o5WPOd2YGtxrEiZkBK9FjLppIUK2TgxK6qGD2P3hUXTJNNPVibrerKcx5WkR1g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@types/bonjour": "^3.5.9", "@types/connect-history-api-fallback": "^1.3.5", @@ -23114,7 +23080,6 @@ "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", diff --git a/ui/src/FeastUISansProviders.tsx b/ui/src/FeastUISansProviders.tsx index 50de27b5944..801229cc40a 100644 --- a/ui/src/FeastUISansProviders.tsx +++ b/ui/src/FeastUISansProviders.tsx @@ -26,10 +26,15 @@ import DocumentLabelingPage from "./pages/document-labeling/DocumentLabelingPage import PermissionsIndex from "./pages/permissions/Index"; import LineageIndex from "./pages/lineage/Index"; import NoProjectGuard from "./components/NoProjectGuard"; 
+import MonitoringIndex from "./pages/monitoring/Index"; +import FeatureMetricsDetail from "./pages/monitoring/FeatureMetricsDetail"; import TabsRegistryContext, { FeastTabsRegistryInterface, } from "./custom-tabs/TabsRegistryContext"; +import MonitoringContext, { + MonitoringConfig, +} from "./contexts/MonitoringContext"; import CurlGeneratorTab from "./pages/feature-views/CurlGeneratorTab"; import FeatureFlagsContext, { FeatureFlags, @@ -46,6 +51,7 @@ interface FeastUIConfigs { featureFlags?: FeatureFlags; projectListPromise?: Promise; fetchOptions?: FetchOptions; + monitoringConfig?: MonitoringConfig; } const defaultProjectListPromise = (basename: string) => { @@ -134,13 +140,21 @@ const FeastUISansProvidersInner = ({ feastUIConfigs?.tabsRegistry?.DatasetCustomTabs || [], }} > - - - - }> - } /> + + + }> + } /> + }> + } /> + } /> } @@ -181,28 +195,33 @@ const FeastUISansProvidersInner = ({ element={} /> - } /> - } - /> - } - /> - } - /> - } /> - + } /> + } + /> + } + /> + } /> + } /> + } + /> + } + /> - } /> - - - - - + + } /> + + + + + ); diff --git a/ui/src/contexts/MonitoringContext.ts b/ui/src/contexts/MonitoringContext.ts new file mode 100644 index 00000000000..985f00080e9 --- /dev/null +++ b/ui/src/contexts/MonitoringContext.ts @@ -0,0 +1,14 @@ +import React from "react"; + +interface MonitoringConfig { + apiBaseUrl: string; + enabled: boolean; +} + +const MonitoringContext = React.createContext({ + apiBaseUrl: "/api/v1", + enabled: true, +}); + +export default MonitoringContext; +export type { MonitoringConfig }; diff --git a/ui/src/pages/Sidebar.tsx b/ui/src/pages/Sidebar.tsx index cf3d64a6816..b5590ffac0c 100644 --- a/ui/src/pages/Sidebar.tsx +++ b/ui/src/pages/Sidebar.tsx @@ -156,6 +156,15 @@ const SideNav = () => { renderItem: (props) => , isSelected: useMatchSubpath(`${baseUrl}/data-set`), }, + { + name: "Monitoring", + id: htmlIdGenerator("monitoring")(), + icon: , + renderItem: (props) => ( + + ), + isSelected: 
useMatchSubpath(`${baseUrl}/monitoring`), + }, { name: "Data Labeling", id: htmlIdGenerator("dataLabeling")(), diff --git a/ui/src/pages/features/FeatureInstance.tsx b/ui/src/pages/features/FeatureInstance.tsx index fe81c6e619f..aa73db7c8c1 100644 --- a/ui/src/pages/features/FeatureInstance.tsx +++ b/ui/src/pages/features/FeatureInstance.tsx @@ -3,8 +3,9 @@ import { Route, Routes, useNavigate, useParams } from "react-router-dom"; import { EuiPageTemplate } from "@elastic/eui"; import { FeatureIcon } from "../../graphics/FeatureIcon"; -import { useMatchExact } from "../../hooks/useMatchSubpath"; +import { useMatchExact, useMatchSubpath } from "../../hooks/useMatchSubpath"; import FeatureOverviewTab from "./FeatureOverviewTab"; +import FeatureMonitoringTab from "./FeatureMonitoringTab"; import { useDocumentTitle } from "../../hooks/useDocumentTitle"; import { useFeatureCustomTabs, @@ -34,12 +35,20 @@ const FeatureInstance = () => { navigate(""); }, }, + { + label: "Monitoring", + isSelected: useMatchSubpath("monitoring"), + onClick: () => { + navigate("monitoring"); + }, + }, ...customNavigationTabs, ]} /> } /> + } /> {CustomTabRoutes} diff --git a/ui/src/pages/features/FeatureMonitoringTab.tsx b/ui/src/pages/features/FeatureMonitoringTab.tsx new file mode 100644 index 00000000000..fdf7b38bc86 --- /dev/null +++ b/ui/src/pages/features/FeatureMonitoringTab.tsx @@ -0,0 +1,122 @@ +import React from "react"; +import { useParams } from "react-router-dom"; +import { + EuiFlexGroup, + EuiFlexItem, + EuiSpacer, + EuiSkeletonText, + EuiEmptyPrompt, + EuiButton, +} from "@elastic/eui"; +import { + useFeatureMetrics, + useBaselineMetrics, +} from "../../queries/useMonitoringApi"; +import type { + NumericHistogram, + CategoricalHistogram, +} from "../../queries/useMonitoringApi"; +import { + NumericHistogramChart, + CategoricalHistogramChart, +} from "../monitoring/components/HistogramChart"; +import StatsPanel from "../monitoring/components/StatsPanel"; + +const 
FeatureMonitoringTab = () => { + const { projectName, FeatureViewName, FeatureName } = useParams(); + + const { + data: metrics, + isLoading, + isError, + } = useFeatureMetrics({ + project: projectName || "", + feature_view_name: FeatureViewName, + feature_name: FeatureName, + }); + + const { data: baselineMetrics } = useBaselineMetrics( + projectName || "", + FeatureViewName, + FeatureName, + ); + + if (isLoading) { + return ; + } + + const latestMetric = (() => { + if (!metrics || metrics.length === 0) return null; + const withData = metrics.filter((m) => m.row_count > 0); + const candidates = withData.length > 0 ? withData : metrics; + return candidates.reduce((a, b) => + a.metric_date > b.metric_date ? a : b, + ); + })(); + + const baselineMetric = + baselineMetrics && baselineMetrics.length > 0 + ? baselineMetrics[0] + : null; + + if (isError || !latestMetric) { + return ( + No Monitoring Data} + body={ +

+ No monitoring metrics available for this feature. Run a + monitoring compute job to generate data quality metrics. +

+ } + actions={ + + Go to Monitoring + + } + /> + ); + } + + const isNumeric = latestMetric.feature_type === "numeric"; + + return ( + <> + + + {isNumeric && latestMetric.histogram && ( + + )} + {!isNumeric && latestMetric.histogram && ( + + )} + {!latestMetric.histogram && ( + No Histogram} + body={

Histogram data is not available.

} + /> + )} +
+ + + +
+ + + ); +}; + +export default FeatureMonitoringTab; diff --git a/ui/src/pages/monitoring/FeatureMetricsDetail.tsx b/ui/src/pages/monitoring/FeatureMetricsDetail.tsx new file mode 100644 index 00000000000..7ace799742b --- /dev/null +++ b/ui/src/pages/monitoring/FeatureMetricsDetail.tsx @@ -0,0 +1,249 @@ +import React from "react"; +import { useParams, useNavigate } from "react-router-dom"; +import { + EuiPageTemplate, + EuiFlexGroup, + EuiFlexItem, + EuiSpacer, + EuiSkeletonText, + EuiEmptyPrompt, + EuiButton, + EuiBreadcrumbs, +} from "@elastic/eui"; +import { FeatureIcon } from "../../graphics/FeatureIcon"; +import { + useFeatureMetrics, + useBaselineMetrics, +} from "../../queries/useMonitoringApi"; +import type { + NumericHistogram, + CategoricalHistogram, +} from "../../queries/useMonitoringApi"; +import { + NumericHistogramChart, + CategoricalHistogramChart, +} from "./components/HistogramChart"; +import StatsPanel from "./components/StatsPanel"; +import { useDocumentTitle } from "../../hooks/useDocumentTitle"; + +const FeatureMetricsDetail = () => { + const { projectName, featureViewName, featureName } = useParams(); + const navigate = useNavigate(); + + useDocumentTitle( + `${featureName} Monitoring | ${featureViewName} | Feast`, + ); + + const { + data: metrics, + isLoading, + isError, + } = useFeatureMetrics({ + project: projectName || "", + feature_view_name: featureViewName, + feature_name: featureName, + }); + + const { data: baselineMetrics } = useBaselineMetrics( + projectName || "", + featureViewName, + featureName, + ); + + const latestMetric = (() => { + if (!metrics || metrics.length === 0) return null; + const withData = metrics.filter((m) => m.row_count > 0); + const candidates = withData.length > 0 ? withData : metrics; + return candidates.reduce((a, b) => + a.metric_date > b.metric_date ? a : b, + ); + })(); + + const baselineMetric = + baselineMetrics && baselineMetrics.length > 0 + ? 
baselineMetrics[0] + : null; + + const breadcrumbs = [ + { + text: "Monitoring", + onClick: () => navigate(`/p/${projectName}/monitoring`), + }, + { + text: featureViewName || "", + }, + { + text: featureName || "", + }, + ]; + + if (isLoading) { + return ( + + + + + + ); + } + + if (isError || !latestMetric) { + return ( + + + + + No Metrics Available} + body={ +

+ No monitoring metrics found for feature{" "} + {featureName} in feature view{" "} + {featureViewName}. Run a monitoring + compute job first. +

+ } + actions={ + navigate(`/p/${projectName}/monitoring`)} + > + Back to Monitoring + + } + /> +
+
+ ); + } + + const isNumeric = latestMetric.feature_type === "numeric"; + + return ( + + navigate(`/p/${projectName}/monitoring`)} + > + Back to Monitoring + , + ]} + /> + + + + + + + {isNumeric && latestMetric.histogram && ( + + )} + {!isNumeric && latestMetric.histogram && ( + + )} + {!latestMetric.histogram && ( + No Histogram Data} + body={

Histogram data is not available for this metric.

} + /> + )} +
+ + + + +
+ + {metrics && metrics.length > 1 && ( + <> + + + + )} +
+
+ ); +}; + +const NullRateTimeline = ({ + metrics, +}: { + metrics: { metric_date: string; null_rate: number }[]; +}) => { + const sorted = [...metrics].sort( + (a, b) => a.metric_date.localeCompare(b.metric_date), + ); + const maxRate = Math.max(...sorted.map((m) => m.null_rate), 0.01); + const chartWidth = Math.max(sorted.length * 50, 200); + const chartHeight = 80; + + const points = sorted.map((m, i) => { + const x = (i / Math.max(sorted.length - 1, 1)) * (chartWidth - 20) + 10; + const y = chartHeight - (m.null_rate / maxRate) * (chartHeight - 10); + return { x, y, ...m }; + }); + + const polyline = points.map((p) => `${p.x},${p.y}`).join(" "); + + return ( +
+

+ Null Rate Over Time +

+ + + {points.map((p, i) => ( + + ))} + + {points.length > 0 && ( + <> + + {points[0].metric_date} + + + {points[points.length - 1].metric_date} + + + )} + +
+ ); +}; + +export default FeatureMetricsDetail; diff --git a/ui/src/pages/monitoring/FeatureMetricsTable.tsx b/ui/src/pages/monitoring/FeatureMetricsTable.tsx new file mode 100644 index 00000000000..d0a2e4e9573 --- /dev/null +++ b/ui/src/pages/monitoring/FeatureMetricsTable.tsx @@ -0,0 +1,296 @@ +import React, { useState, useMemo, useEffect } from "react"; +import { + EuiBasicTable, + EuiBasicTableColumn, + EuiBadge, + EuiHealth, + EuiLink, + EuiProgress, + EuiToolTip, + Criteria, +} from "@elastic/eui"; +import type { + FeatureMetric, + NumericHistogram, + CategoricalHistogram, +} from "../../queries/useMonitoringApi"; + +const healthColor = (nullRate: number): string => { + if (nullRate >= 0.5) return "danger"; + if (nullRate >= 0.1) return "warning"; + return "success"; +}; + +const healthLabel = (nullRate: number): string => { + if (nullRate >= 0.5) return "High null rate"; + if (nullRate >= 0.1) return "Moderate null rate"; + return "Healthy"; +}; + +const formatNum = (val: number | null, decimals = 2): string => { + if (val === null || val === undefined) return "—"; + if (Number.isInteger(val)) return val.toLocaleString(); + return val.toFixed(decimals); +}; + +const MiniHistogram = ({ metric }: { metric: FeatureMetric }) => { + if (!metric.histogram) return ; + + const width = 120; + const height = 28; + + if (metric.feature_type === "numeric") { + const hist = metric.histogram as NumericHistogram; + const maxCount = Math.max(...hist.counts, 1); + const barW = Math.max(Math.floor(width / hist.counts.length) - 1, 2); + + return ( + + + {hist.counts.map((count, i) => { + const h = (count / maxCount) * (height - 2); + return ( + + ); + })} + + + ); + } + + const hist = metric.histogram as CategoricalHistogram; + const maxCount = Math.max(...hist.values.map((v) => v.count), 1); + const barW = Math.max( + Math.floor(width / Math.min(hist.values.length, 10)) - 1, + 6, + ); + + return ( + + + {hist.values.slice(0, 10).map((v, i) => { + const h = (v.count / 
maxCount) * (height - 2); + return ( + + ); + })} + + + ); +}; + +interface FeatureMetricsTableProps { + metrics: FeatureMetric[]; + isLoading: boolean; + onFeatureClick: (fvName: string, featureName: string) => void; +} + +const PAGE_SIZE_OPTIONS = [10, 20, 50]; + +const FeatureMetricsTable = ({ + metrics, + isLoading, + onFeatureClick, +}: FeatureMetricsTableProps) => { + const [sortField, setSortField] = + useState("feature_view_name"); + const [sortDirection, setSortDirection] = useState<"asc" | "desc">("asc"); + const [pageIndex, setPageIndex] = useState(0); + const [pageSize, setPageSize] = useState(20); + + useEffect(() => { + setPageIndex(0); + }, [metrics]); + + const latestMetrics = useMemo(() => { + const byKey = new Map(); + for (const m of metrics) { + const key = `${m.feature_view_name}::${m.feature_name}`; + const existing = byKey.get(key); + if (!existing) { + byKey.set(key, m); + } else { + const preferNew = + m.row_count > 0 && existing.row_count === 0 + ? true + : existing.row_count > 0 && m.row_count === 0 + ? false + : m.metric_date > existing.metric_date; + if (preferNew) byKey.set(key, m); + } + } + return Array.from(byKey.values()); + }, [metrics]); + + const sortedItems = useMemo(() => { + return [...latestMetrics].sort((a, b) => { + const aVal = a[sortField]; + const bVal = b[sortField]; + if (aVal == null && bVal == null) return 0; + if (aVal == null) return 1; + if (bVal == null) return -1; + if (aVal < bVal) return sortDirection === "asc" ? -1 : 1; + if (aVal > bVal) return sortDirection === "asc" ? 
1 : -1; + return 0; + }); + }, [latestMetrics, sortField, sortDirection]); + + const pageOfItems = useMemo(() => { + const start = pageIndex * pageSize; + return sortedItems.slice(start, start + pageSize); + }, [sortedItems, pageIndex, pageSize]); + + const pagination = useMemo( + () => ({ + pageIndex, + pageSize, + totalItemCount: sortedItems.length, + pageSizeOptions: PAGE_SIZE_OPTIONS, + }), + [pageIndex, pageSize, sortedItems.length], + ); + + const onTableChange = ({ sort, page }: Criteria) => { + if (sort) { + setSortField(sort.field as keyof FeatureMetric); + setSortDirection(sort.direction); + } + if (page) { + setPageIndex(page.index); + setPageSize(page.size); + } + }; + + const columns: EuiBasicTableColumn[] = [ + { + field: "feature_name", + name: "Feature", + sortable: true, + render: (name: string, item: FeatureMetric) => ( + onFeatureClick(item.feature_view_name, name)} + > + {name} + + ), + }, + { + field: "feature_view_name", + name: "Feature View", + sortable: true, + }, + { + field: "feature_type", + name: "Type", + sortable: true, + width: "100px", + render: (type: string) => ( + + {type} + + ), + }, + { + name: "Distribution", + width: "140px", + render: (item: FeatureMetric) => , + }, + { + field: "row_count", + name: "Rows", + sortable: true, + width: "90px", + render: (val: number) => formatNum(val, 0), + }, + { + field: "null_rate", + name: "Null Rate", + sortable: true, + width: "150px", + render: (val: number) => ( +
+ + {(val * 100).toFixed(1)}% +
+ ), + }, + { + field: "null_rate", + name: "Health", + width: "130px", + render: (val: number) => ( + {healthLabel(val)} + ), + }, + { + field: "mean", + name: "Mean", + sortable: true, + width: "100px", + render: (val: number | null) => formatNum(val), + }, + { + field: "stddev", + name: "Std Dev", + sortable: true, + width: "100px", + render: (val: number | null) => formatNum(val), + }, + { + field: "data_source_type", + name: "Source", + width: "80px", + render: (val: string) => {val}, + }, + ]; + + return ( + ({ + "data-test-subj": `row-${item.feature_name}`, + })} + noItemsMessage={ + isLoading + ? "Loading metrics..." + : "No metrics found. Run a monitoring compute job to generate metrics." + } + /> + ); +}; + +export default FeatureMetricsTable; diff --git a/ui/src/pages/monitoring/FeatureServiceMetricsPanel.tsx b/ui/src/pages/monitoring/FeatureServiceMetricsPanel.tsx new file mode 100644 index 00000000000..c3fa61b25a8 --- /dev/null +++ b/ui/src/pages/monitoring/FeatureServiceMetricsPanel.tsx @@ -0,0 +1,224 @@ +import React, { useState, useMemo } from "react"; +import { + EuiPanel, + EuiTitle, + EuiSpacer, + EuiFlexGroup, + EuiFlexItem, + EuiStat, + EuiBasicTable, + EuiBasicTableColumn, + EuiProgress, + EuiBadge, + EuiSkeletonText, + Criteria, +} from "@elastic/eui"; +import type { FeatureServiceMetric } from "../../queries/useMonitoringApi"; + +const healthColor = (nullRate: number): string => { + if (nullRate >= 0.5) return "danger"; + if (nullRate >= 0.1) return "warning"; + return "success"; +}; + +interface FeatureServiceMetricsPanelProps { + metrics: FeatureServiceMetric[]; + isLoading: boolean; +} + +const FeatureServiceMetricsPanel = ({ + metrics, + isLoading, +}: FeatureServiceMetricsPanelProps) => { + if (isLoading) { + return ( + + +

Feature Service Metrics

+
+ + +
+ ); + } + + const latestByFS = new Map(); + for (const m of metrics) { + const existing = latestByFS.get(m.feature_service_name); + if (!existing || m.metric_date > existing.metric_date) { + latestByFS.set(m.feature_service_name, m); + } + } + const latestMetrics = Array.from(latestByFS.values()); + + const totalViews = latestMetrics.reduce( + (sum, m) => sum + (m.total_feature_views || 0), + 0, + ); + const totalFeatures = latestMetrics.reduce( + (sum, m) => sum + (m.total_features || 0), + 0, + ); + const avgNullRate = + latestMetrics.length > 0 + ? latestMetrics.reduce((sum, m) => sum + (m.avg_null_rate || 0), 0) / + latestMetrics.length + : 0; + + const columns: EuiBasicTableColumn[] = [ + { + field: "feature_service_name", + name: "Feature Service", + sortable: true, + }, + { + field: "total_feature_views", + name: "Feature Views", + sortable: true, + width: "110px", + }, + { + field: "total_features", + name: "Features", + sortable: true, + width: "80px", + }, + { + field: "avg_null_rate", + name: "Avg Null Rate", + sortable: true, + render: (val: number) => ( +
+ + {((val || 0) * 100).toFixed(1)}% +
+ ), + }, + { + field: "max_null_rate", + name: "Max Null Rate", + sortable: true, + width: "110px", + render: (val: number) => `${((val || 0) * 100).toFixed(1)}%`, + }, + { + field: "metric_date", + name: "Date", + sortable: true, + width: "110px", + }, + { + field: "data_source_type", + name: "Source", + width: "80px", + render: (val: string) => ( + {val} + ), + }, + ]; + + return ( + + +

Feature Service Metrics

+
+

+ Aggregated data quality metrics across feature services. +

+ + + + + + + + + + + + + + + + + + + + {latestMetrics.length > 0 && ( + + )} +
+ ); +}; + +const SortableFSTable = ({ + items, + columns, +}: { + items: FeatureServiceMetric[]; + columns: EuiBasicTableColumn[]; +}) => { + const [sortField, setSortField] = useState("feature_service_name"); + const [sortDirection, setSortDirection] = useState<"asc" | "desc">("asc"); + + const sortedItems = useMemo(() => { + return [...items].sort((a, b) => { + const aVal = (a as any)[sortField]; + const bVal = (b as any)[sortField]; + if (aVal == null && bVal == null) return 0; + if (aVal == null) return 1; + if (bVal == null) return -1; + if (aVal < bVal) return sortDirection === "asc" ? -1 : 1; + if (aVal > bVal) return sortDirection === "asc" ? 1 : -1; + return 0; + }); + }, [items, sortField, sortDirection]); + + const onTableChange = ({ sort }: Criteria) => { + if (sort) { + setSortField(sort.field as string); + setSortDirection(sort.direction); + } + }; + + return ( + + ); +}; + +export default FeatureServiceMetricsPanel; diff --git a/ui/src/pages/monitoring/FeatureViewMetricsPanel.tsx b/ui/src/pages/monitoring/FeatureViewMetricsPanel.tsx new file mode 100644 index 00000000000..267f1292fe1 --- /dev/null +++ b/ui/src/pages/monitoring/FeatureViewMetricsPanel.tsx @@ -0,0 +1,240 @@ +import React, { useState, useMemo } from "react"; +import { + EuiPanel, + EuiTitle, + EuiSpacer, + EuiFlexGroup, + EuiFlexItem, + EuiStat, + EuiBasicTable, + EuiBasicTableColumn, + EuiProgress, + EuiBadge, + EuiSkeletonText, + Criteria, +} from "@elastic/eui"; +import type { FeatureViewMetric } from "../../queries/useMonitoringApi"; + +const healthColor = (nullRate: number): string => { + if (nullRate >= 0.5) return "danger"; + if (nullRate >= 0.1) return "warning"; + return "success"; +}; + +interface FeatureViewMetricsPanelProps { + metrics: FeatureViewMetric[]; + isLoading: boolean; + title: string; + description?: string; +} + +const FeatureViewMetricsPanel = ({ + metrics, + isLoading, + title, + description, +}: FeatureViewMetricsPanelProps) => { + if (isLoading) { + return ( 
+ + +

{title}

+
+ + +
+ ); + } + + const latestByFV = new Map(); + for (const m of metrics) { + const existing = latestByFV.get(m.feature_view_name); + if (!existing || m.metric_date > existing.metric_date) { + latestByFV.set(m.feature_view_name, m); + } + } + const latestMetrics = Array.from(latestByFV.values()); + + const totalRows = latestMetrics.reduce( + (sum, m) => sum + (m.total_row_count || 0), + 0, + ); + const totalFeatures = latestMetrics.reduce( + (sum, m) => sum + (m.total_features || 0), + 0, + ); + const avgNullRate = + latestMetrics.length > 0 + ? latestMetrics.reduce((sum, m) => sum + (m.avg_null_rate || 0), 0) / + latestMetrics.length + : 0; + const healthyViews = latestMetrics.filter( + (m) => m.avg_null_rate < 0.1, + ).length; + + const columns: EuiBasicTableColumn[] = [ + { + field: "feature_view_name", + name: "Feature View", + sortable: true, + }, + { + field: "total_row_count", + name: "Total Rows", + sortable: true, + render: (val: number) => (val || 0).toLocaleString(), + }, + { + field: "total_features", + name: "Features", + sortable: true, + width: "80px", + }, + { + field: "features_with_nulls", + name: "With Nulls", + sortable: true, + width: "90px", + }, + { + field: "avg_null_rate", + name: "Avg Null Rate", + sortable: true, + render: (val: number) => ( +
+ + {((val || 0) * 100).toFixed(1)}% +
+ ), + }, + { + field: "max_null_rate", + name: "Max Null Rate", + sortable: true, + width: "110px", + render: (val: number) => `${((val || 0) * 100).toFixed(1)}%`, + }, + { + field: "metric_date", + name: "Date", + sortable: true, + width: "110px", + }, + { + field: "data_source_type", + name: "Source", + width: "80px", + render: (val: string) => ( + {val} + ), + }, + ]; + + return ( + + +

{title}

+
+ {description && ( +

+ {description} +

+ )} + + + + + + + + + + + + + + + + + + + + {latestMetrics.length > 0 && ( + + )} +
+ ); +}; + +const SortableTable = ({ + items, + columns, +}: { + items: FeatureViewMetric[]; + columns: EuiBasicTableColumn[]; +}) => { + const [sortField, setSortField] = useState("feature_view_name"); + const [sortDirection, setSortDirection] = useState<"asc" | "desc">("asc"); + + const sortedItems = useMemo(() => { + return [...items].sort((a, b) => { + const aVal = (a as any)[sortField]; + const bVal = (b as any)[sortField]; + if (aVal == null && bVal == null) return 0; + if (aVal == null) return 1; + if (bVal == null) return -1; + if (aVal < bVal) return sortDirection === "asc" ? -1 : 1; + if (aVal > bVal) return sortDirection === "asc" ? 1 : -1; + return 0; + }); + }, [items, sortField, sortDirection]); + + const onTableChange = ({ sort }: Criteria) => { + if (sort) { + setSortField(sort.field as string); + setSortDirection(sort.direction); + } + }; + + return ( + + ); +}; + +export default FeatureViewMetricsPanel; diff --git a/ui/src/pages/monitoring/Index.tsx b/ui/src/pages/monitoring/Index.tsx new file mode 100644 index 00000000000..1af792b119b --- /dev/null +++ b/ui/src/pages/monitoring/Index.tsx @@ -0,0 +1,265 @@ +import React, { useState, useContext, useMemo } from "react"; +import { useParams, useNavigate } from "react-router-dom"; +import { + EuiPageTemplate, + EuiSpacer, + EuiTabbedContent, + EuiTabbedContentTab, + EuiEmptyPrompt, + EuiButton, + EuiCallOut, +} from "@elastic/eui"; + +import { useDocumentTitle } from "../../hooks/useDocumentTitle"; +import useLoadRegistry from "../../queries/useLoadRegistry"; +import RegistryPathContext from "../../contexts/RegistryPathContext"; +import { + useFeatureMetrics, + useFeatureViewMetrics, + useFeatureServiceMetrics, + useComputeMetrics, +} from "../../queries/useMonitoringApi"; +import FeatureMetricsTable from "./FeatureMetricsTable"; +import FeatureViewMetricsPanel from "./FeatureViewMetricsPanel"; +import FeatureServiceMetricsPanel from "./FeatureServiceMetricsPanel"; +import MetricsFilters from 
"./components/MetricsFilters"; + +const MonitoringIndex = () => { + useDocumentTitle("Monitoring | Feast"); + + const { projectName } = useParams(); + const navigate = useNavigate(); + const registryUrl = useContext(RegistryPathContext); + const { data: registryData } = useLoadRegistry(registryUrl, projectName); + + const [selectedFV, setSelectedFV] = useState(""); + const [granularity, setGranularity] = useState(""); + const [dataSourceType, setDataSourceType] = useState(""); + const [startDate, setStartDate] = useState(""); + const [endDate, setEndDate] = useState(""); + + const filters = useMemo( + () => ({ + project: projectName || "", + feature_view_name: selectedFV || undefined, + granularity: granularity || undefined, + data_source_type: dataSourceType || undefined, + start_date: startDate || undefined, + end_date: endDate || undefined, + }), + [projectName, selectedFV, granularity, dataSourceType, startDate, endDate], + ); + + const featureQuery = useFeatureMetrics(filters); + const fvQuery = useFeatureViewMetrics(filters); + const fsQuery = useFeatureServiceMetrics({ + project: projectName || "", + granularity: granularity || undefined, + data_source_type: dataSourceType || undefined, + start_date: startDate || undefined, + end_date: endDate || undefined, + }); + const computeMutation = useComputeMetrics(); + + const featureViews = useMemo(() => { + if (!registryData?.mergedFVList) return []; + return registryData.mergedFVList.map((fv: any) => fv.name as string); + }, [registryData]); + + const handleFeatureClick = (fvName: string, featureName: string) => { + navigate( + `/p/${projectName}/monitoring/feature/${fvName}/${featureName}`, + ); + }; + + const uniqueFeatureCount = useMemo(() => { + if (!featureQuery.data) return 0; + const seen = new Set(); + for (const m of featureQuery.data) { + seen.add(`${m.feature_view_name}::${m.feature_name}`); + } + return seen.size; + }, [featureQuery.data]); + + const handleRefresh = () => { + featureQuery.refetch(); + 
fvQuery.refetch(); + fsQuery.refetch(); + }; + + const handleCompute = () => { + computeMutation.mutate({ + project: projectName || "", + feature_view_name: selectedFV || undefined, + }); + }; + + const hasError = + featureQuery.isError && fvQuery.isError && fsQuery.isError; + const hasData = + (featureQuery.data && featureQuery.data.length > 0) || + (fvQuery.data && fvQuery.data.length > 0); + + const tabs: EuiTabbedContentTab[] = [ + { + id: "features", + name: `Features${uniqueFeatureCount > 0 ? ` (${uniqueFeatureCount})` : ""}`, + content: ( + <> + + + + ), + }, + { + id: "feature-views", + name: "Feature Views", + content: ( + <> + + + + ), + }, + { + id: "feature-services", + name: "Feature Services", + content: ( + <> + + + + ), + }, + ]; + + return ( + + + Compute Metrics + , + ]} + /> + + {hasError && ( + <> + +

+ Could not connect to the monitoring API. Make sure the Feast + registry server is running with monitoring enabled. +

+
+ + + )} + + + + + + {!hasData && !featureQuery.isLoading && !hasError && ( + No Metrics Yet} + body={ +

+ No monitoring data has been computed for this project. Click + "Compute Metrics" to run data quality analysis on your + feature views, or use the CLI:{" "} + feast monitor run --data-source batch +

+ } + actions={ + + Compute Metrics + + } + /> + )} + + {(hasData || featureQuery.isLoading) && ( + + )} + + {computeMutation.isSuccess && ( + <> + + +

+ Data quality metrics have been computed. The table above has + been refreshed. +

+
+ + )} + + {computeMutation.isError && ( + <> + + +

{(computeMutation.error as Error)?.message}

+
+ + )} +
+
+ ); +}; + +export default MonitoringIndex; diff --git a/ui/src/pages/monitoring/components/HistogramChart.tsx b/ui/src/pages/monitoring/components/HistogramChart.tsx new file mode 100644 index 00000000000..188bcba7c0b --- /dev/null +++ b/ui/src/pages/monitoring/components/HistogramChart.tsx @@ -0,0 +1,245 @@ +import React from "react"; +import { + EuiPanel, + EuiTitle, + EuiSpacer, + EuiText, +} from "@elastic/eui"; +import type { + NumericHistogram, + CategoricalHistogram, +} from "../../../queries/useMonitoringApi"; + +const BAR_COLOR = "#006BB4"; +const BAR_COLOR_BASELINE = "#BD271E55"; +const CHART_HEIGHT = 160; +const AXIS_HEIGHT = 24; +const LEFT_PAD = 50; + +const NumericHistogramChart = ({ + histogram, + baseline, + title, +}: { + histogram: NumericHistogram; + baseline?: NumericHistogram | null; + title?: string; +}) => { + const maxCount = Math.max(...histogram.counts, 1); + const numBars = histogram.counts.length; + const barWidth = Math.max(Math.floor(460 / numBars) - 2, 6); + const barsWidth = (barWidth + 2) * numBars; + const svgWidth = LEFT_PAD + barsWidth + 20; + + const yTicks = [0, 0.25, 0.5, 0.75, 1].map((f) => ({ + value: Math.round(maxCount * f), + y: CHART_HEIGHT - f * CHART_HEIGHT, + })); + + return ( + + {title && ( + <> + +

{title}

+
+ + + )} +
+ + {yTicks.map((t) => ( + + + + {t.value.toLocaleString()} + + + ))} + {histogram.counts.map((count, i) => { + const height = (count / maxCount) * CHART_HEIGHT; + const x = LEFT_PAD + i * (barWidth + 2); + const binStart = histogram.bins[i]; + const binEnd = + i < histogram.bins.length - 1 + ? histogram.bins[i + 1] + : binStart + histogram.bin_width; + const baselineHeight = + baseline && baseline.counts[i] + ? (baseline.counts[i] / maxCount) * CHART_HEIGHT + : 0; + + return ( + + {baselineHeight > 0 && ( + + )} + + {`${binStart.toFixed(2)} – ${binEnd.toFixed(2)}: ${count.toLocaleString()}`} + + + ); + })} + + + {histogram.bins[0]?.toLocaleString(undefined, { maximumFractionDigits: 1 })} + + + {histogram.bins[histogram.bins.length - 1]?.toLocaleString(undefined, { maximumFractionDigits: 1 })} + + +
+ {baseline && ( + + + Baseline + + )} +
+ ); +}; + +const LABEL_WIDTH = 60; +const BAR_MAX_WIDTH = 320; +const COUNT_PAD = 80; +const CAT_SVG_WIDTH = LABEL_WIDTH + BAR_MAX_WIDTH + COUNT_PAD; + +const CategoricalHistogramChart = ({ + histogram, + title, +}: { + histogram: CategoricalHistogram; + title?: string; +}) => { + const maxCount = Math.max( + ...histogram.values.map((v) => v.count), + 1, + ); + const barHeight = 24; + const rowHeight = barHeight + 6; + const chartHeight = histogram.values.length * rowHeight; + + return ( + + {title && ( + <> + +

{title}

+
+ + + )} +
+ + {histogram.values.map((v, i) => { + const width = (v.count / maxCount) * BAR_MAX_WIDTH; + const y = i * rowHeight; + return ( + + + {v.value.length > 8 ? v.value.slice(0, 8) + "…" : v.value} + + + {`${v.value}: ${v.count.toLocaleString()}`} + + + {v.count.toLocaleString()} + + + ); + })} + +
+ + {histogram.unique_count} unique values + {histogram.other_count > 0 && + ` (${histogram.other_count.toLocaleString()} in other categories)`} + +
+ ); +}; + +export { NumericHistogramChart, CategoricalHistogramChart }; diff --git a/ui/src/pages/monitoring/components/MetricsFilters.tsx b/ui/src/pages/monitoring/components/MetricsFilters.tsx new file mode 100644 index 00000000000..081e380da74 --- /dev/null +++ b/ui/src/pages/monitoring/components/MetricsFilters.tsx @@ -0,0 +1,128 @@ +import React from "react"; +import { + EuiFlexGroup, + EuiFlexItem, + EuiSelect, + EuiFieldText, + EuiFormRow, + EuiButton, +} from "@elastic/eui"; + +interface MetricsFiltersProps { + featureViews: string[]; + selectedFeatureView: string; + onFeatureViewChange: (fv: string) => void; + granularity: string; + onGranularityChange: (g: string) => void; + dataSourceType: string; + onDataSourceTypeChange: (ds: string) => void; + startDate: string; + onStartDateChange: (d: string) => void; + endDate: string; + onEndDateChange: (d: string) => void; + onRefresh: () => void; + isLoading?: boolean; +} + +const GRANULARITY_OPTIONS = [ + { value: "", text: "All" }, + { value: "daily", text: "Daily" }, + { value: "weekly", text: "Weekly" }, + { value: "biweekly", text: "Biweekly" }, + { value: "monthly", text: "Monthly" }, + { value: "quarterly", text: "Quarterly" }, +]; + +const DATA_SOURCE_OPTIONS = [ + { value: "", text: "All Sources" }, + { value: "batch", text: "Batch" }, + { value: "log", text: "Log" }, +]; + +const MetricsFilters = ({ + featureViews, + selectedFeatureView, + onFeatureViewChange, + granularity, + onGranularityChange, + dataSourceType, + onDataSourceTypeChange, + startDate, + onStartDateChange, + endDate, + onEndDateChange, + onRefresh, + isLoading, +}: MetricsFiltersProps) => { + const fvOptions = [ + { value: "", text: "All Feature Views" }, + ...featureViews.map((fv) => ({ value: fv, text: fv })), + ]; + + return ( + + + + onFeatureViewChange(e.target.value)} + compressed + /> + + + + + onGranularityChange(e.target.value)} + compressed + /> + + + + + onDataSourceTypeChange(e.target.value)} + compressed + /> + + + + + 
onStartDateChange(e.target.value)} + compressed + /> + + + + + onEndDateChange(e.target.value)} + compressed + /> + + + + + Refresh + + + + ); +}; + +export default MetricsFilters; diff --git a/ui/src/pages/monitoring/components/StatsPanel.tsx b/ui/src/pages/monitoring/components/StatsPanel.tsx new file mode 100644 index 00000000000..070b99373e7 --- /dev/null +++ b/ui/src/pages/monitoring/components/StatsPanel.tsx @@ -0,0 +1,130 @@ +import React from "react"; +import { + EuiPanel, + EuiTitle, + EuiSpacer, + EuiDescriptionList, + EuiDescriptionListTitle, + EuiDescriptionListDescription, + EuiFlexGroup, + EuiFlexItem, + EuiBadge, +} from "@elastic/eui"; +import type { FeatureMetric } from "../../../queries/useMonitoringApi"; + +const formatNumber = (val: number | null, decimals = 4): string => { + if (val === null || val === undefined) return "—"; + if (Number.isInteger(val)) return val.toLocaleString(); + return val.toFixed(decimals); +}; + +const formatPercent = (val: number | null): string => { + if (val === null || val === undefined) return "—"; + return `${(val * 100).toFixed(2)}%`; +}; + +const StatsPanel = ({ + metric, + baseline, +}: { + metric: FeatureMetric; + baseline?: FeatureMetric | null; +}) => { + const isNumeric = metric.feature_type === "numeric"; + + return ( + + + + +

Statistics

+
+
+ + + {metric.feature_type} + + +
+ + + Row Count + + {formatNumber(metric.row_count, 0)} + {baseline && ( + + (baseline: {formatNumber(baseline.row_count, 0)}) + + )} + + + Null Rate + + 0.1 ? "#BD271E" : "inherit", + fontWeight: metric.null_rate > 0.1 ? 600 : 400, + }} + > + {formatPercent(metric.null_rate)} + + {baseline && ( + + (baseline: {formatPercent(baseline.null_rate)}) + + )} + + + {isNumeric && ( + <> + Mean + + {formatNumber(metric.mean)} + {baseline && ( + + (baseline: {formatNumber(baseline.mean)}) + + )} + + + Std Dev + + {formatNumber(metric.stddev)} + + + Min / Max + + {formatNumber(metric.min_val)} / {formatNumber(metric.max_val)} + + + Percentiles + + P50: {formatNumber(metric.p50)} | P75: {formatNumber(metric.p75)}{" "} + | P90: {formatNumber(metric.p90)} | P95:{" "} + {formatNumber(metric.p95)} | P99: {formatNumber(metric.p99)} + + + )} + + Data Source + + {metric.data_source_type} + + + Granularity + + {metric.granularity} + + + Computed At + + {metric.computed_at + ? new Date(metric.computed_at).toLocaleString() + : "—"} + + +
+ ); +}; + +export default StatsPanel; diff --git a/ui/src/queries/useMonitoringApi.ts b/ui/src/queries/useMonitoringApi.ts new file mode 100644 index 00000000000..fde01f29d6d --- /dev/null +++ b/ui/src/queries/useMonitoringApi.ts @@ -0,0 +1,250 @@ +import { useContext } from "react"; +import { useQuery, useMutation, useQueryClient } from "react-query"; +import MonitoringContext from "../contexts/MonitoringContext"; + +interface FeatureMetric { + project_id: string; + feature_view_name: string; + feature_name: string; + metric_date: string; + granularity: string; + data_source_type: string; + computed_at: string; + is_baseline: boolean; + feature_type: string; + row_count: number; + null_count: number; + null_rate: number; + mean: number | null; + stddev: number | null; + min_val: number | null; + max_val: number | null; + p50: number | null; + p75: number | null; + p90: number | null; + p95: number | null; + p99: number | null; + histogram: NumericHistogram | CategoricalHistogram | null; +} + +interface NumericHistogram { + bins: number[]; + counts: number[]; + bin_width: number; +} + +interface CategoricalHistogram { + values: { value: string; count: number }[]; + other_count: number; + unique_count: number; +} + +interface FeatureViewMetric { + project_id: string; + feature_view_name: string; + metric_date: string; + granularity: string; + data_source_type: string; + computed_at: string; + is_baseline: boolean; + total_row_count: number; + total_features: number; + features_with_nulls: number; + avg_null_rate: number; + max_null_rate: number; +} + +interface FeatureServiceMetric { + project_id: string; + feature_service_name: string; + metric_date: string; + granularity: string; + data_source_type: string; + computed_at: string; + is_baseline: boolean; + total_feature_views: number; + total_features: number; + avg_null_rate: number; + max_null_rate: number; +} + +interface MonitoringFilters { + project: string; + feature_view_name?: string; + feature_name?: 
string; + feature_service_name?: string; + granularity?: string; + data_source_type?: string; + start_date?: string; + end_date?: string; +} + +const toQueryParams = ( + filters: MonitoringFilters, +): Record => { + return { + project: filters.project, + feature_view_name: filters.feature_view_name, + feature_name: filters.feature_name, + feature_service_name: filters.feature_service_name, + granularity: filters.granularity, + data_source_type: filters.data_source_type, + start_date: filters.start_date, + end_date: filters.end_date, + }; +}; + +const buildQueryString = (params: Record) => { + const entries = Object.entries(params).filter( + ([, v]) => v !== undefined && v !== "", + ); + if (entries.length === 0) return ""; + return "?" + entries.map(([k, v]) => `${k}=${encodeURIComponent(v!)}`).join("&"); +}; + +const fetchMonitoring = async ( + baseUrl: string, + path: string, + params: Record, +): Promise => { + const qs = buildQueryString(params); + const res = await fetch(`${baseUrl}${path}${qs}`); + if (!res.ok) { + throw new Error(`Failed to fetch ${path}: ${res.status} ${res.statusText}`); + } + const text = await res.text(); /* read as text first: JSON.parse rejects bare NaN / Infinity tokens */ + const sanitized = text.replace(/"(?:[^"\\]|\\.)*"|-?(?:NaN|Infinity)\b/g, (tok) => (tok.startsWith('"') ? tok : "null")); /* string literals are matched first and returned unchanged; bare NaN / Infinity / -Infinity anywhere else (object values and array elements) become null */ + return JSON.parse(sanitized); +}; + +const STALE_TIME = 30_000; + +const useFeatureMetrics = (filters: MonitoringFilters) => { + const { apiBaseUrl, enabled } = useContext(MonitoringContext); + return useQuery( + ["monitoring-features", filters], + () => + fetchMonitoring( + apiBaseUrl, + "/monitoring/metrics/features", + toQueryParams(filters), + ), + { staleTime: STALE_TIME, enabled, retry: 1 }, + ); +}; + +const useFeatureViewMetrics = (filters: MonitoringFilters) => { + const { apiBaseUrl, enabled } = useContext(MonitoringContext); + return useQuery( + ["monitoring-feature-views", filters], + () => + fetchMonitoring( + apiBaseUrl, + "/monitoring/metrics/feature_views", + toQueryParams(filters), + ), + { 
staleTime: STALE_TIME, enabled, retry: 1 }, + ); +}; + +const useFeatureServiceMetrics = (filters: MonitoringFilters) => { + const { apiBaseUrl, enabled } = useContext(MonitoringContext); + return useQuery( + ["monitoring-feature-services", filters], + () => + fetchMonitoring( + apiBaseUrl, + "/monitoring/metrics/feature_services", + toQueryParams(filters), + ), + { staleTime: STALE_TIME, enabled, retry: 1 }, + ); +}; + +const useBaselineMetrics = ( + project: string, + featureViewName?: string, + featureName?: string, + dataSourceType?: string, +) => { + const { apiBaseUrl, enabled } = useContext(MonitoringContext); + return useQuery( + ["monitoring-baseline", project, featureViewName, featureName, dataSourceType], /* every request parameter is part of the key, so baselines for different data sources get separate cache entries */ + () => + fetchMonitoring( + apiBaseUrl, + "/monitoring/metrics/baseline", + { + project, + feature_view_name: featureViewName, + feature_name: featureName, + data_source_type: dataSourceType, + }, + ), + { staleTime: STALE_TIME, enabled, retry: 1 }, + ); +}; + +const useTimeseriesMetrics = (filters: MonitoringFilters) => { + const { apiBaseUrl, enabled } = useContext(MonitoringContext); + return useQuery( + ["monitoring-timeseries", filters], + () => + fetchMonitoring( + apiBaseUrl, + "/monitoring/metrics/timeseries", + toQueryParams(filters), + ), + { staleTime: STALE_TIME, enabled, retry: 1 }, + ); +}; + +const useComputeMetrics = () => { + const { apiBaseUrl } = useContext(MonitoringContext); + const queryClient = useQueryClient(); + return useMutation( + async (body: { + project: string; + feature_view_name?: string; + feature_names?: string[]; + start_date?: string; + end_date?: string; + granularity?: string; + set_baseline?: boolean; + }) => { + const res = await fetch(`${apiBaseUrl}/monitoring/compute`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); + if (!res.ok) { + throw new Error(`Failed to trigger compute: ${res.status}`); + } + return res.json(); + }, + { + onSuccess: () => { + 
for (const key of ["monitoring-features", "monitoring-feature-views", "monitoring-feature-services", "monitoring-baseline", "monitoring-timeseries"]) { /* a compute run invalidates every monitoring query family whose key starts with one of these names */ + queryClient.invalidateQueries(key); + } + }, + }, + ); +}; + +export { + useFeatureMetrics, + useFeatureViewMetrics, + useFeatureServiceMetrics, + useBaselineMetrics, + useTimeseriesMetrics, + useComputeMetrics, +}; +export type { + FeatureMetric, + FeatureViewMetric, + FeatureServiceMetric, + NumericHistogram, + CategoricalHistogram, + MonitoringFilters, +};