diff --git a/.secrets.baseline b/.secrets.baseline
index e0030466f1f..bd2a7f382c2 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -272,6 +272,29 @@
"line_number": 11
}
],
+ "examples/monitoring/monitoring-quickstart.ipynb": [
+ {
+ "type": "Base64 High Entropy String",
+ "filename": "examples/monitoring/monitoring-quickstart.ipynb",
+ "hashed_secret": "8d921d6d629bc22832e5fae42dfc828b8ce5cf47",
+ "is_verified": false,
+ "line_number": 606
+ },
+ {
+ "type": "Base64 High Entropy String",
+ "filename": "examples/monitoring/monitoring-quickstart.ipynb",
+ "hashed_secret": "37b47d0b2461457e316f1b0be0eef0f9599d440d",
+ "is_verified": false,
+ "line_number": 780
+ },
+ {
+ "type": "Base64 High Entropy String",
+ "filename": "examples/monitoring/monitoring-quickstart.ipynb",
+ "hashed_secret": "be6715cc8d40a964c7bd1fd8eff5e840d61ad598",
+ "is_verified": false,
+ "line_number": 875
+ }
+ ],
"examples/online_store/milvus_tutorial/docker-compose.yml": [
{
"type": "Secret Keyword",
@@ -934,7 +957,7 @@
"filename": "infra/feast-operator/api/v1/featurestore_types.go",
"hashed_secret": "44e17306b837162269a410204daaa5ecee4ec22c",
"is_verified": false,
- "line_number": 885
+ "line_number": 889
}
],
"infra/feast-operator/api/v1/zz_generated.deepcopy.go": [
@@ -943,21 +966,21 @@
"filename": "infra/feast-operator/api/v1/zz_generated.deepcopy.go",
"hashed_secret": "f914fc9324de1bec1ad13dec94a8ea2ddb41fc87",
"is_verified": false,
- "line_number": 785
+ "line_number": 810
},
{
"type": "Secret Keyword",
"filename": "infra/feast-operator/api/v1/zz_generated.deepcopy.go",
"hashed_secret": "44e17306b837162269a410204daaa5ecee4ec22c",
"is_verified": false,
- "line_number": 846
+ "line_number": 871
},
{
"type": "Secret Keyword",
"filename": "infra/feast-operator/api/v1/zz_generated.deepcopy.go",
"hashed_secret": "c2028031c154bbe86fd69bef740855c74b927dcf",
"is_verified": false,
- "line_number": 1496
+ "line_number": 1516
}
],
"infra/feast-operator/api/v1alpha1/featurestore_types.go": [
@@ -1140,14 +1163,14 @@
"filename": "infra/feast-operator/internal/controller/services/repo_config.go",
"hashed_secret": "44e17306b837162269a410204daaa5ecee4ec22c",
"is_verified": false,
- "line_number": 129
+ "line_number": 133
},
{
"type": "Secret Keyword",
"filename": "infra/feast-operator/internal/controller/services/repo_config.go",
"hashed_secret": "e2fb052132fd6a07a56af2013e0b62a1f510572c",
"is_verified": false,
- "line_number": 220
+ "line_number": 224
}
],
"infra/feast-operator/internal/controller/services/services.go": [
diff --git a/Makefile b/Makefile
index 799bc9c42fc..1c150abad35 100644
--- a/Makefile
+++ b/Makefile
@@ -105,7 +105,10 @@ install-python-dependencies-minimal: ## Install minimal Python dependencies usin
# Used in github actions/ci
install-python-dependencies-ci: ## Install Python CI dependencies using uv pip sync
# Create virtualenv if it doesn't exist
- uv venv .venv
+ @if [ ! -d .venv ]; then \
+ echo "Creating virtualenv..."; \
+ uv venv .venv; \
+ fi
# Install CPU-only torch first to prevent CUDA dependency issues (Linux only)
@if [ "$$(uname -s)" = "Linux" ]; then \
echo "Installing dependencies with torch CPU index for Linux..."; \
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index 1b0b0961d79..ab1d5a80e1b 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -57,6 +57,7 @@
* [MCP - AI Agent Example](../examples/mcp_feature_store/README.md)
* [Feast-Powered AI Agent](../examples/agent_feature_store/README.md)
* [Demo Notebooks](tutorials/demo-notebooks.md)
+* [Feature Quality Monitoring Quickstart](../examples/monitoring/monitoring-quickstart.ipynb)
## How-to Guides
@@ -90,6 +91,7 @@
* [Adding or reusing tests](how-to-guides/adding-or-reusing-tests.md)
* [Starting Feast servers in TLS(SSL) Mode](how-to-guides/starting-feast-servers-tls-mode.md)
* [Importing Features from dbt](how-to-guides/dbt-integration.md)
+* [Feature Quality Monitoring](how-to-guides/feature-monitoring.md)
## Reference
diff --git a/docs/how-to-guides/feature-monitoring.md b/docs/how-to-guides/feature-monitoring.md
new file mode 100644
index 00000000000..c79b955ed2e
--- /dev/null
+++ b/docs/how-to-guides/feature-monitoring.md
@@ -0,0 +1,388 @@
+# Feature Quality Monitoring
+
+## Overview
+
+Feast's data quality monitoring system computes, stores, and serves statistical metrics for every registered feature. It gives you visibility into feature health — distributions, null rates, percentiles, histograms — across batch data and feature serving logs.
+
+This guide covers:
+
+1. [Prerequisites](#1-prerequisites)
+2. [Auto-baseline on registration](#2-auto-baseline-on-registration)
+3. [Scheduled monitoring with the CLI](#3-scheduled-monitoring-with-the-cli)
+4. [Monitoring feature serving logs](#4-monitoring-feature-serving-logs)
+5. [Reading metrics via REST API](#5-reading-metrics-via-rest-api)
+6. [On-demand exploration (transient compute)](#6-on-demand-exploration)
+7. [Integrating with orchestrators](#7-integrating-with-orchestrators)
+8. [Supported backends](#8-supported-backends)
+
+## 1. Prerequisites
+
+Monitoring works with any supported offline store backend. No additional infrastructure or configuration is needed — monitoring tables are created automatically on first use.
+
+**Minimum setup:**
+
+- A Feast project with at least one feature view and a configured offline store
+- Feast SDK installed (`pip install feast`)
+
+**For serving log monitoring:**
+
+- At least one feature service with `logging_config` set (see [step 4](#4-monitoring-feature-serving-logs))
+
+## 2. Auto-baseline on registration
+
+When you run `feast apply` to register new features, Feast automatically queues baseline metric computation:
+
+```bash
+$ feast apply
+Applying changes...
+Created feature view 'driver_stats' with 3 features
+ → Queued baseline metrics computation (DQM job: abc-123)
+Done!
+```
+
+The baseline reads all available source data and stores the resulting statistics with `is_baseline=TRUE`. This serves as the reference distribution for future drift detection.
+
+Baseline computation is:
+- **Non-blocking** — `feast apply` returns immediately; computation runs asynchronously
+- **Idempotent** — only features without existing baselines are computed; re-running `feast apply` won't recompute existing baselines
+
+### Disabling auto-baseline
+
+To skip automatic baseline computation on `feast apply`, set the DQM config in `feature_store.yaml`:
+
+```yaml
+dqm:
+ auto_baseline: false
+```
+
+When using the Feast operator, set this in the `FeatureStore` CR:
+
+```yaml
+apiVersion: feast.dev/v1
+kind: FeatureStore
+spec:
+ feastProject: my_project
+ dqm:
+ autoBaseline: false
+```
+
+## 3. Scheduled monitoring with the CLI
+
+### Auto mode (recommended for production)
+
+Schedule a single daily job that computes all granularities automatically:
+
+```bash
+feast monitor run
+```
+
+This detects the latest event timestamp in the source data and computes metrics for 5 time windows:
+
+| Granularity | Window |
+|-------------|--------|
+| `daily` | Last 1 day |
+| `weekly` | Last 7 days |
+| `biweekly` | Last 14 days |
+| `monthly` | Last 30 days |
+| `quarterly` | Last 90 days |
+
+No date arguments needed. One scheduled job produces all granularities.
+
+### Targeting a specific feature view
+
+```bash
+feast monitor run --feature-view driver_stats
+```
+
+### Explicit date range and granularity
+
+```bash
+feast monitor run \
+ --feature-view driver_stats \
+ --start-date 2025-01-01 \
+ --end-date 2025-01-07 \
+ --granularity weekly
+```
+
+### Setting a manual baseline
+
+```bash
+feast monitor run \
+ --feature-view driver_stats \
+ --start-date 2025-01-01 \
+ --end-date 2025-03-31 \
+ --granularity daily \
+ --set-baseline
+```
+
+### CLI reference
+
+```
+Usage: feast monitor run [OPTIONS]
+
+Options:
+ -p, --project TEXT Feast project name (defaults to feature_store.yaml)
+ -v, --feature-view TEXT Feature view name (omit for all)
+ -f, --feature-name TEXT Feature name(s), repeatable (omit for all)
+ --start-date TEXT Start date YYYY-MM-DD (omit for auto-detect)
+ --end-date TEXT End date YYYY-MM-DD (omit for auto-detect)
+ -g, --granularity One of: daily, weekly, biweekly, monthly, quarterly
+ --set-baseline Mark this computation as baseline
+ --source-type One of: batch, log, all (default: batch)
+ --help Show this message and exit.
+```
+
+## 4. Monitoring feature serving logs
+
+If your feature services have logging configured, you can compute metrics from the actual features served to models in production.
+
+### Setting up feature service logging
+
+In your feature definitions:
+
+```python
+from feast import FeatureService, LoggingConfig
+from feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source import (
+ PostgreSQLLoggingDestination,
+)
+
+driver_service = FeatureService(
+ name="driver_service",
+ features=[driver_stats_fv],
+ logging_config=LoggingConfig(
+ destination=PostgreSQLLoggingDestination(table_name="feast_driver_logs"),
+ sample_rate=1.0,
+ ),
+)
+```
+
+### Computing log metrics
+
+**Auto mode (all feature services with logging):**
+
+```bash
+feast monitor run --source-type log
+```
+
+**Specific feature service:**
+
+```bash
+feast monitor run --source-type log --feature-view driver_service
+```
+
+**Both batch and log in one run:**
+
+```bash
+feast monitor run --source-type all
+```
+
+Log metrics are stored with `data_source_type="log"` alongside batch metrics in the same monitoring tables. Feature names from the log schema (e.g., `driver_stats__conv_rate`) are automatically normalized back to their original names (`conv_rate`) and associated with the correct feature view — enabling batch-vs-log comparison and drift detection.
+
+### Via REST API
+
+```bash
+# Compute log metrics
+POST /monitoring/compute/log
+{
+ "project": "my_project",
+ "feature_service_name": "driver_service",
+ "granularity": "daily"
+}
+
+# Auto-compute all log metrics
+POST /monitoring/auto_compute/log
+{
+ "project": "my_project"
+}
+```
+
+## 5. Reading metrics via REST API
+
+All read endpoints support cascading filters: `project` → `feature_service_name` → `feature_view_name` → `feature_name` → `granularity` → `data_source_type`.
+
+### Per-feature metrics
+
+```
+GET /monitoring/metrics/features?project=my_project&feature_view_name=driver_stats&granularity=daily
+```
+
+**Response:**
+
+```json
+[
+ {
+ "project_id": "my_project",
+ "feature_view_name": "driver_stats",
+ "feature_name": "conv_rate",
+ "feature_type": "numeric",
+ "metric_date": "2025-03-26",
+ "granularity": "daily",
+ "data_source_type": "batch",
+ "row_count": 15000,
+ "null_count": 12,
+ "null_rate": 0.0008,
+ "mean": 0.523,
+ "stddev": 0.189,
+ "min_val": 0.001,
+ "max_val": 0.998,
+ "p50": 0.51,
+ "p75": 0.68,
+ "p90": 0.82,
+ "p95": 0.89,
+ "p99": 0.96,
+ "histogram": {
+ "bins": [0.0, 0.05, 0.1, "..."],
+ "counts": [120, 340, 560, "..."],
+ "bin_width": 0.05
+ }
+ }
+]
+```
+
+### Per-feature-view aggregates
+
+```
+GET /monitoring/metrics/feature_views?project=my_project&feature_view_name=driver_stats
+```
+
+### Per-feature-service aggregates
+
+```
+GET /monitoring/metrics/feature_services?project=my_project&feature_service_name=driver_service
+```
+
+### Baseline
+
+```
+GET /monitoring/metrics/baseline?project=my_project&feature_view_name=driver_stats
+```
+
+### Time-series (for trend charts)
+
+```
+GET /monitoring/metrics/timeseries?project=my_project&feature_name=conv_rate&granularity=daily&start_date=2025-01-01&end_date=2025-03-31
+```
+
+### Filtering batch vs. log metrics
+
+Add `data_source_type=batch` or `data_source_type=log` to any read endpoint:
+
+```
+GET /monitoring/metrics/features?project=my_project&data_source_type=log
+```
+
+### Full endpoint reference
+
+| Method | Endpoint | Description |
+|--------|----------|-------------|
+| `POST` | `/monitoring/compute` | Submit batch DQM job |
+| `POST` | `/monitoring/auto_compute` | Auto-detect dates, all granularities |
+| `POST` | `/monitoring/compute/transient` | On-demand compute (not stored) |
+| `POST` | `/monitoring/compute/log` | Compute from serving logs |
+| `POST` | `/monitoring/auto_compute/log` | Auto-detect log dates, all granularities |
+| `GET` | `/monitoring/jobs/{job_id}` | DQM job status |
+| `GET` | `/monitoring/metrics/features` | Per-feature metrics |
+| `GET` | `/monitoring/metrics/feature_views` | Per-view aggregates |
+| `GET` | `/monitoring/metrics/feature_services` | Per-service aggregates |
+| `GET` | `/monitoring/metrics/baseline` | Baseline metrics |
+| `GET` | `/monitoring/metrics/timeseries` | Time-series data |
+
+## 6. On-demand exploration
+
+When you need metrics for an arbitrary date range (e.g., "show me the distribution for Jan 5 to Jan 20"), use the transient compute endpoint. It reads source data for the exact range, computes fresh statistics, and returns them directly without storing them.
+
+```bash
+POST /monitoring/compute/transient
+{
+ "project": "my_project",
+ "feature_view_name": "driver_stats",
+ "feature_names": ["conv_rate"],
+ "start_date": "2025-01-05",
+ "end_date": "2025-01-20"
+}
+```
+
+Transient compute is needed for arbitrary ranges because pre-computed histograms from different date ranges have different bin edges and cannot be merged losslessly.
+
+## 7. Integrating with orchestrators
+
+### Airflow
+
+```python
+from airflow.operators.bash import BashOperator
+
+monitor_task = BashOperator(
+ task_id="feast_monitor",
+ bash_command="feast monitor run",
+ cwd="/path/to/feast/repo",
+)
+```
+
+### Kubeflow Pipelines (KFP)
+
+```python
+from kfp import dsl
+
+@dsl.component(base_image="feast-image:latest")
+def monitor_features():
+ import subprocess
+ subprocess.run(["feast", "monitor", "run"], check=True, cwd="/feast/repo")
+```
+
+### Cron
+
+```cron
+# Daily at 2:00 AM UTC
+0 2 * * * cd /path/to/feast/repo && feast monitor run >> /var/log/feast-monitor.log 2>&1
+```
+
+### Monitoring both batch and log in one job
+
+```bash
+feast monitor run --source-type all
+```
+
+## 8. Supported backends
+
+Monitoring works natively with the offline stores that serve as compute engines for Feast materialization, listed below:
+
+| Backend | Compute | Storage |
+|---------|---------|---------|
+| PostgreSQL | SQL push-down | `INSERT ON CONFLICT` |
+| Snowflake | SQL push-down | `MERGE` with `VARIANT` JSON |
+| BigQuery | SQL push-down | `MERGE` into BQ tables |
+| Redshift | SQL push-down | `MERGE` via Data API |
+| Spark | SparkSQL push-down | Parquet tables |
+| Oracle | SQL via Ibis | `MERGE` from `DUAL` |
+| DuckDB | In-memory SQL | Parquet files |
+| Dask | PyArrow compute | Parquet files |
+
+Backends not listed above fall back to Python-based computation — the offline store's `pull_all_from_table_or_query()` returns a PyArrow Table, and metrics are computed using `pyarrow.compute` and `numpy`.
+
+## What metrics are computed
+
+**Per-feature (full profile):**
+
+| Metric | Numeric | Categorical |
+|--------|:-------:|:-----------:|
+| row_count, null_count, null_rate | Yes | Yes |
+| mean, stddev, min, max | Yes | — |
+| p50, p75, p90, p95, p99 | Yes | — |
+| histogram (JSONB) | Binned distribution | Top-N values with counts |
+
+**Per-feature-view and per-feature-service (aggregate summaries):**
+
+| Metric | Description |
+|--------|-------------|
+| total_row_count | Total rows in the view |
+| total_features | Number of features |
+| features_with_nulls | Count of features with any nulls |
+| avg_null_rate, max_null_rate | Aggregate null rate statistics |
+
+## RBAC
+
+Monitoring respects Feast's existing RBAC:
+
+- **Compute operations** (`POST /monitoring/compute`, `/auto_compute`, `/compute/log`, `/auto_compute/log`) require `AuthzedAction.UPDATE`
+- **Transient compute** (`POST /monitoring/compute/transient`) requires `AuthzedAction.DESCRIBE`
+- **Read operations** (`GET /monitoring/metrics/*`) require `AuthzedAction.DESCRIBE`
diff --git a/docs/reference/feature-servers/python-feature-server.md b/docs/reference/feature-servers/python-feature-server.md
index 654c4b9f938..4802599866d 100644
--- a/docs/reference/feature-servers/python-feature-server.md
+++ b/docs/reference/feature-servers/python-feature-server.md
@@ -352,11 +352,14 @@ feature_server:
push: true # push request counters
materialization: true # materialization counters & duration
freshness: true # feature freshness gauges
+ offline_features: true # offline store retrieval counters & latency
+ audit_logging: false # structured JSON audit logs (see below)
```
Any category set to `false` will emit no metrics and start no background
threads (e.g., setting `freshness: false` prevents the registry polling
-thread from starting). All categories default to `true`.
+thread from starting). All categories default to `true` except
+`audit_logging`, which defaults to `false`.
### Available metrics
@@ -375,6 +378,9 @@ thread from starting). All categories default to `true`.
| `feast_materialization_result_total` | Counter | `feature_view`, `status` | `materialization` | Materialization runs (success/failure) |
| `feast_materialization_duration_seconds` | Histogram | `feature_view` | `materialization` | Materialization duration per feature view |
| `feast_feature_freshness_seconds` | Gauge | `feature_view`, `project` | `freshness` | Seconds since last materialization |
+| `feast_offline_store_request_total` | Counter | `method`, `status` | `offline_features` | Total offline store retrieval requests |
+| `feast_offline_store_request_latency_seconds` | Histogram | `method` | `offline_features` | Latency of offline store retrieval operations |
+| `feast_offline_store_row_count` | Histogram | `method` | `offline_features` | Rows returned by offline store retrieval |
### Per-ODFV transformation metrics
@@ -405,6 +411,70 @@ The `odfv_name` label lets you filter or group by individual ODFV,
and the `mode` label (`python`, `pandas`, `substrait`) lets you compare
transformation engines.
+### Audit logging
+
+Feast can emit structured JSON audit log entries for every online and offline
+feature retrieval. Entries are written through the `feast.audit` logger from
+Python's standard `logging` module, so you can route them to a dedicated file,
+SIEM, or log aggregator independently of application logs.
+
+Audit logging is **disabled by default**. Enable it in `feature_store.yaml`:
+
+```yaml
+feature_server:
+ type: local
+ metrics:
+ enabled: true
+ audit_logging: true
+```
+
+**Online audit log** (emitted per `/get-online-features` call):
+
+```json
+{
+ "event": "online_feature_request",
+ "timestamp": "2026-05-11T08:30:00.123456+00:00",
+ "requestor_id": "user@example.com",
+ "entity_keys": ["driver_id"],
+ "entity_count": 3,
+ "feature_views": ["driver_hourly_stats"],
+ "feature_count": 3,
+ "status": "success",
+ "latency_ms": 12.34
+}
+```
+
+**Offline audit log** (emitted per `RetrievalJob.to_arrow()` call):
+
+```json
+{
+ "event": "offline_feature_retrieval",
+ "timestamp": "2026-05-11T08:31:00.456789+00:00",
+ "method": "to_arrow",
+ "start_time": "2026-05-11T08:30:59.226789+00:00",
+ "end_time": "2026-05-11T08:31:00.456789+00:00",
+ "feature_views": ["driver_hourly_stats"],
+ "feature_count": 3,
+ "row_count": 500,
+ "status": "success",
+ "duration_ms": 1230.0
+}
+```
+
+The `requestor_id` field in online audit logs is populated from the
+security manager's current user when authentication is configured, and
+falls back to `"anonymous"` otherwise.
+
+To route audit logs to a separate file:
+
+```python
+import logging
+
+handler = logging.FileHandler("/var/log/feast/audit.log")
+handler.setFormatter(logging.Formatter("%(message)s"))
+logging.getLogger("feast.audit").addHandler(handler)
+```
+
### Scraping with Prometheus
```yaml
diff --git a/examples/monitoring/monitoring-quickstart.ipynb b/examples/monitoring/monitoring-quickstart.ipynb
new file mode 100644
index 00000000000..77101ffff51
--- /dev/null
+++ b/examples/monitoring/monitoring-quickstart.ipynb
@@ -0,0 +1,1256 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Feature Quality Monitoring — Quickstart\n",
+ "\n",
+ "This notebook walks you through Feast's data quality monitoring end-to-end:\n",
+ "\n",
+ "1. Set up a feature store with a PostgreSQL offline store\n",
+ "2. Register features and trigger baseline computation\n",
+ "3. Compute metrics across multiple granularities\n",
+ "4. Read metrics via the Python SDK and REST API\n",
+ "5. Set up serving log monitoring\n",
+ "6. Use on-demand exploration for custom date ranges\n",
+ "\n",
+ "**Prerequisites:** A running PostgreSQL instance and `feast[postgres]` installed."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 1: Install Feast"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!uv pip install -q 'feast[postgres]'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 2: Configure the Feature Store\n",
+ "\n",
+ "Create a minimal `feature_store.yaml` with a PostgreSQL offline store."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Working directory: /var/folders/cn/z7vz24yj25d8fjqdrs9jbsh00000gn/T/feast_monitoring_demo_kze7m3sk\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import tempfile\n",
+ "\n",
+ "REPO_DIR = tempfile.mkdtemp(prefix=\"feast_monitoring_demo_\")\n",
+ "os.makedirs(REPO_DIR, exist_ok=True)\n",
+ "print(f\"Working directory: {REPO_DIR}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "feature_store.yaml written.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Adjust these to match your PostgreSQL instance\n",
+ "PG_HOST = os.environ.get(\"FEAST_PG_HOST\", \"localhost\")\n",
+ "PG_PORT = os.environ.get(\"FEAST_PG_PORT\", \"5432\")\n",
+ "PG_DB = os.environ.get(\"FEAST_PG_DB\", \"feast\")\n",
+ "PG_USER = os.environ.get(\"FEAST_PG_USER\", \"feast\")\n",
+ "PG_PASS = os.environ.get(\"FEAST_PG_PASS\", \"feast\")\n",
+ "\n",
+ "PG_SSLMODE = os.environ.get(\"FEAST_PG_SSLMODE\", \"disable\")\n",
+ "\n",
+ "feature_store_yaml = f\"\"\"\n",
+ "project: monitoring_demo\n",
+ "registry:\n",
+ " registry_type: sql\n",
+ " path: postgresql://{PG_USER}:{PG_PASS}@{PG_HOST}:{PG_PORT}/{PG_DB}?sslmode={PG_SSLMODE}\n",
+ "provider: local\n",
+ "offline_store:\n",
+ " type: postgres\n",
+ " host: {PG_HOST}\n",
+ " port: {PG_PORT}\n",
+ " database: {PG_DB}\n",
+ " user: {PG_USER}\n",
+ " password: {PG_PASS}\n",
+ " sslmode: {PG_SSLMODE}\n",
+ "online_store:\n",
+ " type: sqlite\n",
+ " path: {REPO_DIR}/online_store.db\n",
+ "entity_key_serialization_version: 3\n",
+ "\"\"\"\n",
+ "\n",
+ "with open(os.path.join(REPO_DIR, \"feature_store.yaml\"), \"w\") as f:\n",
+ " f.write(feature_store_yaml)\n",
+ "\n",
+ "print(\"feature_store.yaml written.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 3: Create Sample Data and Feature Definitions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sample data: 5000 rows, 60 days\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " driver_id \n",
+ " event_timestamp \n",
+ " conv_rate \n",
+ " acc_rate \n",
+ " avg_daily_trips \n",
+ " vehicle_type \n",
+ " created \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1148 \n",
+ " 2025-02-08 \n",
+ " 0.348307 \n",
+ " 0.794390 \n",
+ " 14 \n",
+ " compact \n",
+ " 2025-02-08 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1539 \n",
+ " 2025-02-21 \n",
+ " 0.305945 \n",
+ " 0.749046 \n",
+ " 25 \n",
+ " van \n",
+ " 2025-02-21 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 1487 \n",
+ " 2025-01-29 \n",
+ " 0.791641 \n",
+ " 0.784492 \n",
+ " 17 \n",
+ " sedan \n",
+ " 2025-01-29 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 1821 \n",
+ " 2025-01-15 \n",
+ " 0.267308 \n",
+ " 0.726226 \n",
+ " 17 \n",
+ " sedan \n",
+ " 2025-01-15 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 1437 \n",
+ " 2025-02-12 \n",
+ " 0.544618 \n",
+ " 0.729568 \n",
+ " 11 \n",
+ " suv \n",
+ " 2025-02-12 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " driver_id event_timestamp conv_rate acc_rate avg_daily_trips \\\n",
+ "0 1148 2025-02-08 0.348307 0.794390 14 \n",
+ "1 1539 2025-02-21 0.305945 0.749046 25 \n",
+ "2 1487 2025-01-29 0.791641 0.784492 17 \n",
+ "3 1821 2025-01-15 0.267308 0.726226 17 \n",
+ "4 1437 2025-02-12 0.544618 0.729568 11 \n",
+ "\n",
+ " vehicle_type created \n",
+ "0 compact 2025-02-08 \n",
+ "1 van 2025-02-21 \n",
+ "2 sedan 2025-01-29 \n",
+ "3 sedan 2025-01-15 \n",
+ "4 suv 2025-02-12 "
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from datetime import datetime, timedelta\n",
+ "\n",
+ "np.random.seed(42)\n",
+ "\n",
+ "N_ROWS = 5000\n",
+ "N_DAYS = 60\n",
+ "\n",
+ "base_date = datetime(2025, 1, 1)\n",
+ "timestamps = [base_date + timedelta(days=int(d)) for d in np.random.randint(0, N_DAYS, N_ROWS)]\n",
+ "\n",
+ "df = pd.DataFrame({\n",
+ " \"driver_id\": np.random.randint(1000, 2000, N_ROWS),\n",
+ " \"event_timestamp\": timestamps,\n",
+ " \"conv_rate\": np.clip(np.random.normal(0.5, 0.2, N_ROWS), 0, 1),\n",
+ " \"acc_rate\": np.clip(np.random.normal(0.7, 0.15, N_ROWS), 0, 1),\n",
+ " \"avg_daily_trips\": np.random.poisson(20, N_ROWS).astype(\"int32\"),\n",
+ " \"vehicle_type\": np.random.choice([\"sedan\", \"suv\", \"truck\", \"van\", \"compact\"], N_ROWS),\n",
+ " \"created\": timestamps,\n",
+ "})\n",
+ "\n",
+ "print(f\"Sample data: {len(df)} rows, {N_DAYS} days\")\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!uv pip install -q 'psycopg2'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loaded sample data into PostgreSQL table 'driver_stats_source'.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load sample data into PostgreSQL\n",
+ "from sqlalchemy import create_engine\n",
+ "\n",
+ "engine = create_engine(f\"postgresql://{PG_USER}:{PG_PASS}@{PG_HOST}:{PG_PORT}/{PG_DB}\")\n",
+ "df.to_sql(\"driver_stats_source\", engine, if_exists=\"replace\", index=False)\n",
+ "print(\"Loaded sample data into PostgreSQL table 'driver_stats_source'.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Feature definitions written.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Write feature definitions\n",
+ "definitions = '''\n",
+ "from datetime import timedelta\n",
+ "from feast import Entity, FeatureView, FeatureService, Field\n",
+ "from feast.types import Float32, Int32, String\n",
+ "from feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source import (\n",
+ " PostgreSQLSource,\n",
+ ")\n",
+ "\n",
+ "driver = Entity(name=\"driver\", join_keys=[\"driver_id\"])\n",
+ "\n",
+ "driver_stats_source = PostgreSQLSource(\n",
+ " name=\"driver_stats_source\",\n",
+ " query=\"SELECT * FROM driver_stats_source\",\n",
+ " timestamp_field=\"event_timestamp\",\n",
+ " created_timestamp_column=\"created\",\n",
+ ")\n",
+ "\n",
+ "driver_stats_fv = FeatureView(\n",
+ " name=\"driver_stats\",\n",
+ " entities=[driver],\n",
+ " ttl=timedelta(days=365),\n",
+ " schema=[\n",
+ " Field(name=\"conv_rate\", dtype=Float32),\n",
+ " Field(name=\"acc_rate\", dtype=Float32),\n",
+ " Field(name=\"avg_daily_trips\", dtype=Int32),\n",
+ " Field(name=\"vehicle_type\", dtype=String),\n",
+ " ],\n",
+ " source=driver_stats_source,\n",
+ ")\n",
+ "\n",
+ "driver_service = FeatureService(\n",
+ " name=\"driver_service\",\n",
+ " features=[driver_stats_fv],\n",
+ ")\n",
+ "'''\n",
+ "\n",
+ "with open(os.path.join(REPO_DIR, \"definitions.py\"), \"w\") as f:\n",
+ " f.write(definitions)\n",
+ "\n",
+ "print(\"Feature definitions written.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 4: Apply — Registers Features & Triggers Baseline\n",
+ "\n",
+ "Running `feast apply` registers the feature definitions and automatically queues baseline metric computation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/cn/z7vz24yj25d8fjqdrs9jbsh00000gn/T/feast_monitoring_demo_kze7m3sk/definitions.py:9: DeprecationWarning: Entity value_type will be mandatory in the next release. Please specify a value_type for entity 'driver'.\n",
+ " driver = Entity(name=\"driver\", join_keys=[\"driver_id\"])\n",
+ "The `path` of the `RegistryConfig` starts with a plain `postgresql` string. We are updating this to `postgresql+psycopg` to ensure that the `psycopg3` driver is used by `sqlalchemy`. If you want to use `psycopg2` pass `postgresql+psycopg2` explicitely to `path`. To silence this warning, pass `postgresql+psycopg` explicitely to `path`.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Features registered. Baseline computation queued.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sys\n",
+ "from feast import FeatureStore\n",
+ "\n",
+ "sys.path.insert(0, REPO_DIR)\n",
+ "from definitions import driver, driver_stats_source, driver_stats_fv, driver_service\n",
+ "\n",
+ "store = FeatureStore(repo_path=REPO_DIR)\n",
+ "store.apply([driver, driver_stats_source, driver_stats_fv, driver_service])\n",
+ "print(\"Features registered. Baseline computation queued.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 5: Compute Batch Metrics\n",
+ "\n",
+ "### 5a. Auto-compute (recommended for production)\n",
+ "\n",
+ "Auto-compute detects the latest event timestamp and generates metrics for all 5 granularities: `daily`, `weekly`, `biweekly`, `monthly`, and `quarterly`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Computed metrics for 20 features\n",
+ "Granularities: ['biweekly', 'daily', 'monthly', 'quarterly', 'weekly']\n"
+ ]
+ }
+ ],
+ "source": [
+ "from feast.monitoring.monitoring_service import MonitoringService\n",
+ "\n",
+ "monitoring = MonitoringService(store)\n",
+ "\n",
+ "result = monitoring.auto_compute(\n",
+ " project=\"monitoring_demo\",\n",
+ ")\n",
+ "\n",
+ "print(f\"Computed metrics for {result.get('computed_features', 'N/A')} features\")\n",
+ "print(f\"Granularities: {result.get('granularities', [])}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5b. Targeted compute (specific date range)\n",
+ "\n",
+ "Compute `weekly` metrics for a specific window."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'status': 'completed', 'granularity': 'weekly', 'computed_features': 4, 'computed_feature_views': 1, 'computed_feature_services': 1, 'metric_dates': ['2025-01-01'], 'duration_ms': 43}\n"
+ ]
+ }
+ ],
+ "source": [
+ "from datetime import date\n",
+ "\n",
+ "result = monitoring.compute_metrics(\n",
+ " project=\"monitoring_demo\",\n",
+ " feature_view_name=\"driver_stats\",\n",
+ " start_date=date(2025, 1, 1),\n",
+ " end_date=date(2025, 1, 7),\n",
+ " granularity=\"weekly\",\n",
+ ")\n",
+ "\n",
+ "print(result)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5c. Set a manual baseline\n",
+ "\n",
+ "Use `set_baseline=True` to mark the computed metrics as the reference distribution."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Baseline set.\n"
+ ]
+ }
+ ],
+ "source": [
+ "result = monitoring.compute_metrics(\n",
+ " project=\"monitoring_demo\",\n",
+ " feature_view_name=\"driver_stats\",\n",
+ " start_date=date(2025, 1, 1),\n",
+ " end_date=date(2025, 2, 28),\n",
+ " granularity=\"daily\",\n",
+ " set_baseline=True,\n",
+ ")\n",
+ "\n",
+ "print(\"Baseline set.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 6: Read Metrics\n",
+ "\n",
+ "### Per-feature metrics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Date: 2025-01-01 Mean: 0.4989 Null rate: 0.0000 Rows: 4922\n",
+ "Date: 2025-02-28 Mean: 0.5201 Null rate: 0.0000 Rows: 104\n"
+ ]
+ }
+ ],
+ "source": [
+ "metrics = monitoring.get_feature_metrics(\n",
+ " project=\"monitoring_demo\",\n",
+ " feature_view_name=\"driver_stats\",\n",
+ " feature_name=\"conv_rate\",\n",
+ " data_source_type=\"batch\",\n",
+ " granularity=\"daily\",\n",
+ ")\n",
+ "\n",
+ "for m in metrics[:3]:\n",
+ " print(f\"Date: {m['metric_date']} Mean: {m['mean']:.4f} Null rate: {m['null_rate']:.4f} Rows: {m['row_count']}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Categorical feature metrics\n",
+ "\n",
+ "Categorical features (like `vehicle_type`) produce value-count histograms instead of numeric statistics."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Date: 2025-01-01 Type: categorical Rows: 4922 Null rate: 0.0000\n",
+ " Unique values: 5 Other count: 0\n",
+ " van: 1051\n",
+ " suv: 1028\n",
+ " sedan: 970\n",
+ " truck: 954\n",
+ " compact: 919\n",
+ "Date: 2025-02-28 Type: categorical Rows: 104 Null rate: 0.0000\n",
+ " Unique values: 5 Other count: 0\n",
+ " compact: 26\n",
+ " truck: 24\n",
+ " sedan: 19\n",
+ " van: 18\n",
+ " suv: 17\n"
+ ]
+ }
+ ],
+ "source": [
+ "cat_metrics = monitoring.get_feature_metrics(\n",
+ " project=\"monitoring_demo\",\n",
+ " feature_view_name=\"driver_stats\",\n",
+ " feature_name=\"vehicle_type\",\n",
+ " data_source_type=\"batch\",\n",
+ " granularity=\"daily\",\n",
+ ")\n",
+ "\n",
+ "for m in cat_metrics[:3]:\n",
+ " print(f\"Date: {m['metric_date']} Type: {m['feature_type']} \"\n",
+ " f\"Rows: {m['row_count']} Null rate: {m['null_rate']:.4f}\")\n",
+ " if m.get(\"histogram\"):\n",
+ " hist = m[\"histogram\"]\n",
+ " print(f\" Unique values: {hist['unique_count']} Other count: {hist['other_count']}\")\n",
+ " for entry in hist[\"values\"]:\n",
+ " print(f\" {entry['value']}: {entry['count']}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAANlNJREFUeJzt3QmcjeX///HPjDFjmca+G7ufnYTKUrLvhUhSiEqRkqTUD1HiS5SQoujrG/mWolWSXWTJvmdrpiKyjK0sM/f/8bl+//t0zmxmXMOZmfN6Ph7HmPvc576v+77PzFzv+1pOkOM4jgAAAACAhWCbFwMAAACAIlgAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gA8IsPPvhAgoKCZOPGjVdd96677jKP1NLtv/zyy9dYwsxNz4ueH2+lSpWSnj17Xvd9Hz582Oxb3wMu3W94eLjcKLw3ACDtESwA4Bq8/fbbPhXjQPXNN9+k2wp6ei5bWvvss8+kS5cuUqZMGcmRI4dUqFBBnn32WTl9+nSi63/xxRdyyy23SLZs2aREiRIyfPhwuXLlis86S5YskV69esn//M//mG3qth955BE5cuRIgu1p8NewFv/RsmXLFB+DlvWxxx6TAgUKSM6cOaVRo0ayadOmBOv997//lQcffFDKly9v9nEtNx1u5L6AQBLi7wIAwNV89913kh6DRf78+W/IHf4bZe/evRIcHJzqyvuUKVNSVYEvWbKk/PXXX5I1a9ZrKGXalE33HxKSef4EaiW5aNGiphKsQWH79u0yefJkcw60wpw9e3bPugsXLpT27dubSvKkSZPMuq+++qocO3ZMpk6d6lnv+eefl5MnT0rnzp1NxfrgwYNmm1999ZVs2bJFChcu7FOG4sWLy+jRo32WaZlSIi4uTtq0aSNbt26V5557zvxs6c+YlvGnn34y+3dpGXVZnTp15MSJE6k+VzdyX0CgyTy/VQFkWqGhof4uQkAICwu7rtvXO+JaqdPrqXfK/cnf+09r8+bNS3A3vVatWtKjRw+ZPXu2aWlwDRo0SKpXr24CuxuuIiIi5LXXXpOnn35aKlasaJZNmDBBGjRo4BM2tQWiYcOGJmBoGPGWK1cuE2yutfxr1qyRTz75RDp16mSW3Xfffaa1RFtT5syZ41n3P//5jxQrVsyUq2rVqul6X0CgoSsUgBT9IdZuACtWrEjw3Lvvvmue27Fjh2fZnj17zB/svHnzmgpc7dq1TdeLxFy8eFEGDhzo6ZLQoUMHOX78+FXHWPz999/mTrRWBnQfRYoUkY4dO8qBAweSPZbffvvNdO8oVKiQqUhXqVJFZsyYkarzoWMRdu7cac6H2+VDy6d3dPX/b7zxRoLXaEVGn/voo498xjjoudJKjVbs8uXLZyp2emzxffjhh6aiqHee9bzef//9Eh0dnaLyrl692txx1fNUtmxZc82SOi7vFpjLly/LiBEjzB1cfa2WTyuaixcvNs/rutoioLy7v3iPo3j99dflzTffNPvV871r165Ex1i49By2aNHCvBf0bvfIkSPFcRzP88uXLzev1a/e4m8zubK5y+K3ZGzevFlatWplroWO92jSpIn8+OOPiY4N+uGHH676vr2REuuio2VSu3fv9izT868PbeHwbrHp27evOc/6s+668847E7Rg6TJ9/3lvM354PHfuXKrLr/vVn0n9GXbpudWfjc8//9z8nnBFRkamumXNX/sCAg0tFgCuSrsNaEXr448/Nncr4/dB1sq5ezdPK9z169c3d/leeOEFU+nS12nXi08//dRT2XH1799f8uTJY+4UauVQK6FPPvmk2W5SYmNjpW3btqYPuFawtTJ+9uxZU+HVgKOV2MT88ccfcvvtt5uKoe5DKxPaLaR3795y5swZGTBgQIrOh5ZRy63n5KWXXjLLtKKifdD12PUO8TPPPOPz
Gl120003yT333OOzXCszWqHXLiRaiX3rrbfk1KlTMmvWLM86o0aNkqFDh5p19c6zVmC1C4tW8rQynDt37iTLqt1cmjdvbo5VK9Ja8dNzreW9Gl1fy6X7vPXWW8050sH22rWmWbNm0qdPH/n999/Nedc7u4mZOXOmCUpakdVgoZVSbbVI6rrqHXG9RmPHjpVvv/3W0/dfA0ZqpKRs3vR9e8cdd5hQMXjwYNNNSwOYVtg1QN52223W79sb7ejRo+ardvVx6ftFadj3piFOuzK5zydFQ4M+vLfp2rdvn/l5v3Tpknl/PfroozJs2LAUdXnT/eqYj/iVeH3fTZs2zWy7WrVqkhZu5L6AgOMAQAp07drVKViwoHPlyhXPsiNHjjjBwcHOyJEjPcuaNGniVKtWzfn77789y+Li4px69eo55cuX9yybOXOm3oZ2mjZtap53PfPMM06WLFmc06dPe5Y1bNjQPFwzZswwr50wYUKCcnpvS9cZPny45/vevXs7RYoUcf7880+f19x///1Orly5nAsXLqT4fFSpUsWnTK53333X7Hf37t2eZZcuXXLy58/v9OjRw7NMy6Xr3X333T6v79u3r1m+detW8/3hw4fN+Rg1apTPetu3b3dCQkISLI+vffv2TrZs2ZxffvnFs2zXrl1mm/H/BJQsWdKnjDVq1HDatGmT7Pb79euXYDvq0KFDZnlERIRz7NixRJ/T94BL96vL+vfv73Mtdf+hoaHO8ePHzbJly5aZ9fTr1baZVNkSe2/oedL9HDhwwLPs999/d2666SbnzjvvvKb3rb/p+13LtG/fPs+ycePGmfJHRUUlWL9OnTrO7bffnuw2X3nlFfP6JUuW+Czv1auX8/LLLzuffvqpM2vWLPO+1vXuu+++FJU1Z86cZhvxff3112Y73377bap+DtPLvoBAQ/segBTRGWd0cKd3FxTtUqB3n/U5pQM9ly5dau6sawvCn3/+aR466FG7t/z888+mK5I3vZPt3UVF7xrrnetffvklybJoy4feMdW7xvHFn0LVpXVJfV27du3M/92y6UPLFhMTk+isMKmlx67dhrSFwrVo0SKzn8T6n/fr18/ne/eYdNCtO9uPnmPdrneZdeCsdlFatmxZkmXR86j71tYiHdDrqlSpkjnmq9GWEL2Tr9ftWt17772mtSSl9K6/y21Z0jvg33//vVwvep50vIGeJ211cmn3ugceeMB0JdPWGtv37Y2k4wTef/99MzOU92BkHbSe1Hgafd+6zydm5cqVpmucvhcbN27s85zuS1tvtHvRQw89ZLoUaYuFtlbG706WGN1vUmXyLndauJH7AgINXaEApIh2UdHBmdrVQ/ueK/3/zTffbMY5qP3795tKu3bb0UdiNJxoNymXd4VXafcSpd2BkqLjKHQ6zdTM6qPdh3SKSe3qoI+kymZLK+MaXrRi98orr5hlGjL0mONXxpR3pU9pNy7toqHda5RW6vWcxl/PlVw3Ez1mrSQl9lo9f254SYp2P9KuW3p9taubvge00qgDf1OqdOnSKV5Xj9u7Yq/c95Z7Pq4HPU8XLlww5yQ+DWEa7HQ8i3b5s3nf6rXQAHstdGyN/vylxKpVq0z3Pg2P2o0u/naU9zgCl3ZZ8549ypuOBdJujPo+eO+991JUDg0106dPN6FQu7dpQNSbD940dGbJksXsN6kyeZc7pW7kvgD8g2ABIEX0Dp/e0Z0/f76ZmlHHK+gAVp1JxuX2nddZZ5K6I16uXDmf7/UPfWK8B+ymBbds2mqgM+UkJjUV5uR0797dzDijA7a1r7YOXNfBsSkZBBq/xUXLrct0LEhi5+p6fqicjuHQEKd3n/WOvlYodWD6O++84zPLUHLSupKWVIuUthbcSNfyvtUg/vDDD1/T/vQ9m5LPTdEpVO+++24TALRFMX741lYYpZ9FoQOTvekyHWcQn4YqHaejwUbDqI4VSgl3+24FX38e9PMivB06dMiMMdJyJfb5GO6ylE5b
67qR+wLwD4IFgBTTLk///ve/zaBpnRVGK1FuNyjl3m3Wu+hNmza9buXQu/rr1q0zsxal9LMQ9G6lVoi0ApoWZUuqgqv0zr7uT1sqdNCv3g3XO/2J0RYJ77v62uqjYUIrQO6x6nnWddy79ymlZdCKfWJdmfQzK1JCB1trZVgfOmhXw4YO6naDRXLnIbX0uHVWKO/j1IG0yj0fbstA/A9+S6wLUkrLpudJPwAusXOid+o1EMavhF8LDdvujFqplZLKroZAfe8VLFjQBIDEQqe2MCodhO8dInSg+6+//mq6eHnTbowaKvQOv/7cu8EkJfRaKrcrXI0aNRIcv/tZGFoubWnR94B3ANefc702qX3v38h9AfgHwQJAimmFXCuaeudVg4VWTLwrxVqh0Vl0dDYdHSsQvxKiXU5S098+uX77X3/9tZlLP/7sS1oJT6xCqXeY9XXaRUlnjoo/J31qy6az3yT1qcZ6l7hr165mX3qetNUiqdYQnRJVK24une1J6bSnSvusDxkyxPRt1ylnvY9Nj1XvBus0sInRY9bK7IIFCyQqKsrTfUfLpGMvrkYrld7b1oqqtjh5T3Or50HpuUhudqqU0muqM2O5x6ffa3h0u9/ph+vpcWl/f21Bc2krWnwpLZtuT6+Btsxolys3xGirnF5DnWJXZ4uypT8PqamYp3YGKD0GrSjrtU3qvazdufRzKrQ7oM6c5ba86AfB6XvL/VwHdf78eWndurUZF6VjeZLqjqfjT7RF03vcgl4793Mu3NZLDYVJhXrdr7aw6Jgitww6lkhb/rRrYWo/Y+VG7gvAPwgWAFJMK3ha0Z07d66pdOhnFCRWUdaKmFamdfCmtmJoBW3t2rXmjqh21UiLrkY6Hat+jsD69evNwFktj/bl1i5H8ad0dY0ZM8ZUkLQVQctWuXJlUzHXQdv62vh9spOjnymhlTGtPGllW0OV9xgKLaNWkHV///rXv5LcjnbP0K4reqdZz5GGBx0wrHdc3RYL3YeGC630amVaW170ddotTe8wa9ezpGgg0Wlb9RzpudGpWzW8aAVz27ZtyR6jnh8NinqsGij1LrdWyLwHWOtz6qmnnjIVSK2o6hTA10IHz2pZtduPXiPt/qUB8sUXX/RUlLU7jn4StB6DVoT1/OgnQSc2PiY1ZdNzrHe49b2r50nDoQZkvVOvU9+md/r+0RYCnSpXB5vrw6VTv+r0wK5x48aZ95wGET0fGrQ1wGkrlI4pcXXr1s38fOnnvmgY9f7sCg2ZbrDTnx8N0vrQnwUdS6LvTe0qqe9Pndr1arSCr+MwtGVMP2fD/TRsbWHU97A3DZX6cG8I6M++G2K0RU0f6WVfQMDx97RUADKWxYsXmykZg4KCnOjo6ETX0Sk7u3fv7hQuXNjJmjWrU6xYMadt27bOvHnzEkzbuWHDBp/XJjadaPzpZpVODfvSSy85pUuXNvvQfXXq1MlnutD4U4qqP/74w0xDGhkZ6XmdTpE7bdq0VJ2Ho0ePmqlQdTpS3U9i01Dq9JQ6He+vv/6a4Dl3ulmd+lXLrdvJkyeP8+STTzp//fVXgvV1Gs8GDRqYqTL1UbFiRXMce/fuvWpZV6xY4dSqVctMp1qmTBnnnXfe8ew/uelmX331VefWW291cufO7WTPnt3sU6e31elzXTr9sE4RW6BAAfOecLfpTv+q05vGl9R0s3pcev2aN2/u5MiRwylUqJApZ2xsrM/rderZe++916yj56xPnz7Ojh07EmwzqbIl9d7YtGmT06JFCyc8PNxsu1GjRs6aNWt81knN+/ZG0n0n9UjsvTl//nzn5ptvdsLCwpzixYs7//u//+tzXd33Q1Lb1OdcBw8edDp37uyUKlXKTG2s507fb/o+856S92pOnjxppsjNly+f2YaWO/55Vu57N7FH/GuaHvYFBJIg/cff4QYAMqOaNWuaO/3aNz0+Haegd0f1LmhiHzYGAEBGw+dYAMB1oN2GtmzZYrpEAQAQCBhjAQBe
tAUhualLQ0NDTStEUrS/+k8//STjx483A3W9Z80CACAzI1gAgJc6deok++nJDRs29Pn08fh0cLN+sJx+2NpHH33k+TRfAAAyO8ZYAIAXnclGZ7VJbhpLd7YhAADwD4IFAAAAAGsM3gYAAABgjTEWFuLi4uT33383H1aV2Cf9AgAAABmZdm46e/asFC1aVIKDk2+TIFhY0FARGRnp72IAAAAA11V0dLQUL1482XUIFha0pcI90REREf4uDgAAAJCmzpw5Y26ku/Xe5BAsLLjdnzRUECwAAACQWaWk2z+DtwEAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAIA0Exvn+LsI8JMQf+04M5nw5VY5fsHfpQAAAPCvyPzh8kKHmv4uBvyEYJEGfjtxXqJiYv1dDAAAAMBv6AoFAAAAwBrBAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWDx/wUFBcmCBQv8XQwAAAAgQyJYAAAAALjxwSIuLk7Gjh0r5cqVk7CwMClRooSMGjXKPLd9+3Zp3LixZM+eXfLlyyePPfaYnDt3zvPanj17Svv27eW1116TQoUKSe7cuWXkyJFy5coVee655yRv3rxSvHhxmTlzpuc1hw8fNq0Jc+fOlXr16km2bNmkatWqsmLFCs86sbGx0rt3byldurTZd4UKFWTixIkJyj5jxgypUqWKKXeRIkXkySefNMtLlSplvnbo0MHsy/0eAAAAwHUKFkOGDJExY8bI0KFDZdeuXTJnzhwTEs6fPy8tWrSQPHnyyIYNG+STTz6R77//3lN5dy1dulR+//13WblypUyYMEGGDx8ubdu2Na9bt26dPP7449KnTx/59ddffV6nwePZZ5+VzZs3S926daVdu3Zy4sQJT9jRQKL71DINGzZMXnzxRfn44489r586dar069fPhB0NQF988YUJR0rLqzTQHDlyxPN9fBcvXpQzZ874PAAAAACIBDmO46R05bNnz0qBAgVk8uTJ8sgjj/g8N336dHn++eclOjpacubMaZZ98803JgBokNDwoS0Wy5cvl4MHD0pw8P9lmooVK0rBggVN0HBbH3LlyiXvvfee3H///abFQlsiNMzo9pW2cOiy/v37y+DBgxMtqwaao0ePyrx588z3xYoVk4cfflheffXVxE9EUJDMnz/ftKgk5eWXX5YRI0YkWP7IxIUSFRObwrMIAACQOZUrHCFTHr3D38VAGtIb6Vo3j4mJkYiIiLRrsdi9e7e5a9+kSZNEn6tRo4YnVKj69eub1oS9e/d6lmlXJDdUKA0c1apV83yfJUsW043q2LFjPtvXVgpXSEiI1K5d2+zTNWXKFKlVq5YJPuHh4TJt2jSJiooyz+m2NNwkVu7UttboSXUfGqIAAAAAiISkZmUdv2Ara9asCVoKElumgSSldPzFoEGDZPz48SaA3HTTTTJu3DjTtSqtyq10bIY+AAAAAFi0WJQvX95U0pcsWZLguUqVKsnWrVvNWAvXDz/8YFondDC1rR9//NHzf+0K9dNPP5l9uvvRgd19+/aVmjVrmrETBw4c8KyvQUMHZCdWbpeGG+2GBQAAAOA6BwudkUnHOei4hlmzZpnKu1b433//fenWrZt5vkePHrJjxw5ZtmyZGQPx0EMPme5OtrSrk46B2LNnjxmEferUKenVq5cn8GzcuFEWLVok+/btMwPL4w/A1vER2qLx1ltvyc8//yybNm2SSZMmeZ53g4eOy9BtAwAAALiOs0JppV1nZ9KZl7TFoEuXLmYMQ44cOUzF/uTJk1KnTh3p1KmTGdOgA73Tgg7e1oeO41i9erWZ
1Sl//vzmOZ1FqmPHjqYst912m5ktSlsvvGngefPNN+Xtt9824zx0JioNGC4NHYsXL5bIyEjT6gEAAADgOs0K5Q/urFA6zezNN98s6XGUPLNCAQAAMCtUZnTdZoUCAAAAgMQQLAAAAADc2Olm/UEHVafz3loAAABAwKPFAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACshdhvAsXy5ZTQ7P4uBQAAgH9F5g/3dxHgRwSLNDCwXQ2JiIjwdzEAAAD8LjbOkSzBQf4uBvyArlAAAABIM4SKwEWwAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAA6V5snOPvIuAqQq62Aq5uwpdb5fgFf5cCAAAgc4rMHy4vdKjp72LgKggWaeC3E+clKibW38UAAAAA/IauUAAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIC1gA0Whw8flqCgINmyZYu/iwIAAABkeOkuWNx1110yYMAAfxcDAAAAQEYOFlfjOI5cuXLF38UAAAAAkF6DRc+ePWXFihUyceJE001JHx988IH5unDhQqlVq5aEhYXJ6tWrzbrt27f3eb22dGiLhysuLk7Gjh0r5cqVM68rUaKEjBo1KtF9x8bGSq9evaRixYoSFRV13Y8VAAAAyExCJB3RQLFv3z6pWrWqjBw50izbuXOn+frCCy/I66+/LmXKlJE8efKkaHtDhgyR6dOnyxtvvCENGjSQI0eOyJ49exKsd/HiRenatasZd7Fq1SopUKBAGh8ZAAAAkLmlq2CRK1cuCQ0NlRw5ckjhwoXNMjcIaNBo1qxZird19uxZE1QmT54sPXr0MMvKli1rAoa3c+fOSZs2bUy4WLZsmSlDUnQdfbjOnDmT6mMEAAAAMqN01RUqObVr107V+rt37zYhoEmTJsmupy0V58+fl++++y7ZUKFGjx5t1nEfkZGRqSoTAAAAkFllmGCRM2dOn++Dg4PNQG5vly9f9vw/e/bsKdpu69atZdu2bbJ27doUda2KiYnxPKKjo1NcfgAAACAzS3fBQrtC6UDqq9FxEDpmwpv3Z1KUL1/ehIslS5Yku50nnnhCxowZI3fffbcZOJ4cHQAeERHh8wAAAACQzsZYqFKlSsm6devMQOrw8HAzs1NiGjduLOPGjZNZs2ZJ3bp15cMPP5QdO3ZIzZo1zfPZsmWT559/XgYPHmzCSv369eX48eNmMHjv3r19ttW/f38TZtq2bWtmn4o/DgMAAABABmuxGDRokGTJkkUqV65sWiWSmvq1RYsWMnToUBMc6tSpYwZrd+/e3Wcdff7ZZ5+VYcOGSaVKlaRLly5y7NixRLenU9WOGDHCdI1as2bNdTk2AAAAILMKcuIPVECK6axQOoj7kYkLJSrm6t23AAAAkHrlCkfIlEfv8HcxArq+GxMTc9VhAOmuxQIAAABAxkOwAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYC7HfBIrlyymh2f1dCgAAgMwpMn+4v4uAFCBYpIGB7WpIRESEv4sBAACQacXGOZIlOMjfxUAy6AoFAACAdI9Qkf4RLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAg04iNc/xd
hIAV4u8CZAYTvtwqxy/4uxQAAACBLTJ/uLzQoaa/ixGwCBZp4LcT5yUqJtbfxQAAAAD8hq5QAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrmSJYHD58WIKCgmTLli3+LgoAAAAQkDJFsAAAAADgXwQLAAAAABk3WMybN0+qVasm2bNnl3z58knTpk3l/Pnz5rn33ntPKlWqJNmyZZOKFSvK22+/7fPa9evXS82aNc3ztWvXls2bN/s8HxsbK71795bSpUub7VeoUEEmTpzos07Pnj2lffv28vrrr0uRIkVMGfr16yeXL1++AUcPAAAAZC4h/tjpkSNHpGvXrjJ27Fjp0KGDnD17VlatWiWO48js2bNl2LBhMnnyZBMeNDQ8+uijkjNnTunRo4ecO3dO2rZtK82aNZMPP/xQDh06JE8//bTP9uPi4qR48eLyySefmMCwZs0aeeyxx0yAuO+++zzrLVu2zCzTr/v375cuXbrIzTffbPYHAAAAIAMEiytXrkjHjh2lZMmSZpm2Xqjhw4fL+PHjzXNKWx127dol7777rgkWc+bMMcHh/fffNy0WVapUkV9//VWeeOIJz/azZs0qI0aM8Hyv21i7dq18/PHHPsEiT548JsBkyZLFtIy0adNGlixZkmSwuHjxonm4zpw5cx3ODgAAAJDx+CVY1KhRQ5o0aWLCRIsWLaR58+bSqVMnCQ0NlQMHDphuTN6Vew0huXLlMv/fvXu3VK9e3YQKV926dRPsY8qUKTJjxgyJioqSv/76Sy5dumRaI7xpKNFQ4dLWi+3btydZ7tGjR/sEFgAAAAB+HGOhlfnFixfLwoULpXLlyjJp0iQzDmLHjh3m+enTp5upY92HLv/xxx9TvP25c+fKoEGDTED57rvvzDYefvhhEy68acuGN52yVltDkjJkyBCJiYnxPKKjo1N97AAAAEBm5JcWC7cSX79+ffPQMRXaJeqHH36QokWLysGDB6Vbt26Jvk4Hdf/nP/+Rv//+29NqET906Hbq1asnffv29SzTlhBbYWFh5gEAAAAgHQSLdevWmbEM2gWqYMGC5vvjx4+b0KBdjZ566inT9ally5ZmTMPGjRvl1KlTMnDgQHnggQfkpZdeMl2ltAVBPxxPZ3byVr58eZk1a5YsWrTIjK/QILJhwwbzfwAAAACZJFhERETIypUr5c033zQDoLW1Qgdst2rVyjyfI0cOGTdunDz33HNmNigdizFgwADzXHh4uHz55Zfy+OOPm1mjtCvVv/71L7n33ns92+/Tp4+ZTUpnedKWEZ2BSlsvtOsVAAAAgLQX5Ogcr7gmGoq0ZeWRiQslKibW38UBAAAIaOUKR8iUR+/wdzEyZX1Xxxdr40By+ORtAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAayH2m0CxfDklNLu/SwEAABDYIvOH+7sIAY1gkQYGtqshERER/i4GAABAwIuNcyRLcJC/ixGQ6AoFAACATINQ4T8ECwAAAADWCBYAAAAArBEsAAAAAFgjWAAAAACwRrAAAAAAYI1gAQAAAMAawQIAAACANYIFAAAAAGsECwAAAADWCBYAAADAdRQb50ggCPF3ATKDCV9uleMX/F0KAAAApDeR+cPlhQ41JRAQLNLAbyfOS1RMrL+LAQAAAPgNXaEAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAAAIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALBGsAAAAABgjWABAAAAwBrBAgAA
AIA1ggUAAAAAawQLAAAAANYIFgAAAACsESwAAAAAWCNYAAAAALCW4YPFvHnzpFq1apI9e3bJly+fNG3aVM6fPy933XWXDBgwwGfd9u3bS8+ePc3/X3zxRbntttsSbK9GjRoycuTIG1Z+AAAAIDPI0MHiyJEj0rVrV+nVq5fs3r1bli9fLh07dhTHca762m7dusn69evlwIEDnmU7d+6Ubdu2yQMPPJDoay5evChnzpzxeQAAAADIBMHiypUrJkyUKlXKtFz07dtXwsPDr/raKlWqmNaJOXPmeJbNnj3btGKUK1cu0deMHj1acuXK5XlERkam6fEAAAAAGVWGDhYaDJo0aWICRefOnWX69Oly6tSpFL9eWy3cYKGtHB999JFZlpQhQ4ZITEyM5xEdHZ0mxwEAAABkdBk6WGTJkkUWL14sCxculMqVK8ukSZOkQoUKcujQIQkODk7QJery5cs+32s3qr1798qmTZtkzZo1Jih06dIlyf2FhYVJRESEzwMAAABABg8WKigoSOrXry8jRoyQzZs3S2hoqMyfP18KFChgukq5YmNjZceOHT6vLV68uDRs2NB0gdJHs2bNpGDBgn44CgAAACBjC5EMbN26dbJkyRJp3ry5CQT6/fHjx6VSpUqSM2dOGThwoHz99ddStmxZmTBhgpw+fTrBNrTr0/Dhw+XSpUvyxhtv+OU4AAAAgIwuQwcL7Yq0cuVKefPNN80MTSVLlpTx48dLq1atTLenrVu3Svfu3SUkJESeeeYZadSoUYJtdOrUSZ588knTrUqnowUAAACQekFOSuZmRaI0zOjsUI9MXChRMbH+Lg4AAADSmXKFI2TKo3dIRq/v6sRFVxtfnOHHWAAAAADwP4IFAAAAAGsECwAAAADWCBYAAAAArBEsAAAAAFgjWAAAAACwRrAAAAAAYI1gAQAAAMAawQIAAACANYIFAAAAAGsECwAAAADWCBYAAAAArBEsAAAAAFgjWAAAAACwRrAAAAAAYC3EfhMoli+nhGb3dykAAACQ3kTmD5dAQbBIAwPb1ZCIiAh/FwMAAADpUGycI1mCgySzoysUAAAAcB1lCYBQoQgWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAA6VRsnCMZRYi/C5AZTPhyqxy/4O9SAAAAIDOJzB8uL3SoKRkFwSIN/HbivETFxPq7GAAAAIDf0BUKAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAQGAGi2nTpknRokUlLi7OZ/k999wjvXr1kgMHDpj/FypUSMLDw6VOnTry/fff+6xbqlQpee2118z6N910k5QoUcJsFwAAAECABIvOnTvLiRMnZNmyZZ5lJ0+elG+//Va6desm586dk9atW8uSJUtk8+bN0rJlS2nXrp1ERUX5bGf8+PFSu3Zts07fvn3liSeekL179/rhiAAAAICMLUMGizx58kirVq1kzpw5nmXz5s2T/PnzS6NGjaRGjRrSp08fqVq1qpQvX15eeeUVKVu2rHzxxRc+29HwoYGiXLly8vzzz5vXe4eV+C5evChnzpzxeQAAAADIoMFCacvEp59+air7avbs2XL//fdLcHCwabEYNGiQVKpUSXLnzm26Q+3evTtBi0X16tU9/w8KCpLChQvLsWPHktzn6NGjJVeuXJ5HZGTkdTxCAAAAIOPIsMFCuzY5jiNff/21REdHy6pVq0zYUBoq5s+fb8ZQ6PItW7ZItWrV5NKlSz7byJo1q8/3Gi7ij9vwNmTIEImJifE8dL8AAAAAREIkg8qWLZt07NjRtFTs379fKlSoILfccot5
7ocffpCePXtKhw4dzPfagnH48GHrfYaFhZkHAAAAgEwSLJS2ULRt21Z27twpDz74oGe5jqv47LPPTKuGtkIMHTo02ZYIAAAAAAHaFUo1btxY8ubNa2ZyeuCBBzzLJ0yYYAZ416tXz4SLFi1aeFozAAAAAKS9DN1ioQO1f//99wTL9TMqli5d6rOsX79+Pt8n1jVKx2IAAAAACLAWCwAAAADpA8ECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGAtxH4TKJYvp4Rm93cpAAAAkJlE5g+XjIRgkQYGtqshERER/i4GAAAAMpnYOEeyBAdJRkBXKAAAACCdypJBQoUiWAAAAACwRrAAAAAAYI1gAQAAAMAawQIAAACANYIFAAAAAGsECwAAAADWCBYAAAAArBEsAAAAAFgjWAAAAACwRrAAAAAAYI1gAQAAAMBaiP0mApfjOObrmTNn/F0UAAAAIM259Vy33pscgoWFEydOmK+RkZH+LgoAAABw3Zw9e1Zy5cqV7DoECwt58+Y1X6Oioq56opH50rsGyujoaImIiPB3cXADce0DF9c+MHHdAxfXXjwtFRoqihYtKldDsLAQHPx/Q1Q0VATyGy6Q6XXn2gcmrn3g4toHJq574OLaS4pvoDN4GwAAAIA1ggUAAAAAawQLC2FhYTJ8+HDzFYGFax+4uPaBi2sfmLjugYtrn3pBTkrmjgIAAACAZNBiAQAAAMAawQIAAACANYIFAAAAAGsEi2s0ZcoUKVWqlGTLlk1uu+02Wb9+vb+LBEujR4+WOnXqyE033SQFCxaU9u3by969e33W+fvvv6Vfv36SL18+CQ8Pl3vvvVf++OMPn3X0AxPbtGkjOXLkMNt57rnn5MqVKzf4aHCtxowZI0FBQTJgwADPMq575vXbb7/Jgw8+aK5t9uzZpVq1arJx40bP8zoMcdiwYVKkSBHzfNOmTeXnn3/22cbJkyelW7duZp773LlzS+/eveXcuXN+OBqkVGxsrAwdOlRKly5trmvZsmXllVdeMdfbxbXPHFauXCnt2rUzH+6mv9sXLFjg83xaXedt27bJHXfcYeqF+qF6Y8eOlYCkg7eROnPnznVCQ0OdGTNmODt37nQeffRRJ3fu3M4ff/zh76LBQosWLZyZM2c6O3bscLZs2eK0bt3aKVGihHPu3DnPOo8//rgTGRnpLFmyxNm4caNz++23O/Xq1fM8f+XKFadq1apO06ZNnc2bNzvffPONkz9/fmfIkCF+Oiqkxvr1651SpUo51atXd55++mnPcq575nTy5EmnZMmSTs+ePZ1169Y5Bw8edBYtWuTs37/fs86YMWOcXLlyOQsWLHC2bt3q3H333U7p0qWdv/76y7NOy5YtnRo1ajg//vijs2rVKqdcuXJO165d/XRUSIlRo0Y5+fLlc7766ivn0KFDzieffOKEh4c7EydO9KzDtc8c9PfxSy+95Hz22WeaGp358+f7PJ8W1zkmJsYpVKiQ061bN1OH+Oijj5zs2bM77777rhNoCBbX4NZbb3X69evn+T42NtYpWrSoM3r0aL+WC2nr2LFj5pfQihUrzPenT592smbNav4AuXbv3m3WWbt2recXWHBwsHP06FHPOlOnTnUiIiKcixcv+uEokFJnz551ypcv7yxevNhp2LChJ1hw3TOv559/3mnQoEGSz8fFxTmFCxd2xo0b51mm74ewsDBTcVC7du0y74UNGzZ41lm4cKETFBTk/Pbbb9f5CHCt2rRp4/Tq1ctnWceOHU3FUHHtM6f4wSKtrvPbb7/t5MmTx+f3vf5+qVChghNo6AqVSpcuXZKffvrJNJW5goODzfdr1671a9mQtmJiYszXvHnzmq963S9fvuxz7StWrCglSpTwXHv9ql0pChUq
5FmnRYsWcubMGdm5c+cNPwaknHZ10q5M3tdXcd0zry+++EJq164tnTt3Nt3XatasKdOnT/c8f+jQITl69KjPtc+VK5fp/up97bVrhG7Hpevr34V169bd4CNCStWrV0+WLFki+/btM99v3bpVVq9eLa1atTLfc+0DQ1pdZ13nzjvvlNDQUJ+/Adqd+tSpUxJIQvxdgIzmzz//NH0zvSsQSr/fs2eP38qFtBUXF2f62NevX1+qVq1qlukvH/2lob9g4l97fc5dJ7H3hvsc0qe5c+fKpk2bZMOGDQme47pnXgcPHpSpU6fKwIED5cUXXzTX/6mnnjLXu0ePHp5rl9i19b72Gkq8hYSEmBsSXPv064UXXjDBX28SZMmSxfxdHzVqlOlHr7j2gSGtrrN+1fE6Sf0NyJMnjwQKggWQxN3rHTt2mDtYyNyio6Pl6aeflsWLF5tBdwisGwh6F/K1114z32uLhf7cv/POOyZYIPP6+OOPZfbs2TJnzhypUqWKbNmyxdxM0gG+XHvg2tEVKpXy589v7m7EnxFGvy9cuLDfyoW08+STT8pXX30ly5Ytk+LFi3uW6/XVrnCnT59O8trr18TeG+5zSH+0q9OxY8fklltuMXeh9LFixQp56623zP/1rhPXPXPSWWAqV67ss6xSpUpmhi/va5fc73v9qu8fbzobmM4iw7VPv3TWNm21uP/++003xoceekieeeYZMzug4toHhrS6zvwN+AfBIpW0ibxWrVqmb6b3XS/9vm7dun4tG+zouC4NFfPnz5elS5cmaNbU6541a1afa6/9J7US4l57/bp9+3afX0J6J1ynqItfgUH60KRJE3PN9I6l+9C72Nolwv0/1z1z0q6O8aeU1j73JUuWNP/X3wFaKfC+9tp9RvtVe197DZ0aUF36+0P/Lmg/baRPFy5cMH3kvelNQ71uimsfGNLqOus6Oq3t5cuXff4GVKhQIaC6QRn+Hj2eUaeb1RkDPvjgAzNbwGOPPWamm/WeEQYZzxNPPGGmnFu+fLlz5MgRz+PChQs+047qFLRLly41047WrVvXPOJPO9q8eXMzZe23337rFChQgGlHMxjvWaEU1z3zTi8cEhJiph79+eefndmzZzs5cuRwPvzwQ5+pKPX3++eff+5s27bNueeeexKdirJmzZpmytrVq1eb2cWYcjR969Gjh1OsWDHPdLM6FalOET148GDPOlz7zDPjn04Drg+t9k6YMMH8/5dffkmz66wzSel0sw899JCZblbrifq7hOlmkWKTJk0yFQ39PAudflbnNkbGpr9wEnvoZ1u49BdN3759zbRy+kujQ4cOJnx4O3z4sNOqVSszh7X+oXr22Wedy5cv++GIkFbBguueeX355ZcmFOrNoooVKzrTpk3zeV6noxw6dKipNOg6TZo0cfbu3euzzokTJ0wlQz8HQacYfvjhh01lBunXmTNnzM+4/h3Pli2bU6ZMGfNZB97ThXLtM4dly5Yl+rddw2VaXmf9DIwGDRqYbWho1cASiIL0H3+3mgAAAADI2BhjAQAAAMAawQIAAACANYIFAAAAAGsECwAAAADWCBYAAAAArBEsAAAAAFgjWAAAAACwRrAAAAAAYI1gAQAAAMAawQIAcEMcPXpU+vfvL2XKlJGwsDCJjIyUdu3ayZIlS25oOYKCgmTBggU3dJ8AEAhC/F0AAEDmd/jwYalfv77kzp1bxo0bJ9WqVZPLly/LokWLpF+/frJnzx5/FxEAYCnIcRzHdiMAACSndevWsm3bNtm7d6/kzJnT57nTp0+bwBEVFWVaNLQFIzg4WFq2bCmTJk2SQoUKmfV69uxp1vVubRgwYIBs2bJFli9fbr6/6667pHr16pItWzZ57733JDQ0VB5//HF5+eWXzfOlSpWSX375xfP6kiVLmtADALBHVygAwHV18uRJ+fbbb03LRPxQoTRUxMXFyT333GPWXbFihSxevFgOHjwoXbp0SfX+/v3vf5v9rFu3TsaO
HSsjR44021MbNmwwX2fOnClHjhzxfA8AsEdXKADAdbV//37RxvGKFSsmuY62Umzfvl0OHTpkxl6oWbNmSZUqVUzlv06dOinen7ZYDB8+3Py/fPnyMnnyZLP9Zs2aSYECBTxhpnDhwtbHBgD4By0WAIDrKiU9bnfv3m0ChRsqVOXKlU0A0OdSQ4OFtyJFisixY8dStQ0AQOoRLAAA15W2GuhMTLYDtHXcRfyQogPA48uaNavP97pv7WoFALi+CBYAgOsqb9680qJFC5kyZYqcP38+wfM6ILtSpUoSHR1tHq5du3aZ57TlQmk3Jh0X4U0HbqeWBo/Y2NhrOhYAQNIIFgCA605DhVbmb731Vvn000/l559/Nl2c3nrrLalbt640bdrUTEHbrVs32bRpk6xfv166d+8uDRs2lNq1a5ttNG7cWDZu3GjGXujrdRzFjh07Ul0WnRlKx1zo52qcOnXqOhwtAAQmggUA4LrTD8XTwNCoUSN59tlnpWrVqmYwtVbwp06darorff7555InTx658847TdDQ1/z3v//1bENbPYYOHSqDBw82g7nPnj1rwkdqjR8/3swSpeM5atasmcZHCgCBi8+xAAAAAGCNFgsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAsEawAAAAAGCNYAEAAADAGsECAAAAgDWCBQAAAABrBAsAAAAA1ggWAAAAAKwRLAAAAABYI1gAAAAAEFv/D/X0w0X7J7aHAAAAAElFTkSuQmCC",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "try:\n",
+ " import matplotlib.pyplot as plt\n",
+ "\n",
+ " latest_cat = cat_metrics[0] if cat_metrics else None\n",
+ " if latest_cat and latest_cat.get(\"histogram\"):\n",
+ " hist = latest_cat[\"histogram\"]\n",
+ " labels = [e[\"value\"] for e in hist[\"values\"]]\n",
+ " counts = [e[\"count\"] for e in hist[\"values\"]]\n",
+ " if hist[\"other_count\"] > 0:\n",
+ " labels.append(\"(other)\")\n",
+ " counts.append(hist[\"other_count\"])\n",
+ "\n",
+ " fig, ax = plt.subplots(figsize=(8, 4))\n",
+ " ax.barh(labels, counts, color=\"steelblue\", edgecolor=\"white\")\n",
+ " ax.set_title(f\"vehicle_type distribution — {latest_cat['metric_date']}\")\n",
+ " ax.set_xlabel(\"Count\")\n",
+ " plt.tight_layout()\n",
+ " plt.show() # pragma: allowlist secret\n",
+ " else:\n",
+ " print(\"No categorical histogram data available.\")\n",
+ "except ImportError:\n",
+ " print(\"Install matplotlib to visualize: pip install matplotlib\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Feature view aggregates"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Date: 2024-12-01 Total rows: 5000 Features w/ nulls: 0 Max null rate: 0.0\n",
+ "Date: 2025-01-01 Total rows: 4922 Features w/ nulls: 0 Max null rate: 0.0\n",
+ "Date: 2025-01-01 Total rows: 576 Features w/ nulls: 0 Max null rate: 0.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "view_metrics = monitoring.get_feature_view_metrics(\n",
+ " project=\"monitoring_demo\",\n",
+ " feature_view_name=\"driver_stats\",\n",
+ " data_source_type=\"batch\",\n",
+ ")\n",
+ "\n",
+ "for m in view_metrics[:3]:\n",
+ " print(f\"Date: {m['metric_date']} Total rows: {m['total_row_count']} \"\n",
+ " f\"Features w/ nulls: {m['features_with_nulls']} Max null rate: {m.get('max_null_rate', 'N/A')}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Feature service aggregates"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Date: 2025-01-01 Total features: 4 Avg null rate: 0.0\n",
+ "Date: 2025-01-01 Total features: 4 Avg null rate: 0.0\n",
+ "Date: 2025-02-28 Total features: 4 Avg null rate: 0.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "svc_metrics = monitoring.get_feature_service_metrics(\n",
+ " project=\"monitoring_demo\",\n",
+ " feature_service_name=\"driver_service\",\n",
+ " data_source_type=\"batch\",\n",
+ ")\n",
+ "\n",
+ "for m in svc_metrics[:3]:\n",
+ " print(f\"Date: {m['metric_date']} Total features: {m['total_features']} \"\n",
+ " f\"Avg null rate: {m.get('avg_null_rate', 'N/A')}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Baseline metrics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Baseline mean: 0.4989\n",
+ "Baseline stddev: 0.1975\n",
+ "Baseline null_rate: 0.0000\n"
+ ]
+ }
+ ],
+ "source": [
+ "baseline = monitoring.get_baseline(\n",
+ " project=\"monitoring_demo\",\n",
+ " feature_view_name=\"driver_stats\",\n",
+ " feature_name=\"conv_rate\",\n",
+ " data_source_type=\"batch\",\n",
+ ")\n",
+ "\n",
+ "if baseline:\n",
+ " print(f\"Baseline mean: {baseline[0]['mean']:.4f}\")\n",
+ " print(f\"Baseline stddev: {baseline[0]['stddev']:.4f}\")\n",
+ " print(f\"Baseline null_rate: {baseline[0]['null_rate']:.4f}\")\n",
+ "else:\n",
+ " print(\"No baseline found.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 7: Visualize a Feature Distribution\n",
+ "\n",
+ "Use the histogram stored in the metrics to plot a distribution."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "/opt/homebrew/Cellar/python@3.12/3.12.11/Frameworks/Python.framework/Versions/3.12/lib/python3.12/pty.py:95: DeprecationWarning: This process (pid=12140) is multi-threaded, use of forkpty() may lead to deadlocks in the child.\n",
+ " pid, fd = os.forkpty()\n"
+ ]
+ }
+ ],
+ "source": [
+ "!uv pip install -q 'matplotlib'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAAGGCAYAAABmGOKbAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAARL1JREFUeJzt3Qd4FOXa//E7Cb2F3kNRkd4EBDwoUgQRUYr1KKIiKgIqqJQjCIIIBxVQATk2sCHKK6hU6eARUGlKURSlRCEUEQIoLZn3up/3P/vfDQmEMJOZ3f1+rmuNmV12752dze5vnhZjWZYlAAAAAADAcbHO3yUAAAAAAFCEbgAAAAAAXELoBgAAAADAJYRuAAAAAABcQugGAAAAAMAlhG4AAAAAAFxC6AYAAAAAwCWEbgAAAAAAXELoBgAAAADAJYRuAACyYNiwYRITExOyrVKlSnLvvfe6/tg7d+40jz116tTANn3cAgUKSHbRx9d9AAAAzo3QDQCICKtWrTIh8PDhwxJO5s2b59vw6ufanDZz5ky5/fbb5ZJLLpF8+fJJ1apV5YknnsjwePr888/liiuukDx58kiFChVk6NChcubMmZDbLFmyRO6//365/PLLzX3qfT/wwAOyd+/es+7v2muvNScy0l6uv/76TD8HrfXBBx+UEiVKSP78+aVFixayfv36s2730Ucfyd133y1VqlQxj6GPfaGy87EAINzl8LoAAACcCt3PPvusafEtXLiwJzVs27ZNYmNjLzjYTpw48YLCbcWKFeXvv/+WnDlzZqFKZ2rTx8+RI3K+RmiALFu2rAmIGqI3bdokEyZMMPtAw2TevHkDt50/f7507NjRBMhXX33V3Pa5556T/fv3y2uvvRa43YABA+TQoUNy6623mtD566+/mvucM2eObNy4UUqXLh1SQ/ny5WXUqFEh27SmzEhNTZX27dvLd999J0899ZQUL15cJk2aZGpct26deXyb1qjbGjVqJH/88ccF76vsfCwAiASR82kJAIgox48fNy1o4SR37tyu3r+2pGrgyZUrl2lh9ZLXj++0//mf/zmrFbZBgwbSrVs3+eCDD0wLte3JJ5+UOnXqyMKFCwMnHgoVKiTPP/+8PPbYY1KtWjWzbezYsdKsWbOQEzHact28eXMTvjWoB4uPjzehP6v164mnGTNmyC233GK23XbbbaaVXVvhp02bFrjte++9J+XKlTN11apVy9ePBQCRgO7lABAhfv/9d+nevbtpGdPwV7lyZenZs6ecOnUqcBttadNWt6JFi5rurk2aNJG5c+eG3M/y5ctNN9CPP/5YRo4caVrfNGC1atVKtm/fHrhd7969zRjiv/7666xa7rzzTtOKl5KSckHjo7du3Sr//Oc/pUiRIiasqO+//960XmvXXK1D71e77Aa3mum/1xY3pc/b7pqrY59t77//vglR2mKpz/+OO+6QxMTETNX33//+17TU6eNfeuml8p///Cfd26Ud03369GnT+q4tf/pvixUrZp7XokWLzPV6W21JVsFdioPHbb/44osyfvx487j6uuo+Sm9Md/Br3LZtW3PCQo+F4cOHi2VZZ72++jNY2vs8V232trQt4Bs2bJB27dqZAKrHhh4za9asCbmN3r/+26+++kr69esX6J7cqVMnOXDggHglvW7PWpP64YcfAtt0/+tFW8aDW/ofeeQRs581kNquueaas3o+6DY9/oLvM+2JlWPHjl1w/fq4pUqVks6dOwe26b7VMPzZZ5/JyZMnA9sTEhIuuEeGV48FAJGAlm4AiAB79uyRK6+8MjDOUlvaNITrl2MNxdoyum/fPrnqqqvM748++qgJgO+8847cdNNN5nZ2wLCNHj3afFnWVr0jR47ImDFj5K677pKvv/7aXK/jXzWUaWjXIG/T+589e7YJbXFxcRf0POxuuNpiaAdFDagaJO+77z4TuLds2SKvv/66+amBTgOcfvn/6aef5MMPP5Rx
48aZ7q52EFB68mDIkCEmFGiLpYY77RasAUiD4rm6o2vX4TZt2pj70pCpoUhb8zR0nI/eXrsL62Pq65OcnCxr16413ZWvu+46eeihh8xrp89RWwTTM2XKFDlx4oR5XTV0a2DT1u706EkObUnVkyn6ei1YsCAw1ljD94XITG3B9PW4+uqrTeDu37+/6fquJyc0zK5YsUIaN24ccvs+ffqYkytanwZ+PbGgJ3J0DLBfJCUlmZ/28aT0eFENGzYMua2e4NATVPb1GdFArZfg+7TpMawnIPREmR5fPXr0kGeeeSZTwwj0cXWMedqAq8edvl/0vmvXri1OyM7HAoCIYAEAwt4999xjxcbGWt9+++1Z16Wmppqfjz/+uKZY68svvwxcd/ToUaty5cpWpUqVrJSUFLNt2bJl5nbVq1e3Tp48Gbjtyy+/bLZv2rQpcL/lypWzunTpEvJ4H3/8sbndypUrM13/0KFDzb+58847z7rur7/+Omvbhx9+eNZjvPDCC2bbjh07Qm67c+dOKy4uzho5cmTIdn0eOXLkOGt7Wh07drTy5Mlj7dq1K7Bt69at5j7TfoxWrFjR6tatW+D3unXrWu3btz/n/ffq1eus+1H6PHR7oUKFrP3796d73ZQpUwLb9HF1W58+fQLb9DXSx8+VK5d14MCBkNdXf57vPjOqTel2fd2C95M+zi+//BLYtmfPHqtgwYLWNddcE9im96//tnXr1oFjU/Xt29fs08OHD1t+0b17d1PTTz/9dNZxtnv37rNu36hRI6tJkybnvM8RI0aYf79kyZKQ7ffff781bNgw65NPPrHeffdd66abbjK3u+222zJVa/78+c19pDV37lxzPwsWLEj339WsWdNq3rx5ph7Di8cCgEhAfx8ACHPa6vnpp59Khw4dzmp9U3aXYJ0QSlui7G7bSrsAawuqtjRql9lg2rKsLeQ2bcVU2ups36+2TOv9BneH1ZZKHcMZ/DiZ9fDDD5+1LXgCK23xPXjwoGnJVenNlpzerNS6j7SVW/+tfdFWc21VX7ZsWYb/VluOv/jiCzNplk6uZatevbrpwn0+2oKuLcA///yzZFWXLl0CLfaZoa3FNn2N9HdtOV28eLG4RfeTjm/W/aTDAGxlypQxwwW0e7628gfT4y64u7oeX3o/u3btEj/QcclvvfWWmcE8eGIwnUAuo/H7OoTAvj49K1euNMMN9Fhs2bJlyHX6WNrqr702unbtarppa0u3DvNI20U/Pfq4GdUUXLcTsvOxACASELoBIMxpV2kNNOebpEjDjC6DlJYGSPv6YMEhU2lXYPXnn38GtmkXc/2CrcsnKQ3fGsI1jKddwzozdDx2Wjr7s05Opd1tNYBrALVvp93ez0cDrzbManDSfxt80XG1OuP0ufatPr/g0GVLb1+mpV26tcu/TjCl3W113LmOUb/YfZIR7e4bHHqVPrYKHt/uNN1POqwgo+NLT3qkHT+fmeMrLX0ttMt3Vi6ZOVZsX375pZkfQU+s6NCE9E4CBY9bDj4pFHySKNiPP/5ohnDo+/TNN9/MVB0a+JV9wkRPnqR9Xva8Cfq4GdUUXHdmZedjAUCkY0w3ACBdGY3HDp6US1ucdfIwbY3TFk0dy63BSMN4VqT3ZV1bBXWmZA2s9erVM63zGuJ07HJGY5uD6W30BIAu85Tec9L7c4uOGf/ll19Mq6W2BGvY0jHnkydPDpkN+1ycDjAZnQzJ7KR32Xl8paW9KLQHRlboLOTpTTyXli6DpfMcaDjWuQ7SLoumrfdK19rWScKC6TbtTZKWnnDQeQF0dnI9KVWwYMFM1Wzfv554Uvo+0PWwg+3YscO8B7Wu9Nb/trdldukxW3Y+FgBEOkI3AIQ5bbHVyas2b9583rWddR3p9Frg7OuzQkPxyy+/bFrbNRTpl3K7+/fF0lbPJUuWmC65OqGULb3u2hmFSZ31W4Octhjbrb4Xsm819Kb3eOnty/ToxGcaFPWiPQE0iOsEa3bozkqP
gHOdYNDu/8HPUye1Uvq6BLcoawt8sPS6dWe2Nt1POht+RseXtsCnDahZoS3P9szvFyozQVBPkOjJnJIlS5pwnN4JGT3xo3RCvOCArZPO/fbbb6bbfDCdZV8Dt7YM67Fsh/bMsIdy2MML6tate9bzt9f61rq0hV6PgeAJznTiQ31tLvTYz87HAoBIR+gGgDCnX3p1LK0uiaVBIO24bg2cGp5uuOEGM0P06tWrpWnTpoG1sHW2YQ1kNWrUyNLja6u2zpStM6HrbNnaFdzp1tC0rZ/6PNKy1/ROGyZ1jOygQYNMcNd9FBwk9X61FVFncs/o8TXo6Zj53bt3B7pEa7d0Het9Phq4gu9bQ9xll10W0tU6uO5zzaKeWbr+8yuvvBJ4fvq7zn6ty3fZJ1f0een4Yj1ubJMmTTrrvjJbm96fBktt0ddu7HbA1xnzdWy0ju/XE0MXSwPrhYTWC6Hdp/U56PtJX9uMxtHXrFnTrA6g7xud4d0+Rl977TVzbNnrVtvvL33f6UoCOndAesMUlJ6w0jHSweOk9bWz1/G25w/QEyatW7dO9z70cbVlXucwsGvQuQt0LW2d7+FC15DPzscCgEhH6AaACKBLbGn35ebNm5uWNh1Hq1099UuwTmKlgWngwIFmSS1dR1mXDNMWWA3K2mX0k08+yfJaurp0kAbJp59+2rTmZbVreXo0qGnLsIZ6XfNaJ2jT56k1p6VrcCutQ9fg1qCpAUBbujW8aPDWQKhBU7v36n3MmjXL7C9dFi0jGtb1ZIJO9KVrMevyW7rcmIav843P1hMZumSW1qb7W0+KaFgJnuzMrltfEw1XGuK0/qzQiay0Vu1KrUt0aZd6XdLtX//6VyBEahdnHXOvz0FDou6fOXPmpDu2/UJq032sLaMasHU/abdsXTJMjwl9/fxOW7i1ZVmXO9P3jF5sOp+ALvFme+GFF0wXdA3puj+0l4me3NDeC/YcCUqX2Pvmm2/MuvJ6oiZ4bW49AWOf9NAJAXVte73oe0mHaOixqWuZ6/Gp77Hz0fCrPUy0R4VOiqhLkumJFB02oMdwMD3hohd7PL6eHLADvr7f9OKXxwKAiOD19OkAAGfokla6dFiJEiWs3LlzW5dccolZ8il42S9dzumWW26xChcubJbBuvLKK605c+aE3I+9pNSMGTPOu6SU7emnnzbXXXbZZVmq3V4yzF7WKthvv/1mderUydQcHx9v3XrrrWYpqrRLVtnLMekyZrp8Wtrlw3QppmbNmpnljvRSrVo1s3+2bdt23vpWrFhhNWjQwCyJpft18uTJgZrPtWTYc889Z/ax1p43b17zmLpE2alTpwK3OXPmjFnmS1+3mJiYwH3a+1uXqEoroyXD9Hnpa9ymTRsrX758VqlSpUyd9nJwNt3PutSb3qZIkSLWQw89ZG3evPms+8yoNpXe/l+/fr3Vtm1bq0CBAua+W7RoYa1atSrkNvaSYWmXt8toKbPsoo+d0SW9Za5mzZpl1atXz7zXypcvbw0ePDjkdbWPh4zuU6+z/frrr+a41qX79H2p+06PNz3OgpdVO59Dhw6ZZc6KFStm7kPrTm8ZQfvYTe+S9jX1w2MBQLiL0f94HfwBAAAAAIhELBkGAAAAAIBLGNMNAHCNztatl3PRscYZLR8FAAAQ7gjdAADXvPjii2dNrJSWvfYvAABAJGJMNwDANTobtL3WcEZ0tmuddRsAACASEboBAAAAAHAJE6kBAAAAAOASxnSLSGpqquzZs0cKFiwoMTExXpcDAAAAAPA57TR+9OhRKVu2rMTGZtyeTegWMYE7ISHB6zIAAAAAAGEmMTFRypcvn+H1hG4R08Jt76xChQp5XQ4AAAAAwOeSk5NN462dJzNC6NbZ5P5fl3IN3IRuAAAAAEBmnW+IMhOpAQAAAADgEkI3AAAAAAAuIXQDAAAAAOASQjcAAAAAAC4hdAMAAAAA4BJCNwAAAAAALiF0AwAAAADgEkI3AAAAAAAuIXQDAAAA
ABCJoXvYsGESExMTcqlWrVrg+hMnTkivXr2kWLFiUqBAAenSpYvs27cv5D52794t7du3l3z58knJkiXlqaeekjNnznjwbAAAAAAACJVDPFazZk1ZvHhx4PccOf5/SX379pW5c+fKjBkzJD4+Xnr37i2dO3eWr776ylyfkpJiAnfp0qVl1apVsnfvXrnnnnskZ86c8vzzz3vyfAAAAAAA8E3o1pCtoTmtI0eOyFtvvSXTpk2Tli1bmm1TpkyR6tWry5o1a6RJkyaycOFC2bp1qwntpUqVknr16smIESNkwIABphU9V65cHjwjAAAiW0qqJXGxMV6X4Zs6AADwdej++eefpWzZspInTx5p2rSpjBo1SipUqCDr1q2T06dPS+vWrQO31a7net3q1atN6NaftWvXNoHb1rZtW+nZs6ds2bJF6tev79GzAgAgcmnQHT1rgyQePOZZDQnFC8jATnzOAwD8z9PQ3bhxY5k6dapUrVrVdA1/9tln5eqrr5bNmzdLUlKSaakuXLhwyL/RgK3XKf0ZHLjt6+3rMnLy5ElzsSUnJzv8zAAAiGwauLcn8fkJAICvQ3e7du0C/1+nTh0TwitWrCgff/yx5M2b17XH1dZ0DfgAAAAAAETNkmHaqn355ZfL9u3bzTjvU6dOyeHDh0Nuo7OX22PA9Wfa2czt39MbJ24bNGiQGTNuXxITE115PgAAAACA6Oar0H3s2DH55ZdfpEyZMtKgQQMzC/mSJUsC12/bts0sEaZjv5X+3LRpk+zfvz9wm0WLFkmhQoWkRo0aGT5O7ty5zW2CLwAAAAAARFT38ieffFI6dOhgupTv2bNHhg4dKnFxcXLnnXeaJcK6d+8u/fr1k6JFi5pg3KdPHxO0dRI11aZNGxOuu3btKmPGjDHjuAcPHmzW9tZgDQBAOPHLbNx+qQMAgEjgaej+7bffTMD+448/pESJEtKsWTOzHJj+vxo3bpzExsZKly5dzMRnOjP5pEmTAv9eA/qcOXPMbOUaxvPnzy/dunWT4cOHe/isAADIGmYFBwAg8ngauqdPn37O63UZsYkTJ5pLRrSVfN68eS5UBwBA9mNWcAAAIouvxnQDAAAAABBJCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAIh4KamW+IFf6gAAANknRzY+FgAAnoiLjZHRszZI4sFjntWQULyADOxU37PHBwAA3iB0AwCiggbu7UnJXpcBAACiDN3LAQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugEAQETyyxJtfqkDAOANZi8HAAARiaXiAAB+QOgGAAARi6XiAABeo3s5AAAAAAAuIXQDAAAAAOASQjcAAICH/DLRml/qAIBI45sx3aNHj5ZBgwbJY489JuPHjzfbTpw4IU888YRMnz5dTp48KW3btpVJkyZJqVKlAv9u9+7d0rNnT1m2bJkUKFBAunXrJqNGjZIcOXzz1AAAADLEhG8AENl8kUy//fZb+c9//iN16tQJ2d63b1+ZO3euzJgxQ+Lj46V3797SuXNn+eqrr8z1KSkp0r59eyldurSsWrVK9u7dK/fcc4/kzJlTnn/+eY+eDQAAwIVhwjcAiFyedy8/duyY3HXXXfLGG29IkSJFAtuPHDkib731lowdO1ZatmwpDRo0kClTpphwvWbNGnObhQsXytatW+X999+XevXq
Sbt27WTEiBEyceJEOXXqlIfPCgAAAAAAH4TuXr16mdbq1q1bh2xft26dnD59OmR7tWrVpEKFCrJ69Wrzu/6sXbt2SHdz7YKenJwsW7ZsycZnAQAAAACAz7qX61jt9evXm+7laSUlJUmuXLmkcOHCIds1YOt19m2CA7d9vX1dRnR8uF5sGtIBAAAAAIiYlu7ExEQzadoHH3wgefLkydbH1onWdIy4fUlISMjWxweASOGX2Y79UgcAAIBvWrq1+/j+/fvliiuuCGzTidFWrlwpEyZMkC+++MKMyz58+HBIa/e+ffvMxGlKf37zzTch96vX29dlRGdJ79evX0hLN8EbAC4csy4DAAD4NHS3atVKNm3aFLLtvvvuM+O2BwwYYEKwzkK+ZMkS6dKli7l+27ZtZomwpk2bmt/158iRI014L1mypNm2aNEiKVSokNSoUSPDx86dO7e5AAAuHrMuAwAA+DB0FyxYUGrVqhWyLX/+/FKsWLHA9u7du5sW6aJFi5og3adPHxO0mzRpYq5v06aNCdddu3aVMWPGmHHcgwcPNpOzEaoBAAAAAF7zxTrdGRk3bpzExsaalm6d+ExnJp80aVLg+ri4OJkzZ4707NnThHEN7d26dZPhw4d7WjcAAAAAAL4L3cuXLw/5XSdY0zW39ZKRihUryrx587KhOgAAAAAAwmydbgAAAAAAIhWhGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugEAAAAAcAmhGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugEAAAAAcAmhGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugHAh1JSLfEDv9QBAAAQrnJ4XQAA4GxxsTEyetYGSTx4zLMaEooXkIGd6nv2+AAAAJGA0A0APqWBe3tSstdlAAAA4CLQvRwAAAAAAJcQugEAAAAAcAmhGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugEAAAAAcAmhGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAABwXimplviBX+oAgMzKkelbAgAAIGrFxcbI6FkbJPHgMc9qSCheQAZ2qu/Z4wNAVhC6AQAAkCkauLcnJXtdBgCEFbqXAwAAAADgEkI3AAAAAAAuIXQDAAAAAOASQjcAAAAAAC4hdAMAAAAA4BJCNwAAAAAALiF0AwAAAADgEkI3AAAAAACRGLpfe+01qVOnjhQqVMhcmjZtKvPnzw9cf+LECenVq5cUK1ZMChQoIF26dJF9+/aF3Mfu3bulffv2ki9fPilZsqQ89dRTcubMGQ+eDQAAAAAAPgrd5cuXl9GjR8u6detk7dq10rJlS7n55ptly5Yt5vq+ffvK7NmzZcaMGbJixQrZs2ePdO7cOfDvU1JSTOA+deqUrFq1St555x2ZOnWqPPPMMx4+KwAAAAAA/k8O8VCHDh1Cfh85cqRp/V6zZo0J5G+99ZZMmzbNhHE1ZcoUqV69urm+SZMmsnDhQtm6dassXrxYSpUqJfXq1ZMRI0bIgAEDZNiwYZIrVy6PnhkAAAAAAD4a062t1tOnT5fjx4+bbuba+n369Glp3bp14DbVqlWTChUqyOrVq83v+rN27domcNvatm0rycnJgdZyAAAAAACisqVbbdq0yYRsHb+t47ZnzZolNWrUkI0bN5qW6sKFC4fcXgN2UlKS+X/9GRy47evt6zJy8uRJc7FpSAcAAAAAwBct3Zdccon88ccfZ20/fPiwue5CVK1a1QTsr7/+Wnr27CndunUzXcbdNGrUKImPjw9cEhISXH08AAAAAEB0ylLo3rlzp+kOnpa2Hv/+++8XdF/amn3ZZZdJgwYNTBiuW7euvPzyy1K6dGkzQZoG+WA6e7lep/Rn2tnM7d/t26Rn0KBBcuTIkcAlMTHxgmoG
AAAAAMDx7uWff/554P+/+OIL00ps0xC+ZMkSqVSpklyM1NRUE941hOfMmdPcpy4VprZt22aWCNPu6Ep/6uRr+/fvN8uFqUWLFpnlx7SLekZy585tLgAAAAAA+CZ0d+zY0fyMiYkx3cCDaUDWwP3SSy9l+v60xbldu3ZmcrSjR4+amcqXL18eCPTdu3eXfv36SdGiRU2Q7tOnjwnaOnO5atOmjQnXXbt2lTFjxphx3IMHDzZrexOqAQAAAABhFbq1FVpVrlxZvv32WylevPhFPbi2UN9zzz2yd+9eE7Lr1KljAvd1111nrh83bpzExsaalm5t/daZySdNmhT493FxcTJnzhwzFlzDeP78+c3JgOHDh19UXQAAAAAAeDZ7+Y4dOxx5cF2H+1zy5MkjEydONJeMVKxYUebNm+dIPQAAAAAA+GLJMB1rrRdtrbZbwG1vv/22E7UBAAAAABB9ofvZZ581XbgbNmwoZcqUMWO8AQAAAACAA6F78uTJMnXqVDOBGQAAAAAAcHCdbl0/+6qrrsrKPwUAAABck5JqiR/4pQ4AYdrS/cADD5jlvYYMGeJ8RQAAAEAWxcXGyOhZGyTx4DHPakgoXkAGdqrv2eMDiIDQfeLECXn99ddl8eLFZpkvXaM72NixY52qDwAAALggGri3JyV7XQYAZD10f//991KvXj3z/5s3bw65jknVAAAAAAC4iNC9bNmyrPwzAAAAAACiSpYmUgMAAAAAAC61dLdo0eKc3ciXLl2albsFAAAAACCiZCl02+O5badPn5aNGzea8d3dunVzqjYAAAAAAKIvdI8bNy7d7cOGDZNjx7xbngEAAAAAgIgd03333XfL22+/7eRdAoCjUlIt8QO/1AEAAAAftnRnZPXq1ZInTx4n7xIAHBUXGyOjZ20wa7h6JaF4ARnYqb5njw8AAACfh+7OnTuH/G5Zluzdu1fWrl0rQ4YMcao2AHCFBu7tSclelwEAAIAokKXQHR8fH/J7bGysVK1aVYYPHy5t2rRxqjYAAAAAAKIvdE+ZMsX5SgAAAAAAiDAXNaZ73bp18sMPP5j/r1mzptSvzxhFAAAAAAAuKnTv379f7rjjDlm+fLkULlzYbDt8+LC0aNFCpk+fLiVKlMjK3QIAAAAAEFGytGRYnz595OjRo7JlyxY5dOiQuWzevFmSk5Pl0Ucfdb5KAAAAAACipaV7wYIFsnjxYqlevXpgW40aNWTixIlMpAYAAAAAwMW0dKempkrOnDnP2q7b9DoAAAAAAJDF0N2yZUt57LHHZM+ePYFtv//+u/Tt21datWrlZH0AAAAAAERX6J4wYYIZv12pUiW59NJLzaVy5cpm26uvvup8lQAAAAAARMuY7oSEBFm/fr0Z1/3jjz+abTq+u3Xr1k7XBwAAAABAdLR0L1261EyYpi3aMTExct1115mZzPXSqFEjs1b3l19+6V61AAAAAABEaugeP3689OjRQwoVKnTWdfHx8fLQQw/J2LFjnawPAAAAAIDoCN3fffedXH/99Rler8uFrVu3zom6AAAAAACIrtC9b9++dJcKs+XIkUMOHDjgRF0AAAAAAERX6C5Xrpxs3rw5w+u///57KVOmjBN1AQAAAAAQXaH7hhtukCFDhsiJEyfOuu7vv/+WoUOHyo033uhkfQAAAAAARMeSYYMHD5aZM2fK5ZdfLr1795aqVaua7bps2MSJEyUlJUWefvppt2oFAAAAACByQ3epUqVk1apV0rNnTxk0aJBYlmW26/Jhbdu2NcFbbwMAAAAAAC4wdKuKFSvKvHnz5M8//5Tt27eb4F2lShUpUqSIOxUCAAAAABAtodumIbtRo0bOVgMAAAAAQLROpAYAAAAAADKP0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAQCSG7lGjRkmjRo2kYMGCUrJkSenYsaNs27Yt5DYnTpyQXr16SbFixaRA
gQLSpUsX2bdvX8htdu/eLe3bt5d8+fKZ+3nqqafkzJkz2fxsAAAAAADwUehesWKFCdRr1qyRRYsWyenTp6VNmzZy/PjxwG369u0rs2fPlhkzZpjb79mzRzp37hy4PiUlxQTuU6dOyapVq+Sdd96RqVOnyjPPPOPRswIAAAAA4P/kEA8tWLAg5HcNy9pSvW7dOrnmmmvkyJEj8tZbb8m0adOkZcuW5jZTpkyR6tWrm6DepEkTWbhwoWzdulUWL14spUqVknr16smIESNkwIABMmzYMMmVK5dHzw4AAAAAEO18NaZbQ7YqWrSo+anhW1u/W7duHbhNtWrVpEKFCrJ69Wrzu/6sXbu2Cdy2tm3bSnJysmzZsiXbnwMAAAAAAL5o6Q6Wmpoqjz/+uPzjH/+QWrVqmW1JSUmmpbpw4cIht9WArdfZtwkO3Pb19nXpOXnypLnYNKADAAAAABCxLd06tnvz5s0yffr0bJnALT4+PnBJSEhw/TEBAAAAANHHF6G7d+/eMmfOHFm2bJmUL18+sL106dJmgrTDhw+H3F5nL9fr7Nuknc3c/t2+TVqDBg0yXdntS2JiogvPCgAAAAAQ7TwN3ZZlmcA9a9YsWbp0qVSuXDnk+gYNGkjOnDllyZIlgW26pJguEda0aVPzu/7ctGmT7N+/P3AbnQm9UKFCUqNGjXQfN3fu3Ob64AsAAAAAABE1plu7lOvM5J999plZq9seg61dvvPmzWt+du/eXfr162cmV9Nw3KdPHxO0deZypUuMabju2rWrjBkzxtzH4MGDzX1ruAYAAAAAICpD92uvvWZ+XnvttSHbdVmwe++91/z/uHHjJDY2Vrp06WImP9OZySdNmhS4bVxcnOma3rNnTxPG8+fPL926dZPhw4dn87MBAAAAAMBHoVu7l59Pnjx5ZOLEieaSkYoVK8q8efMcrg4AAAAAgAiYSA0AAAAAgEhE6AYAAACyWUrq+Xt8RlMdQCTztHs5AAAAEI3iYmNk9KwNknjwmGc1JBQvIAM71ffs8YFoQegGAAAAPKCBe3tSstdlAHAZ3csBAAAAAHAJoRtARI0J80sdAAAAgKJ7OQBHMDYNAAAAOBuhG4BjGJsGAAAAhKJ7OQAAAAAALiF0AwAAAADgEkI3AAAAAAAuIXQDAAAAAOASQjcAAAAAAC4hdAMAAAAA4BJCNwAAAAAALiF0AwAAAADgEkI3AAAAAAAuIXQDAAAAAOASQjcAAAAAAC4hdAMAAAAA4BJCNwAAAAAALiF0AwAAAADgEkI3AAAAAAAuIXQDPpeSaokf+KUOAAAAIJzk8LoAAOcWFxsjo2dtkMSDxzyrIaF4ARnYqb5njw8AAACEK0I3EAY0cG9PSva6DAAAAAAXiO7lAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAgXSmpltcl+KIG4GLkuKh/DQAAACBixcXGyOhZGyTx4DFPHj+heAEZ2Km+J48NOIXQDQAAACBDGri3JyV7XQYQtuheDgAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAARGLoXrlypXTo0EHKli0rMTEx8umnn4Zcb1mWPPPMM1KmTBnJmzevtG7dWn7++eeQ2xw6dEjuuusuKVSokBQuXFi6d+8ux455s44gAAAAAAC+Cd3Hjx+XunXrysSJE9O9fsyYMfLKK6/I5MmT5euvv5b8+fNL27Zt5cSJE4HbaODesmWLLFq0SObMmWOC/IMPPpiNzwIAAAAAgPTlEA+1a9fOXNKjrdzjx4+XwYMHy80332y2vfvuu1KqVCnTIn7HHXfIDz/8IAsWLJBvv/1WGjZsaG7z6quvyg033CAvvviiaUEHAAAAAMArvh3TvWPHDklKSjJdym3x8fHSuHFjWb16tfldf2qXcjtw
K719bGysaRnPyMmTJyU5OTnkAgAAAABA1IRuDdxKW7aD6e/2dfqzZMmSIdfnyJFDihYtGrhNekaNGmUCvH1JSEhw5TkAAAAAAKKbb0O3mwYNGiRHjhwJXBITE70uCQAAAAAQgXwbukuXLm1+7tu3L2S7/m5fpz/3798fcv2ZM2fMjOb2bdKTO3duM9t58AUAAAAAgKgJ3ZUrVzbBecmSJYFtOvZax2o3bdrU/K4/Dx8+LOvWrQvcZunSpZKammrGfgMAAAAAELWzl+t62tu3bw+ZPG3jxo1mTHaFChXk8ccfl+eee06qVKliQviQIUPMjOQdO3Y0t69evbpcf/310qNHD7Os2OnTp6V3795mZnNmLgcAAACiQ0qqJXGxMVFfA/zJ09C9du1aadGiReD3fv36mZ/dunWTqVOnSv/+/c1a3rrutrZoN2vWzCwRlidPnsC/+eCDD0zQbtWqlZm1vEuXLmZtbwAAAADRQcPu6FkbJPHgMU8eP6F4ARnYqb4njw3/8zR0X3vttWY97ozExMTI8OHDzSUj2io+bdo0lyoEAAAAEA40cG9PYilg+I9vx3QDAAAAABDuCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN2IWimpGa8RH411AAAAAHBeDhfuEwgLcbExMnrWBkk8eMyzGhKKF5CBnep79vgAAAAA3EXoRlTTwL09KdnrMgAAAABEKLqXAwAAAADgEkI3InaMsl/qAAAAABC96F4OxzFWGgAAAAD+D6EbrmCsNAAAAADQvRwAAAAAANcQugEAAAAAcAmhGwAAAADgqwmJU3xQg1MY0w0AAAAA8M3EyAkRNikyoTuM6NkefQN4zS91AAAAAHAHEyM7h9AdRrw+4xSJZ50AAAAAwE2E7jDDGScAAAAACB9MpAYAAAAAgEsI3QAAAAAQJTNy+6GGaEP3cgAAAACIgjmamJ/JG4RuAAAAAMgmzNEUfeheDgAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuITQDQAAAACASwjdAAAAAAC4hNANAAAAAIBLCN0AAAAAALiE0A0AAAAAgEsI3QAAAAAAuCRiQvfEiROlUqVKkidPHmncuLF88803XpcEAAAAAIhyERG6P/roI+nXr58MHTpU1q9fL3Xr1pW2bdvK/v37vS4NAAAAABDFIiJ0jx07Vnr06CH33Xef1KhRQyZPniz58uWTt99+2+vSAAAAAABRLIeEuVOnTsm6detk0KBBgW2xsbHSunVrWb16dbr/5uTJk+ZiO3LkiPmZnJwsflcin8ip+DhPHz8z+4k6M4c6nUWdzqJOZ1Gns6jTWdTpLOrM/hrt21Jn9NXpJbtGy7LOebsY63y38Lk9e/ZIuXLlZNWqVdK0adPA9v79+8uKFSvk66+/PuvfDBs2TJ599tlsrhQAAAAAEGkSExOlfPnykdvSnRXaKq5jwG2pqaly6NAhKVasmMTExEik0jMxCQkJ5qAoVKiQ+BV1Oos6nUWdzqJOZ1Gns6jTWdTpLOp0FnVGZ50XS9uvjx49KmXLlj3n7cI+dBcvXlzi4uJk3759Idv199KlS6f7b3Lnzm0uwQoXLizRQg/8cDj4qdNZ1Oks6nQWdTqLOp1Fnc6iTmdRp7OoMzrrvBjx8fGRP5Farly5pEGDBrJkyZKQlmv9Pbi7OQAAAAAA2S3sW7qVdhXv1q2bNGzYUK688koZP368HD9+3MxmDgAAAACAVyIidN9+++1y4MABeeaZZyQpKUnq1asnCxYskFKlSnldmq9ol3pdyzxt13q/oU5nUaezqNNZ1Oks6nQWdTqLOp1Fnc6izuisM7uE/ezlAAAAAAD4VdiP6QYAAAAAwK8I3QAAAAAA
uITQDQAAAACASwjdAAAAAAC4hNCNsKJrsMM57E9nsT+dxf50FvvTWexPZ7E/ncX+dBb70zmpUbovCd0IC7t27ZLff/9dYmM5ZJ3w888/y6+//sr+dAjHp7PYn85ifzqL/eksPo+cxfHpLI5P5/wc5fsyOp81jO3bt8u4ceOkf//+Mn/+fNm3b5/40caNG6VBgwby5Zdfip/99NNPZq34e++9V959913ZtGmT+NF3330ntWrVki+++EL8jOPTWexPZ7E/ncX+dBafR87i+HQWx6dz2JdhRNfpRvTZtGmTVaRIEatZs2ZW48aNrdy5c1t33nmnNW/ePMtPNm7caOXNm9d64oknzrouNTXV8ostW7ZYhQsXtq6//npzKVWqlNWyZUtrypQplp9s2LDB7M8nn3zS8jOOT2exP53F/nQW+9NZfB45i+PTWRyfzmFfhhdCdxT666+/rBtvvNHq06ePdebMGbNt/vz5Vps2baxrr73WmjlzpuUHP/74o/lwGzZsmPlda/3vf/9r6vv+++8DtXvt1KlTVteuXa0HHngg8MH2zTffmN9r1Khhvfbaa5Yf/PTTT1aOHDms4cOHm99Pnz5tLViwwHr99det5cuXW/v27bP8gOPTWexPZ7E/ncX+dBafR87i+HQWx6dz2Jfhh9AdhfSPb/369a3nnnsuZPvq1autm266yZwtW7NmjeWlEydOWP/85z+tokWLWt9++63Z1qFDB6tmzZpW8eLFrbi4OOupp56yfv31V8tr+sfu6quvtnr16nXWh+AjjzxiNWjQwPrss88sr/849+/f38qTJ481d+5cs61du3Zmf5YtW9Zs1z/eegx4jePTWexPZ7E/ncX+dBafR9F3fP79998cn1F4fLIvww9juqNwxsCTJ09KmTJl5ODBg2ZbSkqK+dmkSRN58sknZffu3fLpp5+abXpixgu5c+eWBx98UFq1amVqqlKliql9ypQpZvyK/nzjjTfkvffe87RO+3Hr1KkjBw4ckD///DNwXdWqVeXhhx+WYsWKycyZMz2tM2fOnNK1a1d56KGHpG/fvlKxYkWz7cMPP5TExERT34YNG2Tq1Kme1snx6Syt6cSJE2GxP3v06OH7/an7juPTOWfOnGF/OkwfOxw+j+6++27ffx6pv/76y/fHZ548eaR79+6+Pz61pnA5Pu+66y5fH5989wxTXqd+eGPChAlWrly5rC+++ML8npKSErhu0qRJVsGCBa39+/dbXtOuJ/ZYlV9++SXkutGjR5uxLH/88YcntQWPkfr444/NeBXtLpN27NSMGTNM1xqvzjIHv7Zbt261HnzwQXOmUf8/2Ntvv23lzJnT2r17t+W1iRMnhsXxuWLFCt8en8F4v198S2cw7bbnx/2Zts4vv/zSl/vz0KFDYbE/09bp1/2p3TO3b9/u+88jrVO7mvr98+jnn3+2XnjhBd8fn1rnv//9b98fn2mPwWnTpvny+Exbi3bL9+PxGezDDz/05b5Mb16EB32+L7MDoTsKJCYmmvET+kEc/Obr1q2b+bDQcT/BFi5caNWuXTvb/zhnVKd2O5k9e7YZBxL8gacfhHXq1DHdV7LT0aNHz6pFDRkyxIypev/990O+/OrkJtqVJrv/8GVUp37pWbp0aWC/2dd98sknZhzQ4cOHs7XOpKQka+3atea405pt3bt399XxmVGd2p3PT8fnrl27zJcaPXGh47v8uj8zqtNv+1Mnqmnfvr21ePHikO06bs5P+zOjOvWY9dP+XL9+vRUbG2t+Bv9d8tv+zKhOv+3P7777zrr88svN+0j/RtkGDx7sq8+jjOrctm2brz6PtE7tpl2xYsWQIO234zO4zgMHDvj2+NTvGzp5Vs+ePa1Ro0b59vjMqE7tqu2X41NDqR5v+rn5ww8/BLYPHDjQV/syozq3bNnim33pFUJ3hNMzdTqbYaNGjcy4noYNG1q9e/cOjFW67bbbrHz58lnvvPOOtWPHDrNNZ76sW7eu9eef
f3paZ/A4lbQtOOrRRx+1OnfubCY6ya6ZOfUMXdu2bc0fE/sPh/3hpnTclH5RGzFihAkSR44cMduqVKkS8sHoRZ3BXxzT21/6uuvkMMGBMjte9+rVq5vjLSYmxrrhhhvMNqVfeO666y7fHJ9p69QvPbbgfevl8al1JiQkWC1atLDi4+PNTw0NSo8/Hffnl/2ZUZ0qvUl/vNif+jh6clJr1MmUggOtfXxqK4PX+/Ncdfrp+NQvgRpc+vXrd9Z1Bw8eNDNC++H4TK/O4H2UXnDxYn9qUChWrJj12GOPpft3+/HHH/fF59H56jx58qQvPo/s2b/1vaStwq+88krI8emXz6O0db766qu+fL/r33kdU37rrbeaGbV1bLz2urLpvvPD8ZlendqDwRb8Hc+r41O/b+h3ZP1epydbmjRpYl5/v73X06vz3nvvPed3+Sc8eK97hdAdwfSskX4Y6JtR//+3334zb0g986VfzIIPeH1zVKhQwYRd/XAM/gLsVZ21atUy4Sa9s2h6llS/YG7evDnb6tQP2WrVqpluMFdddZU5O5deoB03bpw5a6f7VJ9X6dKls3V/ZrZOm3ZL/Ne//mU+vLULUHZ+EStTpox5LfVMrJ5RLl++vPliZtMvCHoW18vjM6M69XhNj1fHp9alx9rTTz9t9pvWoftNu5/Z9MuW1+/3jOqcPn26r/anTSek0WWCOnXqZLVu3TrQxdT+AjFgwABP92dGdWpLg5/2p/5t0aCgPYKCuxvrl7Tgkyza2uTl/syoTv1inl7Y9vL41Peynqiw39v6XtegqKHQNmbMGE8/jzJbp9efR/aSRvp5Y7+f9PNTv4/46fMoozp///33dG/v1fGpQU9b1nUSLaXf7bRr8dixY0Nup93jvTw+M1unfaLCi+NTe4boSf9BgwaZv0F6wnfo0KGmAUB7N/nlvZ7ZOr1+r3uJ0B3BtPumdudatWpVYJueSdLu27pdz+rZvvrqKzP+44MPPjChzS91Vq1aNaRO/QOiy3RUrlzZfPhkFz3TqWO8dLZSreG6664zM0MGB9rgL44a1JYtW2a+oAd/aPuhzuCz3NrdR2+j+zk796d+eXnooYdMl2dt5bD33eTJk81JobRn43WIgRfH5/nq1NAVXKe2QHhxfB4/ftx0f9QxU3oM2DXdcsst1siRI61nn302JNRqF0kv9uf56tQlRYLrXLdunSf7M5j2FtHxkF9//bU5e69n5LUu/bJoj0Hz6vg8X526z7RO/Rvr5fGpf8+bN29uvlzZtNVNW5T0C5nWFNyq6NXn0fnq1B4ZL7/8cuA63YdeHp/6vrHr0RYlncn40ksvNRftNWafZNWeT158HmWmTj1RZNepJza8+DzSk6kaSu0gq/Qzs1ChQqY7bNoT1l693y+0Tq/e70r/RurJ/+C5Bu677z7zftIeV/q5avPy+Dxfndrd3KYnCL04PleuXGnVq1fP2rNnT8g+095iun68niQIHq7h1b48V516IuDGoMY+PQHkxb70GqE7gukkMPrH9sUXXwzZrkFBzzDrOCQdX+X3OvUspIYcm477TjtJiNs0HOgfZz0RoDTABgdau3uc12thZrbO4KCofyjtL+XZ+eVWP9imTJkSsv3TTz81rcrJycmmxvRa5v1YZ7B58+Zl+/GpS8Z8/vnn5kuWTQOsBgX94qAtIfp+D+5F4IXM1hncrdeL93swHR+pdSnttq0tyeXKlTM1Z9S65Lc6g8fQerE/9eSVjje87LLLrI4dO5qTAvoFTIOLngDSbrsaEqdOnZqtdWWlziuvvNJ67733fHF8ao3690nH6+pJFu0CrRddwkpbnNLrKebHOnV5K5uefM/uzyMNzum1umtd11xzTaBLrNefR5mtM5hXx6f2aNLeALpuuJ5g1b/zOqmX9gzSXmIatuy/V17KTJ3NmjXz9PhctGiRGbuv4/VtGlT1b9H48eNNo5WedPVaZur86KOPPN2XXiN0RzD9A6xjPnQmS3ucbHCL
k7aG3nHHHZbXwqXOtIFav6DZgXbmzJmBcT9er4uY2TpnzZpleSn4bKhds34J02EFwUE27UyX2S1c6gweF6ln5HXsoX0s6pdF/RKh3SG1q2w41BkcFL2kLQfaGmfTrttas7bY6WzBfuH3OvWEi4ZXPcHatGlTa+/evYHrdBKqf/zjHybUei0c6rTDnwYwfZ317/szzzwTchvtMaJdTb2cufhC6gxuafSixmD233U92aqt8fba116G7nCp06ZjirXLtp7409ddg6ye+Ldpy7x2f9ZW2XCo0+5J4AUNppUqVTLfk/X9og0l2uNBh2gpDbX6uem1cKnTS4TuCKfjJHRSA50wLe2H2ksvvWRdccUVJth6LVzqTBu+tCb9Q60hQVuXH374Yats2bIhQc3PdfqhpS74C4Ke+dSzzseOHTO/63gfbRnxw6yW4VKnzT4G7bp1SRE/zhDq9zq1Lm1F0q7kXbt2Ne8bnWRHW++0dVaXjfODcKhTTwDOmTPHmj9/fuBvk/1TJ87U+v0QGMKlTv2Sq93htTeDvubB9PXWVrqdO3daXguXOtNrELjkkkus+++/3/Izv9apPcH0pI++xnqSOu0s69qjRHvmec3PddonVnRyNO31qa+zzikTHF5vv/1201PMS+FSp9dyeL1OONyTmpoqtWrVks8++0xatWplfn/kkUekRYsW5voff/xRypcvLzlyeHsYhEudweLi4uTMmTOSL18++fzzz6Vjx45y9913S86cOWXlypVSpkwZCYc6y5Yt63WJEhsbG/j/U6dOydGjR81rPXToUBkzZoysXr1a4uPjxWvhUqetdOnSIXVv2rTJvM9y584tfuLnOvXEtL5/9GfTpk1NjXPnzpV69epJxYoV5d1335VKlSp5XWbY1Jk3b1657rrrTH36t0nZPw8ePGjqDX6feSUc6tTXukKFCvL666/LHXfcYV7vUaNGyaBBg+TkyZOyZMkSKVasmBQqVIg6syAlJcX8Derfv7+MHTtW1q1bJw0aNBC/8XOdBQsWNBf9Tqc1/vDDD3L11Veb6/T7XoECBaRcuXJel+nrOmNiYkxdjRo1kkWLFpn3zPHjx6VatWrmev27n5ycLM2aNfOkvnCr03Nep35cPD3jnrZLsX0W3t6uZ+t0ggNtMdZZDW+++WYz+Ubw2ErqPH+dadm305ZjnSgiO2cHjcQ6dYIabZXTGYx13cngsUFui8Q67V4O2hJfokQJ377ufq9Tx/lq1+20r7Pd0yE7RFKdaVuV9XXXORJ0fGV2iYQ67Z86tEAnK9MJi7Q+bYnXv/PZOUFRJNSZHh02lCtXrpAJ9LJDJNWpQ5q0l532ttPejNoirxOA+e1197rOc9WY3jJv2lNRu27rcmc6eW92CZc6/ShG/+N18EfWbd26VZ5//nlJSkqSKlWqyI033ijt27cPnAHVs/P2z927d5uzoEuXLpWEhAS56aabAmehqDPzdaY1YcIEefTRR03N9evXp86LqHPVqlXmTGiRIkXM2dIrrriCOi+iTu3dMHPmTPNe0jP2fn3d/V7n6dOnzVn7woULm9/1Y1PP7GeXSKvTNmvWLJkxY4YsX77ctID67XUPhzq1dUlb3f/44w/57bffZP78+aZluXHjxnLppZdS5wXWmZ5///vf5rY1a9akzgus0/4bpK3Hr7zyiuzcudP0vnnsscekevXq1HkBNQbbsWOHvPnmmzJlyhTf/U3yQ51+RegOY9u2bTMfWO3atTNdB/VDTLsNaxgYN25coBtsrly5sv3LV6TXGezAgQOm20x2fXGI5Dr1g+62226TqVOnSo0aNajzIuvctWuXCbN64srPr7tf69QucsHd3O3gkJ0iqc703kfvv/++3H777eYLHHVeXJ1eiOQ6zxVwqTPzddp/j/7++28zdENPDuptqfPCa7TpyVX9dyVKlDCNU9khXOr0Na+b2pE12oVDu7ppF5jgySCee+450z27R48eZy1x5MWsxZFap862vH//
fup0sE57luD0lj2hzqzXmZ0TPkVyneHyPgqXOu3XPTuXWYzkOsPldafO6Ksz7fe69LogR2udWXnNw+U7stcrpfiR9zOWIEu0NXjPnj2mi4dNJ4LQ7sM6UdaGDRtk9OjRZrt26ejdu7fpMqNn8qjz4uvs1auXvPzyy9TpYJ2vvvqqOWOfnS0ikV6nvu7Z2XMkkusMl/dRuNRpv+7Z2SIfyXWGy+tOndFXZ9rvddn1tz4c6szKax4u35G9qNPvCN1hyB4RoONI9cu/dt0IfhPcf//9ZtzE7NmzTVcPHW+h2/SSnV8cqJM6L6TO7t27my5y2fWBHA116utOndH1PgqXOsPldQ+XOsPldadO6qTO8KkxnOoMC143tSPrdD1rnQ1QZ1g8evRoSHcYXatV18ScPXu2x1VSp9Oo01nU6SzqdBZ1Oos6nUWdzqLO6KszHGoMpzr9jNAd5pYuXWqWLOrVq5d14MCBwHYdh6ZLbq1atcryA+p0FnU6izqdRZ3Ook5nUaezqNNZ1Bl9dYZDjeFUp18RuiPA559/bt4EnTt3tqZPn27Waxw4cKBZBzMxMdHyC+p0FnU6izqdRZ3Ook5nUaezqNNZ1Bl9dYZDjeFUpx8RuiPEunXrrObNm1sVK1a0Lr30Uuvyyy+31q9fb/kNdTqLOp1Fnc6iTmdRp7Oo01nU6SzqjL46w6HGcKrTb1inO4LoGsyHDh2So0ePSpkyZaR48eLiR9TpLOp0FnU6izqdRZ3Ook5nUaezqDP66gyHGsOpTj8hdAMAAAAA4BLmcgcAAAAAwCWEbgAAAAAAXELoBgAAAADAJYRuAAAAAABcQugGAAAAAMAlhG4AAAAAAFxC6AYAAAAAwCWEbgAAAAAAXELoBgAA6dq5c6fExMTIxo0bvS4FAICwRegGACBK3XvvvSZU25dixYrJ9ddfL99//725PiEhQfbu3Su1atXyulQAAMIWoRsAgCimIVuDtV6WLFkiOXLkkBtvvNFcFxcXJ6VLlzbbAABA1hC6AQCIYrlz5zbBWi/16tWTgQMHSmJiohw4cOCs7uXLly83v2s4b9iwoeTLl0+uuuoq2bZtm9dPAwAA3yJ0AwAA49ixY/L+++/LZZddZrqaZ+Tpp5+Wl156SdauXWtawe+///5srRMAgHBCfzEAAKLYnDlzpECBAub/jx8/LmXKlDHbYmMzPi8/cuRIad68ufl/bRlv3769nDhxQvLkyZNtdQMAEC5o6QYAIIq1aNHCdB/XyzfffCNt27aVdu3aya5duzL8N3Xq1An8v4Z0tX///mypFwCAcEPoBgAgiuXPn990J9dLo0aN5M033zQt3m+88UaG/yZnzpyB/9cx3io1NTVb6gUAINwQugEAQEiI1q7lf//9t9elAAAQERjTDQBAFDt58qQkJSWZ///zzz9lwoQJZkK1Dh06eF0aAAARgdANAEAUW7BgQWBcdsGCBaVatWoyY8YMufbaa82SYQAA4OLEWJZlXeR9AAAAAACAdDCmGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugEAAAAAcAmhGwAAAAAAlxC6AQAAAABwCaEbAAAAAACXELoBAAAAAHAJoRsAAAAAAJcQugEAAAAAEHf8L6qfN8v/SQz9AAAAAElFTkSuQmCC",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "try:\n",
+ " import matplotlib.pyplot as plt\n",
+ "\n",
+ " # Get the latest daily metric for conv_rate\n",
+ " latest = metrics[0] if metrics else None\n",
+ " if latest and latest.get(\"histogram\"):\n",
+ " hist = latest[\"histogram\"]\n",
+ " bins = hist[\"bins\"]\n",
+ " counts = hist[\"counts\"]\n",
+ "\n",
+ " fig, ax = plt.subplots(figsize=(10, 4))\n",
+ " ax.bar(\n",
+ " [f\"{bins[i]:.2f}\" for i in range(len(counts))],\n",
+ " counts,\n",
+ " color=\"steelblue\",\n",
+ " edgecolor=\"white\",\n",
+ " )\n",
+ " ax.set_title(f\"conv_rate distribution — {latest['metric_date']}\")\n",
+ " ax.set_xlabel(\"Bin\")\n",
+ " ax.set_ylabel(\"Count\")\n",
+ " plt.xticks(rotation=45)\n",
+ " plt.tight_layout()\n",
+ " plt.show() # pragma: allowlist secret\n",
+ " else:\n",
+ " print(\"No histogram data available.\")\n",
+ "except ImportError:\n",
+ " print(\"Install matplotlib to visualize: pip install matplotlib\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 8: Time-Series Trend\n",
+ "\n",
+ "Plot how a metric (e.g., `mean`) evolves over time."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2 data points from 2025-01-01 to 2025-02-28\n",
+ " 2025-01-01: mean=0.4989, null_rate=0.0000\n",
+ " 2025-02-28: mean=0.5201, null_rate=0.0000\n",
+ " ...\n"
+ ]
+ }
+ ],
+ "source": [
+ "timeseries = monitoring.get_timeseries(\n",
+ " project=\"monitoring_demo\",\n",
+ " feature_view_name=\"driver_stats\",\n",
+ " feature_name=\"conv_rate\",\n",
+ " data_source_type=\"batch\",\n",
+ " granularity=\"daily\",\n",
+ " start_date=date(2025, 1, 1),\n",
+ " end_date=date(2025, 3, 1),\n",
+ ")\n",
+ "\n",
+ "if timeseries:\n",
+ " dates = [t[\"metric_date\"] for t in timeseries]\n",
+ " means = [t[\"mean\"] for t in timeseries]\n",
+ " null_rates = [t[\"null_rate\"] for t in timeseries]\n",
+ "\n",
+ " print(f\"{len(timeseries)} data points from {dates[0]} to {dates[-1]}\")\n",
+ " for t in timeseries[:5]:\n",
+ " print(f\" {t['metric_date']}: mean={t['mean']:.4f}, null_rate={t['null_rate']:.4f}\")\n",
+ " print(\" ...\")\n",
+ "else:\n",
+ " print(\"No time-series data.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAJOCAYAAABm7rQwAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAepVJREFUeJzt3Ql4lNX59/F7Jvs2gewsYacKiqyCoK0bLRSrolgBrSAiWlusihuogOBC3RFFsfXvVhdwodatvLVoqxZURK2IQmVfsxGSyb7NvNd9JjMkmQEDSSZ5Jt/Pdc01mWfOTM4kD8jz8z73sbndbrcAAAAAAAAAQWQP5jcDAAAAAAAAFKEUAAAAAAAAgo5QCgAAAAAAAEFHKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0hFIAAAAAAAAIOkIpAAAAAAAABB2hFAAAAJrNnXfeKTabrd6xHj16yOWXXy7txb/+9S/zM9B7AABweIRSAAAATbRmzRoTxhQUFIiVgiPvLTY2Vrp16ybnnnuuPPvss1JRUSFt0RlnnFFv3oe76ecDAABtX3hrTwAAACAUQqkFCxaYaqAOHTqIVTz55JMSHx9vQqi9e/fK//t//0+uuOIKWbx4sbzzzjuSmZl51O95xx13yOzZs1tkvrfffrtceeWVvsfr1q2TJUuWyG233Sb9+vXzHT/ppJNa5PsDAIDmRSgFAADQQElJicTFxUmou+iiiyQlJcX3eN68efLSSy/JlClT5Ne//rV8+umnR/2e4eHh5tYSfv7zn9d7HB0dbUIpPa5VVO399wkAgNWwfA8AADSZVtlMnz5dOnfuLFFRUdKzZ0+55pprpLKy0jdm27ZtJuhISkoyy8VOOeUUeffddwP24nn11Vflnnvuka5du5rg4eyzz5YtW7b4xs2cOdNU+JSWlvrNZfLkyZKRkSE1NTVHtZTtu+++k0suuUQ6duwop512mnnum2++MdVPvXr1MvPQ99VKogMHDtR7/c0332y+1s/tXUK2Y8cO35gXX3xRhg4dKjExMebzT5o0SXbv3i1t0aWXXmqqkT777DN5//33fcc//vhj8/vTZX76O9YqqhtuuEHKysp+tKdUXXoe6POPPPJIwIozfe6VV1455vkf6ffZ2N+FBlwnnniieY8zzzzTnK9dunSR+++/3+/77dmzR8aPH29Cr7S0NPMzaavLHwEAaGuolAIAAE2yb98+GT58uOmndNVVV8nxxx9vQqrXX3/dhEaRkZGSnZ0to0aNMo//8Ic/SHJysjz//PNy3nnnmXEXXHBBvff84x//KHa7XW666SYpLCw0YYCGJRqUqIkTJ8rSpUtNqKVBiZe+/9tvv22CpLCwsKP6HPo+ffv2lXvvvVfcbrc5pqGMhijTpk0zgdTGjRvlT3/6k7nXKiINPy688EL53//+Z4IUDVq8lUepqanmXsO1uXPnysUXX2zCntzcXHnsscfkZz/7mXz11VdtcrnfZZddZj7nP/7xD1910muvvWZ+vho26u/v888/N59DQxl9rrE04Dv11FNNRZYGOHXpsYSEBDn//POb/BkC/T6P5ndx8OBBGTt2rPn96ng9T2+99VYZMGCA/PKXvzRjNJDTwHTXrl3mvNZQ9i9/+Yt88MEHTZ4/AADtghsAAKAJpkyZ4rbb7e5169b5Pedyucz99ddfr6mA++OPP/Y9V1RU5O7Zs6e7R48e7pqaGnPsww8/NOP69evnrqio8I199NFHzfENGzb43rdLly7uCRMm1Pt+r776qhn30UcfNXr+8+fPN6+ZPHmy33OlpaV+x1555RW/7/HAAw+YY9u3b683dseOHe6wsDD3PffcU++4fo7w8HC/48Hi/cy5ubkBnz948KB5/oILLjjiz2LRokVum83m3rlzp99719W9e3f31KlTfY+feuopM+b777/3HausrHSnpKTUG/djXnvtNfM+et782O/z
aH4Xp59+unmPF154wXdMz8eMjIx659zixYvNOD3vvEpKStx9+vTxmxcAAPDH8j0AAHDMXC6XvPnmm2bXtmHDhvk9713G9d5775lqqrrLqHT5nVZW6TI3XSZVl1YmaYWV109/+lNzr1VL3vfVShh93+LiYt+4FStWmGVWdb9PY/32t7/1O6ZLvLzKy8slLy/PLDtUX3755Y++58qVK83PSCtt9LXem1ZdaRXPhx9+KG2R/m5UUVFRwJ+F9mjSz6HVb1qFpFVGR0N/HrocUiujvLTJur7nb37zm2b5DA1/n0f7u9CfQd256Pmo57D3HFR6/nXq1Mn05vLSpX56XgMAgB/H8j0AAHDMdPmT0+k0/XeOZOfOnTJixAi/494d0/T5uu+hfYvq0r5A3iVVXrqET3eJe+utt0zvIA2nNCS4+uqrj9jT6HC0H1RD+fn5Zle95cuXS05OTr3ndFnhj/nhhx9MaKOhRyARERGHfa3249Lvfyw0QNF+ScfKG/TpUjovXaKmjdD1513399DYn0VdukxOg8yXX35Z7rrrLnNMAyoNFM866yxpDg1/n0f7u9B+Zg3PIz0Ptc+Yl563ffr08Rt33HHHNcMnAAAg9BFKAQCANudw/aC8vYGUViz16NHDNEXXUEp7SWmPHw2rjkXdSiAvrarR5tvayHzQoEGmekarbbTXkN7/GB2jgcXf//73gJ/JW5EUiH5fbbJ9LE4//XTTNP5Yffvtt+ZeAxelTeO1t5SGZNpXSfuGaWNv7R2m/bsa87NoSHf4015U+jm1T5OGXb/73e9ML7Hm0PD3ebS/i8acgwAAoGkIpQAAwDHTZt4Oh8MXYhxO9+7dZfPmzX7HN23a5Hv+WGho9Oijj5pqLV26pyGVd3ldU2k10OrVq02llFYI1a24aehwlVm9e/c2IYZW7fzkJz85qu8/cODAervfHQ1vZdmx0mbdasyYMeZ+w4YNppm7NqfXMMnrWOenNNjT80crpLSKTpuoa4P1ltKU38Xh6Hmr576+b91zINC5DgAA/BFKAQCAY6ZVLePHj5cXX3xRvvjiC7++Ut6L9XHjxpmldmvXrpWRI0f6+hLpDm8aJPXv3/+Yvr9WRenOfBqWrFq1Sq677jppLt5KmYaVMfo5GtKqIaU7ENalO7fNmTPHBFv6M6obXOj7auWR7mR3uGBp9OjREmy6pO7pp582vyfdWe5wPwv9WgPBYxUeHi6TJ0823+/777831VInnXSStJSm/C4OR89r3aFQd+bz7gKp4Zqe1wAA4McRSgEAgCa59957zYW5LhnTBs/aJ2r//v1madYnn3xi+gfNnj1bXnnlFfnlL38pf/jDH0y/Iw2Stm/fLm+88cYxL9kaMmSIWWJ2++23S0VFxTEv3QtEK8B+9rOfmdCrqqrK9DvSz6lzbmjo0KHmXucxadIk059IeyZpdc7dd99twhBt6K4BnvZp0vf461//an5eN910k7QWDVN02Zr2r9KleNps/D//+Y+p0tLfn5cu19PPonPVcfqz0d9bw95SR0urrpYsWWKajN93333SklridzFjxgx5/PHHzedYv369aXquVWba7BwAAPw4QikAANAkGtZ89tlnMnfuXLMUS5fS6TENoLwX5+np6aZ3kPYjeuyxx8xOdloVo32gzjnnnCZ9fw2i7rnnHhNOaUjVnLSK59prr5WlS5eaappf/OIXpidR586d6407+eSTTcPuZcuWmYot7V+kYYdWUGkgp8vFHnnkEVOlozIzM817nXfeedKarrnmGnOvO+GlpKSYvlnPPPOM6dEVFRXlG6chm/6uNFBctGiRGX/BBRfIzJkzTYB1rDTMO+GEE0yl1KWXXiotrbl/F3p+6xJPPUf0vNbH+jn03NfliQAA4Mhsbro1AgAAoJUMHjzYVM5puAMAANqX5tneBAAAADhK2ofs66+/rtc8HQAAtB9USgEAgJBUXFxsbkeiu795m3gjeHTHOu3B9NBDD0leXp5s27bN
LAkEAADtC5VSAAAgJD344IOm8fSRbrt3727tabZL2mB92rRppoG8NsAnkAIAoH2iUgoAAIQkrb7R25GcdtppBCIAAACthFAKAAAAAAAAQcfyPQAAAAAAAARdePC/ZehxuVyyb98+SUhIEJvN1trTAQAAAAAAaDW6KK+oqEg6d+4sdvvh66EIpZqBBlKZmZmtPQ0AAAAAAIA2QzeV6dq162GfJ5RqBloh5f1hOxwOsWq1V25urtka+0gpJgAAAAAAaBmuELk2dzqdpnjHm5ccDqFUM/Au2dNAysqhVHl5uZm/lU98AAAAAACsyhVi1+Y/1uLI+p8QAAAAAAAAlkMoBQAAAAAAgKAjlAIAAAAAAEDQEUoBAAAAAACgfYdSS5culR49ekh0dLSMGDFCPv/888OOfe6550zDrLo3fZ1XVVWV3HrrrTJgwACJi4uTzp07y5QpU2Tfvn313ic/P18uvfRS00SsQ4cOMn36dCkuLm7RzwkAAAAAANBQrrNMvt9XZO7bgzYTSq1YsUJmzZol8+fPly+//FIGDhwoY8aMkZycnMO+RoOk/fv3+247d+70PVdaWmreZ+7cueZ+5cqVsnnzZjnvvPPqvYcGUhs3bpT3339f3nnnHfnoo4/kqquuatHPCgAAAAAAUNeqr3bJ1Mf/JX9893/mXh+HOpvb7XZLG6CVUSeffLI8/vjjvm0QMzMz5dprr5XZs2cHrJS6/vrrpaCgoNHfY926dTJ8+HATXnXr1k2+//576d+/vzk+bNgwM2bVqlUybtw42bNnj6muagyn0ymJiYlSWFhogjIr0p+3BoBpaWkhse0kAAAAAABtXWV1jWzLLpKvt+fJsx9urvec3WaTF/5wpqQ6YsRqGpuThEsbUFlZKevXr5c5c+b4jmkwMnr0aFm7du1hX6fL7Lp3724ClSFDhsi9994rJ5xwwmHH6w9Dl/npMj2l761fewMppd9Tv/dnn30mF1xwQcD3qaioMLe6P2yl89CbFem8NZ+06vwBAAAAAGjLyiurZVtOkWzZ75QtWYWyJcspO3OLxXWYWiE9vvdAsSTHR4nVNDZbaBOhVF5entTU1Eh6enq94/p406ZNAV9z3HHHyTPPPCMnnXSSCZsefPBBGTVqlFmK17VrV7/x5eXlpsfU5MmTfSldVlaWqQyqKzw8XJKSksxzh7No0SJZsGCB3/Hc3FzzfaxITxj9OWowRaUUAAAAAADHrqyyRnYeKJUdeaWyM89zv7+wXALlTwnR4dKlQ7Rsyqrf39puE4msKTtiW6O2qqioyDqh1LEYOXKkuXlpINWvXz956qmn5K677qo3VpueX3zxxSZwefLJJ5v8vbWiS/tf1a2U0qWGqampll6+p1Vk+hkIpQAAAAAAaJyisipT+bQ1SyugPFVQe/NLA45Nio+SPhkO6ZOR6Lnv5JCUhGhzPf7/vt4tS977VlxuTyD1h3EnyvG9/IturKDuRnRtPpRKSUmRsLAwyc7OrndcH2dkZDTqPSIiImTw4MGyZcuWgIGU9pH64IMP6oVG+t4NE8fq6mqzI9+Rvm9UVJS5NaRhjpUDHf1DYPXPAAAAAABASykoqfAET/sL5Yf9ugSvULIKAu+Ul5YY4wug+nZKlN4ZDklOOHxY88sh3WVIrxT5bts+6d+rs6R3iBOramyu0CZCqcjISBk6dKisXr1axo8f76vc0cczZ85s1Hvo8r8NGzaYJuUNA6kffvhBPvzwQ0lOTq73Gq200kbp2s9Kv7/S4Eq/tzZeBwAAAAAA7dOBonJP76f9Tl8AlesM3LKnU8dYE0Bp+KQhlAZQHeKOvhdUqiNG+nVOsGRz82PRJkIppcvhpk6dapqO6w55ixcvlpKSEpk2bZp5fsqUKdKlSxfTz0ktXLhQTjnlFOnTp48Jlh544AFTDXXllVf6AqmLLrpIvvzyS3nnnXdMaOXtE6U9ozQI0+V+Y8eOlRkzZsiyZcvMazQEmzRpUqN33gMAAAAAANal
rX40bNLQyRM+eSqh8osPbXBWV9ekOOmj4VMnh/Q1AVSiJMREBH3eoaDNhFITJ040jcLnzZtnwqNBgwbJqlWrfM3Pd+3aVa/86+DBgyZM0rEdO3Y0lU5r1qyR/v37m+f37t0rb731lvla36surZo644wzzNcvvfSSCaLOPvts8/4TJkyQJUuWBPGTAwAAAACAYAVQ2QVlJnz6IetQAFVYWuk3Vvs6dU2O91Q/dfIsweuVniBxUQRQzcXm1t8ImkQbnScmJprd66zc6Fz7a+luhPSUAgAAAABYncvtlv35pb6ld9774vJqv7Fhdpt0T02obT5eG0ClJUh0ZHBreVwhcm3e2JykzVRKAQAAAAAAHIsal1v2HCg2VU9a/aQBlO6GV1rpH0BFhNmlR5ongPJWQfVMS5DI8LBWmXt7RigFAAAAAAAso7rGJbvyig9VP+13ytZsp1RU1fiNjQy3S690bwNyz0543dMSTDCF1kcoBQAAAAAA2qTK6hrZmVvsW3qnAdS2bKdU1bj8xkZHhJld77w74Ol9ZkqchFl4GVyoI5QCAAAAAACtTiudtufo0jtnbQBVKDtyiqTa5d8KOzYq/FD/pwzPErwuSRpA2Vpl7jg2hFIAAAAAACCoyiqrTcWTBk/eEEororQ5eUMJMRGm8qluD6hOHWPFbiOAsjpCKQAAAAAA0GJKyqtMzydP/ydPI/LdecXiHz+JJMZGmuDJ1wOqU6KkJ8aIjQAqJBFKAQAAAACAZuEsqzS73nkDqB+yCmVffmnAsckJUb6ld94eUHqMAKr9IJQCAAAAAABHraCkorYBee0yvKxCyS4oCzhWq518PaA6JZqG5Enx0UGfM9oWQikAAAAAAHBEB4rKfQGUdye8PGd5wLHa78lb+dSnk8N8rcvygIYIpQAAAAAAgOF2uyXXWV7bgNwTPmkQlV9c4TdWF9l1SY7zC6DioyNaZe6wHkIpAAAAAADaaQCVVVBWpwG5J4AqLK30G2u3iWSmxNc2IPf0geqd7pDYKGIFHDvOHgAAAAAAQpzL7ZZ9+SX1ekBpCFVcXu03Nsxukx6pCb7KJw2ieqY7JDoirFXmjtBFKAUAAAAAQAipcblld16xr/JJg6itWYVSVlnjNzYizC490zSAql2Cl+GQHmkJEhlOAIWWRygFAAAAAIBFVde4ZGeuJ4Dy9oDaluWUimqX39iocLv0SnfUC6C6pSaYYApoDYRSAAAAAABYQGV1jQmgNHzyBlDbs4ukqsY/gNKldr0zHL4eUHqfmRInYXYCKLQdhFIAAAAAALQxFVU1si3b6VmCt9+zBG9HbpFZmtdQXFS4qX7SyidvAKW74tltuj8e0HYRSgEAAAAA0IrKKqtlqzYf9y7B2++UXXnFpjl5QwkxEfWqnzSIyugYSwAFSyKUAgAAAAAgSErKqzy73/kCqELZc6BE/OMnkQ5xkSZ46puR6KuESkuMERsBFEIEoRQAAAAAAC3AWVrp2/3OsxNeoezLLw04NiUh2tOAPONQI/Kk+CgCKIQ0QikAAAAAAJqooKSiTgNyp6mAyi4sCzg2PTHGV/nkXYrXMT4q6HMGWhuhFAAAAAAAjeR2uyW/2BNAbakTQuUVlQcc3zkp1oROdXtAOWIjgz5voC0ilAIAAAAA4DABVE5hma/y6YfanfAOllT4jdVFdl2T43xL7zSE6p3hkPjoiFaZO2AFhFIAAAAAgHZPA6j9B0vr94DaXyjOsiq/sXabSLeUBE/41MnhC6BiIrnEBo4Gf2IAAAAAAO2Ky+2WvQdK6jQg91RClVRU+40Ns9ukR2r9AKpnukOiI8JaZe5AKCGUAgAAAACErBqXS3bnHQqg9H5btlPKKmv8xkaE2aVneoKv/5PeuqfGS2Q4ARTQEgilAAAAAAAhoarGJbtyiw4twasNoCqqXX5jo8Lt0ivDU/nk7QGlAVR4mL1V5g60R4RSAAAAAADLqayukR05Rb7d7zSA2p5TZIKphmIiw6R3nd3v
NIDKTIk3S/MAtB5CKQAAAABAm1ZeVSPbs+s2IHfKjtwiqXG5/cbGR4eb0KlPbQClQVTnpDix2wiggLaGUAoAAAAA0GaUVlTL1mxP5ZO3B9TuvGIJkD+JIybCt/TO04g8UTI6xIiNAAqwBEIpAAAAAECrKC6v8lU+eaugdFe8APmTdIyLkr61u995A6hURzQBFGBhhFIAAAAAgBbnLK2UH0wApdVPThNA7T9YGnBsiiO6TgNyzxK85ITooM8ZQMsilAIAAAAANKuDxRW+pXcmhMpySk5hWcCx6R1ipG+DHlAd4qKCPmcAwUcoBQAAAAA4Jm63Ww4UVdRpQK4BVKE5FkiXpDjP7nedPFVQvTMc4oiJDPq8AbQNhFIAAAAAgEYFUNmFZbUNyA/1gCooqfQbq12eMlPifZVPGkL1TndIXHREq8wdQNtEKAUAAAAAqMfldpt+T57+T54QSgOoorIqv7F2m026p2oApdVPniqoXukOiYnkchPAkbWpvyWWLl0qDzzwgGRlZcnAgQPlsccek+HDhwcc+9xzz8m0adPqHYuKipLy8nLf45UrV8qyZctk/fr1kp+fL1999ZUMGjSo3mvOOOMM+fe//13v2NVXX21eBwAAAAChrsbllr35Jb6ld3q/NcspJRXVfmPD7TbpkZZQ2//JE0L1THNIVERYq8wdgLW1mVBqxYoVMmvWLBMGjRgxQhYvXixjxoyRzZs3S1paWsDXOBwO87xXw61AS0pK5LTTTpOLL75YZsyYcdjvrc8tXLjQ9zg2NrZZPhMAAAAAtCU1Lpfsyi32VT79UBtAlVfV+I2NCLObiqc+Wv1UuxOeVkRFhhNAAQixUOrhhx824ZC3+knDqXfffVeeeeYZmT17dsDXaAiVkZFx2Pe87LLLzP2OHTuO+L01hDrS+wAAAACA1VTVuGRnTtGhXfCynLIt2ymV1S6/sVrp1Ls2gDI9oDISpVtKvISH2Vtl7gDahzYRSlVWVpoldnPmzPEds9vtMnr0aFm7du1hX1dcXCzdu3cXl8slQ4YMkXvvvVdOOOGEo/7+L730krz44osmmDr33HNl7ty5VEsBAAAAsIzK6hrZrgFUnR5QO3KKTDDVUGxkuNn1zhM+eXpAdU2OlzB7/ZUnANAuQqm8vDypqamR9PT0esf18aZNmwK+5rjjjjNVVCeddJIUFhbKgw8+KKNGjZKNGzdK165dG/29L7nkEhNsde7cWb755hu59dZbzZJA7Ud1OBUVFebm5XQ6zb2GY3qzIp237qZh1fkDAAAA7YUutduerbvfeZbg6fK7nXnFpjdUQ/HR4abqScMnbxDVqWOsaU5en14L+L8eQHC5QuTavLHzbxOh1LEYOXKkuXlpINWvXz956qmn5K677mr0+1x11VW+rwcMGCCdOnWSs88+W7Zu3Sq9e/cO+JpFixbJggUL/I7n5ubWa7RutRNGwz09+bVKDQAAAEDrK6uskZ0HSmVnXqnsqL3tLywXd4D8KCE6XHqkxEr3lFjpkRxrvk5JiKzfe7e6RPJyS4L6GQC0v2vzoqIi64RSKSkpEhYWJtnZ2fWO6+PG9nqKiIiQwYMHy5YtW5o0F22yrvR9DhdK6TJDbcpet1IqMzNTUlNTTfN1q574+h8r/QxWPvEBAAAAqyour/I1IN9iqqCcsi+/RALVLyXFRx2qftJKqE4OSUmI9tv8CYC1uELk2jw6Oto6oVRkZKQMHTpUVq9eLePHj/f9IvTxzJkzG/Ueuvxvw4YNMm7cuCbN5euvvzb3WjF1OFFRUebWkJ4wVj5p9MS3+mcAAAAArKCwtNL0f6rbhHz/wdKAY1Md0b7d77w74SUnNO6CD4D12ELg2ryxc28ToZTSyqOpU6fKsGHDZPjw4bJ48WIpKSnx7cY3ZcoU6dKli1k6pxYuXCinnHKK9OnTRwoKCuSBBx6QnTt3ypVXXul7z/z8fNm1a5fs27fPPNZeUUqrr/SmS/RefvllE2QlJyebnlI3
3HCD/OxnPzO9qgAAAACgqfKLy03lkyd88gRQOYVlAcdmdIjx7X6nDci1GqpDnP//EAeAUNBmQqmJEyeankzz5s2TrKwsGTRokKxatcrX/FzDpbpJ28GDB2XGjBlmbMeOHU2l1Zo1a6R///6+MW+99ZYv1FKTJk0y9/Pnz5c777zTVGj985//9AVgugRvwoQJcscddwT1swMAAACwPu0Bk1dU7gmfapuQ69f5xYc2SaqrS1JcvR3wNIhKiIkI+rwBoLXY3Po3J5pEe0olJiaaZmRW7imVk5MjaWlpli4RBAAAAIJBL6OyC8rkB9P/qVB+0F5Q+wvNsryG7DaRrsnxvgBK73tlOCQuigAKQGhemzc2J2kzlVIAAAAA0Ba53G7Zn19aJ4DyVEJpY/KG7DabdE+NN5VP3hCqd7pDoiO59AKAhvibEQAAAABq1bjcsvdAsa/5uLcHVGlFtd/YcLtNeqQl1DYg9yy/65WeIJHhYa0ydwCwGkIpAAAAAO1Sjcslu3KLfZVPGkBtzXJKeVWN39jIcLv0Snf4+j/1zUiU7mkJEhFm3eU1ANDaCKUAAAAAhLyqGpfsyCnyVD7pErz9Ttme45TKapff2KiIME/4lHFoCV5mSryEE0ABQLMilAIAAAAQUiqqamR7bQDl2Qmv0ARS1S7/PZ5iI8OlT6f6AVSX5HgJ0+7kAIAWRSgFAAAAwLLKK6tla7az3g54O3OLTXPyhuKjI0wApUvvvEvwOiXFmubkAIDgI5QCAAAAYAklFVWyLcvpa0Ku93sOaADlPzYxNtJX+eTdCS89MUZsBFAA0GYQSgEAAABoc4rKqnz9n7wB1N78koBjkxOizPI73xK8Tg5JSYgmgAKANo5QCgAAAECrKiipMMGTpwG5hlCFklVQFnBsWmKMqX7yVEF5Aqik+OigzxkA0HSEUgAAAACC5kBReW0Dck8IpV/nOssDju3UMbZeANU7wyEd4qKCPmcAQMsglAIAAADQ7NxutwmbPA3IPUvw9Ov84oqA47smxZneT95G5L0zEiUhJiLo8wYABA+hFAAAAIAmB1C63M4XQNX2gSosrfQba7eJZKbE1y698/SA6pWeIHFRBFAA0N4QSgEAAABoNJfbLfvyS2SLLr8zy/A8S/CKy6v9xobZbdI9NeHQEjwNoNISJDqSyxAAAKEUAAAAgMOocbllz4Hi2gooz/K7rVlOKa30D6AiwuzSIy2htv+TwwRQPdMSJDI8rFXmDgBo+wilAAAAAEh1jUt25RX7Kp+0EmprtlMqqmr8xkaG26V3uid48lZBdUtNMMEUAACNRSgFAAAAtDOV1TWyM/dQAKX327OLpKrG5Tc2OiLM7Hrn3QFP7zNT4iTMTgAFAGgaQikAAAAghGml0/Ycp/ygPaBqQ6gdOUVS7XL7jY2NCvctvdMd8PS+S5IGULZWmTsAILQRSgEAAAAhoqyyWrZle8InE0JlFZqKKG1O3lBCTISv8skbRHXqGCt2GwEUACA4CKUAAAAACyoprzI9n8wSPFMB5ZTdecXiHz+JdIiL9C2/8/aASkuMERsBFACgFRFKAQAAAG2cs6zSNB73NCDXnfAKZV9+acCxyQlRvqV33kooPUYABQBoawilAAAAgDakoKSitgF5bRVUVqFkF5QFHJueGHOoB1SnRNOQPCk+OuhzBgDgWBBKAQAAAK3kQFG5b/ndD1meSqg8Z3nAsdrvydcDqpPDfJ0YGxn0OQMA0FwIpQAAAIAW5na7JddZXtuA3FP9pJVQ+cUVfmN1kV2X5LhDPaBqA6j46IhWmTsAAC2FUAoAAABo5gAqq6CsTgNyTwBVWFrpN9ZuE+mWkuALnjSI6pXukNgo/pkOAAh9/NcOAAAAOEYut1v2HijxBU8aRG3NKpTi8mq/sWF2m/RI9QRQ3iqonukOiY4Ia5W5AwDQ2gilAAAAgEaocblkd54ngPI2ItcAqqyyxm9sRJhdeqZpAFXbAyrDIT3SEiQynAAKAAAv
QikAAACggeoal+zMLa4TQBXKtiynVFS7/MZGhdvNkru6AVT31AQJD7O3ytwBALAKQikAAAC0a5XVNSaA0vDJ2wdqe06RVNX4B1AxkWHSW5uPZxzqAZWZEidhdgIoAACOFqEUAAAA2o2KqhrZlu309IDa7+kBtSO3SGpcbr+xcVHhpvpJAyhvDyjdFc9u0/3xAABAq4dSNTU18txzz8nq1aslJydHXK76/0fpgw8+aOq3AAAAAI5aWWW1bM3yBFCeCiin7MorNs3JG0qIiTDBU1+tgqoNojp1jBUbARQAAG03lLruuutMKHXOOefIiSeeyH+4AQAAEHQl5VW+3e88VVCFsudAifjHTyId4iL9Aqi0xBj+HQsAgNVCqeXLl8urr74q48aNa54ZAQAAAEfgLK2UH2qX33mroPYfLA04NiUh2tOAPONQI/Kk+CgCKAAAQiGUioyMlD59+jTPbAAAAIA6CkoqDjUg16V4+wslu7As4Nj0DjGm71PdHlAd46OCPmcAABCkUOrGG2+URx99VB5//HH+jxMAAACOidvtlgNFFb6ld94QKq+oPOD4zkmxvt3vvEGUIzYy6PMGAACtGEp98skn8uGHH8rf//53OeGEEyQiIqLe8ytXrmzqtwAAAECIBVA5hWUNekA55WBJhd9Y/V+eXZPjPOFTnQAqLrr+vzkBAEA7DKU6dOggF1xwQbNMZunSpfLAAw9IVlaWDBw4UB577DEZPnx4wLHaXH3atGn1jkVFRUl5eXm9QGzZsmWyfv16yc/Pl6+++koGDRpU7zU6Xqu9tDdWRUWFjBkzRp544glJT09vls8EAADQ3gMo7ffkW35XWwnlLKvyG2u32aR7arwneOrkWYLXK90hMZFN/icrAABog5r8X/hnn322WSayYsUKmTVrlgmRRowYIYsXLzYB0ebNmyUtLS3gaxwOh3neq+HywZKSEjnttNPk4osvlhkzZgR8jxtuuEHeffddee211yQxMVFmzpwpF154ofznP/9pls8FAADQXrjcbrPjnYZO3gbkW7OcUlJR7Tc23G6THmkJ9QKoHmkOiY4Ia5W5AwCA4Gsz/9vp4YcfNsGRt/pJwykNi5555hmZPXt2wNdoCJWRkXHY97zsssvM/Y4dOwI+X1hYKP/3f/8nL7/8spx11lm+kK1fv37y6aefyimnnNIMnwwAACD01LhcsjuvxLf8Tu+3ZTulrLLGb2xEmF16pnsCKA2f9KYVUZHhBFAAALRnzRJKvf766/Lqq6/Krl27pLKyst5zX3755Y++Xl+jS+zmzJnjO2a322X06NGydu3aw76uuLhYunfvLi6XS4YMGSL33nuv6WvVWPo9q6qqzPfxOv7446Vbt27m+xJKAQAAiFTVuGRXbtGhHlC1AVRFtctvbFS4XXrV2f1ObxpAhYfZW2XuAAAghEOpJUuWyO233y6XX365/O1vfzOVTlu3bpV169bJ73//+0a9R15entTU1Pj1cdLHmzZtCvia4447zlRRnXTSSabi6cEHH5RRo0bJxo0bpWvXro36vtq7KjIy0vTFavh99bnD0d5TevNyOp3mXsMxvVmRzlt7Plh1/gAAoHlUVtfIjpxiT+8n0wPKKdtznFJd4/YbGxMZJr3THbUNyB3m1jU5XsLs/jsy828MAADaz7W5q5Hzb3IopU3B//SnP8nkyZNN8/FbbrlFevXqJfPmzTPNxVvKyJEjzc1LAylddvfUU0/JXXfdJS1p0aJFsmDBAr/jubm59RqtW+2E0XBPT36tUgMAAKFPK512HyiVHXmlsjPPc7/3YJkEyJ8kNjJMeqTESne9JcdKz5RYSUuMMs3JfdxlciCvLKifAQCAUOIKkWvzoqKi4IRSumRPAyEVExPj+8baz0mXvz3++OM/+h4pKSkSFhYm2dnZ9Y7r4yP1jKorIiJCBg8eLFu2bGn03PW9delgQUFBvWqpH/u+usxQm7LXrZTKzMyU1NRU03zdqie+9ujSz2Dl
Ex8AAARWWlFtltz5dsDLcsruvGJxBQigHDER9aqfdAleRocYv01lAABA83KFyLV5dHR0cEIpDW+0Ikp7O2kvJm0QPnDgQNm+fbtJ9hpDl9ANHTpUVq9eLePHj/f9IvSx7obXGLr8b8OGDTJu3LhGz12/p4ZZ+n0mTJhgjulufhq01a3CaigqKsrcGtITxsonjZ74Vv8MAABApLi8yhM87a/tAZVVKHsPlEigf5l1jIuSvp08S/D6mp3wEiXVEU0ABQBAK7GFwLV5Y+fe5FBKd6176623TJWS9pO64YYbTOPzL774Qi688MJGv49WHk2dOlWGDRsmw4cPl8WLF0tJSYlvN74pU6ZIly5dzNI5tXDhQlOJ1adPH1Pp9MADD8jOnTvlyiuv9L2nhmUaMO3bt88XOHmDNL0lJibK9OnTzfdOSkoyVU7XXnutCaRocg4AAKygsLSyNoDSHfA8VVD7D5YGHJviiPYFT31qm5EnJzTu/2QCAAA0tyaHUtpPytvAShubJycny5o1a+S8886Tq6++utHvM3HiRNOTSXtRaZPxQYMGyapVq3zNzzVcqpu0HTx4UGbMmGHGduzY0VQ96fft37+/b4yGZd5QS02aNMncz58/X+68807z9SOPPGLeVyultHn5mDFjTJ8sAACAtuZgcYWv8smEUFlOySkM3MNJl9uZ3e+0Aqo2hOoQ51/pDQAA0Fps7sauscNhaU8prbrSZmRW7imVk5MjaWlpli4RBAAgFOg/z/KKys3yOw2gvEHUgaJDu//W1SUpzlf5pCFU7wyHOGIigz5vAADQNK4QuTZvbE7S5Eop9fHHH5td77Zu3WqW7ukyu7/85S/Ss2dPOe2005rjWwAAAIRsAJVdWFa7/M7TgFwDqIKSSr+x2uUpMyXeL4CKi4polbkDAAA0RZNDqTfeeMPstHfppZfKV199ZZbAKU3D7r33Xnnvvfea+i0AAABCgsvtNv2eGgZQRWVVfmPtNpt0T42vbUDuaUTeK90hMZHN8v8UAQAAWl2T/1Vz9913y7Jly0wj8uXLl/uOn3rqqeY5AACA9qjG5Za9+SW1vZ88PaA0hCqtqPYbG263SY+0hNoG5J4eUD3TEiQqIqxV5g4AAGCJUEp3tPvZz37md1zXDuqueAAAAKGuxuWSXbnFvsonrYLamuWU8qoav7ERYXZT8dSnU+0SvIxEUxEVGU4ABQAA2pcmh1IZGRmyZcsW6dGjR73jn3zyifTq1aupbw8AANCmVNW4ZGdOUZ0G5E7Zlu2UymrPbsR1aaVT73Rv/yeHCaC6pcRLeJh1G5cCAAC0mVBqxowZct1118kzzzwjNptN9u3bJ2vXrpWbbrpJ5s6d2zyzBAAAaAWV1TWyPafIEz7VBlA7copMMNVQbGS4aTruqX7y9IDqmhwvYXZtTw4AAIBmD6Vmz55ttiw8++yzpbS01Czli4qKMqHUtdde29S3BwAACIryymrZ1iCA2plbZHpDNRQfHeFZfpdxqAdUp6RY05wcAAAAjWNz6z7EzaCystIs4ysuLpb+/ftLfHy8tBdOp9P00NIdBx0Oh1iRBos5OTmSlpYmdjtLCgAAoU2bjW/V5XfaA6p2J7w9B4olQP4kibGRtQ3IPVVQGkSld4gxFeIAAADNyRUi1+aNzUmOuVLqiiuuaNQ4XdYHAADQWorKqmoDKK2A8oRQe/JLAo5Nio8yAZSpgKrtAZXqiCaAAgAAaAHHHEo999xz0r17dxk8eLA0U7EVAABAkxSWVvoqn7QRuS7B23+wNOBYDZu8u995A6jkhOigzxkAAKC9OuZQ6pprrpFXXnlFtm/fLtOmTZPf/OY3kpSU1LyzAwAAOIz84vLa/k9O3054uc7ygGM7dYz1NB+v7f+kDck7xEUFfc4AAABopp5SFRUVsnLlSrNEb82aNXLOOefI9OnT5Re/+EW7KnOnpxQAAC1H/6miYZOpfNrvrF2GVyj5xRUBx3dNiqvXA6p3RqIkxEQE
fd4AAADt9drc2dI9pZTusjd58mRz27lzp1nS97vf/U6qq6tl48aN7arZOQAAaJ4AKrugzARPniV4nh5QuiyvIbtNpGtyvGcJnukD5ZBeGQ6JiyKAAgAAsIImhVJ1aYKn1VH6j8mamprmelsAABCiXG637M8v9VU+eRuRF5dX+Y2122zSPfVQAKVVUL3THRId2Wz/lAEAAECQNelfcnWX733yySfyq1/9Sh5//HEZO3aspcvMAABA86pxuWXvgeJD1U+1TchLK6r9xobbbdIjLeFQBVSnROmZliCR4WGtMncAAAC0sVBKl+ktX75cMjMz5YorrjBNz1NSUpp3dgAAwHJqXC7ZmVt8qAfU/kLZmu2Uiir/SurIcLv0StcG5I7aJXiJ0j0tQSLC+J9bAAAAoe6YQ6lly5ZJt27dpFevXvLvf//b3ALRSioAABCaqmpcsiOnqDaA0j5QTtme45TKapff2OiIMLPrnXcHPA2iuqXGSxjV1QAAAO3SMYdSU6ZMaVc77AEA0N5ppdP22gDKLMPbX2gCqWqX/0a+sVHh9aqf9OsuyRpA8W8HAAAANDGU0p32AABAaCqvrDZL7jwNyD33uiRPm5M3FB8d4at88vaA6tQx1jQnBwAAAA6HLWsAAGjnSiqqZGtt8KTNx7UKandesfjHTyKJsZG+AMrbiDw9MYbqaQAAABw1QikAANoRZ1mlL4Dy7oS3N78k4NjkhKg6/Z80gHJISkI0ARQAAACaBaEUAAAhqqCkwlf55KmCKpSsgrKAY9MSYw5VP9UGUEnx0UGfMwAAANoPQikAAELAgaLy2gbk3j5QhZLnLA84Vvs9eSqgPDvh6RI8XZYHAAAABBOhFAAAFuJ2uyXXWe4LnrQSSr/OL67wG6uL7Lokx/kqn7QKqnd6oiTERLTK3AEAAIC6CKUAAGjDAZQut/MFULU9oApLK/3G2m0imSnxh3pAmQDKIbFR/KceAAAAbRP/UgUAoA1wud2yL79Etux31gmgCqW4vNpvbJjdJt1TE+otv+uV7pDoiLBWmTsAAABwLAilAAAIshqXW/YcKPbtfqcBlO6IV1rpH0BFhNmlR1pCbQNyhwmgeqYlSGQ4ARQAAACsjVAKAIAWVF3jkl153gBKK6CcsjXbKRVVNX5jI8PtZsmdBk/enfC6pSaYYAoAAAAINYRSAAA0k8rqGtmZeyiA0vvt2UVSVePyG6tL7XrXBk/ePlCZKXESZieAAgAAQPtAKAUAwDHQSqftOU75Yb9n+Z2GUDtyiqTa5fYbq83GvZVP3h5QXZI0gNL98QAAAID2iVAKAIAfUVZZLduyNYDyLL/TAEororQ5eUMJMRGHwqfaICqjY6zYbQRQAAAAQF2EUgAA1FFSXuVpPl67A54GUXsOlIh//CTSIS6y3vI7DaHSEmPERgAFAAAA/ChCKQBAu+Usq/RVPnn7QO3LLw04NiUh2rf7nTeISk6IIoACAAAAjhGhFACgXSgoqagNnmqX4WUVSnZBWcCx6YkxfgFUx/iooM8ZAAAACGWEUgCAkOJ2uyW/uDaA0uV3tUvx8pzlAcd36hhbpwG5Q/pmJIojNjLo8wYAAADaG0IpAIClA6hcZ7kvgPIsw3PKwZIKv7G6yK5Lcly9HlC9MxwSHx3RKnMHAAAA2rs2FUotXbpUHnjgAcnKypKBAwfKY489JsOHDw849rnnnpNp06bVOxYVFSXl5eX1Llbmz58vf/7zn6WgoEBOPfVUefLJJ6Vv376+MT169JCdO3fWe59FixbJ7Nmzm/3zAQCOnf6dvv9gqacJuamA8gRRzrIqv7F2m0i3lART+eQNoHqlOyQ2qk39Zw8AAABo19rMv85XrFghs2bNkmXLlsmIESNk8eLFMmbMGNm8ebOkpaUFfI3D4TDPezVsNnv//ffLkiVL5Pnnn5eePXvK3LlzzXt+9913Eh0d7Ru3cOFCmTFjhu9xQkJCi3xGAEDjuNxu2XugxLMDnrcH1P5CKamo
9hsbZrdJj1RPAOWtguqZ7pDoiLBWmTsAAAAAi4VSDz/8sAmGvNVPGk69++678swzzxy2aklDqIyMjMP+H3UNtu644w45//zzzbEXXnhB0tPT5c0335RJkybVC6EO9z4AgJZV43LJ7jxPAOVtRL41q1DKKmv8xkaE2aVnWkKdBuQO6ZGWIJHhBFAAAACA1bSJUKqyslLWr18vc+bM8R2z2+0yevRoWbt27WFfV1xcLN27dxeXyyVDhgyRe++9V0444QTz3Pbt280yQH0Pr8TERFOFpe9ZN5T64x//KHfddZd069ZNLrnkErnhhhskPPzwP5qKigpz83I6neZe56E3K9J5a5Bn1fkDsIbqGpfsyis24dNW04DcKduynVJR7f93T1S43Sy50+BJez9pCNUtJV7Cw+x+Y/m7CwAAAKHAFSLX5o2df5sIpfLy8qSmpsZUMdWljzdt2hTwNccdd5ypojrppJOksLBQHnzwQRk1apRs3LhRunbtagIp73s0fE/vc+oPf/iDCbSSkpJkzZo1Jhjbv3+/qdw6HO05tWDBAr/jubm59XpaWe2E0Z+jnvwaCAJAU1XVuGRPfpnsyCuVnXml5n53fplUu9x+Y6Mj7NItOVZ6pHhu3ZNjpVOHaLM075ByyT9gzb9jAQAAgPZ0bV5UVGSdUOpYjBw50ty8NJDq16+fPPXUU6bqqbG0j5WXBlyRkZFy9dVXm+BJG6cHosFV3ddppVRmZqakpqaaPldWPfF1OaR+Biuf+ABaR3lVjWzPccrW/U75wVRAFcrO3GKpCRBAxUWFm+onXYJn7jMc0jkpTuwN+gICAAAA7Y0rRK7N6/bxbvOhVEpKioSFhUl2dna94/q4sb2eIiIiZPDgwbJlyxbz2Ps6fY9OnTrVe89BgwYd9n10eV91dbXs2LHDVGMFomFVoMBKTxgrnzR64lv9MwBoeWWV1Wbpnaf/kzYgd5oledqcvCFHTISn/1NGoi+E6tQx1m9jCgAAAAChc23e2Lm3iVBKq5OGDh0qq1evlvHjx/vSQX08c+bMRr2HLv/bsGGDjBs3zjzW3fY0mNL38IZQWtH02WefyTXXXHPY9/n666/ND+9wO/4BQHtSUl51aPe72kbkuiuef/wk0jEuSvp20sonTwClPaBSHdEEUAAAAADabiildDnc1KlTZdiwYTJ8+HCzc15JSYlvN74pU6ZIly5dzLI6tXDhQjnllFOkT58+UlBQIA888IDs3LlTrrzySvO8XgRdf/31cvfdd0vfvn1NSDV37lzp3LmzL/jShucaUp155plmBz59rE3Of/Ob30jHjh1b8acBAMHnLK2UH2orn7wB1P6DpQHHpjiiTfjUt3YZngZQSfFRBFAAAAAArBdKTZw40TQKnzdvnmlErtVNq1at8jUq37VrV73yr4MHD8qMGTPMWA2QtNJKG5X379/fN+aWW24xwdZVV11lgqvTTjvNvKd3baMuwVu+fLnceeedZjc9Da40lKrbLwoAQtHB4gpf8LTFVEE5JbuwLODY9A4xngDK1wMqUTrGB+65BwAAAACNZXNrS3c0iS4LTExMNB3yrdzoPCcnxyxbtPK6VQD16V/xB4r8A6i8osC72HVOiq0TQOkyPIc4YiKDPm8AAACgPXKFyLV5Y3OSNlMpBQBoegCVU1hW2//p0BK8gpJKv7G6yK5rcpwnfPIGUBkOiYuOaJW5AwAAAGh/CKUAwKIBlPZ7qhtAaRWUs6zKb6zdZpPuqfG+yicNonqlOyQmkv8EAAAAAGg9XJEAQBvncrtlz4GS2qV3nuqnrVlOKamo9hsbbrdJj7SEOjvgOaRnmkOiIsJaZe4AAAAAcDiEUgDQhtS4XLI7r6S2AsoTQG3LdkpZZY3f2Igwu/RMT/D1f9J7rYiKDCeAAgAAAND2EUoBQCupqnHJrtyiQ0vwagOoimqX31itdOqd7jDL77wBVLeUeAkPs27zQwAAAADtG6EUAARBZXWNbM8p8u1+p0HU
jpwiE0w1FBsZLr0zNIDyNB/XAKprcryE2bU9OQAAAACEBkIpAGhm5VU1puJJAyhvFdTO3CKpcbn9xsZHhx/q/1TbiLxzUpxpTg4AAAAAoYxQCgCaoLSiWrbWC6AKZXdesQTIn8QRE+Hp/+QLoBIlo0OM2AigAAAAALRDhFIA0EjF5VW+5uNb9jvN13sPlEiA/EmS4qMOLb+rDaBSHdEEUAAAAABQi1AKAAIoLK2s7f+kIZQngNp/sDTg2BRHtC946lvbiDw5ITrocwYAAAAAKyGUAtDu5ReX+yqfvD2gcgrLAo7V5Xa+HlC1lVAd4qKCPmcAAAAAsDpCKQDthtvtlryihgFUoRwoqgg4vktSnG/3Ow2hdEc8R0xk0OcNAAAAAKGIUApAyAZQ2YVl9XbA0wCqoKTSb6x2ecpMifdVPnkDqLioiFaZOwAAAAC0B4RSACzP5Xabfk+eBuSHAqiisiq/sXabTbqnxtfugOcJoHqlOyQmkr8OAQAAACCYuAoDYCk1LrfsPVBsgqcfsg6FUKUV1X5jw+026ZGWUKf/U6L0TEuQqIiwVpk7AAAAAOAQQikAbVaNyyW7cmsDqNr+T1uznFJeVeM3NiLMbiqezO53tQGUBlJ6HAAAAADQ9hBKAWgTqmpcsjOnqF7107Zsp1RWu/zGaqVT73RvA3KHCaC6pcRLOAEUAAAAAFgGoRSAoKusrpHtGkDV9oDS+x05RVLtcvuNjY0M9wVP3p3wuiTHS5hd25MDAAAAAKyKUApAiyqvrJat2dp43OkLoHbmFpvm5A3FR0eYAKqvBlCmEXmidEqKNc3JAQAAAAChhVAKQLMpqaiSbaYB+aEAas8BDaD8xybGRtbbAU8DqPQOMWIjgAIAAACAdoFQCsAxKSqrkq1ZnuDJWwW1J78k4Nik+Chf8ORdipfqiCaAAgAAAIB2jFAKwI8qLK309X/SHfD066yCsoBjNWwyDcjrBFDJCdFBnzMAAAAAoG0jlAJQT35xeW0A5fQFULnO8oBjO3WMNc3HNXjSIKp3hkM6xEUFfc4AAAAAAOshlALaKbfbbcImDZ40gPrB3BdKfnFFwPFdk+LMEjxvI/LeGYmSEBMR9HkDAAAAAEIDoRTQTgKo7IIyEzzV7QGly/IasttEuibHe5bg1TYi75XhkLgoAigAAAAAQPMhlAJCjMvtlv35pbXhU2FtBZRTisur/MaG2W3SPTXBswRPA6hOidIrLUGiI/mrAQAAAADQsrjyBCysxuWWPQeKaxuQO00QtTXLKaWV1X5jI8Ls0iOtfgDVMy1BIsPDWmXuAAAAAID2jVAKsIjqGpfsyiv2NR/X6qet2U6pqKrxGxsZbpde6dqA3OHbCa97WoIJpgAAAAAAaAsIpYA2qLK6Rnbm1g+gtuc4pbLa5Tc2OiLM7HrnDZ80iOqWGi9hdgIoAAAAAEDbRSgFtDKtdNqeU+TrAaVL8XbkFEm1y+03NjYq/NDyOw2gOiVKl6Q40xsKAAAAAAArIZQCgqi8stosudPg6YfaHfC0IkqbkzcUHx1RW/1UWwXVKVE6dYwVu40ACgAAAABgfYRSQAspqagyTcdNAFXbiHx3XrH4x08iibGRfgFUemKM2AigAAAAAAAhilAKaAbOskq/AGpvfknAsckJUab3k68HVCeHpCREE0ABAAAAANoVQingKBWUVJjQydOA3NMHKqugLODYtMQY6VvbA8obQCXFRwd9zgAAAAAAtDWEUsARHCgq91U+efpAFUqeszzgWO335KmAOhRC6bI8AAAAAADQxkOppUuXygMPPCBZWVkycOBAeeyxx2T48OEBxz733HMybdq0eseioqKkvPxQYOB2u2X+/Pny5z//WQoKCuTUU0+VJ598Uvr27esbk5+fL9dee628/fbbYrfbZcKECfLoo49KfHx8C35StDV6ruQ6y33Bk6cCyin5xRV+Y3WRXZfkuENL8Do5pHd6oiTERLTK3AEAAAAAsKI2E0qtWLFC
Zs2aJcuWLZMRI0bI4sWLZcyYMbJ582ZJS0sL+BqHw2Ge92rYk+f++++XJUuWyPPPPy89e/aUuXPnmvf87rvvJDras4Tq0ksvlf3798v7778vVVVVJui66qqr5OWXX27hT4zWDKB0ud2h/k+eAKqwtNJvrN0mkpkSX6f/U6L0TndIbFSb+aMDAAAAAIAl2dx6hd4GaBB18skny+OPP24eu1wuyczMNFVMs2fPDlgpdf3115sKqED0Y3Xu3FluvPFGuemmm8yxwsJCSU9PN6+dNGmSfP/999K/f39Zt26dDBs2zIxZtWqVjBs3Tvbs2WNe3xhOp1MSExPN+2tQZkX6887JyTEBoFaMhQqX2y378kvqLcHTEKq4vNpvbJjdJt1TEzzL72oDqF7pDomOCGuVuQMAAAAA2hdXiFybNzYnaRPlHpWVlbJ+/XqZM2eO75j+8EePHi1r16497OuKi4ule/fu5pc2ZMgQuffee+WEE04wz23fvt0sA9T38NIfiIZf+p4aSul9hw4dfIGU0vH6vT/77DO54IILAn7fiooKc6v7w1Y6D71Zkc5bgzyrzl/VuNyy50CxJ3yqDaC2ZjulrLLGb2x4mE16pmn45L0lSo+0eIkM9w+grPwzAQAAAABYhysErs1VY+ffJkKpvLw8qampMVVMdenjTZs2BXzNcccdJ88884ycdNJJJnl78MEHZdSoUbJx40bp2rWrCaS879HwPb3P6X3DpYHh4eGSlJTkGxPIokWLZMGCBX7Hc3Nz6/W0stoJoz9HPfmtkMZWu9yy72CZ7MgrlZ15peZ+V36ZVFb7n/iRYTbJTI6VHimeW/eUWOnSIVrCw+p+zgopyPfvHwUAAAAAQLC4LHZtfjhFRUXWCaWOxciRI83NSwOpfv36yVNPPSV33XVXi35vrejS/ld1K6V0qWFqaqqll+9pTy79DG3txK+srpGdubUVULXL8LbnFElVjX8ApUvtetepftL7zJQ4CWtjnwkAAAAAACtdmx8Nbx9vS4RSKSkpEhYWJtnZ2fWO6+OMjIxGvUdERIQMHjxYtmzZYh57X6fv0alTp3rvOWjQIN8YXatZV3V1tdmR70jfV3f501tDesJY+aTRE7+1P0NFVY1sz3F6ekDt12V4hbIjp8hURjWkzcY1dKrbhLxLkgZQ9RveAwAAAABgFbY2cG3eVI2de5sIpSIjI2Xo0KGyevVqGT9+vC8d1MczZ85s1Hvo8r8NGzaYJuVKd9vTYEnfwxtCaUWT9oq65pprzGOttNJG6drPSr+/+uCDD8z31t5TaFllldWy1fR/8gRQGkTtyis2zckbSoiJOBQ+1QZRGR1jxd5gx0UAAAAAAGANbSKUUrocburUqabp+PDhw2Xx4sVSUlIi06ZNM89PmTJFunTpYvo5qYULF8opp5wiffr0McHSAw88IDt37pQrr7zSlyzq7nx333239O3b14RUc+fONTvqeYMvXe43duxYmTFjhixbtkyqqqpMCKZN0Bu78x4ap6S8qrYBuQZQhSaA2nOgRAJt/dghLtIXQHnuHZKWGGN+pwAAAAAAIDS0mVBq4sSJplH4vHnzTJNxrW5atWqVr1H5rl276pV/HTx40IRJOrZjx46m0mnNmjXSv39/35hbbrnFBFtXXXWVCa5OO+0085511za+9NJLJog6++yzzftPmDBBlixZEuRPH1qcpZW+AMosw8sqlH35pQHHpiREH1qCVxtEJSdEEUABAAAAABDibG5t6Y4m0WWBiYmJpkO+lRuda38t3Y3waNatFpRUmODJEz55gqjsgrKAY9MTY2qDp0N9oDrG+/fmAgAAAACgPXId47W5VXOSNlMphbZNs8v8Yk8AZZbf1e6El1dUHnB856TY2v5Ph5bgOWIjgz5vAAAAAADQNhFKwch1lsn3+4rEFp0gaYmxklNY5ql8MgGUpxH5wZIKv9fpIruuyXG+pXcaQPXOcEh8dESrfA4AAAAAAGANhFKQVV/tksXv
bKhtOv4/iY4Ik/KqGr9xdptIt5QE6dPp0PK7XukOiY3iNAIAAAAAAEeHNKGd0wqpR9/1BlIeGkhpANUzzduA3GECqJ7pDhNYAQAAAAAANBWhVDu3N79EXAFa3d89+WQZ2jutNaYEAAAAAADaAeu2ckez6JIUZ6qi6rLbbNItNaG1pgQAAAAAANoBQql2LtURI9edM8AXTOn9deecaI4DAAAAAAC0FJbvQcYO7iaDeybLd9v2Sf9enSW9Q1xrTwkAAAAAAIQ4KqVgaGVUv84JVEgBAAAAAICgIJQCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0NDpvBm6329w7nU6xKpfLJUVFRRIdHS12O1klAAAAAADBFirX5t58xJuXHA6hVDPQE0ZlZma29lQAAAAAAADaTF6SmJh42Odt7h+LrdCoJHPfvn2SkJAgNptNrJpiaqi2e/ducTgcrT0dAAAAAADaHWeIXJtr1KSBVOfOnY9Y8UWlVDPQH3DXrl0lFOhJb+UTHwAAAAAAq3OEwLX5kSqkvKy7QBEAAAAAAACWRSgFAAAAAACAoCOUghEVFSXz58839wAAAAAAIPii2tm1OY3OAQAAAAAAEHRUSgEAAAAAACDoCKUAAAAAAAAQdIRSAAAAAAAACDpCKQAAAAAAAAQdoRQAAAAAAACCjlAKAAAAAAAAQUcoBQAAAAAAgKAjlAIAAAAAAEDQEUoBAAAAAAAg6AilAAAAAAAAEHSEUgAAAAAAAAg6QikAAAAAAAAEXXjwv2Xocblcsm/fPklISBCbzdba0wEAAAAAAGg1brdbioqKpHPnzmK3H74eilCqGWgglZmZ2drTAAAAAAAAaDN2794tXbt2PezzhFLNQCukvD9sh8MhVq32ys3NldTU1COmmAAAAAAAoGW4QuTa3Ol0muIdb15yOIRSzcC7ZE8DKSuHUuXl5Wb+Vj7xAQAAAACwKleIXZv/WIsj639CAAAAAAAAWA6hFAAAAAAAAIKOUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNARSgEAAAAAACDoCKUAAAAAAAAQdIRSAAAAAAAACDpCKQAAAAAAAAQdoRQAAAAAAACCjlAKAAAAAAAAQUcoBQAAAAAAgKAjlAIAAAAAAEDQEUoBAAAAAAAg6AilAAAAAAAAEHSEUgAAAAAAAAg6QikAAAAAAAAEHaEUAAAAAAAAgo5QCgAAAAAAAEFHKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgsF0otXbpUevToIdHR0TJixAj5/PPPjzj+tddek+OPP96MHzBggLz33nuHHfvb3/5WbDabLF68uAVmDgAAAAAAAEuGUitWrJBZs2bJ/Pnz5csvv5SBAwfKmDFjJCcnJ+D4NWvWyOTJk2X69Ony1Vdfyfjx483t22+/9Rv717/+VT799FPp3LlzED4JAAAAAABA+2apUOrhhx+WGTNmyLRp06R///6ybNkyiY2NlWeeeSbg+EcffVTGjh0rN998s/Tr10/uuusuGTJkiDz++OP1xu3du1euvfZaeemllyQiIiJInwYAAAAAAKD9skwoVVlZKevXr5fRo0f7jtntdvN47dq1AV+jx+uOV1pZVXe8y+WSyy67zARXJ5xwQgt+AgAAAAAAAHiFi0Xk5eVJTU2NpKen1zuujzdt2hTwNVlZWQHH63Gv++67T8LDw+UPf/hDo+dSUVFhbl5Op9MXcOnNinTebrfbsvMHAAAAAMDqXCFybd7Y+VsmlGoJWnmlS/y0P5U2OG+sRYsWyYIFC/yO5+bmSnl5uVj1hCksLDQnv1agAQAAAACA4HKFyLV5UVFRaIVSKSkpEhYWJtnZ2fWO6+OMjIyAr9HjRxr/8ccfmybp3bp18z2v1Vg33nij2YFvx44dAd93zpw5puF63UqpzMxMSU1NFYfDIVY98TWY089g5RMfAAAAAACrcoXItXl0dHRohVKRkZEydOhQWb16tdlB
z/vL0sczZ84M+JqRI0ea56+//nrfsffff98cV9pLKlDPKT2uzdQPJyoqytwa0hPGyieNnvhW/wwAAAAAAFiZLQSuzRs7d8uEUkqrk6ZOnSrDhg2T4cOHm2qmkpISX4A0ZcoU6dKli1lep6677jo5/fTT5aGHHpJzzjlHli9fLl988YX86U9/Ms8nJyebW126+55WUh133HGt8AkBAAAAAADaB0uFUhMnTjR9m+bNm2ealQ8aNEhWrVrla2a+a9euemncqFGj5OWXX5Y77rhDbrvtNunbt6+8+eabcuKJJ7bipwAAAAAAAIDNrd2z0CTaUyoxMdE0I7NyTyntr5WWlmbpEkEAAAAAAKzKFSLX5o3NSaz7CQEAAAAAAGBZhFIAAAAAAAAIOkIpAAAAAAAABB2hFAAAAAAAAIKOUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNARSgEAAAAAACDoCKUAAAAAAAAQdIRSAAAAAAAACDpCKQAAAAAAAAQdoRQAAAAAAACCjlAKAAAAAAAAQUcoBQAAAAAAgKAjlAIAAAAAAEDQEUoBAAAAAAAg6AilAAAAAAAAEHSEUgAAAAAAAAg6QikAAAAAAAAEHaEUAAAAAAAAgo5QCgAAAAAAAEFHKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0hFIAAAAAAAAIOkIpAAAAAAAABB2hFAAAAAAAAILOcqHU0qVLpUePHhIdHS0jRoyQzz///IjjX3vtNTn++OPN+AEDBsh7773ne66qqkpuvfVWczwuLk46d+4sU6ZMkX379gXhkwAAAAAAALRflgqlVqxYIbNmzZL58+fLl19+KQMHDpQxY8ZITk5OwPFr1qyRyZMny/Tp0+Wrr76S8ePHm9u3335rni8tLTXvM3fuXHO/cuVK2bx5s5x33nlB/mQAAAAAAADti83tdrvFIrQy6uSTT5bHH3/cPHa5XJKZmSnXXnutzJ4922/8xIkTpaSkRN555x3fsVNOOUUGDRoky5YtC/g91q1bJ8OHD5edO3dKt27dGjUvp9MpiYmJUlhYKA6HQ6xIf5Ya7qWlpYndbqmsEgAAAACAkOAKkWvzxuYklvmElZWVsn79ehk9erTvmP6C9PHatWsDvkaP1x2vtLLqcOOV/sBsNpt06NChGWcPAAAAAACAusLFIvLy8qSmpkbS09PrHdfHmzZtCviarKysgOP1eCDl5eWmx5Qu+TtSkldRUWFudRNAb6KpNyvSeWvRnFXnDwAAAACA1blC5Nq8sfO3TCjV0rTp+cUXX2x++U8++eQRxy5atEgWLFjgdzw3N9cEW1Y9YbRKTD+/lUsEAQAAAACwKleIXJsXFRWFViiVkpIiYWFhkp2dXe+4Ps7IyAj4Gj3emPHeQEr7SH3wwQc/2hdqzpw5puF63Uop7W2Vmppq6Z5SumxRP4OVT3wAAAAAAKzKFSLX5tHR0aEVSkVGRsrQoUNl9erVZgc97y9LH8+cOTPga0aOHGmev/76633H3n//fXO8YSD1ww8/yIcffijJyck/OpeoqChza0hPGCufNHriW/0zAAAAAABgZbYQuDZv7NwtE0oprU6aOnWqDBs2zOyQt3jxYrO73rRp08zzU6ZMkS5dupjldeq6666T008/XR566CE555xzZPny5fLFF1/In/70J18gddFFF8mXX35pdujTnlXeflNJSUkmCAMAAAAAAEDzs1QoNXHiRNO3ad68eSY8GjRokKxatcrXzHzXrl310rhRo0bJyy+/LHfccYfcdttt0rdvX3nzzTflxBNPNM/v3btX3nrrLfO1vlddWjV1xhlnBPXzAQAAAAAAtBc2t3bPQpNoT6nExETTjMzKPaVycnIkLS3N0iWCAAAAAABYlStErs0bm5NY9xMCAAAAAADAsgilAAAAAAAAEHSEUgAAAAAAALBGKFVQUCBPP/20zJkzR/Lz880x3cFOG4cDAAAAAAAA
zb773jfffCOjR482Dat27NghM2bMkKSkJFm5cqXZ/e6FF1442rcEAAAAAABAO3PUlVKzZs2Syy+/XH744QeJjo72HR83bpx89NFHzT0/AAAAAAAAhKCjDqXWrVsnV199td/xLl26SFZWVnPNCwAAAAAAACHsqEOpqKgocTqdfsf/97//SWpqanPNCwAAAAAAACHsqEOp8847TxYuXChVVVXmsc1mM72kbr31VpkwYUJLzBEAAAAAAADtPZR66KGHpLi4WNLS0qSsrExOP/106dOnjyQkJMg999zTMrMEAAAAAABA+959T3fde//99+U///mP/Pe//zUB1ZAhQ8yOfAAAAAAAAECLhFIvvPCCTJw4UU499VRz86qsrJTly5fLlClTjvYtAQAAAAAA0M4c9fK9adOmSWFhod/xoqIi8xwAAAAAAADQ7KGU2+02zc0b2rNnj1naBwAAAAAAADTb8r3BgwebMEpvZ599toSHH3ppTU2NbN++XcaOHdvYtwMAAAAAAEA71uhQavz48eb+66+/ljFjxkh8fLzvucjISOnRo4dMmDChZWYJAAAAAACA9hlKzZ8/39xr+KSNzqOjo1tyXgAAAAAAAAhhR7373tSpU1tmJgAAAAAAAGg3jjqU0v5RjzzyiLz66quya9cuqaysrPd8fn5+c84PAAAAAAAAIeiod99bsGCBPPzww2YJX2FhocyaNUsuvPBCsdvtcuedd7bMLAEAAAAAANC+Q6mXXnpJ/vznP8uNN95oduCbPHmyPP300zJv3jz59NNPW2aWAAAAAAAAaN+hVFZWlgwYMMB8rTvwabWU+tWvfiXvvvtu888QAAAAAAAAIeeoQ6muXbvK/v37zde9e/eWf/zjH+brdevWSVRUVPPPEAAAAAAAACHnqEOpCy64QFavXm2+vvbaa2Xu3LnSt29fmTJlilxxxRUtMUcAAAAAAAC09933/vjHP/q+1mbn3bt3lzVr1phg6txzz23u+QEAAAAAACAEHXUo1dApp5xibuqLL76QYcOGNce8AAAAAAAAEMKOevlecXGxlJWV1Tv29ddfmyqpESNGNOfcAAAAAAAA0N5Dqd27d8vIkSMlMTHR3GbNmiWlpaWml5SGUXFxcWYZHwAAAAAAANBsy/duvvlmKS8vl0cffVRWrlxp7j/++GMTSG3dutXsygcAAAAAAAA0a6XURx99JE8++aTMnDlTli9fLm63Wy699FJ5/PHHgxpILV26VHr06CHR0dEmEPv888+POP61116T448/3owfMGCAvPfee/We188xb9486dSpk8TExMjo0aPlhx9+aOFPAQAAAAAA0L41OpTKzs6Wnj17mq/T0tIkNjZWfvnLX0owrVixwiwbnD9/vnz55ZcycOBAGTNmjOTk5AQcr8sJJ0+eLNOnT5evvvpKxo8fb27ffvutb8z9998vS5YskWXLlslnn31mliHqe2pVWLviPCCRezebewAAAAAA0Aqc7eva3ObWUqFGCAsLk6ysLElNTTWPHQ6H/Pe///UFVcGglVEnn3yyqc5SLpdLMjMz5dprr5XZs2f7jZ84caKUlJTIO++84zumOwUOGjTIhFD60Tt37iw33nij3HTTTeb5wsJCSU9Pl+eee04mTZrUqHk5nU7TZ0tfqz8Xy1n/vrjfeVJPBnHbbGL75QyRQWe29qwAAAAAAGg/vv5Q3H//86Fr83N/JzJktFhRY3OSRveU0gDnJz/5idhsNt8ufIMHDxa7vX6xVX5+vrSEyspKWb9+vcyZM8d3TL+3Lrdbu3ZtwNfoca2sqkuroN58803z9fbt203Qpu/hpT80Db/0tYcLpSoqKsyt7g/bG5LpzVKcB8RWG0gpc//enzw3AAAAAAAQNDbvvQZTbz8p7l4DRRzJYjWNzUYaHUo9++yz0pry8vKkpqbGVDHVpY83bdoU8DUaOAUar8e9z3uPHW5MIIsWLZIFCxb4Hc/NzbXcsj8tC0xqXLEcAAAAAAAI
EpvbJQe3fi+VXX4iVlNUVNS8odTUqVObMp+QotVadSuwtFJKlxHq0kbLLd+LDvOUBdYJptw2u7ivWWzJNBYAAAAAAMtxHhDbk9f5XZt36N3Pktfmutlcs4ZSrS0lJcX0tdKG63Xp44yMjICv0eNHGu+912O6+17dMdp36nCioqLMrSFdTthwOWOb1yFV5NzfmbJATWH1pLede43Y0jJbe2YAAAAAALQP0bGBr831mt2CGpuNWCZBiYyMlKFDh8rq1avrrVHUxyNHjgz4Gj1ed7x6//33feO1SbsGU3XHaNWT7sJ3uPcMSUNGi/u6ZZJ/7g3m3qqN1AAAAAAAsKwh7e/a3DKVUkqXzOkywmHDhsnw4cNl8eLFZne9adOmmeenTJkiXbp0MT2f1HXXXSenn366PPTQQ3LOOefI8uXL5YsvvpA//cnTxFubtl9//fVy9913S9++fU1INXfuXLMj3/jx46VdcSR71qlasCwQAAAAAICQ4Ghf1+aWCqUmTpxomonPmzfPNCLXJXarVq3yNSrftWtXvRKxUaNGycsvvyx33HGH3HbbbSZ40p33TjzxRN+YW265xQRbV111lRQUFMhpp51m3rOx6x8BAAAAAABw9GxuN1uvNZUu+UtMTJTCwkLrNTqvsxQyJydH0tLSrNcXCwAAAACAEOAKkWvzxuYkjaqUqrvT3I95+OGHGz0WAAAAAAAA7VOjQqmvvvqqUW+mPZoAAAAAAACAZgmlPvzww8YMAwAAAAAAABrFugsUAQAAAAAAENqVUhdeeGGj33DlypVNmQ8AAAAAAADagUaFUtoxHQAAAAAAAAhqKPXss8822zcEAAAAAAAA6CkFAAAAAACAtlkpVVfPnj3FZrMd9vlt27Y1dU4AAAAAAAAIcUcdSl1//fX1HldVVclXX30lq1atkptvvrk55wYAAAAAAIAQddSh1HXXXRfw+NKlS+WLL75ojjkBAAAAAAAgxDVbT6lf/vKX8sYbbzTX2wEAAAAAACCENVso9frrr0tSUlJzvR0AAAAAAABC2FEv3xs8eHC9Rudut1uysrIkNzdXnnjiieaeHwAAAAAAAELQUYdS48ePr/fYbrdLamqqnHHGGXL88cc359wAAAAAAAAQoo46lJo/f37LzAQAAAAAAADtRrP1lAIAAAAAAACavVJKl+nV7SUViD5fXV3d6G8OAAAAAACA9qnRodRf//rXwz63du1aWbJkibhcruaaFwAAAAAAAEJYo0Op888/3+/Y5s2bZfbs2fL222/LpZdeKgsXLmzu+QEAAAAAACAEHVNPqX379smMGTNkwIABZrne119/Lc8//7x07969+WcIAAAAAACA9h1KFRYWyq233ip9+vSRjRs3yurVq02V1IknnthyMwQAAAAAAED7Xb53//33y3333ScZGRnyyiuvBFzOBwAAAAAAADSGze12uxu7+15MTIyMHj1awsLCDjtu5cqV0t44nU5JTEw0lWQOh0OsSJvU5+TkSFpamvldAwAAAACA4HKFyLV5Y3OSRldKTZkyRWw2W3PNDwAAAAAAAO1Yo0Op5557rmVnAgAAAAAAgHbDurVgAAAAAAAAsCxCKQAAAAAAAASdZUKp/Px8ufTSS02DrA4dOsj06dOluLj4iK8pLy+X3//+95KcnCzx8fEyYcIEyc7O9j3/3//+VyZPniyZmZmmiXu/fv3k0UcfDcKnAQAAAAAAaN8sE0ppILVx40Z5//335Z133pGPPvpIrrrqqiO+5oYbbpC3335bXnvtNfn3v/8t+/btkwsvvND3/Pr1601H+xdffNG89+233y5z5syRxx9/PAifCAAAAAAAoP2yud1ut7Rx33//vfTv31/WrVsnw4YNM8dWrVol48aNkz179kjnzp39XqPbDqampsrLL78sF110kTm2adMmUw21du1aOeWUUwJ+L62s0u/3wQcfNPtWh21ZqGw7CQAAAACAVblC5Nq8sTmJJT6hhki6ZM8bSKnR
o0ebX9Bnn30W8DVaBVVVVWXGeR1//PHSrVs3836Hoz+wpKSkZv4EAAAAAAAAqCtcLCArK8ukhHWFh4eb8EifO9xrIiMjTZhVV3p6+mFfs2bNGlmxYoW8++67R5xPRUWFudVNAL2Jpt6sSOetRXNWnT8AAAAAAFbnCpFr88bOv1VDqdmzZ8t99913xDG6lC4Yvv32Wzn//PNl/vz58otf/OKIYxctWiQLFizwO56bm2uaq1v1hNEqMT35rVwiCAAAAACAVblC5Nq8qKio7YdSN954o1x++eVHHNOrVy/JyMgwayrrqq6uNjvy6XOB6PHKykopKCioVy2lu+81fM13330nZ599tmmcfscdd/zovLUZ+qxZs+pVSukOftrDyso9pWw2m/kMVj7xAQAAAACwKleIXJtHR0e3/VBKf8h6+zEjR4404ZL2iRo6dKg5po3I9Zc1YsSIgK/RcREREbJ69WqZMGGCObZ582bZtWuXeT8v3XXvrLPOkqlTp8o999zTqHlHRUWZW0N6wlj5pNET3+qfAQAAAAAAK7OFwLV5Y+duiU+oO+aNHTtWZsyYIZ9//rn85z//kZkzZ8qkSZN8O+/t3bvXNDLX55V2eZ8+fbqpaPrwww9NoDVt2jQTSHl33tMle2eeeaZZrqfjtNeU3nQZHgAAAAAAANp5o3P10ksvmSBKl9lp4qbVT0uWLPE9rzvtaSVUaWmp79gjjzziG6uNyceMGSNPPPGE7/nXX3/dBFAvvviiuXl1795dduzYEcRPBwAAAAAA0L7Y3No9C02iPaW0MkubkVm5p5T27dJdDq1cIggAAAAAgFW5QuTavLE5iXU/IQAAAAAAACyLUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNARSgEAAAAAACDoCKUAAAAAAAAQdIRSAAAAAAAACDpCKQAAAAAAAAQdoRQAAAAAAACCjlAKAAAAAAAAQUcoBQAAAAAAgKAjlAIAAAAAAEDQEUoBAAAAAAAg6AilAAAAAAAAEHSEUgAAAAAAAAg6QikAAAAAAAAEHaEUAAAAAAAAgo5QCgAAAAAAAEFHKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0hFIAAAAAAAAIOkIpAAAAAAAABB2hFAAAAAAAAIKOUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNBZJpTKz8+XSy+9VBwOh3To0EGmT58uxcXFR3xNeXm5/P73v5fk5GSJj4+XCRMmSHZ2dsCxBw4ckK5du4rNZpOCgoIW+hQAAAAAAACwVCilgdTGjRvl/fffl3feeUc++ugjueqqq474mhtuuEHefvttee211+Tf//637Nu3Ty688MKAYzXkOumkk1po9gAAAAAAALBcKPX999/LqlWr5Omnn5YRI0bIaaedJo899pgsX77cBE2BFBYWyv/93//Jww8/LGeddZYMHTpUnn32WVmzZo18+umn9cY++eSTpjrqpptuCtInAgAAAAAAaN8sEUqtXbvWLNkbNmyY79jo0aPFbrfLZ599FvA169evl6qqKjPO6/jjj5du3bqZ9/P67rvvZOHChfLCCy+Y9wMAAAAAAEDLCxcLyMrKkrS0tHrHwsPDJSkpyTx3uNdERkaaMKuu9PR032sqKipk8uTJ8sADD5iwatu2bY2aj75Ob15Op9Pcu1wuc7Minbfb7bbs/AEAAAAAsDpXiFybN3b+rRpKzZ49W+67774fXbrXUubMmSP9+vWT3/zmN0f1ukWLFsmCBQv8jufm5prm6lY9YXTJo578VIwBAAAAABB8rhC5Ni8qKmr7odSNN94ol19++RHH9OrVSzIyMiQnJ6fe8erqarMjnz4XiB6vrKw0vaLqVkvp7nve13zwwQeyYcMGef31181j/aWrlJQUuf322wMGT94wa9asWfUqpTIzMyU1NdXsDmjVE193HtTPYOUTHwAAAAAAq3KFyLV5dHR02w+l9Ies
tx8zcuRIEy5pnyhtWO4NlPSXpY3PA9FxERERsnr1apkwYYI5tnnzZtm1a5d5P/XGG29IWVmZ7zXr1q2TK664Qj7++GPp3bv3YecTFRVlbg3pCWPlk0ZPfKt/BgAAAAAArMwWAtfmjZ27JXpK6RK7sWPHyowZM2TZsmWmgfnMmTNl0qRJ0rlzZzNm7969cvbZZ5uG5cOHD5fExESZPn26qWjS3lNawXTttdeaQOqUU04xr2kYPOXl5fm+X8NeVAAAAAAAAGg+lgil1EsvvWSCKA2eNHHT6qclS5b4ntegSiuhSktLfcceeeQR31htTD5mzBh54oknWukTAAAAAAAAwMvm9jZSwjHTnlJamaXNyKzcU0r7dukuh1YuEQQAAAAAwKpcIXJt3ticxLqfEAAAAAAAAJZFKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0hFIAAAAAAAAIOkIpAAAAAAAABB2hFAAAAAAAAIKOUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNARSgEAAAAAACDoCKUAAAAAAAAQdIRSAAAAAAAACDpCKQAAAAAAAAQdoRQAAAAAAACCjlAKAAAAAAAAQUcoBQAAAAAAgKAjlAIAAAAAAEDQEUoBAAAAAAAg6AilAAAAAAAAEHSEUgAAAAAAAAi68OB/y9DjdrvNvdPpFKtyuVxSVFQk0dHRYreTVQIAAAAAEGyuELk29+Yj3rzkcAilmoGeMCozM7O1pwIAAAAAANBm8pLExMTDPm9z/1hshUYlmfv27ZOEhASx2Wxi1RRTQ7Xdu3eLw+Fo7ekAAAAAANDuOEPk2lyjJg2kOnfufMSKLyqlmoH+gLt27SqhQE96K5/4AAAAAABYnSMErs2PVCHlZd0FigAAAAAAALAsQikAAAAAAAAEHaEUjKioKJk/f765BwAAAAAAwRfVzq7NaXQOAAAAAACAoKNSCgAAAAAAAEFHKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0hFIICjZ5BAAAAAAAdYXXewQ0s+LiYomKipKIiAgTTNlsttaeEgAAAAAA7crWrVvllVdekZKSEjnxxBPl0ksvlbaASim0mO+//14uuOACWbFihVRWVppAioopAAAAAACCZ8OGDTJq1Cj54osv5O2335bHH39c3nvvPWkLqJRCi9i5c6dMmDDBpLFaLRUdHS3nnXeeREZGUjEFAAAAAEAQZGdny8SJE2X69Oly7733Sl5enpx11lmyb98+aQuolEKzq6mpkTfeeEP69Okjn3/+uXTo0MGc/G+99RYVUwAAAAAABMnmzZvNNfjvf/978zglJUUGDhwo//3vf+V3v/uduVZvTYRSaHZhYWEmeZ0yZYo52d99911JT0/3BVMVFRUEUwAAAAAAtLDw8HApLS31LdfT6/KXXnpJ7Ha7qZpavny5XHzxxdJabG6SAbSAqqoq09zcSyukzj//fFM6eNttt5mv9fm//e1v5msAAAAAANC89Br8D3/4g1nF1LdvX/nggw9k5cqVpr2Oev755+Xuu++Wv/71r6YBerDRUwrNQhPW3bt3S2xsrKSlpUnHjh3F5XKZ9LW6utr0knrzzTdl/PjxJpnVJX4ffvihqZw6+eSTpXPnzq39EQAAAAAAsLTS0lJzi4mJMdfhumppyZIlpoeU9n7Ozc2V008/3Te+e/fu5tq9blFJMBFKocm++eYb+fWvf22CJl2apye9dvM/5ZRTfOWCGkxFRUWZyijdke+yyy4zf0A++ugjAikAAAAAAJpo48aNcv3110tWVpZ5fOWVV8rUqVPNNbre9Jpdr8vz8/MlMTHRjPnHP/4hqampptdUayCUQpPoyX7uuefKpEmTTDf/7777TlasWCE/+9nP5IUXXjDHvcGU/gHQIEqT2ISEBBNInXDCCa39EQAAAAAAsLTvv/9ezjzzTHMNrk3NtYfUU089JaNGjTKr
k5QGU1u3bpXrrrtOevToYdru6PW7rmJKTk5ulXnTUwpN8vXXX5uqp7ffftuc1KqsrEzmzZtnSgR1reo555zjW8r3xBNPyMyZM2X9+vUyePDg1p4+AAAAAACWdvDgQRNG9enTR5YuXeo7PnToUBk+fLg8+eSTvmty3XVv9uzZ5rpdVy3dfvvtrVosQqUUmqSwsNCUCHqzTT3Rde3q/fffb07ySy65RL744gvTUE1NnDhRxo4dK7169WrlmQMAAAAAYH179+4Vh8Nhrre9G43pKqWzzz5bDhw4YI7ZbDazemngwIHy2muvSXx8vGm/o8v5WpO9Vb87LO+0006Tn/70pzJnzhyzLlWTVw2m9ITXY4MGDZJXXnnFhFZ6XEsCCaQAAAAAAGgeWumklVLaRsfbPkclJSVJcXGx+Vqv0cPCwsxjDaRUawdSilAKTaIntaaxO3bsMMv1nE6nCaZUly5dzMm+adMm8wfAexwAAAAAADSdtyhkwoQJ5rEWhHivvUtKSsxue166ounOO+80FVNtBcv3cMz0ZNeT/5prrjHN0nRnPV2yp2tStXRQaWVUx44dzUmvfzB0PAAAAAAAaDpvAOW9PtdbdXW1qZbSDca8u+zNnTtX7rnnHtMXWotL2goaneOYadCkJ7O3Ydpdd90l7777rhQUFMh5550nu3fvlnfeeUc+/fRTdtkDAAAAAKAFr82L6yzNU48++qh888030r17d1m0aJF88sknpvl5W8J6KjSKBk+BTvqdO3fKgAED5F//+pdJXu+77z75xS9+IRs2bDDrU9euXUsgBQAAAABAM3O73aYqynttPn78eBM8eenyvWeffdYs22uLgZRi+R5+dHc9LffzNjD3lgZ6T/pTTz1VfvWrX5mG5+r00083N/3DUXctKwAAAAAAODb79u2TdevWSXl5udndfsiQIWapni7T27Ztm5xxxhnyy1/+0ndtrjIyMkyV1HvvvSf9+vWTtojlezis7777TkaNGiW33HKL3HbbbeZY3WDqiiuukIiICFm2bJmvV5R3HSsAAAAAAGi6DRs2yAUXXGD6Nefk5JhjTzzxhJxzzjnmGnzs2LGSkpIiL774Yr3rcX0uKytLOnXqJG0VoRQC2rNnj+kLpeV+eXl5cvPNN8vs2bPrLd2rqqoyoRQAAAAAAGh+W7duNauRfvOb35hrcu3drIGU7qr3/PPPS1xcnFRWVppr87qBVN2CkraM5XvwoyfvG2+8IT179pSZM2fK559/Lvfee695Tv8QEEgBAAAAANCyKisrZenSpWYFk24sptfgHTp0kJNPPtn0dPb2fo6MjPR7rRUCKUUohYAn77hx4yQtLU3OPPNMGTRokCn702793mBK/zBYJXkFAAAAAMBq7Ha79OnTxxSM6DW4t13OWWedJQsXLjQ9oBMSEuq9xmotdQilEJA2TtOTX+m61SuvvNKc2HUrpvRkf/vtt2XkyJFm/SoAAAAAAGge4eHhppdUw55Q3sooba3jDaE2bdokxx9/vKUCKUUoBV8n/71798qBAwdk9OjRJpHVm24vqX8QNHTSxuZKgyk98XXso48+Krt27Wrt6QMAAAAAEDLX5nl5eTJmzBhJT083x73X5rpiyel0SmlpqQmnNISaM2eO3HfffXLw4EFxOByWCqYIpSDffPON/OpXvzJlf//73/9kwIABctVVV5lGavHx8b7G5qmpqTJ9+nQTSOlufLqWde3atW26kz8AAAAAAFa9Np8xY4Zcdtll5trc20JHwygNqGJiYmTBggWm79Snn34qiYmJYjU0BGrnNH2dNGmSXHLJJfLuu++aVFZL/p577jnTOK2oqMgEUt4Galox9d1335k/JJ988okMGzastT8CAAAAAAAheW3+/PPP+67NvT2dNaDSohEtJtGVTB9++KEMHz5crIhQqp3LysqSsrIyc+L36NHDnNgaSGmZ4Jo1a0wJYHl5uTn5tULqxRdflH/84x/mpO/fv39rTx8AAAAA
gHZzba5yc3Nlw4YN8s4778jnn38uQ4cOFasilGrnvGtQvX2hdJ2qHtMk9vTTTzcJ7bp168xzOu7UU0+Vzz77TIYMGdLKMwcAAAAAoP1dm3fp0kVuvPFGWb9+vQwcOFCszObW8he0WxUVFXLaaadJRkaGvPnmm2apnreBmp4aeoIPHjzYlAxabWtJAAAAAABC7drcOz4qKkqsjkqpdkz7ROlJ/Oyzz8pHH30k11xzjTnuPek1gDrvvPMkJyfHHCeQAgAAAACg9a7N3bV1RaEQSClCqXZM+0TpznonnniiSVtfeeUVmTJlimRnZ/vGbN++XTp27GjGAQAAAACA1rs2d9VuQhYqWL7XjjRcfuctBSwuLjalf19//bVpqta9e3dJSkqS5ORk+dvf/iZr1641W1ECAAAAAICm4dr8ECql2oGtW7fKwYMH6530msLqSb9jxw75yU9+YhqmnX322bJx40YZN26caZyWlpZmOvmH2kkPAAAAAECwcW3uj0qpEPff//7XNEN7+umn5Yorrqj33O7du80ueueff778+c9/NmWA2kzNm9rqYy0jBAAAAAAAx45r88AIpUL8pD/11FNl5syZ8sc//tHv+ccee0y2bdsmDz/8cL2k1nvis9seAAAAAABNw7X54RFKhahNmzaZ0r558+bJ3LlzTbL6r3/9S7Zs2WKap/Xt21dSU1NDOnEFAAAAAKA1cW1+ZOE/8jwsSE/mV1991axNveiii8yxn//853LgwAGzTlWbpPXs2dOksCeddFJrTxcAAAAAgJDDtfmPa38xXDug6erVV18tM2bMMGtWNZXt0KGD2VoyNzdXHnzwQbM+9e677zbd/QEAAAAAQPPi2vzHUSkVotLT082JrV38tUu/ft2vXz/z3AUXXCA7d+6U++67TwoLCyU+Pr61pwsAAAAAQMjh2vzICKVCxL59++TLL7+UyspK6datmwwbNsysS73jjjvMSd67d28zTssGNYnt06ePdOzYUSIjI1t76gAAAAAAhASuzY8OoVQI2LBhg4wfP15SUlJMx/4ePXrILbfcIr/+9a+lU6dOkpGR4evUrye9+uc//yldu3aV2NjYVp49AAAAAADWx7X50aOnlMVt3bpVxo0bZ5qm/eMf/5BVq1bJCSecYO41eW24deSuXbvk5ptvlr/85S/y0EMPSVxcXKvOHwAAAAAAq+Pa/NjY3PqTgSVpOeCcOXNkz5495kT2lvs988wzJo3dvHmz6ebvpetXn3rqKVmzZo288sorMmjQoFacPQAAAAAA1se1+bFj+Z7Ft5fUMj9tkqYnvTd5HTVqlGmQVlVVVW/88OHDpaioSBYuXChdunRptXkDAAAAABAquDY/doRSFhYdHW3Wq/bs2bPecd1iMiIiot6Jv379ehk6dKicffbZrTBTAAAAAABCE9fmx46eUhazf/9+U+qn61I1jfWe9LpG1bs+VbeSPHjwoO818+bNk5///Ody4MABk9gCAAAAAIBjx7V586BSykK++eYbOe+88yQqKkqys7NN9349qceMGSNJSUm+EkG92e12UyZ49913y4MPPigff/xxvTWsAAAAAADg6HFt3nxodG4Rubm58rOf/UwuvPBCmT59uikPnDVrlvnDcPHFF8vvf/97SU1NNWNzcnJk7Nix8pOf/ET++te/muZpWh4IAAAAAACOHdfmzYtKKQud+OXl5ebE79Wrlzm2fPlymT17tqxcudJsH6knf2xsrCkF/Prrr2XTpk3y2WeftetO/gAAAAAANBeuzZsXPaUsQhujVVdXS2lpqXlcVlZm7v/4xz/KmWeeKU8++aRs2bLFHOvYsaP87ne/ky+//JKTHgAAAACAZsK1efNi+Z6F6LaRuhb1gw8+MI8rKirMGlZ18sknS58+feSVV14xjzW51TJCAAAAAADQfLg2bz5USrVRJSUlUlRUJE6n03fsqaeeko0bN8oll1xiHutJrwmt0jWt+hovTnoAAAAAAJqGa/OW
RSjVBn333Xdmferpp58u/fr1k5deeskc168fffRRef/99+XXv/61KRvUTv7eBmq6dlX/IFD8BgAAAABA03Bt3vJodN4GT3pNVqdMmSLDhg2T9evXy7Rp06R///4yePBgs+2knuC6LvWkk06S448/XiIjI+Xdd9+VTz/9VMLD+ZUCAAAAANAUXJsHBz2l2pD8/HyZPHmyOZk1dfXSZmkDBgyQJUuW+I5p+eDdd99tXqPlgNdcc435wwEAAAAAAI4d1+bBQ3TXhmjJX0FBgVx00UXmscvlMiWAPXv2NCe40gxRbwkJCXLffffVGwcAAAAAAJqGa/Pg4afVhqSnp8uLL74oP/3pT83jmpoac9+lSxffiW2z2czXdZus6TEAAAAAANB0XJsHD6FUG9O3b19fwhoREWG+1vRVm6V5LVq0SJ5++mlfd39OfAAAAAAAmg/X5sHB8r02ShNXPeG9J7U3jZ03b55Zr/rVV1/ROA0AAAAAgBbEtXnLolKqDfP2oNcTPDMzUx588EG5//775YsvvpCBAwe29vQAAAAAAAh5XJu3HOK8NsybwGqp4J///GdxOBzyySefyJAhQ1p7agAAAAAAtAtcm7ccKqUsYMyYMeZ+zZo1MmzYsNaeDgAAAAAA7Q7X5s3P5vbWoaFNKykpkbi4uNaeBgAAAAAA7RbX5s2LUAoAAAAAAABBx/I9AAAAAAAABB2hFAAAAAAAAIKOUAoAAAAAAABBRygFAAAAAACAoCOUAgAAAAAAQNARSgEAAAAAACDoCKUAAAAAAAAQdIRSAAAAbcjll18uNpvN3CIiIiQ9PV1+/vOfyzPPPCMul6vR7/Pcc89Jhw4dWnSuAAAATUEoBQAA0MaMHTtW9u/fLzt27JC///3vcuaZZ8p1110nv/rVr6S6urq1pwcAANAsCKUAAADamKioKMnIyJAuXbrIkCFD5LbbbpO//e1vJqDSCij18MMPy4ABAyQuLk4yMzPld7/7nRQXF5vn/vWvf8m0adOksLDQV3V15513mucqKirkpptuMu+trx0xYoQZDwAAEGyEUgAAABZw1llnycCBA2XlypXmsd1ulyVLlsjGjRvl+eeflw8++EBuueUW89yoUaNk8eLF4nA4TMWV3jSIUjNnzpS1a9fK8uXL5ZtvvpFf//rXpjLrhx9+aNXPBwAA2h+b2+12t/YkAAAAcKinVEFBgbz55pt+z02aNMkESd99953fc6+//rr89re/lby8PPNYK6quv/56815eu3btkl69epn7zp07+46PHj1ahg8fLvfee2+LfS4AAICGwv2OAAAAoE3S/5eoS/HUP//5T1m0aJFs2rRJnE6n6TVVXl4upaWlEhsbG/D1GzZskJqaGvnJT35S77gu6UtOTg7KZwAAAPAilAIAALCI77//Xnr27GkaoGvT82uuuUbuueceSUpKkk8++USmT58ulZWVhw2ltOdUWFiYrF+/3tzXFR8fH6RPAQAA4EEoBQAAYAHaM0ornW644QYTKrlcLnnooYdMbyn16quv1hsfGRlpqqLqGjx4sDmWk5MjP/3pT4M6fwAAgIYIpQAAANoYXU6XlZVlAqTs7GxZtWqVWaqn1VFTpkyRb7/9VqqqquSxxx6Tc889V/7zn//IsmXL6r1Hjx49TGXU6tWrTYN0rZ7SZXuXXnqpeQ8NtDSkys3NNWNOOukkOeecc1rtMwMAgPaH3fcAAADaGA2hOnXqZIIl3Rnvww8/NDvt/e1vfzPL7jRkevjhh+W+++6TE088UV566SUTWtWlO/Bp4/OJEydKamqq3H///eb4s88+a0KpG2+8UY477jgZP368rFu3Trp169ZKnxYAALRX7L4HAAAAAACAoKNSCgAAAAAAAEFHKAUAAAAAAICgI5QCAAAAAABA0BFKAQAAAAAAIOgIpQAAAAAAABB0hFIAAAAAAAAIOkIpAAAAAAAABB2hFAAAAAAAAIKOUAoAAAAAAABBRygFAAAAAACAoCOU
AgAAAAAAQNARSgEAAAAAAECC7f8D11SrppRFFAIAAAAASUVORK5CYII=",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "try:\n",
+ " import matplotlib.pyplot as plt\n",
+ "\n",
+ " if timeseries:\n",
+ " fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 6), sharex=True)\n",
+ "\n",
+ " ax1.plot(dates, means, marker=\".\", color=\"steelblue\")\n",
+ " ax1.set_ylabel(\"Mean\")\n",
+ " ax1.set_title(\"conv_rate — Daily Trend\")\n",
+ " ax1.grid(True, alpha=0.3)\n",
+ "\n",
+ " ax2.plot(dates, null_rates, marker=\".\", color=\"coral\")\n",
+ " ax2.set_ylabel(\"Null Rate\")\n",
+ " ax2.set_xlabel(\"Date\")\n",
+ " ax2.grid(True, alpha=0.3)\n",
+ "\n",
+ " plt.xticks(rotation=45)\n",
+ " plt.tight_layout()\n",
+ " plt.show() # pragma: allowlist secret\n",
+ "except ImportError:\n",
+ " print(\"Install matplotlib to visualize: pip install matplotlib\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 9: On-Demand Exploration (Transient Compute)\n",
+ "\n",
+ "Compute metrics for an arbitrary date range without storing them. This is useful for ad-hoc investigation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "conv_rate (numeric):\n",
+ " rows=905 nulls=0 null_rate=0.0000\n",
+ " mean=0.5041 stddev=0.1929\n",
+ " p50=0.4964 p95=0.8221 p99=0.9757\n",
+ "\n",
+ "avg_daily_trips (numeric):\n",
+ " rows=905 nulls=0 null_rate=0.0000\n",
+ " mean=20.1525 stddev=4.4410\n",
+ " p50=20.0000 p95=27.0000 p99=31.9600\n",
+ "\n",
+ "vehicle_type (categorical):\n",
+ " rows=905 nulls=0 null_rate=0.0000\n",
+ " unique_values=5\n",
+ " van: 194\n",
+ " sedan: 193\n",
+ " suv: 186\n",
+ " truck: 171\n",
+ " compact: 161\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "transient_result = monitoring.compute_transient(\n",
+ " project=\"monitoring_demo\",\n",
+ " feature_view_name=\"driver_stats\",\n",
+ " feature_names=[\"conv_rate\", \"avg_daily_trips\", \"vehicle_type\"],\n",
+ " start_date=date(2025, 1, 10),\n",
+ " end_date=date(2025, 1, 20),\n",
+ ")\n",
+ "\n",
+ "for fm in transient_result.get(\"metrics\", []):\n",
+ " print(f\"{fm['feature_name']} ({fm['feature_type']}):\")\n",
+ " print(f\" rows={fm['row_count']} nulls={fm['null_count']} null_rate={fm['null_rate']:.4f}\")\n",
+ " if fm[\"feature_type\"] == \"numeric\":\n",
+ " print(f\" mean={fm['mean']:.4f} stddev={fm['stddev']:.4f}\")\n",
+ " print(f\" p50={fm['p50']:.4f} p95={fm['p95']:.4f} p99={fm['p99']:.4f}\")\n",
+ " elif fm[\"feature_type\"] == \"categorical\" and fm.get(\"histogram\"):\n",
+ " hist = fm[\"histogram\"]\n",
+ " print(f\" unique_values={hist['unique_count']}\")\n",
+ " for entry in hist[\"values\"]:\n",
+ " print(f\" {entry['value']}: {entry['count']}\")\n",
+ " print()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 10: REST API Usage\n",
+ "\n",
+ "Once the Feast registry server is running, all monitoring endpoints are available via HTTP.\n",
+ "\n",
+ "```bash\n",
+ "# Start the registry server with its REST API enabled (used by the requests below)\n",
+ "feast serve_registry --rest-api\n",
+ "```\n",
+ "\n",
+ "### Compute metrics via REST"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'job_id': '077f59c5-c341-4fbb-9adc-b0111fc9228b', 'status': 'completed', 'computed_feature_views': 1, 'computed_features': 20, 'granularities': ['biweekly', 'daily', 'monthly', 'quarterly', 'weekly'], 'duration_ms': 98}\n",
+ "[{'project_id': 'monitoring_demo', 'feature_view_name': 'driver_stats', 'feature_name': 'conv_rate', 'metric_date': '2025-01-01', 'granularity': 'daily', 'data_source_type': 'batch', 'computed_at': '2026-04-21T13:41:42.687597+05:30', 'is_baseline': True, 'feature_type': 'numeric', 'row_count': 4922, 'null_count': 0, 'null_rate': 0.0, 'mean': 0.4988999058272324, 'stddev': 0.1975387054069251, 'min_val': 0.0, 'max_val': 1.0, 'p50': 0.4998365219303598, 'p75': 0.633892663793526, 'p90': 0.7521919750314627, 'p95': 0.825733080299169, 'p99': 0.9640086762359101, 'histogram': {'bins': [0.0, 0.05, 0.1, 0.15000000000000002, 0.2, 0.25, 0.30000000000000004, 0.35000000000000003, 0.4, 0.45, 0.5, 0.55, 0.6000000000000001, 0.65, 0.7000000000000001, 0.75, 0.8, 0.8500000000000001, 0.9, 0.9500000000000001, 1.0], 'counts': [53, 67, 75, 146, 180, 267, 355, 399, 432, 493, 505, 420, 411, 330, 283, 186, 124, 93, 46, 57], 'bin_width': 0.05}}, {'project_id': 'monitoring_demo', 'feature_view_name': 'driver_stats', 'feature_name': 'conv_rate', 'metric_date': '2025-02-28', 'granularity': 'daily', 'data_source_type': 'batch', 'computed_at': '2026-04-21T19:02:39.068597+05:30', 'is_baseline': False, 'feature_type': 'numeric', 'row_count': 104, 'null_count': 0, 'null_rate': 0.0, 'mean': 0.5201334885346333, 'stddev': 0.21216576270117404, 'min_val': 0.09993354474902831, 'max_val': 1.0, 'p50': 0.5065079886167952, 'p75': 0.6963620898617928, 'p90': 0.7809868206291576, 'p95': 0.8538056054296318, 'p99': 0.9187701931117264, 'histogram': {'bins': [0.09993354474902831, 0.1449368675115769, 0.18994019027412548, 0.23494351303667405, 0.27994683579922264, 0.32495015856177123, 0.3699534813243198, 0.4149568040868684, 0.459960126849417, 0.5049634496119656, 0.5499667723745142, 0.5949700951370628, 0.6399734178996113, 0.6849767406621599, 0.7299800634247084, 0.774983386187257, 0.8199867089498056, 0.8649900317123542, 0.9099933544749028, 0.9549966772374514, 1.0], 'counts': [4, 1, 6, 7, 5, 5, 7, 6, 11, 5, 4, 10, 5, 8, 9, 
3, 4, 2, 1, 1], 'bin_width': 0.045003322762548585}}]\n",
+ "[{'project_id': 'monitoring_demo', 'feature_view_name': 'driver_stats', 'feature_name': 'conv_rate', 'metric_date': '2025-01-01', 'granularity': 'daily', 'data_source_type': 'batch', 'computed_at': '2026-04-21T13:41:42.687597+05:30', 'is_baseline': True, 'feature_type': 'numeric', 'row_count': 4922, 'null_count': 0, 'null_rate': 0.0, 'mean': 0.4988999058272324, 'stddev': 0.1975387054069251, 'min_val': 0.0, 'max_val': 1.0, 'p50': 0.4998365219303598, 'p75': 0.633892663793526, 'p90': 0.7521919750314627, 'p95': 0.825733080299169, 'p99': 0.9640086762359101, 'histogram': {'bins': [0.0, 0.05, 0.1, 0.15000000000000002, 0.2, 0.25, 0.30000000000000004, 0.35000000000000003, 0.4, 0.45, 0.5, 0.55, 0.6000000000000001, 0.65, 0.7000000000000001, 0.75, 0.8, 0.8500000000000001, 0.9, 0.9500000000000001, 1.0], 'counts': [53, 67, 75, 146, 180, 267, 355, 399, 432, 493, 505, 420, 411, 330, 283, 186, 124, 93, 46, 57], 'bin_width': 0.05}}]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# This cell is for reference — run it when the registry server is up.\n",
+ "\n",
+ "import requests\n",
+ "\n",
+ "BASE_URL = \"http://localhost:6572/api/v1\"\n",
+ "\n",
+ "# Auto-compute all metrics\n",
+ "resp = requests.post(f\"{BASE_URL}/monitoring/auto_compute\", json={\n",
+ " \"project\": \"monitoring_demo\",\n",
+ "})\n",
+ "print(resp.json())\n",
+ "\n",
+ "# Read per-feature metrics\n",
+ "resp = requests.get(f\"{BASE_URL}/monitoring/metrics/features\", params={\n",
+ " \"project\": \"monitoring_demo\",\n",
+ " \"feature_view_name\": \"driver_stats\",\n",
+ " \"feature_name\": \"conv_rate\",\n",
+ " \"granularity\": \"daily\",\n",
+ " \"data_source_type\": \"batch\",\n",
+ "})\n",
+ "print(resp.json())\n",
+ "\n",
+ "# Read baseline\n",
+ "resp = requests.get(f\"{BASE_URL}/monitoring/metrics/baseline\", params={\n",
+ " \"project\": \"monitoring_demo\",\n",
+ " \"feature_view_name\": \"driver_stats\",\n",
+ " \"feature_name\": \"conv_rate\",\n",
+ " \"data_source_type\": \"batch\",\n",
+ "})\n",
+ "print(resp.json())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 11: Monitoring Feature Serving Logs\n",
+ "\n",
+ "If your feature service has logging enabled, you can compute metrics from actual production traffic.\n",
+ "\n",
+ "### Define a feature service with logging"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "See the code cell above for the logging config pattern.\n",
+ "Once applied, log metrics can be computed with:\n",
+ " CLI: feast monitor run --source-type log\n",
+ " API: POST /monitoring/compute/log\n",
+ " SDK: monitoring.compute_log_metrics(project, feature_service_name)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Example feature service definition with logging\n",
+ "#\n",
+ "# from feast import FeatureService, LoggingConfig\n",
+ "# from feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source import (\n",
+ "# PostgreSQLLoggingDestination,\n",
+ "# )\n",
+ "#\n",
+ "# driver_service = FeatureService(\n",
+ "# name=\"driver_service\",\n",
+ "# features=[driver_stats_fv],\n",
+ "# logging_config=LoggingConfig(\n",
+ "# destination=PostgreSQLLoggingDestination(table_name=\"feast_driver_logs\"),\n",
+ "# sample_rate=1.0,\n",
+ "# ),\n",
+ "# )\n",
+ "print(\"See the code cell above for the logging config pattern.\")\n",
+ "print(\"Once applied, log metrics can be computed with:\")\n",
+ "print(\" CLI: feast monitor run --source-type log\")\n",
+ "print(\" API: POST /monitoring/compute/log\")\n",
+ "print(\" SDK: monitoring.compute_log_metrics(project, feature_service_name)\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Compute log metrics (SDK)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Uncomment when you have a feature service with logging enabled\n",
+ "#\n",
+ "# result = monitoring.compute_log_metrics(\n",
+ "# project=\"monitoring_demo\",\n",
+ "# feature_service_name=\"driver_service\",\n",
+ "# granularity=\"daily\",\n",
+ "# )\n",
+ "# print(result)\n",
+ "\n",
+ "# Or auto-compute all log metrics\n",
+ "# result = monitoring.auto_compute_log_metrics(project=\"monitoring_demo\")\n",
+ "# print(result)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Read log vs. batch metrics side-by-side"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Uncomment the cell above once log metrics have been computed.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Compare batch vs. log metrics for the same feature\n",
+ "#\n",
+ "# batch = monitoring.get_feature_metrics(\n",
+ "# project=\"monitoring_demo\",\n",
+ "# feature_view_name=\"driver_stats\",\n",
+ "# feature_name=\"conv_rate\",\n",
+ "# data_source_type=\"batch\",\n",
+ "# granularity=\"daily\",\n",
+ "# )\n",
+ "#\n",
+ "# log = monitoring.get_feature_metrics(\n",
+ "# project=\"monitoring_demo\",\n",
+ "# feature_view_name=\"driver_stats\",\n",
+ "# feature_name=\"conv_rate\",\n",
+ "# data_source_type=\"log\",\n",
+ "# granularity=\"daily\",\n",
+ "# )\n",
+ "#\n",
+ "# print(\"Batch metrics:\")\n",
+ "# for m in batch[:3]:\n",
+ "# print(f\" {m['metric_date']}: mean={m['mean']:.4f}\")\n",
+ "#\n",
+ "# print(\"\\nLog metrics:\")\n",
+ "# for m in log[:3]:\n",
+ "# print(f\" {m['metric_date']}: mean={m['mean']:.4f}\")\n",
+ "\n",
+ "print(\"Uncomment the cell above once log metrics have been computed.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 12: Scheduling in Production\n",
+ "\n",
+ "### Cron (simplest)\n",
+ "\n",
+ "```bash\n",
+ "# Compute all batch + log metrics daily at 2 AM\n",
+ "0 2 * * * cd /path/to/feast/repo && feast monitor run --source-type all >> /var/log/feast-monitor.log 2>&1\n",
+ "```\n",
+ "\n",
+ "### Airflow\n",
+ "\n",
+ "```python\n",
+ "from airflow.operators.bash import BashOperator\n",
+ "\n",
+ "monitor_task = BashOperator(\n",
+ " task_id=\"feast_monitor\",\n",
+ " bash_command=\"feast monitor run --source-type all\",\n",
+ " cwd=\"/path/to/feast/repo\",\n",
+ ")\n",
+ "```\n",
+ "\n",
+ "### Kubernetes CronJob\n",
+ "\n",
+ "```yaml\n",
+ "apiVersion: batch/v1\n",
+ "kind: CronJob\n",
+ "metadata:\n",
+ " name: feast-monitor\n",
+ "spec:\n",
+ " schedule: \"0 2 * * *\"\n",
+ " jobTemplate:\n",
+ " spec:\n",
+ " template:\n",
+ " spec:\n",
+ " containers:\n",
+ " - name: feast-monitor\n",
+ " image: feast-image:latest\n",
+ " command: [\"feast\", \"monitor\", \"run\", \"--source-type\", \"all\"]\n",
+ " workingDir: /feast/repo\n",
+ " volumeMounts:\n",
+ " - name: feast-repo\n",
+ " mountPath: /feast/repo\n",
+ " restartPolicy: OnFailure\n",
+ " volumes:\n",
+ " - name: feast-repo\n",
+ " configMap:\n",
+ " name: feast-repo-config\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary\n",
+ "\n",
+ "| Capability | CLI | REST API | SDK |\n",
+ "|-----------|-----|----------|-----|\n",
+ "| Auto-compute (all granularities) | `feast monitor run` | `POST /monitoring/auto_compute` | `monitoring.auto_compute_metrics()` |\n",
+ "| Targeted compute | `feast monitor run --feature-view X --granularity daily` | `POST /monitoring/compute` | `monitoring.compute_metrics()` |\n",
+ "| Set baseline | `feast monitor run --set-baseline` | `POST /monitoring/compute` (with `set_baseline: true`) | `monitoring.compute_metrics(set_baseline=True)` |\n",
+ "| Log metrics | `feast monitor run --source-type log` | `POST /monitoring/compute/log` | `monitoring.compute_log_metrics()` |\n",
+ "| On-demand exploration | — | `POST /monitoring/compute/transient` | `monitoring.compute_transient()` |\n",
+ "| Read metrics | — | `GET /monitoring/metrics/*` | `monitoring.get_feature_metrics()` etc. |\n",
+ "| Read baseline | — | `GET /monitoring/metrics/baseline` | `monitoring.get_baseline()` |\n",
+ "| Time-series | — | `GET /monitoring/metrics/timeseries` | `monitoring.get_timeseries()` |"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv312",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.11"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/infra/feast-operator/api/v1/featurestore_types.go b/infra/feast-operator/api/v1/featurestore_types.go
index 81e1dfa14c1..8ea38a3af5f 100644
--- a/infra/feast-operator/api/v1/featurestore_types.go
+++ b/infra/feast-operator/api/v1/featurestore_types.go
@@ -127,6 +127,9 @@ type FeatureStoreSpec struct {
AuthzConfig *AuthzConfig `json:"authz,omitempty"`
CronJob *FeastCronJob `json:"cronJob,omitempty"`
BatchEngine *BatchEngineConfig `json:"batchEngine,omitempty"`
+ // DataQualityMonitoring configures Data Quality Monitoring behaviour.
+ // +optional
+ DataQualityMonitoring *DataQualityMonitoringConfig `json:"dqm,omitempty"`
// Replicas is the desired number of pod replicas. Used by the scale sub-resource.
// Mutually exclusive with services.scaling.autoscaling.
// +kubebuilder:default=1
@@ -229,6 +232,13 @@ type BatchEngineConfig struct {
ConfigMapKey string `json:"configMapKey,omitempty"`
}
+// NOTE: the doc comments on this type and its fields are mirrored verbatim in the
+// CRD schema descriptions and the API reference; keep those in sync when editing.
+// AutoBaseline is a pointer so that an unset value (nil) can be told apart from
+// an explicit false.
+
+// DataQualityMonitoringConfig defines the Data Quality Monitoring configuration.
+type DataQualityMonitoringConfig struct {
+ // AutoBaseline controls whether baseline distribution is computed automatically on feast apply. Defaults to true.
+ // +kubebuilder:default=true
+ AutoBaseline *bool `json:"autoBaseline,omitempty"`
+}
+
// JobSpec describes how the job execution will look like.
type JobSpec struct {
// PodTemplateAnnotations are annotations to be applied to the CronJob's PodTemplate
diff --git a/infra/feast-operator/api/v1/zz_generated.deepcopy.go b/infra/feast-operator/api/v1/zz_generated.deepcopy.go
index 6d31e31a9de..a662707e7eb 100644
--- a/infra/feast-operator/api/v1/zz_generated.deepcopy.go
+++ b/infra/feast-operator/api/v1/zz_generated.deepcopy.go
@@ -165,6 +165,26 @@ func (in *DefaultCtrConfigs) DeepCopy() *DefaultCtrConfigs {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DataQualityMonitoringConfig) DeepCopyInto(out *DataQualityMonitoringConfig) {
+ // Shallow-copy all fields first; the pointer field is re-allocated below.
+ *out = *in
+ if in.AutoBaseline != nil {
+ // in/out are shadowed with pointers to the AutoBaseline fields so the copy
+ // gets its own bool instead of sharing the receiver's.
+ in, out := &in.AutoBaseline, &out.AutoBaseline
+ *out = new(bool)
+ **out = **in
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataQualityMonitoringConfig.
+func (in *DataQualityMonitoringConfig) DeepCopy() *DataQualityMonitoringConfig {
+ // A nil receiver copies to nil rather than to an empty config.
+ if in == nil {
+ return nil
+ }
+ out := new(DataQualityMonitoringConfig)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *FeastCronJob) DeepCopyInto(out *FeastCronJob) {
*out = *in
@@ -450,6 +470,11 @@ func (in *FeatureStoreSpec) DeepCopyInto(out *FeatureStoreSpec) {
*out = new(BatchEngineConfig)
(*in).DeepCopyInto(*out)
}
+ if in.DataQualityMonitoring != nil {
+ in, out := &in.DataQualityMonitoring, &out.DataQualityMonitoring
+ *out = new(DataQualityMonitoringConfig)
+ (*in).DeepCopyInto(*out)
+ }
if in.Replicas != nil {
in, out := &in.Replicas, &out.Replicas
*out = new(int32)
diff --git a/infra/feast-operator/config/crd/bases/feast.dev_featurestores.yaml b/infra/feast-operator/config/crd/bases/feast.dev_featurestores.yaml
index e1a1adfabe8..8e4f907ccc5 100644
--- a/infra/feast-operator/config/crd/bases/feast.dev_featurestores.yaml
+++ b/infra/feast-operator/config/crd/bases/feast.dev_featurestores.yaml
@@ -529,6 +529,15 @@ spec:
description: The time zone name for the given schedule, see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones.
type: string
type: object
+ dqm:
+ description: DataQualityMonitoring configures Data Quality Monitoring behaviour.
+ properties:
+ autoBaseline:
+ default: true
+ description: AutoBaseline controls whether baseline distribution
+ is computed automatically on feast apply. Defaults to true.
+ type: boolean
+ type: object
feastProject:
description: FeastProject is the Feast project id.
pattern: ^[A-Za-z0-9][A-Za-z0-9_-]*$
@@ -6487,6 +6496,15 @@ spec:
https://en.wikipedia.org/wiki/List_of_tz_database_time_zones.
type: string
type: object
+ dqm:
+ description: DataQualityMonitoring configures Data Quality Monitoring behaviour.
+ properties:
+ autoBaseline:
+ default: true
+ description: AutoBaseline controls whether baseline distribution
+ is computed automatically on feast apply. Defaults to true.
+ type: boolean
+ type: object
feastProject:
description: FeastProject is the Feast project id.
pattern: ^[A-Za-z0-9][A-Za-z0-9_-]*$
diff --git a/infra/feast-operator/config/samples/v1_featurestore_serving.yaml b/infra/feast-operator/config/samples/v1_featurestore_serving.yaml
index f60640624c9..412499412e6 100644
--- a/infra/feast-operator/config/samples/v1_featurestore_serving.yaml
+++ b/infra/feast-operator/config/samples/v1_featurestore_serving.yaml
@@ -26,8 +26,8 @@ spec:
push: true # push/write request counters
materialization: true # materialization counters and duration histograms
freshness: false # feature freshness gauges (can be expensive at scale)
- # Example: when a future SDK adds "registry_sync", enable it here
- # registry_sync: false
+ offline_features: true # offline store retrieval counters, latency, row count
+ audit_logging: false # structured JSON audit logs via the feast.audit logger
offlinePushBatching:
enabled: true
batchSize: 1000 # max rows per offline write batch
diff --git a/infra/feast-operator/dist/install.yaml b/infra/feast-operator/dist/install.yaml
index c466442b8e8..402bdcbcfcf 100644
--- a/infra/feast-operator/dist/install.yaml
+++ b/infra/feast-operator/dist/install.yaml
@@ -537,6 +537,15 @@ spec:
description: The time zone name for the given schedule, see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones.
type: string
type: object
+ dqm:
+ description: DataQualityMonitoring configures Data Quality Monitoring behaviour.
+ properties:
+ autoBaseline:
+ default: true
+ description: AutoBaseline controls whether baseline distribution
+ is computed automatically on feast apply. Defaults to true.
+ type: boolean
+ type: object
feastProject:
description: FeastProject is the Feast project id.
pattern: ^[A-Za-z0-9][A-Za-z0-9_-]*$
@@ -6495,6 +6504,15 @@ spec:
https://en.wikipedia.org/wiki/List_of_tz_database_time_zones.
type: string
type: object
+ dqm:
+ description: DataQualityMonitoring configures Data Quality Monitoring behaviour.
+ properties:
+ autoBaseline:
+ default: true
+ description: AutoBaseline controls whether baseline distribution
+ is computed automatically on feast apply. Defaults to true.
+ type: boolean
+ type: object
feastProject:
description: FeastProject is the Feast project id.
pattern: ^[A-Za-z0-9][A-Za-z0-9_-]*$
diff --git a/infra/feast-operator/docs/api/markdown/ref.md b/infra/feast-operator/docs/api/markdown/ref.md
index 4f57ff865f5..d0872d596f3 100644
--- a/infra/feast-operator/docs/api/markdown/ref.md
+++ b/infra/feast-operator/docs/api/markdown/ref.md
@@ -121,6 +121,20 @@ _Appears in:_
| `image` _string_ | |
+#### DataQualityMonitoringConfig
+
+
+
+DataQualityMonitoringConfig defines the Data Quality Monitoring configuration.
+
+_Appears in:_
+- [FeatureStoreSpec](#featurestorespec)
+
+| Field | Description |
+| --- | --- |
+| `autoBaseline` _boolean_ | AutoBaseline controls whether baseline distribution is computed automatically on feast apply. Defaults to true. |
+
+
#### FeastCronJob
@@ -275,6 +289,7 @@ _Appears in:_
| `authz` _[AuthzConfig](#authzconfig)_ | |
| `cronJob` _[FeastCronJob](#feastcronjob)_ | |
| `batchEngine` _[BatchEngineConfig](#batchengineconfig)_ | |
+| `dqm` _[DataQualityMonitoringConfig](#dataqualitymonitoringconfig)_ | DataQualityMonitoring configures Data Quality Monitoring behaviour. |
| `replicas` _integer_ | Replicas is the desired number of pod replicas. Used by the scale sub-resource.
Mutually exclusive with services.scaling.autoscaling. |
| `materialization` _[MaterializationConfig](#materializationconfig)_ | Materialization controls feature materialization behavior (batch size, pull strategy).
diff --git a/infra/feast-operator/internal/controller/services/repo_config.go b/infra/feast-operator/internal/controller/services/repo_config.go
index aa33819c2b4..272f671ecc8 100644
--- a/infra/feast-operator/internal/controller/services/repo_config.go
+++ b/infra/feast-operator/internal/controller/services/repo_config.go
@@ -106,6 +106,10 @@ func getServiceRepoConfig(
}
}
+ if appliedSpec.DataQualityMonitoring != nil {
+ setRepoConfigDataQualityMonitoring(appliedSpec.DataQualityMonitoring, &repoConfig)
+ }
+
return repoConfig, nil
}
@@ -486,6 +490,15 @@ func coerceStringToYamlType(v string) interface{} {
return v
}
+// setRepoConfigDataQualityMonitoring copies the DQM settings from the FeatureStore
+// spec into repoConfig. When AutoBaseline is nil the dqm section of repoConfig is
+// left untouched. dqmConfig must be non-nil.
+func setRepoConfigDataQualityMonitoring(dqmConfig *feastdevv1.DataQualityMonitoringConfig, repoConfig *RepoConfig) {
+	if autoBaseline := dqmConfig.AutoBaseline; autoBaseline != nil {
+		repoConfig.DataQualityMonitoring = &DataQualityMonitoringYamlConfig{AutoBaseline: *autoBaseline}
+	}
+}
+
func (feast *FeastServices) getClientFeatureStoreYaml() ([]byte, error) {
clientRepo := getClientRepoConfig(feast.Handler.FeatureStore, feast)
return yaml.Marshal(clientRepo)
diff --git a/infra/feast-operator/internal/controller/services/repo_config_test.go b/infra/feast-operator/internal/controller/services/repo_config_test.go
index 9550068d251..01c1a1d2c39 100644
--- a/infra/feast-operator/internal/controller/services/repo_config_test.go
+++ b/infra/feast-operator/internal/controller/services/repo_config_test.go
@@ -318,6 +318,30 @@ var _ = Describe("Repo Config", func() {
Expect(repoConfig.OfflineStore).To(Equal(expectedOfflineConfig))
Expect(repoConfig.OnlineStore).To(Equal(expectedOnlineConfig))
Expect(repoConfig.Registry).To(Equal(expectedRegistryConfig))
+
+ By("Having DQM config with auto_baseline disabled")
+ featureStore = minimalFeatureStore()
+ dqmAutoBaseline := false
+ featureStore.Spec.DataQualityMonitoring = &feastdevv1.DataQualityMonitoringConfig{
+ AutoBaseline: &dqmAutoBaseline,
+ }
+ ApplyDefaultsToStatus(featureStore)
+ repoConfig, err = getServiceRepoConfig(featureStore, emptyMockExtractConfigFromSecret, emptyMockExtractConfigFromConfigMap, false)
+ Expect(err).NotTo(HaveOccurred())
+ Expect(repoConfig.DataQualityMonitoring).NotTo(BeNil())
+ Expect(repoConfig.DataQualityMonitoring.AutoBaseline).To(BeFalse())
+
+ fsYaml, marshalErr := yaml.Marshal(repoConfig)
+ Expect(marshalErr).NotTo(HaveOccurred())
+ Expect(string(fsYaml)).To(ContainSubstring("dqm:"))
+ Expect(string(fsYaml)).To(ContainSubstring("auto_baseline: false"))
+
+ By("Having no DQM config — dqm should be nil")
+ featureStore = minimalFeatureStore()
+ ApplyDefaultsToStatus(featureStore)
+ repoConfig, err = getServiceRepoConfig(featureStore, emptyMockExtractConfigFromSecret, emptyMockExtractConfigFromConfigMap, false)
+ Expect(err).NotTo(HaveOccurred())
+ Expect(repoConfig.DataQualityMonitoring).To(BeNil())
})
It("should set feature_server block with type local and all options", func() {
diff --git a/infra/feast-operator/internal/controller/services/services_types.go b/infra/feast-operator/internal/controller/services/services_types.go
index a68772806c5..6b70358b166 100644
--- a/infra/feast-operator/internal/controller/services/services_types.go
+++ b/infra/feast-operator/internal/controller/services/services_types.go
@@ -271,6 +271,7 @@ type RepoConfig struct {
FeatureServer *FeatureServerYamlConfig `yaml:"feature_server,omitempty"`
Materialization *MaterializationYamlConfig `yaml:"materialization,omitempty"`
OpenLineage *OpenLineageYamlConfig `yaml:"openlineage,omitempty"`
+ DataQualityMonitoring *DataQualityMonitoringYamlConfig `yaml:"dqm,omitempty"`
}
// FeatureServerYamlConfig maps to the feature_server section of feature_store.yaml.
@@ -298,6 +299,11 @@ type MetricsYamlConfig struct {
Categories map[string]interface{} `yaml:",inline,omitempty"`
}
+// DataQualityMonitoringYamlConfig mirrors the Python DqmConfig in feature_store.yaml.
+type DataQualityMonitoringYamlConfig struct {
+ // Deliberately no omitempty: an explicit false has to be written out as
+ // "auto_baseline: false" rather than being dropped from the YAML.
+ AutoBaseline bool `yaml:"auto_baseline"`
+}
+
// MaterializationYamlConfig maps to the materialization section of feature_store.yaml.
// ExtraConfig is merged inline so future Feast MaterializationConfig fields appear
// at the same YAML level as the typed fields above.
diff --git a/sdk/python/feast/api/registry/rest/__init__.py b/sdk/python/feast/api/registry/rest/__init__.py
index 14db40d7af6..6cc5a99934a 100644
--- a/sdk/python/feast/api/registry/rest/__init__.py
+++ b/sdk/python/feast/api/registry/rest/__init__.py
@@ -7,6 +7,7 @@
from feast.api.registry.rest.features import get_feature_router
from feast.api.registry.rest.lineage import get_lineage_router
from feast.api.registry.rest.metrics import get_metrics_router
+from feast.api.registry.rest.monitoring import get_monitoring_router
from feast.api.registry.rest.permissions import get_permission_router
from feast.api.registry.rest.projects import get_project_router
from feast.api.registry.rest.saved_datasets import get_saved_dataset_router
@@ -25,3 +26,4 @@ def register_all_routes(app: FastAPI, grpc_handler, server=None):
app.include_router(get_saved_dataset_router(grpc_handler))
app.include_router(get_search_router(grpc_handler))
app.include_router(get_metrics_router(grpc_handler, server))
+ app.include_router(get_monitoring_router(grpc_handler, server))
diff --git a/sdk/python/feast/api/registry/rest/monitoring.py b/sdk/python/feast/api/registry/rest/monitoring.py
new file mode 100644
index 00000000000..9cd7257f667
--- /dev/null
+++ b/sdk/python/feast/api/registry/rest/monitoring.py
@@ -0,0 +1,376 @@
+import logging
+from datetime import date
+from typing import Any, Dict, List, Optional
+
+from fastapi import APIRouter, HTTPException, Query
+from pydantic import BaseModel, Field
+
+from feast.infra.offline_stores.offline_store import OfflineStore
+from feast.permissions.action import AuthzedAction
+from feast.permissions.security_manager import assert_permissions
+
+logger = logging.getLogger(__name__)
+
+VALID_GRANULARITIES = OfflineStore.MONITORING_VALID_GRANULARITIES
+
+
+class ComputeMetricsRequest(BaseModel):
+ """Request body for ``POST /monitoring/compute``."""
+
+ project: str
+ # When set, the endpoint asserts UPDATE permission on this feature view.
+ feature_view_name: Optional[str] = None
+ # Forwarded to the job parameters only when non-empty.
+ feature_names: Optional[List[str]] = None
+ # start_date / end_date are forwarded to the job parameters as the raw
+ # strings received, and only when non-empty.
+ start_date: Optional[str] = None
+ end_date: Optional[str] = None
+ # Rejected with HTTP 400 by the endpoint unless it is in VALID_GRANULARITIES.
+ granularity: str = Field("daily")
+ set_baseline: bool = False
+
+
+class AutoComputeRequest(BaseModel):
+ """Request body for ``POST /monitoring/auto_compute``."""
+
+ project: str
+ # Optional; when set, the endpoint asserts UPDATE permission on this feature
+ # view and passes the name on to the submitted job.
+ feature_view_name: Optional[str] = None
+
+
+class ComputeLogMetricsRequest(BaseModel):
+ """Request body for ``POST /monitoring/compute/log``."""
+
+ project: str
+ # Required; the endpoint looks this feature service up in the registry and
+ # asserts UPDATE permission on it.
+ feature_service_name: str
+ # start_date / end_date are parsed by the endpoint with date.fromisoformat
+ # when non-empty; otherwise None is passed on.
+ start_date: Optional[str] = None
+ end_date: Optional[str] = None
+ # Rejected with HTTP 400 by the endpoint unless it is in VALID_GRANULARITIES.
+ granularity: str = Field("daily")
+ set_baseline: bool = False
+
+
+class AutoComputeLogRequest(BaseModel):
+ """Request body for ``POST /monitoring/auto_compute/log``."""
+
+ project: str
+ # Optional; when set, the endpoint asserts UPDATE permission on this feature
+ # service before computing.
+ feature_service_name: Optional[str] = None
+
+
+class ComputeTransientRequest(BaseModel):
+ """Request body for ``POST /monitoring/compute/transient``."""
+
+ project: str
+ # Required; the endpoint asserts DESCRIBE permission on this feature view.
+ feature_view_name: str
+ feature_names: Optional[List[str]] = None
+ # start_date / end_date are parsed by the endpoint with date.fromisoformat
+ # when non-empty; otherwise None is passed on.
+ start_date: Optional[str] = None
+ end_date: Optional[str] = None
+
+
+def get_monitoring_router(grpc_handler, server=None):
+ router = APIRouter()
+
+ _monitoring_service = None
+
+ def _get_monitoring_service():
+ nonlocal _monitoring_service
+ if _monitoring_service is None:
+ from feast.monitoring.monitoring_service import MonitoringService
+
+ store = server.store if server else grpc_handler.store
+ _monitoring_service = MonitoringService(store)
+ return _monitoring_service
+
+ def _get_store():
+ return server.store if server else grpc_handler.store
+
+ # ------------------------------------------------------------------ #
+ # DQM Job: submit and track
+ # ------------------------------------------------------------------ #
+
+ @router.post("/monitoring/compute", tags=["Monitoring"])
+ async def compute_metrics(request: ComputeMetricsRequest):
+ """Submit a DQM job to compute and store metrics. Returns job_id."""
+ if request.granularity not in VALID_GRANULARITIES:
+ raise HTTPException(
+ status_code=400,
+ detail=f"Invalid granularity '{request.granularity}'. "
+ f"Must be one of {VALID_GRANULARITIES}",
+ )
+
+ store = _get_store()
+ if request.feature_view_name:
+ fv = store.registry.get_feature_view(
+ name=request.feature_view_name, project=request.project
+ )
+ assert_permissions(fv, actions=[AuthzedAction.UPDATE])
+
+ svc = _get_monitoring_service()
+
+ params: Dict[str, Any] = {}
+ if request.start_date:
+ params["start_date"] = request.start_date
+ if request.end_date:
+ params["end_date"] = request.end_date
+ if request.feature_names:
+ params["feature_names"] = request.feature_names
+ params["granularity"] = request.granularity
+ params["set_baseline"] = request.set_baseline
+
+ job_id = svc.submit_job(
+ project=request.project,
+ job_type="compute",
+ feature_view_name=request.feature_view_name,
+ parameters=params,
+ )
+
+ # Execute synchronously for now; async worker is a future enhancement
+ try:
+ result = svc.execute_job(job_id)
+ return {"job_id": job_id, **result}
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
+
+ @router.post("/monitoring/auto_compute", tags=["Monitoring"])
+ async def auto_compute(request: AutoComputeRequest):
+ """Auto-detect date ranges and compute all granularities."""
+ store = _get_store()
+ if request.feature_view_name:
+ fv = store.registry.get_feature_view(
+ name=request.feature_view_name, project=request.project
+ )
+ assert_permissions(fv, actions=[AuthzedAction.UPDATE])
+
+ svc = _get_monitoring_service()
+
+ job_id = svc.submit_job(
+ project=request.project,
+ job_type="auto_compute",
+ feature_view_name=request.feature_view_name,
+ )
+
+ try:
+ result = svc.execute_job(job_id)
+ return {"job_id": job_id, **result}
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
+
+ # ------------------------------------------------------------------ #
+ # Log source: compute from feature serving logs
+ # ------------------------------------------------------------------ #
+
+ @router.post("/monitoring/compute/log", tags=["Monitoring"])
+ async def compute_log_metrics(request: ComputeLogMetricsRequest):
+ """Compute metrics from feature serving logs for a feature service."""
+ if request.granularity not in VALID_GRANULARITIES:
+ raise HTTPException(
+ status_code=400,
+ detail=f"Invalid granularity '{request.granularity}'. "
+ f"Must be one of {VALID_GRANULARITIES}",
+ )
+
+ store = _get_store()
+ fs = store.registry.get_feature_service(
+ name=request.feature_service_name, project=request.project
+ )
+ assert_permissions(fs, actions=[AuthzedAction.UPDATE])
+
+ svc = _get_monitoring_service()
+
+ start_d = date.fromisoformat(request.start_date) if request.start_date else None
+ end_d = date.fromisoformat(request.end_date) if request.end_date else None
+
+ try:
+ result = svc.compute_log_metrics(
+ project=request.project,
+ feature_service_name=request.feature_service_name,
+ start_date=start_d,
+ end_date=end_d,
+ granularity=request.granularity,
+ set_baseline=request.set_baseline,
+ )
+ return result
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
+
+ @router.post("/monitoring/auto_compute/log", tags=["Monitoring"])
+ async def auto_compute_log(request: AutoComputeLogRequest):
+ """Auto-detect date ranges from log data and compute all granularities."""
+ store = _get_store()
+ if request.feature_service_name:
+ fs = store.registry.get_feature_service(
+ name=request.feature_service_name, project=request.project
+ )
+ assert_permissions(fs, actions=[AuthzedAction.UPDATE])
+
+ svc = _get_monitoring_service()
+ try:
+ result = svc.auto_compute_log_metrics(
+ project=request.project,
+ feature_service_name=request.feature_service_name,
+ )
+ return result
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=str(e))
+
+    @router.get("/monitoring/jobs/{job_id}", tags=["Monitoring"])
+    async def get_job_status(job_id: str):
+        """Return the monitoring job identified by ``job_id``, or 404 if unknown."""
+        found = _get_monitoring_service().get_job(job_id)
+        if found is not None:
+            return found
+        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
+
+ # ------------------------------------------------------------------ #
+ # Transient compute (not stored)
+ # ------------------------------------------------------------------ #
+
+    @router.post("/monitoring/compute/transient", tags=["Monitoring"])
+    async def compute_transient(request: ComputeTransientRequest):
+        """Compute metrics on-the-fly for an arbitrary date range. Results are
+        returned directly and NOT persisted to the monitoring tables.
+
+        DESCRIBE permission on the feature view is asserted first. A malformed
+        ``start_date`` / ``end_date`` yields HTTP 400.
+        """
+        store = _get_store()
+        fv = store.registry.get_feature_view(
+            name=request.feature_view_name, project=request.project
+        )
+        assert_permissions(fv, actions=[AuthzedAction.DESCRIBE])
+
+        svc = _get_monitoring_service()
+
+        # date.fromisoformat raises ValueError on malformed input. That is a
+        # client mistake, so report it as 400 rather than an unhandled 500.
+        try:
+            start_d = (
+                date.fromisoformat(request.start_date) if request.start_date else None
+            )
+            end_d = date.fromisoformat(request.end_date) if request.end_date else None
+        except ValueError as e:
+            raise HTTPException(status_code=400, detail=f"Invalid date: {e}") from e
+
+        return svc.compute_transient(
+            project=request.project,
+            feature_view_name=request.feature_view_name,
+            feature_names=request.feature_names,
+            start_date=start_d,
+            end_date=end_d,
+        )
+
+ # ------------------------------------------------------------------ #
+ # Read endpoints
+ # ------------------------------------------------------------------ #
+
+    @router.get("/monitoring/metrics/features", tags=["Monitoring"])
+    async def get_feature_metrics(
+        project: str = Query(...),
+        feature_view_name: Optional[str] = Query(None),
+        feature_name: Optional[str] = Query(None),
+        feature_service_name: Optional[str] = Query(None),
+        granularity: Optional[str] = Query(None),
+        data_source_type: Optional[str] = Query(None),
+        start_date: Optional[str] = Query(None),
+        end_date: Optional[str] = Query(None),
+    ):
+        """Return stored per-feature metrics matching the query filters.
+
+        DESCRIBE permission is asserted only when ``feature_view_name`` is
+        supplied. A malformed ``start_date`` / ``end_date`` yields HTTP 400.
+        """
+        store = _get_store()
+        if feature_view_name:
+            fv = store.registry.get_feature_view(
+                name=feature_view_name, project=project
+            )
+            assert_permissions(fv, actions=[AuthzedAction.DESCRIBE])
+
+        svc = _get_monitoring_service()
+
+        # Malformed dates are a client error: answer 400, not an unhandled 500.
+        try:
+            start_d = date.fromisoformat(start_date) if start_date else None
+            end_d = date.fromisoformat(end_date) if end_date else None
+        except ValueError as e:
+            raise HTTPException(status_code=400, detail=f"Invalid date: {e}") from e
+
+        return svc.get_feature_metrics(
+            project=project,
+            feature_service_name=feature_service_name,
+            feature_view_name=feature_view_name,
+            feature_name=feature_name,
+            granularity=granularity,
+            data_source_type=data_source_type,
+            start_date=start_d,
+            end_date=end_d,
+        )
+
+    @router.get("/monitoring/metrics/feature_views", tags=["Monitoring"])
+    async def get_feature_view_metrics(
+        project: str = Query(...),
+        feature_view_name: Optional[str] = Query(None),
+        feature_service_name: Optional[str] = Query(None),
+        granularity: Optional[str] = Query(None),
+        data_source_type: Optional[str] = Query(None),
+        start_date: Optional[str] = Query(None),
+        end_date: Optional[str] = Query(None),
+    ):
+        """Return stored feature-view-level metrics matching the query filters.
+
+        DESCRIBE permission is asserted only when ``feature_view_name`` is
+        supplied. A malformed ``start_date`` / ``end_date`` yields HTTP 400.
+        """
+        store = _get_store()
+        if feature_view_name:
+            fv = store.registry.get_feature_view(
+                name=feature_view_name, project=project
+            )
+            assert_permissions(fv, actions=[AuthzedAction.DESCRIBE])
+
+        svc = _get_monitoring_service()
+
+        # Malformed dates are a client error: answer 400, not an unhandled 500.
+        try:
+            start_d = date.fromisoformat(start_date) if start_date else None
+            end_d = date.fromisoformat(end_date) if end_date else None
+        except ValueError as e:
+            raise HTTPException(status_code=400, detail=f"Invalid date: {e}") from e
+
+        return svc.get_feature_view_metrics(
+            project=project,
+            feature_service_name=feature_service_name,
+            feature_view_name=feature_view_name,
+            granularity=granularity,
+            data_source_type=data_source_type,
+            start_date=start_d,
+            end_date=end_d,
+        )
+
+    @router.get("/monitoring/metrics/feature_services", tags=["Monitoring"])
+    async def get_feature_service_metrics(
+        project: str = Query(...),
+        feature_service_name: Optional[str] = Query(None),
+        granularity: Optional[str] = Query(None),
+        data_source_type: Optional[str] = Query(None),
+        start_date: Optional[str] = Query(None),
+        end_date: Optional[str] = Query(None),
+    ):
+        """Return stored feature-service-level metrics matching the query filters.
+
+        DESCRIBE permission is asserted only when ``feature_service_name`` is
+        supplied. A malformed ``start_date`` / ``end_date`` yields HTTP 400.
+        """
+        store = _get_store()
+        if feature_service_name:
+            fs = store.registry.get_feature_service(
+                name=feature_service_name, project=project
+            )
+            assert_permissions(fs, actions=[AuthzedAction.DESCRIBE])
+
+        svc = _get_monitoring_service()
+
+        # Malformed dates are a client error: answer 400, not an unhandled 500.
+        try:
+            start_d = date.fromisoformat(start_date) if start_date else None
+            end_d = date.fromisoformat(end_date) if end_date else None
+        except ValueError as e:
+            raise HTTPException(status_code=400, detail=f"Invalid date: {e}") from e
+
+        return svc.get_feature_service_metrics(
+            project=project,
+            feature_service_name=feature_service_name,
+            granularity=granularity,
+            data_source_type=data_source_type,
+            start_date=start_d,
+            end_date=end_d,
+        )
+
+    @router.get("/monitoring/metrics/baseline", tags=["Monitoring"])
+    async def get_baseline(
+        project: str = Query(...),
+        feature_view_name: Optional[str] = Query(None),
+        feature_name: Optional[str] = Query(None),
+        data_source_type: Optional[str] = Query(None),
+    ):
+        """Return the baseline metrics selected by the query filters.
+
+        DESCRIBE permission is asserted only when ``feature_view_name`` is
+        supplied.
+        """
+        registry_store = _get_store()
+        if feature_view_name:
+            view = registry_store.registry.get_feature_view(
+                name=feature_view_name, project=project
+            )
+            assert_permissions(view, actions=[AuthzedAction.DESCRIBE])
+
+        monitoring = _get_monitoring_service()
+        baseline = monitoring.get_baseline(
+            project=project,
+            feature_view_name=feature_view_name,
+            feature_name=feature_name,
+            data_source_type=data_source_type,
+        )
+        return baseline
+
+    @router.get("/monitoring/metrics/timeseries", tags=["Monitoring"])
+    async def get_timeseries(
+        project: str = Query(...),
+        feature_view_name: Optional[str] = Query(None),
+        feature_name: Optional[str] = Query(None),
+        feature_service_name: Optional[str] = Query(None),
+        granularity: Optional[str] = Query(None),
+        data_source_type: Optional[str] = Query(None),
+        start_date: Optional[str] = Query(None),
+        end_date: Optional[str] = Query(None),
+    ):
+        """Return a metric time series matching the query filters.
+
+        DESCRIBE permission is asserted only when ``feature_view_name`` is
+        supplied. A malformed ``start_date`` / ``end_date`` yields HTTP 400.
+        """
+        store = _get_store()
+        if feature_view_name:
+            fv = store.registry.get_feature_view(
+                name=feature_view_name, project=project
+            )
+            assert_permissions(fv, actions=[AuthzedAction.DESCRIBE])
+
+        svc = _get_monitoring_service()
+
+        # Malformed dates are a client error: answer 400, not an unhandled 500.
+        try:
+            start_d = date.fromisoformat(start_date) if start_date else None
+            end_d = date.fromisoformat(end_date) if end_date else None
+        except ValueError as e:
+            raise HTTPException(status_code=400, detail=f"Invalid date: {e}") from e
+
+        return svc.get_timeseries(
+            project=project,
+            feature_view_name=feature_view_name,
+            feature_name=feature_name,
+            feature_service_name=feature_service_name,
+            granularity=granularity,
+            data_source_type=data_source_type,
+            start_date=start_d,
+            end_date=end_d,
+        )
+
+ return router
diff --git a/sdk/python/feast/cli/cli.py b/sdk/python/feast/cli/cli.py
index 886c91f69ae..726e215a1b9 100644
--- a/sdk/python/feast/cli/cli.py
+++ b/sdk/python/feast/cli/cli.py
@@ -35,6 +35,7 @@
get_historical_features,
get_online_features,
)
+from feast.cli.monitor import monitor_cmd
from feast.cli.on_demand_feature_views import on_demand_feature_views_cmd
from feast.cli.permissions import feast_permissions_cmd
from feast.cli.projects import projects_cmd
@@ -650,6 +651,7 @@ def demo_notebooks_command(ctx: click.Context, output_dir: str, overwrite: bool)
cli.add_command(serve_registry_command)
cli.add_command(serve_transformations_command)
cli.add_command(dbt_cmd)
+cli.add_command(monitor_cmd)
if __name__ == "__main__":
cli()
diff --git a/sdk/python/feast/cli/monitor.py b/sdk/python/feast/cli/monitor.py
new file mode 100644
index 00000000000..3741700cea0
--- /dev/null
+++ b/sdk/python/feast/cli/monitor.py
@@ -0,0 +1,206 @@
+from datetime import date
+from typing import List, Optional
+
+import click
+
+from feast.infra.offline_stores.offline_store import OfflineStore
+from feast.repo_operations import create_feature_store
+
+VALID_GRANULARITIES = OfflineStore.MONITORING_VALID_GRANULARITIES
+
+
+@click.group(name="monitor")
+def monitor_cmd():
+    """Feature monitoring commands."""
+    # Group container only; subcommands attach via @monitor_cmd.command(...).
+
+
+@monitor_cmd.command("run")
+@click.option(
+    "--project",
+    "-p",
+    default=None,
+    help="Feast project name. Defaults to the project in feature_store.yaml.",
+)
+@click.option(
+    "--feature-view",
+    "-v",
+    default=None,
+    help="Feature view name. If omitted, all feature views are computed.",
+)
+@click.option(
+    "--feature-name",
+    "-f",
+    multiple=True,
+    help="Feature name(s) to compute. Can be specified multiple times.",
+)
+@click.option(
+    "--start-date",
+    default=None,
+    help="Start date (YYYY-MM-DD). If omitted, auto-detected from source data.",
+)
+@click.option(
+    "--end-date",
+    default=None,
+    help="End date (YYYY-MM-DD). If omitted, auto-detected from source data.",
+)
+@click.option(
+    "--granularity",
+    "-g",
+    default=None,
+    type=click.Choice(list(VALID_GRANULARITIES)),
+    help="Metric granularity. If omitted, all granularities are computed (auto mode).",
+)
+@click.option(
+    "--set-baseline",
+    is_flag=True,
+    default=False,
+    help="Mark this computation as the baseline for drift detection.",
+)
+@click.option(
+    "--source-type",
+    type=click.Choice(["batch", "log", "all"]),
+    default="batch",
+    help="Data source type: 'batch' (offline store), 'log' (serving logs), or 'all'.",
+)
+@click.pass_context
+def monitor_run(
+    ctx: click.Context,
+    project: Optional[str],
+    feature_view: Optional[str],
+    feature_name: tuple,
+    start_date: Optional[str],
+    end_date: Optional[str],
+    granularity: Optional[str],
+    set_baseline: bool,
+    source_type: str,
+):
+    """Compute feature quality metrics.
+
+    Without --start-date/--end-date/--granularity, runs in auto mode:
+    detects date ranges from source data and computes all granularities.
+
+    Use --source-type log to compute metrics from feature serving logs
+    (requires feature services with logging configured).
+    """
+    # Validate the date options before any store work so a typo produces a
+    # clean usage error instead of a ValueError traceback from the helpers,
+    # which parse these same strings with date.fromisoformat. Empty strings
+    # are left alone because the helpers already treat them as "not given".
+    for option_name, raw_value in (
+        ("--start-date", start_date),
+        ("--end-date", end_date),
+    ):
+        if not raw_value:
+            continue
+        try:
+            date.fromisoformat(raw_value)
+        except ValueError as exc:
+            raise click.BadParameter(
+                f"expected YYYY-MM-DD, got {raw_value!r}", param_hint=option_name
+            ) from exc
+
+    store = create_feature_store(ctx)
+
+    if project is None:
+        project = store.project
+
+    # Imported lazily, as in the rest of this command module.
+    from feast.monitoring.monitoring_service import MonitoringService
+
+    svc = MonitoringService(store)
+
+    # Auto mode applies only when none of the three scoping options is given.
+    auto_mode = start_date is None and end_date is None and granularity is None
+    feat_names: Optional[List[str]] = list(feature_name) if feature_name else None
+
+    if source_type in ("batch", "all"):
+        _run_batch_monitoring(
+            svc,
+            project,
+            feature_view,
+            feat_names,
+            start_date,
+            end_date,
+            granularity,
+            set_baseline,
+            auto_mode,
+        )
+
+    if source_type in ("log", "all"):
+        # The log helper receives --feature-view as its feature service name.
+        _run_log_monitoring(
+            svc,
+            project,
+            feature_view,
+            start_date,
+            end_date,
+            granularity,
+            auto_mode,
+        )
+
+
+def _run_batch_monitoring(
+    svc,
+    project,
+    feature_view,
+    feat_names,
+    start_date,
+    end_date,
+    granularity,
+    set_baseline,
+    auto_mode,
+):
+    """Run batch-source monitoring and print a summary of the result.
+
+    Auto mode without ``set_baseline`` delegates to ``svc.auto_compute``;
+    every other combination calls ``svc.compute_metrics`` with the parsed
+    dates and a granularity that defaults to ``"daily"``.
+    """
+    if auto_mode and not set_baseline:
+        click.echo("Auto-computing batch metrics for all granularities...")
+        summary = svc.auto_compute(
+            project=project,
+            feature_view_name=feature_view,
+        )
+        click.echo(f"Status: {summary['status']}")
+        click.echo(f"Feature views computed: {summary['computed_feature_views']}")
+        click.echo(f"Features computed: {summary['computed_features']}")
+        click.echo(f"Granularities: {', '.join(summary['granularities'])}")
+        click.echo(f"Duration: {summary['duration_ms']}ms")
+        return
+
+    # Explicit mode: empty or missing date strings become None.
+    window_start = date.fromisoformat(start_date) if start_date else None
+    window_end = date.fromisoformat(end_date) if end_date else None
+
+    summary = svc.compute_metrics(
+        project=project,
+        feature_view_name=feature_view,
+        feature_names=feat_names,
+        start_date=window_start,
+        end_date=window_end,
+        granularity=granularity or "daily",
+        set_baseline=set_baseline,
+    )
+
+    click.echo(f"Status: {summary['status']}")
+    click.echo(f"Granularity: {summary['granularity']}")
+    click.echo(f"Features computed: {summary['computed_features']}")
+    click.echo(f"Feature views computed: {summary['computed_feature_views']}")
+    click.echo(f"Feature services computed: {summary['computed_feature_services']}")
+    click.echo(f"Metric dates: {', '.join(summary['metric_dates'])}")
+    click.echo(f"Duration: {summary['duration_ms']}ms")
+
+    if set_baseline:
+        click.echo("Baseline: SET")
+
+
+def _run_log_monitoring(
+    svc, project, feature_service_name, start_date, end_date, granularity, auto_mode
+):
+    """Run serving-log monitoring and print a summary of the result.
+
+    In auto mode this delegates to ``svc.auto_compute_log_metrics``. Otherwise
+    a feature service name is mandatory and ``svc.compute_log_metrics`` is
+    called with the parsed dates and a granularity defaulting to ``"daily"``.
+
+    Raises:
+        click.UsageError: explicit (non-auto) mode was requested without a
+            feature service name.
+    """
+    if auto_mode:
+        click.echo("Auto-computing log metrics for all granularities...")
+        result = svc.auto_compute_log_metrics(
+            project=project,
+            feature_service_name=feature_service_name,
+        )
+        click.echo(f"Status: {result['status']}")
+        click.echo(f"Feature services computed: {result['computed_feature_services']}")
+        click.echo(f"Features computed: {result['computed_features']}")
+        click.echo(f"Granularities: {', '.join(result['granularities'])}")
+        click.echo(f"Duration: {result['duration_ms']}ms")
+        return
+
+    if not feature_service_name:
+        # Raise instead of echo-and-return so the CLI exits non-zero and the
+        # message goes to stderr. click adds the "Error: " prefix itself.
+        raise click.UsageError(
+            "--feature-view (as feature service name) is required for log source with explicit dates."
+        )
+
+    start_d = date.fromisoformat(start_date) if start_date else None
+    end_d = date.fromisoformat(end_date) if end_date else None
+
+    result = svc.compute_log_metrics(
+        project=project,
+        feature_service_name=feature_service_name,
+        start_date=start_d,
+        end_date=end_d,
+        granularity=granularity or "daily",
+    )
+
+    click.echo(f"Status: {result['status']}")
+    click.echo("Source: log")
+    click.echo(f"Features computed: {result.get('computed_features', 0)}")
+    click.echo(f"Duration: {result['duration_ms']}ms")
diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py
index f60eeb9d87d..58cd06f49e2 100644
--- a/sdk/python/feast/feature_server.py
+++ b/sdk/python/feast/feature_server.py
@@ -148,28 +148,71 @@ class ChatRequest(BaseModel):
messages: List[ChatMessage]
-def _resolve_feature_counts(
+def _parse_feature_info(
features: Union[List[str], "feast.FeatureService"],
) -> tuple:
- """Return (feature_count, feature_view_count) from the resolved features.
+ """Return ``(feature_view_names, feature_count)`` from resolved features.
``features`` is either a list of ``"feature_view:feature"`` strings or
a ``FeatureService`` with ``feature_view_projections``.
+
+ Returns:
+ (fv_names, feat_count) where fv_names is a list of unique feature
+ view name strings and feat_count is the total number of features.
"""
from feast.feature_service import FeatureService
+ from feast.utils import _parse_feature_ref
if isinstance(features, FeatureService):
projections = features.feature_view_projections
- fv_count = len(projections)
+ # NOTE(review): one name is taken per projection with no de-duplication,
+ # unlike the list branch below, which collects names through a set.
+ fv_names = [p.name for p in projections]
feat_count = sum(len(p.features) for p in projections)
elif isinstance(features, list):
feat_count = len(features)
- fv_names = {ref.split(":")[0].split("@")[0] for ref in features if ":" in ref}
- fv_count = len(fv_names)
+ # References without a ":" are counted as features but contribute no
+ # feature view name.
+ fv_names = list({_parse_feature_ref(ref)[0] for ref in features if ":" in ref})
else:
+ # Neither a FeatureService nor a list: report nothing.
+ fv_names = []
feat_count = 0
- fv_count = 0
- return str(feat_count), str(fv_count)
+ return fv_names, feat_count
+
+
+def _resolve_feature_counts(
+    features: Union[List[str], "feast.FeatureService"],
+) -> tuple:
+    """Return ``(feature_count_str, feature_view_count_str)`` for Prometheus labels.
+
+    Both counts come from ``_parse_feature_info`` and are stringified.
+    """
+    view_names, n_features = _parse_feature_info(features)
+    n_views = len(view_names)
+    return str(n_features), str(n_views)
+
+
+def _emit_online_audit(
+    request: GetOnlineFeaturesRequest,
+    features: Union[List[str], "feast.FeatureService"],
+    entity_count: int,
+    status: str,
+    latency_ms: float,
+):
+    """Emit an audit record for an online feature request, never raising.
+
+    The requestor is the current user's username from the security manager,
+    or ``"anonymous"`` when there is no manager, user or username. Any
+    failure while building or emitting the record is logged as a warning.
+    """
+    try:
+        from feast.permissions.security_manager import get_security_manager
+
+        manager = get_security_manager()
+        user = manager.current_user if manager else None
+        requestor_id = (user.username or "anonymous") if user else "anonymous"
+
+        view_names, n_features = _parse_feature_info(features)
+        entity_keys = list(request.entities.keys())
+
+        feast_metrics.emit_online_audit_log(
+            requestor_id=requestor_id,
+            entity_keys=entity_keys,
+            entity_count=entity_count,
+            feature_views=view_names,
+            feature_count=n_features,
+            status=status,
+            latency_ms=latency_ms,
+        )
+    except Exception:
+        # Best effort: auditing must not break the serving path.
+        logger.warning("Failed to emit online audit log", exc_info=True)
async def _get_features(
@@ -387,11 +430,22 @@ async def get_online_features(request: GetOnlineFeaturesRequest) -> Any:
include_feature_view_version_metadata=request.include_feature_view_version_metadata,
)
- if store._get_provider().async_supported.online.read:
- response = await store.get_online_features_async(**read_params) # type: ignore
- else:
- response = await run_in_threadpool(
- lambda: store.get_online_features(**read_params) # type: ignore
+ audit_start_ms = time.monotonic() * 1000
+ audit_status = "success"
+ try:
+ if store._get_provider().async_supported.online.read:
+ response = await store.get_online_features_async(**read_params) # type: ignore
+ else:
+ response = await run_in_threadpool(
+ lambda: store.get_online_features(**read_params) # type: ignore
+ )
+ except Exception:
+ audit_status = "error"
+ raise
+ finally:
+ audit_latency_ms = time.monotonic() * 1000 - audit_start_ms
+ _emit_online_audit(
+ request, features, entity_count, audit_status, audit_latency_ms
)
response_dict = await run_in_threadpool(
diff --git a/sdk/python/feast/infra/feature_servers/base_config.py b/sdk/python/feast/infra/feature_servers/base_config.py
index df324dc57d3..14ad2fe505e 100644
--- a/sdk/python/feast/infra/feature_servers/base_config.py
+++ b/sdk/python/feast/infra/feature_servers/base_config.py
@@ -82,6 +82,17 @@ class MetricsConfig(FeastConfigBaseModel):
"""Emit per-feature-view freshness gauges
(feast_feature_freshness_seconds)."""
+ offline_features: StrictBool = True
+ """Emit offline store retrieval metrics
+ (feast_offline_store_request_total,
+ feast_offline_store_request_latency_seconds,
+ feast_offline_store_row_count)."""
+
+ audit_logging: StrictBool = False
+ """Emit structured JSON audit log entries for online and offline
+ feature requests via the ``feast.audit`` logger. Captures requestor
+ identity, entity keys, feature views, row counts, and latency."""
+
class BaseFeatureServerConfig(FeastConfigBaseModel):
"""Base Feature Server config that should be extended"""
diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py
index 3d0f84bb3a5..7e563dde905 100644
--- a/sdk/python/feast/infra/offline_stores/bigquery.py
+++ b/sdk/python/feast/infra/offline_stores/bigquery.py
@@ -1,7 +1,8 @@
import contextlib
+import json
import tempfile
import uuid
-from datetime import date, datetime, timedelta
+from datetime import date, datetime, timedelta, timezone
from pathlib import Path
from typing import (
Any,
@@ -42,6 +43,17 @@
RetrievalMetadata,
)
from feast.infra.registry.base_registry import BaseRegistry
+from feast.monitoring.monitoring_utils import (
+ MON_TABLE_FEATURE,
+ MON_TABLE_FEATURE_SERVICE,
+ MON_TABLE_FEATURE_VIEW,
+ MON_TABLE_JOB,
+ empty_categorical_metric,
+ empty_numeric_metric,
+ monitoring_table_meta,
+ normalize_monitoring_row,
+ opt_float,
+)
from feast.on_demand_feature_view import OnDemandFeatureView
from feast.repo_config import FeastConfigBaseModel, RepoConfig
from feast.saved_dataset import SavedDatasetStorage
@@ -477,6 +489,645 @@ def offline_write_batch(
def _escape_query_columns(columns: List[str]) -> List[str]:
return [f"`{x}`" for x in columns]
+    @staticmethod
+    def compute_monitoring_metrics(
+        config: RepoConfig,
+        data_source: DataSource,
+        feature_columns: List[Tuple[str, str]],
+        timestamp_field: str,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        histogram_bins: int = 20,
+        top_n: int = 10,
+    ) -> List[Dict[str, Any]]:
+        """Compute monitoring metrics for ``feature_columns`` in BigQuery.
+
+        Asserts a BigQuery offline store config and a ``BigQuerySource``,
+        then delegates to ``_bq_compute_monitoring_metrics``.
+        """
+        assert isinstance(config.offline_store, BigQueryOfflineStoreConfig)
+        assert isinstance(data_source, BigQuerySource)
+        options = dict(
+            start_date=start_date,
+            end_date=end_date,
+            histogram_bins=histogram_bins,
+            top_n=top_n,
+        )
+        return _bq_compute_monitoring_metrics(
+            config, data_source, feature_columns, timestamp_field, **options
+        )
+
+    @staticmethod
+    def get_monitoring_max_timestamp(
+        config: RepoConfig,
+        data_source: DataSource,
+        timestamp_field: str,
+    ) -> Optional[datetime]:
+        """Return the largest ``timestamp_field`` value in the source, if any.
+
+        Asserts a BigQuery offline store config and a ``BigQuerySource``,
+        then delegates to ``_bq_get_monitoring_max_timestamp``.
+        """
+        assert isinstance(config.offline_store, BigQueryOfflineStoreConfig)
+        assert isinstance(data_source, BigQuerySource)
+        latest = _bq_get_monitoring_max_timestamp(config, data_source, timestamp_field)
+        return latest
+
+    @staticmethod
+    def ensure_monitoring_tables(config: RepoConfig) -> None:
+        """Create the monitoring tables via ``_bq_ensure_monitoring_tables``.
+
+        Asserts that the offline store config is a BigQuery one first.
+        """
+        offline_cfg = config.offline_store
+        assert isinstance(offline_cfg, BigQueryOfflineStoreConfig)
+        _bq_ensure_monitoring_tables(config)
+
+    @staticmethod
+    def save_monitoring_metrics(
+        config: RepoConfig,
+        metric_type: str,
+        metrics: List[Dict[str, Any]],
+    ) -> None:
+        """Persist ``metrics`` of ``metric_type``; an empty list is a no-op.
+
+        The offline store type is asserted only when there is something to
+        save, and the write is delegated to ``_bq_save_monitoring_metrics``.
+        """
+        if metrics:
+            assert isinstance(config.offline_store, BigQueryOfflineStoreConfig)
+            _bq_save_monitoring_metrics(config, metric_type, metrics)
+
+    @staticmethod
+    def query_monitoring_metrics(
+        config: RepoConfig,
+        project: str,
+        metric_type: str,
+        filters: Optional[Dict[str, Any]] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+    ) -> List[Dict[str, Any]]:
+        """Read stored monitoring rows of ``metric_type`` for ``project``.
+
+        Asserts a BigQuery offline store config, then delegates to
+        ``_bq_query_monitoring_metrics`` with the same arguments.
+        """
+        assert isinstance(config.offline_store, BigQueryOfflineStoreConfig)
+        rows = _bq_query_monitoring_metrics(
+            config, project, metric_type, filters, start_date, end_date
+        )
+        return rows
+
+    @staticmethod
+    def clear_monitoring_baseline(
+        config: RepoConfig,
+        project: str,
+        feature_view_name: Optional[str] = None,
+        feature_name: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+    ) -> None:
+        """Clear baseline flags for ``project``, narrowed by the optional filters.
+
+        Asserts a BigQuery offline store config, then delegates to
+        ``_bq_clear_monitoring_baseline``.
+        """
+        assert isinstance(config.offline_store, BigQueryOfflineStoreConfig)
+        scope = (feature_view_name, feature_name, data_source_type)
+        _bq_clear_monitoring_baseline(config, project, *scope)
+
+
+# ------------------------------------------------------------------ #
+# BigQuery monitoring metrics (native)
+# ------------------------------------------------------------------ #
+
+
+def _bq_monitoring_table_fqn(config: RepoConfig, table_name: str) -> str:
+    """Return the backtick-quoted ``project.dataset.table`` name of a monitoring table.
+
+    The project is ``offline_store.project_id`` when set; otherwise it is the
+    project of a client built with the billing project and location.
+    """
+    offline_cfg = config.offline_store
+    assert isinstance(offline_cfg, BigQueryOfflineStoreConfig)
+    resolved_project = offline_cfg.project_id
+    if not resolved_project:
+        resolved_project = _get_bigquery_client(
+            project=offline_cfg.billing_project_id,
+            location=offline_cfg.location,
+        ).project
+    return f"`{resolved_project}.{offline_cfg.dataset}.{table_name}`"
+
+
+def _bq_scalar_param_type(column: str) -> str:
+    """Map a monitoring column name to its BigQuery scalar parameter type.
+
+    Columns not listed below are bound as ``"STRING"``.
+    """
+    special = {
+        "is_baseline": "BOOL",
+        "metric_date": "DATE",
+        "computed_at": "TIMESTAMP",
+    }
+    if column in special:
+        return special[column]
+
+    integer_columns = (
+        "row_count",
+        "null_count",
+        "total_row_count",
+        "total_features",
+        "features_with_nulls",
+        "total_feature_views",
+    )
+    float_columns = (
+        "null_rate",
+        "mean",
+        "stddev",
+        "min_val",
+        "max_val",
+        "p50",
+        "p75",
+        "p90",
+        "p95",
+        "p99",
+        "avg_null_rate",
+        "max_null_rate",
+    )
+    if column in integer_columns:
+        return "INT64"
+    return "FLOAT64" if column in float_columns else "STRING"
+
+
+def _bq_merge_row(
+    config: RepoConfig,
+    table_fqn: str,
+    columns: List[str],
+    pk_columns: List[str],
+    row: Dict[str, Any],
+) -> None:
+    """Upsert one monitoring row into ``table_fqn`` with a parameterized MERGE.
+
+    Args:
+        config: Repo config whose offline store supplies project and location.
+        table_fqn: Backtick-quoted target table name.
+        columns: All columns to write; values missing from ``row`` bind as NULL.
+        pk_columns: Columns used to match an existing row.
+        row: Column name to value. A non-string ``histogram`` is JSON-encoded.
+
+    The statement runs synchronously; BigQuery errors propagate to the caller.
+    """
+    project_id = (
+        config.offline_store.billing_project_id or config.offline_store.project_id
+    )
+    client = _get_bigquery_client(
+        project=project_id,
+        location=config.offline_store.location,
+    )
+
+    params: List[Any] = []
+    using_parts: List[str] = []
+    for idx, col in enumerate(columns):
+        val = row.get(col)
+        if col == "histogram" and val is not None and not isinstance(val, str):
+            val = json.dumps(val)
+        param_name = f"p{idx}"
+        params.append(
+            bigquery.ScalarQueryParameter(param_name, _bq_scalar_param_type(col), val)
+        )
+        using_parts.append(f"@{param_name} AS {col}")
+
+    # The join condition depends only on the key columns, so build it once
+    # instead of on every loop iteration.
+    on_clause = " AND ".join(f"T.{col} = S.{col}" for col in pk_columns)
+
+    non_pk = [c for c in columns if c not in pk_columns]
+    # An empty "UPDATE SET" is invalid SQL, so omit the clause when every
+    # column is part of the key.
+    matched_clause = ""
+    if non_pk:
+        update_set = ", ".join(f"{c} = S.{c}" for c in non_pk)
+        matched_clause = f"WHEN MATCHED THEN UPDATE SET {update_set}\n"
+
+    insert_columns = ", ".join(columns)
+    insert_values = ", ".join(f"S.{c}" for c in columns)
+    select_list = ", ".join(using_parts)
+    merge_sql = (
+        f"\nMERGE {table_fqn} T\n"
+        f"USING (SELECT {select_list}) S\n"
+        f"ON {on_clause}\n"
+        f"{matched_clause}"
+        f"WHEN NOT MATCHED THEN INSERT ({insert_columns}) VALUES ({insert_values})\n"
+    )
+    job_config = bigquery.QueryJobConfig(query_parameters=params)
+    client.query(merge_sql, job_config=job_config).result()
+
+
+def _bq_save_monitoring_metrics(
+    config: RepoConfig,
+    metric_type: str,
+    metrics: List[Dict[str, Any]],
+) -> None:
+    """Upsert each metric row into the table that backs ``metric_type``.
+
+    Table name, column list and key columns come from
+    ``monitoring_table_meta``; one MERGE is issued per row.
+    """
+    short_name, all_columns, key_columns = monitoring_table_meta(metric_type)
+    target = _bq_monitoring_table_fqn(config, short_name)
+    for metric_row in metrics:
+        _bq_merge_row(config, target, all_columns, key_columns, metric_row)
+
+
+def _bq_query_monitoring_metrics(
+    config: RepoConfig,
+    project: str,
+    metric_type: str,
+    filters: Optional[Dict[str, Any]] = None,
+    start_date: Optional[date] = None,
+    end_date: Optional[date] = None,
+) -> List[Dict[str, Any]]:
+    """Read monitoring rows of ``metric_type`` from BigQuery.
+
+    Args:
+        config: Repo config whose offline store supplies project and location.
+        project: Feast project; when truthy, rows are limited to it.
+        metric_type: Selects table and columns via ``monitoring_table_meta``.
+        filters: Optional column-to-value equality filters; ``None`` values
+            are ignored.
+        start_date: Inclusive lower bound on ``metric_date``.
+        end_date: Inclusive upper bound on ``metric_date``.
+
+    Returns:
+        Rows passed through ``normalize_monitoring_row``, ordered by
+        ``metric_date`` when the table has that column, else by ``job_id``.
+
+    Raises:
+        ValueError: a filter key is not a column of the selected table.
+    """
+    table_short, columns, _ = monitoring_table_meta(metric_type)
+    table_fqn = _bq_monitoring_table_fqn(config, table_short)
+    project_id = (
+        config.offline_store.billing_project_id or config.offline_store.project_id
+    )
+    client = _get_bigquery_client(
+        project=project_id,
+        location=config.offline_store.location,
+    )
+    params: List[Any] = []
+    conditions: List[str] = []
+    if project:
+        conditions.append("project_id = @project")
+        params.append(bigquery.ScalarQueryParameter("project", "STRING", project))
+    for key, value in (filters or {}).items():
+        if value is None:
+            continue
+        # The key becomes part of the SQL text (identifier and parameter
+        # name), so accept only known column names, never arbitrary strings.
+        if key not in columns:
+            raise ValueError(
+                f"Unknown monitoring filter column '{key}' "
+                f"for metric type '{metric_type}'"
+            )
+        # Namespaced so a filter can never clash with @project, @start_date
+        # or @end_date.
+        param_name = f"filter_{key}"
+        conditions.append(f"`{key}` = @{param_name}")
+        params.append(
+            bigquery.ScalarQueryParameter(
+                param_name, _bq_scalar_param_type(key), value
+            )
+        )
+    if start_date:
+        conditions.append("metric_date >= @start_date")
+        params.append(bigquery.ScalarQueryParameter("start_date", "DATE", start_date))
+    if end_date:
+        conditions.append("metric_date <= @end_date")
+        params.append(bigquery.ScalarQueryParameter("end_date", "DATE", end_date))
+    col_list = ", ".join(f"`{c}`" for c in columns)
+    where_sql = " AND ".join(conditions) if conditions else "TRUE"
+    order_col = "metric_date" if "metric_date" in columns else "job_id"
+    sql = f"SELECT {col_list} FROM {table_fqn} WHERE {where_sql} ORDER BY `{order_col}` ASC"
+    job_config = bigquery.QueryJobConfig(query_parameters=params)
+    result_rows = client.query(sql, job_config=job_config).result()
+    # Values are read by position, matching the SELECT column order.
+    return [
+        normalize_monitoring_row({col: r[i] for i, col in enumerate(columns)})
+        for r in result_rows
+    ]
+
+
+def _bq_clear_monitoring_baseline(
+    config: RepoConfig,
+    project: str,
+    feature_view_name: Optional[str] = None,
+    feature_name: Optional[str] = None,
+    data_source_type: Optional[str] = None,
+) -> None:
+    """Set ``is_baseline`` to FALSE on matching rows of the feature metrics table.
+
+    Only rows of ``project`` that are currently baselines are touched; each
+    truthy optional argument adds an equality filter on its column.
+    """
+    target = _bq_monitoring_table_fqn(config, MON_TABLE_FEATURE)
+    offline_cfg = config.offline_store
+    client = _get_bigquery_client(
+        project=offline_cfg.billing_project_id or offline_cfg.project_id,
+        location=offline_cfg.location,
+    )
+
+    where_terms = ["project_id = @project", "is_baseline = TRUE"]
+    bindings: List[Any] = [
+        bigquery.ScalarQueryParameter("project", "STRING", project),
+    ]
+    narrowing = (
+        ("feature_view_name", feature_view_name),
+        ("feature_name", feature_name),
+        ("data_source_type", data_source_type),
+    )
+    for column, wanted in narrowing:
+        if wanted:
+            where_terms.append(f"{column} = @{column}")
+            bindings.append(bigquery.ScalarQueryParameter(column, "STRING", wanted))
+
+    sql = f"UPDATE {target} SET is_baseline = FALSE WHERE {' AND '.join(where_terms)}"
+    client.query(
+        sql, job_config=bigquery.QueryJobConfig(query_parameters=bindings)
+    ).result()
+
+
+def _bq_ensure_monitoring_tables(config: RepoConfig) -> None:
+ """Create the four monitoring tables in BigQuery if they do not exist.
+
+ Issues one ``CREATE TABLE IF NOT EXISTS`` statement each for the feature,
+ feature view, feature service and job tables, waiting for each to finish.
+ Primary keys are declared ``NOT ENFORCED``.
+ """
+ # Queries run under the billing project when set, else the store project.
+ project_id = (
+ config.offline_store.billing_project_id or config.offline_store.project_id
+ )
+ client = _get_bigquery_client(
+ project=project_id,
+ location=config.offline_store.location,
+ )
+ ds = config.offline_store.dataset
+ # Tables live in the configured store project, else the client's project.
+ proj = config.offline_store.project_id or client.project
+ feature_ddl = f"""
+CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{MON_TABLE_FEATURE}` (
+ project_id STRING NOT NULL,
+ feature_view_name STRING NOT NULL,
+ feature_name STRING NOT NULL,
+ metric_date DATE NOT NULL,
+ granularity STRING NOT NULL,
+ data_source_type STRING NOT NULL,
+ computed_at TIMESTAMP NOT NULL,
+ is_baseline BOOL NOT NULL,
+ feature_type STRING NOT NULL,
+ row_count INT64,
+ null_count INT64,
+ null_rate FLOAT64,
+ mean FLOAT64,
+ stddev FLOAT64,
+ min_val FLOAT64,
+ max_val FLOAT64,
+ p50 FLOAT64,
+ p75 FLOAT64,
+ p90 FLOAT64,
+ p95 FLOAT64,
+ p99 FLOAT64,
+ histogram STRING
+)
+PRIMARY KEY (project_id, feature_view_name, feature_name, metric_date, granularity, data_source_type) NOT ENFORCED
+"""
+ view_ddl = f"""
+CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{MON_TABLE_FEATURE_VIEW}` (
+ project_id STRING NOT NULL,
+ feature_view_name STRING NOT NULL,
+ metric_date DATE NOT NULL,
+ granularity STRING NOT NULL,
+ data_source_type STRING NOT NULL,
+ computed_at TIMESTAMP NOT NULL,
+ is_baseline BOOL NOT NULL,
+ total_row_count INT64,
+ total_features INT64,
+ features_with_nulls INT64,
+ avg_null_rate FLOAT64,
+ max_null_rate FLOAT64
+)
+PRIMARY KEY (project_id, feature_view_name, metric_date, granularity, data_source_type) NOT ENFORCED
+"""
+ service_ddl = f"""
+CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{MON_TABLE_FEATURE_SERVICE}` (
+ project_id STRING NOT NULL,
+ feature_service_name STRING NOT NULL,
+ metric_date DATE NOT NULL,
+ granularity STRING NOT NULL,
+ data_source_type STRING NOT NULL,
+ computed_at TIMESTAMP NOT NULL,
+ is_baseline BOOL NOT NULL,
+ total_feature_views INT64,
+ total_features INT64,
+ avg_null_rate FLOAT64,
+ max_null_rate FLOAT64
+)
+PRIMARY KEY (project_id, feature_service_name, metric_date, granularity, data_source_type) NOT ENFORCED
+"""
+ job_ddl = f"""
+CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{MON_TABLE_JOB}` (
+ job_id STRING NOT NULL,
+ project_id STRING NOT NULL,
+ feature_view_name STRING,
+ job_type STRING NOT NULL,
+ status STRING NOT NULL,
+ parameters STRING,
+ metric_date DATE NOT NULL,
+ started_at TIMESTAMP,
+ completed_at TIMESTAMP,
+ error_message STRING,
+ result_summary STRING
+)
+PRIMARY KEY (job_id) NOT ENFORCED
+"""
+ # Run the statements one after another, blocking on each.
+ for ddl in (feature_ddl, view_ddl, service_ddl, job_ddl):
+ client.query(ddl).result()
+
+
+def _bq_get_monitoring_max_timestamp(
+    config: RepoConfig,
+    data_source: BigQuerySource,
+    timestamp_field: str,
+) -> Optional[datetime]:
+    """Return ``MAX(timestamp_field)`` over the source, or None when empty.
+
+    A naive ``datetime`` is tagged as UTC and a plain ``date`` becomes
+    midnight UTC; any other value type is returned unchanged.
+    """
+    source_expr = data_source.get_table_query_string()
+    sql = f"SELECT MAX(`{timestamp_field}`) AS _max_ts FROM {source_expr}"
+    offline_cfg = config.offline_store
+    client = _get_bigquery_client(
+        project=offline_cfg.billing_project_id or offline_cfg.project_id,
+        location=offline_cfg.location,
+    )
+    query_job = client.query(sql)
+    query_job.result()
+    fetched = list(query_job)
+    latest = fetched[0][0] if fetched else None
+    if latest is None:
+        return None
+    # datetime is tested first because it is a subclass of date.
+    if isinstance(latest, datetime):
+        if latest.tzinfo:
+            return latest
+        return latest.replace(tzinfo=timezone.utc)
+    if isinstance(latest, date):
+        return datetime.combine(latest, datetime.min.time(), tzinfo=timezone.utc)
+    return latest  # type: ignore[no-any-return]
+
+
+def _bq_numeric_histogram(
+ config: RepoConfig,
+ from_expression: str,
+ col_name: str,
+ ts_filter: str,
+ bins: int,
+ min_val: float,
+ max_val: float,
+) -> Dict[str, Any]:
+ """Build an equal-width histogram of a numeric column in BigQuery.
+
+ Returns a dict with ``bins`` (bin edges), ``counts`` and ``bin_width``.
+ Only non-NULL values matching ``ts_filter`` are counted.
+ """
+ q_col = f"`{col_name}`"
+ project_id = (
+ config.offline_store.billing_project_id or config.offline_store.project_id
+ )
+ client = _get_bigquery_client(
+ project=project_id,
+ location=config.offline_store.location,
+ )
+ # Degenerate range: every value is equal, so report a single bucket
+ # holding the non-NULL row count, with a bin width of zero.
+ if min_val == max_val:
+ sql = (
+ f"SELECT COUNT(*) AS cnt FROM {from_expression} AS _src "
+ f"WHERE {q_col} IS NOT NULL AND {ts_filter}"
+ )
+ job = client.query(sql)
+ job.result()
+ hrows = list(job)
+ cnt = int(hrows[0][0]) if hrows else 0
+ return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0}
+
+ # NOTE(review): bins == 0 raises ZeroDivisionError here; callers are
+ # presumably expected to pass a positive bin count - confirm.
+ bin_width = (max_val - min_val) / bins
+ # Bucket numbers are 1-based and clamped to [1, bins], so a value equal
+ # to max_val lands in the last bucket. min_val and bin_width are
+ # formatted straight into the SQL text.
+ sql = f"""
+SELECT
+ LEAST(
+ GREATEST(
+ CAST(FLOOR((CAST({q_col} AS FLOAT64) - {min_val}) / {bin_width}) AS INT64) + 1,
+ 1
+ ),
+ {bins}
+ ) AS bucket,
+ COUNT(*) AS cnt
+FROM {from_expression} AS _src
+WHERE {q_col} IS NOT NULL AND {ts_filter}
+GROUP BY bucket
+ORDER BY bucket
+"""
+ job = client.query(sql)
+ job.result()
+ rows = list(job)
+ # Buckets with no rows are absent from the result and stay at zero.
+ counts = [0] * bins
+ for bucket, cnt in rows:
+ b = int(bucket)
+ if 1 <= b <= bins:
+ counts[b - 1] += int(cnt)
+ # bins + 1 edges delimit the bins.
+ bin_edges = [min_val + i * bin_width for i in range(bins + 1)]
+ return {
+ "bins": [float(b) for b in bin_edges],
+ "counts": counts,
+ "bin_width": float(bin_width),
+ }
+
+
+def _bq_numeric_stats(
+    config: RepoConfig,
+    from_expression: str,
+    feature_names: List[str],
+    ts_filter: str,
+    histogram_bins: int,
+) -> List[Dict[str, Any]]:
+    """Compute summary statistics for several numeric columns in one query.
+
+    A single ``SELECT`` gathers, per column, the non-null count, mean, sample
+    standard deviation, min, max and approximate p50/p75/p90/p95/p99. A
+    histogram is then fetched per column through ``_bq_numeric_histogram``
+    whenever the column has at least one non-null value.
+
+    Args:
+        config: Repo config; ``offline_store`` supplies project and location.
+        from_expression: Table or sub-query text placed after ``FROM``.
+        feature_names: Columns to profile, in output order.
+        ts_filter: SQL predicate used as the ``WHERE`` clause.
+        histogram_bins: Bucket count forwarded to the histogram helper.
+
+    Returns:
+        One metric dict per entry of ``feature_names``.
+    """
+    store_cfg = config.offline_store
+    client = _get_bigquery_client(
+        project=store_cfg.billing_project_id or store_cfg.project_id,
+        location=store_cfg.location,
+    )
+
+    percentiles = (50, 75, 90, 95, 99)
+    select_parts: List[str] = ["COUNT(*) AS _row_count"]
+    for idx, name in enumerate(feature_names):
+        quoted = f"`{name}`"
+        as_float = f"CAST({quoted} AS FLOAT64)"
+        # Aliases are positional (c0_, c1_, ...) so column names never leak
+        # into alias identifiers.
+        select_parts.append(f"COUNT({quoted}) AS c{idx}_nn")
+        select_parts.append(f"AVG({as_float}) AS c{idx}_avg")
+        select_parts.append(f"STDDEV_SAMP({as_float}) AS c{idx}_stddev")
+        select_parts.append(f"MIN({as_float}) AS c{idx}_min")
+        select_parts.append(f"MAX({as_float}) AS c{idx}_max")
+        select_parts.extend(
+            f"APPROX_QUANTILES({as_float}, 100)[OFFSET({pct})] AS c{idx}_p{pct}"
+            for pct in percentiles
+        )
+
+    stats_job = client.query(
+        f"SELECT {', '.join(select_parts)} "
+        f"FROM {from_expression} AS _src WHERE {ts_filter}"
+    )
+    stats_job.result()
+    fetched = list(stats_job)
+    if not fetched:
+        return [empty_numeric_metric(n) for n in feature_names]
+
+    row = fetched[0]
+    row_count = row["_row_count"] or 0
+    metrics: List[Dict[str, Any]] = []
+    for idx, name in enumerate(feature_names):
+        prefix = f"c{idx}_"
+        non_null = row[f"{prefix}nn"] or 0
+        null_count = int(row_count) - int(non_null)
+        low = opt_float(row[f"{prefix}min"])
+        high = opt_float(row[f"{prefix}max"])
+
+        metric: Dict[str, Any] = {
+            "feature_name": name,
+            "feature_type": "numeric",
+            "row_count": int(row_count),
+            "null_count": null_count,
+            "null_rate": null_count / row_count if row_count > 0 else 0.0,
+            "mean": opt_float(row[f"{prefix}avg"]),
+            "stddev": opt_float(row[f"{prefix}stddev"]),
+            "min_val": low,
+            "max_val": high,
+        }
+        for pct in percentiles:
+            metric[f"p{pct}"] = opt_float(row[f"{prefix}p{pct}"])
+        metric["histogram"] = None
+
+        has_values = bool(non_null) and int(non_null) > 0
+        if low is not None and high is not None and has_values:
+            metric["histogram"] = _bq_numeric_histogram(
+                config,
+                from_expression,
+                name,
+                ts_filter,
+                histogram_bins,
+                low,
+                high,
+            )
+        metrics.append(metric)
+    return metrics
+
+
+def _bq_categorical_stats(
+    config: RepoConfig,
+    from_expression: str,
+    col_name: str,
+    ts_filter: str,
+    top_n: int,
+) -> Dict[str, Any]:
+    """Compute categorical statistics for one column in BigQuery.
+
+    One query returns the ``top_n`` most frequent non-null values (cast to
+    STRING) and repeats the overall row, null and distinct counts on each
+    returned row.
+
+    Args:
+        config: Repo config; ``offline_store`` supplies project and location.
+        from_expression: Table or sub-query text placed after ``FROM``.
+        col_name: Column to profile; wrapped in backticks in the SQL.
+        ts_filter: SQL predicate used to build the ``filtered`` CTE.
+        top_n: Maximum number of distinct values to return.
+
+    Returns:
+        A metric dict whose ``histogram`` holds the top values, the count of
+        remaining non-null rows and the distinct count; or
+        ``empty_categorical_metric(col_name)`` when the query returns no rows.
+    """
+    store_cfg = config.offline_store
+    client = _get_bigquery_client(
+        project=store_cfg.billing_project_id or store_cfg.project_id,
+        location=store_cfg.location,
+    )
+    quoted = f"`{col_name}`"
+    query = f"""
+WITH filtered AS (
+  SELECT * FROM {from_expression} AS _src WHERE {ts_filter}
+)
+SELECT
+  (SELECT COUNT(*) FROM filtered) AS row_count,
+  (SELECT COUNT(*) - COUNT({quoted}) FROM filtered) AS null_count,
+  (SELECT COUNT(DISTINCT {quoted}) FROM filtered WHERE {quoted} IS NOT NULL) AS unique_count,
+  CAST({quoted} AS STRING) AS value,
+  COUNT(*) AS cnt
+FROM filtered
+WHERE {quoted} IS NOT NULL
+GROUP BY CAST({quoted} AS STRING)
+ORDER BY cnt DESC
+LIMIT {int(top_n)}
+"""
+    job = client.query(query)
+    job.result()
+    fetched = list(job)
+    if not fetched:
+        return empty_categorical_metric(col_name)
+
+    # The three scalar sub-selects are identical on every row; read them once.
+    head = fetched[0]
+    row_count = head["row_count"]
+    null_count = head["null_count"]
+    unique_count = head["unique_count"]
+
+    top_entries = []
+    top_total = 0
+    for record in fetched:
+        top_entries.append({"value": record["value"], "count": record["cnt"]})
+        top_total += record["cnt"]
+    remainder = (row_count - null_count) - top_total
+
+    metric: Dict[str, Any] = {
+        "feature_name": col_name,
+        "feature_type": "categorical",
+        "row_count": row_count,
+        "null_count": null_count,
+        "null_rate": null_count / row_count if row_count > 0 else 0.0,
+    }
+    # Numeric-only fields are present but empty for categorical features.
+    for key in (
+        "mean",
+        "stddev",
+        "min_val",
+        "max_val",
+        "p50",
+        "p75",
+        "p90",
+        "p95",
+        "p99",
+    ):
+        metric[key] = None
+    metric["histogram"] = {
+        "values": top_entries,
+        "other_count": max(remainder, 0),
+        "unique_count": unique_count,
+    }
+    return metric
+
+
+def _bq_compute_monitoring_metrics(
+    config: RepoConfig,
+    data_source: BigQuerySource,
+    feature_columns: List[Tuple[str, str]],
+    timestamp_field: str,
+    start_date: Optional[datetime] = None,
+    end_date: Optional[datetime] = None,
+    histogram_bins: int = 20,
+    top_n: int = 10,
+) -> List[Dict[str, Any]]:
+    """Compute monitoring metrics for a BigQuery source.
+
+    Args:
+        config: Repo config passed through to the per-type helpers.
+        data_source: Source whose table query string is profiled.
+        feature_columns: ``(column_name, kind)`` pairs; only the kinds
+            ``"numeric"`` and ``"categorical"`` are processed.
+        timestamp_field: Column used to build the time-range filter.
+        start_date: Optional lower bound handed to the filter builder.
+        end_date: Optional upper bound handed to the filter builder.
+        histogram_bins: Bucket count for numeric histograms.
+        top_n: Number of top values kept for categorical columns.
+
+    Returns:
+        Metric dicts: all numeric columns first, then categorical columns,
+        each group in input order.
+    """
+    from_expression = data_source.get_table_query_string()
+    ts_filter = get_timestamp_filter_sql(
+        start_date,
+        end_date,
+        timestamp_field,
+        date_partition_column=data_source.date_partition_column,
+        quote_fields=False,
+        cast_style="timestamp_func",
+    )
+
+    numeric_names: List[str] = []
+    categorical_names: List[str] = []
+    for name, kind in feature_columns:
+        if kind == "numeric":
+            numeric_names.append(name)
+        elif kind == "categorical":
+            categorical_names.append(name)
+
+    metrics: List[Dict[str, Any]] = []
+    if numeric_names:
+        # Numeric columns share a single aggregate query.
+        metrics += _bq_numeric_stats(
+            config,
+            from_expression,
+            numeric_names,
+            ts_filter,
+            histogram_bins,
+        )
+    for name in categorical_names:
+        metrics.append(
+            _bq_categorical_stats(config, from_expression, name, ts_filter, top_n)
+        )
+    return metrics
+
class BigQueryRetrievalJob(RetrievalJob):
def __init__(
diff --git a/sdk/python/feast/infra/offline_stores/contrib/oracle_offline_store/oracle.py b/sdk/python/feast/infra/offline_stores/contrib/oracle_offline_store/oracle.py
index 43c37f8ec10..25517fa74c3 100644
--- a/sdk/python/feast/infra/offline_stores/contrib/oracle_offline_store/oracle.py
+++ b/sdk/python/feast/infra/offline_stores/contrib/oracle_offline_store/oracle.py
@@ -1,6 +1,7 @@
-from datetime import datetime
+import json
+from datetime import date, datetime, timezone
from pathlib import Path
-from typing import Any, Callable, List, Literal, Optional, Union
+from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
import ibis
import pandas as pd
@@ -22,7 +23,19 @@
write_logged_features_ibis,
)
from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob
+from feast.infra.offline_stores.offline_utils import get_timestamp_filter_sql
from feast.infra.registry.base_registry import BaseRegistry
+from feast.monitoring.monitoring_utils import (
+ MON_TABLE_FEATURE,
+ MON_TABLE_FEATURE_SERVICE,
+ MON_TABLE_FEATURE_VIEW,
+ MON_TABLE_JOB,
+ empty_categorical_metric,
+ empty_numeric_metric,
+ monitoring_table_meta,
+ normalize_monitoring_row,
+ opt_float,
+)
from feast.repo_config import FeastConfigBaseModel, RepoConfig
from feast.utils import compute_non_entity_date_range
@@ -179,6 +192,299 @@ def _build_entity_df_from_feature_sources(
return pd.concat(entity_dfs, ignore_index=True).drop_duplicates()
+# ------------------------------------------------------------------ #
+# Oracle monitoring helpers
+# ------------------------------------------------------------------ #
+
+
+def _oracle_quote_ident(name: str) -> str:
+    """Return ``name`` wrapped in double quotes for use as an Oracle identifier.
+
+    Quoting makes the identifier case-sensitive. Oracle identifiers may not
+    contain a double quote or a NUL character, and a double quote would close
+    the quoted identifier early, so such names are rejected here instead of
+    being interpolated into SQL.
+
+    Args:
+        name: Raw column or identifier name.
+
+    Returns:
+        The double-quoted identifier.
+
+    Raises:
+        ValueError: If ``name`` contains ``"`` or a NUL character.
+    """
+    if '"' in name or "\0" in name:
+        raise ValueError(f"Invalid Oracle identifier: {name!r}")
+    return f'"{name}"'
+
+
+def _oracle_ts_where(ts_filter: str) -> str:
+    """Turn an optional timestamp filter into a usable ``WHERE`` predicate.
+
+    An empty or whitespace-only filter becomes the always-true ``1=1``;
+    anything else is wrapped in parentheses so it can be AND-ed safely.
+    """
+    if not ts_filter or not ts_filter.strip():
+        return "1=1"
+    return f"({ts_filter})"
+
+
+def _oracle_fetchall(con, sql: str):
+    """Run ``sql`` through ``con.raw_sql`` and return every row.
+
+    The cursor is closed (when it exposes ``close``) whether or not the fetch
+    succeeds.
+    """
+    cursor = con.raw_sql(sql)
+    try:
+        fetched = cursor.fetchall()
+    finally:
+        if hasattr(cursor, "close"):
+            cursor.close()
+    return fetched
+
+
+def _oracle_exec(con, sql: str) -> None:
+    """Execute a statement via ``con.raw_sql`` and discard any result.
+
+    The returned cursor is closed when it exposes ``close``.
+    NOTE(review): nothing here commits; presumably ``raw_sql`` takes care of
+    the transaction - confirm against the ibis backend in use.
+    """
+    cursor = con.raw_sql(sql)
+    if hasattr(cursor, "close"):
+        cursor.close()
+
+
+def _oracle_numeric_histogram(
+    con,
+    from_expression: str,
+    col_name: str,
+    ts_filter: str,
+    bins: int,
+    min_val: float,
+    max_val: float,
+) -> Dict[str, Any]:
+    """Compute an equal-width histogram of one numeric column in Oracle.
+
+    Args:
+        con: Connection object exposing ``raw_sql``.
+        from_expression: Table or sub-query text placed after ``FROM``.
+        col_name: Column to bucket (quoted as an identifier).
+        ts_filter: Optional SQL predicate restricting the rows.
+        bins: Number of buckets; must be at least 1 unless the range is empty.
+        min_val: Lower edge of the first bucket.
+        max_val: Upper edge of the last bucket.
+
+    Returns:
+        Dict with ``bins`` (bucket edges), ``counts`` and ``bin_width``. When
+        ``min_val == max_val`` a single bucket of width ``0.0`` is returned.
+
+    Raises:
+        ValueError: If ``bins`` is smaller than 1 for a non-empty range.
+    """
+    q_col = _oracle_quote_ident(col_name)
+    tw = _oracle_ts_where(ts_filter)
+
+    if min_val == max_val:
+        # Degenerate range: every non-null value lands in the same bucket.
+        cnt_row = _oracle_fetchall(
+            con,
+            f"SELECT COUNT(*) FROM {from_expression} _src "
+            f"WHERE {q_col} IS NOT NULL AND {tw}",
+        )
+        cnt = int((cnt_row[0][0] if cnt_row else 0) or 0)
+        return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0}
+
+    if bins < 1:
+        # Previously bins == 0 surfaced as ZeroDivisionError and negative
+        # values silently produced an empty histogram.
+        raise ValueError(f"bins must be a positive integer, got {bins!r}")
+
+    bin_width = (max_val - min_val) / bins
+    # The width can underflow to 0.0 for subnormal ranges; never divide by it.
+    divisor = bin_width if bin_width != 0 else 1e-300
+
+    # Buckets are 1-based; LEAST/GREATEST clamp values on the outer edges
+    # (notably max_val itself) into the first/last bucket. NULLs are already
+    # excluded by the inner WHERE, so no NULL bucket can be produced.
+    query = (
+        f"SELECT bucket, COUNT(*) AS cnt FROM ("
+        f" SELECT LEAST(GREATEST("
+        f"FLOOR((CAST({q_col} AS NUMBER) - {min_val}) / {divisor}) + 1, "
+        f"1), {bins}) AS bucket "
+        f"FROM {from_expression} _src "
+        f"WHERE {q_col} IS NOT NULL AND {tw}"
+        f") sub "
+        f"GROUP BY bucket ORDER BY bucket"
+    )
+
+    counts = [0] * bins
+    for bucket, cnt in _oracle_fetchall(con, query):
+        b = int(bucket)
+        if 1 <= b <= bins:
+            # Coerce to int so driver-specific numeric types never leak out.
+            counts[b - 1] += int(cnt)
+
+    return {
+        "bins": [float(min_val + i * bin_width) for i in range(bins + 1)],
+        "counts": counts,
+        "bin_width": float(bin_width),
+    }
+
+
+def _oracle_numeric_stats(
+    con,
+    from_expression: str,
+    feature_names: List[str],
+    ts_filter: str,
+    histogram_bins: int,
+) -> List[Dict[str, Any]]:
+    """Compute summary statistics for several numeric columns in one query.
+
+    The result row is positional: index 0 is the total row count, followed by
+    ten aggregates per feature (non-null count, mean, sample stddev, min, max,
+    p50, p75, p90, p95, p99). A histogram is fetched per column through
+    ``_oracle_numeric_histogram`` when the column has non-null values.
+
+    Args:
+        con: Connection object exposing ``raw_sql``.
+        from_expression: Table or sub-query text placed after ``FROM``.
+        feature_names: Columns to profile, in output order.
+        ts_filter: Optional SQL predicate restricting the rows.
+        histogram_bins: Bucket count forwarded to the histogram helper.
+
+    Returns:
+        One metric dict per entry of ``feature_names``.
+    """
+    quantiles = ("0.50", "0.75", "0.90", "0.95", "0.99")
+    percentile_keys = ("p50", "p75", "p90", "p95", "p99")
+
+    select_parts = ["COUNT(*)"]
+    for name in feature_names:
+        quoted = _oracle_quote_ident(name)
+        as_number = f"CAST({quoted} AS NUMBER)"
+        select_parts.append(f"COUNT({quoted})")
+        select_parts += [
+            f"{func}({as_number})" for func in ("AVG", "STDDEV_SAMP", "MIN", "MAX")
+        ]
+        select_parts += [
+            f"PERCENTILE_CONT({q}) WITHIN GROUP (ORDER BY {as_number})"
+            for q in quantiles
+        ]
+
+    where_clause = _oracle_ts_where(ts_filter)
+    fetched = _oracle_fetchall(
+        con,
+        f"SELECT {', '.join(select_parts)} FROM {from_expression} _src WHERE {where_clause}",
+    )
+    if not fetched or fetched[0] is None:
+        return [empty_numeric_metric(n) for n in feature_names]
+
+    row = fetched[0]
+    row_count = row[0]
+    metrics: List[Dict[str, Any]] = []
+
+    for position, name in enumerate(feature_names):
+        start = 1 + position * 10  # first aggregate of this feature
+        non_null = row[start] or 0
+        null_count = row_count - non_null
+        low = opt_float(row[start + 3])
+        high = opt_float(row[start + 4])
+
+        metric: Dict[str, Any] = {
+            "feature_name": name,
+            "feature_type": "numeric",
+            "row_count": row_count,
+            "null_count": null_count,
+            "null_rate": null_count / row_count if row_count > 0 else 0.0,
+            "mean": opt_float(row[start + 1]),
+            "stddev": opt_float(row[start + 2]),
+            "min_val": low,
+            "max_val": high,
+        }
+        for step, key in enumerate(percentile_keys):
+            metric[key] = opt_float(row[start + 5 + step])
+        metric["histogram"] = None
+
+        if low is not None and high is not None and non_null > 0:
+            metric["histogram"] = _oracle_numeric_histogram(
+                con,
+                from_expression,
+                name,
+                ts_filter,
+                histogram_bins,
+                low,
+                high,
+            )
+
+        metrics.append(metric)
+
+    return metrics
+
+
+def _oracle_categorical_stats(
+    con,
+    from_expression: str,
+    col_name: str,
+    ts_filter: str,
+    top_n: int,
+) -> Dict[str, Any]:
+    """Compute categorical statistics for one column in Oracle.
+
+    One query returns the ``top_n`` most frequent non-null values and repeats
+    the overall row, null and distinct counts on each returned row.
+
+    Args:
+        con: Connection object exposing ``raw_sql``.
+        from_expression: Table or sub-query text placed after ``FROM``.
+        col_name: Column to profile (quoted as an identifier).
+        ts_filter: Optional SQL predicate restricting the rows.
+        top_n: Maximum number of distinct values to return.
+
+    Returns:
+        A metric dict whose ``histogram`` holds the top values, the count of
+        remaining non-null rows and the distinct count; or
+        ``empty_categorical_metric(col_name)`` when the query returns no rows.
+    """
+    q_col = _oracle_quote_ident(col_name)
+    where_clause = _oracle_ts_where(ts_filter)
+
+    query = "".join(
+        [
+            f"WITH filtered AS (",
+            f" SELECT * FROM {from_expression} _src WHERE {where_clause}",
+            f") ",
+            f"SELECT ",
+            f" (SELECT COUNT(*) FROM filtered) AS row_count, ",
+            f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, ",
+            f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered ",
+            f" WHERE {q_col} IS NOT NULL) AS unique_count, ",
+            f" TO_CHAR({q_col}) AS value, COUNT(*) AS cnt ",
+            f"FROM filtered WHERE {q_col} IS NOT NULL ",
+            f"GROUP BY {q_col} ",
+            f"ORDER BY cnt DESC ",
+            f"FETCH FIRST {int(top_n)} ROWS ONLY",
+        ]
+    )
+
+    fetched = _oracle_fetchall(con, query)
+    if not fetched:
+        return empty_categorical_metric(col_name)
+
+    # Columns 0-2 carry the same totals on every row; columns 3-4 are the
+    # per-value label and frequency.
+    row_count, null_count, unique_count = fetched[0][0], fetched[0][1], fetched[0][2]
+
+    top_entries = []
+    top_total = 0
+    for record in fetched:
+        top_entries.append({"value": record[3], "count": record[4]})
+        top_total += record[4]
+    remainder = (row_count - null_count) - top_total
+
+    metric: Dict[str, Any] = {
+        "feature_name": col_name,
+        "feature_type": "categorical",
+        "row_count": row_count,
+        "null_count": null_count,
+        "null_rate": null_count / row_count if row_count > 0 else 0.0,
+    }
+    # Numeric-only fields are present but empty for categorical features.
+    for key in (
+        "mean",
+        "stddev",
+        "min_val",
+        "max_val",
+        "p50",
+        "p75",
+        "p90",
+        "p95",
+        "p99",
+    ):
+        metric[key] = None
+    metric["histogram"] = {
+        "values": top_entries,
+        "other_count": max(remainder, 0),
+        "unique_count": unique_count,
+    }
+    return metric
+
+
+def _oracle_escape_literal(val: Any) -> str:
+    """Render a Python value as an Oracle SQL literal.
+
+    Mapping:
+        * ``None`` and non-finite floats (NaN, +/-inf) -> ``NULL``; such
+          floats have no bare numeric literal form and would otherwise yield
+          invalid SQL.
+        * ``bool`` -> ``1`` / ``0`` (checked before ``int`` because ``bool``
+          is an ``int`` subclass).
+        * ``int`` / ``float`` -> ``str(val)``.
+        * ``datetime`` -> ``TIMESTAMP '...'`` with second precision; a
+          timezone-aware value gets its UTC offset appended after a space
+          (``TIMESTAMP '2024-01-02 03:04:05 +00:00'``).
+        * ``date`` -> ``DATE 'YYYY-MM-DD'``.
+        * anything else -> ``str(val)`` in single quotes with embedded single
+          quotes doubled.
+
+    Args:
+        val: Value to render.
+
+    Returns:
+        SQL text that can be spliced into a statement.
+    """
+    if val is None:
+        return "NULL"
+    if isinstance(val, bool):
+        return "1" if val else "0"
+    if isinstance(val, float) and (val != val or abs(val) == float("inf")):
+        return "NULL"
+    if isinstance(val, (int, float)):
+        return str(val)
+    if isinstance(val, datetime):
+        # datetime must be tested before date: datetime subclasses date.
+        stamp = val.replace(tzinfo=None).isoformat(sep=" ", timespec="seconds")
+        offset = val.utcoffset()
+        if offset is None:
+            return f"TIMESTAMP '{stamp}'"
+        total_minutes = int(offset.total_seconds() // 60)
+        sign = "-" if total_minutes < 0 else "+"
+        hours, minutes = divmod(abs(total_minutes), 60)
+        return f"TIMESTAMP '{stamp} {sign}{hours:02d}:{minutes:02d}'"
+    if isinstance(val, date):
+        return f"DATE '{val.isoformat()}'"
+    s = str(val).replace("'", "''")
+    return f"'{s}'"
+
+
+def _oracle_merge_metric_row(
+    con, table: str, columns: List[str], pk_cols: List[str], row: Dict[str, Any]
+) -> None:
+    """Upsert one metric row into ``table`` with a single ``MERGE`` statement.
+
+    The row is rendered as ``SELECT <literals> FROM dual`` and matched against
+    the target on ``pk_cols``; matching rows have every non-key column
+    updated, otherwise the row is inserted.
+
+    Args:
+        con: Connection object exposing ``raw_sql``.
+        table: Target table name, interpolated as-is.
+        columns: All column names to write, in order.
+        pk_cols: Subset of ``columns`` forming the match key.
+        row: Column name -> value; missing keys are written as ``NULL``. A
+            non-string ``histogram`` value is serialised with ``json.dumps``.
+    """
+
+    def quoted(column: str) -> str:
+        return f'"{column}"'
+
+    def literal_for(column: str) -> str:
+        value = row.get(column)
+        if column == "histogram" and value is not None and not isinstance(value, str):
+            value = json.dumps(value)
+        return _oracle_escape_literal(value)
+
+    literals = [literal_for(column) for column in columns]
+    updatable = [column for column in columns if column not in pk_cols]
+
+    source_select = ", ".join(
+        f"{literal} AS {quoted(column)}" for literal, column in zip(literals, columns)
+    )
+    match_on = " AND ".join(f"t.{quoted(c)} = s.{quoted(c)}" for c in pk_cols)
+    assignments = ", ".join(f"t.{quoted(c)} = s.{quoted(c)}" for c in updatable)
+    target_columns = ", ".join(quoted(c) for c in columns)
+    source_columns = ", ".join(f"s.{quoted(c)}" for c in columns)
+
+    statement = (
+        f"MERGE INTO {table} t "
+        f"USING (SELECT {source_select} FROM dual) s "
+        f"ON ({match_on}) "
+        f"WHEN MATCHED THEN UPDATE SET {assignments} "
+        f"WHEN NOT MATCHED THEN INSERT ({target_columns}) VALUES ({source_columns})"
+    )
+    _oracle_exec(con, statement)
+
+
+def _oracle_try_execute_ddl(con, ddl: str) -> None:
+    """Run a DDL statement, tolerating ORA-00955 (name already in use).
+
+    The statement is wrapped in an anonymous PL/SQL block that re-raises every
+    error except SQLCODE -955, so repeated calls that create the same object
+    do not fail.
+    """
+    # EXECUTE IMMEDIATE takes a string literal: double embedded single quotes.
+    statement = ddl.strip().replace("'", "''")
+    block = "\n".join(
+        [
+            "BEGIN",
+            f" EXECUTE IMMEDIATE '{statement}';",
+            "EXCEPTION",
+            " WHEN OTHERS THEN",
+            " IF SQLCODE != -955 THEN RAISE;",
+            " END IF;",
+            "END;",
+        ]
+    )
+    _oracle_exec(con, block)
+
+
class OracleOfflineStore(OfflineStore):
@staticmethod
def pull_latest_from_table_or_query(
@@ -306,3 +612,289 @@ def write_logged_features(
logging_config=logging_config,
registry=registry,
)
+
+    @staticmethod
+    def compute_monitoring_metrics(
+        config: RepoConfig,
+        data_source: DataSource,
+        feature_columns: List[Tuple[str, str]],
+        timestamp_field: str,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        histogram_bins: int = 20,
+        top_n: int = 10,
+    ) -> List[Dict[str, Any]]:
+        """Compute monitoring metrics for an Oracle source.
+
+        Args:
+            config: Repo config; must carry an ``OracleOfflineStoreConfig``.
+            data_source: Must be an ``OracleSource``.
+            feature_columns: ``(column_name, kind)`` pairs; only the kinds
+                ``"numeric"`` and ``"categorical"`` are processed.
+            timestamp_field: Column used to build the time-range filter.
+            start_date: Optional lower bound handed to the filter builder.
+            end_date: Optional upper bound handed to the filter builder.
+            histogram_bins: Bucket count for numeric histograms.
+            top_n: Number of top values kept for categorical columns.
+
+        Returns:
+            Metric dicts: all numeric columns first, then categorical
+            columns, each group in input order.
+        """
+        assert isinstance(config.offline_store, OracleOfflineStoreConfig)
+        assert isinstance(data_source, OracleSource)
+
+        from_expression = data_source.get_table_query_string()
+        ts_filter = get_timestamp_filter_sql(
+            start_date,
+            end_date,
+            timestamp_field,
+            tz=timezone.utc,
+            cast_style="timestamp",
+            date_time_separator=" ",
+        )
+
+        numeric_names: List[str] = []
+        categorical_names: List[str] = []
+        for name, kind in feature_columns:
+            if kind == "numeric":
+                numeric_names.append(name)
+            elif kind == "categorical":
+                categorical_names.append(name)
+
+        con = get_ibis_connection(config)
+
+        metrics: List[Dict[str, Any]] = []
+        if numeric_names:
+            # Numeric columns share a single aggregate query.
+            metrics += _oracle_numeric_stats(
+                con,
+                from_expression,
+                numeric_names,
+                ts_filter,
+                histogram_bins,
+            )
+        for name in categorical_names:
+            metrics.append(
+                _oracle_categorical_stats(
+                    con,
+                    from_expression,
+                    name,
+                    ts_filter,
+                    top_n,
+                )
+            )
+        return metrics
+
+    @staticmethod
+    def get_monitoring_max_timestamp(
+        config: RepoConfig,
+        data_source: DataSource,
+        timestamp_field: str,
+    ) -> Optional[datetime]:
+        """Return the largest value of ``timestamp_field`` in the source.
+
+        Returns:
+            A timezone-aware ``datetime``: naive datetimes are tagged as UTC
+            and plain dates become midnight UTC. ``None`` when the source has
+            no rows, the maximum is NULL, or the value is of another type.
+        """
+        assert isinstance(config.offline_store, OracleOfflineStoreConfig)
+        assert isinstance(data_source, OracleSource)
+
+        from_expression = data_source.get_table_query_string()
+        ts_col = _oracle_quote_ident(timestamp_field)
+
+        con = get_ibis_connection(config)
+        fetched = _oracle_fetchall(
+            con,
+            f"SELECT MAX({ts_col}) FROM {from_expression} _src",
+        )
+        if not fetched:
+            return None
+        first = fetched[0]
+        if first is None or first[0] is None:
+            return None
+
+        latest = first[0]
+        # datetime is tested first because it is a subclass of date.
+        if isinstance(latest, datetime):
+            if latest.tzinfo:
+                return latest
+            return latest.replace(tzinfo=timezone.utc)
+        if isinstance(latest, date):
+            return datetime.combine(latest, datetime.min.time(), tzinfo=timezone.utc)
+        return None
+
+    @staticmethod
+    def ensure_monitoring_tables(config: RepoConfig) -> None:
+        """Create the four monitoring tables if they do not exist yet.
+
+        Column names are declared as quoted, lower-case identifiers. Oracle
+        folds unquoted names to upper case, while the statements that read
+        and write these tables address columns as quoted lower-case names
+        (for example ``"project_id"``); declaring them unquoted makes those
+        statements fail with ORA-00904.
+
+        Each statement goes through ``_oracle_try_execute_ddl``, which ignores
+        ORA-00955, so calling this repeatedly is harmless. Tables created
+        earlier with unquoted (upper-case) columns are left untouched and
+        need to be recreated to pick up the lower-case names.
+        """
+        assert isinstance(config.offline_store, OracleOfflineStoreConfig)
+        con = get_ibis_connection(config)
+
+        # Per-feature metrics.
+        _oracle_try_execute_ddl(
+            con,
+            f"""
+            CREATE TABLE {MON_TABLE_FEATURE} (
+                "project_id" VARCHAR2(255) NOT NULL,
+                "feature_view_name" VARCHAR2(255) NOT NULL,
+                "feature_name" VARCHAR2(255) NOT NULL,
+                "metric_date" DATE NOT NULL,
+                "granularity" VARCHAR2(20) DEFAULT 'daily' NOT NULL,
+                "data_source_type" VARCHAR2(50) DEFAULT 'batch' NOT NULL,
+                "computed_at" TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL,
+                "is_baseline" NUMBER(1) DEFAULT 0 NOT NULL,
+                "feature_type" VARCHAR2(50) NOT NULL,
+                "row_count" NUMBER,
+                "null_count" NUMBER,
+                "null_rate" NUMBER,
+                "mean" NUMBER,
+                "stddev" NUMBER,
+                "min_val" NUMBER,
+                "max_val" NUMBER,
+                "p50" NUMBER,
+                "p75" NUMBER,
+                "p90" NUMBER,
+                "p95" NUMBER,
+                "p99" NUMBER,
+                "histogram" VARCHAR2(4000),
+                CONSTRAINT pk_fm PRIMARY KEY ("project_id", "feature_view_name",
+                    "feature_name", "metric_date", "granularity", "data_source_type")
+            )
+            """,
+        )
+
+        # Per-feature-view aggregates.
+        _oracle_try_execute_ddl(
+            con,
+            f"""
+            CREATE TABLE {MON_TABLE_FEATURE_VIEW} (
+                "project_id" VARCHAR2(255) NOT NULL,
+                "feature_view_name" VARCHAR2(255) NOT NULL,
+                "metric_date" DATE NOT NULL,
+                "granularity" VARCHAR2(20) DEFAULT 'daily' NOT NULL,
+                "data_source_type" VARCHAR2(50) DEFAULT 'batch' NOT NULL,
+                "computed_at" TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL,
+                "is_baseline" NUMBER(1) DEFAULT 0 NOT NULL,
+                "total_row_count" NUMBER,
+                "total_features" NUMBER,
+                "features_with_nulls" NUMBER,
+                "avg_null_rate" NUMBER,
+                "max_null_rate" NUMBER,
+                CONSTRAINT pk_fvm PRIMARY KEY ("project_id", "feature_view_name",
+                    "metric_date", "granularity", "data_source_type")
+            )
+            """,
+        )
+
+        # Per-feature-service aggregates.
+        _oracle_try_execute_ddl(
+            con,
+            f"""
+            CREATE TABLE {MON_TABLE_FEATURE_SERVICE} (
+                "project_id" VARCHAR2(255) NOT NULL,
+                "feature_service_name" VARCHAR2(255) NOT NULL,
+                "metric_date" DATE NOT NULL,
+                "granularity" VARCHAR2(20) DEFAULT 'daily' NOT NULL,
+                "data_source_type" VARCHAR2(50) DEFAULT 'batch' NOT NULL,
+                "computed_at" TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL,
+                "is_baseline" NUMBER(1) DEFAULT 0 NOT NULL,
+                "total_feature_views" NUMBER,
+                "total_features" NUMBER,
+                "avg_null_rate" NUMBER,
+                "max_null_rate" NUMBER,
+                CONSTRAINT pk_fsm PRIMARY KEY ("project_id", "feature_service_name",
+                    "metric_date", "granularity", "data_source_type")
+            )
+            """,
+        )
+
+        # Monitoring job bookkeeping.
+        _oracle_try_execute_ddl(
+            con,
+            f"""
+            CREATE TABLE {MON_TABLE_JOB} (
+                "job_id" VARCHAR2(36) NOT NULL,
+                "project_id" VARCHAR2(255) NOT NULL,
+                "feature_view_name" VARCHAR2(255),
+                "job_type" VARCHAR2(50) NOT NULL,
+                "status" VARCHAR2(20) DEFAULT 'pending' NOT NULL,
+                "parameters" VARCHAR2(4000),
+                "metric_date" DATE NOT NULL,
+                "started_at" TIMESTAMP WITH TIME ZONE,
+                "completed_at" TIMESTAMP WITH TIME ZONE,
+                "error_message" VARCHAR2(4000),
+                "result_summary" VARCHAR2(4000),
+                CONSTRAINT pk_fmj PRIMARY KEY ("job_id")
+            )
+            """,
+        )
+
+    @staticmethod
+    def save_monitoring_metrics(
+        config: RepoConfig,
+        metric_type: str,
+        metrics: List[Dict[str, Any]],
+    ) -> None:
+        """Upsert metric rows into the table that belongs to ``metric_type``.
+
+        Does nothing for an empty ``metrics`` list. Each row is written with
+        its own ``MERGE`` statement.
+        """
+        if len(metrics) == 0 if isinstance(metrics, list) else not metrics:
+            return
+        assert isinstance(config.offline_store, OracleOfflineStoreConfig)
+
+        target_table, all_columns, key_columns = monitoring_table_meta(metric_type)
+        con = get_ibis_connection(config)
+        for metric_row in metrics:
+            _oracle_merge_metric_row(
+                con, target_table, all_columns, key_columns, metric_row
+            )
+
+    @staticmethod
+    def query_monitoring_metrics(
+        config: RepoConfig,
+        project: str,
+        metric_type: str,
+        filters: Optional[Dict[str, Any]] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+    ) -> List[Dict[str, Any]]:
+        """Read stored monitoring rows of ``metric_type``.
+
+        Args:
+            config: Repo config; must carry an ``OracleOfflineStoreConfig``.
+            project: When truthy, restricts rows to this ``project_id``.
+            metric_type: Selects table and column list via
+                ``monitoring_table_meta``.
+            filters: Optional column -> value equality filters; entries whose
+                value is ``None`` are ignored.
+            start_date: Optional inclusive lower bound on ``metric_date``.
+            end_date: Optional inclusive upper bound on ``metric_date``.
+
+        Returns:
+            Rows passed through ``normalize_monitoring_row``, ordered by
+            ``metric_date`` when the table has that column, else ``job_id``.
+        """
+        assert isinstance(config.offline_store, OracleOfflineStoreConfig)
+
+        table, columns, _ = monitoring_table_meta(metric_type)
+
+        def compare(column: str, operator: str, value: Any) -> str:
+            return (
+                f"{_oracle_quote_ident(column)} {operator} "
+                f"{_oracle_escape_literal(value)}"
+            )
+
+        conditions: list = []
+        if project:
+            conditions.append(compare("project_id", "=", project))
+        for key, value in (filters or {}).items():
+            if value is None:
+                continue
+            conditions.append(compare(key, "=", value))
+        if start_date:
+            conditions.append(compare("metric_date", ">=", start_date))
+        if end_date:
+            conditions.append(compare("metric_date", "<=", end_date))
+
+        where_sql = " AND ".join(conditions) if conditions else "1=1"
+        col_list = ", ".join(_oracle_quote_ident(c) for c in columns)
+        order_col = "metric_date" if "metric_date" in columns else "job_id"
+
+        con = get_ibis_connection(config)
+        fetched = _oracle_fetchall(
+            con,
+            f"SELECT {col_list} FROM {table} WHERE {where_sql} "
+            f"ORDER BY {_oracle_quote_ident(order_col)} ASC",
+        )
+
+        # Rows come back positionally in the same order as ``columns``.
+        return [normalize_monitoring_row(dict(zip(columns, record))) for record in fetched]
+
+    @staticmethod
+    def clear_monitoring_baseline(
+        config: RepoConfig,
+        project: str,
+        feature_view_name: Optional[str] = None,
+        feature_name: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+    ) -> None:
+        """Reset ``is_baseline`` to 0 on matching feature-metric rows.
+
+        Rows are matched on ``project_id`` plus every optional argument that
+        is truthy, and only rows currently flagged as baseline are updated.
+        """
+        assert isinstance(config.offline_store, OracleOfflineStoreConfig)
+
+        def equals(column: str, value: Any) -> str:
+            return f"{_oracle_quote_ident(column)} = {_oracle_escape_literal(value)}"
+
+        conditions = [equals("project_id", project)]
+        optional_filters = (
+            ("feature_view_name", feature_view_name),
+            ("feature_name", feature_name),
+            ("data_source_type", data_source_type),
+        )
+        for column, value in optional_filters:
+            if value:
+                conditions.append(equals(column, value))
+        conditions.append(f"{_oracle_quote_ident('is_baseline')} = 1")
+
+        where_sql = " AND ".join(conditions)
+        con = get_ibis_connection(config)
+        _oracle_exec(
+            con,
+            f"UPDATE {MON_TABLE_FEATURE} SET {_oracle_quote_ident('is_baseline')} = 0 "
+            f"WHERE {where_sql}",
+        )
diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py
index 50e48208647..b9fccecba79 100644
--- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py
+++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py
@@ -1,6 +1,6 @@
import contextlib
from dataclasses import asdict
-from datetime import datetime, timezone
+from datetime import date, datetime, timezone
from enum import Enum
from typing import (
Any,
@@ -42,6 +42,17 @@
get_query_schema,
)
from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig
+from feast.monitoring.monitoring_utils import (
+ MON_TABLE_FEATURE,
+ MON_TABLE_FEATURE_SERVICE,
+ MON_TABLE_FEATURE_VIEW,
+ MON_TABLE_JOB,
+ empty_categorical_metric,
+ empty_numeric_metric,
+ monitoring_table_meta,
+ normalize_monitoring_row,
+ opt_float,
+)
from feast.on_demand_feature_view import OnDemandFeatureView
from feast.repo_config import RepoConfig
from feast.saved_dataset import SavedDatasetStorage
@@ -289,6 +300,260 @@ def pull_all_from_table_or_query(
on_demand_feature_views=None,
)
+    @staticmethod
+    def compute_monitoring_metrics(
+        config: RepoConfig,
+        data_source: DataSource,
+        feature_columns: List[Tuple[str, str]],
+        timestamp_field: str,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        histogram_bins: int = 20,
+        top_n: int = 10,
+    ) -> List[Dict[str, Any]]:
+        """Compute monitoring metrics for a PostgreSQL source.
+
+        Args:
+            config: Repo config; must carry a ``PostgreSQLOfflineStoreConfig``.
+            data_source: Must be a ``PostgreSQLSource``.
+            feature_columns: ``(column_name, kind)`` pairs; only the kinds
+                ``"numeric"`` and ``"categorical"`` are processed.
+            timestamp_field: Column used to build the time-range filter.
+            start_date: Optional lower bound handed to the filter builder.
+            end_date: Optional upper bound handed to the filter builder.
+            histogram_bins: Bucket count for numeric histograms.
+            top_n: Number of top values kept for categorical columns.
+
+        Returns:
+            Metric dicts: all numeric columns first, then categorical
+            columns, each group in input order.
+        """
+        assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig)
+        assert isinstance(data_source, PostgreSQLSource)
+
+        from_expression = data_source.get_table_query_string()
+        ts_filter = get_timestamp_filter_sql(
+            start_date,
+            end_date,
+            timestamp_field,
+            tz=timezone.utc,
+            cast_style="timestamptz",
+            date_time_separator=" ",
+        )
+
+        numeric_names: List[str] = []
+        categorical_names: List[str] = []
+        for name, kind in feature_columns:
+            if kind == "numeric":
+                numeric_names.append(name)
+            elif kind == "categorical":
+                categorical_names.append(name)
+
+        metrics: List[Dict[str, Any]] = []
+        with _get_conn(config.offline_store) as conn:
+            # Profiling only reads; mark the session accordingly.
+            conn.read_only = True
+
+            if numeric_names:
+                metrics += _sql_numeric_stats(
+                    conn,
+                    from_expression,
+                    numeric_names,
+                    ts_filter,
+                    histogram_bins,
+                )
+
+            for name in categorical_names:
+                metrics.append(
+                    _sql_categorical_stats(
+                        conn,
+                        from_expression,
+                        name,
+                        ts_filter,
+                        top_n,
+                    )
+                )
+
+        return metrics
+
+    @staticmethod
+    def get_monitoring_max_timestamp(
+        config: RepoConfig,
+        data_source: DataSource,
+        timestamp_field: str,
+    ) -> Optional[datetime]:
+        """Return the largest value of ``timestamp_field`` in the source.
+
+        Returns:
+            A timezone-aware ``datetime``: naive datetimes are tagged as UTC;
+            any other non-NULL value is combined with midnight UTC as a date.
+            ``None`` when no row comes back or the maximum is NULL.
+        """
+        assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig)
+        assert isinstance(data_source, PostgreSQLSource)
+
+        from_expression = data_source.get_table_query_string_with_alias("max_ts_alias")
+        statement = f'SELECT MAX("{timestamp_field}") FROM {from_expression}'
+
+        with _get_conn(config.offline_store) as conn:
+            conn.read_only = True
+            with conn.cursor() as cur:
+                cur.execute(statement)
+                fetched = cur.fetchone()
+
+        latest = None if fetched is None else fetched[0]
+        if latest is None:
+            return None
+        if not isinstance(latest, datetime):
+            return datetime.combine(latest, datetime.min.time(), tzinfo=timezone.utc)
+        if latest.tzinfo:
+            return latest
+        return latest.replace(tzinfo=timezone.utc)
+
+ # ------------------------------------------------------------------ #
+ # Monitoring metrics storage (native PostgreSQL)
+ # ------------------------------------------------------------------ #
+
+    @staticmethod
+    def ensure_monitoring_tables(config: RepoConfig) -> None:
+        """Create the monitoring tables and their indexes if they are missing.
+
+        Every statement uses ``IF NOT EXISTS``, so repeated calls are
+        harmless. All four scripts run on one connection and are committed
+        together at the end.
+        """
+        assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig)
+
+        # Per-feature metrics plus lookup indexes.
+        feature_ddl = f"""
+            CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE} (
+                project_id VARCHAR(255) NOT NULL,
+                feature_view_name VARCHAR(255) NOT NULL,
+                feature_name VARCHAR(255) NOT NULL,
+                metric_date DATE NOT NULL,
+                granularity VARCHAR(20) NOT NULL DEFAULT 'daily',
+                data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch',
+                computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+                is_baseline BOOLEAN NOT NULL DEFAULT FALSE,
+                feature_type VARCHAR(50) NOT NULL,
+                row_count BIGINT,
+                null_count BIGINT,
+                null_rate DOUBLE PRECISION,
+                mean DOUBLE PRECISION,
+                stddev DOUBLE PRECISION,
+                min_val DOUBLE PRECISION,
+                max_val DOUBLE PRECISION,
+                p50 DOUBLE PRECISION,
+                p75 DOUBLE PRECISION,
+                p90 DOUBLE PRECISION,
+                p95 DOUBLE PRECISION,
+                p99 DOUBLE PRECISION,
+                histogram JSONB,
+                PRIMARY KEY (project_id, feature_view_name, feature_name,
+                             metric_date, granularity, data_source_type)
+            );
+            CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_project
+                ON {MON_TABLE_FEATURE} (project_id);
+            CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_view
+                ON {MON_TABLE_FEATURE} (project_id, feature_view_name);
+            CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_date
+                ON {MON_TABLE_FEATURE} (metric_date);
+            CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_granularity
+                ON {MON_TABLE_FEATURE} (granularity);
+            CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_baseline
+                ON {MON_TABLE_FEATURE} (project_id, feature_view_name, feature_name)
+                WHERE is_baseline = TRUE;
+            """
+
+        # Per-feature-view aggregates.
+        feature_view_ddl = f"""
+            CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_VIEW} (
+                project_id VARCHAR(255) NOT NULL,
+                feature_view_name VARCHAR(255) NOT NULL,
+                metric_date DATE NOT NULL,
+                granularity VARCHAR(20) NOT NULL DEFAULT 'daily',
+                data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch',
+                computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+                is_baseline BOOLEAN NOT NULL DEFAULT FALSE,
+                total_row_count BIGINT,
+                total_features INTEGER,
+                features_with_nulls INTEGER,
+                avg_null_rate DOUBLE PRECISION,
+                max_null_rate DOUBLE PRECISION,
+                PRIMARY KEY (project_id, feature_view_name, metric_date,
+                             granularity, data_source_type)
+            );
+            """
+
+        # Per-feature-service aggregates.
+        feature_service_ddl = f"""
+            CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_SERVICE} (
+                project_id VARCHAR(255) NOT NULL,
+                feature_service_name VARCHAR(255) NOT NULL,
+                metric_date DATE NOT NULL,
+                granularity VARCHAR(20) NOT NULL DEFAULT 'daily',
+                data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch',
+                computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+                is_baseline BOOLEAN NOT NULL DEFAULT FALSE,
+                total_feature_views INTEGER,
+                total_features INTEGER,
+                avg_null_rate DOUBLE PRECISION,
+                max_null_rate DOUBLE PRECISION,
+                PRIMARY KEY (project_id, feature_service_name, metric_date,
+                             granularity, data_source_type)
+            );
+            """
+
+        # Monitoring job bookkeeping plus lookup indexes.
+        job_ddl = f"""
+            CREATE TABLE IF NOT EXISTS {MON_TABLE_JOB} (
+                job_id VARCHAR(36) PRIMARY KEY,
+                project_id VARCHAR(255) NOT NULL,
+                feature_view_name VARCHAR(255),
+                job_type VARCHAR(50) NOT NULL,
+                status VARCHAR(20) NOT NULL DEFAULT 'pending',
+                parameters TEXT,
+                metric_date DATE NOT NULL,
+                started_at TIMESTAMPTZ,
+                completed_at TIMESTAMPTZ,
+                error_message TEXT,
+                result_summary TEXT
+            );
+            CREATE INDEX IF NOT EXISTS idx_fm_jobs_status
+                ON {MON_TABLE_JOB} (status);
+            CREATE INDEX IF NOT EXISTS idx_fm_jobs_project
+                ON {MON_TABLE_JOB} (project_id);
+            """
+
+        with _get_conn(config.offline_store) as conn:
+            with conn.cursor() as cur:
+                for script in (feature_ddl, feature_view_ddl, feature_service_ddl, job_ddl):
+                    cur.execute(script)
+                conn.commit()
+
+    @staticmethod
+    def save_monitoring_metrics(
+        config: RepoConfig,
+        metric_type: str,
+        metrics: List[Dict[str, Any]],
+    ) -> None:
+        """Upsert metric rows into the table that belongs to ``metric_type``.
+
+        Does nothing when ``metrics`` is empty; otherwise the table metadata
+        is resolved and the rows are handed to ``_mon_upsert``.
+        """
+        if metrics:
+            assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig)
+
+            target_table, all_columns, key_columns = monitoring_table_meta(metric_type)
+            _mon_upsert(
+                config.offline_store, target_table, all_columns, key_columns, metrics
+            )
+
+    @staticmethod
+    def query_monitoring_metrics(
+        config: RepoConfig,
+        project: str,
+        metric_type: str,
+        filters: Optional[Dict[str, Any]] = None,
+        start_date: Optional["date"] = None,
+        end_date: Optional["date"] = None,
+    ) -> List[Dict[str, Any]]:
+        """Read stored monitoring rows of ``metric_type``.
+
+        Resolves the column list for ``metric_type`` and delegates the query,
+        including project, filter and date-range handling, to ``_mon_query``.
+        """
+        assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig)
+
+        _table, column_names, _keys = monitoring_table_meta(metric_type)
+        stored_rows = _mon_query(
+            config.offline_store,
+            metric_type,
+            column_names,
+            project,
+            filters,
+            start_date,
+            end_date,
+        )
+        return stored_rows
+
+    @staticmethod
+    def clear_monitoring_baseline(
+        config: RepoConfig,
+        project: str,
+        feature_view_name: Optional[str] = None,
+        feature_name: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+    ) -> None:
+        """Reset ``is_baseline`` to FALSE on matching feature-metric rows.
+
+        Rows are matched on ``project_id`` plus every optional argument that
+        is truthy, and only rows currently flagged as baseline are updated.
+        Values are passed as bound parameters; the update is committed.
+        """
+        assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig)
+
+        conditions = [sql.SQL("project_id = %s")]
+        params: list = [project]
+
+        optional_filters = (
+            (sql.SQL("feature_view_name = %s"), feature_view_name),
+            (sql.SQL("feature_name = %s"), feature_name),
+            (sql.SQL("data_source_type = %s"), data_source_type),
+        )
+        for clause, value in optional_filters:
+            if value:
+                conditions.append(clause)
+                params.append(value)
+
+        conditions.append(sql.SQL("is_baseline = TRUE"))
+
+        statement = sql.SQL("UPDATE {} SET is_baseline = FALSE WHERE {}").format(
+            sql.Identifier(MON_TABLE_FEATURE),
+            sql.SQL(" AND ").join(conditions),
+        )
+
+        with _get_conn(config.offline_store) as conn:
+            with conn.cursor() as cur:
+                cur.execute(statement, params)
+                conn.commit()
+
class PostgreSQLRetrievalJob(RetrievalJob):
def __init__(
@@ -782,3 +1047,295 @@ def _get_entity_schema(
{% endfor %}
{% endif %}
"""
+
+
+# ------------------------------------------------------------------ #
+# Monitoring SQL push-down helpers
+# ------------------------------------------------------------------ #
+
+
+def _sql_numeric_stats(
+    conn,
+    from_expression: str,
+    feature_names: List[str],
+    ts_filter: str,
+    histogram_bins: int,
+) -> List[Dict[str, Any]]:
+    """Batch-compute numeric statistics via one SQL query, then histograms.
+
+    Args:
+        conn: Open database connection; only ``conn.cursor()`` is used here.
+        from_expression: SQL text placed after ``FROM`` (aliased as ``_src``).
+        feature_names: Columns to aggregate; each is cast to ``float8``.
+        ts_filter: SQL boolean expression placed after ``WHERE``.
+        histogram_bins: Bin count forwarded to ``_sql_numeric_histogram``.
+
+    Returns:
+        One metric dict per entry of ``feature_names``, in the same order.
+        ``histogram`` stays ``None`` when the column has no non-null values or
+        when its min/max are not finite.
+    """
+    import math
+
+    # Each feature contributes exactly these 10 aggregates, in this order,
+    # after the single leading COUNT(*). The result row is decoded by position.
+    aggs_per_feature = 10
+    select_parts = ["COUNT(*)"]
+    for col in feature_names:
+        # Double embedded quotes so an unusual column name cannot terminate
+        # the quoted identifier early.
+        q = '"' + col.replace('"', '""') + '"'
+        c = f"{q}::float8"
+        select_parts.extend(
+            [
+                f"COUNT({q})",
+                f"AVG({c})",
+                f"STDDEV_SAMP({c})",
+                f"MIN({c})",
+                f"MAX({c})",
+                f"PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {c})",
+                f"PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {c})",
+                f"PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY {c})",
+                f"PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY {c})",
+                f"PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY {c})",
+            ]
+        )
+
+    query = (
+        f"SELECT {', '.join(select_parts)} "
+        f"FROM {from_expression} AS _src WHERE {ts_filter}"
+    )
+
+    with conn.cursor() as cur:
+        cur.execute(query)
+        row = cur.fetchone()
+
+    if row is None:
+        return [empty_numeric_metric(n) for n in feature_names]
+
+    row_count = row[0]
+    results: List[Dict[str, Any]] = []
+
+    for i, col in enumerate(feature_names):
+        base = 1 + i * aggs_per_feature
+        non_null = row[base] or 0
+        null_count = row_count - non_null
+
+        min_val = opt_float(row[base + 3])
+        max_val = opt_float(row[base + 4])
+
+        result: Dict[str, Any] = {
+            "feature_name": col,
+            "feature_type": "numeric",
+            "row_count": row_count,
+            "null_count": null_count,
+            "null_rate": null_count / row_count if row_count > 0 else 0.0,
+            "mean": opt_float(row[base + 1]),
+            "stddev": opt_float(row[base + 2]),
+            "min_val": min_val,
+            "max_val": max_val,
+            "p50": opt_float(row[base + 5]),
+            "p75": opt_float(row[base + 6]),
+            "p90": opt_float(row[base + 7]),
+            "p95": opt_float(row[base + 8]),
+            "p99": opt_float(row[base + 9]),
+            "histogram": None,
+        }
+
+        # The histogram helper interpolates min/max into SQL text, so a NaN or
+        # infinite bound would become a bare `nan`/`inf` token and fail the
+        # query. Skip the histogram for such columns instead.
+        has_finite_range = (
+            min_val is not None
+            and max_val is not None
+            and math.isfinite(min_val)
+            and math.isfinite(max_val)
+        )
+        if has_finite_range and non_null > 0:
+            result["histogram"] = _sql_numeric_histogram(
+                conn,
+                from_expression,
+                col,
+                ts_filter,
+                histogram_bins,
+                min_val,
+                max_val,
+            )
+
+        results.append(result)
+
+    return results
+
+
+def _sql_numeric_histogram(
+    conn,
+    from_expression: str,
+    col_name: str,
+    ts_filter: str,
+    bins: int,
+    min_val: float,
+    max_val: float,
+) -> Dict[str, Any]:
+    """Build an equal-width histogram for one numeric column with ``width_bucket``.
+
+    Args:
+        conn: Open database connection; only ``conn.cursor()`` is used here.
+        from_expression: SQL text placed after ``FROM`` (aliased as ``_src``).
+        col_name: Column to bucket; cast to ``float8`` in the query.
+        ts_filter: SQL boolean expression ANDed into the ``WHERE`` clause.
+        bins: Number of equal-width bins between ``min_val`` and ``max_val``.
+        min_val: Lower edge of the first bin.
+        max_val: Upper edge of the last bin.
+
+    Returns:
+        ``{"bins": edges, "counts": per-bin counts, "bin_width": width}``.
+        When ``min_val == max_val`` a single degenerate bin is returned.
+
+    Raises:
+        ValueError: If ``bins`` is less than 1 and ``min_val != max_val``.
+    """
+    # Double embedded quotes so the column name stays one quoted identifier.
+    q_col = '"' + col_name.replace('"', '""') + '"'
+
+    if min_val == max_val:
+        with conn.cursor() as cur:
+            cur.execute(
+                f"SELECT COUNT(*) FROM {from_expression} AS _src "
+                f"WHERE {q_col} IS NOT NULL AND {ts_filter}"
+            )
+            cnt = (cur.fetchone() or (0,))[0]
+        return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0}
+
+    if bins < 1:
+        raise ValueError(f"bins must be >= 1, got {bins}")
+
+    # width_bucket's upper bound is exclusive; nudge it so max_val normally
+    # lands in the last bin rather than the overflow bucket.
+    upper = max_val + (max_val - min_val) * 1e-10
+    bin_width = (max_val - min_val) / bins
+
+    query = (
+        f"SELECT width_bucket({q_col}::float8, {min_val}, {upper}, {bins}) AS bucket, "
+        f"COUNT(*) AS cnt "
+        f"FROM {from_expression} AS _src "
+        f"WHERE {q_col} IS NOT NULL AND {ts_filter} "
+        f"GROUP BY bucket ORDER BY bucket"
+    )
+
+    with conn.cursor() as cur:
+        cur.execute(query)
+        rows = cur.fetchall()
+
+    counts = [0] * bins
+    for bucket, cnt in rows:
+        if bucket is None:
+            continue
+        # The nudge above can be lost to float rounding when the range is tiny
+        # relative to max_val, which puts max_val in bucket `bins + 1`. Clamp
+        # under/overflow buckets into the edge bins so no row is dropped.
+        idx = min(max(int(bucket), 1), bins) - 1
+        counts[idx] += cnt
+
+    bin_edges = [min_val + i * bin_width for i in range(bins + 1)]
+    return {
+        "bins": [float(b) for b in bin_edges],
+        "counts": counts,
+        "bin_width": float(bin_width),
+    }
+
+
+def _sql_categorical_stats(
+    conn,
+    from_expression: str,
+    col_name: str,
+    ts_filter: str,
+    top_n: int,
+) -> Dict[str, Any]:
+    """Compute null counts and a top-N value histogram for one categorical column.
+
+    Totals and the top-N values are fetched with two statements. The original
+    single statement returned no rows for an all-NULL column, so such a column
+    was reported as an empty metric and lost its row and null counts.
+
+    Args:
+        conn: Open database connection; only ``conn.cursor()`` is used here.
+        from_expression: SQL text placed after ``FROM`` (aliased as ``_src``).
+        col_name: Column to summarise; values are cast to ``text``.
+        ts_filter: SQL boolean expression placed after ``WHERE``.
+        top_n: Maximum number of distinct values listed in the histogram.
+
+    Returns:
+        A metric dict with ``feature_type == "categorical"``. When no rows
+        match the filter, ``empty_categorical_metric(col_name)`` is returned.
+        When rows match but every value is NULL, ``histogram`` is ``None``.
+    """
+    # Double embedded quotes so the column name stays one quoted identifier.
+    q_col = '"' + col_name.replace('"', '""') + '"'
+
+    filtered_cte = (
+        "WITH filtered AS ("
+        f" SELECT * FROM {from_expression} AS _src WHERE {ts_filter}"
+        ") "
+    )
+    totals_query = (
+        filtered_cte
+        + f"SELECT COUNT(*), COUNT(*) - COUNT({q_col}), COUNT(DISTINCT {q_col}) "
+        + "FROM filtered"
+    )
+    top_query = (
+        filtered_cte
+        + f"SELECT {q_col}::text AS value, COUNT(*) AS cnt "
+        + f"FROM filtered WHERE {q_col} IS NOT NULL "
+        + f"GROUP BY {q_col} ORDER BY cnt DESC LIMIT {int(top_n)}"
+    )
+
+    with conn.cursor() as cur:
+        cur.execute(totals_query)
+        totals = cur.fetchone()
+        row_count = (totals[0] if totals else 0) or 0
+        if row_count == 0:
+            return empty_categorical_metric(col_name)
+        null_count = totals[1] or 0
+        unique_count = totals[2] or 0
+
+        top_rows = []
+        # Skip the second scan when there is nothing non-null to group.
+        if null_count < row_count:
+            cur.execute(top_query)
+            top_rows = cur.fetchall()
+
+    non_null = row_count - null_count
+    top_entries = [{"value": value, "count": cnt} for value, cnt in top_rows]
+    top_total = sum(e["count"] for e in top_entries)
+    other_count = non_null - top_total
+
+    histogram: Optional[Dict[str, Any]] = None
+    if non_null > 0:
+        histogram = {
+            "values": top_entries,
+            "other_count": max(other_count, 0),
+            "unique_count": unique_count,
+        }
+
+    return {
+        "feature_name": col_name,
+        "feature_type": "categorical",
+        "row_count": row_count,
+        "null_count": null_count,
+        "null_rate": null_count / row_count if row_count > 0 else 0.0,
+        "mean": None,
+        "stddev": None,
+        "min_val": None,
+        "max_val": None,
+        "p50": None,
+        "p75": None,
+        "p90": None,
+        "p95": None,
+        "p99": None,
+        "histogram": histogram,
+    }
+
+
+# ------------------------------------------------------------------ #
+# Monitoring metrics storage helpers
+# ------------------------------------------------------------------ #
+
+
+def _mon_upsert(
+    pg_config: PostgreSQLConfig,
+    table: str,
+    columns: List[str],
+    pk_columns: List[str],
+    rows: List[Dict[str, Any]],
+) -> None:
+    """Insert ``rows`` into ``table``, updating existing rows on primary-key conflict.
+
+    Args:
+        pg_config: Connection settings passed to ``_get_conn``.
+        table: Target table name (quoted as an identifier).
+        columns: Columns to write, in order; missing keys in a row become NULL.
+        pk_columns: Columns forming the ``ON CONFLICT`` target.
+        rows: Row dicts to write. An empty list is a no-op.
+    """
+    import json as _json
+
+    # Nothing to write: avoid opening a connection at all.
+    if not rows:
+        return
+
+    non_pk = [c for c in columns if c not in pk_columns]
+    col_ids = sql.SQL(", ").join(sql.Identifier(c) for c in columns)
+    placeholders = sql.SQL(", ").join(sql.Placeholder() for _ in columns)
+    pk_ids = sql.SQL(", ").join(sql.Identifier(c) for c in pk_columns)
+
+    if non_pk:
+        update_clause = sql.SQL(", ").join(
+            sql.SQL("{} = EXCLUDED.{}").format(sql.Identifier(c), sql.Identifier(c))
+            for c in non_pk
+        )
+        conflict_action = sql.SQL("DO UPDATE SET {}").format(update_clause)
+    else:
+        # With every column in the key there is nothing to update, and an
+        # empty "DO UPDATE SET" would be a syntax error.
+        conflict_action = sql.SQL("DO NOTHING")
+
+    query = sql.SQL("INSERT INTO {} ({}) VALUES ({}) ON CONFLICT ({}) {}").format(
+        sql.Identifier(table), col_ids, placeholders, pk_ids, conflict_action
+    )
+
+    def _encode(col: str, val: Any) -> Any:
+        # Only the "histogram" column is JSON-encoded here; other values are
+        # passed to the driver unchanged.
+        if col == "histogram" and val is not None:
+            return _json.dumps(val)
+        return val
+
+    param_sets = [[_encode(col, row.get(col)) for col in columns] for row in rows]
+
+    with _get_conn(pg_config) as conn, conn.cursor() as cur:
+        cur.executemany(query, param_sets)
+        conn.commit()
+
+
+def _mon_query(
+    pg_config: PostgreSQLConfig,
+    metric_type: str,
+    columns: List[str],
+    project: str,
+    filters: Optional[Dict[str, Any]] = None,
+    start_date: Optional["date"] = None,
+    end_date: Optional["date"] = None,
+) -> List[Dict[str, Any]]:
+    """Select monitoring rows for ``metric_type`` and return them as dicts.
+
+    Args:
+        pg_config: Connection settings passed to ``_get_conn``.
+        metric_type: Key handed to ``monitoring_table_meta`` to find the table.
+        columns: Columns to select, in output order.
+        project: If truthy, rows are restricted to this ``project_id``.
+        filters: Optional column -> value equality filters; ``None`` values
+            are skipped.
+        start_date: If truthy, inclusive lower bound on ``metric_date``.
+        end_date: If truthy, inclusive upper bound on ``metric_date``.
+
+    Returns:
+        Rows ordered ascending by ``metric_date`` (or by ``job_id`` when
+        ``metric_date`` is not among ``columns``), each passed through
+        ``normalize_monitoring_row``.
+    """
+    source_table, _cols, _pk = monitoring_table_meta(metric_type)
+
+    # Collect (clause, bound value) pairs, then split them for the driver.
+    predicates: list = []
+    if project:
+        predicates.append((sql.SQL("project_id = %s"), project))
+    for key, value in (filters or {}).items():
+        if value is not None:
+            predicates.append(
+                (sql.SQL("{} = %s").format(sql.Identifier(key)), value)
+            )
+    if start_date:
+        predicates.append((sql.SQL("metric_date >= %s"), start_date))
+    if end_date:
+        predicates.append((sql.SQL("metric_date <= %s"), end_date))
+
+    conditions = [clause for clause, _ in predicates]
+    params = [value for _, value in predicates]
+
+    if conditions:
+        where = sql.SQL(" AND ").join(conditions)
+    else:
+        where = sql.SQL("TRUE")
+
+    order_col = "job_id"
+    if "metric_date" in columns:
+        order_col = "metric_date"
+
+    query = sql.SQL("SELECT {} FROM {} WHERE {} ORDER BY {} ASC").format(
+        sql.SQL(", ").join(sql.Identifier(c) for c in columns),
+        sql.Identifier(source_table),
+        where,
+        sql.Identifier(order_col),
+    )
+
+    with _get_conn(pg_config) as conn, conn.cursor() as cur:
+        conn.read_only = True
+        cur.execute(query, params)
+        fetched = cur.fetchall()
+
+    return [normalize_monitoring_row(dict(zip(columns, row))) for row in fetched]
diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py
index 3fc675ea402..9ffcea1d3d0 100644
--- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py
+++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py
@@ -1,9 +1,11 @@
+import json
import os
import tempfile
import uuid
import warnings
from dataclasses import asdict, dataclass
-from datetime import datetime, timezone
+from datetime import date, datetime, timezone
+from datetime import time as dt_time
from typing import (
TYPE_CHECKING,
Any,
@@ -50,6 +52,17 @@
)
from feast.infra.offline_stores.offline_utils import get_timestamp_filter_sql
from feast.infra.registry.base_registry import BaseRegistry
+from feast.monitoring.monitoring_utils import (
+ MON_TABLE_FEATURE,
+ MON_TABLE_FEATURE_SERVICE,
+ MON_TABLE_FEATURE_VIEW,
+ MON_TABLE_JOB,
+ empty_categorical_metric,
+ empty_numeric_metric,
+ monitoring_table_meta,
+ normalize_monitoring_row,
+ opt_float,
+)
from feast.repo_config import FeastConfigBaseModel, RepoConfig
from feast.saved_dataset import SavedDatasetStorage
from feast.type_map import spark_schema_to_np_dtypes
@@ -423,6 +436,494 @@ def pull_all_from_table_or_query(
config=config,
)
+    @staticmethod
+    def compute_monitoring_metrics(
+        config: RepoConfig,
+        data_source: DataSource,
+        feature_columns: List[Tuple[str, str]],
+        timestamp_field: str,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        histogram_bins: int = 20,
+        top_n: int = 10,
+    ) -> List[Dict[str, Any]]:
+        """Compute per-feature monitoring metrics with Spark SQL.
+
+        Args:
+            config: Repo config; ``offline_store`` must be a
+                ``SparkOfflineStoreConfig``.
+            data_source: Must be a ``SparkSource``.
+            feature_columns: ``(name, type)`` pairs; only ``"numeric"`` and
+                ``"categorical"`` types are processed.
+            timestamp_field: Column used to build the time-window filter.
+            start_date: Optional window start.
+            end_date: Optional window end.
+            histogram_bins: Bin count for numeric histograms.
+            top_n: Number of top values kept for categorical histograms.
+
+        Returns:
+            Metric dicts: all numeric features first, then categorical ones.
+        """
+        assert isinstance(config.offline_store, SparkOfflineStoreConfig)
+        assert isinstance(data_source, SparkSource)
+
+        spark_session = get_spark_session_or_start_new_with_repoconfig(
+            store_config=config.offline_store
+        )
+        from_expression = data_source.get_table_query_string()
+        ts_filter = get_timestamp_filter_sql(
+            start_date,
+            end_date,
+            timestamp_field,
+            tz=timezone.utc,
+            quote_fields=False,
+        )
+        # An empty filter means "no time restriction"; keep the WHERE valid.
+        ts_clause = ts_filter or "1=1"
+
+        numeric_features: List[str] = []
+        categorical_features: List[str] = []
+        for name, kind in feature_columns:
+            if kind == "numeric":
+                numeric_features.append(name)
+            elif kind == "categorical":
+                categorical_features.append(name)
+
+        results: List[Dict[str, Any]] = []
+        if numeric_features:
+            numeric_metrics = _spark_sql_numeric_stats(
+                spark_session,
+                from_expression,
+                numeric_features,
+                ts_clause,
+                histogram_bins,
+            )
+            results.extend(numeric_metrics)
+
+        results.extend(
+            _spark_sql_categorical_stats(
+                spark_session,
+                from_expression,
+                col_name,
+                ts_clause,
+                top_n,
+            )
+            for col_name in categorical_features
+        )
+        return results
+
+    @staticmethod
+    def get_monitoring_max_timestamp(
+        config: RepoConfig,
+        data_source: DataSource,
+        timestamp_field: str,
+    ) -> Optional[datetime]:
+        """Return the largest ``timestamp_field`` value in the source, or ``None``.
+
+        Naive datetimes are tagged as UTC, plain dates become midnight UTC, and
+        any other value is parsed with ``pandas.to_datetime(..., utc=True)``.
+        """
+        assert isinstance(config.offline_store, SparkOfflineStoreConfig)
+        assert isinstance(data_source, SparkSource)
+
+        spark_session = get_spark_session_or_start_new_with_repoconfig(
+            store_config=config.offline_store
+        )
+        source_expr = data_source.get_table_query_string()
+        quoted_ts = f"`{timestamp_field}`"
+        query = f"SELECT MAX({quoted_ts}) AS max_ts FROM {source_expr} AS _src"
+
+        collected = spark_session.sql(query).collect()
+        max_ts = collected[0][0] if collected else None
+        if max_ts is None:
+            return None
+
+        # datetime must be tested before date: datetime is a subclass of date.
+        if isinstance(max_ts, datetime):
+            if max_ts.tzinfo:
+                return max_ts
+            return max_ts.replace(tzinfo=timezone.utc)
+        if isinstance(max_ts, date):
+            return datetime.combine(max_ts, dt_time.min, tzinfo=timezone.utc)
+        return pandas.to_datetime(max_ts, utc=True).to_pydatetime()
+
+    @staticmethod
+    def ensure_monitoring_tables(config: RepoConfig) -> None:
+        """Run every statement in ``_SPARK_MONITORING_DDL_STATEMENTS`` in order."""
+        assert isinstance(config.offline_store, SparkOfflineStoreConfig)
+        session = get_spark_session_or_start_new_with_repoconfig(
+            store_config=config.offline_store
+        )
+        for ddl in _SPARK_MONITORING_DDL_STATEMENTS:
+            session.sql(ddl)
+
+    @staticmethod
+    def save_monitoring_metrics(
+        config: RepoConfig,
+        metric_type: str,
+        metrics: List[Dict[str, Any]],
+    ) -> None:
+        """Merge ``metrics`` into the Spark table for ``metric_type`` and rewrite it.
+
+        The existing table (if any) is pulled into pandas, merged with the new
+        rows by primary key, and the whole result is saved back with
+        ``mode("overwrite")``. An empty ``metrics`` list is a no-op.
+        """
+        if not metrics:
+            return
+        assert isinstance(config.offline_store, SparkOfflineStoreConfig)
+
+        table, columns, pk_columns = monitoring_table_meta(metric_type)
+        # Project each row onto the table's columns; missing keys become None.
+        projected = [{c: m.get(c) for c in columns} for m in metrics]
+        pdf_new = _spark_normalize_histogram_column(pd.DataFrame(projected))
+
+        spark_session = get_spark_session_or_start_new_with_repoconfig(
+            store_config=config.offline_store
+        )
+        # NOTE(review): the full table is collected to the driver on every
+        # save; confirm this is acceptable for the expected table sizes.
+        if not spark_session.catalog.tableExists(table):
+            pdf_merged = pdf_new
+        else:
+            pdf_existing = spark_session.table(table).toPandas()
+            pdf_merged = _spark_pandas_upsert(pdf_existing, pdf_new, pk_columns)
+
+        writer = spark_session.createDataFrame(pdf_merged).write.mode("overwrite")
+        writer.saveAsTable(table)
+
+    @staticmethod
+    def query_monitoring_metrics(
+        config: RepoConfig,
+        project: str,
+        metric_type: str,
+        filters: Optional[Dict[str, Any]] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+    ) -> List[Dict[str, Any]]:
+        """Read stored monitoring rows for ``metric_type`` from the Spark table.
+
+        Returns an empty list when the table does not exist. Rows are ordered
+        by ``metric_date`` when the table has that column, else by ``job_id``.
+        """
+        assert isinstance(config.offline_store, SparkOfflineStoreConfig)
+        table, columns, _ = monitoring_table_meta(metric_type)
+        spark_session = get_spark_session_or_start_new_with_repoconfig(
+            store_config=config.offline_store
+        )
+        if not spark_session.catalog.tableExists(table):
+            return []
+
+        from pyspark.sql import functions as F
+
+        df = spark_session.table(table)
+        has_metric_date = "metric_date" in df.columns
+
+        if project:
+            df = df.filter(F.col("project_id") == project)
+
+        # Equality filters; entries whose value is None are ignored.
+        for key, value in (filters or {}).items():
+            if value is not None:
+                df = df.filter(F.col(key) == value)
+
+        # Date bounds apply only when the table has a metric_date column.
+        if has_metric_date and start_date is not None:
+            df = df.filter(F.col("metric_date") >= F.lit(start_date))
+        if has_metric_date and end_date is not None:
+            df = df.filter(F.col("metric_date") <= F.lit(end_date))
+
+        order_col = "metric_date" if has_metric_date else "job_id"
+        collected = df.orderBy(order_col).collect()
+        return _spark_rows_to_metric_dicts(collected, columns)
+
+    @staticmethod
+    def clear_monitoring_baseline(
+        config: RepoConfig,
+        project: str,
+        feature_view_name: Optional[str] = None,
+        feature_name: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+    ) -> None:
+        """Set ``is_baseline`` to False on matching rows of the feature metrics table.
+
+        Rows are matched on ``project_id`` plus every optional argument that is
+        not ``None``. The table is rewritten only when at least one baseline
+        row matches; a missing or empty table is left untouched.
+        """
+        assert isinstance(config.offline_store, SparkOfflineStoreConfig)
+        spark_session = get_spark_session_or_start_new_with_repoconfig(
+            store_config=config.offline_store
+        )
+        if not spark_session.catalog.tableExists(MON_TABLE_FEATURE):
+            return
+
+        pdf = spark_session.table(MON_TABLE_FEATURE).toPandas()
+        # An empty frame must not reach createDataFrame: Spark cannot infer a
+        # schema from it, and there is nothing to clear anyway.
+        if pdf.empty:
+            return
+
+        mask = (pdf["project_id"] == project) & (pdf["is_baseline"] == True)  # noqa: E712
+        if feature_view_name is not None:
+            mask &= pdf["feature_view_name"] == feature_view_name
+        if feature_name is not None:
+            mask &= pdf["feature_name"] == feature_name
+        if data_source_type is not None:
+            mask &= pdf["data_source_type"] == data_source_type
+
+        # No baseline row matched: skip the full-table overwrite.
+        if not mask.any():
+            return
+
+        pdf.loc[mask, "is_baseline"] = False
+        spark_session.createDataFrame(pdf).write.mode("overwrite").saveAsTable(
+            MON_TABLE_FEATURE
+        )
+
+
+# Spark SQL DDL for the four monitoring tables (per-feature, per-feature-view,
+# per-feature-service metrics, and job records). Every statement uses
+# CREATE TABLE IF NOT EXISTS, so running the list repeatedly is harmless.
+# The per-feature table stores `histogram` as a STRING column.
+_SPARK_MONITORING_DDL_STATEMENTS = [
+    f"""
+CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE} (
+    project_id STRING NOT NULL,
+    feature_view_name STRING NOT NULL,
+    feature_name STRING NOT NULL,
+    metric_date DATE NOT NULL,
+    granularity STRING NOT NULL,
+    data_source_type STRING NOT NULL,
+    computed_at TIMESTAMP NOT NULL,
+    is_baseline BOOLEAN NOT NULL,
+    feature_type STRING NOT NULL,
+    row_count BIGINT,
+    null_count BIGINT,
+    null_rate DOUBLE,
+    mean DOUBLE,
+    stddev DOUBLE,
+    min_val DOUBLE,
+    max_val DOUBLE,
+    p50 DOUBLE,
+    p75 DOUBLE,
+    p90 DOUBLE,
+    p95 DOUBLE,
+    p99 DOUBLE,
+    histogram STRING
+) USING PARQUET
+""",
+    f"""
+CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_VIEW} (
+    project_id STRING NOT NULL,
+    feature_view_name STRING NOT NULL,
+    metric_date DATE NOT NULL,
+    granularity STRING NOT NULL,
+    data_source_type STRING NOT NULL,
+    computed_at TIMESTAMP NOT NULL,
+    is_baseline BOOLEAN NOT NULL,
+    total_row_count BIGINT,
+    total_features INT,
+    features_with_nulls INT,
+    avg_null_rate DOUBLE,
+    max_null_rate DOUBLE
+) USING PARQUET
+""",
+    f"""
+CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_SERVICE} (
+    project_id STRING NOT NULL,
+    feature_service_name STRING NOT NULL,
+    metric_date DATE NOT NULL,
+    granularity STRING NOT NULL,
+    data_source_type STRING NOT NULL,
+    computed_at TIMESTAMP NOT NULL,
+    is_baseline BOOLEAN NOT NULL,
+    total_feature_views INT,
+    total_features INT,
+    avg_null_rate DOUBLE,
+    max_null_rate DOUBLE
+) USING PARQUET
+""",
+    f"""
+CREATE TABLE IF NOT EXISTS {MON_TABLE_JOB} (
+    job_id STRING NOT NULL,
+    project_id STRING NOT NULL,
+    feature_view_name STRING,
+    job_type STRING NOT NULL,
+    status STRING NOT NULL,
+    parameters STRING,
+    metric_date DATE NOT NULL,
+    started_at TIMESTAMP,
+    completed_at TIMESTAMP,
+    error_message STRING,
+    result_summary STRING
+) USING PARQUET
+""",
+]
+
+
+def _spark_normalize_histogram_column(pdf: pd.DataFrame) -> pd.DataFrame:
+    """Return ``pdf`` with non-string ``histogram`` values JSON-encoded.
+
+    A frame without a ``histogram`` column is returned unchanged (same
+    object); otherwise a copy is returned and the input is not modified.
+    ``None`` and ``str`` values pass through untouched.
+    """
+    if "histogram" not in pdf.columns:
+        return pdf
+
+    def _to_json_text(value: Any) -> Any:
+        if value is None or isinstance(value, str):
+            return value
+        return json.dumps(value)
+
+    normalized = pdf.copy()
+    normalized["histogram"] = normalized["histogram"].map(_to_json_text)
+    return normalized
+
+
+def _spark_pandas_upsert(
+    pdf_old: pd.DataFrame,
+    pdf_new: pd.DataFrame,
+    pk_columns: List[str],
+) -> pd.DataFrame:
+    """Merge ``pdf_new`` into ``pdf_old`` by primary key; the latest row wins.
+
+    Old rows whose key appears in ``pdf_new`` are dropped. When ``pdf_new``
+    itself repeats a key, only its last occurrence is kept. Surviving old rows
+    come first, followed by the new rows. Column order follows ``pdf_old``
+    (or ``pdf_new`` when ``pdf_old`` is empty).
+
+    Args:
+        pdf_old: Rows currently stored.
+        pdf_new: Rows being written.
+        pk_columns: Columns that identify a row.
+
+    Returns:
+        A new frame with a fresh 0..n-1 index.
+    """
+    if pdf_old.empty:
+        combined = pdf_new
+    else:
+        combined = pd.concat([pdf_old, pdf_new], ignore_index=True)
+    # keep="last" lets new rows replace old ones and also collapses duplicate
+    # keys inside the new batch, which the index-based merge did not do.
+    deduped = combined.drop_duplicates(subset=pk_columns, keep="last")
+    return deduped.reset_index(drop=True)
+
+
+def _spark_sql_numeric_stats(
+    spark_session: SparkSession,
+    from_expression: str,
+    feature_names: List[str],
+    ts_clause: str,
+    histogram_bins: int,
+) -> List[Dict[str, Any]]:
+    """Batch-compute numeric statistics with one Spark SQL query, then histograms.
+
+    Args:
+        spark_session: Session used to run the SQL.
+        from_expression: SQL text placed after ``FROM`` (aliased as ``_src``).
+        feature_names: Columns to aggregate; each is cast to ``DOUBLE``.
+        ts_clause: SQL boolean expression placed after ``WHERE``.
+        histogram_bins: Bin count forwarded to ``_spark_sql_numeric_histogram``.
+
+    Returns:
+        One metric dict per entry of ``feature_names``, in the same order.
+        Percentiles come from ``PERCENTILE_APPROX``. ``histogram`` stays
+        ``None`` when the column has no non-null values or when its min/max
+        are not finite.
+    """
+    import math
+
+    # Each feature contributes exactly these 10 aggregates, in this order,
+    # after the single leading COUNT(*). The result row is decoded by position.
+    aggs_per_feature = 10
+    select_parts = ["COUNT(*)"]
+    for col in feature_names:
+        # Double embedded backticks so the column name stays one identifier.
+        q = "`" + col.replace("`", "``") + "`"
+        c = f"CAST({q} AS DOUBLE)"
+        select_parts.extend(
+            [
+                f"COUNT({q})",
+                f"AVG({c})",
+                f"STDDEV_SAMP({c})",
+                f"MIN({c})",
+                f"MAX({c})",
+                f"PERCENTILE_APPROX({c}, 0.50)",
+                f"PERCENTILE_APPROX({c}, 0.75)",
+                f"PERCENTILE_APPROX({c}, 0.90)",
+                f"PERCENTILE_APPROX({c}, 0.95)",
+                f"PERCENTILE_APPROX({c}, 0.99)",
+            ]
+        )
+
+    query = (
+        f"SELECT {', '.join(select_parts)} "
+        f"FROM {from_expression} AS _src WHERE {ts_clause}"
+    )
+    rows = spark_session.sql(query).collect()
+    if not rows or rows[0] is None:
+        return [empty_numeric_metric(n) for n in feature_names]
+
+    row = rows[0]
+    row_count = int(row[0] or 0)
+    results: List[Dict[str, Any]] = []
+
+    for i, col in enumerate(feature_names):
+        base = 1 + i * aggs_per_feature
+        non_null = int(row[base] or 0)
+        null_count = row_count - non_null
+
+        min_val = opt_float(row[base + 3])
+        max_val = opt_float(row[base + 4])
+
+        result: Dict[str, Any] = {
+            "feature_name": col,
+            "feature_type": "numeric",
+            "row_count": row_count,
+            "null_count": null_count,
+            "null_rate": null_count / row_count if row_count > 0 else 0.0,
+            "mean": opt_float(row[base + 1]),
+            "stddev": opt_float(row[base + 2]),
+            "min_val": min_val,
+            "max_val": max_val,
+            "p50": opt_float(row[base + 5]),
+            "p75": opt_float(row[base + 6]),
+            "p90": opt_float(row[base + 7]),
+            "p95": opt_float(row[base + 8]),
+            "p99": opt_float(row[base + 9]),
+            "histogram": None,
+        }
+
+        # The histogram helper interpolates min/max into SQL text, so a NaN or
+        # infinite bound would become a bare `nan`/`inf` token and fail the
+        # query. Skip the histogram for such columns instead.
+        has_finite_range = (
+            min_val is not None
+            and max_val is not None
+            and math.isfinite(min_val)
+            and math.isfinite(max_val)
+        )
+        if has_finite_range and non_null > 0:
+            result["histogram"] = _spark_sql_numeric_histogram(
+                spark_session,
+                from_expression,
+                col,
+                ts_clause,
+                histogram_bins,
+                min_val,
+                max_val,
+            )
+
+        results.append(result)
+
+    return results
+
+
+def _spark_sql_numeric_histogram(
+    spark_session: SparkSession,
+    from_expression: str,
+    col_name: str,
+    ts_clause: str,
+    bins: int,
+    min_val: float,
+    max_val: float,
+) -> Dict[str, Any]:
+    """Build an equal-width histogram for one numeric column with Spark SQL.
+
+    Args:
+        spark_session: Session used to run the SQL.
+        from_expression: SQL text placed after ``FROM`` (aliased as ``_src``).
+        col_name: Column to bucket; cast to ``DOUBLE`` in the query.
+        ts_clause: SQL boolean expression ANDed into the ``WHERE`` clause.
+        bins: Number of equal-width bins between ``min_val`` and ``max_val``.
+        min_val: Lower edge of the first bin.
+        max_val: Upper edge of the last bin.
+
+    Returns:
+        ``{"bins": edges, "counts": per-bin counts, "bin_width": width}``.
+        When ``min_val == max_val`` a single degenerate bin is returned.
+
+    Raises:
+        ValueError: If ``bins`` is less than 1 and ``min_val != max_val``.
+    """
+    # Double embedded backticks so the column name stays one identifier.
+    q_col = "`" + col_name.replace("`", "``") + "`"
+
+    if min_val == max_val:
+        sql = (
+            f"SELECT COUNT(*) FROM {from_expression} AS _src "
+            f"WHERE {q_col} IS NOT NULL AND {ts_clause}"
+        )
+        cnt = int(spark_session.sql(sql).collect()[0][0] or 0)
+        return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0}
+
+    if bins < 1:
+        raise ValueError(f"bins must be >= 1, got {bins}")
+
+    bin_width = (max_val - min_val) / bins
+    cast_col = f"CAST({q_col} AS DOUBLE)"
+    # 1-based bucket index, clamped to [1, bins] so max_val lands in the last
+    # bin. The min == max case is handled above, so no SQL-side guard for it
+    # is needed here.
+    bucket_expr = (
+        f"LEAST(GREATEST(FLOOR(({cast_col} - {min_val}) / {bin_width}) + 1, 1), {bins})"
+    )
+
+    query = (
+        f"SELECT bucket, COUNT(*) AS cnt FROM ("
+        f" SELECT {bucket_expr} AS bucket "
+        f" FROM {from_expression} AS _src "
+        f" WHERE {q_col} IS NOT NULL AND {ts_clause}"
+        f") AS _b WHERE bucket IS NOT NULL "
+        f"GROUP BY bucket ORDER BY bucket"
+    )
+    hrows = spark_session.sql(query).collect()
+    counts = [0] * bins
+    for hr in hrows:
+        bucket = int(hr[0] or 0)
+        cnt = int(hr[1] or 0)
+        if 1 <= bucket <= bins:
+            counts[bucket - 1] += cnt
+
+    bin_edges = [min_val + i * bin_width for i in range(bins + 1)]
+    return {
+        "bins": [float(b) for b in bin_edges],
+        "counts": counts,
+        "bin_width": float(bin_width),
+    }
+
+
+def _spark_sql_categorical_stats(
+    spark_session: SparkSession,
+    from_expression: str,
+    col_name: str,
+    ts_clause: str,
+    top_n: int,
+) -> Dict[str, Any]:
+    """Compute null counts and a top-N value histogram for one categorical column.
+
+    Totals and the top-N values are fetched with two statements. The original
+    single statement returned no rows for an all-NULL column, so such a column
+    was reported as an empty metric and lost its row and null counts.
+
+    Args:
+        spark_session: Session used to run the SQL.
+        from_expression: SQL text placed after ``FROM`` (aliased as ``_src``).
+        col_name: Column to summarise; values are cast to ``STRING``.
+        ts_clause: SQL boolean expression placed after ``WHERE``.
+        top_n: Maximum number of distinct values listed in the histogram.
+
+    Returns:
+        A metric dict with ``feature_type == "categorical"``. When no rows
+        match the filter, ``empty_categorical_metric(col_name)`` is returned.
+        When rows match but every value is NULL, ``histogram`` is ``None``.
+    """
+    # Double embedded backticks so the column name stays one identifier.
+    q_col = "`" + col_name.replace("`", "``") + "`"
+
+    filtered_cte = (
+        "WITH filtered AS ("
+        f" SELECT * FROM {from_expression} AS _src WHERE {ts_clause}"
+        ") "
+    )
+    totals_query = (
+        filtered_cte
+        + f"SELECT COUNT(*), COUNT(*) - COUNT({q_col}), COUNT(DISTINCT {q_col}) "
+        + "FROM filtered"
+    )
+    top_query = (
+        filtered_cte
+        + f"SELECT CAST({q_col} AS STRING) AS value, COUNT(*) AS cnt "
+        + f"FROM filtered WHERE {q_col} IS NOT NULL "
+        + f"GROUP BY {q_col} ORDER BY cnt DESC LIMIT {int(top_n)}"
+    )
+
+    totals = spark_session.sql(totals_query).collect()
+    row_count = int(totals[0][0] or 0) if totals else 0
+    if row_count == 0:
+        return empty_categorical_metric(col_name)
+    null_count = int(totals[0][1] or 0)
+    unique_count = int(totals[0][2] or 0)
+    non_null = row_count - null_count
+
+    # Skip the second scan when there is nothing non-null to group.
+    top_rows = spark_session.sql(top_query).collect() if non_null > 0 else []
+    top_entries = [{"value": r[0], "count": int(r[1] or 0)} for r in top_rows]
+    top_total = sum(e["count"] for e in top_entries)
+    other_count = non_null - top_total
+
+    histogram: Optional[Dict[str, Any]] = None
+    if non_null > 0:
+        histogram = {
+            "values": top_entries,
+            "other_count": max(other_count, 0),
+            "unique_count": unique_count,
+        }
+
+    return {
+        "feature_name": col_name,
+        "feature_type": "categorical",
+        "row_count": row_count,
+        "null_count": null_count,
+        "null_rate": null_count / row_count if row_count > 0 else 0.0,
+        "mean": None,
+        "stddev": None,
+        "min_val": None,
+        "max_val": None,
+        "p50": None,
+        "p75": None,
+        "p90": None,
+        "p95": None,
+        "p99": None,
+        "histogram": histogram,
+    }
+
+
+def _spark_rows_to_metric_dicts(
+    rows: List[Any],
+    columns: List[str],
+) -> List[Dict[str, Any]]:
+    """Convert collected rows into normalized dicts limited to ``columns``.
+
+    Each row is turned into a dict with ``asDict()``; columns missing from a
+    row become ``None``. The result is passed through
+    ``normalize_monitoring_row``.
+    """
+
+    def _project(row: Any) -> Dict[str, Any]:
+        as_dict = row.asDict()
+        return {name: as_dict.get(name) for name in columns}
+
+    return [normalize_monitoring_row(_project(row)) for row in rows]
+
class SparkRetrievalJob(RetrievalJob):
def __init__(
diff --git a/sdk/python/feast/infra/offline_stores/dask.py b/sdk/python/feast/infra/offline_stores/dask.py
index 3e640ce5af0..cc7b9dbe4df 100644
--- a/sdk/python/feast/infra/offline_stores/dask.py
+++ b/sdk/python/feast/infra/offline_stores/dask.py
@@ -1,15 +1,18 @@
+import json
import os
import uuid
-from datetime import datetime, timezone
+from datetime import date, datetime, timezone
from pathlib import Path
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
import dask
import dask.dataframe as dd
+import numpy as np
import pandas as pd
import pyarrow
+import pyarrow.compute as pc
import pyarrow.dataset
-import pyarrow.parquet
+import pyarrow.parquet as pq
import pytz
from feast.data_source import DataSource
@@ -34,6 +37,18 @@
get_pyarrow_schema_from_batch_source,
)
from feast.infra.registry.base_registry import BaseRegistry
+from feast.monitoring.monitoring_utils import (
+ FEATURE_METRICS_COLUMNS,
+ FEATURE_METRICS_PK,
+ FEATURE_SERVICE_METRICS_COLUMNS,
+ FEATURE_SERVICE_METRICS_PK,
+ FEATURE_VIEW_METRICS_COLUMNS,
+ FEATURE_VIEW_METRICS_PK,
+ JOB_COLUMNS,
+ JOB_PK,
+ normalize_monitoring_row,
+ opt_float,
+)
from feast.on_demand_feature_view import OnDemandFeatureView
from feast.repo_config import FeastConfigBaseModel, RepoConfig
from feast.saved_dataset import SavedDatasetStorage
@@ -582,6 +597,398 @@ def offline_write_batch(
writer.write_table(new_table)
writer.close()
+    @staticmethod
+    def compute_monitoring_metrics(
+        config: RepoConfig,
+        data_source: DataSource,
+        feature_columns: List[Tuple[str, str]],
+        timestamp_field: str,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        histogram_bins: int = 20,
+        top_n: int = 10,
+    ) -> List[Dict[str, Any]]:
+        """Compute per-feature monitoring metrics from a file source with PyArrow.
+
+        The source is read into an Arrow table and filtered to the requested
+        time window. Features absent from the table, or whose type is neither
+        ``"numeric"`` nor ``"categorical"``, are skipped silently.
+
+        Returns:
+            One metric dict per processed feature, in ``feature_columns`` order.
+        """
+        assert isinstance(config.offline_store, DaskOfflineStoreConfig)
+        assert isinstance(data_source, FileSource)
+
+        source_table = _dask_read_batch_arrow(data_source, config.repo_path)
+        windowed = _dask_filter_arrow_by_timestamp(
+            source_table, timestamp_field, start_date, end_date
+        )
+
+        # Feature type -> function computing that type's metrics for a column.
+        computers = {
+            "numeric": lambda column: _dask_compute_numeric_metrics(
+                column, histogram_bins
+            ),
+            "categorical": lambda column: _dask_compute_categorical_metrics(
+                column, top_n
+            ),
+        }
+
+        results: List[Dict[str, Any]] = []
+        for name, ftype in feature_columns:
+            if name not in windowed.column_names:
+                continue
+            compute = computers.get(ftype)
+            if compute is None:
+                continue
+            metric = compute(windowed[name])
+            metric["feature_name"] = name
+            results.append(metric)
+        return results
+
+    @staticmethod
+    def get_monitoring_max_timestamp(
+        config: RepoConfig,
+        data_source: DataSource,
+        timestamp_field: str,
+    ) -> Optional[datetime]:
+        """Return the largest ``timestamp_field`` value in the file source.
+
+        Returns ``None`` when the file cannot be read, has no rows, the
+        maximum is null, or the maximum is neither a ``datetime`` nor a
+        ``date``. Naive datetimes are tagged as UTC; plain dates become
+        midnight UTC.
+        """
+        assert isinstance(config.offline_store, DaskOfflineStoreConfig)
+        assert isinstance(data_source, FileSource)
+
+        resolved_uri = FileSource.get_uri_for_file_path(
+            repo_path=config.repo_path,
+            uri=data_source.file_options.uri,
+        )
+        filesystem, path = FileSource.create_filesystem_and_path(
+            str(resolved_uri), data_source.file_options.s3_endpoint_override
+        )
+        try:
+            # Only the timestamp column is read.
+            ts_table = pq.read_table(
+                path, filesystem=filesystem, columns=[timestamp_field]
+            )
+        except Exception:
+            # Best effort: any read failure is reported as "no timestamp".
+            return None
+        if ts_table.num_rows == 0:
+            return None
+
+        latest = pc.max(ts_table[timestamp_field]).as_py()  # type: ignore[attr-defined]
+        # datetime must be tested before date: datetime is a subclass of date.
+        if isinstance(latest, datetime):
+            if latest.tzinfo:
+                return latest
+            return latest.replace(tzinfo=timezone.utc)
+        if isinstance(latest, date):
+            return datetime.combine(latest, datetime.min.time(), tzinfo=timezone.utc)
+        return None
+
+    @staticmethod
+    def ensure_monitoring_tables(config: RepoConfig) -> None:
+        """Create the monitoring directory and any missing, empty Parquet files.
+
+        Existing files are left untouched. New files contain zero rows with
+        the column names of the corresponding ``*_COLUMNS`` list.
+        """
+        assert isinstance(config.offline_store, DaskOfflineStoreConfig)
+        monitoring_dir = os.path.join(_dask_monitoring_base(config), _DASK_MON_DIR)
+        os.makedirs(monitoring_dir, exist_ok=True)
+
+        file_schemas = (
+            (_DASK_FEATURE_METRICS_FILE, FEATURE_METRICS_COLUMNS),
+            (_DASK_VIEW_METRICS_FILE, FEATURE_VIEW_METRICS_COLUMNS),
+            (_DASK_SERVICE_METRICS_FILE, FEATURE_SERVICE_METRICS_COLUMNS),
+            (_DASK_JOB_FILE, JOB_COLUMNS),
+        )
+        for file_name, column_names in file_schemas:
+            target = _dask_monitoring_path(config, file_name)
+            if os.path.isfile(target):
+                continue
+            os.makedirs(os.path.dirname(target), exist_ok=True)
+            pd.DataFrame(columns=column_names).to_parquet(target, index=False)
+
+    @staticmethod
+    def save_monitoring_metrics(
+        config: RepoConfig,
+        metric_type: str,
+        metrics: List[Dict[str, Any]],
+    ) -> None:
+        """Upsert ``metrics`` into the Parquet file backing ``metric_type``.
+
+        An empty ``metrics`` list is a no-op.
+        """
+        if not metrics:
+            return
+        assert isinstance(config.offline_store, DaskOfflineStoreConfig)
+
+        file_name, column_names, key_columns = _dask_mon_table_meta(metric_type)
+        _dask_parquet_upsert(
+            _dask_monitoring_path(config, file_name),
+            column_names,
+            key_columns,
+            metrics,
+        )
+
+    @staticmethod
+    def query_monitoring_metrics(
+        config: RepoConfig,
+        project: str,
+        metric_type: str,
+        filters: Optional[Dict[str, Any]] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+    ) -> List[Dict[str, Any]]:
+        """Read stored monitoring rows for ``metric_type`` from its Parquet file.
+
+        Filtering, ordering and row normalization are delegated to
+        ``_dask_parquet_query``.
+        """
+        assert isinstance(config.offline_store, DaskOfflineStoreConfig)
+
+        file_name, column_names, _pk = _dask_mon_table_meta(metric_type)
+        parquet_path = _dask_monitoring_path(config, file_name)
+        return _dask_parquet_query(
+            parquet_path, column_names, project, filters, start_date, end_date
+        )
+
+    @staticmethod
+    def clear_monitoring_baseline(
+        config: RepoConfig,
+        project: str,
+        feature_view_name: Optional[str] = None,
+        feature_name: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+    ) -> None:
+        """Set ``is_baseline`` to False on matching rows of the feature metrics file.
+
+        Rows are matched on ``project_id`` plus every optional argument that is
+        not ``None``. The Parquet file is rewritten only when at least one
+        baseline row matches; a missing or empty file is left untouched.
+        """
+        assert isinstance(config.offline_store, DaskOfflineStoreConfig)
+
+        path = _dask_monitoring_path(config, _DASK_FEATURE_METRICS_FILE)
+        tab = _dask_read_parquet_if_exists(path)
+        if tab is None or tab.num_rows == 0:
+            return
+
+        df = tab.to_pandas()
+        mask = df["project_id"] == project
+        if feature_view_name is not None:
+            mask = mask & (df["feature_view_name"] == feature_view_name)
+        if feature_name is not None:
+            mask = mask & (df["feature_name"] == feature_name)
+        if data_source_type is not None:
+            mask = mask & (df["data_source_type"] == data_source_type)
+        # The flag is matched as either boolean True or integer 1.
+        mask = mask & (df["is_baseline"].isin([True, 1]))
+
+        # No baseline row matched: skip rewriting the whole file.
+        if not mask.any():
+            return
+
+        df.loc[mask, "is_baseline"] = False
+        pq.write_table(pyarrow.Table.from_pandas(df, preserve_index=False), path)
+
+
+# Directory (joined onto the repo path) holding the Dask monitoring Parquet files.
+_DASK_MON_DIR = "feast_monitoring"
+# One Parquet file per metric type; see _dask_mon_table_meta for the mapping.
+_DASK_FEATURE_METRICS_FILE = "feature_metrics.parquet"
+_DASK_VIEW_METRICS_FILE = "feature_view_metrics.parquet"
+_DASK_SERVICE_METRICS_FILE = "feature_service_metrics.parquet"
+_DASK_JOB_FILE = "jobs.parquet"
+
+
+def _dask_monitoring_base(config: RepoConfig) -> str:
+    """Return ``config.repo_path`` as a string, or ``"."`` when it is falsy."""
+    repo_root = config.repo_path
+    if repo_root:
+        return str(repo_root)
+    return "."
+
+
+def _dask_monitoring_path(config: RepoConfig, filename: str) -> str:
+    """Return the path of ``filename`` inside the monitoring directory."""
+    monitoring_dir = os.path.join(_dask_monitoring_base(config), _DASK_MON_DIR)
+    return os.path.join(monitoring_dir, filename)
+
+
+def _dask_mon_table_meta(metric_type: str):
+    """Map ``metric_type`` to ``(file name, column list, primary-key columns)``.
+
+    Raises:
+        ValueError: If ``metric_type`` is not one of ``"feature"``,
+            ``"feature_view"``, ``"feature_service"`` or ``"job"``.
+    """
+    known_types = (
+        (
+            "feature",
+            (_DASK_FEATURE_METRICS_FILE, FEATURE_METRICS_COLUMNS, FEATURE_METRICS_PK),
+        ),
+        (
+            "feature_view",
+            (
+                _DASK_VIEW_METRICS_FILE,
+                FEATURE_VIEW_METRICS_COLUMNS,
+                FEATURE_VIEW_METRICS_PK,
+            ),
+        ),
+        (
+            "feature_service",
+            (
+                _DASK_SERVICE_METRICS_FILE,
+                FEATURE_SERVICE_METRICS_COLUMNS,
+                FEATURE_SERVICE_METRICS_PK,
+            ),
+        ),
+        ("job", (_DASK_JOB_FILE, JOB_COLUMNS, JOB_PK)),
+    )
+    for type_name, meta in known_types:
+        if metric_type == type_name:
+            return meta
+    raise ValueError(f"Unknown metric_type '{metric_type}'")
+
+
+def _dask_read_parquet_if_exists(path: str) -> Optional[pyarrow.Table]:
+    """Read the Parquet file at ``path``; return ``None`` if it is not a file."""
+    if os.path.isfile(path):
+        return pq.read_table(path)
+    return None
+
+
+def _dask_read_batch_arrow(
+    data_source: FileSource, repo_path: Optional[Path]
+) -> pyarrow.Table:
+    """Read the whole file behind ``data_source`` into an Arrow table.
+
+    The source URI is resolved against ``repo_path`` and opened through the
+    filesystem returned by ``FileSource.create_filesystem_and_path``.
+    """
+    file_options = data_source.file_options
+    resolved_uri = FileSource.get_uri_for_file_path(
+        repo_path=repo_path,
+        uri=file_options.uri,
+    )
+    filesystem, path = FileSource.create_filesystem_and_path(
+        str(resolved_uri), file_options.s3_endpoint_override
+    )
+    return pq.read_table(path, filesystem=filesystem)
+
+
+def _dask_filter_arrow_by_timestamp(
+    table: pyarrow.Table,
+    timestamp_field: str,
+    start_date: Optional[datetime],
+    end_date: Optional[datetime],
+) -> pyarrow.Table:
+    """Keep rows with ``start_date <= timestamp_field <= end_date``.
+
+    Either bound may be ``None`` to leave that side open; with both ``None``
+    the input table is returned unchanged.
+    """
+    if start_date is None and end_date is None:
+        return table
+
+    ts_column = table[timestamp_field]
+    predicates = []
+    if start_date is not None:
+        predicates.append(
+            pc.greater_equal(ts_column, pyarrow.scalar(start_date))  # type: ignore[attr-defined]
+        )
+    if end_date is not None:
+        predicates.append(
+            pc.less_equal(ts_column, pyarrow.scalar(end_date))  # type: ignore[attr-defined]
+        )
+
+    # At least one predicate exists here; AND in the second one if present.
+    combined = predicates[0]
+    for extra in predicates[1:]:
+        combined = pc.and_(combined, extra)  # type: ignore[attr-defined]
+    return table.filter(combined)
+
+
+def _dask_compute_numeric_metrics(
+    column: pyarrow.ChunkedArray, histogram_bins: int
+) -> Dict[str, Any]:
+    """Compute summary statistics and a histogram for one numeric Arrow column.
+
+    Args:
+        column: Column values; nulls are counted and then dropped.
+        histogram_bins: Number of bins passed to ``numpy.histogram``.
+
+    Returns:
+        A metric dict without ``feature_name`` (the caller adds it). All
+        statistics stay ``None`` when the column has no non-null values.
+        ``histogram`` stays ``None`` when no finite value remains.
+    """
+    total = len(column)
+    null_count = column.null_count
+    result: Dict[str, Any] = {
+        "feature_type": "numeric",
+        "row_count": total,
+        "null_count": null_count,
+        "null_rate": null_count / total if total > 0 else 0.0,
+        "mean": None,
+        "stddev": None,
+        "min_val": None,
+        "max_val": None,
+        "p50": None,
+        "p75": None,
+        "p90": None,
+        "p95": None,
+        "p99": None,
+        "histogram": None,
+    }
+
+    valid = pc.drop_null(column)  # type: ignore[attr-defined]
+    if len(valid) == 0:
+        return result
+
+    float_array = pc.cast(valid, pyarrow.float64())
+    result["mean"] = opt_float(pc.mean(float_array).as_py())  # type: ignore[attr-defined]
+    result["stddev"] = opt_float(pc.stddev(float_array, ddof=1).as_py())  # type: ignore[attr-defined]
+
+    # Route min/max and percentiles through opt_float as well, matching how
+    # the SQL-backed stores in this file post-process the same fields.
+    min_max = pc.min_max(float_array)  # type: ignore[attr-defined]
+    result["min_val"] = opt_float(min_max["min"].as_py())
+    result["max_val"] = opt_float(min_max["max"].as_py())
+
+    quantiles = pc.quantile(float_array, q=[0.50, 0.75, 0.90, 0.95, 0.99])  # type: ignore[attr-defined]
+    for key, value in zip(("p50", "p75", "p90", "p95", "p99"), quantiles.to_pylist()):
+        result[key] = opt_float(value)
+
+    np_array = float_array.to_numpy()
+    # numpy.histogram raises when the data range is not finite, so NaN and
+    # infinite values are excluded from the histogram only.
+    finite_values = np_array[np.isfinite(np_array)]
+    if finite_values.size > 0:
+        counts, bin_edges = np.histogram(finite_values, bins=histogram_bins)
+        result["histogram"] = {
+            "bins": bin_edges.tolist(),
+            "counts": counts.tolist(),
+            "bin_width": float(bin_edges[1] - bin_edges[0])
+            if len(bin_edges) > 1
+            else 0.0,
+        }
+
+    return result
+
+
+def _dask_compute_categorical_metrics(
+    column: pyarrow.ChunkedArray, top_n: int
+) -> Dict[str, Any]:
+    """Compute null counts and a top-N value histogram for one Arrow column.
+
+    Returns:
+        A metric dict without ``feature_name`` (the caller adds it). Numeric
+        statistics are always ``None``. ``histogram`` is ``None`` when the
+        column has no non-null values.
+    """
+    row_total = len(column)
+    nulls = column.null_count
+    null_rate = nulls / row_total if row_total > 0 else 0.0
+
+    metric: Dict[str, Any] = {
+        "feature_type": "categorical",
+        "row_count": row_total,
+        "null_count": nulls,
+        "null_rate": null_rate,
+    }
+    # Numeric-only statistics do not apply to categorical features.
+    for numeric_key in (
+        "mean",
+        "stddev",
+        "min_val",
+        "max_val",
+        "p50",
+        "p75",
+        "p90",
+        "p95",
+        "p99",
+    ):
+        metric[numeric_key] = None
+    metric["histogram"] = None
+
+    non_null = pc.drop_null(column)  # type: ignore[attr-defined]
+    if len(non_null) == 0:
+        return metric
+
+    frequencies = [
+        {"value": pair["values"].as_py(), "count": pair["counts"].as_py()}
+        for pair in pc.value_counts(non_null)  # type: ignore[attr-defined]
+    ]
+    # Stable sort: values with equal counts keep their value_counts order.
+    ranked = sorted(frequencies, key=lambda entry: entry["count"], reverse=True)
+
+    head, tail = ranked[:top_n], ranked[top_n:]
+    metric["histogram"] = {
+        "values": head,
+        "other_count": sum(entry["count"] for entry in tail),
+        "unique_count": len(ranked),
+    }
+    return metric
+
+
+def _dask_parquet_upsert(
+    path: str,
+    columns: List[str],
+    pk_cols: List[str],
+    new_rows: List[Dict[str, Any]],
+) -> None:
+    """Append ``new_rows`` to the Parquet file at ``path``; the last row per key wins.
+
+    The existing file (if any) is loaded, the new rows are concatenated after
+    it, duplicates on ``pk_cols`` are dropped keeping the last occurrence, and
+    the file is rewritten. Non-string ``histogram`` values are JSON-encoded.
+    """
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+
+    def _with_json_histogram(row: Dict[str, Any]) -> Dict[str, Any]:
+        # Work on a copy so the caller's dict is not modified.
+        copied = dict(row)
+        hist = copied.get("histogram")
+        if hist is not None and not isinstance(hist, str):
+            copied["histogram"] = json.dumps(hist)
+        return copied
+
+    incoming = pd.DataFrame(
+        [_with_json_histogram(row) for row in new_rows], columns=columns
+    )
+
+    stored = _dask_read_parquet_if_exists(path)
+    if stored is None:
+        merged = incoming
+    else:
+        merged = pd.concat([stored.to_pandas(), incoming], ignore_index=True)
+
+    merged = merged.drop_duplicates(subset=pk_cols, keep="last")
+    pq.write_table(pyarrow.Table.from_pandas(merged, preserve_index=False), path)
+
+
+def _dask_parquet_query(
+    path: str,
+    columns: List[str],
+    project: str,
+    filters: Optional[Dict[str, Any]],
+    start_date: Optional[date],
+    end_date: Optional[date],
+) -> List[Dict[str, Any]]:
+    """Read, filter and order monitoring rows from the Parquet file at ``path``.
+
+    Args:
+        path: Parquet file to read; a missing or empty file yields ``[]``.
+        columns: Keys included in every returned dict.
+        project: If truthy, rows are restricted to this ``project_id``.
+        filters: Optional column -> value equality filters; ``None`` values
+            are skipped.
+        start_date: Optional inclusive lower bound on ``metric_date``.
+        end_date: Optional inclusive upper bound on ``metric_date``.
+
+    Returns:
+        Rows sorted ascending by ``metric_date`` (or ``job_id`` when the file
+        has no ``metric_date`` column). Each row is passed through
+        ``normalize_monitoring_row``; ``metric_date`` and ``computed_at`` are
+        then rendered with ``isoformat()`` when they are not already strings.
+    """
+    tab = _dask_read_parquet_if_exists(path)
+    if tab is None or tab.num_rows == 0:
+        return []
+
+    df = tab.to_pandas()
+    if project:
+        df = df[df["project_id"] == project]
+    if filters:
+        for key, value in filters.items():
+            if value is not None:
+                df = df[df[key] == value]
+    if "metric_date" in df.columns:
+        if start_date is not None:
+            df = df[df["metric_date"] >= start_date]
+        if end_date is not None:
+            df = df[df["metric_date"] <= end_date]
+        df = df.sort_values("metric_date", ascending=True)
+    else:
+        df = df.sort_values("job_id", ascending=True)
+
+    results = []
+    for _, row in df.iterrows():
+        # Use the returned dict, as the PostgreSQL and Spark paths do, so the
+        # normalization is not lost if the helper returns a new object.
+        record = normalize_monitoring_row({c: row.get(c) for c in columns})
+        for key in ("metric_date", "computed_at"):
+            val = record.get(key)
+            if (
+                val is not None
+                and not isinstance(val, str)
+                and hasattr(val, "isoformat")
+            ):
+                record[key] = val.isoformat()
+        results.append(record)
+
+    return results
+
def _get_entity_df_event_timestamp_range(
entity_df: Union[pd.DataFrame, str],
diff --git a/sdk/python/feast/infra/offline_stores/duckdb.py b/sdk/python/feast/infra/offline_stores/duckdb.py
index e0a69e53c56..7701e306ed3 100644
--- a/sdk/python/feast/infra/offline_stores/duckdb.py
+++ b/sdk/python/feast/infra/offline_stores/duckdb.py
@@ -1,11 +1,15 @@
+import json
import os
-from datetime import datetime
+from datetime import date, datetime, timezone
from pathlib import Path
-from typing import Any, Callable, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+import duckdb
import ibis
import pandas as pd
import pyarrow
+import pyarrow as pa
+import pyarrow.parquet as pq
from ibis.expr.types import Table
from pydantic import StrictStr
@@ -23,7 +27,22 @@
write_logged_features_ibis,
)
from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob
+from feast.infra.offline_stores.offline_utils import get_timestamp_filter_sql
from feast.infra.registry.base_registry import BaseRegistry
+from feast.monitoring.monitoring_utils import (
+ FEATURE_METRICS_COLUMNS,
+ FEATURE_METRICS_PK,
+ FEATURE_SERVICE_METRICS_COLUMNS,
+ FEATURE_SERVICE_METRICS_PK,
+ FEATURE_VIEW_METRICS_COLUMNS,
+ FEATURE_VIEW_METRICS_PK,
+ JOB_COLUMNS,
+ JOB_PK,
+ empty_categorical_metric,
+ empty_numeric_metric,
+ normalize_monitoring_row,
+ opt_float,
+)
from feast.repo_config import FeastConfigBaseModel, RepoConfig
@@ -113,6 +132,336 @@ def _write_data_source(
)
+# ------------------------------------------------------------------ #
+# DuckDB monitoring (Parquet-backed)
+# ------------------------------------------------------------------ #
+
+# Sub-directory (joined onto the monitoring base path) holding the metric files.
+MONITORING_DIR = "feast_monitoring"
+# One Parquet file per metric_type handled by _duckdb_mon_table_meta:
+# "feature", "feature_view", "feature_service" and "job" respectively.
+FEATURE_METRICS_FILE = "feature_metrics.parquet"
+VIEW_METRICS_FILE = "feature_view_metrics.parquet"
+SERVICE_METRICS_FILE = "feature_service_metrics.parquet"
+JOB_METRICS_FILE = "jobs.parquet"
+
+
+def _duckdb_monitoring_base(config: RepoConfig) -> str:
+    """Return ``config.repo_path`` as a string, or ``"."`` when it is unset or empty."""
+    repo_root = config.repo_path
+    if not repo_root:
+        return "."
+    return str(repo_root)
+
+
+def _duckdb_monitoring_path(config: RepoConfig, filename: str) -> str:
+    """Build the path of ``filename`` inside ``MONITORING_DIR`` under the monitoring base."""
+    monitoring_root = os.path.join(_duckdb_monitoring_base(config), MONITORING_DIR)
+    return os.path.join(monitoring_root, filename)
+
+
+def _duckdb_parquet_from_expression(config: RepoConfig, data_source: FileSource) -> str:
+    """Resolve the source URI against the monitoring base and escape it for a SQL string.
+
+    Single quotes are doubled so the result can be placed between ``'...'``.
+    """
+    resolved = FileSource.get_uri_for_file_path(
+        repo_path=_duckdb_monitoring_base(config),
+        uri=data_source.file_options.uri,
+    )
+    as_text = str(resolved)
+    return as_text.replace("'", "''")
+
+
+def _duckdb_quote_ident(name: str) -> str:
+    """Return ``name`` as a double-quoted SQL identifier.
+
+    Embedded double quotes are doubled so a column name containing ``"``
+    cannot terminate the identifier early and alter the generated SQL.
+    """
+    escaped = name.replace('"', '""')
+    return f'"{escaped}"'
+
+
+def _duckdb_ts_where(ts_filter: str) -> str:
+    """Return ``ts_filter`` wrapped in parentheses, or ``1=1`` when it is empty or blank."""
+    if not ts_filter or not ts_filter.strip():
+        return "1=1"
+    return f"({ts_filter})"
+
+
+def _duckdb_numeric_stats(
+    conn: duckdb.DuckDBPyConnection,
+    from_expr: str,
+    feature_names: List[str],
+    ts_filter: str,
+    histogram_bins: int,
+) -> List[Dict[str, Any]]:
+    """Compute summary statistics for all numeric features with one query.
+
+    The SELECT returns ``COUNT(*)`` followed by ten aggregates per feature:
+    non-null count, mean, sample stddev, min, max and the 50/75/90/95/99th
+    percentiles (values are cast to DOUBLE). A histogram is then fetched
+    for every feature that has a min, a max and at least one non-null value.
+
+    Args:
+        conn: Open DuckDB connection.
+        from_expr: SQL table expression to read from.
+        feature_names: Numeric columns to profile.
+        ts_filter: Optional timestamp predicate; blank means no filter.
+        histogram_bins: Number of bins requested for each histogram.
+
+    Returns:
+        One metric dict per feature, in ``feature_names`` order.
+    """
+    per_feature = 10  # aggregate columns emitted for each feature
+    projections = ["COUNT(*)"]
+    for name in feature_names:
+        ident = _duckdb_quote_ident(name)
+        as_double = f"CAST({ident} AS DOUBLE)"
+        projections.append(f"COUNT({ident})")
+        projections += [
+            f"{func}({as_double})" for func in ("AVG", "STDDEV_SAMP", "MIN", "MAX")
+        ]
+        projections += [
+            f"PERCENTILE_CONT({quantile}) WITHIN GROUP (ORDER BY {as_double})"
+            for quantile in ("0.50", "0.75", "0.90", "0.95", "0.99")
+        ]
+
+    where_sql = _duckdb_ts_where(ts_filter)
+    sql = f"SELECT {', '.join(projections)} FROM {from_expr} AS _src WHERE {where_sql}"
+    stats_row = conn.execute(sql).fetchone()
+
+    if stats_row is None:
+        return [empty_numeric_metric(name) for name in feature_names]
+
+    total_rows = stats_row[0]
+    metrics: List[Dict[str, Any]] = []
+
+    for position, name in enumerate(feature_names):
+        # Column 0 is COUNT(*); each feature owns the next block of ten.
+        first = 1 + position * per_feature
+        (present, mean, stddev, low, high, p50, p75, p90, p95, p99) = stats_row[
+            first : first + per_feature
+        ]
+        present = present or 0
+        missing = total_rows - present
+        low = opt_float(low)
+        high = opt_float(high)
+
+        metric: Dict[str, Any] = {
+            "feature_name": name,
+            "feature_type": "numeric",
+            "row_count": total_rows,
+            "null_count": missing,
+            "null_rate": missing / total_rows if total_rows > 0 else 0.0,
+            "mean": opt_float(mean),
+            "stddev": opt_float(stddev),
+            "min_val": low,
+            "max_val": high,
+            "p50": opt_float(p50),
+            "p75": opt_float(p75),
+            "p90": opt_float(p90),
+            "p95": opt_float(p95),
+            "p99": opt_float(p99),
+            "histogram": None,
+        }
+
+        has_range = low is not None and high is not None
+        if has_range and present > 0:
+            metric["histogram"] = _duckdb_numeric_histogram(
+                conn, from_expr, name, ts_filter, histogram_bins, low, high
+            )
+
+        metrics.append(metric)
+
+    return metrics
+
+
+def _duckdb_numeric_histogram(
+    conn: duckdb.DuckDBPyConnection,
+    from_expr: str,
+    col_name: str,
+    ts_filter: str,
+    bins: int,
+    min_val: float,
+    max_val: float,
+) -> Dict[str, Any]:
+    """Build an equal-width histogram of ``col_name`` between ``min_val`` and ``max_val``.
+
+    When ``min_val == max_val`` a single bucket holding every non-null row
+    is returned with ``bin_width`` 0.0. Otherwise the range is split into
+    ``bins`` buckets; values equal to ``max_val`` are folded into the last one.
+
+    Returns:
+        Dict with ``bins`` (bucket edges), ``counts`` and ``bin_width``.
+
+    Raises:
+        ValueError: If ``bins`` is less than 1 and the range is not degenerate.
+    """
+    q_col = _duckdb_quote_ident(col_name)
+
+    tw = _duckdb_ts_where(ts_filter)
+    if min_val == max_val:
+        row = conn.execute(
+            f"SELECT COUNT(*) FROM {from_expr} AS _src "
+            f"WHERE {q_col} IS NOT NULL AND {tw}"
+        ).fetchone()
+        cnt = row[0] if row else 0
+        return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0}
+
+    # bins == 0 used to surface as a bare ZeroDivisionError below, and a
+    # negative value produced an empty, meaningless histogram.
+    if bins < 1:
+        raise ValueError(f"histogram bins must be >= 1, got {bins}")
+
+    bin_width = (max_val - min_val) / bins
+
+    # FLOOR(...) + 1 gives 1-based bucket numbers; LEAST caps the value that
+    # equals max_val (which would land in bucket bins + 1) into the last bucket.
+    query = (
+        f"SELECT LEAST(FLOOR((CAST({q_col} AS DOUBLE) - {min_val}) / {bin_width}) + 1, {bins}) AS bucket, "
+        f"COUNT(*) AS cnt "
+        f"FROM {from_expr} AS _src "
+        f"WHERE {q_col} IS NOT NULL AND {tw} "
+        f"GROUP BY bucket ORDER BY bucket"
+    )
+    rows = conn.execute(query).fetchall()
+
+    counts = [0] * bins
+    for bucket, cnt in rows:
+        b = int(bucket)
+        if 1 <= b <= bins:  # ignore anything that fell outside the range
+            counts[b - 1] = cnt
+
+    bin_edges = [min_val + i * bin_width for i in range(bins + 1)]
+    return {
+        "bins": [float(b) for b in bin_edges],
+        "counts": counts,
+        "bin_width": float(bin_width),
+    }
+
+
+def _duckdb_categorical_stats(
+    conn: duckdb.DuckDBPyConnection,
+    from_expr: str,
+    col_name: str,
+    ts_filter: str,
+    top_n: int,
+) -> Dict[str, Any]:
+    """Compute null statistics and the ``top_n`` most frequent values of a column.
+
+    Args:
+        conn: Open DuckDB connection.
+        from_expr: SQL table expression to read from.
+        col_name: Categorical column to profile.
+        ts_filter: Optional timestamp predicate; blank means no filter.
+        top_n: Maximum number of distinct values to report.
+
+    Returns:
+        A metric dict whose ``histogram`` holds the top values, the count of
+        non-null rows outside the top values and the distinct-value count.
+        Numeric-only fields are ``None``.
+    """
+    q_col = _duckdb_quote_ident(col_name)
+
+    tw = _duckdb_ts_where(ts_filter)
+    query = (
+        f"WITH filtered AS ("
+        f" SELECT * FROM {from_expr} AS _src WHERE {tw}"
+        f") "
+        f"SELECT "
+        f" (SELECT COUNT(*) FROM filtered) AS row_count, "
+        f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, "
+        f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered "
+        f" WHERE {q_col} IS NOT NULL) AS unique_count, "
+        f" CAST({q_col} AS VARCHAR) AS value, COUNT(*) AS cnt "
+        f"FROM filtered WHERE {q_col} IS NOT NULL "
+        f"GROUP BY {q_col} "
+        f"ORDER BY cnt DESC LIMIT {int(top_n)}"
+    )
+
+    rows = conn.execute(query).fetchall()
+
+    if not rows:
+        # The query yields no rows whenever there is no non-null value, which
+        # covers both "no rows in the window" and "every value is NULL". Count
+        # the rows so an all-NULL column reports its real size and a null rate
+        # of 1.0, as the numeric path does, instead of looking empty.
+        metric = empty_categorical_metric(col_name)
+        count_row = conn.execute(
+            f"SELECT COUNT(*) FROM {from_expr} AS _src WHERE {tw}"
+        ).fetchone()
+        total = count_row[0] if count_row else 0
+        if total:
+            # NOTE(review): assumes empty_categorical_metric returns a plain
+            # dict using the same keys as the dict built below - confirm.
+            metric.update(row_count=total, null_count=total, null_rate=1.0)
+        return metric
+
+    # The three scalar sub-selects repeat on every row; read them once.
+    row_count = rows[0][0]
+    null_count = rows[0][1]
+    unique_count = rows[0][2]
+
+    top_entries = [{"value": r[3], "count": r[4]} for r in rows]
+    top_total = sum(e["count"] for e in top_entries)
+    other_count = (row_count - null_count) - top_total
+
+    return {
+        "feature_name": col_name,
+        "feature_type": "categorical",
+        "row_count": row_count,
+        "null_count": null_count,
+        "null_rate": null_count / row_count if row_count > 0 else 0.0,
+        "mean": None,
+        "stddev": None,
+        "min_val": None,
+        "max_val": None,
+        "p50": None,
+        "p75": None,
+        "p90": None,
+        "p95": None,
+        "p99": None,
+        "histogram": {
+            "values": top_entries,
+            "other_count": max(other_count, 0),
+            "unique_count": unique_count,
+        },
+    }
+
+
+def _duckdb_mon_table_meta(metric_type: str):
+    """Map ``metric_type`` to its ``(file name, columns, primary-key columns)``.
+
+    Raises:
+        ValueError: If ``metric_type`` is not "feature", "feature_view",
+            "feature_service" or "job".
+    """
+    layouts = {
+        "feature": (
+            FEATURE_METRICS_FILE,
+            FEATURE_METRICS_COLUMNS,
+            FEATURE_METRICS_PK,
+        ),
+        "feature_view": (
+            VIEW_METRICS_FILE,
+            FEATURE_VIEW_METRICS_COLUMNS,
+            FEATURE_VIEW_METRICS_PK,
+        ),
+        "feature_service": (
+            SERVICE_METRICS_FILE,
+            FEATURE_SERVICE_METRICS_COLUMNS,
+            FEATURE_SERVICE_METRICS_PK,
+        ),
+        "job": (JOB_METRICS_FILE, JOB_COLUMNS, JOB_PK),
+    }
+    if metric_type not in layouts:
+        raise ValueError(f"Unknown metric_type '{metric_type}'")
+    return layouts[metric_type]
+
+
+def _duckdb_read_parquet_if_exists(path: str) -> Optional[pa.Table]:
+    """Read ``path`` as an Arrow table, or return ``None`` when it is not an existing file."""
+    return pq.read_table(path) if os.path.isfile(path) else None
+
+
+def _duckdb_parquet_upsert(
+    path: str,
+    columns: List[str],
+    pk_cols: List[str],
+    new_rows: List[Dict[str, Any]],
+) -> None:
+    """Upsert ``new_rows`` into the Parquet file at ``path``.
+
+    Rows already stored in the file are combined with ``new_rows`` and
+    de-duplicated on ``pk_cols``; when keys collide the incoming row wins.
+    Non-string ``histogram`` values are serialised to JSON text first.
+
+    Args:
+        path: Destination Parquet file; a missing parent directory is created.
+        columns: Column names used to build the frame for ``new_rows``.
+        pk_cols: Columns that together identify a row.
+        new_rows: Row dicts to insert or overwrite.
+    """
+    # os.path.dirname() is "" for a bare filename and os.makedirs("") raises
+    # FileNotFoundError, so only create a directory when there is one.
+    parent = os.path.dirname(path)
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+
+    prepared: List[Dict[str, Any]] = []
+    for row in new_rows:
+        r = dict(row)  # copy so the caller's dict is never mutated
+        histogram = r.get("histogram")
+        if histogram is not None and not isinstance(histogram, str):
+            r["histogram"] = json.dumps(histogram)
+        prepared.append(r)
+
+    new_df = pd.DataFrame(prepared, columns=columns)
+    existing = _duckdb_read_parquet_if_exists(path)
+    if existing is not None:
+        combined = pd.concat([existing.to_pandas(), new_df], ignore_index=True)
+    else:
+        combined = new_df
+
+    # keep="last" lets the freshly supplied rows override the stored ones.
+    combined = combined.drop_duplicates(subset=pk_cols, keep="last")
+    table = pa.Table.from_pandas(combined, preserve_index=False)
+
+    # Write to a sibling temp file and swap it in, so an interrupted write
+    # cannot leave a truncated Parquet file where the old data used to be.
+    tmp_path = f"{path}.tmp"
+    try:
+        pq.write_table(table, tmp_path)
+        os.replace(tmp_path, path)
+    finally:
+        if os.path.exists(tmp_path):
+            os.remove(tmp_path)
+
+
+def _duckdb_parquet_query(
+    path: str,
+    columns: List[str],
+    project: str,
+    filters: Optional[Dict[str, Any]],
+    start_date: Optional[date],
+    end_date: Optional[date],
+) -> List[Dict[str, Any]]:
+    """Read monitoring rows from the Parquet file at ``path``.
+
+    Rows are narrowed by ``project`` (when truthy), by every non-``None``
+    entry of ``filters`` (column == value) and, if the file has a
+    ``metric_date`` column, by the inclusive ``start_date``/``end_date``
+    range. Output is ordered by ``metric_date`` when present, otherwise by
+    ``job_id``.
+
+    Returns:
+        One dict per row restricted to ``columns``, passed through
+        ``normalize_monitoring_row`` and with non-string ``metric_date`` /
+        ``computed_at`` values converted via ``isoformat()``. An empty list
+        when the file is missing or has no rows.
+    """
+    table = _duckdb_read_parquet_if_exists(path)
+    if table is None or table.num_rows == 0:
+        return []
+
+    frame = table.to_pandas()
+    if project:
+        frame = frame[frame["project_id"] == project]
+    for column, wanted in (filters or {}).items():
+        if wanted is None:
+            continue
+        frame = frame[frame[column] == wanted]
+
+    if "metric_date" not in frame.columns:
+        frame = frame.sort_values("job_id", ascending=True)
+    else:
+        if start_date is not None:
+            frame = frame[frame["metric_date"] >= start_date]
+        if end_date is not None:
+            frame = frame[frame["metric_date"] <= end_date]
+        frame = frame.sort_values("metric_date", ascending=True)
+
+    records = []
+    for _, series in frame.iterrows():
+        record = {name: series.get(name) for name in columns}
+        normalize_monitoring_row(record)
+        for stamp_key in ("metric_date", "computed_at"):
+            stamp = record.get(stamp_key)
+            if stamp is None or isinstance(stamp, str):
+                continue
+            if hasattr(stamp, "isoformat"):
+                record[stamp_key] = stamp.isoformat()
+        records.append(record)
+
+    return records
+
+
+def _duckdb_sql_from_expression(config: RepoConfig, data_source: FileSource) -> str:
+    """Return the DuckDB table function call that scans ``data_source``.
+
+    Raises:
+        NotImplementedError: If the file format is neither Parquet nor Delta.
+    """
+    escaped_path = _duckdb_parquet_from_expression(config, data_source)
+    readers = ((ParquetFormat, "read_parquet"), (DeltaFormat, "delta_scan"))
+    for format_cls, reader in readers:
+        if isinstance(data_source.file_format, format_cls):
+            return f"{reader}('{escaped_path}')"
+    raise NotImplementedError(
+        "DuckDB monitoring compute supports Parquet and Delta file sources only."
+    )
+
+
class DuckDBOfflineStoreConfig(FeastConfigBaseModel):
type: StrictStr = "duckdb"
# """ Offline store type selector"""
@@ -229,3 +578,157 @@ def write_logged_features(
logging_config=logging_config,
registry=registry,
)
+
+    @staticmethod
+    def compute_monitoring_metrics(
+        config: RepoConfig,
+        data_source: DataSource,
+        feature_columns: List[Tuple[str, str]],
+        timestamp_field: str,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        histogram_bins: int = 20,
+        top_n: int = 10,
+    ) -> List[Dict[str, Any]]:
+        """Compute monitoring metrics for a file source with an in-process DuckDB.
+
+        Args:
+            config: Repo config; its offline store must be DuckDB.
+            data_source: The ``FileSource`` to profile.
+            feature_columns: ``(name, type)`` pairs; only "numeric" and
+                "categorical" types are processed.
+            timestamp_field: Column used for the time-range filter.
+            start_date: Start of the time range.
+            end_date: End of the time range.
+            histogram_bins: Number of bins for numeric histograms.
+            top_n: Number of top values kept for categorical features.
+
+        Returns:
+            Numeric feature metrics first, then categorical ones.
+        """
+        assert isinstance(config.offline_store, DuckDBOfflineStoreConfig)
+        assert isinstance(data_source, FileSource)
+
+        from_expr = _duckdb_sql_from_expression(config, data_source)
+        ts_filter = get_timestamp_filter_sql(
+            start_date,
+            end_date,
+            timestamp_field,
+            tz=timezone.utc,
+            cast_style="timestamp",
+            date_time_separator=" ",
+        )
+
+        numeric_features = [n for n, t in feature_columns if t == "numeric"]
+        categorical_features = [n for n, t in feature_columns if t == "categorical"]
+        results: List[Dict[str, Any]] = []
+
+        conn = duckdb.connect()
+        try:
+            if numeric_features:
+                results.extend(
+                    _duckdb_numeric_stats(
+                        conn,
+                        from_expr,
+                        numeric_features,
+                        ts_filter,
+                        histogram_bins,
+                    )
+                )
+            for col_name in categorical_features:
+                results.append(
+                    _duckdb_categorical_stats(
+                        conn,
+                        from_expr,
+                        col_name,
+                        ts_filter,
+                        top_n,
+                    )
+                )
+        finally:
+            # Release the connection even when a query fails (missing
+            # column, unreadable file, ...); it used to leak on errors.
+            conn.close()
+        return results
+
+    @staticmethod
+    def get_monitoring_max_timestamp(
+        config: RepoConfig,
+        data_source: DataSource,
+        timestamp_field: str,
+    ) -> Optional[datetime]:
+        """Return the largest ``timestamp_field`` value in the file source.
+
+        Returns:
+            A timezone-aware datetime (naive values are tagged as UTC and a
+            plain date becomes midnight UTC), or ``None`` when there is no
+            value or it is neither a datetime nor a date.
+        """
+        assert isinstance(config.offline_store, DuckDBOfflineStoreConfig)
+        assert isinstance(data_source, FileSource)
+
+        from_expr = _duckdb_sql_from_expression(config, data_source)
+        ts_col = _duckdb_quote_ident(timestamp_field)
+        conn = duckdb.connect()
+        try:
+            row = conn.execute(
+                f"SELECT MAX({ts_col}) AS m FROM {from_expr} AS _src"
+            ).fetchone()
+        finally:
+            # Close even if the query raises, so the connection never leaks.
+            conn.close()
+
+        if row is None or row[0] is None:
+            return None
+        val = row[0]
+        # datetime is a subclass of date, so it must be tested first.
+        if isinstance(val, datetime):
+            return val if val.tzinfo else val.replace(tzinfo=timezone.utc)
+        if isinstance(val, date):
+            return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc)
+        return None
+
+    @staticmethod
+    def ensure_monitoring_tables(config: RepoConfig) -> None:
+        """Create the monitoring directory and an empty Parquet file per metric table.
+
+        Files that already exist are left untouched.
+        """
+        assert isinstance(config.offline_store, DuckDBOfflineStoreConfig)
+        monitoring_dir = os.path.join(_duckdb_monitoring_base(config), MONITORING_DIR)
+        os.makedirs(monitoring_dir, exist_ok=True)
+
+        columns_by_file = {
+            FEATURE_METRICS_FILE: FEATURE_METRICS_COLUMNS,
+            VIEW_METRICS_FILE: FEATURE_VIEW_METRICS_COLUMNS,
+            SERVICE_METRICS_FILE: FEATURE_SERVICE_METRICS_COLUMNS,
+            JOB_METRICS_FILE: JOB_COLUMNS,
+        }
+        for file_name, column_names in columns_by_file.items():
+            target = _duckdb_monitoring_path(config, file_name)
+            if os.path.isfile(target):
+                continue
+            os.makedirs(os.path.dirname(target), exist_ok=True)
+            pd.DataFrame(columns=column_names).to_parquet(target, index=False)
+
+    @staticmethod
+    def save_monitoring_metrics(
+        config: RepoConfig,
+        metric_type: str,
+        metrics: List[Dict[str, Any]],
+    ) -> None:
+        """Upsert ``metrics`` into the Parquet file for ``metric_type``.
+
+        Does nothing when ``metrics`` is empty.
+        """
+        if not metrics:
+            return
+        assert isinstance(config.offline_store, DuckDBOfflineStoreConfig)
+
+        file_name, column_names, key_columns = _duckdb_mon_table_meta(metric_type)
+        target = _duckdb_monitoring_path(config, file_name)
+        _duckdb_parquet_upsert(target, column_names, key_columns, metrics)
+
+    @staticmethod
+    def query_monitoring_metrics(
+        config: RepoConfig,
+        project: str,
+        metric_type: str,
+        filters: Optional[Dict[str, Any]] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+    ) -> List[Dict[str, Any]]:
+        """Read stored metrics of ``metric_type`` from its Parquet file.
+
+        Filtering and ordering are delegated to ``_duckdb_parquet_query``.
+        """
+        assert isinstance(config.offline_store, DuckDBOfflineStoreConfig)
+
+        file_name, column_names, _ = _duckdb_mon_table_meta(metric_type)
+        return _duckdb_parquet_query(
+            _duckdb_monitoring_path(config, file_name),
+            column_names,
+            project,
+            filters,
+            start_date,
+            end_date,
+        )
+
+    @staticmethod
+    def clear_monitoring_baseline(
+        config: RepoConfig,
+        project: str,
+        feature_view_name: Optional[str] = None,
+        feature_name: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+    ) -> None:
+        """Set ``is_baseline`` to False on matching rows of the feature metrics file.
+
+        A row matches when it belongs to ``project``, is currently flagged as
+        baseline and equals every optional argument that is not ``None``.
+        """
+        assert isinstance(config.offline_store, DuckDBOfflineStoreConfig)
+
+        path = _duckdb_monitoring_path(config, FEATURE_METRICS_FILE)
+        tab = _duckdb_read_parquet_if_exists(path)
+        if tab is None or tab.num_rows == 0:
+            return
+
+        df = tab.to_pandas()
+        mask = df["project_id"] == project
+        if feature_view_name is not None:
+            mask = mask & (df["feature_view_name"] == feature_view_name)
+        if feature_name is not None:
+            mask = mask & (df["feature_name"] == feature_name)
+        if data_source_type is not None:
+            mask = mask & (df["data_source_type"] == data_source_type)
+        # Accept both booleans and 0/1 integers as the stored flag.
+        mask = mask & (df["is_baseline"].isin([True, 1]))
+
+        # Nothing to clear: skip rewriting the whole file for a no-op.
+        if not mask.any():
+            return
+
+        df.loc[mask, "is_baseline"] = False
+        pq.write_table(pa.Table.from_pandas(df, preserve_index=False), path)
diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py
index 4ae0c680c3b..816b8f454af 100644
--- a/sdk/python/feast/infra/offline_stores/offline_store.py
+++ b/sdk/python/feast/infra/offline_stores/offline_store.py
@@ -11,9 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+import logging
+import time
import warnings
from abc import ABC
-from datetime import datetime
+from datetime import date, datetime, timedelta, timezone
from pathlib import Path
from typing import (
TYPE_CHECKING,
@@ -70,6 +72,21 @@ def __init__(
self.max_event_timestamp = max_event_timestamp
+def _extract_retrieval_metadata(job: "RetrievalJob") -> tuple:
+    """Return ``(feature_view_names, feature_count)`` from a RetrievalJob's metadata.
+
+    View names are the part before the first ``:`` of each feature reference
+    that contains one, de-duplicated in first-seen order. ``([], 0)`` is
+    returned when the metadata is falsy, or when reading it raises
+    ``NotImplementedError`` or ``AttributeError``.
+    """
+    try:
+        meta = job.metadata
+        if meta:
+            features = meta.features
+            feature_count = len(features)
+            # dict.fromkeys de-duplicates while keeping order; building the
+            # list from a set made the order vary between runs.
+            feature_views = list(
+                dict.fromkeys(ref.split(":")[0] for ref in features if ":" in ref)
+            )
+            return feature_views, feature_count
+    except (NotImplementedError, AttributeError):
+        pass
+    return [], 0
+
+
class RetrievalJob(ABC):
"""A RetrievalJob manages the execution of a query to retrieve data from the offline store."""
@@ -152,7 +169,51 @@ def to_arrow(
validation_reference (optional): The validation to apply against the retrieved dataframe.
timeout (optional): The query timeout if applicable.
"""
- features_table = self._to_arrow_internal(timeout=timeout)
+ start_wall = time.monotonic()
+ status_label = "success"
+ row_count = 0
+ try:
+ features_table = self._to_arrow_internal(timeout=timeout)
+ row_count = features_table.num_rows
+ except Exception:
+ status_label = "error"
+ raise
+ finally:
+ try:
+ from feast import metrics as feast_metrics
+
+ elapsed = time.monotonic() - start_wall
+
+ if feast_metrics._config.offline_features:
+ feast_metrics.offline_store_request_total.labels(
+ method="to_arrow", status=status_label
+ ).inc()
+ feast_metrics.offline_store_request_latency_seconds.labels(
+ method="to_arrow"
+ ).observe(elapsed)
+ feast_metrics.offline_store_row_count.labels(
+ method="to_arrow"
+ ).observe(row_count)
+
+ if feast_metrics._config.audit_logging:
+ feature_views, feature_count = _extract_retrieval_metadata(self)
+ end_dt = datetime.now(tz=timezone.utc)
+ start_dt = end_dt - timedelta(seconds=elapsed)
+ feast_metrics.emit_offline_audit_log(
+ method="to_arrow",
+ feature_views=feature_views,
+ feature_count=feature_count,
+ row_count=row_count,
+ status=status_label,
+ start_time=start_dt.isoformat(),
+ end_time=end_dt.isoformat(),
+ duration_ms=elapsed * 1000,
+ )
+ except Exception:
+ logging.getLogger(__name__).debug(
+ "Failed to record offline store metrics", exc_info=True
+ )
+
if self.on_demand_feature_views:
# Build a mapping of ODFV name to requested feature names
# This ensures we only return the features that were explicitly requested
@@ -559,3 +620,137 @@ def get_table_column_names_and_types_from_data_source(
data_source: DataSource object
"""
return data_source.get_table_column_names_and_types(config=config)
+
+    @staticmethod
+    def compute_monitoring_metrics(
+        config: RepoConfig,
+        data_source: DataSource,
+        feature_columns: List[Tuple[str, str]],
+        timestamp_field: str,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        histogram_bins: int = 20,
+        top_n: int = 10,
+    ) -> List[Dict[str, Any]]:
+        """
+        Compute monitoring metrics (stats, percentiles, histograms) directly
+        in the offline store using its native compute engine.
+
+        Backends that don't support this should leave it unimplemented;
+        the monitoring service will fall back to Python-based computation.
+
+        Args:
+            config: The config for the current feature store.
+            data_source: The data source to compute metrics from.
+            feature_columns: List of (feature_name, feature_type) where
+                feature_type is "numeric" or "categorical".
+            timestamp_field: Column used for time-range filtering.
+            start_date: Start of the time range.
+            end_date: End of the time range.
+            histogram_bins: Number of bins for numeric histograms.
+            top_n: Number of top values for categorical histograms.
+
+        Returns:
+            A list of metric dicts, one per feature, matching the format
+            produced by MetricsCalculator.compute_all.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def get_monitoring_max_timestamp(
+        config: RepoConfig,
+        data_source: DataSource,
+        timestamp_field: str,
+    ) -> Optional[datetime]:
+        """
+        Return the maximum event timestamp from the data source.
+
+        Used by the monitoring service to determine date ranges for
+        auto-compute. Backends that don't support this should leave it
+        unimplemented; the caller will fall back to a full-table scan.
+
+        Args:
+            config: The config for the current feature store.
+            data_source: The data source to query.
+            timestamp_field: The timestamp column name.
+
+        Returns:
+            The maximum timestamp, or None if no data exists.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError
+
+ # ------------------------------------------------------------------ #
+ # Monitoring metrics storage (native)
+ # ------------------------------------------------------------------ #
+
+    # Granularity labels exposed as a class attribute, coarsest last.
+    # NOTE(review): presumably the accepted values of the ``granularity``
+    # metric column; the code that consumes this tuple is not visible here.
+    MONITORING_VALID_GRANULARITIES = (
+        "daily",
+        "weekly",
+        "biweekly",
+        "monthly",
+        "quarterly",
+    )
+
+    @staticmethod
+    def ensure_monitoring_tables(config: RepoConfig) -> None:
+        """Create the monitoring metrics tables if they do not exist.
+
+        Backends that don't support native monitoring storage should
+        leave this unimplemented; the monitoring service will raise an
+        error indicating the backend lacks storage support.
+
+        Args:
+            config: The config for the current feature store.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def save_monitoring_metrics(
+        config: RepoConfig,
+        metric_type: str,
+        metrics: List[Dict[str, Any]],
+    ) -> None:
+        """Persist monitoring metrics (upsert semantics).
+
+        Args:
+            config: The config for the current feature store.
+            metric_type: One of "feature", "feature_view", "feature_service".
+                NOTE(review): the DuckDB implementation also accepts "job";
+                confirm whether that belongs in this contract.
+            metrics: List of metric dicts to upsert.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def query_monitoring_metrics(
+        config: RepoConfig,
+        project: str,
+        metric_type: str,
+        filters: Optional[Dict[str, Any]] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+    ) -> List[Dict[str, Any]]:
+        """Read monitoring metrics with optional filtering.
+
+        Args:
+            config: The config for the current feature store.
+            project: Feast project name.
+            metric_type: One of "feature", "feature_view", "feature_service".
+                NOTE(review): the DuckDB implementation also accepts "job";
+                confirm whether that belongs in this contract.
+            filters: Column-value pairs for WHERE clauses.
+            start_date: Inclusive lower bound on metric_date.
+            end_date: Inclusive upper bound on metric_date.
+
+        Returns:
+            List of metric dicts ordered by metric_date ascending.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def clear_monitoring_baseline(
+        config: RepoConfig,
+        project: str,
+        feature_view_name: Optional[str] = None,
+        feature_name: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+    ) -> None:
+        """Set is_baseline=FALSE for matching feature metric rows.
+
+        Used to ensure only one baseline exists per feature before
+        writing a new baseline.
+
+        Args:
+            config: The config for the current feature store.
+            project: Feast project name.
+            feature_view_name: Optional feature view to restrict the update to.
+            feature_name: Optional feature to restrict the update to.
+            data_source_type: Optional data source type to restrict the update to.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation.
+        """
+        raise NotImplementedError
diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py
index 900dfcfab80..ec708ccf798 100644
--- a/sdk/python/feast/infra/offline_stores/redshift.py
+++ b/sdk/python/feast/infra/offline_stores/redshift.py
@@ -1,6 +1,7 @@
import contextlib
+import json
import uuid
-from datetime import datetime, timezone
+from datetime import date, datetime, timezone
from pathlib import Path
from typing import (
Any,
@@ -40,6 +41,17 @@
)
from feast.infra.registry.base_registry import BaseRegistry
from feast.infra.utils import aws_utils
+from feast.monitoring.monitoring_utils import (
+ MON_TABLE_FEATURE,
+ MON_TABLE_FEATURE_SERVICE,
+ MON_TABLE_FEATURE_VIEW,
+ MON_TABLE_JOB,
+ empty_categorical_metric,
+ empty_numeric_metric,
+ monitoring_table_meta,
+ normalize_monitoring_row,
+ opt_float,
+)
from feast.repo_config import FeastConfigBaseModel, RepoConfig
from feast.saved_dataset import SavedDatasetStorage
@@ -378,6 +390,584 @@ def offline_write_batch(
fail_if_exists=False,
)
+    @staticmethod
+    def compute_monitoring_metrics(
+        config: RepoConfig,
+        data_source: DataSource,
+        feature_columns: List[Tuple[str, str]],
+        timestamp_field: str,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        histogram_bins: int = 20,
+        top_n: int = 10,
+    ) -> List[Dict[str, Any]]:
+        """Compute monitoring metrics for a Redshift source with SQL.
+
+        Numeric features are profiled together, categorical features one at
+        a time; numeric results come first in the returned list.
+        """
+        assert isinstance(config.offline_store, RedshiftOfflineStoreConfig)
+        assert isinstance(data_source, RedshiftSource)
+
+        source_sql = data_source.get_table_query_string()
+        # An empty filter degrades to a predicate that is always true.
+        where_sql = (
+            get_timestamp_filter_sql(
+                start_date,
+                end_date,
+                timestamp_field,
+                tz=timezone.utc,
+            )
+            or "1=1"
+        )
+
+        numeric_names: List[str] = []
+        categorical_names: List[str] = []
+        for name, kind in feature_columns:
+            if kind == "numeric":
+                numeric_names.append(name)
+            elif kind == "categorical":
+                categorical_names.append(name)
+
+        metrics: List[Dict[str, Any]] = []
+        if numeric_names:
+            metrics += _redshift_sql_numeric_stats(
+                config, source_sql, numeric_names, where_sql, histogram_bins
+            )
+        metrics += [
+            _redshift_sql_categorical_stats(config, source_sql, name, where_sql, top_n)
+            for name in categorical_names
+        ]
+        return metrics
+
+    @staticmethod
+    def get_monitoring_max_timestamp(
+        config: RepoConfig,
+        data_source: DataSource,
+        timestamp_field: str,
+    ) -> Optional[datetime]:
+        """Return the largest ``timestamp_field`` value in the Redshift source.
+
+        Returns:
+            A timezone-aware datetime (values without a zone are tagged as
+            UTC), or ``None`` when the query yields no value.
+        """
+        assert isinstance(config.offline_store, RedshiftOfflineStoreConfig)
+        assert isinstance(data_source, RedshiftSource)
+
+        from_expression = data_source.get_table_query_string()
+        q_ts = f'"{timestamp_field}"'
+        sql = f"SELECT MAX({q_ts}) AS max_ts FROM {from_expression} AS _src"
+        rows = _redshift_execute_fetch_rows(config, sql)
+        if not rows or not rows[0]:
+            return None
+        val = _redshift_field_value(rows[0][0])
+        if val is None:
+            return None
+        parsed = val if isinstance(val, datetime) else parser.parse(str(val))
+        # A parsed string without an offset used to be returned naive, while
+        # the datetime branch was tagged as UTC; treat both the same way.
+        return parsed if parsed.tzinfo else parsed.replace(tzinfo=timezone.utc)
+
+    @staticmethod
+    def ensure_monitoring_tables(config: RepoConfig) -> None:
+        """Run every monitoring ``CREATE TABLE IF NOT EXISTS`` statement, in order."""
+        assert isinstance(config.offline_store, RedshiftOfflineStoreConfig)
+        for ddl in _REDSHIFT_MONITORING_DDL_STATEMENTS:
+            _redshift_execute_statement(config, ddl)
+
+    @staticmethod
+    def save_monitoring_metrics(
+        config: RepoConfig,
+        metric_type: str,
+        metrics: List[Dict[str, Any]],
+    ) -> None:
+        """Merge each metric dict into the table for ``metric_type``, one row at a time.
+
+        Does nothing when ``metrics`` is empty.
+        """
+        if not metrics:
+            return
+        assert isinstance(config.offline_store, RedshiftOfflineStoreConfig)
+        target_table, column_names, key_columns = monitoring_table_meta(metric_type)
+        for metric in metrics:
+            _redshift_merge_metric_row(
+                config, target_table, column_names, key_columns, metric
+            )
+
+    @staticmethod
+    def query_monitoring_metrics(
+        config: RepoConfig,
+        project: str,
+        metric_type: str,
+        filters: Optional[Dict[str, Any]] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+    ) -> List[Dict[str, Any]]:
+        """Read stored metrics of ``metric_type``; the work is done by ``_redshift_mon_query``."""
+        assert isinstance(config.offline_store, RedshiftOfflineStoreConfig)
+        column_names = monitoring_table_meta(metric_type)[1]
+        return _redshift_mon_query(
+            config,
+            metric_type,
+            column_names,
+            project,
+            filters,
+            start_date,
+            end_date,
+        )
+
+    @staticmethod
+    def clear_monitoring_baseline(
+        config: RepoConfig,
+        project: str,
+        feature_view_name: Optional[str] = None,
+        feature_name: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+    ) -> None:
+        """Issue an UPDATE that sets ``is_baseline = FALSE`` on matching baseline rows.
+
+        The WHERE clause always restricts on ``project`` and the current
+        baseline flag; each optional argument adds an equality predicate
+        when it is not ``None``.
+        """
+        assert isinstance(config.offline_store, RedshiftOfflineStoreConfig)
+        predicates = [
+            f"project_id = {_redshift_sql_literal(project)}",
+            "is_baseline = TRUE",
+        ]
+        optional_filters = (
+            ("feature_view_name", feature_view_name),
+            ("feature_name", feature_name),
+            ("data_source_type", data_source_type),
+        )
+        predicates += [
+            f"{column} = {_redshift_sql_literal(wanted)}"
+            for column, wanted in optional_filters
+            if wanted is not None
+        ]
+        where_sql = " AND ".join(predicates)
+        _redshift_execute_statement(
+            config,
+            f"UPDATE {MON_TABLE_FEATURE} SET is_baseline = FALSE WHERE {where_sql}",
+        )
+
+
+_REDSHIFT_MONITORING_DDL_STATEMENTS = [
+ f"""
+CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE} (
+ project_id VARCHAR(255) NOT NULL,
+ feature_view_name VARCHAR(255) NOT NULL,
+ feature_name VARCHAR(255) NOT NULL,
+ metric_date DATE NOT NULL,
+ granularity VARCHAR(20) NOT NULL DEFAULT 'daily',
+ data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch',
+ computed_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ is_baseline BOOLEAN NOT NULL DEFAULT FALSE,
+ feature_type VARCHAR(50) NOT NULL,
+ row_count BIGINT,
+ null_count BIGINT,
+ null_rate DOUBLE PRECISION,
+ mean DOUBLE PRECISION,
+ stddev DOUBLE PRECISION,
+ min_val DOUBLE PRECISION,
+ max_val DOUBLE PRECISION,
+ p50 DOUBLE PRECISION,
+ p75 DOUBLE PRECISION,
+ p90 DOUBLE PRECISION,
+ p95 DOUBLE PRECISION,
+ p99 DOUBLE PRECISION,
+ histogram VARCHAR(65535),
+ PRIMARY KEY (project_id, feature_view_name, feature_name,
+ metric_date, granularity, data_source_type)
+);
+""",
+ f"""
+CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_VIEW} (
+ project_id VARCHAR(255) NOT NULL,
+ feature_view_name VARCHAR(255) NOT NULL,
+ metric_date DATE NOT NULL,
+ granularity VARCHAR(20) NOT NULL DEFAULT 'daily',
+ data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch',
+ computed_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ is_baseline BOOLEAN NOT NULL DEFAULT FALSE,
+ total_row_count BIGINT,
+ total_features INTEGER,
+ features_with_nulls INTEGER,
+ avg_null_rate DOUBLE PRECISION,
+ max_null_rate DOUBLE PRECISION,
+ PRIMARY KEY (project_id, feature_view_name, metric_date,
+ granularity, data_source_type)
+);
+""",
+ f"""
+CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_SERVICE} (
+ project_id VARCHAR(255) NOT NULL,
+ feature_service_name VARCHAR(255) NOT NULL,
+ metric_date DATE NOT NULL,
+ granularity VARCHAR(20) NOT NULL DEFAULT 'daily',
+ data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch',
+ computed_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ is_baseline BOOLEAN NOT NULL DEFAULT FALSE,
+ total_feature_views INTEGER,
+ total_features INTEGER,
+ avg_null_rate DOUBLE PRECISION,
+ max_null_rate DOUBLE PRECISION,
+ PRIMARY KEY (project_id, feature_service_name, metric_date,
+ granularity, data_source_type)
+);
+""",
+ f"""
+CREATE TABLE IF NOT EXISTS {MON_TABLE_JOB} (
+ job_id VARCHAR(36) NOT NULL,
+ project_id VARCHAR(255) NOT NULL,
+ feature_view_name VARCHAR(255),
+ job_type VARCHAR(50) NOT NULL,
+ status VARCHAR(20) NOT NULL DEFAULT 'pending',
+ parameters VARCHAR(65535),
+ metric_date DATE NOT NULL,
+ started_at TIMESTAMPTZ,
+ completed_at TIMESTAMPTZ,
+ error_message VARCHAR(65535),
+ result_summary VARCHAR(65535),
+ PRIMARY KEY (job_id)
+);
+""",
+]
+
+
+def _redshift_execute_statement(config: RepoConfig, sql: str) -> str:
+    """Run ``sql`` through the Redshift Data API client and return the helper's result."""
+    store = config.offline_store
+    data_client = aws_utils.get_redshift_data_client(store.region)
+    return aws_utils.execute_redshift_statement(
+        data_client,
+        store.cluster_id,
+        store.workgroup,
+        store.database,
+        store.user,
+        sql,
+    )
+
+
+def _redshift_get_statement_pages(
+    client: Any, statement_id: str
+) -> Tuple[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
+    """Fetch every result page of a statement.
+
+    Follows ``NextToken`` until the service stops returning one.
+
+    Returns:
+        ``(column_metadata, records)``: the first non-empty
+        ``ColumnMetadata`` seen and the ``Records`` of all pages, in order.
+    """
+    metadata: List[Dict[str, Any]] = []
+    records: List[List[Dict[str, Any]]] = []
+    token: Optional[str] = None
+    more_pages = True
+    while more_pages:
+        request: Dict[str, Any] = {"Id": statement_id}
+        if token:
+            request["NextToken"] = token
+        page = client.get_statement_result(**request)
+        if not metadata:
+            metadata = page.get("ColumnMetadata", [])
+        records += page.get("Records", [])
+        token = page.get("NextToken")
+        more_pages = bool(token)
+    return metadata, records
+
+
+def _redshift_execute_fetch_rows(
+    config: RepoConfig, sql: str
+) -> List[List[Dict[str, Any]]]:
+    """Run ``sql`` and return all result records across every page."""
+    store = config.offline_store
+    data_client = aws_utils.get_redshift_data_client(store.region)
+    statement_id = aws_utils.execute_redshift_statement(
+        data_client,
+        store.cluster_id,
+        store.workgroup,
+        store.database,
+        store.user,
+        sql,
+    )
+    return _redshift_get_statement_pages(data_client, statement_id)[1]
+
+
+def _redshift_field_value(field: Dict[str, Any]) -> Any:
+    """Unwrap one result field dict into a plain Python value.
+
+    Returns ``None`` for a field flagged ``isNull`` or carrying none of the
+    recognised keys; otherwise the first of ``stringValue``, ``longValue``,
+    ``doubleValue``, ``booleanValue`` that is present.
+    """
+    if field.get("isNull"):
+        return None
+    for value_key in ("stringValue", "longValue", "doubleValue", "booleanValue"):
+        if value_key in field:
+            return field[value_key]
+    return None
+
+
+def _redshift_sql_literal(val: Any) -> str:
+    """Render a Python value as a SQL literal for inlining into a statement.
+
+    ``None`` and non-finite floats become ``NULL``, booleans ``TRUE``/``FALSE``,
+    numbers their ``str()`` form, dates ``DATE '...'``, datetimes
+    ``TIMESTAMP '...'`` (second precision, space separator) and anything
+    else a single-quoted string with embedded quotes doubled.
+    """
+    if val is None:
+        return "NULL"
+    # bool is a subclass of int, so it has to be handled before numbers.
+    if isinstance(val, bool):
+        return "TRUE" if val else "FALSE"
+    if isinstance(val, float) and (
+        val != val or val in (float("inf"), float("-inf"))
+    ):
+        # str() would yield the bare tokens nan / inf / -inf, which are not
+        # numeric literals and would make the whole statement fail.
+        return "NULL"
+    if isinstance(val, (int, float)):
+        return str(val)
+    # datetime is a subclass of date, so test it first.
+    if isinstance(val, datetime):
+        s = val.isoformat(sep=" ", timespec="seconds")
+        return f"TIMESTAMP '{s}'"
+    if isinstance(val, date):
+        return f"DATE '{val.isoformat()}'"
+    s = str(val).replace("'", "''")
+    return f"'{s}'"
+
+
+def _redshift_merge_metric_row(
+ config: RepoConfig,
+ table: str,
+ columns: List[str],
+ pk_columns: List[str],
+ row: Dict[str, Any],
+) -> None:
+ """Upsert a single monitoring row into ``table`` using one MERGE statement.
+
+ The source side of the MERGE is a one-row ``SELECT`` of literals, one per
+ entry in ``columns`` (a column missing from ``row`` is rendered from
+ ``None``). Rows are matched on ``pk_columns``; on a match every non-key
+ column is overwritten, otherwise the full row is inserted.
+
+ Table and column names are interpolated into the SQL text as-is (unquoted).
+ """
+ # Columns that are updated when the primary key already exists.
+ non_pk = [c for c in columns if c not in pk_columns]
+ client = aws_utils.get_redshift_data_client(config.offline_store.region)
+
+ # "<literal> AS <column>" for each column: the single-row MERGE source.
+ select_parts = ", ".join(
+ f"{_redshift_sql_literal_for_column(c, row.get(c))} AS {c}" for c in columns
+ )
+ on_clause = " AND ".join(f"t.{c} = s.{c}" for c in pk_columns)
+ # NOTE(review): this is empty when every column is a key column, which would
+ # leave "UPDATE SET" without assignments - confirm that cannot happen.
+ update_set = ", ".join(f"{c} = s.{c}" for c in non_pk)
+ insert_cols = ", ".join(columns)
+ insert_vals = ", ".join(f"s.{c}" for c in columns)
+
+ merge_sql = f"""
+MERGE INTO {table} AS t
+USING (SELECT {select_parts}) AS s
+ON {on_clause}
+WHEN MATCHED THEN UPDATE SET {update_set}
+WHEN NOT MATCHED THEN INSERT ({insert_cols}) VALUES ({insert_vals})
+""".strip()
+ aws_utils.execute_redshift_statement(
+ client,
+ config.offline_store.cluster_id,
+ config.offline_store.workgroup,
+ config.offline_store.database,
+ config.offline_store.user,
+ merge_sql,
+ )
+
+
+def _redshift_sql_literal_for_column(column: str, val: Any) -> str:
+    """Render ``val`` as a SQL literal, with special handling per column.
+
+    ``None`` always becomes ``NULL``. A value for the ``histogram`` column is
+    JSON-encoded and emitted as a single-quoted string (single quotes doubled).
+    Every other column is delegated to ``_redshift_sql_literal``.
+    """
+    if val is None:
+        return "NULL"
+    if column != "histogram":
+        return _redshift_sql_literal(val)
+    payload = json.dumps(val).replace("'", "''")
+    return f"'{payload}'"
+
+
+def _redshift_mon_query(
+    config: RepoConfig,
+    metric_type: str,
+    columns: List[str],
+    project: str,
+    filters: Optional[Dict[str, Any]],
+    start_date: Optional[date],
+    end_date: Optional[date],
+) -> List[Dict[str, Any]]:
+    """Read stored monitoring rows for ``metric_type`` from Redshift.
+
+    Args:
+        config: Repo config; connection settings come from ``offline_store``.
+        metric_type: Passed to ``monitoring_table_meta`` to pick the table.
+        columns: Columns to select.
+        project: Restricts on ``project_id`` when truthy; a falsy value
+            disables the project filter.
+        filters: Optional ``column -> value`` equality filters; entries whose
+            value is ``None`` are ignored.
+        start_date: Inclusive lower bound on ``metric_date`` when given.
+        end_date: Inclusive upper bound on ``metric_date`` when given.
+
+    Returns:
+        One dict per row, keyed by the column names reported in the result
+        metadata and passed through ``normalize_monitoring_row``. Rows are
+        ordered by ``metric_date`` when it is selected, else by ``job_id``.
+    """
+    table, _, _ = monitoring_table_meta(metric_type)
+    conditions: List[str] = []
+    if project:
+        conditions.append(f"project_id = {_redshift_sql_literal(project)}")
+    if filters:
+        for key, value in filters.items():
+            if value is not None:
+                # Filter keys become quoted identifiers: double any embedded
+                # double quote so a key cannot close the identifier and
+                # inject SQL.
+                quoted_key = '"' + str(key).replace('"', '""') + '"'
+                conditions.append(f"{quoted_key} = {_redshift_sql_literal(value)}")
+    if start_date:
+        conditions.append(f"metric_date >= DATE '{start_date.isoformat()}'")
+    if end_date:
+        conditions.append(f"metric_date <= DATE '{end_date.isoformat()}'")
+    where_sql = " AND ".join(conditions) if conditions else "TRUE"
+    col_sql = ", ".join(f'"{c}"' for c in columns)
+    order_col = "metric_date" if "metric_date" in columns else "job_id"
+    sql = (
+        f'SELECT {col_sql} FROM "{table}" WHERE {where_sql} ORDER BY "{order_col}" ASC'
+    )
+
+    client = aws_utils.get_redshift_data_client(config.offline_store.region)
+    sid = aws_utils.execute_redshift_statement(
+        client,
+        config.offline_store.cluster_id,
+        config.offline_store.workgroup,
+        config.offline_store.database,
+        config.offline_store.user,
+        sql,
+    )
+    meta, rows = _redshift_get_statement_pages(client, sid)
+    col_names = [c["name"] for c in meta]
+    out: List[Dict[str, Any]] = []
+    for rec in rows:
+        record = {col_names[i]: _redshift_field_value(rec[i]) for i in range(len(rec))}
+        out.append(normalize_monitoring_row(record))
+    return out
+
+
+def _redshift_sql_numeric_stats(
+ config: RepoConfig,
+ from_expression: str,
+ feature_names: List[str],
+ ts_clause: str,
+ histogram_bins: int,
+) -> List[Dict[str, Any]]:
+ """Compute summary statistics for numeric features with SQL push-down.
+
+ One aggregate query returns the total row count plus, for every feature,
+ its non-null count, mean, sample standard deviation, min, max and the
+ 50/75/90/95/99th percentiles (all computed on the column cast to
+ DOUBLE PRECISION). A separate histogram query is then issued for each
+ feature that has at least one non-null value.
+
+ Args:
+ config: Repo config used to reach Redshift.
+ from_expression: Table or sub-query text placed after ``FROM``.
+ feature_names: Numeric column names to profile.
+ ts_clause: SQL predicate used as the ``WHERE`` clause.
+ histogram_bins: Number of bins requested for each histogram.
+
+ Returns:
+ One metric dict per feature, in the order of ``feature_names``. If the
+ aggregate query yields no row, ``empty_numeric_metric`` is returned for
+ each feature instead.
+ """
+ # Position 0 is the overall row count; each feature then appends exactly
+ # ten expressions, which the index arithmetic below relies on.
+ select_parts = ["COUNT(*)"]
+ for col in feature_names:
+ q = f'"{col}"'
+ c = f"CAST({q} AS DOUBLE PRECISION)"
+ select_parts.extend(
+ [
+ f"COUNT({q})",
+ f"AVG({c})",
+ f"STDDEV_SAMP({c})",
+ f"MIN({c})",
+ f"MAX({c})",
+ f"PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {c})",
+ f"PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {c})",
+ f"PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY {c})",
+ f"PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY {c})",
+ f"PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY {c})",
+ ]
+ )
+
+ query = (
+ f"SELECT {', '.join(select_parts)} "
+ f"FROM {from_expression} AS _src WHERE {ts_clause}"
+ )
+ rows = _redshift_execute_fetch_rows(config, query)
+ if not rows or not rows[0]:
+ return [empty_numeric_metric(n) for n in feature_names]
+
+ row = rows[0]
+ row_count = int(_redshift_field_value(row[0]) or 0)
+ results: List[Dict[str, Any]] = []
+
+ for i, col in enumerate(feature_names):
+ # Offset of this feature's ten aggregates within the result row:
+ # +0 non-null count, +1 mean, +2 stddev, +3 min, +4 max, +5..+9 percentiles.
+ base = 1 + i * 10
+ non_null = int(_redshift_field_value(row[base]) or 0)
+ null_count = row_count - non_null
+
+ min_val = opt_float(_redshift_field_value(row[base + 3]))
+ max_val = opt_float(_redshift_field_value(row[base + 4]))
+
+ result: Dict[str, Any] = {
+ "feature_name": col,
+ "feature_type": "numeric",
+ "row_count": row_count,
+ "null_count": null_count,
+ "null_rate": null_count / row_count if row_count > 0 else 0.0,
+ "mean": opt_float(_redshift_field_value(row[base + 1])),
+ "stddev": opt_float(_redshift_field_value(row[base + 2])),
+ "min_val": min_val,
+ "max_val": max_val,
+ "p50": opt_float(_redshift_field_value(row[base + 5])),
+ "p75": opt_float(_redshift_field_value(row[base + 6])),
+ "p90": opt_float(_redshift_field_value(row[base + 7])),
+ "p95": opt_float(_redshift_field_value(row[base + 8])),
+ "p99": opt_float(_redshift_field_value(row[base + 9])),
+ "histogram": None,
+ }
+
+ # The histogram needs a known value range, so it is skipped when the
+ # column is entirely NULL in the selected window.
+ if min_val is not None and max_val is not None and non_null > 0:
+ result["histogram"] = _redshift_sql_numeric_histogram(
+ config,
+ from_expression,
+ col,
+ ts_clause,
+ histogram_bins,
+ min_val,
+ max_val,
+ )
+
+ results.append(result)
+
+ return results
+
+
+def _redshift_sql_numeric_histogram(
+ config: RepoConfig,
+ from_expression: str,
+ col_name: str,
+ ts_clause: str,
+ bins: int,
+ min_val: float,
+ max_val: float,
+) -> Dict[str, Any]:
+ """Build an equal-width histogram of one numeric column in SQL.
+
+ When ``min_val == max_val`` a single bucket holding the non-null row count
+ is returned with ``bin_width`` 0.0. Otherwise the range is divided into
+ ``bins`` buckets of width ``(max_val - min_val) / bins`` and each non-null
+ value is assigned a 1-based bucket number, clamped to ``[1, bins]``.
+
+ Returns:
+ Dict with ``bins`` (the bin edges), ``counts`` (rows per bucket) and
+ ``bin_width``.
+ """
+ q_col = f'"{col_name}"'
+
+ # Degenerate range: every non-null value is identical, so just count them.
+ if min_val == max_val:
+ sql = (
+ f"SELECT COUNT(*) FROM {from_expression} AS _src "
+ f"WHERE {q_col} IS NOT NULL AND {ts_clause}"
+ )
+ r = _redshift_execute_fetch_rows(config, sql)
+ cnt = int(_redshift_field_value(r[0][0]) or 0) if r and r[0] else 0
+ return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0}
+
+ bin_width = (max_val - min_val) / bins
+ cast_col = f"CAST({q_col} AS DOUBLE PRECISION)"
+
+ # The "min = max" arm of the CASE repeats the check already handled by the
+ # early return above. The LEAST/GREATEST pair clamps the computed bucket
+ # so that the maximum value falls into the last bucket instead of bins + 1.
+ inner = (
+ f"CASE WHEN {min_val} = {max_val} THEN 1 "
+ f"ELSE LEAST(GREATEST(FLOOR(({cast_col} - {min_val}) / {bin_width}) + 1, 1), {bins}) "
+ f"END AS bucket"
+ )
+
+ query = (
+ f"SELECT bucket, COUNT(*) AS cnt FROM ("
+ f" SELECT {inner} "
+ f" FROM {from_expression} AS _src "
+ f" WHERE {q_col} IS NOT NULL AND {ts_clause}"
+ f") AS _b WHERE bucket IS NOT NULL "
+ f"GROUP BY bucket ORDER BY bucket"
+ )
+ hrows = _redshift_execute_fetch_rows(config, query)
+ # Buckets with no rows are absent from the result, so start from zeros.
+ counts = [0] * bins
+ for hr in hrows:
+ bucket = int(_redshift_field_value(hr[0]) or 0)
+ cnt = int(_redshift_field_value(hr[1]) or 0)
+ if 1 <= bucket <= bins:
+ counts[bucket - 1] = cnt
+
+ bin_edges = [min_val + i * bin_width for i in range(bins + 1)]
+ return {
+ "bins": [float(b) for b in bin_edges],
+ "counts": counts,
+ "bin_width": float(bin_width),
+ }
+
+
+def _redshift_sql_categorical_stats(
+ config: RepoConfig,
+ from_expression: str,
+ col_name: str,
+ ts_clause: str,
+ top_n: int,
+) -> Dict[str, Any]:
+ """Compute categorical statistics for one column with SQL push-down.
+
+ A single query returns the ``top_n`` most frequent non-null values of the
+ column (cast to VARCHAR) with their counts; every returned row also carries
+ the total row count, the null count and the distinct non-null count of the
+ filtered data as scalar sub-queries.
+
+ Returns:
+ A metric dict whose numeric fields are ``None`` and whose ``histogram``
+ holds the top values, the count of rows outside the top values
+ (floored at 0) and the unique count. When the query returns no rows,
+ ``empty_categorical_metric(col_name)`` is returned.
+ """
+ q_col = f'"{col_name}"'
+
+ query = (
+ f"WITH filtered AS ("
+ f" SELECT * FROM {from_expression} AS _src WHERE {ts_clause}"
+ f") "
+ f"SELECT "
+ f" (SELECT COUNT(*) FROM filtered) AS row_count, "
+ f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, "
+ f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered "
+ f" WHERE {q_col} IS NOT NULL) AS unique_count, "
+ f" CAST({q_col} AS VARCHAR(65535)) AS value, COUNT(*) AS cnt "
+ f"FROM filtered WHERE {q_col} IS NOT NULL "
+ f"GROUP BY {q_col} ORDER BY cnt DESC LIMIT {int(top_n)}"
+ )
+
+ rows = _redshift_execute_fetch_rows(config, query)
+ # NOTE(review): the outer query only yields rows for non-null values, so a
+ # column that is entirely NULL also ends up here and its row/null counts
+ # are not reported - confirm this is intended.
+ if not rows:
+ return empty_categorical_metric(col_name)
+
+ # The three scalar sub-query columns are identical on every row; read them
+ # from the first one.
+ row_count = int(_redshift_field_value(rows[0][0]) or 0)
+ null_count = int(_redshift_field_value(rows[0][1]) or 0)
+ unique_count = int(_redshift_field_value(rows[0][2]) or 0)
+
+ top_entries = [
+ {
+ "value": _redshift_field_value(r[3]),
+ "count": int(_redshift_field_value(r[4]) or 0),
+ }
+ for r in rows
+ ]
+ top_total = sum(e["count"] for e in top_entries)
+ # Non-null rows whose value is not among the returned top values.
+ other_count = (row_count - null_count) - top_total
+
+ return {
+ "feature_name": col_name,
+ "feature_type": "categorical",
+ "row_count": row_count,
+ "null_count": null_count,
+ "null_rate": null_count / row_count if row_count > 0 else 0.0,
+ "mean": None,
+ "stddev": None,
+ "min_val": None,
+ "max_val": None,
+ "p50": None,
+ "p75": None,
+ "p90": None,
+ "p95": None,
+ "p99": None,
+ "histogram": {
+ "values": top_entries,
+ "other_count": max(other_count, 0),
+ "unique_count": unique_count,
+ },
+ }
+
class RedshiftRetrievalJob(RetrievalJob):
def __init__(
diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py
index 7226c908d13..a369d23bec2 100644
--- a/sdk/python/feast/infra/offline_stores/snowflake.py
+++ b/sdk/python/feast/infra/offline_stores/snowflake.py
@@ -3,7 +3,8 @@
import os
import uuid
import warnings
-from datetime import datetime, timezone
+from datetime import date, datetime, timezone
+from decimal import Decimal
from functools import reduce
from pathlib import Path
from typing import (
@@ -50,6 +51,17 @@
write_pandas,
write_parquet,
)
+from feast.monitoring.monitoring_utils import (
+ MON_TABLE_FEATURE,
+ MON_TABLE_FEATURE_SERVICE,
+ MON_TABLE_FEATURE_VIEW,
+ MON_TABLE_JOB,
+ empty_categorical_metric,
+ empty_numeric_metric,
+ monitoring_table_meta,
+ normalize_monitoring_row,
+ opt_float,
+)
from feast.repo_config import FeastConfigBaseModel, RepoConfig
from feast.saved_dataset import SavedDatasetStorage
from feast.types import (
@@ -421,6 +433,249 @@ def offline_write_batch(
auto_create_table=True,
)
+ @staticmethod
+ def compute_monitoring_metrics(
+ config: RepoConfig,
+ data_source: DataSource,
+ feature_columns: List[Tuple[str, str]],
+ timestamp_field: str,
+ start_date: Optional[datetime] = None,
+ end_date: Optional[datetime] = None,
+ histogram_bins: int = 20,
+ top_n: int = 10,
+ ) -> List[Dict[str, Any]]:
+ """Compute per-feature monitoring statistics for a Snowflake source in SQL.
+
+ Args:
+ config: Repo config; its offline store must be a Snowflake config.
+ data_source: Must be a ``SnowflakeSource``.
+ feature_columns: ``(column name, kind)`` pairs. Only the kinds
+ ``"numeric"`` and ``"categorical"`` are processed; any other kind
+ is ignored.
+ timestamp_field: Column the start/end window is applied to.
+ start_date: Optional window start.
+ end_date: Optional window end.
+ histogram_bins: Bin count for numeric histograms.
+ top_n: Number of most frequent values kept for categorical features.
+
+ Returns:
+ Metric dicts for all numeric features followed by one dict per
+ categorical feature.
+ """
+ assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig)
+ assert isinstance(data_source, SnowflakeSource)
+
+ from_expression = data_source.get_table_query_string()
+ from_expression = _qualify_snowflake_from_expression(
+ config, data_source, from_expression
+ )
+ ts_filter = get_timestamp_filter_sql(
+ start_date, end_date, timestamp_field, tz=timezone.utc
+ )
+
+ numeric_features = [n for n, t in feature_columns if t == "numeric"]
+ categorical_features = [n for n, t in feature_columns if t == "categorical"]
+ results: List[Dict[str, Any]] = []
+
+ # A single connection is shared by every statistics query.
+ with GetSnowflakeConnection(config.offline_store) as conn:
+ if numeric_features:
+ results.extend(
+ _snowflake_sql_numeric_stats(
+ conn,
+ from_expression,
+ numeric_features,
+ ts_filter,
+ histogram_bins,
+ )
+ )
+
+ for col_name in categorical_features:
+ results.append(
+ _snowflake_sql_categorical_stats(
+ conn, from_expression, col_name, ts_filter, top_n
+ )
+ )
+
+ return results
+
+ @staticmethod
+ def get_monitoring_max_timestamp(
+ config: RepoConfig,
+ data_source: DataSource,
+ timestamp_field: str,
+ ) -> Optional[datetime]:
+ """Return the largest value of ``timestamp_field`` in the source.
+
+ Returns:
+ ``None`` when the query yields no row or a NULL maximum. Otherwise a
+ timezone-aware ``datetime``: a naive datetime is tagged as UTC, and
+ any non-datetime value is combined with midnight UTC.
+ """
+ assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig)
+ assert isinstance(data_source, SnowflakeSource)
+
+ from_expression = data_source.get_table_query_string()
+ from_expression = _qualify_snowflake_from_expression(
+ config, data_source, from_expression
+ )
+
+ with GetSnowflakeConnection(config.offline_store) as conn:
+ cursor = execute_snowflake_statement(
+ conn,
+ f'SELECT MAX("{timestamp_field}") FROM {from_expression} AS _src',
+ )
+ row = cursor.fetchone()
+
+ if row is None or row[0] is None:
+ return None
+ val = row[0]
+ if isinstance(val, pd.Timestamp):
+ val = val.to_pydatetime()
+ if isinstance(val, datetime):
+ return val if val.tzinfo else val.replace(tzinfo=timezone.utc)
+ # Presumably a plain date at this point (datetime.combine requires one).
+ return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc)
+
+ @staticmethod
+ def ensure_monitoring_tables(config: RepoConfig) -> None:
+ """Create the monitoring tables in Snowflake if they do not exist yet.
+
+ Four ``CREATE TABLE IF NOT EXISTS`` statements are executed on one
+ connection, for the feature, feature-view, feature-service and job
+ tables. Table names are qualified with the offline store's database and
+ schema.
+ """
+ assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig)
+
+ fq_feature = _snowflake_monitoring_table_fqn(config, MON_TABLE_FEATURE)
+ fq_view = _snowflake_monitoring_table_fqn(config, MON_TABLE_FEATURE_VIEW)
+ fq_service = _snowflake_monitoring_table_fqn(config, MON_TABLE_FEATURE_SERVICE)
+
+ # Per-feature statistics; "histogram" is a VARIANT column.
+ ddl_feature = f"""
+ CREATE TABLE IF NOT EXISTS {fq_feature} (
+ "project_id" VARCHAR(255) NOT NULL,
+ "feature_view_name" VARCHAR(255) NOT NULL,
+ "feature_name" VARCHAR(255) NOT NULL,
+ "metric_date" DATE NOT NULL,
+ "granularity" VARCHAR(20) NOT NULL DEFAULT 'daily',
+ "data_source_type" VARCHAR(50) NOT NULL DEFAULT 'batch',
+ "computed_at" TIMESTAMP_TZ NOT NULL DEFAULT CURRENT_TIMESTAMP(),
+ "is_baseline" BOOLEAN NOT NULL DEFAULT FALSE,
+ "feature_type" VARCHAR(50) NOT NULL,
+ "row_count" BIGINT,
+ "null_count" BIGINT,
+ "null_rate" DOUBLE,
+ "mean" DOUBLE,
+ "stddev" DOUBLE,
+ "min_val" DOUBLE,
+ "max_val" DOUBLE,
+ "p50" DOUBLE,
+ "p75" DOUBLE,
+ "p90" DOUBLE,
+ "p95" DOUBLE,
+ "p99" DOUBLE,
+ "histogram" VARIANT,
+ PRIMARY KEY ("project_id", "feature_view_name", "feature_name",
+ "metric_date", "granularity", "data_source_type")
+ )
+ """
+ # Aggregates per feature view.
+ ddl_view = f"""
+ CREATE TABLE IF NOT EXISTS {fq_view} (
+ "project_id" VARCHAR(255) NOT NULL,
+ "feature_view_name" VARCHAR(255) NOT NULL,
+ "metric_date" DATE NOT NULL,
+ "granularity" VARCHAR(20) NOT NULL DEFAULT 'daily',
+ "data_source_type" VARCHAR(50) NOT NULL DEFAULT 'batch',
+ "computed_at" TIMESTAMP_TZ NOT NULL DEFAULT CURRENT_TIMESTAMP(),
+ "is_baseline" BOOLEAN NOT NULL DEFAULT FALSE,
+ "total_row_count" BIGINT,
+ "total_features" INTEGER,
+ "features_with_nulls" INTEGER,
+ "avg_null_rate" DOUBLE,
+ "max_null_rate" DOUBLE,
+ PRIMARY KEY ("project_id", "feature_view_name", "metric_date",
+ "granularity", "data_source_type")
+ )
+ """
+ # Aggregates per feature service.
+ ddl_service = f"""
+ CREATE TABLE IF NOT EXISTS {fq_service} (
+ "project_id" VARCHAR(255) NOT NULL,
+ "feature_service_name" VARCHAR(255) NOT NULL,
+ "metric_date" DATE NOT NULL,
+ "granularity" VARCHAR(20) NOT NULL DEFAULT 'daily',
+ "data_source_type" VARCHAR(50) NOT NULL DEFAULT 'batch',
+ "computed_at" TIMESTAMP_TZ NOT NULL DEFAULT CURRENT_TIMESTAMP(),
+ "is_baseline" BOOLEAN NOT NULL DEFAULT FALSE,
+ "total_feature_views" INTEGER,
+ "total_features" INTEGER,
+ "avg_null_rate" DOUBLE,
+ "max_null_rate" DOUBLE,
+ PRIMARY KEY ("project_id", "feature_service_name", "metric_date",
+ "granularity", "data_source_type")
+ )
+ """
+
+ # Job bookkeeping table, keyed by job id.
+ fq_job = _snowflake_monitoring_table_fqn(config, MON_TABLE_JOB)
+ ddl_job = f"""
+ CREATE TABLE IF NOT EXISTS {fq_job} (
+ "job_id" VARCHAR(36) NOT NULL,
+ "project_id" VARCHAR(255) NOT NULL,
+ "feature_view_name" VARCHAR(255),
+ "job_type" VARCHAR(50) NOT NULL,
+ "status" VARCHAR(20) NOT NULL DEFAULT 'pending',
+ "parameters" VARCHAR,
+ "metric_date" DATE NOT NULL,
+ "started_at" TIMESTAMP_TZ,
+ "completed_at" TIMESTAMP_TZ,
+ "error_message" VARCHAR,
+ "result_summary" VARCHAR,
+ PRIMARY KEY ("job_id")
+ )
+ """
+
+ with GetSnowflakeConnection(config.offline_store) as conn:
+ execute_snowflake_statement(conn, ddl_feature)
+ execute_snowflake_statement(conn, ddl_view)
+ execute_snowflake_statement(conn, ddl_service)
+ execute_snowflake_statement(conn, ddl_job)
+
+ @staticmethod
+ def save_monitoring_metrics(
+ config: RepoConfig,
+ metric_type: str,
+ metrics: List[Dict[str, Any]],
+ ) -> None:
+ """Upsert ``metrics`` into the monitoring table for ``metric_type``.
+
+ Does nothing when ``metrics`` is empty. The target table, its columns and
+ its primary-key columns are looked up with ``monitoring_table_meta``.
+ """
+ if not metrics:
+ return
+ assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig)
+
+ table, columns, pk_columns = monitoring_table_meta(metric_type)
+ _snowflake_mon_merge_upsert(
+ config.offline_store, table, columns, pk_columns, metrics
+ )
+
+ @staticmethod
+ def query_monitoring_metrics(
+ config: RepoConfig,
+ project: str,
+ metric_type: str,
+ filters: Optional[Dict[str, Any]] = None,
+ start_date: Optional[date] = None,
+ end_date: Optional[date] = None,
+ ) -> List[Dict[str, Any]]:
+ """Read stored monitoring rows of ``metric_type`` from Snowflake.
+
+ The columns to select come from ``monitoring_table_meta``; filtering and
+ row conversion are delegated to ``_snowflake_mon_query``.
+ """
+ assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig)
+
+ _, columns, _ = monitoring_table_meta(metric_type)
+ return _snowflake_mon_query(
+ config.offline_store,
+ metric_type,
+ columns,
+ project,
+ filters,
+ start_date,
+ end_date,
+ )
+
+ @staticmethod
+ def clear_monitoring_baseline(
+ config: RepoConfig,
+ project: str,
+ feature_view_name: Optional[str] = None,
+ feature_name: Optional[str] = None,
+ data_source_type: Optional[str] = None,
+ ) -> None:
+ """Reset ``is_baseline`` to FALSE on matching rows of the feature table.
+
+ Rows are always restricted to ``project`` and to those currently flagged
+ as baseline. ``feature_view_name``, ``feature_name`` and
+ ``data_source_type`` narrow the update further when they are truthy.
+ """
+ assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig)
+
+ fq_table = _snowflake_monitoring_table_fqn(config, MON_TABLE_FEATURE)
+ conditions = [f'"project_id" = {_snowflake_sql_literal(project)}']
+ if feature_view_name:
+ conditions.append(
+ f'"feature_view_name" = {_snowflake_sql_literal(feature_view_name)}'
+ )
+ if feature_name:
+ conditions.append(
+ f'"feature_name" = {_snowflake_sql_literal(feature_name)}'
+ )
+ if data_source_type:
+ conditions.append(
+ f'"data_source_type" = {_snowflake_sql_literal(data_source_type)}'
+ )
+ # Only touch rows that are currently flagged as baseline.
+ conditions.append('"is_baseline" = TRUE')
+
+ sql = f'UPDATE {fq_table} SET "is_baseline" = FALSE WHERE ' + " AND ".join(
+ conditions
+ )
+
+ with GetSnowflakeConnection(config.offline_store) as conn:
+ execute_snowflake_statement(conn, sql)
+
class SnowflakeRetrievalJob(RetrievalJob):
def __init__(
@@ -640,6 +895,335 @@ def _get_file_names_from_copy_into(self, cursor, native_export_path) -> List[str
]
+# ------------------------------------------------------------------ #
+# Snowflake monitoring SQL push-down & storage helpers
+# ------------------------------------------------------------------ #
+
+
+def _escape_snowflake_sql_string(value: str) -> str:
+    """Escape ``value`` for use inside a single-quoted Snowflake string literal.
+
+    Snowflake interprets backslash escape sequences inside single-quoted
+    string constants, so backslashes are doubled as well as single quotes.
+    Without this, text such as the ``\\"`` sequences produced by
+    ``json.dumps`` would be altered by the SQL parser, and a ``\\'`` pair
+    could end the literal early.
+
+    Backslashes are handled first so the quotes added afterwards are not
+    affected by the backslash doubling.
+    """
+    return value.replace("\\", "\\\\").replace("'", "''")
+
+
+def _snowflake_sql_literal(val: Any) -> str:
+    """Render a Python value as a Snowflake SQL literal.
+
+    Mapping:
+        * ``None`` -> ``NULL``
+        * ``bool`` -> ``TRUE`` / ``FALSE``
+        * ``int`` / finite ``float`` / finite ``Decimal`` -> ``str(val)``
+        * NaN or infinite ``float`` / ``Decimal`` -> ``NULL`` (their string
+          forms are bare words, not numeric literals)
+        * ``datetime`` -> ``TIMESTAMP_TZ '<iso>'``; naive values are tagged UTC
+        * ``date`` -> ``DATE '<iso>'``
+        * anything else -> ``str(val)`` escaped and single-quoted
+    """
+    if val is None:
+        return "NULL"
+    # bool must be tested before int because bool is a subclass of int.
+    if isinstance(val, bool):
+        return "TRUE" if val else "FALSE"
+    if isinstance(val, (int, float)):
+        if isinstance(val, float) and (np.isnan(val) or np.isinf(val)):
+            return "NULL"
+        return str(val)
+    if isinstance(val, Decimal):
+        return str(val) if val.is_finite() else "NULL"
+    # datetime must be tested before date because datetime is a subclass of date.
+    if isinstance(val, datetime):
+        dt = val if val.tzinfo else val.replace(tzinfo=timezone.utc)
+        return f"TIMESTAMP_TZ '{dt.isoformat()}'"
+    if isinstance(val, date):
+        return f"DATE '{val.isoformat()}'"
+    return f"'{_escape_snowflake_sql_string(str(val))}'"
+
+
+def _qualify_snowflake_from_expression(
+    config: RepoConfig,
+    data_source: SnowflakeSource,
+    from_expression: str,
+) -> str:
+    """Prefix a table reference with whatever qualifiers the source omits.
+
+    Only table-based sources without their own database are touched:
+      * neither database nor schema set -> prepend the offline store's
+        ``"database"."schema".``
+      * schema set but no database -> prepend the offline store's
+        ``"database".``
+    In every other case ``from_expression`` is returned unchanged.
+    """
+    if data_source.database or not data_source.table:
+        return from_expression
+    store = config.offline_store
+    if data_source.schema:
+        return f'"{store.database}".{from_expression}'
+    return f'"{store.database}"."{store.schema_}".{from_expression}'
+
+
+def _snowflake_monitoring_table_fqn(
+    config: RepoConfig,
+    table_name: str,
+) -> str:
+    """Return ``"database"."schema"."table_name"`` for a monitoring table.
+
+    Database and schema are taken from the Snowflake offline store config;
+    each of the three parts is wrapped in double quotes.
+    """
+    store_config = config.offline_store
+    assert isinstance(store_config, SnowflakeOfflineStoreConfig)
+    name_parts = (store_config.database, store_config.schema_, table_name)
+    return ".".join(f'"{part}"' for part in name_parts)
+
+
+def _snowflake_sql_numeric_histogram(
+    conn: SnowflakeConnection,
+    from_expression: str,
+    col_name: str,
+    ts_filter: str,
+    bins: int,
+    min_val: float,
+    max_val: float,
+) -> Dict[str, Any]:
+    """Build an equal-width histogram of one numeric column with WIDTH_BUCKET.
+
+    When ``min_val == max_val`` a single bucket holding the non-null row count
+    is returned with ``bin_width`` 0.0. Otherwise the range is divided into
+    ``bins`` buckets of width ``(max_val - min_val) / bins``.
+
+    Args:
+        conn: Open Snowflake connection.
+        from_expression: Table or sub-query text placed after ``FROM``.
+        col_name: Column to bucket (NULLs are excluded).
+        ts_filter: SQL predicate limiting the rows.
+        bins: Number of buckets.
+        min_val: Lower bound of the histogram range.
+        max_val: Upper bound of the histogram range.
+
+    Returns:
+        Dict with ``bins`` (the bin edges), ``counts`` (rows per bucket) and
+        ``bin_width``.
+    """
+    q_col = f'"{col_name}"'
+
+    # Degenerate range: every non-null value is identical, so just count them.
+    if min_val == max_val:
+        cursor = execute_snowflake_statement(
+            conn,
+            f"SELECT COUNT(*) FROM {from_expression} AS _src "
+            f"WHERE {q_col} IS NOT NULL AND {ts_filter}",
+        )
+        row = cursor.fetchone()
+        cnt = (row or (0,))[0]
+        return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0}
+
+    # WIDTH_BUCKET's upper bound is exclusive, so nudge it just above max_val
+    # to keep the maximum value inside the last bucket.
+    upper = max_val + (max_val - min_val) * 1e-10
+    bin_width = (max_val - min_val) / bins
+
+    query = (
+        f"SELECT WIDTH_BUCKET(CAST({q_col} AS DOUBLE), {min_val}, {upper}, {bins}) "
+        f"AS bucket, COUNT(*) AS cnt "
+        f"FROM {from_expression} AS _src "
+        f"WHERE {q_col} IS NOT NULL AND {ts_filter} "
+        f"GROUP BY bucket ORDER BY bucket"
+    )
+
+    cursor = execute_snowflake_statement(conn, query)
+    rows = cursor.fetchall()
+
+    counts = [0] * bins
+    for bucket, cnt in rows:
+        if bucket is None:
+            continue
+        # WIDTH_BUCKET reports 0 for values below the range and bins + 1 for
+        # values at or above the upper bound. The nudge above can vanish in
+        # floating point when the range is tiny relative to max_val, which
+        # puts the maximum values in bucket bins + 1. Fold out-of-range
+        # buckets into the nearest edge bin (the same clamping the Redshift
+        # histogram does with LEAST/GREATEST) instead of dropping those rows,
+        # and accumulate because two buckets can now share one bin.
+        idx = min(max(int(bucket), 1), bins) - 1
+        counts[idx] += cnt
+
+    bin_edges = [min_val + i * bin_width for i in range(bins + 1)]
+    return {
+        "bins": [float(b) for b in bin_edges],
+        "counts": counts,
+        "bin_width": float(bin_width),
+    }
+
+
+def _snowflake_sql_numeric_stats(
+ conn: SnowflakeConnection,
+ from_expression: str,
+ feature_names: List[str],
+ ts_filter: str,
+ histogram_bins: int,
+) -> List[Dict[str, Any]]:
+ """Compute summary statistics for numeric features with SQL push-down.
+
+ One aggregate query returns the total row count plus, for every feature,
+ its non-null count, mean, sample standard deviation, min, max and the
+ 50/75/90/95/99th percentiles (all computed on the column cast to DOUBLE).
+ A separate histogram query is then issued for each feature that has at
+ least one non-null value.
+
+ Returns:
+ One metric dict per feature, in the order of ``feature_names``. If the
+ aggregate query yields no row, ``empty_numeric_metric`` is returned for
+ each feature instead.
+ """
+ # Position 0 is the overall row count; each feature then appends exactly
+ # ten expressions, which the index arithmetic below relies on.
+ select_parts = ["COUNT(*)"]
+ for col in feature_names:
+ q = f'"{col}"'
+ c = f"CAST({q} AS DOUBLE)"
+ select_parts.extend(
+ [
+ f"COUNT({q})",
+ f"AVG({c})",
+ f"STDDEV_SAMP({c})",
+ f"MIN({c})",
+ f"MAX({c})",
+ f"PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {c})",
+ f"PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {c})",
+ f"PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY {c})",
+ f"PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY {c})",
+ f"PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY {c})",
+ ]
+ )
+
+ query = (
+ f"SELECT {', '.join(select_parts)} "
+ f"FROM {from_expression} AS _src WHERE {ts_filter}"
+ )
+
+ cursor = execute_snowflake_statement(conn, query)
+ row = cursor.fetchone()
+
+ if row is None:
+ return [empty_numeric_metric(n) for n in feature_names]
+
+ row_count = row[0]
+ results: List[Dict[str, Any]] = []
+
+ for i, col in enumerate(feature_names):
+ # Offset of this feature's ten aggregates within the result row:
+ # +0 non-null count, +1 mean, +2 stddev, +3 min, +4 max, +5..+9 percentiles.
+ base = 1 + i * 10
+ non_null = row[base] or 0
+ null_count = row_count - non_null
+
+ min_val = opt_float(row[base + 3])
+ max_val = opt_float(row[base + 4])
+
+ result: Dict[str, Any] = {
+ "feature_name": col,
+ "feature_type": "numeric",
+ "row_count": row_count,
+ "null_count": null_count,
+ "null_rate": null_count / row_count if row_count > 0 else 0.0,
+ "mean": opt_float(row[base + 1]),
+ "stddev": opt_float(row[base + 2]),
+ "min_val": min_val,
+ "max_val": max_val,
+ "p50": opt_float(row[base + 5]),
+ "p75": opt_float(row[base + 6]),
+ "p90": opt_float(row[base + 7]),
+ "p95": opt_float(row[base + 8]),
+ "p99": opt_float(row[base + 9]),
+ "histogram": None,
+ }
+
+ # The histogram needs a known value range, so it is skipped when the
+ # column is entirely NULL in the selected window.
+ if min_val is not None and max_val is not None and non_null > 0:
+ result["histogram"] = _snowflake_sql_numeric_histogram(
+ conn,
+ from_expression,
+ col,
+ ts_filter,
+ histogram_bins,
+ min_val,
+ max_val,
+ )
+
+ results.append(result)
+
+ return results
+
+
+def _snowflake_sql_categorical_stats(
+ conn: SnowflakeConnection,
+ from_expression: str,
+ col_name: str,
+ ts_filter: str,
+ top_n: int,
+) -> Dict[str, Any]:
+ """Compute categorical statistics for one column with SQL push-down.
+
+ A single query returns the ``top_n`` most frequent non-null values of the
+ column (converted with TO_VARCHAR) with their counts; every returned row
+ also carries the total row count, the null count and the distinct non-null
+ count of the filtered data as scalar sub-queries.
+
+ Returns:
+ A metric dict whose numeric fields are ``None`` and whose ``histogram``
+ holds the top values, the count of rows outside the top values
+ (floored at 0) and the unique count. When the query returns no rows,
+ ``empty_categorical_metric(col_name)`` is returned.
+ """
+ q_col = f'"{col_name}"'
+
+ query = (
+ f"WITH filtered AS ("
+ f" SELECT * FROM {from_expression} AS _src WHERE {ts_filter}"
+ f") "
+ f"SELECT "
+ f" (SELECT COUNT(*) FROM filtered) AS row_count, "
+ f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, "
+ f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered "
+ f" WHERE {q_col} IS NOT NULL) AS unique_count, "
+ f" TO_VARCHAR({q_col}) AS value, COUNT(*) AS cnt "
+ f"FROM filtered WHERE {q_col} IS NOT NULL "
+ f"GROUP BY {q_col} ORDER BY cnt DESC LIMIT {int(top_n)}"
+ )
+
+ cursor = execute_snowflake_statement(conn, query)
+ rows = cursor.fetchall()
+
+ # NOTE(review): the outer query only yields rows for non-null values, so a
+ # column that is entirely NULL also ends up here and its row/null counts
+ # are not reported - confirm this is intended.
+ if not rows:
+ return empty_categorical_metric(col_name)
+
+ # The three scalar sub-query columns are identical on every row; read them
+ # from the first one.
+ row_count = rows[0][0]
+ null_count = rows[0][1]
+ unique_count = rows[0][2]
+
+ top_entries = [{"value": r[3], "count": r[4]} for r in rows]
+ top_total = sum(e["count"] for e in top_entries)
+ # Non-null rows whose value is not among the returned top values.
+ other_count = (row_count - null_count) - top_total
+
+ return {
+ "feature_name": col_name,
+ "feature_type": "categorical",
+ "row_count": row_count,
+ "null_count": null_count,
+ "null_rate": null_count / row_count if row_count > 0 else 0.0,
+ "mean": None,
+ "stddev": None,
+ "min_val": None,
+ "max_val": None,
+ "p50": None,
+ "p75": None,
+ "p90": None,
+ "p95": None,
+ "p99": None,
+ "histogram": {
+ "values": top_entries,
+ "other_count": max(other_count, 0),
+ "unique_count": unique_count,
+ },
+ }
+
+
+def _snowflake_mon_merge_upsert(
+    offline_store: SnowflakeOfflineStoreConfig,
+    table: str,
+    columns: List[str],
+    pk_columns: List[str],
+    rows: List[Dict[str, Any]],
+) -> None:
+    """Upsert ``rows`` into a monitoring table, one MERGE statement per row.
+
+    Each row becomes a one-row ``SELECT`` of literals that is merged into the
+    fully qualified target table on ``pk_columns``: matching rows have their
+    non-key columns overwritten, the rest are inserted. A non-null
+    ``histogram`` value is JSON-encoded and wrapped in ``PARSE_JSON``. All
+    statements run on a single connection.
+    """
+    target = f'"{offline_store.database}"."{offline_store.schema_}"."{table}"'
+    updatable = [c for c in columns if c not in pk_columns]
+
+    # These fragments do not depend on the row, so build them once.
+    on_sql = " AND ".join(f't."{pk}" = s."{pk}"' for pk in pk_columns)
+    update_sql = ", ".join(f't."{c}" = s."{c}"' for c in updatable)
+    insert_cols = ", ".join(f'"{c}"' for c in columns)
+    insert_vals = ", ".join(f's."{c}"' for c in columns)
+
+    def _select_item(col: str, val: Any) -> str:
+        # Render one "<literal> AS "<col>"" entry of the MERGE source row.
+        if col != "histogram":
+            return f'{_snowflake_sql_literal(val)} AS "{col}"'
+        if val is None:
+            return f'NULL AS "{col}"'
+        encoded = json.dumps(val)
+        return f'PARSE_JSON({_snowflake_sql_literal(encoded)}) AS "{col}"'
+
+    with GetSnowflakeConnection(offline_store) as conn:
+        for row in rows:
+            using = ", ".join(_select_item(col, row.get(col)) for col in columns)
+            sql = (
+                f"MERGE INTO {target} AS t "
+                f"USING (SELECT {using}) AS s "
+                f"ON {on_sql} "
+                f"WHEN MATCHED THEN UPDATE SET {update_sql} "
+                f"WHEN NOT MATCHED THEN INSERT ({insert_cols}) VALUES ({insert_vals})"
+            )
+            execute_snowflake_statement(conn, sql)
+
+
+def _snowflake_mon_query(
+    offline_store: SnowflakeOfflineStoreConfig,
+    metric_type: str,
+    columns: List[str],
+    project: str,
+    filters: Optional[Dict[str, Any]] = None,
+    start_date: Optional[date] = None,
+    end_date: Optional[date] = None,
+) -> List[Dict[str, Any]]:
+    """Read stored monitoring rows for ``metric_type`` from Snowflake.
+
+    Args:
+        offline_store: Snowflake offline store config (connection, database
+            and schema).
+        metric_type: Passed to ``monitoring_table_meta`` to pick the table.
+        columns: Columns to select; also used as the keys of each result dict.
+        project: Restricts on ``project_id`` when truthy; a falsy value
+            disables the project filter.
+        filters: Optional ``column -> value`` equality filters; entries whose
+            value is ``None`` are ignored.
+        start_date: Inclusive lower bound on ``metric_date`` when given.
+        end_date: Inclusive upper bound on ``metric_date`` when given.
+
+    Returns:
+        One dict per row, passed through ``normalize_monitoring_row``. Rows
+        are ordered by ``metric_date`` when it is selected, else by ``job_id``.
+    """
+    table, _, _ = monitoring_table_meta(metric_type)
+    fq = f'"{offline_store.database}"."{offline_store.schema_}"."{table}"'
+
+    conditions: List[str] = []
+    if project:
+        conditions.append(f'"project_id" = {_snowflake_sql_literal(project)}')
+    if filters:
+        for key, value in filters.items():
+            if value is not None:
+                # Filter keys become quoted identifiers: double any embedded
+                # double quote so a key cannot close the identifier and
+                # inject SQL.
+                quoted_key = '"' + str(key).replace('"', '""') + '"'
+                conditions.append(f"{quoted_key} = {_snowflake_sql_literal(value)}")
+
+    if start_date:
+        conditions.append(f'"metric_date" >= {_snowflake_sql_literal(start_date)}')
+    if end_date:
+        conditions.append(f'"metric_date" <= {_snowflake_sql_literal(end_date)}')
+
+    col_list = ", ".join(f'"{c}"' for c in columns)
+    where_clause = " AND ".join(conditions) if conditions else "TRUE"
+    order_col = "metric_date" if "metric_date" in columns else "job_id"
+    sql = f'SELECT {col_list} FROM {fq} WHERE {where_clause} ORDER BY "{order_col}" ASC'
+
+    with GetSnowflakeConnection(offline_store) as conn:
+        cursor = execute_snowflake_statement(conn, sql)
+        rows = cursor.fetchall()
+
+    results: List[Dict[str, Any]] = []
+    for row in rows:
+        # Result tuples follow the order of the SELECT list.
+        record = dict(zip(columns, row))
+        results.append(normalize_monitoring_row(record))
+
+    return results
+
+
def _get_entity_schema(
entity_df: Union[pd.DataFrame, str],
snowflake_conn: SnowflakeConnection,
diff --git a/sdk/python/feast/metrics.py b/sdk/python/feast/metrics.py
index 694f25a687e..13a855d587b 100644
--- a/sdk/python/feast/metrics.py
+++ b/sdk/python/feast/metrics.py
@@ -42,6 +42,7 @@
"""
import atexit
+import json
import logging
import os
import shutil
@@ -51,7 +52,7 @@
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timezone
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, List, Optional
import psutil
@@ -123,6 +124,8 @@ class _MetricsFlags:
push: bool = False
materialization: bool = False
freshness: bool = False
+ offline_features: bool = False
+ audit_logging: bool = False
_config = _MetricsFlags()
@@ -144,6 +147,8 @@ def build_metrics_flags(metrics_config: Optional[object] = None) -> _MetricsFlag
push=True,
materialization=True,
freshness=True,
+ offline_features=True,
+ audit_logging=False,
)
return _MetricsFlags(
enabled=True,
@@ -153,6 +158,8 @@ def build_metrics_flags(metrics_config: Optional[object] = None) -> _MetricsFlag
push=getattr(metrics_config, "push", True),
materialization=getattr(metrics_config, "materialization", True),
freshness=getattr(metrics_config, "freshness", True),
+ offline_features=getattr(metrics_config, "offline_features", True),
+ audit_logging=getattr(metrics_config, "audit_logging", False),
)
@@ -260,6 +267,33 @@ def build_metrics_flags(metrics_config: Optional[object] = None) -> _MetricsFlag
multiprocess_mode="max",
)
+# ---------------------------------------------------------------------------
+# Offline store retrieval metrics
+# ---------------------------------------------------------------------------
+# Request counter, labelled by retrieval method and outcome status.
+offline_store_request_total = Counter(
+ "feast_offline_store_request_total",
+ "Total offline store retrieval requests",
+ ["method", "status"],
+)
+# Latency histogram per method; bucket bounds run from 0.1 s to 600 s.
+offline_store_request_latency_seconds = Histogram(
+ "feast_offline_store_request_latency_seconds",
+ "Latency of offline store retrieval operations in seconds",
+ ["method"],
+ buckets=(0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0, 600.0),
+)
+# Result-size histogram per method; bucket bounds run from 100 to 5,000,000 rows.
+offline_store_row_count = Histogram(
+ "feast_offline_store_row_count",
+ "Number of rows returned by offline store retrieval",
+ ["method"],
+ buckets=(100, 1000, 10000, 100000, 500000, 1000000, 5000000),
+)
+
+# ---------------------------------------------------------------------------
+# Audit logger — separate from the main feast logger so operators can
+# route SOX-style audit entries to a dedicated sink.
+# ---------------------------------------------------------------------------
+audit_logger = logging.getLogger("feast.audit")
+
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
@@ -388,6 +422,72 @@ def track_materialization(
)
+def emit_online_audit_log(
+    *,
+    requestor_id: str,
+    entity_keys: List[str],
+    entity_count: int,
+    feature_views: List[str],
+    feature_count: int,
+    status: str,
+    latency_ms: float,
+):
+    """Write one JSON audit record describing an online feature request.
+
+    Nothing is logged unless the ``audit_logging`` flag is enabled. The record
+    is stamped with the current UTC time and ``latency_ms`` is rounded to two
+    decimals before being written to the audit logger at INFO level.
+    """
+    if _config.audit_logging:
+        entry = {
+            "event": "online_feature_request",
+            "timestamp": datetime.now(tz=timezone.utc).isoformat(),
+            "requestor_id": requestor_id,
+            "entity_keys": entity_keys,
+            "entity_count": entity_count,
+            "feature_views": feature_views,
+            "feature_count": feature_count,
+            "status": status,
+            "latency_ms": round(latency_ms, 2),
+        }
+        audit_logger.info(_json_dumps(entry))
+
+
+def emit_offline_audit_log(
+    *,
+    method: str,
+    feature_views: List[str],
+    feature_count: int,
+    row_count: int,
+    status: str,
+    start_time: str,
+    end_time: str,
+    duration_ms: float,
+):
+    """Write one JSON audit record describing an offline feature retrieval.
+
+    Nothing is logged unless the ``audit_logging`` flag is enabled. The record
+    is stamped with the current UTC time and ``duration_ms`` is rounded to two
+    decimals before being written to the audit logger at INFO level.
+    """
+    if _config.audit_logging:
+        entry = {
+            "event": "offline_feature_retrieval",
+            "timestamp": datetime.now(tz=timezone.utc).isoformat(),
+            "method": method,
+            "start_time": start_time,
+            "end_time": end_time,
+            "feature_views": feature_views,
+            "feature_count": feature_count,
+            "row_count": row_count,
+            "status": status,
+            "duration_ms": round(duration_ms, 2),
+        }
+        audit_logger.info(_json_dumps(entry))
+
+
+def _json_dumps(obj: dict) -> str:
+    """Serialise ``obj`` to compact JSON (no spaces after separators).
+
+    Values the JSON encoder cannot handle natively are converted with
+    ``str()`` rather than raising ``TypeError``, so an unexpected value in an
+    audit record cannot fail the request that is being audited. Output for
+    natively serialisable input is unchanged.
+    """
+    return json.dumps(obj, separators=(",", ":"), default=str)
+
+
def update_feature_freshness(
store: "FeatureStore",
) -> None:
@@ -507,6 +607,8 @@ def start_metrics_server(
push=True,
materialization=True,
freshness=True,
+ offline_features=True,
+ audit_logging=False,
)
from prometheus_client import CollectorRegistry, make_wsgi_app
diff --git a/sdk/python/feast/monitoring/__init__.py b/sdk/python/feast/monitoring/__init__.py
new file mode 100644
index 00000000000..69a921060a5
--- /dev/null
+++ b/sdk/python/feast/monitoring/__init__.py
@@ -0,0 +1,7 @@
+from feast.monitoring.dqm_job_manager import DQMJobManager
+from feast.monitoring.metrics_calculator import MetricsCalculator
+
+__all__ = [
+ "DQMJobManager",
+ "MetricsCalculator",
+]
diff --git a/sdk/python/feast/monitoring/dqm_job_manager.py b/sdk/python/feast/monitoring/dqm_job_manager.py
new file mode 100644
index 00000000000..76103a169a7
--- /dev/null
+++ b/sdk/python/feast/monitoring/dqm_job_manager.py
@@ -0,0 +1,148 @@
+import json
+import logging
+import uuid
+from datetime import date, datetime, timezone
+from typing import Any, Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+# Values written to the "status" field of a DQM job row.  A job is created as
+# "pending", moved to "running" when execution starts, and ends as either
+# "completed" or "failed".
+JOB_STATUS_PENDING = "pending"
+JOB_STATUS_RUNNING = "running"
+JOB_STATUS_COMPLETED = "completed"
+JOB_STATUS_FAILED = "failed"
+
+
+class DQMJobManager:
+    """Create, read and update DQM job records through an offline store.
+
+    Job rows are written with ``save_monitoring_metrics(config, "job", [row])``
+    and read back with ``query_monitoring_metrics(..., metric_type="job")`` on
+    the offline store passed to the constructor.  The ``parameters`` and
+    ``result_summary`` fields are persisted as JSON strings (or ``None``) and
+    are decoded into Python objects by :meth:`get_job`.
+    """
+
+    def __init__(self, offline_store, config):
+        self._offline_store = offline_store
+        self._config = config
+
+    def ensure_table(self) -> None:
+        """Ask the offline store to ensure its monitoring tables exist."""
+        self._offline_store.ensure_monitoring_tables(self._config)
+
+    def submit(
+        self,
+        project: str,
+        job_type: str,
+        feature_view_name: Optional[str] = None,
+        parameters: Optional[Dict[str, Any]] = None,
+    ) -> str:
+        """Persist a new job row in the ``pending`` state.
+
+        Args:
+            project: Stored in the row's ``project_id`` field.
+            job_type: Job kind; :meth:`execute_job` understands
+                ``"auto_compute"``, ``"baseline"`` and ``"compute"``.
+            feature_view_name: Optional feature view the job is scoped to.
+            parameters: Optional job arguments, stored JSON-encoded.  An empty
+                or missing mapping is stored as ``None``.
+
+        Returns:
+            The generated job id (a UUID4 string).
+        """
+        job_id = str(uuid.uuid4())
+        now = datetime.now(timezone.utc)
+        row = {
+            "job_id": job_id,
+            "project_id": project,
+            "feature_view_name": feature_view_name,
+            "job_type": job_type,
+            "status": JOB_STATUS_PENDING,
+            "parameters": json.dumps(parameters) if parameters else None,
+            "metric_date": now.date(),
+            "started_at": None,
+            "completed_at": None,
+            "error_message": None,
+            "result_summary": None,
+        }
+        self._offline_store.save_monitoring_metrics(self._config, "job", [row])
+        return job_id
+
+    def get_job(self, job_id: str) -> Optional[Dict[str, Any]]:
+        """Return the first stored row for ``job_id``, or ``None`` if none exists.
+
+        String values of ``parameters`` and ``result_summary`` are JSON-decoded;
+        values that fail to decode are returned unchanged.
+        """
+        rows = self._offline_store.query_monitoring_metrics(
+            config=self._config,
+            project="",
+            metric_type="job",
+            filters={"job_id": job_id},
+        )
+        if not rows:
+            return None
+        record = rows[0]
+        for key in ("parameters", "result_summary"):
+            val = record.get(key)
+            if isinstance(val, str):
+                try:
+                    record[key] = json.loads(val)
+                except (json.JSONDecodeError, TypeError):
+                    # Leave the raw string in place rather than failing the read.
+                    pass
+        return record
+
+    def update_status(
+        self,
+        job_id: str,
+        status: str,
+        error_message: Optional[str] = None,
+        result_summary: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """Set a job's status and re-save its row.
+
+        ``started_at`` is stamped when moving to ``running``; ``completed_at``
+        when moving to ``completed`` or ``failed``.  ``error_message`` and
+        ``result_summary`` overwrite the stored values only when provided.
+        If the job does not exist a warning is logged and nothing is saved.
+        """
+        job = self.get_job(job_id)
+        if job is None:
+            logger.warning("Cannot update status of unknown DQM job '%s'", job_id)
+            return
+
+        now = datetime.now(timezone.utc)
+        job["status"] = status
+
+        if status == JOB_STATUS_RUNNING:
+            job["started_at"] = now
+        elif status in (JOB_STATUS_COMPLETED, JOB_STATUS_FAILED):
+            job["completed_at"] = now
+
+        if error_message is not None:
+            job["error_message"] = error_message
+
+        if result_summary is not None:
+            job["result_summary"] = json.dumps(result_summary)
+        else:
+            # get_job() decoded any previously stored summary; encode it again
+            # so the field is always written as a JSON string, like "parameters".
+            stored_summary = job.get("result_summary")
+            if stored_summary is not None and not isinstance(stored_summary, str):
+                job["result_summary"] = json.dumps(stored_summary)
+
+        if "parameters" in job and not isinstance(job["parameters"], str):
+            job["parameters"] = (
+                json.dumps(job["parameters"]) if job["parameters"] else None
+            )
+
+        # The store may hand the date back as an ISO string; save it as a date.
+        if isinstance(job.get("metric_date"), str):
+            job["metric_date"] = date.fromisoformat(job["metric_date"])
+
+        self._offline_store.save_monitoring_metrics(self._config, "job", [job])
+
+    def execute_job(self, job_id: str, monitoring_service) -> Dict[str, Any]:
+        """Run a stored job synchronously and record its status transitions.
+
+        Args:
+            job_id: Id returned by :meth:`submit`.
+            monitoring_service: Object providing ``auto_compute``,
+                ``compute_baseline`` and ``compute_metrics``.
+
+        Returns:
+            The result returned by the dispatched ``monitoring_service`` call.
+
+        Raises:
+            ValueError: If the job does not exist or has an unknown job type.
+            Exception: Any error raised while running the job is re-raised
+                after the job has been marked ``failed``.
+        """
+        job = self.get_job(job_id)
+        if job is None:
+            raise ValueError(f"Failed to find DQM job '{job_id}'")
+
+        self.update_status(job_id, JOB_STATUS_RUNNING)
+
+        try:
+            params = job.get("parameters") or {}
+            job_type = job["job_type"]
+            project = job["project_id"]
+
+            if job_type == "auto_compute":
+                result = monitoring_service.auto_compute(
+                    project=project,
+                    feature_view_name=job.get("feature_view_name"),
+                )
+            elif job_type == "baseline":
+                result = monitoring_service.compute_baseline(
+                    project=project,
+                    feature_view_name=job.get("feature_view_name"),
+                    feature_names=params.get("feature_names"),
+                )
+            elif job_type == "compute":
+                result = monitoring_service.compute_metrics(
+                    project=project,
+                    feature_view_name=job.get("feature_view_name"),
+                    feature_names=params.get("feature_names"),
+                    start_date=date.fromisoformat(params["start_date"])
+                    if params.get("start_date")
+                    else None,
+                    end_date=date.fromisoformat(params["end_date"])
+                    if params.get("end_date")
+                    else None,
+                    granularity=params.get("granularity", "daily"),
+                )
+            else:
+                raise ValueError(f"Unknown job type '{job_type}'")
+
+            self.update_status(job_id, JOB_STATUS_COMPLETED, result_summary=result)
+            return result
+
+        except Exception as e:
+            try:
+                self.update_status(job_id, JOB_STATUS_FAILED, error_message=str(e))
+            except Exception:
+                # Recording the failure is best-effort: a persistence error must
+                # not replace the exception that actually failed the job.
+                logger.exception("Failed to mark DQM job '%s' as failed", job_id)
+            raise
diff --git a/sdk/python/feast/monitoring/metrics_calculator.py b/sdk/python/feast/monitoring/metrics_calculator.py
new file mode 100644
index 00000000000..1b8b3b3e7ca
--- /dev/null
+++ b/sdk/python/feast/monitoring/metrics_calculator.py
@@ -0,0 +1,187 @@
+import logging
+import math
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+import pyarrow as pa
+import pyarrow.compute as pc
+
+from feast.types import PrimitiveFeastType
+
+logger = logging.getLogger(__name__)
+
+
+def _safe_float(val):
+    """Convert ``val`` to ``float``; give ``None`` for ``None``, NaN or +/-Inf."""
+    if val is not None:
+        as_float = float(val)
+        if math.isfinite(as_float):
+            return as_float
+    return None
+
+
+# Feast primitive types that MetricsCalculator.classify_feature reports as
+# "numeric".
+_NUMERIC_TYPES = {
+ PrimitiveFeastType.INT32,
+ PrimitiveFeastType.INT64,
+ PrimitiveFeastType.FLOAT32,
+ PrimitiveFeastType.FLOAT64,
+ PrimitiveFeastType.DECIMAL,
+}
+
+# Feast primitive types that MetricsCalculator.classify_feature reports as
+# "categorical".  Any primitive type in neither set is left unclassified.
+_CATEGORICAL_TYPES = {
+ PrimitiveFeastType.STRING,
+ PrimitiveFeastType.BOOL,
+}
+
+
+class MetricsCalculator:
+    """Compute per-feature summary statistics from PyArrow columns.
+
+    Args:
+        histogram_bins: ``bins`` argument passed to ``numpy.histogram`` for
+            numeric features.
+        top_n: Number of most frequent values reported individually for
+            categorical features; the remainder is summed into ``other_count``.
+    """
+
+    def __init__(self, histogram_bins: int = 20, top_n: int = 10):
+        self.histogram_bins = histogram_bins
+        self.top_n = top_n
+
+    @staticmethod
+    def classify_feature(dtype) -> Optional[str]:
+        """Classify a Feast dtype as ``"numeric"``, ``"categorical"`` or ``None``.
+
+        When ``dtype`` has a truthy ``base_type`` attribute, that base type is
+        classified instead of ``dtype`` itself.
+        """
+        primitive = dtype
+        if hasattr(dtype, "base_type"):
+            primitive = dtype.base_type if dtype.base_type else dtype
+
+        if isinstance(primitive, PrimitiveFeastType):
+            if primitive in _NUMERIC_TYPES:
+                return "numeric"
+            if primitive in _CATEGORICAL_TYPES:
+                return "categorical"
+        return None
+
+    @staticmethod
+    def classify_feature_arrow(arrow_type: pa.DataType) -> Optional[str]:
+        """Classify a PyArrow data type as numeric or categorical."""
+        if (
+            pa.types.is_integer(arrow_type)
+            or pa.types.is_floating(arrow_type)
+            or pa.types.is_decimal(arrow_type)
+        ):
+            return "numeric"
+        if (
+            pa.types.is_string(arrow_type)
+            or pa.types.is_large_string(arrow_type)
+            or pa.types.is_boolean(arrow_type)
+        ):
+            return "categorical"
+        return None
+
+    @staticmethod
+    def _empty_result(feature_type: str, array) -> Dict:
+        """Build a metrics row with the counts filled in and every statistic unset."""
+        total = len(array)
+        null_count = array.null_count
+        return {
+            "feature_type": feature_type,
+            "row_count": total,
+            "null_count": null_count,
+            "null_rate": null_count / total if total > 0 else 0.0,
+            "mean": None,
+            "stddev": None,
+            "min_val": None,
+            "max_val": None,
+            "p50": None,
+            "p75": None,
+            "p90": None,
+            "p95": None,
+            "p99": None,
+            "histogram": None,
+        }
+
+    def compute_numeric(self, array: pa.Array) -> Dict:
+        """Compute counts, moments, quantiles and a histogram for a numeric column.
+
+        Nulls are dropped and the remaining values are cast to float64.  Every
+        statistic that comes out as NaN or infinite is reported as ``None``.
+        The histogram is built from finite values only and stays ``None`` when
+        there are none.
+        """
+        result = self._empty_result("numeric", array)
+
+        valid = pc.drop_null(array)  # type: ignore[attr-defined]
+        if len(valid) == 0:
+            return result
+
+        float_array = pc.cast(valid, pa.float64())
+        result["mean"] = _safe_float(pc.mean(float_array).as_py())  # type: ignore[attr-defined]
+        # ddof=1: sample standard deviation.
+        result["stddev"] = _safe_float(pc.stddev(float_array, ddof=1).as_py())  # type: ignore[attr-defined]
+
+        min_max = pc.min_max(float_array)  # type: ignore[attr-defined]
+        result["min_val"] = _safe_float(min_max["min"].as_py())
+        result["max_val"] = _safe_float(min_max["max"].as_py())
+
+        quantiles = pc.quantile(float_array, q=[0.50, 0.75, 0.90, 0.95, 0.99])  # type: ignore[attr-defined]
+        quantile_keys = ("p50", "p75", "p90", "p95", "p99")
+        for key, value in zip(quantile_keys, quantiles.to_pylist()):
+            result[key] = _safe_float(value)
+
+        np_array = float_array.to_numpy()
+        # numpy.histogram raises ValueError when its auto-detected range is not
+        # finite, so NaN/Inf values are excluded before binning.
+        finite_values = np_array[np.isfinite(np_array)]
+        if finite_values.size > 0:
+            counts, bin_edges = np.histogram(finite_values, bins=self.histogram_bins)
+            result["histogram"] = {
+                "bins": bin_edges.tolist(),
+                "counts": counts.tolist(),
+                "bin_width": float(bin_edges[1] - bin_edges[0])
+                if len(bin_edges) > 1
+                else 0,
+            }
+
+        return result
+
+    def compute_categorical(self, array: pa.Array) -> Dict:
+        """Compute counts and a top-N value histogram for a categorical column.
+
+        The numeric statistic fields are always ``None``.  ``histogram`` holds
+        the ``top_n`` most frequent non-null values, the summed count of all
+        other values and the number of distinct non-null values.
+        """
+        result = self._empty_result("categorical", array)
+
+        valid = pc.drop_null(array)  # type: ignore[attr-defined]
+        if len(valid) == 0:
+            return result
+
+        value_counts = pc.value_counts(valid)  # type: ignore[attr-defined]
+        entries = [
+            {"value": vc["values"].as_py(), "count": vc["counts"].as_py()}
+            for vc in value_counts
+        ]
+        entries.sort(key=lambda x: x["count"], reverse=True)
+
+        unique_count = len(entries)
+        top_entries = entries[: self.top_n]
+        other_count = sum(e["count"] for e in entries[self.top_n :])
+
+        result["histogram"] = {
+            "values": top_entries,
+            "other_count": other_count,
+            "unique_count": unique_count,
+        }
+
+        return result
+
+    def compute_all(
+        self,
+        table: pa.Table,
+        feature_fields: List[Tuple[str, str]],
+    ) -> List[Dict]:
+        """Compute metrics for each ``(name, feature_type)`` pair found in ``table``.
+
+        Columns missing from the table are skipped with a warning; pairs whose
+        type is neither ``"numeric"`` nor ``"categorical"`` are skipped
+        silently.  Each returned row carries the column name as ``feature_name``.
+        """
+        results = []
+        for name, ftype in feature_fields:
+            if name not in table.column_names:
+                logger.warning("Column '%s' not found in arrow table, skipping", name)
+                continue
+            column = table.column(name)
+            if ftype == "numeric":
+                metrics = self.compute_numeric(column)
+            elif ftype == "categorical":
+                metrics = self.compute_categorical(column)
+            else:
+                continue
+            metrics["feature_name"] = name
+            results.append(metrics)
+        return results
diff --git a/sdk/python/feast/monitoring/monitoring_service.py b/sdk/python/feast/monitoring/monitoring_service.py
new file mode 100644
index 00000000000..888690b806d
--- /dev/null
+++ b/sdk/python/feast/monitoring/monitoring_service.py
@@ -0,0 +1,1254 @@
+import logging
+import math
+import time
+from collections import defaultdict
+from datetime import date, datetime, timedelta, timezone
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+
+from feast.feature_logging import LOG_TIMESTAMP_FIELD, FeatureServiceLoggingSource
+from feast.infra.offline_stores.offline_store import OfflineStore
+from feast.monitoring.dqm_job_manager import DQMJobManager
+from feast.monitoring.metrics_calculator import MetricsCalculator
+from feast.monitoring.monitoring_utils import build_view_aggregate
+
+if TYPE_CHECKING:
+ from feast.feature_store import FeatureStore
+
+logger = logging.getLogger(__name__)
+
+# Granularity names accepted by compute_metrics / compute_log_metrics.
+VALID_GRANULARITIES = OfflineStore.MONITORING_VALID_GRANULARITIES
+
+# Open-ended time bounds; compute_baseline passes them as the start/end of the
+# range so the baseline is computed over all available source data.
+_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
+_FAR_FUTURE = datetime(2099, 12, 31, 23, 59, 59, tzinfo=timezone.utc)
+
+# Look-back window per granularity used by the auto-compute methods: metrics
+# cover [max_timestamp - window, max_timestamp].  "monthly" and "quarterly" are
+# fixed 30- and 90-day spans, not calendar months/quarters.
+GRANULARITY_WINDOWS = {
+ "daily": timedelta(days=1),
+ "weekly": timedelta(days=7),
+ "biweekly": timedelta(days=14),
+ "monthly": timedelta(days=30),
+ "quarterly": timedelta(days=90),
+}
+
+# Row keys that _sanitize_floats inspects for NaN/Inf values.
+_FLOAT_FIELDS = frozenset(
+ {
+ "null_rate",
+ "mean",
+ "stddev",
+ "min_val",
+ "max_val",
+ "p50",
+ "p75",
+ "p90",
+ "p95",
+ "p99",
+ "avg_null_rate",
+ "max_null_rate",
+ }
+)
+
+
+def _sanitize_floats(row: Dict[str, Any]) -> Dict[str, Any]:
+    """Set NaN/Inf values of the known float fields to ``None``.
+
+    Only keys listed in ``_FLOAT_FIELDS`` are inspected.  ``row`` is modified
+    in place and returned, so the result can be JSON-serialized.
+    """
+    for field_name in _FLOAT_FIELDS:
+        current = row.get(field_name)
+        if not isinstance(current, float):
+            continue
+        if math.isnan(current) or math.isinf(current):
+            row[field_name] = None
+    return row
+
+
+class MonitoringService:
+    def __init__(self, store: "FeatureStore"):
+        """Bind the service to ``store``; the job manager and offline store are resolved lazily."""
+        self._store = store
+        self._calculator = MetricsCalculator()
+        # Filled in on first use by ``job_manager`` / ``_get_offline_store``.
+        self._job_manager: Optional[DQMJobManager] = None
+        self._offline_store_cache = None
+        # Set to True by ``_ensure_monitoring_tables`` after its first call succeeds.
+        self._monitoring_tables_ensured = False
+
+    def _get_offline_store(self):
+        """Return the provider's offline store, looking it up once and caching it."""
+        cached = self._offline_store_cache
+        if cached is None:
+            cached = self._store._get_provider().offline_store
+            self._offline_store_cache = cached
+        return cached
+
+    def _ensure_monitoring_tables(self):
+        """Ensure the monitoring tables exist, asking the offline store only until it succeeds once."""
+        if self._monitoring_tables_ensured:
+            return
+        self._get_offline_store().ensure_monitoring_tables(self._store.config)
+        self._monitoring_tables_ensured = True
+
+    @property
+    def job_manager(self) -> DQMJobManager:
+        """The lazily created ``DQMJobManager``; ``ensure_table`` runs when it is first built."""
+        if self._job_manager is not None:
+            return self._job_manager
+        manager = DQMJobManager(self._get_offline_store(), self._store.config)
+        # Stored before ensure_table() so the instance is kept even if that call raises.
+        self._job_manager = manager
+        manager.ensure_table()
+        return manager
+
+ # ------------------------------------------------------------------ #
+ # Auto-compute: detect dates, compute all granularities
+ # ------------------------------------------------------------------ #
+
+ def auto_compute(
+ self,
+ project: Optional[str] = None,
+ feature_view_name: Optional[str] = None,
+ ) -> Dict[str, Any]:
+ """Detect date ranges from source data and compute all granularities.
+
+ For every resolved feature view the latest timestamp of its batch source
+ is looked up, and for each entry of GRANULARITY_WINDOWS metrics are
+ computed over [max_ts - window, max_ts] and saved (never as a baseline)
+ with metric_date set to the window start date.
+
+ Args:
+ project: Project name; defaults to the store's configured project.
+ feature_view_name: Restrict to one feature view; all views when omitted.
+
+ Returns:
+ Summary dict with keys "status", "computed_feature_views",
+ "computed_features", "granularities" and "duration_ms".
+
+ A failure in one feature view is logged and does not stop the others;
+ "status" is "completed" regardless.
+ """
+ start_time = time.time()
+ self._ensure_monitoring_tables()
+ if project is None:
+ project = self._store.config.project
+
+ feature_views = self._resolve_feature_views(project, feature_view_name)
+ total_features = 0
+ total_views = 0
+ granularities_computed = set()
+
+ for fv in feature_views:
+ try:
+ feature_fields = self._classify_fields(fv)
+ # Views without any classifiable feature are skipped silently.
+ if not feature_fields:
+ continue
+
+ max_ts = self._get_max_timestamp(fv)
+ if max_ts is None:
+ logger.warning(
+ "No data found for feature view '%s', skipping", fv.name
+ )
+ continue
+
+ # One computed_at value is shared by every granularity of this view.
+ now = datetime.now(timezone.utc)
+
+ for granularity, window in GRANULARITY_WINDOWS.items():
+ # Windows are anchored on the newest source row, not on the current time.
+ window_start = max_ts - window
+ metrics_list = self._compute_feature_metrics(
+ fv,
+ feature_fields,
+ window_start,
+ max_ts,
+ )
+ self._save_computed_metrics(
+ project=project,
+ feature_view=fv,
+ metrics_list=metrics_list,
+ metric_date=window_start.date(),
+ granularity=granularity,
+ set_baseline=False,
+ now=now,
+ )
+ # NOTE(review): feature-service metrics are recomputed for every
+ # (feature view, granularity) pair here — confirm this is intended.
+ self._compute_feature_service_metrics(
+ project=project,
+ granularity=granularity,
+ metric_dates=[window_start.date()],
+ set_baseline=False,
+ )
+ total_features += len(metrics_list)
+ granularities_computed.add(granularity)
+ total_views += 1
+ except Exception:
+ logger.exception(
+ "Failed to auto-compute metrics for feature view '%s'", fv.name
+ )
+
+ duration_ms = int((time.time() - start_time) * 1000)
+
+ return {
+ "status": "completed",
+ "computed_feature_views": total_views,
+ "computed_features": total_features,
+ "granularities": sorted(granularities_computed),
+ "duration_ms": duration_ms,
+ }
+
+ # ------------------------------------------------------------------ #
+ # Log source: compute metrics from feature serving logs
+ # ------------------------------------------------------------------ #
+
+    def compute_log_metrics(
+        self,
+        project: str,
+        feature_service_name: str,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+        granularity: str = "daily",
+        set_baseline: bool = False,
+    ) -> Dict[str, Any]:
+        """Compute and store monitoring metrics from a feature service's serving logs.
+
+        The log source is resolved via ``_resolve_log_source``; when that
+        returns ``None`` nothing is computed and a ``"skipped"`` summary is
+        returned.  Otherwise metrics for the requested date range are saved
+        with ``metric_date`` equal to the range's start date.
+
+        Raises:
+            ValueError: If ``granularity`` is not in ``VALID_GRANULARITIES``.
+        """
+        self._ensure_monitoring_tables()
+        if granularity not in VALID_GRANULARITIES:
+            raise ValueError(
+                f"Invalid granularity '{granularity}'. "
+                f"Must be one of {VALID_GRANULARITIES}"
+            )
+
+        started_at = time.time()
+        range_start, range_end = self._to_date_range(start_date, end_date)
+
+        if project is None:
+            project = self._store.config.project
+
+        service = self._store.registry.get_feature_service(
+            name=feature_service_name, project=project
+        )
+        resolved = self._resolve_log_source(service)
+        if resolved is None:
+            skipped = {
+                "status": "skipped",
+                "reason": f"Feature service '{feature_service_name}' has no logging configured",
+                "duration_ms": int((time.time() - started_at) * 1000),
+            }
+            return skipped
+
+        data_source, ts_field, feature_fields, log_col_map = resolved
+        computed = self._compute_from_source(
+            data_source,
+            ts_field,
+            feature_fields,
+            range_start,
+            range_end,
+        )
+
+        computed_at = datetime.now(timezone.utc)
+        metric_date = range_start.date()
+
+        self._save_log_metrics(
+            project=project,
+            feature_service_name=feature_service_name,
+            log_col_map=log_col_map,
+            metrics_list=computed,
+            metric_date=metric_date,
+            granularity=granularity,
+            set_baseline=set_baseline,
+            now=computed_at,
+        )
+
+        elapsed_ms = int((time.time() - started_at) * 1000)
+        summary = {
+            "status": "completed",
+            "data_source_type": "log",
+            "feature_service_name": feature_service_name,
+            "granularity": granularity,
+            "computed_features": len(computed),
+            "metric_date": metric_date.isoformat(),
+            "duration_ms": elapsed_ms,
+        }
+        return summary
+
+ def auto_compute_log_metrics(
+ self,
+ project: Optional[str] = None,
+ feature_service_name: Optional[str] = None,
+ ) -> Dict[str, Any]:
+ """Auto-detect date ranges from log data and compute all granularities.
+
+ For each feature service with a resolvable log source, the latest log
+ timestamp is looked up and, for each entry of GRANULARITY_WINDOWS,
+ metrics over [max_ts - window, max_ts] are computed and saved (never as
+ a baseline) with metric_date set to the window start date.
+
+ Args:
+ project: Project name; defaults to the store's configured project.
+ feature_service_name: Restrict to one service; all services when omitted.
+
+ Returns:
+ Summary dict with keys "status", "data_source_type",
+ "computed_feature_services", "computed_features", "granularities"
+ and "duration_ms".
+
+ A failure in one feature service is logged and does not stop the others.
+ """
+ start_time = time.time()
+ self._ensure_monitoring_tables()
+ if project is None:
+ project = self._store.config.project
+
+ if feature_service_name:
+ services = [
+ self._store.registry.get_feature_service(
+ name=feature_service_name, project=project
+ )
+ ]
+ else:
+ services = self._store.registry.list_feature_services(project=project)
+
+ total_features = 0
+ total_services = 0
+ granularities_computed: set = set()
+
+ for fs in services:
+ try:
+ log_source = self._resolve_log_source(fs)
+ # Services for which no log source can be resolved are skipped silently.
+ if log_source is None:
+ continue
+
+ data_source, ts_field, feature_fields, log_col_map = log_source
+
+ max_ts = self._get_max_timestamp_for_source(data_source, ts_field)
+ if max_ts is None:
+ logger.warning(
+ "No log data found for feature service '%s', skipping",
+ fs.name,
+ )
+ continue
+
+ # One computed_at value is shared by every granularity of this service.
+ now = datetime.now(timezone.utc)
+
+ for gran, window in GRANULARITY_WINDOWS.items():
+ # Windows are anchored on the newest log row, not on the current time.
+ window_start = max_ts - window
+ metrics_list = self._compute_from_source(
+ data_source,
+ ts_field,
+ feature_fields,
+ window_start,
+ max_ts,
+ )
+ self._save_log_metrics(
+ project=project,
+ feature_service_name=fs.name,
+ log_col_map=log_col_map,
+ metrics_list=metrics_list,
+ metric_date=window_start.date(),
+ granularity=gran,
+ set_baseline=False,
+ now=now,
+ )
+ total_features += len(metrics_list)
+ granularities_computed.add(gran)
+
+ total_services += 1
+ except Exception:
+ logger.exception(
+ "Failed to auto-compute log metrics for feature service '%s'",
+ fs.name,
+ )
+
+ duration_ms = int((time.time() - start_time) * 1000)
+ return {
+ "status": "completed",
+ "data_source_type": "log",
+ "computed_feature_services": total_services,
+ "computed_features": total_features,
+ "granularities": sorted(granularities_computed),
+ "duration_ms": duration_ms,
+ }
+
+ # ------------------------------------------------------------------ #
+ # Baseline: compute from all available source data
+ # ------------------------------------------------------------------ #
+
+    def compute_baseline(
+        self,
+        project: Optional[str] = None,
+        feature_view_name: Optional[str] = None,
+        feature_names: Optional[List[str]] = None,
+    ) -> Dict[str, Any]:
+        """Compute baseline metrics from all available source data.
+
+        Only features that do not yet have a baseline row are computed, over
+        the open range ``[_EPOCH, _FAR_FUTURE]``.  Results are saved with
+        granularity ``"daily"``, flagged as baseline, and dated with the UTC
+        date of the computation.
+
+        Args:
+            project: Project name; defaults to the store's configured project.
+            feature_view_name: Restrict to one feature view; all when omitted.
+            feature_names: Optional subset of feature names to consider.
+
+        Returns:
+            Summary dict with keys ``status``, ``computed_features``,
+            ``computed_feature_views``, ``is_baseline`` and ``duration_ms``.
+
+        A failure in one feature view is logged and does not stop the others.
+        """
+        start_time = time.time()
+        self._ensure_monitoring_tables()
+        if project is None:
+            project = self._store.config.project
+
+        feature_views = self._resolve_feature_views(project, feature_view_name)
+        total_features = 0
+        total_views = 0
+
+        for fv in feature_views:
+            try:
+                fields_needing_baseline = self._get_features_without_baseline(
+                    project, fv, feature_names
+                )
+                if not fields_needing_baseline:
+                    logger.info(
+                        "All features in '%s' already have baselines, skipping",
+                        fv.name,
+                    )
+                    continue
+
+                feature_fields = self._classify_fields(
+                    fv, fields=fields_needing_baseline
+                )
+                if not feature_fields:
+                    continue
+
+                metrics_list = self._compute_feature_metrics(
+                    fv,
+                    feature_fields,
+                    _EPOCH,
+                    _FAR_FUTURE,
+                )
+
+                now = datetime.now(timezone.utc)
+                # NOTE(review): with set_baseline=True, _save_computed_metrics calls
+                # clear_monitoring_baseline for the whole feature view before saving.
+                # Confirm this does not drop baselines of features skipped above.
+                self._save_computed_metrics(
+                    project=project,
+                    feature_view=fv,
+                    metrics_list=metrics_list,
+                    # Use the UTC date so metric_date agrees with computed_at.
+                    metric_date=now.date(),
+                    granularity="daily",
+                    set_baseline=True,
+                    now=now,
+                )
+
+                total_features += len(metrics_list)
+                total_views += 1
+            except Exception:
+                logger.exception(
+                    "Failed to compute baseline for feature view '%s'", fv.name
+                )
+
+        duration_ms = int((time.time() - start_time) * 1000)
+
+        return {
+            "status": "completed",
+            "computed_features": total_features,
+            "computed_feature_views": total_views,
+            "is_baseline": True,
+            "duration_ms": duration_ms,
+        }
+
+ # ------------------------------------------------------------------ #
+ # Compute: explicit dates + granularity (stored)
+ # ------------------------------------------------------------------ #
+
+    def compute_metrics(
+        self,
+        project: str,
+        feature_view_name: Optional[str] = None,
+        feature_names: Optional[List[str]] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+        granularity: str = "daily",
+        set_baseline: bool = False,
+    ) -> Dict[str, Any]:
+        """Compute and store metrics for an explicit date range and granularity.
+
+        Each resolved feature view is processed independently; a failure is
+        logged and the view is left out of the totals.  Afterwards
+        feature-service metrics are computed once for all dates touched.
+
+        Raises:
+            ValueError: If ``granularity`` is not in ``VALID_GRANULARITIES``.
+        """
+        self._ensure_monitoring_tables()
+        if granularity not in VALID_GRANULARITIES:
+            raise ValueError(
+                f"Invalid granularity '{granularity}'. "
+                f"Must be one of {VALID_GRANULARITIES}"
+            )
+
+        started_at = time.time()
+        range_start, range_end = self._to_date_range(start_date, end_date)
+
+        feature_count = 0
+        view_count = 0
+        touched_dates: set = set()
+
+        for view in self._resolve_feature_views(project, feature_view_name):
+            try:
+                outcome = self._compute_for_feature_view(
+                    project=project,
+                    feature_view=view,
+                    feature_names=feature_names,
+                    start_dt=range_start,
+                    end_dt=range_end,
+                    granularity=granularity,
+                    set_baseline=set_baseline,
+                )
+                feature_count += outcome["feature_count"]
+                view_count += 1
+                touched_dates.update(outcome["dates"])
+            except Exception:
+                logger.exception(
+                    "Failed to compute metrics for feature view '%s'", view.name
+                )
+
+        service_count = self._compute_feature_service_metrics(
+            project=project,
+            granularity=granularity,
+            metric_dates=list(touched_dates),
+            set_baseline=set_baseline,
+        )
+
+        elapsed_ms = int((time.time() - started_at) * 1000)
+
+        summary = {
+            "status": "completed",
+            "granularity": granularity,
+            "computed_features": feature_count,
+            "computed_feature_views": view_count,
+            "computed_feature_services": service_count,
+            "metric_dates": sorted(d.isoformat() for d in touched_dates),
+            "duration_ms": elapsed_ms,
+        }
+        return summary
+
+ # ------------------------------------------------------------------ #
+ # Transient compute (not stored)
+ # ------------------------------------------------------------------ #
+
+    def compute_transient(
+        self,
+        project: str,
+        feature_view_name: str,
+        feature_names: Optional[List[str]] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+    ) -> Dict[str, Any]:
+        """Compute metrics on-the-fly for an arbitrary date range without persisting.
+
+        Args:
+            project: Project the feature view belongs to.
+            feature_view_name: Feature view to read from.
+            feature_names: Optional subset of feature names to compute.
+            start_date: First day of the range; defaults as in ``_to_date_range``.
+            end_date: Last day of the range; defaults as in ``_to_date_range``.
+
+        Returns:
+            Dict with keys ``status``, ``feature_view_name``, ``start_date``,
+            ``end_date`` (ISO dates of the range actually queried),
+            ``metrics`` and ``duration_ms``.  ``metrics`` is empty when the
+            view has no classifiable features.
+        """
+        start_time = time.time()
+        start_dt, end_dt = self._to_date_range(start_date, end_date)
+        # Report the window that is actually queried.  Deriving the dates from
+        # the resolved range keeps them right when only one bound is supplied.
+        start_iso = start_dt.date().isoformat()
+        end_iso = end_dt.date().isoformat()
+
+        fv = self._store.registry.get_feature_view(
+            name=feature_view_name, project=project
+        )
+
+        feature_fields = self._classify_fields(fv, feature_names=feature_names)
+        if not feature_fields:
+            return {
+                "status": "completed",
+                "feature_view_name": feature_view_name,
+                "start_date": start_iso,
+                "end_date": end_iso,
+                "metrics": [],
+                "duration_ms": int((time.time() - start_time) * 1000),
+            }
+
+        metrics_list = self._compute_feature_metrics(
+            fv,
+            feature_fields,
+            start_dt,
+            end_dt,
+        )
+
+        for m in metrics_list:
+            m["feature_view_name"] = feature_view_name
+            m["start_date"] = start_iso
+            m["end_date"] = end_iso
+
+        return {
+            "status": "completed",
+            "feature_view_name": feature_view_name,
+            "start_date": start_iso,
+            "end_date": end_iso,
+            "metrics": metrics_list,
+            "duration_ms": int((time.time() - start_time) * 1000),
+        }
+
+ # ------------------------------------------------------------------ #
+ # DQM Job helpers
+ # ------------------------------------------------------------------ #
+
+    def submit_job(
+        self,
+        project: str,
+        job_type: str,
+        feature_view_name: Optional[str] = None,
+        parameters: Optional[Dict[str, Any]] = None,
+    ) -> str:
+        """Queue a DQM job through the job manager and return its id."""
+        manager = self.job_manager
+        job_id = manager.submit(
+            project=project,
+            job_type=job_type,
+            feature_view_name=feature_view_name,
+            parameters=parameters,
+        )
+        return job_id
+
+    def get_job(self, job_id: str) -> Optional[Dict[str, Any]]:
+        """Look up a DQM job record by id via the job manager."""
+        manager = self.job_manager
+        return manager.get_job(job_id)
+
+    def execute_job(self, job_id: str) -> Dict[str, Any]:
+        """Run a DQM job synchronously, with this service doing the computation."""
+        manager = self.job_manager
+        return manager.execute_job(job_id, self)
+
+ # ------------------------------------------------------------------ #
+ # Read helpers (delegate to offline store)
+ # ------------------------------------------------------------------ #
+
+    def _query(
+        self,
+        metric_type: str,
+        project: str,
+        filters=None,
+        start_date=None,
+        end_date=None,
+    ):
+        """Read stored metrics of ``metric_type`` and null out NaN/Inf floats in each row."""
+        self._ensure_monitoring_tables()
+        offline_store = self._get_offline_store()
+        raw_rows = offline_store.query_monitoring_metrics(
+            config=self._store.config,
+            project=project,
+            metric_type=metric_type,
+            filters=filters,
+            start_date=start_date,
+            end_date=end_date,
+        )
+        sanitized = []
+        for raw in raw_rows:
+            sanitized.append(_sanitize_floats(raw))
+        return sanitized
+
+    def get_feature_metrics(
+        self,
+        project: str,
+        feature_service_name: Optional[str] = None,
+        feature_view_name: Optional[str] = None,
+        feature_name: Optional[str] = None,
+        granularity: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+    ) -> List[Dict[str, Any]]:
+        """Read stored per-feature metrics.
+
+        When ``feature_service_name`` is given, one query is issued per feature
+        view of that service (overriding ``feature_view_name``) and the rows
+        are concatenated.
+        """
+        base_filters = {
+            "feature_view_name": feature_view_name,
+            "feature_name": feature_name,
+            "granularity": granularity,
+            "data_source_type": data_source_type,
+        }
+        if not feature_service_name:
+            return self._query("feature", project, base_filters, start_date, end_date)
+
+        def _rows_for_view(fv_name):
+            scoped = dict(base_filters)
+            scoped["feature_view_name"] = fv_name
+            return self._query("feature", project, scoped, start_date, end_date)
+
+        return self._get_metrics_by_service(
+            project, feature_service_name, _rows_for_view
+        )
+
+    def get_feature_view_metrics(
+        self,
+        project: str,
+        feature_service_name: Optional[str] = None,
+        feature_view_name: Optional[str] = None,
+        granularity: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+    ) -> List[Dict[str, Any]]:
+        """Read stored feature-view-level metrics.
+
+        When ``feature_service_name`` is given, one query is issued per feature
+        view of that service (overriding ``feature_view_name``) and the rows
+        are concatenated.
+        """
+        base_filters = {
+            "feature_view_name": feature_view_name,
+            "granularity": granularity,
+            "data_source_type": data_source_type,
+        }
+        if not feature_service_name:
+            return self._query(
+                "feature_view", project, base_filters, start_date, end_date
+            )
+
+        def _rows_for_view(fv_name):
+            scoped = dict(base_filters)
+            scoped["feature_view_name"] = fv_name
+            return self._query("feature_view", project, scoped, start_date, end_date)
+
+        return self._get_metrics_by_service(
+            project, feature_service_name, _rows_for_view
+        )
+
+    def get_feature_service_metrics(
+        self,
+        project: str,
+        feature_service_name: Optional[str] = None,
+        granularity: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+    ) -> List[Dict[str, Any]]:
+        """Read stored feature-service-level metrics matching the given filters."""
+        criteria = dict(
+            feature_service_name=feature_service_name,
+            granularity=granularity,
+            data_source_type=data_source_type,
+        )
+        return self._query("feature_service", project, criteria, start_date, end_date)
+
+    def get_baseline(
+        self,
+        project: str,
+        feature_view_name: Optional[str] = None,
+        feature_name: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
+        """Read per-feature metric rows flagged as baseline, without a date restriction."""
+        criteria = dict(
+            feature_view_name=feature_view_name,
+            feature_name=feature_name,
+            data_source_type=data_source_type,
+            is_baseline=True,
+        )
+        return self._query("feature", project, criteria)
+
+    def get_timeseries(
+        self,
+        project: str,
+        feature_view_name: Optional[str] = None,
+        feature_name: Optional[str] = None,
+        feature_service_name: Optional[str] = None,
+        granularity: Optional[str] = None,
+        data_source_type: Optional[str] = None,
+        start_date: Optional[date] = None,
+        end_date: Optional[date] = None,
+    ) -> List[Dict[str, Any]]:
+        """Return per-feature metrics for a period; a direct alias of ``get_feature_metrics``."""
+        query_args = {
+            "project": project,
+            "feature_service_name": feature_service_name,
+            "feature_view_name": feature_view_name,
+            "feature_name": feature_name,
+            "granularity": granularity,
+            "data_source_type": data_source_type,
+            "start_date": start_date,
+            "end_date": end_date,
+        }
+        return self.get_feature_metrics(**query_args)
+
+ # ------------------------------------------------------------------ #
+ # Auto-baseline trigger for feast apply
+ # ------------------------------------------------------------------ #
+
+    def submit_baseline_for_new_features(
+        self,
+        project: str,
+        feature_views: Optional[List] = None,
+    ) -> List[str]:
+        """Queue one baseline job per feature view that has features lacking a baseline.
+
+        Args:
+            project: Project name; defaults to the store's configured project.
+            feature_views: Views to inspect; all registered views when omitted.
+
+        Returns:
+            Ids of the submitted jobs.  Views whose features all have
+            baselines already produce no job.
+        """
+        if project is None:
+            project = self._store.config.project
+
+        if feature_views is None:
+            feature_views = self._store.registry.list_feature_views(project=project)
+
+        submitted: List[str] = []
+        for view in feature_views:
+            missing = self._get_features_without_baseline(project, view)
+            if missing:
+                missing_names = [field.name for field in missing]
+                new_job_id = self.job_manager.submit(
+                    project=project,
+                    job_type="baseline",
+                    feature_view_name=view.name,
+                    parameters={"feature_names": missing_names},
+                )
+                submitted.append(new_job_id)
+                logger.info(
+                    "Queued baseline computation for '%s' features %s (job: %s)",
+                    view.name,
+                    missing_names,
+                    new_job_id,
+                )
+
+        return submitted
+
+ # ------------------------------------------------------------------ #
+ # Private: compute engine dispatch (SQL push-down → Python fallback)
+ # ------------------------------------------------------------------ #
+
+    def _compute_feature_metrics(
+        self,
+        feature_view,
+        feature_fields: List[Tuple[str, str]],
+        start_dt: datetime,
+        end_dt: datetime,
+    ) -> List[Dict[str, Any]]:
+        """Compute metrics over a feature view's batch source, using that source's timestamp field."""
+        source = feature_view.batch_source
+        return self._compute_from_source(
+            source,
+            source.timestamp_field,
+            feature_fields,
+            start_dt,
+            end_dt,
+        )
+
+    def _get_max_timestamp(self, feature_view) -> Optional[datetime]:
+        """Return the latest timestamp of the feature view's batch source, or ``None`` if there is none."""
+        source = feature_view.batch_source
+        return self._get_max_timestamp_for_source(source, source.timestamp_field)
+
+ # ------------------------------------------------------------------ #
+ # Private: shared helpers (DRY)
+ # ------------------------------------------------------------------ #
+
+    @staticmethod
+    def _to_date_range(
+        start_date: Optional[date], end_date: Optional[date]
+    ) -> Tuple[datetime, datetime]:
+        """Expand optional calendar dates into an inclusive UTC datetime range.
+
+        Args:
+            start_date: First day of the range; defaults to the day before
+                ``end_date``.
+            end_date: Last day of the range; defaults to today's UTC date.
+
+        Returns:
+            ``(start_dt, end_dt)`` where ``start_dt`` is 00:00:00 UTC on
+            ``start_date`` and ``end_dt`` is 23:59:59 UTC on ``end_date``.
+        """
+        if end_date is None:
+            # The range is expressed in UTC, so "today" must be the UTC date.
+            # The local date can be a day off around midnight.
+            end_date = datetime.now(timezone.utc).date()
+        if start_date is None:
+            start_date = end_date - timedelta(days=1)
+        start_dt = datetime(
+            start_date.year, start_date.month, start_date.day, tzinfo=timezone.utc
+        )
+        end_dt = datetime(
+            end_date.year, end_date.month, end_date.day, 23, 59, 59, tzinfo=timezone.utc
+        )
+        return start_dt, end_dt
+
+ @staticmethod
+ def _classify_fields(
+ feature_view,
+ feature_names=None,
+ fields=None,
+ ) -> List[Tuple[str, str]]:
+ """Extract and classify features as numeric/categorical.
+
+ Args:
+ feature_view: FeatureView to extract fields from (used if fields is None).
+ feature_names: Optional filter list of feature names.
+ fields: Optional pre-selected Field objects (e.g., from idempotency check).
+
+ Returns:
+ List of (feature name, "numeric" | "categorical") pairs, in field order.
+ Fields whose dtype MetricsCalculator.classify_feature cannot classify
+ are dropped with a warning.
+ """
+ if fields is None:
+ fields = feature_view.features
+ if feature_names:
+ fields = [f for f in fields if f.name in feature_names]
+
+ result = []
+ for field in fields:
+ ftype = MetricsCalculator.classify_feature(field.dtype)
+ # None means the dtype is neither numeric nor categorical.
+ if ftype is None:
+ logger.warning(
+ "Unsupported dtype '%s' for feature '%s', skipping",
+ field.dtype,
+ field.name,
+ )
+ continue
+ result.append((field.name, ftype))
+ return result
+
+    def _save_computed_metrics(
+        self,
+        project: str,
+        feature_view,
+        metrics_list: List[Dict[str, Any]],
+        metric_date: date,
+        granularity: str,
+        set_baseline: bool,
+        now: datetime,
+    ) -> None:
+        """Persist batch-source feature metrics plus their feature-view aggregate.
+
+        Each dict in ``metrics_list`` is annotated in place with the project,
+        view, date, granularity, ``"batch"`` source type, ``computed_at`` and
+        baseline flag before being saved.  When ``set_baseline`` is true the
+        feature view's existing baseline is cleared first.  An empty
+        ``metrics_list`` is a no-op.
+        """
+        if not metrics_list:
+            return
+
+        store = self._get_offline_store()
+        cfg = self._store.config
+        view_name = feature_view.name
+
+        if set_baseline:
+            # NOTE(review): clears the baseline for the whole feature view, not
+            # only for the features in metrics_list — confirm callers expect that.
+            store.clear_monitoring_baseline(
+                config=cfg,
+                project=project,
+                feature_view_name=view_name,
+            )
+
+        shared_columns = {
+            "project_id": project,
+            "feature_view_name": view_name,
+            "metric_date": metric_date,
+            "granularity": granularity,
+            "data_source_type": "batch",
+            "computed_at": now,
+            "is_baseline": set_baseline,
+        }
+        for feature_metrics in metrics_list:
+            feature_metrics.update(shared_columns)
+
+        store.save_monitoring_metrics(cfg, "feature", metrics_list)
+
+        view_row = dict(shared_columns)
+        view_row.update(build_view_aggregate(metrics_list))
+        store.save_monitoring_metrics(cfg, "feature_view", [view_row])
+
+ def _resolve_join_key_columns(self, feature_view) -> List[str]:
+     """Resolve the join-key column names for a feature view's entities.
+
+     The view's entity names are matched against the entities registered
+     for the configured project, and each match contributes its join key
+     (falling back to the entity name if it has none). The join key, not
+     the entity name, is what names the column in the source data, and
+     the two can differ.
+
+     Returns:
+         The join keys of the matching registered entities; if none match,
+         the view's own ``entities`` list; ``[]`` if the view has none.
+     """
+     # Build the lookup set once instead of once per registered entity.
+     wanted = set(feature_view.entities or [])
+     project = self._store.config.project
+     join_keys = [
+         getattr(entity, "join_key", None) or entity.name
+         for entity in self._store.registry.list_entities(project=project)
+         if entity.name in wanted
+     ]
+     return join_keys or feature_view.entities or []
+
+ def _get_metrics_by_service(
+     self, project: str, feature_service_name: str, query_fn
+ ):
+     """Collect metrics for every feature view referenced by a feature service.
+
+     The service is fetched from the registry, ``query_fn`` is called once
+     per projected feature view name (in projection order), and the
+     results are concatenated into one list.
+     """
+     service = self._store.registry.get_feature_service(
+         name=feature_service_name, project=project
+     )
+     view_names = [projection.name for projection in service.feature_view_projections]
+     return [row for view_name in view_names for row in query_fn(view_name)]
+
+ def _resolve_feature_views(self, project: str, feature_view_name: Optional[str]):
+     """Return the feature views to process.
+
+     With a non-empty ``feature_view_name`` the result is a one-element list
+     holding that view; otherwise it is every feature view the registry
+     lists for ``project``.
+     """
+     registry = self._store.registry
+     if not feature_view_name:
+         return registry.list_feature_views(project=project)
+     return [registry.get_feature_view(name=feature_view_name, project=project)]
+
+ def _get_features_without_baseline(self, project, feature_view, feature_names=None):
+     """List the view's fields that have no stored baseline metric yet.
+
+     Baseline rows come from ``self.get_baseline``. When ``feature_names``
+     is non-empty, only fields named in it are considered.
+     """
+     baseline_rows = self.get_baseline(
+         project=project,
+         feature_view_name=feature_view.name,
+     )
+     covered = {row["feature_name"] for row in baseline_rows}
+
+     return [
+         field
+         for field in feature_view.features
+         if (not feature_names or field.name in feature_names)
+         and field.name not in covered
+     ]
+
+ def _compute_for_feature_view(
+     self,
+     project: str,
+     feature_view,
+     feature_names: Optional[List[str]],
+     start_dt: datetime,
+     end_dt: datetime,
+     granularity: str,
+     set_baseline: bool,
+ ) -> Dict[str, Any]:
+     """Compute and store metrics for one feature view over one window.
+
+     Returns:
+         ``{"feature_count": n, "dates": {...}}``. ``n`` is the number of
+         metric dicts computed and ``dates`` holds the date of ``start_dt``,
+         under which the metrics are saved. When no field of the view can
+         be classified, nothing is computed and the result is
+         ``{"feature_count": 0, "dates": set()}``.
+     """
+     classified = self._classify_fields(feature_view, feature_names=feature_names)
+     if not classified:
+         return {"feature_count": 0, "dates": set()}
+
+     computed = self._compute_feature_metrics(
+         feature_view, classified, start_dt, end_dt
+     )
+     run_date = start_dt.date()
+
+     self._save_computed_metrics(
+         project=project,
+         feature_view=feature_view,
+         metrics_list=computed,
+         metric_date=run_date,
+         granularity=granularity,
+         set_baseline=set_baseline,
+         now=datetime.now(timezone.utc),
+     )
+     return {"feature_count": len(computed), "dates": {run_date}}
+
+ # ------------------------------------------------------------------ #
+ # Private: log source helpers
+ # ------------------------------------------------------------------ #
+
+ def _resolve_log_source(self, feature_service):
+ """Resolve log data source for a feature service.
+
+ Returns (DataSource, timestamp_field, feature_fields, log_col_map),
+ or None if the feature service has no logging configured, its
+ logging destination does not implement ``to_data_source()``, or no
+ column of the log schema classifies as a feature.
+
+ ``feature_fields`` uses the raw log column names (needed for
+ SQL/PyArrow column access). ``log_col_map`` maps each raw log
+ column to ``(feature_view_name, normalized_feature_name)`` so
+ callers can store metrics under the correct view and feature
+ name — critical for drift detection across batch and log sources.
+ """
+ if not feature_service.logging_config:
+ return None
+
+ destination = feature_service.logging_config.destination
+ try:
+ data_source = destination.to_data_source()
+ except NotImplementedError:
+ logger.warning(
+ "Logging destination for '%s' does not support to_data_source()",
+ feature_service.name,
+ )
+ return None
+
+ logging_source = FeatureServiceLoggingSource(
+ feature_service,
+ self._store.config.project,
+ )
+ schema = logging_source.get_schema(self._store.registry)
+
+ # Log columns that are never treated as features.
+ skip_cols = {
+ LOG_TIMESTAMP_FIELD,
+ "__log_date",
+ "__request_id",
+ }
+ entity_columns = set()
+ view_feature_names: dict = {}
+ for proj in feature_service.feature_view_projections:
+ view_alias = proj.name_to_use()
+ try:
+ fv = self._store.registry.get_feature_view(
+ name=proj.name, project=self._store.config.project
+ )
+ for ec in fv.entity_columns:
+ entity_columns.add(ec.name)
+ # NOTE(review): any failure of the registry lookup is swallowed without
+ # logging; that view's entity columns are then not excluded below.
+ except Exception:
+ pass
+ for feat in proj.features:
+ # Log column name for a feature is "<view alias>__<feature name>";
+ # it maps back to the projection's own name and the feature name.
+ log_col = f"{view_alias}__{feat.name}"
+ view_feature_names[log_col] = (proj.name, feat.name)
+
+ feature_fields = []
+ log_col_map: dict = {}
+ for field in schema:
+ if field.name in skip_cols or field.name in entity_columns:
+ continue
+ # Columns ending in "__timestamp" / "__status" are skipped as non-features.
+ if field.name.endswith("__timestamp") or field.name.endswith("__status"):
+ continue
+ ftype = MetricsCalculator.classify_feature_arrow(field.type)
+ if ftype is not None:
+ feature_fields.append((field.name, ftype))
+ if field.name in view_feature_names:
+ log_col_map[field.name] = view_feature_names[field.name]
+
+ if not feature_fields:
+ return None
+
+ return data_source, LOG_TIMESTAMP_FIELD, feature_fields, log_col_map
+
+ def _get_max_timestamp_for_source(self, data_source, ts_field):
+     """Find the latest value of ``ts_field`` in ``data_source``.
+
+     The offline store's ``get_monitoring_max_timestamp`` is tried first
+     and its result is returned unchanged. If the store raises
+     ``NotImplementedError``, the source is read into an Arrow table and
+     the maximum is computed with PyArrow instead.
+
+     On the fallback path the result is ``None`` when the table is empty,
+     lacks the timestamp column, or has no non-null maximum. Otherwise it
+     is a timezone-aware datetime: naive datetimes are tagged as UTC and
+     non-datetime values are combined with midnight UTC.
+     """
+     store = self._get_offline_store()
+     try:
+         return store.get_monitoring_max_timestamp(
+             config=self._store.config,
+             data_source=data_source,
+             timestamp_field=ts_field,
+         )
+     except NotImplementedError:
+         # No push-down available: fall through to the in-Python scan.
+         pass
+
+     import pyarrow.compute as pc
+
+     table = store.pull_all_from_table_or_query(
+         config=self._store.config,
+         data_source=data_source,
+         join_key_columns=[],
+         feature_name_columns=[],
+         timestamp_field=ts_field,
+         start_date=_EPOCH,
+         end_date=_FAR_FUTURE,
+     ).to_arrow()
+
+     has_values = ts_field in table.column_names and len(table) != 0
+     if not has_values:
+         return None
+
+     latest = pc.max(table.column(ts_field)).as_py()
+     if latest is None:
+         return None
+     if not isinstance(latest, datetime):
+         return datetime.combine(latest, datetime.min.time(), tzinfo=timezone.utc)
+     if latest.tzinfo:
+         return latest
+     return latest.replace(tzinfo=timezone.utc)
+
+ def _compute_from_source(
+     self,
+     data_source,
+     ts_field: str,
+     feature_fields: List[Tuple[str, str]],
+     start_dt: datetime,
+     end_dt: datetime,
+ ) -> List[Dict[str, Any]]:
+     """Compute per-feature metrics for ``data_source`` over a time window.
+
+     The offline store's ``compute_monitoring_metrics`` is used when the
+     store implements it. If it raises ``NotImplementedError``, the rows
+     are pulled as an Arrow table and handed to the in-process calculator.
+
+     Args:
+         data_source: Source to read from.
+         ts_field: Name of the timestamp column used for the window.
+         feature_fields: ``(column_name, feature_type)`` pairs to measure.
+         start_dt: Start of the window.
+         end_dt: End of the window.
+     """
+     store = self._get_offline_store()
+     try:
+         return store.compute_monitoring_metrics(
+             config=self._store.config,
+             data_source=data_source,
+             feature_columns=feature_fields,
+             timestamp_field=ts_field,
+             start_date=start_dt,
+             end_date=end_dt,
+             histogram_bins=self._calculator.histogram_bins,
+             top_n=self._calculator.top_n,
+         )
+     except NotImplementedError:
+         logger.debug(
+             "Offline store does not support compute_monitoring_metrics, "
+             "falling back to Python-based computation for log source"
+         )
+
+     column_names = [column for column, _ in feature_fields]
+     job = store.pull_all_from_table_or_query(
+         config=self._store.config,
+         data_source=data_source,
+         join_key_columns=[],
+         feature_name_columns=column_names,
+         timestamp_field=ts_field,
+         start_date=start_dt,
+         end_date=end_dt,
+     )
+     return self._calculator.compute_all(job.to_arrow(), feature_fields)
+
+ def _save_log_metrics(
+     self,
+     project: str,
+     feature_service_name: str,
+     log_col_map: Dict[str, Tuple[str, str]],
+     metrics_list: List[Dict[str, Any]],
+     metric_date: date,
+     granularity: str,
+     set_baseline: bool,
+     now: datetime,
+ ) -> None:
+     """Persist log-sourced metrics at feature, view and service level.
+
+     Every row is tagged with ``data_source_type="log"``. Each metric's
+     ``feature_name`` arrives as a raw log column name (for example
+     ``driver_stats__conv_rate``) and is rewritten in place, through
+     ``log_col_map``, to the originating feature view and feature name.
+     A column missing from the map keeps its raw name and is filed under
+     ``feature_service_name``.
+
+     Three saves follow, in order: the per-feature rows, one aggregate row
+     per originating view, and one aggregate row for the whole service.
+     Nothing is written when ``metrics_list`` is empty.
+     """
+     if not metrics_list:
+         return
+
+     store = self._get_offline_store()
+     repo_config = self._store.config
+
+     # Fields shared by every row of this run, kept in column order.
+     run_tags = {
+         "metric_date": metric_date,
+         "granularity": granularity,
+         "data_source_type": "log",
+         "computed_at": now,
+         "is_baseline": set_baseline,
+     }
+
+     for metric in metrics_list:
+         raw_col = metric.get("feature_name", "")
+         origin_view, origin_feature = log_col_map.get(
+             raw_col, (feature_service_name, raw_col)
+         )
+         metric.update(
+             {
+                 "project_id": project,
+                 "feature_view_name": origin_view,
+                 "feature_name": origin_feature,
+                 **run_tags,
+             }
+         )
+     store.save_monitoring_metrics(repo_config, "feature", metrics_list)
+
+     # One aggregate row per originating feature view.
+     grouped: Dict[str, List[Dict[str, Any]]] = {}
+     for metric in metrics_list:
+         grouped.setdefault(metric["feature_view_name"], []).append(metric)
+
+     view_rows = [
+         {
+             "project_id": project,
+             "feature_view_name": view_name,
+             **run_tags,
+             **build_view_aggregate(view_metrics),
+         }
+         for view_name, view_metrics in grouped.items()
+     ]
+     store.save_monitoring_metrics(repo_config, "feature_view", view_rows)
+
+     # One aggregate row for the feature service as a whole.
+     service_agg = build_view_aggregate(metrics_list)
+     service_row = {
+         "project_id": project,
+         "feature_service_name": feature_service_name,
+         **run_tags,
+         "total_feature_views": len(grouped),
+         "total_features": service_agg["total_features"],
+         "avg_null_rate": service_agg["avg_null_rate"],
+         "max_null_rate": service_agg["max_null_rate"],
+     }
+     store.save_monitoring_metrics(repo_config, "feature_service", [service_row])
+
+ def _read_batch_source(self, feature_view, feature_fields, start_dt, end_dt):
+     """Read a window of the view's batch source and return it as Arrow.
+
+     Requests the view's resolved join-key columns plus the column named
+     by each ``(name, type)`` pair in ``feature_fields``, using the batch
+     source's own timestamp and created-timestamp columns.
+     """
+     source = feature_view.batch_source
+     job = self._get_offline_store().pull_all_from_table_or_query(
+         config=self._store.config,
+         data_source=source,
+         join_key_columns=self._resolve_join_key_columns(feature_view),
+         feature_name_columns=[column for column, _ in feature_fields],
+         timestamp_field=source.timestamp_field,
+         created_timestamp_column=source.created_timestamp_column,
+         start_date=start_dt,
+         end_date=end_dt,
+     )
+     return job.to_arrow()
+
+ def _compute_feature_service_metrics(
+ self,
+ project: str,
+ granularity: str,
+ metric_dates: List[date],
+ set_baseline: bool,
+ ) -> int:
+ """Roll stored batch feature-view metrics up into feature-service rows.
+
+ For each feature service registered in ``project`` and each date in
+ ``metric_dates``, the stored ``"feature_view"`` metrics with the given
+ ``granularity`` and ``data_source_type="batch"`` are queried, narrowed
+ to the views the service projects, and summarized into one
+ ``"feature_service"`` row that is saved to the offline store. A date
+ with no matching view metrics produces no row.
+
+ A failure while processing one service is logged with its traceback
+ and does not propagate.
+
+ Returns:
+ The ``count`` accumulator (presumably the number of service rows
+ saved; confirm). 0 when ``metric_dates`` is empty or the project has
+ no feature services.
+ """
+ if not metric_dates:
+ return 0
+
+ feature_services = self._store.registry.list_feature_services(project=project)
+ if not feature_services:
+ return 0
+
+ offline_store = self._get_offline_store()
+ config = self._store.config
+ now = datetime.now(timezone.utc)
+ count = 0
+
+ for fs in feature_services:
+ try:
+ fv_names = {proj.name for proj in fs.feature_view_projections}
+
+ for metric_date in metric_dates:
+ # NOTE(review): this query does not depend on ``fs``, so the same
+ # rows are fetched once per service for every date.
+ fv_metrics = offline_store.query_monitoring_metrics(
+ config=config,
+ project=project,
+ metric_type="feature_view",
+ filters={
+ "granularity": granularity,
+ "data_source_type": "batch",
+ },
+ start_date=metric_date,
+ end_date=metric_date,
+ )
+
+ relevant = [
+ m for m in fv_metrics if m.get("feature_view_name") in fv_names
+ ]
+ if not relevant:
+ continue
+
+ null_rates = [
+ m["avg_null_rate"]
+ for m in relevant
+ if m.get("avg_null_rate") is not None
+ ]
+
+ service_metric = {
+ "project_id": project,
+ "feature_service_name": fs.name,
+ "metric_date": metric_date
+ if isinstance(metric_date, date)
+ else date.fromisoformat(str(metric_date)),
+ "granularity": granularity,
+ "data_source_type": "batch",
+ "computed_at": now,
+ "is_baseline": set_baseline,
+ "total_feature_views": len(relevant),
+ # NOTE(review): the 0 default only covers a missing key; a stored
+ # None for total_features would make this sum raise TypeError.
+ "total_features": sum(
+ m.get("total_features", 0) for m in relevant
+ ),
+ # Unweighted mean of the per-view average null rates.
+ "avg_null_rate": (
+ sum(null_rates) / len(null_rates) if null_rates else 0.0
+ ),
+ "max_null_rate": max(null_rates) if null_rates else 0.0,
+ }
+ offline_store.save_monitoring_metrics(
+ config,
+ "feature_service",
+ [service_metric],
+ )
+ count += 1
+ except Exception:
+ logger.exception("Failed to compute service metrics for '%s'", fs.name)
+
+ return count
diff --git a/sdk/python/feast/monitoring/monitoring_utils.py b/sdk/python/feast/monitoring/monitoring_utils.py
new file mode 100644
index 00000000000..67d545d8786
--- /dev/null
+++ b/sdk/python/feast/monitoring/monitoring_utils.py
@@ -0,0 +1,272 @@
+"""Shared constants and helpers for monitoring across all offline store backends.
+
+Every backend needs the same table names, column lists, primary keys,
+empty-metric templates, and result-row normalization. Centralizing them
+here avoids ~8x duplication and prevents column-list drift.
+"""
+
+import json
+import math
+from datetime import date, datetime
+from typing import Any, Dict, List, Optional, Tuple
+
+# ------------------------------------------------------------------ #
+# Table names
+# ------------------------------------------------------------------ #
+
+# Each name is paired with its column and key lists by monitoring_table_meta().
+MON_TABLE_FEATURE = "feast_monitoring_feature_metrics"
+MON_TABLE_FEATURE_VIEW = "feast_monitoring_feature_view_metrics"
+MON_TABLE_FEATURE_SERVICE = "feast_monitoring_feature_service_metrics"
+MON_TABLE_JOB = "feast_monitoring_jobs"
+
+# ------------------------------------------------------------------ #
+# Column definitions — (ordered, used by INSERT / SELECT / Parquet)
+# ------------------------------------------------------------------ #
+
+# Columns of a per-feature metrics row, in storage order
+# (returned by monitoring_table_meta("feature")).
+FEATURE_METRICS_COLUMNS: List[str] = [
+ "project_id",
+ "feature_view_name",
+ "feature_name",
+ "metric_date",
+ "granularity",
+ "data_source_type",
+ "computed_at",
+ "is_baseline",
+ "feature_type",
+ "row_count",
+ "null_count",
+ "null_rate",
+ "mean",
+ "stddev",
+ "min_val",
+ "max_val",
+ "p50",
+ "p75",
+ "p90",
+ "p95",
+ "p99",
+ "histogram",
+]
+
+# Key columns of a per-feature metrics row.
+# NOTE(review): is_baseline is a column but not part of this key, so presumably
+# a baseline row and a regular row with the same key cannot coexist; confirm.
+FEATURE_METRICS_PK: List[str] = [
+ "project_id",
+ "feature_view_name",
+ "feature_name",
+ "metric_date",
+ "granularity",
+ "data_source_type",
+]
+
+# Columns of a per-feature-view aggregate row, in storage order
+# (returned by monitoring_table_meta("feature_view")).
+FEATURE_VIEW_METRICS_COLUMNS: List[str] = [
+ "project_id",
+ "feature_view_name",
+ "metric_date",
+ "granularity",
+ "data_source_type",
+ "computed_at",
+ "is_baseline",
+ "total_row_count",
+ "total_features",
+ "features_with_nulls",
+ "avg_null_rate",
+ "max_null_rate",
+]
+
+# Key columns of a per-feature-view aggregate row.
+FEATURE_VIEW_METRICS_PK: List[str] = [
+ "project_id",
+ "feature_view_name",
+ "metric_date",
+ "granularity",
+ "data_source_type",
+]
+
+# Columns of a per-feature-service aggregate row, in storage order
+# (returned by monitoring_table_meta("feature_service")).
+FEATURE_SERVICE_METRICS_COLUMNS: List[str] = [
+ "project_id",
+ "feature_service_name",
+ "metric_date",
+ "granularity",
+ "data_source_type",
+ "computed_at",
+ "is_baseline",
+ "total_feature_views",
+ "total_features",
+ "avg_null_rate",
+ "max_null_rate",
+]
+
+# Key columns of a per-feature-service aggregate row.
+FEATURE_SERVICE_METRICS_PK: List[str] = [
+ "project_id",
+ "feature_service_name",
+ "metric_date",
+ "granularity",
+ "data_source_type",
+]
+
+# Columns of a monitoring job record, in storage order
+# (returned by monitoring_table_meta("job")).
+JOB_COLUMNS: List[str] = [
+ "job_id",
+ "project_id",
+ "feature_view_name",
+ "job_type",
+ "status",
+ "parameters",
+ "metric_date",
+ "started_at",
+ "completed_at",
+ "error_message",
+ "result_summary",
+]
+
+# A job record is keyed by its job_id alone.
+JOB_PK: List[str] = [
+ "job_id",
+]
+
+
+def monitoring_table_meta(
+    metric_type: str,
+) -> Tuple[str, List[str], List[str]]:
+    """Look up the storage layout for one kind of monitoring record.
+
+    Args:
+        metric_type: One of ``"feature"``, ``"feature_view"``,
+            ``"feature_service"`` or ``"job"``.
+
+    Returns:
+        ``(table_name, columns, pk_columns)``. The two lists are the
+        module-level constants themselves, not copies.
+
+    Raises:
+        ValueError: If ``metric_type`` is not one of the known kinds.
+    """
+    layouts = (
+        ("feature", MON_TABLE_FEATURE, FEATURE_METRICS_COLUMNS, FEATURE_METRICS_PK),
+        (
+            "feature_view",
+            MON_TABLE_FEATURE_VIEW,
+            FEATURE_VIEW_METRICS_COLUMNS,
+            FEATURE_VIEW_METRICS_PK,
+        ),
+        (
+            "feature_service",
+            MON_TABLE_FEATURE_SERVICE,
+            FEATURE_SERVICE_METRICS_COLUMNS,
+            FEATURE_SERVICE_METRICS_PK,
+        ),
+        ("job", MON_TABLE_JOB, JOB_COLUMNS, JOB_PK),
+    )
+    for kind, table_name, columns, pk_columns in layouts:
+        if metric_type == kind:
+            return table_name, columns, pk_columns
+    raise ValueError(f"Unknown monitoring metric_type: '{metric_type}'")
+
+
+# ------------------------------------------------------------------ #
+# Tiny helpers shared by all backends (kept here instead of duplicated per backend)
+# ------------------------------------------------------------------ #
+
+
+def opt_float(val: Any) -> Optional[float]:
+    """Convert ``val`` to ``float``, mapping missing or non-finite values to ``None``.
+
+    ``None`` is returned for ``None``, NaN and positive or negative
+    infinity. Any error raised by ``float(val)`` propagates to the caller.
+    """
+    if val is None:
+        return None
+    number = float(val)
+    return number if math.isfinite(number) else None
+
+
+def _empty_metric(feature_name: str, feature_type: str) -> Dict[str, Any]:
+    """Build the placeholder metric dict shared by both feature types.
+
+    Counts and the null rate are zero; every statistic is ``None``. A new
+    dict is returned on each call, so callers may mutate it freely.
+    """
+    return {
+        "feature_name": feature_name,
+        "feature_type": feature_type,
+        "row_count": 0,
+        "null_count": 0,
+        "null_rate": 0.0,
+        "mean": None,
+        "stddev": None,
+        "min_val": None,
+        "max_val": None,
+        "p50": None,
+        "p75": None,
+        "p90": None,
+        "p95": None,
+        "p99": None,
+        "histogram": None,
+    }
+
+
+def empty_numeric_metric(feature_name: str) -> Dict[str, Any]:
+    """Return a placeholder metric dict for a numeric feature.
+
+    ``row_count``, ``null_count`` and ``null_rate`` are zero and all
+    statistics (mean, stddev, min/max, percentiles, histogram) are ``None``.
+    """
+    return _empty_metric(feature_name, "numeric")
+
+
+def empty_categorical_metric(feature_name: str) -> Dict[str, Any]:
+    """Return a placeholder metric dict for a categorical feature.
+
+    Same shape as the numeric placeholder, with ``feature_type`` set to
+    ``"categorical"``.
+    """
+    return _empty_metric(feature_name, "categorical")
+
+
+# ------------------------------------------------------------------ #
+# Result-row normalization (used after SQL fetch or Parquet read)
+# ------------------------------------------------------------------ #
+
+
+def normalize_monitoring_row(record: Dict[str, Any]) -> Dict[str, Any]:
+    """Make a monitoring metric dict safe to serialize as JSON.
+
+    ``record`` is modified in place and also returned:
+
+    - float NaN / Inf values become ``None``;
+    - a string ``histogram`` is parsed as JSON, and left untouched if it
+      does not parse;
+    - ``metric_date`` / ``computed_at`` dates and datetimes become ISO strings;
+    - a non-``None`` ``is_baseline`` is coerced to ``bool``.
+    """
+    non_finite_keys = [
+        key
+        for key, value in record.items()
+        if isinstance(value, float) and not math.isfinite(value)
+    ]
+    for key in non_finite_keys:
+        record[key] = None
+
+    raw_histogram = record.get("histogram")
+    if isinstance(raw_histogram, str):
+        try:
+            record["histogram"] = json.loads(raw_histogram)
+        except (json.JSONDecodeError, TypeError):
+            # Not valid JSON: keep the original string as-is.
+            pass
+
+    for field in ("metric_date", "computed_at"):
+        stamp = record.get(field)
+        if isinstance(stamp, (date, datetime)):
+            record[field] = stamp.isoformat()
+
+    flag = record.get("is_baseline")
+    if flag is not None:
+        record["is_baseline"] = bool(flag)
+
+    return record
+
+
+# ------------------------------------------------------------------ #
+# View-level aggregate builder (shared by batch + log save paths)
+# ------------------------------------------------------------------ #
+
+
+def build_view_aggregate(
+    metrics_list: List[Dict[str, Any]],
+) -> Dict[str, Any]:
+    """Summarize per-feature metrics into one view-level aggregate.
+
+    Returns:
+        A dict with, in order:
+
+        - ``total_row_count``: the largest non-``None`` ``row_count``
+          (0 if there is none);
+        - ``total_features``: the number of metric dicts;
+        - ``features_with_nulls``: how many have ``null_count`` above zero;
+        - ``avg_null_rate`` / ``max_null_rate``: mean and maximum of the
+          non-``None`` ``null_rate`` values (0.0 if there are none).
+    """
+    row_counts = []
+    null_rates = []
+    features_with_nulls = 0
+    for metric in metrics_list:
+        if metric.get("row_count") is not None:
+            row_counts.append(metric["row_count"])
+        if metric.get("null_rate") is not None:
+            null_rates.append(metric["null_rate"])
+        if (metric.get("null_count") or 0) > 0:
+            features_with_nulls += 1
+
+    if null_rates:
+        avg_null_rate = sum(null_rates) / len(null_rates)
+        max_null_rate = max(null_rates)
+    else:
+        avg_null_rate = 0.0
+        max_null_rate = 0.0
+
+    return {
+        "total_row_count": max(row_counts, default=0),
+        "total_features": len(metrics_list),
+        "features_with_nulls": features_with_nulls,
+        "avg_null_rate": avg_null_rate,
+        "max_null_rate": max_null_rate,
+    }
diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py
index 3fbcb9ec498..a13f481e577 100644
--- a/sdk/python/feast/repo_config.py
+++ b/sdk/python/feast/repo_config.py
@@ -231,6 +231,13 @@ class MaterializationConfig(BaseModel):
Supported compute engines: local, spark, ray. """
+class DqmConfig(FeastConfigBaseModel):
+ """Data Quality Monitoring (DQM) settings, exposed on ``RepoConfig`` under the ``dqm`` alias."""
+
+ auto_baseline: StrictBool = True
+ """Whether baseline distribution is computed automatically on ``feast apply``."""
+
+
class OpenLineageConfig(FeastBaseModel):
"""Configuration for OpenLineage integration.
@@ -355,6 +362,9 @@ class RepoConfig(FeastBaseModel):
openlineage_config: Optional[OpenLineageConfig] = Field(None, alias="openlineage")
""" Configuration for OpenLineage data lineage integration (optional). """
+ dqm_config: Optional[DqmConfig] = Field(None, alias="dqm")
+ """ DqmConfig: Data Quality Monitoring configuration (optional). """
+
def __init__(self, **data: Any):
super().__init__(**data)
diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py
index 28fe86602ad..767688193f0 100644
--- a/sdk/python/feast/repo_operations.py
+++ b/sdk/python/feast/repo_operations.py
@@ -1,594 +1,621 @@
-import base64
-import importlib
-import json
-import logging
-import os
-import random
-import re
-import sys
-import tempfile
-from importlib.abc import Loader
-from importlib.machinery import ModuleSpec
-from pathlib import Path
-from typing import List, Optional, Set, Union
-
-import click
-from click.exceptions import BadParameter
-
-from feast import PushSource
-from feast.batch_feature_view import BatchFeatureView
-from feast.constants import FEATURE_STORE_YAML_ENV_NAME
-from feast.data_source import DataSource, KafkaSource, KinesisSource
-from feast.diff.registry_diff import extract_objects_for_keep_delete_update_add
-from feast.entity import Entity
-from feast.feature_service import FeatureService
-from feast.feature_store import FeatureStore
-from feast.feature_view import DUMMY_ENTITY, FeatureView
-from feast.file_utils import replace_str_in_file
-from feast.infra.registry.base_registry import BaseRegistry
-from feast.infra.registry.registry import FEAST_OBJECT_TYPES, FeastObjectType, Registry
-from feast.names import adjectives, animals
-from feast.on_demand_feature_view import OnDemandFeatureView
-from feast.permissions.permission import Permission
-from feast.project import Project
-from feast.repo_config import RepoConfig
-from feast.repo_contents import RepoContents
-from feast.stream_feature_view import StreamFeatureView
-
-logger = logging.getLogger(__name__)
-
-
-def py_path_to_module(path: Path) -> str:
- return (
- str(path.relative_to(os.getcwd()))[: -len(".py")]
- .replace("./", "")
- .replace("/", ".")
- .replace("\\", ".")
- )
-
-
-def read_feastignore(repo_root: Path) -> List[str]:
- """Read .feastignore in the repo root directory (if exists) and return the list of user-defined ignore paths"""
- feast_ignore = repo_root / ".feastignore"
- if not feast_ignore.is_file():
- return []
- lines = feast_ignore.read_text().strip().split("\n")
- ignore_paths = []
- for line in lines:
- # Remove everything after the first occurance of "#" symbol (comments)
- if line.find("#") >= 0:
- line = line[: line.find("#")]
- # Strip leading or ending whitespaces
- line = line.strip()
- # Add this processed line to ignore_paths if it's not empty
- if len(line) > 0:
- ignore_paths.append(line)
- return ignore_paths
-
-
-def get_ignore_files(repo_root: Path, ignore_paths: List[str]) -> Set[Path]:
- """Get all ignore files that match any of the user-defined ignore paths"""
- ignore_files = set()
- for ignore_path in set(ignore_paths):
- # ignore_path may contains matchers (* or **). Use glob() to match user-defined path to actual paths
- for matched_path in repo_root.glob(ignore_path):
- if matched_path.is_file():
- # If the matched path is a file, add that to ignore_files set
- ignore_files.add(matched_path.resolve())
- else:
- # Otherwise, list all Python files in that directory and add all of them to ignore_files set
- ignore_files |= {
- sub_path.resolve()
- for sub_path in matched_path.glob("**/*.py")
- if sub_path.is_file()
- }
- return ignore_files
-
-
-def get_repo_files(repo_root: Path) -> List[Path]:
- """Get the list of all repo files, ignoring undesired files & directories specified in .feastignore"""
- # Read ignore paths from .feastignore and create a set of all files that match any of these paths
- ignore_paths = read_feastignore(repo_root) + [
- ".git",
- ".feastignore",
- ".venv",
- "**/.ipynb_checkpoints",
- "**/.pytest_cache",
- "**/__pycache__",
- ]
- ignore_files = get_ignore_files(repo_root, ignore_paths)
-
- # List all Python files in the root directory (recursively)
- repo_files = {
- p.resolve()
- for p in repo_root.glob("**/*.py")
- if p.is_file() and "__init__.py" != p.name
- }
- # Ignore all files that match any of the ignore paths in .feastignore
- repo_files -= ignore_files
-
- # Sort repo_files to read them in the same order every time
- return sorted(repo_files)
-
-
-def parse_repo(repo_root: Path) -> RepoContents:
- """
- Collects unique Feast object definitions from the given feature repo.
-
- Specifically, if an object foo has already been added, bar will still be added if
- (bar == foo), but not if (bar is foo). This ensures that import statements will
- not result in duplicates, but defining two equal objects will.
- """
- res = RepoContents(
- projects=[],
- data_sources=[],
- entities=[],
- feature_views=[],
- feature_services=[],
- on_demand_feature_views=[],
- stream_feature_views=[],
- permissions=[],
- )
-
- for repo_file in get_repo_files(repo_root):
- module_path = py_path_to_module(repo_file)
- module = importlib.import_module(module_path)
-
- for attr_name in dir(module):
- obj = getattr(module, attr_name)
-
- if isinstance(obj, DataSource) and not any(
- (obj is ds) for ds in res.data_sources
- ):
- res.data_sources.append(obj)
-
- # Handle batch sources defined within stream sources.
- if (
- isinstance(obj, PushSource)
- or isinstance(obj, KafkaSource)
- or isinstance(obj, KinesisSource)
- ):
- batch_source = obj.batch_source
-
- if batch_source and not any(
- (batch_source is ds) for ds in res.data_sources
- ):
- res.data_sources.append(batch_source)
- if (
- isinstance(obj, FeatureView)
- and not any((obj is fv) for fv in res.feature_views)
- and not isinstance(obj, StreamFeatureView)
- and not isinstance(obj, BatchFeatureView)
- ):
- res.feature_views.append(obj)
-
- # Handle batch sources defined with feature views.
- batch_source = obj.batch_source
- if batch_source is not None and not any(
- (batch_source is ds) for ds in res.data_sources
- ):
- res.data_sources.append(batch_source)
-
- # Handle stream sources defined with feature views.
- if obj.stream_source:
- stream_source = obj.stream_source
- if not any((stream_source is ds) for ds in res.data_sources):
- res.data_sources.append(stream_source)
- elif isinstance(obj, StreamFeatureView) and not any(
- (obj is sfv) for sfv in res.stream_feature_views
- ):
- res.stream_feature_views.append(obj)
-
- # Handle batch sources defined with feature views.
- batch_source = obj.batch_source
- if batch_source is not None and not any(
- (batch_source is ds) for ds in res.data_sources
- ):
- res.data_sources.append(batch_source)
- assert obj.stream_source
- stream_source = obj.stream_source
- if not any((stream_source is ds) for ds in res.data_sources):
- res.data_sources.append(stream_source)
- elif isinstance(obj, BatchFeatureView) and not any(
- (obj is bfv) for bfv in res.feature_views
- ):
- res.feature_views.append(obj)
-
- # Handle batch sources defined with feature views.
- batch_source = obj.batch_source
- if batch_source is not None and not any(
- (batch_source is ds) for ds in res.data_sources
- ):
- res.data_sources.append(batch_source)
- elif isinstance(obj, Entity) and not any(
- (obj is entity) for entity in res.entities
- ):
- res.entities.append(obj)
- elif isinstance(obj, FeatureService) and not any(
- (obj is fs) for fs in res.feature_services
- ):
- res.feature_services.append(obj)
- elif isinstance(obj, OnDemandFeatureView) and not any(
- (obj is odfv) for odfv in res.on_demand_feature_views
- ):
- res.on_demand_feature_views.append(obj)
- elif isinstance(obj, Permission) and not any(
- (obj is p) for p in res.permissions
- ):
- res.permissions.append(obj)
- elif isinstance(obj, Project) and not any((obj is p) for p in res.projects):
- res.projects.append(obj)
-
- res.entities.append(DUMMY_ENTITY)
- return res
-
-
-def plan(
- repo_config: RepoConfig,
- repo_path: Path,
- skip_source_validation: bool,
- skip_feature_view_validation: bool = False,
-):
- os.chdir(repo_path)
- repo = _get_repo_contents(repo_path, repo_config.project, repo_config)
- for project in repo.projects:
- repo_config.project = project.name
- store, registry = _get_store_and_registry(repo_config)
- # TODO: When we support multiple projects in a single repo, we should filter repo contents by project
- if not skip_source_validation:
- provider = store._get_provider()
- data_sources = [
- t.batch_source for t in repo.feature_views if t.batch_source is not None
- ]
- # Make sure the data source used by this feature view is supported by Feast
- for data_source in data_sources:
- provider.validate_data_source(store.config, data_source)
-
- registry_diff, infra_diff, _ = store.plan(
- repo, skip_feature_view_validation=skip_feature_view_validation
- )
- click.echo(registry_diff.to_string())
- click.echo(infra_diff.to_string())
-
-
-def _get_repo_contents(
- repo_path,
- project_name: Optional[str] = None,
- repo_config: Optional[RepoConfig] = None,
-):
- sys.dont_write_bytecode = True
- repo = parse_repo(repo_path)
-
- if len(repo.projects) < 1:
- if project_name:
- print(
- f"No project found in the repository. Using project name {project_name} defined in feature_store.yaml"
- )
- project_description = (
- repo_config.project_description if repo_config else None
- )
- repo.projects.append(
- Project(name=project_name, description=project_description or "")
- )
- else:
- print(
- "No project found in the repository. Either define Project in repository or define a project in feature_store.yaml"
- )
- sys.exit(1)
- elif len(repo.projects) == 1:
- if repo.projects[0].name != project_name:
- print(
- "Project object name should match with the project name defined in feature_store.yaml"
- )
- sys.exit(1)
- else:
- print(
- "Multiple projects found in the repository. Currently no support for multiple projects"
- )
- sys.exit(1)
-
- return repo
-
-
-def _get_store_and_registry(repo_config):
- store = FeatureStore(config=repo_config)
- registry = store.registry
- return store, registry
-
-
-def extract_objects_for_apply_delete(project, registry, repo):
- # TODO(achals): This code path should be refactored to handle added & kept entities separately.
- (
- _,
- objs_to_delete,
- objs_to_update,
- objs_to_add,
- ) = extract_objects_for_keep_delete_update_add(registry, project, repo)
-
- all_to_apply: List[
- Union[
- Entity,
- FeatureView,
- OnDemandFeatureView,
- StreamFeatureView,
- FeatureService,
- ]
- ] = []
- for object_type in FEAST_OBJECT_TYPES:
- to_apply = set(objs_to_add[object_type]).union(objs_to_update[object_type])
- all_to_apply.extend(to_apply)
-
- all_to_delete: List[
- Union[
- Entity,
- FeatureView,
- OnDemandFeatureView,
- StreamFeatureView,
- FeatureService,
- ]
- ] = []
- for object_type in FEAST_OBJECT_TYPES:
- all_to_delete.extend(objs_to_delete[object_type])
-
- return (
- all_to_apply,
- all_to_delete,
- set(objs_to_add[FeastObjectType.FEATURE_VIEW]).union(
- set(objs_to_update[FeastObjectType.FEATURE_VIEW])
- ),
- objs_to_delete[FeastObjectType.FEATURE_VIEW],
- )
-
-
-def apply_total_with_repo_instance(
- store: FeatureStore,
- project_name: str,
- registry: BaseRegistry,
- repo: RepoContents,
- skip_source_validation: bool,
- skip_feature_view_validation: bool = False,
- no_promote: bool = False,
-):
- if not skip_source_validation:
- provider = store._get_provider()
- data_sources = [
- t.batch_source for t in repo.feature_views if t.batch_source is not None
- ]
- # Make sure the data source used by this feature view is supported by Feast
- for data_source in data_sources:
- provider.validate_data_source(store.config, data_source)
-
- # For each object in the registry, determine whether it should be kept or deleted.
- (
- all_to_apply,
- all_to_delete,
- views_to_keep,
- views_to_delete,
- ) = extract_objects_for_apply_delete(project_name, registry, repo)
-
- try:
- if store._should_use_plan():
- # Planning phase - compute diffs first without progress bars
- registry_diff, infra_diff, new_infra = store.plan(
- repo,
- skip_feature_view_validation=skip_feature_view_validation,
- )
- click.echo(registry_diff.to_string())
-
- # Only show progress bars if there are actual infrastructure changes
- progress_ctx = None
- if len(infra_diff.infra_object_diffs) > 0:
- from feast.diff.apply_progress import ApplyProgressContext
-
- progress_ctx = ApplyProgressContext()
- progress_ctx.start_overall_progress()
-
- # Apply phase
- store._apply_diffs(
- registry_diff,
- infra_diff,
- new_infra,
- progress_ctx=progress_ctx,
- no_promote=no_promote,
- )
- click.echo(infra_diff.to_string())
- else:
- # Legacy apply path - no progress bars for legacy path
- store.apply(
- all_to_apply,
- objects_to_delete=all_to_delete,
- partial=False,
- skip_feature_view_validation=skip_feature_view_validation,
- no_promote=no_promote,
- )
- log_infra_changes(views_to_keep, views_to_delete)
- finally:
- # Cleanup is handled in the new _apply_diffs method
- pass
-
-
-def log_infra_changes(
- views_to_keep: Set[FeatureView], views_to_delete: Set[FeatureView]
-):
- from colorama import Fore, Style
-
- for view in views_to_keep:
- click.echo(
- f"Deploying infrastructure for {Style.BRIGHT + Fore.GREEN}{view.name}{Style.RESET_ALL}"
- )
- for view in views_to_delete:
- click.echo(
- f"Removing infrastructure for {Style.BRIGHT + Fore.RED}{view.name}{Style.RESET_ALL}"
- )
-
-
-def create_feature_store(
- ctx: click.Context,
-) -> FeatureStore:
- repo = ctx.obj["CHDIR"]
- # If we received a base64 encoded version of feature_store.yaml, use that
- config_base64 = os.getenv(FEATURE_STORE_YAML_ENV_NAME)
- if config_base64:
- print("Received base64 encoded feature_store.yaml")
- config_bytes = base64.b64decode(config_base64)
- # Create a new unique directory for writing feature_store.yaml
- repo_path = Path(tempfile.mkdtemp())
- with open(repo_path / "feature_store.yaml", "wb") as f:
- f.write(config_bytes)
- return FeatureStore(repo_path=str(repo_path.resolve()))
- else:
- fs_yaml_file = ctx.obj["FS_YAML_FILE"]
- cli_check_repo(repo, fs_yaml_file)
- return FeatureStore(repo_path=str(repo), fs_yaml_file=fs_yaml_file)
-
-
-def apply_total(
- repo_config: RepoConfig,
- repo_path: Path,
- skip_source_validation: bool,
- skip_feature_view_validation: bool = False,
- no_promote: bool = False,
-):
- os.chdir(repo_path)
- repo = _get_repo_contents(repo_path, repo_config.project, repo_config)
- for project in repo.projects:
- repo_config.project = project.name
- store, registry = _get_store_and_registry(repo_config)
- if not is_valid_name(project.name):
- print(
- f"{project.name} is not valid. Project name should only have "
- f"alphanumerical values, underscores, and hyphens but not start with an underscore or hyphen."
- )
- sys.exit(1)
- # TODO: When we support multiple projects in a single repo, we should filter repo contents by project. Currently there is no way to associate Feast objects to project.
- print(f"Applying changes for project {project.name}")
- apply_total_with_repo_instance(
- store,
- project.name,
- registry,
- repo,
- skip_source_validation,
- skip_feature_view_validation,
- no_promote=no_promote,
- )
-
-
-def teardown(repo_config: RepoConfig, repo_path: Optional[str]):
- # Cannot pass in both repo_path and repo_config to FeatureStore.
- feature_store = FeatureStore(repo_path=repo_path, config=repo_config)
- feature_store.teardown()
-
-
-def registry_dump(repo_config: RepoConfig, repo_path: Path) -> str:
- """For debugging only: output contents of the metadata registry"""
- registry_config = repo_config.registry
- project = repo_config.project
- registry = Registry(
- project,
- registry_config=registry_config,
- repo_path=repo_path,
- auth_config=repo_config.auth_config,
- )
- registry_dict = registry.to_dict(project=project)
- return json.dumps(registry_dict, indent=2, sort_keys=True)
-
-
-def cli_check_repo(repo_path: Path, fs_yaml_file: Path):
- sys.path.append(str(repo_path))
- if not fs_yaml_file.exists():
- print(
- f"Can't find feature repo configuration file at {fs_yaml_file}. "
- "Make sure you're running feast from an initialized feast repository."
- )
- sys.exit(1)
-
-
-def init_repo(repo_name: str, template: str, repo_path: Optional[str] = None):
- import os
- from pathlib import Path
- from shutil import copytree
-
- from colorama import Fore, Style
-
- # Validate project name
- if not is_valid_name(repo_name):
- raise BadParameter(
- message="Name should be alphanumeric values, underscores, and hyphens but not start with an underscore or hyphen",
- param_hint="PROJECT_DIRECTORY",
- )
-
- # Determine where to create the repository
- if repo_path:
- # User specified a custom path
- target_path = Path(repo_path).resolve()
- target_path.mkdir(parents=True, exist_ok=True)
- display_path = repo_path
- else:
- # Default behavior: create subdirectory with project name
- target_path = Path(os.path.join(Path.cwd(), repo_name))
- target_path.mkdir(exist_ok=True)
- display_path = repo_name
-
- repo_config_path = target_path / "feature_store.yaml"
-
- if repo_config_path.exists():
- print(
- f"The directory {Style.BRIGHT + Fore.GREEN}{display_path}{Style.RESET_ALL} contains an existing feature "
- f"store repository that may cause a conflict"
- )
- print()
- sys.exit(1)
-
- # Copy template directory
- template_path = str(Path(Path(__file__).parent / "templates" / template).absolute())
- if not os.path.exists(template_path):
- raise IOError(f"Could not find template {template}")
- copytree(template_path, str(target_path), dirs_exist_ok=True)
-
- # Rename gitignore files back to .gitignore
- for gitignore_path in target_path.rglob("gitignore"):
- gitignore_path.rename(gitignore_path.with_name(".gitignore"))
-
- # Seed the repository
- bootstrap_path = target_path / "bootstrap.py"
- if os.path.exists(bootstrap_path):
- import importlib.util
-
- spec = importlib.util.spec_from_file_location("bootstrap", str(bootstrap_path))
- assert isinstance(spec, ModuleSpec)
- bootstrap = importlib.util.module_from_spec(spec)
- assert isinstance(spec.loader, Loader)
- spec.loader.exec_module(bootstrap)
- bootstrap.bootstrap() # type: ignore
- os.remove(bootstrap_path)
-
- # Template the feature_store.yaml file
- feature_store_yaml_path = target_path / "feature_repo" / "feature_store.yaml"
- replace_str_in_file(
- feature_store_yaml_path, "project: my_project", f"project: {repo_name}"
- )
-
- # Remove the __pycache__ folder if it exists
- import shutil
-
- shutil.rmtree(target_path / "__pycache__", ignore_errors=True)
-
- import click
-
- click.echo()
- click.echo(
- f"Creating a new Feast repository in {Style.BRIGHT + Fore.GREEN}{target_path}{Style.RESET_ALL}."
- )
- click.echo()
-
-
-def is_valid_name(name: str) -> bool:
- """A name should be alphanumeric values, underscores, and hyphens but not start with an underscore"""
- return (
- not name.startswith(("_", "-")) and re.compile(r"[^\w-]+").search(name) is None
- )
-
-
-def generate_project_name() -> str:
- """Generates a unique project name"""
- return f"{random.choice(adjectives)}_{random.choice(animals)}"
+import base64
+import importlib
+import json
+import logging
+import os
+import random
+import re
+import sys
+import tempfile
+from importlib.abc import Loader
+from importlib.machinery import ModuleSpec
+from pathlib import Path
+from typing import List, Optional, Set, Union
+
+import click
+from click.exceptions import BadParameter
+
+from feast import PushSource
+from feast.batch_feature_view import BatchFeatureView
+from feast.constants import FEATURE_STORE_YAML_ENV_NAME
+from feast.data_source import DataSource, KafkaSource, KinesisSource
+from feast.diff.registry_diff import extract_objects_for_keep_delete_update_add
+from feast.entity import Entity
+from feast.feature_service import FeatureService
+from feast.feature_store import FeatureStore
+from feast.feature_view import DUMMY_ENTITY, FeatureView
+from feast.file_utils import replace_str_in_file
+from feast.infra.registry.base_registry import BaseRegistry
+from feast.infra.registry.registry import FEAST_OBJECT_TYPES, FeastObjectType, Registry
+from feast.names import adjectives, animals
+from feast.on_demand_feature_view import OnDemandFeatureView
+from feast.permissions.permission import Permission
+from feast.project import Project
+from feast.repo_config import RepoConfig
+from feast.repo_contents import RepoContents
+from feast.stream_feature_view import StreamFeatureView
+
+logger = logging.getLogger(__name__)
+
+
+def py_path_to_module(path: Path) -> str:
+    """Convert a ``.py`` path under the current working directory into a dotted module path."""
+    relative = str(path.relative_to(os.getcwd()))
+    module_path = relative[: -len(".py")]  # drops the last three characters (the ".py" suffix)
+    for old, new in (("./", ""), ("/", "."), ("\\", ".")):
+        module_path = module_path.replace(old, new)
+    return module_path
+
+
+def read_feastignore(repo_root: Path) -> List[str]:
+    """Return the ignore patterns listed in ``<repo_root>/.feastignore``.
+
+    Text from the first "#" to the end of a line is treated as a comment, surrounding
+    whitespace is stripped, and lines left empty are dropped. A missing file yields [].
+    """
+    feast_ignore = repo_root / ".feastignore"
+    if not feast_ignore.is_file():
+        return []
+    patterns = []
+    for raw_line in feast_ignore.read_text().strip().split("\n"):
+        # Keep only the part before the first "#" (if any), then trim whitespace
+        pattern = raw_line.split("#", 1)[0].strip()
+        # Blank and comment-only lines contribute nothing
+        if pattern:
+            patterns.append(pattern)
+    return patterns
+
+
+def get_ignore_files(repo_root: Path, ignore_paths: List[str]) -> Set[Path]:
+    """Expand ignore patterns into the set of resolved file paths they cover.
+
+    Each pattern is globbed relative to ``repo_root``. A match that is a file is taken
+    as-is; any other match contributes every ``*.py`` file found beneath it.
+    """
+    resolved: Set[Path] = set()
+    for pattern in set(ignore_paths):
+        for match in repo_root.glob(pattern):
+            if not match.is_file():
+                # Non-file match: collect the Python files below it, recursively
+                nested = (p for p in match.glob("**/*.py") if p.is_file())
+                resolved.update(p.resolve() for p in nested)
+                continue
+            # Plain file match: ignore exactly this file
+            resolved.add(match.resolve())
+    return resolved
+
+
+def get_repo_files(repo_root: Path) -> List[Path]:
+    """Return the repo's Python files, minus ignored ones, in sorted order.
+
+    Every ``*.py`` file under ``repo_root`` other than ``__init__.py`` is a candidate; files
+    matched by ``.feastignore`` or by the built-in patterns below are then removed.
+    """
+    # Patterns that are always ignored, on top of whatever .feastignore lists
+    defaults = [
+        ".git",
+        ".feastignore",
+        ".venv",
+        "**/.ipynb_checkpoints",
+        "**/.pytest_cache",
+        "**/__pycache__",
+    ]
+    skip = get_ignore_files(repo_root, read_feastignore(repo_root) + defaults)
+
+    # Resolve each candidate so it can be compared with the resolved paths in `skip`
+    candidates = set()
+    for candidate in repo_root.glob("**/*.py"):
+        if candidate.is_file() and candidate.name != "__init__.py":
+            candidates.add(candidate.resolve())
+    # Sort so the files are read in the same order every time
+    return sorted(candidates - skip)
+
+
+def parse_repo(repo_root: Path) -> RepoContents:
+ """
+ Import every file returned by get_repo_files() and collect the Feast objects they define.
+
+ Objects are de-duplicated by identity, not equality: if foo has already been added, bar
+ is still added when (bar == foo) but not when (bar is foo). Import statements therefore
+ do not create duplicates, but two equal definitions do. DUMMY_ENTITY is appended last.
+ """
+ res = RepoContents(
+ projects=[],
+ data_sources=[],
+ entities=[],
+ feature_views=[],
+ feature_services=[],
+ on_demand_feature_views=[],
+ stream_feature_views=[],
+ permissions=[],
+ )
+
+ for repo_file in get_repo_files(repo_root):
+ module_path = py_path_to_module(repo_file)
+ module = importlib.import_module(module_path)
+
+ for attr_name in dir(module):
+ obj = getattr(module, attr_name)
+
+ if isinstance(obj, DataSource) and not any(
+ (obj is ds) for ds in res.data_sources
+ ):
+ res.data_sources.append(obj)
+
+ # Handle batch sources defined within stream sources.
+ if (
+ isinstance(obj, PushSource)
+ or isinstance(obj, KafkaSource)
+ or isinstance(obj, KinesisSource)
+ ):
+ batch_source = obj.batch_source
+
+ if batch_source and not any(
+ (batch_source is ds) for ds in res.data_sources
+ ):
+ res.data_sources.append(batch_source)
+ if (
+ isinstance(obj, FeatureView)
+ and not any((obj is fv) for fv in res.feature_views)
+ and not isinstance(obj, StreamFeatureView)
+ and not isinstance(obj, BatchFeatureView)
+ ):
+ res.feature_views.append(obj)
+
+ # Handle batch sources defined with feature views.
+ batch_source = obj.batch_source
+ if batch_source is not None and not any(
+ (batch_source is ds) for ds in res.data_sources
+ ):
+ res.data_sources.append(batch_source)
+
+ # Handle stream sources defined with feature views.
+ if obj.stream_source:
+ stream_source = obj.stream_source
+ if not any((stream_source is ds) for ds in res.data_sources):
+ res.data_sources.append(stream_source)
+ elif isinstance(obj, StreamFeatureView) and not any(
+ (obj is sfv) for sfv in res.stream_feature_views
+ ):
+ res.stream_feature_views.append(obj)
+
+ # Handle batch sources defined with stream feature views.
+ batch_source = obj.batch_source
+ if batch_source is not None and not any(
+ (batch_source is ds) for ds in res.data_sources
+ ):
+ res.data_sources.append(batch_source)
+ assert obj.stream_source
+ stream_source = obj.stream_source
+ if not any((stream_source is ds) for ds in res.data_sources):
+ res.data_sources.append(stream_source)
+ elif isinstance(obj, BatchFeatureView) and not any(
+ (obj is bfv) for bfv in res.feature_views
+ ):
+ res.feature_views.append(obj)
+
+ # Handle batch sources defined with batch feature views.
+ batch_source = obj.batch_source
+ if batch_source is not None and not any(
+ (batch_source is ds) for ds in res.data_sources
+ ):
+ res.data_sources.append(batch_source)
+ elif isinstance(obj, Entity) and not any(
+ (obj is entity) for entity in res.entities
+ ):
+ res.entities.append(obj)
+ elif isinstance(obj, FeatureService) and not any(
+ (obj is fs) for fs in res.feature_services
+ ):
+ res.feature_services.append(obj)
+ elif isinstance(obj, OnDemandFeatureView) and not any(
+ (obj is odfv) for odfv in res.on_demand_feature_views
+ ):
+ res.on_demand_feature_views.append(obj)
+ elif isinstance(obj, Permission) and not any(
+ (obj is p) for p in res.permissions
+ ):
+ res.permissions.append(obj)
+ elif isinstance(obj, Project) and not any((obj is p) for p in res.projects):
+ res.projects.append(obj)
+
+ res.entities.append(DUMMY_ENTITY)
+ return res
+
+
+def plan(
+    repo_config: RepoConfig,
+    repo_path: Path,
+    skip_source_validation: bool,
+    skip_feature_view_validation: bool = False,
+):
+    """Echo the registry and infra diffs that ``store.plan`` computes for the repo at ``repo_path``."""
+    os.chdir(repo_path)
+    repo = _get_repo_contents(repo_path, repo_config.project, repo_config)
+    for project in repo.projects:
+        repo_config.project = project.name
+        store, _registry = _get_store_and_registry(repo_config)
+        # TODO: When we support multiple projects in a single repo, we should filter repo contents by project
+        if not skip_source_validation:
+            provider = store._get_provider()
+            batch_sources = [fv.batch_source for fv in repo.feature_views]
+            # Make sure the data source used by each feature view is supported by Feast
+            for source in batch_sources:
+                if source is not None:
+                    provider.validate_data_source(store.config, source)
+
+        registry_diff, infra_diff, _ = store.plan(
+            repo, skip_feature_view_validation=skip_feature_view_validation
+        )
+        click.echo(registry_diff.to_string())
+        click.echo(infra_diff.to_string())
+
+
+def _get_repo_contents(
+    repo_path,
+    project_name: Optional[str] = None,
+    repo_config: Optional[RepoConfig] = None,
+):
+    """Parse the repo and make sure it ends up with exactly one project.
+
+    With no Project defined, one is created from ``project_name`` (description taken from
+    ``repo_config`` when given). Every other mismatch prints a message and exits with 1.
+    """
+    sys.dont_write_bytecode = True
+    repo = parse_repo(repo_path)
+    project_count = len(repo.projects)
+    if project_count < 1 and project_name:
+        print(
+            f"No project found in the repository. Using project name {project_name} defined in feature_store.yaml"
+        )
+        description = repo_config.project_description if repo_config else None
+        repo.projects.append(Project(name=project_name, description=description or ""))
+    elif project_count < 1:
+        print(
+            "No project found in the repository. Either define Project in repository or define a project in feature_store.yaml"
+        )
+        sys.exit(1)
+    elif project_count == 1:
+        if repo.projects[0].name != project_name:
+            print(
+                "Project object name should match with the project name defined in feature_store.yaml"
+            )
+            sys.exit(1)
+    else:
+        print(
+            "Multiple projects found in the repository. Currently no support for multiple projects"
+        )
+        sys.exit(1)
+
+    return repo
+
+
+def _get_store_and_registry(repo_config):
+    """Build a FeatureStore from ``repo_config`` and return it together with its registry."""
+    feature_store = FeatureStore(config=repo_config)
+    return feature_store, feature_store.registry
+
+
+def extract_objects_for_apply_delete(project, registry, repo):
+    """Split the registry diff into what to apply and what to delete.
+
+    Returns a 4-tuple of: all objects to apply (added or updated), all objects to delete,
+    the set of added-or-updated feature views, and the feature views to delete.
+    """
+    # TODO(achals): This code path should be refactored to handle added & kept entities separately.
+    _, objs_to_delete, objs_to_update, objs_to_add = (
+        extract_objects_for_keep_delete_update_add(registry, project, repo)
+    )
+    all_to_apply: List[
+        Union[
+            Entity,
+            FeatureView,
+            OnDemandFeatureView,
+            StreamFeatureView,
+            FeatureService,
+        ]
+    ] = []
+    for object_type in FEAST_OBJECT_TYPES:
+        added, updated = objs_to_add[object_type], objs_to_update[object_type]
+        # Going through a set means an object is applied once even if it appears repeatedly
+        all_to_apply.extend(set(added).union(updated))
+
+    all_to_delete: List[
+        Union[
+            Entity,
+            FeatureView,
+            OnDemandFeatureView,
+            StreamFeatureView,
+            FeatureService,
+        ]
+    ] = [obj for kind in FEAST_OBJECT_TYPES for obj in objs_to_delete[kind]]
+
+    added_views = set(objs_to_add[FeastObjectType.FEATURE_VIEW])
+    updated_views = set(objs_to_update[FeastObjectType.FEATURE_VIEW])
+    return (
+        all_to_apply,
+        all_to_delete,
+        added_views.union(updated_views),
+        objs_to_delete[FeastObjectType.FEATURE_VIEW],
+    )
+
+
+def apply_total_with_repo_instance(
+    store: FeatureStore,
+    project_name: str,
+    registry: BaseRegistry,
+    repo: RepoContents,
+    skip_source_validation: bool,
+    skip_feature_view_validation: bool = False,
+    no_promote: bool = False,
+):
+    """Apply ``repo`` to ``store`` for one project, then submit baseline monitoring jobs.
+
+    Batch sources are validated first unless ``skip_source_validation`` is set. The
+    plan/diff path runs when ``store._should_use_plan()`` is true; otherwise the legacy
+    ``store.apply`` path runs.
+    """
+    if not skip_source_validation:
+        provider = store._get_provider()
+        batch_sources = [fv.batch_source for fv in repo.feature_views]
+        # Make sure the data source used by each feature view is supported by Feast
+        for source in batch_sources:
+            if source is not None:
+                provider.validate_data_source(store.config, source)
+
+    # For each object in the registry, determine whether it should be kept or deleted.
+    # This runs on both paths, although only the legacy path uses the result.
+    to_apply, to_delete, views_to_keep, views_to_delete = (
+        extract_objects_for_apply_delete(project_name, registry, repo)
+    )
+
+    if not store._should_use_plan():
+        # Legacy apply path - no progress bars for legacy path
+        store.apply(
+            to_apply,
+            objects_to_delete=to_delete,
+            partial=False,
+            skip_feature_view_validation=skip_feature_view_validation,
+            no_promote=no_promote,
+        )
+        log_infra_changes(views_to_keep, views_to_delete)
+    else:
+        # Planning phase - compute diffs first without progress bars
+        registry_diff, infra_diff, new_infra = store.plan(
+            repo,
+            skip_feature_view_validation=skip_feature_view_validation,
+        )
+        click.echo(registry_diff.to_string())
+
+        # Only show progress bars if there are actual infrastructure changes
+        progress_ctx = None
+        if len(infra_diff.infra_object_diffs) > 0:
+            from feast.diff.apply_progress import ApplyProgressContext
+
+            progress_ctx = ApplyProgressContext()
+            progress_ctx.start_overall_progress()
+
+        # Apply phase
+        store._apply_diffs(
+            registry_diff,
+            infra_diff,
+            new_infra,
+            progress_ctx=progress_ctx,
+            no_promote=no_promote,
+        )
+        click.echo(infra_diff.to_string())
+
+    # Reached only when the apply above did not raise
+    _submit_baseline_jobs_if_needed(store, project_name, repo)
+
+
+def _submit_baseline_jobs_if_needed(store, project_name, repo):
+    """Best-effort: submit baseline DQM jobs for the repo's feature views after an apply.
+
+    Does nothing when a DQM config is present with ``auto_baseline`` disabled. Any exception
+    raised while submitting is logged at debug level and swallowed.
+    """
+    dqm_config = store.config.dqm_config
+    auto_baseline_disabled = dqm_config is not None and not dqm_config.auto_baseline
+    if auto_baseline_disabled:
+        return
+    try:
+        from feast.monitoring.monitoring_service import MonitoringService
+
+        service = MonitoringService(store)
+        views = list(repo.feature_views)
+        if views:
+            queued = service.submit_baseline_for_new_features(
+                project=project_name, feature_views=views
+            )
+            for job_id in queued:
+                click.echo(f" → Queued baseline metrics computation (DQM job: {job_id})")
+    except Exception:
+        logger.debug("Monitoring baseline submission skipped (non-critical)", exc_info=True)
+
+
+def log_infra_changes(
+    views_to_keep: Set[FeatureView], views_to_delete: Set[FeatureView]
+):
+    """Echo a colored line for each view being deployed, then for each view being removed."""
+    from colorama import Fore, Style
+
+    # Kept views are announced first (green), deleted views afterwards (red)
+    for view in views_to_keep:
+        deploy_msg = f"Deploying infrastructure for {Style.BRIGHT + Fore.GREEN}{view.name}{Style.RESET_ALL}"
+        click.echo(deploy_msg)
+    for view in views_to_delete:
+        removal_msg = f"Removing infrastructure for {Style.BRIGHT + Fore.RED}{view.name}{Style.RESET_ALL}"
+        click.echo(removal_msg)
+
+
+def create_feature_store(
+    ctx: click.Context,
+) -> FeatureStore:
+    """Build a FeatureStore from the base64 config in the environment, else from the CLI repo."""
+    repo = ctx.obj["CHDIR"]
+    encoded_config = os.getenv(FEATURE_STORE_YAML_ENV_NAME)
+    if not encoded_config:
+        # No inline config: use the repo directory and yaml file stored on the click context
+        fs_yaml_file = ctx.obj["FS_YAML_FILE"]
+        cli_check_repo(repo, fs_yaml_file)
+        return FeatureStore(repo_path=str(repo), fs_yaml_file=fs_yaml_file)
+
+    print("Received base64 encoded feature_store.yaml")
+    decoded_config = base64.b64decode(encoded_config)
+    # Write the decoded yaml into a new unique temporary directory used as the repo path
+    temp_repo = Path(tempfile.mkdtemp())
+    (temp_repo / "feature_store.yaml").write_bytes(decoded_config)
+    return FeatureStore(repo_path=str(temp_repo.resolve()))
+
+
+def apply_total(
+    repo_config: RepoConfig,
+    repo_path: Path,
+    skip_source_validation: bool,
+    skip_feature_view_validation: bool = False,
+    no_promote: bool = False,
+):
+    """Apply the repo at ``repo_path`` for each project it defines.
+
+    Changes the working directory to ``repo_path`` and sets ``repo_config.project``. A project
+    name is validated before its FeatureStore is built; an invalid name exits with status 1.
+    """
+    os.chdir(repo_path)
+    repo = _get_repo_contents(repo_path, repo_config.project, repo_config)
+    for project in repo.projects:
+        if not is_valid_name(project.name):
+            print(
+                f"{project.name} is not valid. Project name should only have "
+                f"alphanumerical values, underscores, and hyphens but not start with an underscore or hyphen."
+            )
+            sys.exit(1)
+        repo_config.project = project.name
+        store, registry = _get_store_and_registry(repo_config)
+        # TODO: When we support multiple projects in a single repo, we should filter repo contents by project. Currently there is no way to associate Feast objects to project.
+        print(f"Applying changes for project {project.name}")
+        apply_total_with_repo_instance(
+            store, project.name, registry, repo, skip_source_validation,
+            skip_feature_view_validation, no_promote=no_promote,
+        )
+
+
+def teardown(repo_config: RepoConfig, repo_path: Optional[str]):
+    """Tear down the FeatureStore constructed from ``repo_config`` and ``repo_path``."""
+    # NOTE(review): an older comment here said both arguments cannot be passed together, yet both are - confirm.
+    FeatureStore(repo_path=repo_path, config=repo_config).teardown()
+
+
+def registry_dump(repo_config: RepoConfig, repo_path: Path) -> str:
+    """For debugging only: return the metadata registry as indented, key-sorted JSON."""
+    project_name = repo_config.project
+    # The same project name is used to open the registry and to select what is dumped
+    debug_registry = Registry(
+        project_name,
+        registry_config=repo_config.registry,
+        repo_path=repo_path,
+        auth_config=repo_config.auth_config,
+    )
+    contents = debug_registry.to_dict(project=project_name)
+    return json.dumps(contents, indent=2, sort_keys=True)
+
+
+def cli_check_repo(repo_path: Path, fs_yaml_file: Path):
+    """Append ``repo_path`` to ``sys.path`` and exit with status 1 if ``fs_yaml_file`` is missing."""
+    sys.path.append(str(repo_path))
+    if fs_yaml_file.exists():
+        return
+    print(f"Can't find feature repo configuration file at {fs_yaml_file}. "
+          "Make sure you're running feast from an initialized feast repository.")
+    sys.exit(1)
+
+
+def init_repo(repo_name: str, template: str, repo_path: Optional[str] = None):
+    """Create a new feature repository from one of the bundled templates.
+
+    The repo is created at ``repo_path`` when given, otherwise in ``<cwd>/<repo_name>``.
+    Raises BadParameter for an invalid ``repo_name`` and IOError for an unknown template;
+    exits with status 1 if the target already holds a top-level ``feature_store.yaml``.
+    """
+    import os
+    import shutil
+    from pathlib import Path
+    from shutil import copytree
+
+    import click
+    from colorama import Fore, Style
+
+    if not is_valid_name(repo_name):
+        raise BadParameter(
+            message="Name should be alphanumeric values, underscores, and hyphens but not start with an underscore or hyphen",
+            param_hint="PROJECT_DIRECTORY",
+        )
+
+    # Pick the directory to populate and the label used for it in messages
+    if repo_path:
+        # User specified a custom path: resolve it and create any missing parents
+        target_path = Path(repo_path).resolve()
+        target_path.mkdir(parents=True, exist_ok=True)
+        display_path = repo_path
+    else:
+        # Default behavior: create subdirectory with project name
+        target_path = Path(os.path.join(Path.cwd(), repo_name))
+        target_path.mkdir(exist_ok=True)
+        display_path = repo_name
+
+    # Stop if the target directory already has a top-level feature_store.yaml
+    if (target_path / "feature_store.yaml").exists():
+        print(
+            f"The directory {Style.BRIGHT + Fore.GREEN}{display_path}{Style.RESET_ALL} contains an existing feature "
+            f"store repository that may cause a conflict"
+        )
+        print()
+        sys.exit(1)
+
+    # Copy template directory
+    template_path = str(Path(Path(__file__).parent / "templates" / template).absolute())
+    if not os.path.exists(template_path):
+        raise IOError(f"Could not find template {template}")
+    copytree(template_path, str(target_path), dirs_exist_ok=True)
+
+    # Rename gitignore files back to .gitignore
+    for gitignore_path in target_path.rglob("gitignore"):
+        gitignore_path.rename(gitignore_path.with_name(".gitignore"))
+
+    # Seed the repository: if the copied tree has a bootstrap.py, run its bootstrap() and delete the file
+    bootstrap_path = target_path / "bootstrap.py"
+    if os.path.exists(bootstrap_path):
+        import importlib.util
+
+        spec = importlib.util.spec_from_file_location("bootstrap", str(bootstrap_path))
+        assert isinstance(spec, ModuleSpec)
+        bootstrap = importlib.util.module_from_spec(spec)
+        assert isinstance(spec.loader, Loader)
+        spec.loader.exec_module(bootstrap)
+        bootstrap.bootstrap()  # type: ignore
+        os.remove(bootstrap_path)
+
+    # Template the feature_store.yaml file with the new project name
+    yaml_path = target_path / "feature_repo" / "feature_store.yaml"
+    replace_str_in_file(yaml_path, "project: my_project", f"project: {repo_name}")
+
+    # Remove the __pycache__ folder if it exists
+    shutil.rmtree(target_path / "__pycache__", ignore_errors=True)
+
+    click.echo()
+    click.echo(
+        f"Creating a new Feast repository in {Style.BRIGHT + Fore.GREEN}{target_path}{Style.RESET_ALL}."
+    )
+    click.echo()
+
+
+def is_valid_name(name: str) -> bool:
+    """True if ``name`` is non-empty, has only word characters and hyphens, and does not start with "_" or "-"."""
+    if not name or name.startswith(("_", "-")):
+        return False
+    return re.compile(r"[^\w-]+").search(name) is None
+
+
+def generate_project_name() -> str:
+    """Return a random "<adjective>_<animal>" project name (uniqueness is not enforced here)."""
+    return "_".join((random.choice(adjectives), random.choice(animals)))
diff --git a/sdk/python/tests/integration/monitoring/__init__.py b/sdk/python/tests/integration/monitoring/__init__.py
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/sdk/python/tests/integration/monitoring/__init__.py
@@ -0,0 +1 @@
+
diff --git a/sdk/python/tests/integration/monitoring/test_monitoring_integration.py b/sdk/python/tests/integration/monitoring/test_monitoring_integration.py
new file mode 100644
index 00000000000..213244e1010
--- /dev/null
+++ b/sdk/python/tests/integration/monitoring/test_monitoring_integration.py
@@ -0,0 +1,1039 @@
+"""Integration tests for the monitoring feature.
+
+Tests cover:
+- Auto-compute (all granularities from source timestamps)
+- Compute baseline (idempotent)
+- Transient compute
+- DQM job lifecycle
+- CLI commands
+- REST API endpoints
+- RBAC enforcement
+- Compute engine dispatch (SQL push-down vs Python fallback)
+- Log source monitoring (feature serving logs)
+"""
+
+from datetime import date, datetime, timezone
+from unittest.mock import MagicMock, patch
+
+import pyarrow as pa
+import pytest
+from click.testing import CliRunner
+
+from feast.monitoring.monitoring_service import VALID_GRANULARITIES, MonitoringService
+from feast.types import PrimitiveFeastType
+
+# ------------------------------------------------------------------ #
+# Shared helpers
+# ------------------------------------------------------------------ #
+
+
+def _make_feature_field(name, dtype):
+    """Return a MagicMock standing in for a feature field.
+
+    The mock exposes the given ``name`` and ``dtype`` as plain attributes.
+    """
+    mock_field = MagicMock()
+    # ``name`` must be set as an attribute: passing it to the constructor
+    # would only label the mock itself.
+    mock_field.configure_mock(name=name, dtype=dtype)
+    return mock_field
+
+
+def _make_feature_view(name, features, entities=None, batch_source=None):
+ """Build a MagicMock standing in for a feature view.
+
+ Sets ``name``, ``features``, ``entities`` (an empty list when falsy) and
+ ``batch_source`` on the mock. A MagicMock source is created when
+ ``batch_source`` is None.
+ """
+ fv = MagicMock()
+ fv.name = name
+ fv.features = features
+ fv.entities = entities or []
+ if batch_source is None:
+ batch_source = MagicMock()
+ # NOTE(review): indentation was lost in this extract; presumably the two
+ # assignments below configure only the default source created above.
+ batch_source.timestamp_field = "event_timestamp"
+ batch_source.created_timestamp_column = ""
+ fv.batch_source = batch_source
+ return fv
+
+
+def _make_feature_service(name, fv_names, logging_config=None, feature_map=None):
+ """Create a mock FeatureService.
+
+ Args:
+ name: value assigned to the mock's ``name`` attribute.
+ fv_names: feature view names; one mock projection is created per name.
+ logging_config: value assigned to the mock's ``logging_config`` attribute.
+ feature_map: optional dict mapping view_name -> list of feature names.
+ Used to build realistic projections with features and name_to_use().
+
+ Returns:
+ The configured MagicMock.
+ """
+ fs = MagicMock()
+ fs.name = name
+ # MagicMock(name=...) only labels the mock; the ``name`` attribute itself is
+ # assigned explicitly in the loop below.
+ fs.feature_view_projections = [MagicMock(name=n) for n in fv_names]
+ for proj, n in zip(fs.feature_view_projections, fv_names):
+ proj.name = n
+ proj.name_to_use.return_value = n
+ # Projections listed in feature_map get one mock per feature name; all
+ # other projections get an empty feature list.
+ if feature_map and n in feature_map:
+ feats = []
+ for fname in feature_map[n]:
+ f = MagicMock()
+ f.name = fname
+ feats.append(f)
+ proj.features = feats
+ else:
+ proj.features = []
+ fs.logging_config = logging_config
+ return fs
+
+
+def _make_logging_config_with_source(log_table_schema):
+    """Return ``(logging_config, data_source)`` mocks wired to each other.
+
+    ``logging_config.destination.to_data_source()`` yields ``data_source``,
+    whose ``timestamp_field`` is ``"__log_timestamp"`` and whose
+    ``created_timestamp_column`` is empty. ``log_table_schema`` is accepted
+    but not used by this helper.
+    """
+    log_source = MagicMock(
+        timestamp_field="__log_timestamp",
+        created_timestamp_column="",
+    )
+    config = MagicMock()
+    config.destination.to_data_source.return_value = log_source
+    return config, log_source
+
+
+def _make_mock_store(feature_views, feature_services=None):
+ """Create a mock FeatureStore with offline store that uses Python fallback.
+
+ Args:
+ feature_views: list returned by ``registry.list_feature_views``; its first
+ element (if any) is returned by ``registry.get_feature_view``.
+ feature_services: optional list returned by
+ ``registry.list_feature_services``; its first element (if any) is
+ returned by ``registry.get_feature_service``.
+
+ Returns:
+ The configured MagicMock store; the provider mock is reachable through
+ ``store._get_provider.return_value``.
+ """
+ store = MagicMock()
+ store.project = "test_project"
+ store.config.project = "test_project"
+ store.config.offline_store = MagicMock()
+
+ store.registry.list_feature_views.return_value = feature_views
+ store.registry.list_entities.return_value = []
+ store.registry.list_feature_services.return_value = feature_services or []
+
+ # Lookups ignore the requested name and always hand back the first item.
+ if feature_views:
+ store.registry.get_feature_view.return_value = feature_views[0]
+
+ if feature_services:
+ store.registry.get_feature_service.return_value = feature_services[0]
+
+ # Five-row table returned by every pull_all_from_table_or_query() call.
+ arrow_table = pa.table(
+ {
+ "conv_rate": [0.1, 0.5, 0.9, 0.3, 0.7],
+ "acc_rate": [0.8, 0.6, 0.4, 0.9, 0.2],
+ "city": ["NYC", "LA", "NYC", "SF", "LA"],
+ "event_timestamp": [
+ datetime(2025, 3, 25, tzinfo=timezone.utc),
+ datetime(2025, 3, 26, tzinfo=timezone.utc),
+ datetime(2025, 3, 26, tzinfo=timezone.utc),
+ datetime(2025, 3, 27, tzinfo=timezone.utc),
+ datetime(2025, 3, 27, tzinfo=timezone.utc),
+ ],
+ }
+ )
+
+ mock_retrieval = MagicMock()
+ mock_retrieval.to_arrow.return_value = arrow_table
+
+ provider = MagicMock()
+ provider.offline_store.pull_all_from_table_or_query.return_value = mock_retrieval
+ # Both push-down hooks raise NotImplementedError by default; tests that want
+ # push-down reset side_effect and set a return_value.
+ provider.offline_store.compute_monitoring_metrics.side_effect = NotImplementedError
+ provider.offline_store.get_monitoring_max_timestamp.side_effect = (
+ NotImplementedError
+ )
+
+ # In-memory stand-in for persisted job records, keyed by job_id.
+ job_store = {}
+
+ def _mock_save(config, metric_type, metrics):
+ # Only "job" records are retained; other metric types are dropped.
+ if metric_type == "job":
+ for m in metrics:
+ job_store[m["job_id"]] = dict(m)
+
+ def _mock_query(config, project, metric_type, filters=None, **kwargs):
+ # Jobs are filtered by exact match on every non-None filter value; any
+ # other metric type yields an empty result.
+ if metric_type == "job":
+ results = list(job_store.values())
+ if filters:
+ for key, value in filters.items():
+ if value is not None:
+ results = [r for r in results if r.get(key) == value]
+ return results
+ return []
+
+ provider.offline_store.ensure_monitoring_tables.return_value = None
+ provider.offline_store.save_monitoring_metrics.side_effect = _mock_save
+ provider.offline_store.query_monitoring_metrics.side_effect = _mock_query
+ provider.offline_store.clear_monitoring_baseline.return_value = None
+
+ store._get_provider.return_value = provider
+
+ return store
+
+
+# ------------------------------------------------------------------ #
+# Test: Auto-compute
+# ------------------------------------------------------------------ #
+
+
+class TestAutoCompute:
+    """Auto-compute against the mock store built by ``_make_mock_store``."""
+
+    @staticmethod
+    def _service_and_store():
+        """Return ``(MonitoringService, store)`` for one FLOAT64 feature view."""
+        conv_rate = _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)
+        mock_store = _make_mock_store([_make_feature_view("driver_stats", [conv_rate])])
+        return MonitoringService(mock_store), mock_store
+
+    def test_auto_compute_all_granularities(self):
+        """Without a view filter every valid granularity is reported and saved."""
+        service, mock_store = self._service_and_store()
+
+        outcome = service.auto_compute(project="test_project")
+
+        assert outcome["status"] == "completed"
+        assert outcome["computed_feature_views"] == 1
+        reported = outcome["granularities"]
+        assert len(reported) == len(VALID_GRANULARITIES)
+        for granularity in VALID_GRANULARITIES:
+            assert granularity in reported
+
+        offline_store = mock_store._get_provider.return_value.offline_store
+        offline_store.save_monitoring_metrics.assert_called()
+
+    def test_auto_compute_specific_view(self):
+        """Restricting the run to one named view still completes for that view."""
+        service, _ = self._service_and_store()
+
+        outcome = service.auto_compute(
+            project="test_project",
+            feature_view_name="driver_stats",
+        )
+
+        assert outcome["status"] == "completed"
+        assert outcome["computed_feature_views"] == 1
+
+
+# ------------------------------------------------------------------ #
+# Test: Compute baseline
+# ------------------------------------------------------------------ #
+
+
+class TestComputeBaseline:
+ """Baseline computation, including skipping features that already have one."""
+
+ def test_compute_baseline_for_new_features(self):
+ """With no stored baseline, both features are computed, cleared and saved."""
+ fv = _make_feature_view(
+ "driver_stats",
+ [
+ _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64),
+ _make_feature_field("city", PrimitiveFeastType.STRING),
+ ],
+ )
+ store = _make_mock_store([fv])
+ svc = MonitoringService(store)
+
+ result = svc.compute_baseline(project="test_project")
+
+ assert result["status"] == "completed"
+ assert result["is_baseline"] is True
+ assert result["computed_features"] == 2
+
+ provider = store._get_provider.return_value
+ provider.offline_store.clear_monitoring_baseline.assert_called()
+ provider.offline_store.save_monitoring_metrics.assert_called()
+
+ def test_baseline_idempotent_skips_existing(self):
+ """A feature reported as already baselined is not recomputed."""
+ fv = _make_feature_view(
+ "driver_stats",
+ [
+ _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64),
+ _make_feature_field("acc_rate", PrimitiveFeastType.FLOAT64),
+ ],
+ )
+ store = _make_mock_store([fv])
+
+ # Simulate conv_rate already has baseline via query_monitoring_metrics
+ provider = store._get_provider.return_value
+ existing_baseline = {
+ "project_id": "test_project",
+ "feature_view_name": "driver_stats",
+ "feature_name": "conv_rate",
+ "metric_date": "2025-01-01",
+ "granularity": "daily",
+ "data_source_type": "batch",
+ "computed_at": datetime.now(timezone.utc).isoformat(),
+ "is_baseline": True,
+ "feature_type": "numeric",
+ "row_count": 100,
+ "null_count": 0,
+ "null_rate": 0.0,
+ "mean": 5.0,
+ "stddev": 1.0,
+ "min_val": 0.0,
+ "max_val": 10.0,
+ "p50": 5.0,
+ "p75": 7.5,
+ "p90": 9.0,
+ "p95": 9.5,
+ "p99": 9.9,
+ "histogram": None,
+ }
+
+ # Keep the store's default query behaviour so non-baseline queries still
+ # go through it.
+ original_side_effect = (
+ provider.offline_store.query_monitoring_metrics.side_effect
+ )
+
+ def _query_with_baseline(config, project, metric_type, filters=None, **kwargs):
+ # Baseline feature queries return the canned row; everything else is
+ # delegated to the original side effect when there is one.
+ if metric_type == "feature" and filters and filters.get("is_baseline"):
+ return [existing_baseline]
+ if original_side_effect:
+ return original_side_effect(
+ config, project, metric_type, filters=filters, **kwargs
+ )
+ return []
+
+ provider.offline_store.query_monitoring_metrics.side_effect = (
+ _query_with_baseline
+ )
+
+ svc = MonitoringService(store)
+ result = svc.compute_baseline(project="test_project")
+
+ # Only acc_rate should be computed (conv_rate already has baseline)
+ assert result["computed_features"] == 1
+
+
+# ------------------------------------------------------------------ #
+# Test: Transient compute
+# ------------------------------------------------------------------ #
+
+
+class TestTransientCompute:
+    """Transient computation hands metrics back to the caller without saving."""
+
+    def test_transient_returns_metrics_without_saving(self):
+        """Two features over a date range yield two metrics and no save call."""
+        fields = [
+            _make_feature_field(field_name, field_type)
+            for field_name, field_type in (
+                ("conv_rate", PrimitiveFeastType.FLOAT64),
+                ("city", PrimitiveFeastType.STRING),
+            )
+        ]
+        mock_store = _make_mock_store([_make_feature_view("driver_stats", fields)])
+        service = MonitoringService(mock_store)
+
+        outcome = service.compute_transient(
+            project="test_project",
+            feature_view_name="driver_stats",
+            start_date=date(2025, 1, 1),
+            end_date=date(2025, 1, 15),
+        )
+
+        assert outcome["status"] == "completed"
+        assert outcome["start_date"] == "2025-01-01"
+        assert outcome["end_date"] == "2025-01-15"
+        assert len(outcome["metrics"]) == 2
+
+        # Transient should NOT call save
+        offline_store = mock_store._get_provider.return_value.offline_store
+        offline_store.save_monitoring_metrics.assert_not_called()
+
+    def test_transient_empty_features(self):
+        """A view whose only feature is UNIX_TIMESTAMP typed yields no metrics."""
+        timestamp_field = _make_feature_field("ts", PrimitiveFeastType.UNIX_TIMESTAMP)
+        mock_store = _make_mock_store([_make_feature_view("fv", [timestamp_field])])
+
+        outcome = MonitoringService(mock_store).compute_transient(
+            project="test_project",
+            feature_view_name="fv",
+        )
+
+        assert outcome["metrics"] == []
+
+
+# ------------------------------------------------------------------ #
+# Test: DQM Job Manager
+# ------------------------------------------------------------------ #
+
+
+class TestDQMJobManager:
+ """Job submission and status updates against an in-memory mock offline store."""
+
+ def _make_manager(self):
+ """Return a DQMJobManager whose offline store saves to and queries a dict."""
+ from feast.monitoring.dqm_job_manager import DQMJobManager
+
+ # Saved records, keyed by job_id; a later save with the same id overwrites.
+ stored = {}
+
+ def mock_save(config, metric_type, metrics):
+ for m in metrics:
+ stored[m["job_id"]] = dict(m)
+
+ def mock_query(config, project, metric_type, filters=None, **kwargs):
+ # Exact-match filtering on every non-None filter value.
+ results = list(stored.values())
+ if filters:
+ for key, value in filters.items():
+ if value is not None:
+ results = [r for r in results if r.get(key) == value]
+ return results
+
+ offline_store = MagicMock()
+ offline_store.save_monitoring_metrics.side_effect = mock_save
+ offline_store.query_monitoring_metrics.side_effect = mock_query
+ return DQMJobManager(offline_store, MagicMock())
+
+ def test_submit_and_get_job(self):
+ """A submitted job can be read back and starts in the "pending" status."""
+ mgr = self._make_manager()
+ job_id = mgr.submit(
+ project="test_project",
+ job_type="auto_compute",
+ feature_view_name="driver_stats",
+ )
+
+ assert job_id is not None
+ assert len(job_id) == 36 # UUID format
+
+ job = mgr.get_job(job_id)
+ assert job is not None
+ assert job["project_id"] == "test_project"
+ assert job["job_type"] == "auto_compute"
+ assert job["status"] == "pending"
+
+ def test_update_status(self):
+ """Moving a job to running updates its status and sets ``started_at``."""
+ from feast.monitoring.dqm_job_manager import JOB_STATUS_RUNNING
+
+ mgr = self._make_manager()
+ job_id = mgr.submit(
+ project="test_project",
+ job_type="compute",
+ )
+ mgr.update_status(job_id, JOB_STATUS_RUNNING)
+
+ job = mgr.get_job(job_id)
+ assert job["status"] == JOB_STATUS_RUNNING
+ assert job["started_at"] is not None
+
+
+# ------------------------------------------------------------------ #
+# Test: CLI
+# ------------------------------------------------------------------ #
+
+
+class TestComputeMetricsCLI:
+    """Drive the ``run`` sub-command of ``monitor_cmd`` through click's CliRunner."""
+
+    @staticmethod
+    def _invoke(cli_args):
+        """Import ``monitor_cmd`` lazily and invoke it with ``cli_args``."""
+        from feast.cli.monitor import monitor_cmd
+
+        return CliRunner().invoke(monitor_cmd, cli_args)
+
+    def test_help(self):
+        """``run --help`` exits cleanly and lists the main options."""
+        outcome = self._invoke(["run", "--help"])
+
+        assert outcome.exit_code == 0
+        for flag in ("--granularity", "--set-baseline", "--feature-view"):
+            assert flag in outcome.output
+
+    @patch("feast.cli.monitor.create_feature_store")
+    @patch("feast.monitoring.monitoring_service.MonitoringService.auto_compute")
+    def test_run_auto_mode(self, mock_auto, mock_create_store):
+        """A bare ``run`` calls auto_compute once and prints its summary."""
+        mock_create_store.return_value = MagicMock(project="proj")
+        mock_auto.return_value = dict(
+            status="completed",
+            computed_feature_views=2,
+            computed_features=5,
+            granularities=list(VALID_GRANULARITIES),
+            duration_ms=1200,
+        )
+
+        outcome = self._invoke(["run"])
+
+        assert outcome.exit_code == 0
+        assert "Auto-computing" in outcome.output
+        assert "Features computed: 5" in outcome.output
+        mock_auto.assert_called_once()
+
+    @patch("feast.cli.monitor.create_feature_store")
+    @patch("feast.monitoring.monitoring_service.MonitoringService.compute_metrics")
+    def test_run_explicit_granularity(self, mock_compute, mock_create_store):
+        """An explicit granularity and date range are echoed in the output."""
+        mock_create_store.return_value = MagicMock(project="proj")
+        mock_compute.return_value = dict(
+            status="completed",
+            granularity="weekly",
+            computed_features=3,
+            computed_feature_views=1,
+            computed_feature_services=1,
+            metric_dates=["2025-01-01"],
+            duration_ms=500,
+        )
+
+        cli_args = ["run"]
+        cli_args += ["--granularity", "weekly"]
+        cli_args += ["--start-date", "2025-01-01"]
+        cli_args += ["--end-date", "2025-01-07"]
+        outcome = self._invoke(cli_args)
+
+        assert outcome.exit_code == 0
+        assert "Granularity: weekly" in outcome.output
+
+
+# ------------------------------------------------------------------ #
+# Test: REST API
+# ------------------------------------------------------------------ #
+
+
+class TestRESTEndpoints:
+    """Smoke tests for the monitoring REST routes with permission checks patched."""
+
+    @pytest.fixture
+    def app(self):
+        """Return ``(TestClient, mock_server)`` for a FastAPI app with the monitoring router."""
+        from fastapi import FastAPI
+        from fastapi.testclient import TestClient
+
+        from feast.api.registry.rest.monitoring import get_monitoring_router
+
+        handler = MagicMock()
+        server = MagicMock()
+        conv_rate = _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)
+        server.store = _make_mock_store([_make_feature_view("driver_stats", [conv_rate])])
+
+        api = FastAPI()
+        api.include_router(get_monitoring_router(handler, server))
+        return TestClient(api), server
+
+    @patch("feast.api.registry.rest.monitoring.assert_permissions")
+    def test_auto_compute_endpoint(self, mock_perms, app):
+        """POST /monitoring/auto_compute completes and returns a job id."""
+        client = app[0]
+
+        payload = {"project": "test_project"}
+        response = client.post("/monitoring/auto_compute", json=payload)
+
+        assert response.status_code == 200
+        body = response.json()
+        assert body["status"] == "completed"
+        assert "job_id" in body
+
+    @patch("feast.api.registry.rest.monitoring.assert_permissions")
+    def test_transient_compute_endpoint(self, mock_perms, app):
+        """POST /monitoring/compute/transient returns at least one metric."""
+        client = app[0]
+
+        payload = dict(
+            project="test_project",
+            feature_view_name="driver_stats",
+            start_date="2025-01-05",
+            end_date="2025-01-20",
+        )
+        response = client.post("/monitoring/compute/transient", json=payload)
+
+        assert response.status_code == 200
+        body = response.json()
+        assert body["status"] == "completed"
+        assert len(body["metrics"]) >= 1
+
+    @patch("feast.api.registry.rest.monitoring.assert_permissions")
+    def test_get_metrics_with_granularity(self, mock_perms, app):
+        """GET /monitoring/metrics/features accepts a granularity parameter."""
+        client = app[0]
+
+        query = {"project": "test_project", "granularity": "weekly"}
+        response = client.get("/monitoring/metrics/features", params=query)
+
+        assert response.status_code == 200
+
+    @patch("feast.api.registry.rest.monitoring.assert_permissions")
+    def test_get_timeseries(self, mock_perms, app):
+        """GET /monitoring/metrics/timeseries succeeds for a view and granularity."""
+        client = app[0]
+
+        query = dict(
+            project="test_project",
+            feature_view_name="driver_stats",
+            granularity="daily",
+        )
+        response = client.get("/monitoring/metrics/timeseries", params=query)
+
+        assert response.status_code == 200
+
+
+# ------------------------------------------------------------------ #
+# Test: RBAC enforcement
+# ------------------------------------------------------------------ #
+
+
+class TestRBACEnforcement:
+    """Check which AuthzedAction each monitoring route passes to assert_permissions."""
+
+    @pytest.fixture
+    def app(self):
+        """Return ``(TestClient, mock_server)`` for a FastAPI app with the monitoring router."""
+        from fastapi import FastAPI
+        from fastapi.testclient import TestClient
+
+        from feast.api.registry.rest.monitoring import get_monitoring_router
+
+        handler = MagicMock()
+        server = MagicMock()
+        conv_rate = _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)
+        server.store = _make_mock_store([_make_feature_view("driver_stats", [conv_rate])])
+
+        api = FastAPI()
+        api.include_router(get_monitoring_router(handler, server))
+        return TestClient(api), server
+
+    @staticmethod
+    def _last_call_actions(mock_perms):
+        """Return the ``actions`` keyword of the latest assert_permissions call.
+
+        Asserts that the mock was called at all; yields an empty list when the
+        call carried no ``actions`` keyword.
+        """
+        mock_perms.assert_called()
+        return mock_perms.call_args.kwargs.get("actions", [])
+
+    @patch("feast.api.registry.rest.monitoring.assert_permissions")
+    def test_compute_requires_update(self, mock_perms, app):
+        """POST /monitoring/compute checks for the UPDATE action."""
+        from feast.permissions.action import AuthzedAction
+
+        client = app[0]
+        payload = {"project": "test_project", "feature_view_name": "driver_stats"}
+        client.post("/monitoring/compute", json=payload)
+
+        assert AuthzedAction.UPDATE in self._last_call_actions(mock_perms)
+
+    @patch("feast.api.registry.rest.monitoring.assert_permissions")
+    def test_transient_requires_describe(self, mock_perms, app):
+        """POST /monitoring/compute/transient checks for the DESCRIBE action."""
+        from feast.permissions.action import AuthzedAction
+
+        client = app[0]
+        payload = {"project": "test_project", "feature_view_name": "driver_stats"}
+        client.post("/monitoring/compute/transient", json=payload)
+
+        assert AuthzedAction.DESCRIBE in self._last_call_actions(mock_perms)
+
+    @patch("feast.api.registry.rest.monitoring.assert_permissions")
+    def test_read_requires_describe(self, mock_perms, app):
+        """GET /monitoring/metrics/features checks for the DESCRIBE action."""
+        from feast.permissions.action import AuthzedAction
+
+        client = app[0]
+        query = {"project": "test_project", "feature_view_name": "driver_stats"}
+        client.get("/monitoring/metrics/features", params=query)
+
+        assert AuthzedAction.DESCRIBE in self._last_call_actions(mock_perms)
+
+
+# ------------------------------------------------------------------ #
+# Test: SQL push-down dispatch
+# ------------------------------------------------------------------ #
+
+
+class TestComputeEngineDispatch:
+    """MonitoringService should use the offline store's SQL push-down when it
+    is available and otherwise compute in Python from pulled data."""
+
+    def _make_store_with_pushdown(self, pushdown_result):
+        """Return ``(store, feature_view)`` with both push-down hooks enabled.
+
+        ``compute_monitoring_metrics`` returns ``pushdown_result`` and
+        ``get_monitoring_max_timestamp`` returns 2025-03-27 UTC.
+        """
+        fields = [
+            _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64),
+            _make_feature_field("city", PrimitiveFeastType.STRING),
+        ]
+        view = _make_feature_view("driver_stats", fields)
+        mock_store = _make_mock_store([view])
+        offline_store = mock_store._get_provider.return_value.offline_store
+
+        # Clearing side_effect lets return_value replace the default
+        # NotImplementedError installed by _make_mock_store.
+        compute_hook = offline_store.compute_monitoring_metrics
+        compute_hook.side_effect = None
+        compute_hook.return_value = pushdown_result
+
+        max_ts_hook = offline_store.get_monitoring_max_timestamp
+        max_ts_hook.side_effect = None
+        max_ts_hook.return_value = datetime(2025, 3, 27, tzinfo=timezone.utc)
+        return mock_store, view
+
+    def test_uses_sql_pushdown_when_available(self):
+        """With compute_monitoring_metrics available, no raw data is pulled
+        through pull_all_from_table_or_query."""
+        pushed_down = [
+            dict(
+                feature_name="conv_rate",
+                feature_type="numeric",
+                row_count=100,
+                null_count=2,
+                null_rate=0.02,
+                mean=0.5,
+                stddev=0.2,
+                min_val=0.0,
+                max_val=1.0,
+                p50=0.5,
+                p75=0.75,
+                p90=0.9,
+                p95=0.95,
+                p99=0.99,
+                histogram=dict(bins=[0.0, 0.5, 1.0], counts=[50, 50], bin_width=0.5),
+            ),
+        ]
+        mock_store, _ = self._make_store_with_pushdown(pushed_down)
+
+        outcome = MonitoringService(mock_store).compute_transient(
+            project="test_project",
+            feature_view_name="driver_stats",
+            feature_names=["conv_rate"],
+            start_date=date(2025, 1, 1),
+            end_date=date(2025, 1, 15),
+        )
+
+        assert outcome["status"] == "completed"
+        metrics = outcome["metrics"]
+        assert len(metrics) == 1
+        assert metrics[0]["mean"] == 0.5
+
+        offline_store = mock_store._get_provider.return_value.offline_store
+        offline_store.compute_monitoring_metrics.assert_called_once()
+        offline_store.pull_all_from_table_or_query.assert_not_called()
+
+    def test_falls_back_to_python_when_not_supported(self):
+        """When compute_monitoring_metrics raises NotImplementedError the
+        service pulls the data and computes the metrics itself."""
+        conv_rate = _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)
+        mock_store = _make_mock_store([_make_feature_view("driver_stats", [conv_rate])])
+
+        outcome = MonitoringService(mock_store).compute_transient(
+            project="test_project",
+            feature_view_name="driver_stats",
+            start_date=date(2025, 1, 1),
+            end_date=date(2025, 1, 15),
+        )
+
+        assert outcome["status"] == "completed"
+        metrics = outcome["metrics"]
+        assert len(metrics) == 1
+        assert metrics[0]["feature_name"] == "conv_rate"
+
+        offline_store = mock_store._get_provider.return_value.offline_store
+        offline_store.pull_all_from_table_or_query.assert_called()
+
+    def test_auto_compute_uses_pushdown_for_max_timestamp(self):
+        """auto_compute uses both push-down hooks and never pulls raw data."""
+        conv_rate = _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)
+        mock_store = _make_mock_store([_make_feature_view("driver_stats", [conv_rate])])
+        offline_store = mock_store._get_provider.return_value.offline_store
+
+        max_ts_hook = offline_store.get_monitoring_max_timestamp
+        max_ts_hook.side_effect = None
+        max_ts_hook.return_value = datetime(2025, 3, 27, tzinfo=timezone.utc)
+
+        compute_hook = offline_store.compute_monitoring_metrics
+        compute_hook.side_effect = None
+        compute_hook.return_value = [
+            dict(
+                feature_name="conv_rate",
+                feature_type="numeric",
+                row_count=5,
+                null_count=0,
+                null_rate=0.0,
+                mean=0.5,
+                stddev=0.2,
+                min_val=0.1,
+                max_val=0.9,
+                p50=0.5,
+                p75=0.7,
+                p90=0.9,
+                p95=0.9,
+                p99=0.9,
+                histogram=None,
+            ),
+        ]
+
+        outcome = MonitoringService(mock_store).auto_compute(project="test_project")
+
+        assert outcome["status"] == "completed"
+        max_ts_hook.assert_called()
+        compute_hook.assert_called()
+        offline_store.pull_all_from_table_or_query.assert_not_called()
+
+
+# ------------------------------------------------------------------ #
+# Test: Native storage dispatch
+# ------------------------------------------------------------------ #
+
+
+class TestNativeStorageDispatch:
+    """Every storage operation (ensure tables, save, query, clear baseline)
+    issued by MonitoringService must go through the OfflineStore."""
+
+    @staticmethod
+    def _service_and_offline_store():
+        """Return ``(MonitoringService, offline_store mock)`` for one FLOAT64 view."""
+        conv_rate = _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)
+        mock_store = _make_mock_store([_make_feature_view("driver_stats", [conv_rate])])
+        service = MonitoringService(mock_store)
+        return service, mock_store._get_provider.return_value.offline_store
+
+    def test_save_goes_through_offline_store(self):
+        """compute_metrics ensures tables and saves feature and view metrics."""
+        service, offline_store = self._service_and_offline_store()
+
+        service.compute_metrics(
+            project="test_project",
+            granularity="daily",
+        )
+
+        offline_store.ensure_monitoring_tables.assert_called()
+        offline_store.save_monitoring_metrics.assert_called()
+
+        # The second positional argument of each save call is the metric type.
+        saved_types = {
+            save_call.args[1]
+            for save_call in offline_store.save_monitoring_metrics.call_args_list
+        }
+        assert "feature" in saved_types
+        assert "feature_view" in saved_types
+
+    def test_query_goes_through_offline_store(self):
+        """get_feature_metrics queries the offline store for "feature" metrics."""
+        service, offline_store = self._service_and_offline_store()
+
+        service.get_feature_metrics(project="test_project", granularity="daily")
+
+        query_hook = offline_store.query_monitoring_metrics
+        query_hook.assert_called()
+        assert query_hook.call_args.kwargs["metric_type"] == "feature"
+
+    def test_baseline_clear_goes_through_offline_store(self):
+        """compute_baseline clears the previous baseline via the offline store."""
+        service, offline_store = self._service_and_offline_store()
+
+        service.compute_baseline(project="test_project")
+
+        offline_store.clear_monitoring_baseline.assert_called()
+
+    def test_transient_does_not_save(self):
+        """compute_transient never reaches save_monitoring_metrics."""
+        service, offline_store = self._service_and_offline_store()
+
+        service.compute_transient(
+            project="test_project",
+            feature_view_name="driver_stats",
+        )
+
+        offline_store.save_monitoring_metrics.assert_not_called()
+
+
+# ------------------------------------------------------------------ #
+# Test: Log source monitoring
+# ------------------------------------------------------------------ #
+
+
+class TestLogSourceMonitoring:
+ """Verify that monitoring can compute metrics from feature serving logs."""
+
+ # Realistic log column names follow the {view}__{feature} convention
+ # produced by FeatureServiceLoggingSource.get_schema().
+ _LOG_SCHEMA = pa.schema(
+ [
+ ("driver_id", pa.int64()),
+ ("driver_stats__conv_rate", pa.float64()),
+ ("driver_stats__conv_rate__timestamp", pa.timestamp("us", tz="UTC")),
+ ("driver_stats__conv_rate__status", pa.int32()),
+ ("driver_stats__city", pa.utf8()),
+ ("driver_stats__city__timestamp", pa.timestamp("us", tz="UTC")),
+ ("driver_stats__city__status", pa.int32()),
+ ("__log_timestamp", pa.timestamp("us", tz="UTC")),
+ ("__log_date", pa.date32()),
+ ("__request_id", pa.utf8()),
+ ]
+ )
+
+ def _make_log_store(self):
+ """Create a mock store with a feature service that has logging configured.
+
+ Returns ``(store, feature_service)``. The store's offline store returns a
+ five-row log table from pull_all_from_table_or_query().
+ """
+ fv = _make_feature_view(
+ "driver_stats",
+ [
+ _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64),
+ _make_feature_field("city", PrimitiveFeastType.STRING),
+ ],
+ )
+
+ logging_config, log_data_source = _make_logging_config_with_source(
+ self._LOG_SCHEMA
+ )
+
+ fs = _make_feature_service(
+ "driver_service",
+ ["driver_stats"],
+ logging_config=logging_config,
+ feature_map={"driver_stats": ["conv_rate", "city"]},
+ )
+ store = _make_mock_store([fv], feature_services=[fs])
+
+ # Log rows carry only the two feature columns and the log timestamp.
+ log_arrow_table = pa.table(
+ {
+ "driver_stats__conv_rate": [0.1, 0.5, 0.9, 0.3, 0.7],
+ "driver_stats__city": ["NYC", "LA", "NYC", "SF", "LA"],
+ "__log_timestamp": [
+ datetime(2025, 3, 25, tzinfo=timezone.utc),
+ datetime(2025, 3, 26, tzinfo=timezone.utc),
+ datetime(2025, 3, 26, tzinfo=timezone.utc),
+ datetime(2025, 3, 27, tzinfo=timezone.utc),
+ datetime(2025, 3, 27, tzinfo=timezone.utc),
+ ],
+ }
+ )
+
+ mock_log_retrieval = MagicMock()
+ mock_log_retrieval.to_arrow.return_value = log_arrow_table
+
+ # Replace the batch table installed by _make_mock_store with the log table.
+ provider = store._get_provider.return_value
+ provider.offline_store.pull_all_from_table_or_query.return_value = (
+ mock_log_retrieval
+ )
+
+ entity_col = MagicMock()
+ entity_col.name = "driver_id"
+ fv.entity_columns = [entity_col]
+
+ return store, fs
+
+ def test_compute_log_metrics(self):
+ """Log metrics are saved per feature and per service, tagged "log"."""
+ store, fs = self._make_log_store()
+ svc = MonitoringService(store)
+
+ # FeatureServiceLoggingSource is patched so its get_schema() returns
+ # _LOG_SCHEMA.
+ with patch(
+ "feast.monitoring.monitoring_service.FeatureServiceLoggingSource"
+ ) as mock_cls:
+ mock_instance = MagicMock()
+ mock_instance.get_schema.return_value = self._LOG_SCHEMA
+ mock_cls.return_value = mock_instance
+
+ result = svc.compute_log_metrics(
+ project="test_project",
+ feature_service_name="driver_service",
+ start_date=date(2025, 3, 25),
+ end_date=date(2025, 3, 27),
+ granularity="daily",
+ )
+
+ assert result["status"] == "completed"
+ assert result["data_source_type"] == "log"
+ assert result["computed_features"] == 2
+
+ provider = store._get_provider.return_value
+ provider.offline_store.save_monitoring_metrics.assert_called()
+
+ # Positional args of each save call are (config, metric_type, metrics).
+ save_calls = provider.offline_store.save_monitoring_metrics.call_args_list
+ feature_calls = [c for c in save_calls if c[0][1] == "feature"]
+ assert len(feature_calls) >= 1
+ saved_metrics = feature_calls[0][0][2]
+ assert all(m["data_source_type"] == "log" for m in saved_metrics)
+ # Feature names normalized: driver_stats__conv_rate -> conv_rate
+ saved_names = {m["feature_name"] for m in saved_metrics}
+ assert saved_names == {"conv_rate", "city"}
+ # Feature view name is the actual view, not the service
+ assert all(m["feature_view_name"] == "driver_stats" for m in saved_metrics)
+
+ # Feature service aggregate saved to the service table
+ svc_calls = [c for c in save_calls if c[0][1] == "feature_service"]
+ assert len(svc_calls) >= 1
+ svc_metric = svc_calls[0][0][2][0]
+ assert svc_metric["feature_service_name"] == "driver_service"
+ assert svc_metric["data_source_type"] == "log"
+ assert svc_metric["total_features"] == 2
+
+ def test_compute_log_metrics_no_logging_config(self):
+ """A feature service without logging_config is reported as skipped."""
+ fv = _make_feature_view(
+ "driver_stats",
+ [_make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)],
+ )
+ fs = _make_feature_service("no_log_service", ["driver_stats"])
+ fs.logging_config = None
+ store = _make_mock_store([fv], feature_services=[fs])
+ svc = MonitoringService(store)
+
+ result = svc.compute_log_metrics(
+ project="test_project",
+ feature_service_name="no_log_service",
+ )
+
+ assert result["status"] == "skipped"
+ assert "no logging configured" in result["reason"]
+
+ def test_auto_compute_log_metrics(self):
+ """Auto-compute over logs covers one service at every valid granularity."""
+ store, fs = self._make_log_store()
+ svc = MonitoringService(store)
+
+ with patch(
+ "feast.monitoring.monitoring_service.FeatureServiceLoggingSource"
+ ) as mock_cls:
+ mock_instance = MagicMock()
+ mock_instance.get_schema.return_value = self._LOG_SCHEMA
+ mock_cls.return_value = mock_instance
+
+ result = svc.auto_compute_log_metrics(project="test_project")
+
+ assert result["status"] == "completed"
+ assert result["data_source_type"] == "log"
+ assert result["computed_feature_services"] == 1
+ assert len(result["granularities"]) == len(VALID_GRANULARITIES)
+
+ def test_log_metrics_tagged_differently_from_batch(self):
+ """Log metrics should have data_source_type='log', batch should have 'batch'."""
+ store, fs = self._make_log_store()
+ svc = MonitoringService(store)
+
+ with patch(
+ "feast.monitoring.monitoring_service.FeatureServiceLoggingSource"
+ ) as mock_cls:
+ mock_instance = MagicMock()
+ mock_instance.get_schema.return_value = self._LOG_SCHEMA
+ mock_cls.return_value = mock_instance
+
+ svc.compute_log_metrics(
+ project="test_project",
+ feature_service_name="driver_service",
+ granularity="daily",
+ )
+
+ # Only the log side is asserted here: every saved feature metric must be
+ # tagged "log" and attributed to the underlying view.
+ provider = store._get_provider.return_value
+ save_calls = provider.offline_store.save_monitoring_metrics.call_args_list
+ feature_calls = [c for c in save_calls if c[0][1] == "feature"]
+ for call in feature_calls:
+ for m in call[0][2]:
+ assert m["data_source_type"] == "log"
+ assert m["feature_view_name"] == "driver_stats"
+ assert m["feature_name"] in ("conv_rate", "city")
diff --git a/sdk/python/tests/unit/monitoring/__init__.py b/sdk/python/tests/unit/monitoring/__init__.py
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/sdk/python/tests/unit/monitoring/__init__.py
@@ -0,0 +1 @@
+
diff --git a/sdk/python/tests/unit/monitoring/test_compute_correctness.py b/sdk/python/tests/unit/monitoring/test_compute_correctness.py
new file mode 100644
index 00000000000..9f9005380df
--- /dev/null
+++ b/sdk/python/tests/unit/monitoring/test_compute_correctness.py
@@ -0,0 +1,1831 @@
+"""
+Compute correctness tests for monitoring metric calculations.
+
+Verifies that each offline store backend's SQL/compute helpers produce
+mathematically correct results for a known golden dataset.
+
+- DuckDB and Dask tests run fully in-memory with zero external dependencies.
+- PostgreSQL tests require a live Postgres instance (skipped if unavailable).
+- Snowflake, BigQuery, Redshift tests run against mocked clients (result parsing only).
+- Spark, Oracle tests require their respective backends (skipped if unavailable).
+"""
+
+import statistics
+from datetime import datetime, timezone
+from typing import Any, Dict
+
+import pyarrow as pa
+import pytest
+
+# ---------------------------------------------------------------------------
+# Golden dataset: known values with hand-computable statistics
+# ---------------------------------------------------------------------------
+
+# Ten consecutive floats, no nulls.
+NUMERIC_VALUES = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]
+# Same length as NUMERIC_VALUES; every second entry is null (5 nulls, 5 values).
+NUMERIC_WITH_NULLS = [1.0, None, 3.0, None, 5.0, None, 7.0, None, 9.0, None]
+# Four distinct labels with counts a=4, b=3, c=2, d=1.
+CATEGORICAL_VALUES = ["a", "b", "a", "c", "a", "b", "d", "a", "b", "c"]
+
+# Number of rows in the golden dataset; fixtures index all three lists with it.
+ROW_COUNT = len(NUMERIC_VALUES)
+# The non-null entries of NUMERIC_WITH_NULLS, in original order.
+NON_NULL_VALUES = [v for v in NUMERIC_WITH_NULLS if v is not None]
+
+
+def _expected_numeric_stats():
+ """Hand-computed expected values for NUMERIC_VALUES = [1..10]."""
+ vals = NUMERIC_VALUES
+ return {
+ "row_count": 10,
+ "null_count": 0,
+ "null_rate": 0.0,
+ "mean": 5.5,
+ "stddev": statistics.stdev(vals), # sample stddev ≈ 3.0277
+ "min_val": 1.0,
+ "max_val": 10.0,
+ "p50": 5.5,
+ "p75": 7.75,
+ "p90": 9.1,
+ "p95": 9.55,
+ "p99": 9.91,
+ }
+
+
+def _expected_numeric_with_nulls_stats():
+ """Hand-computed expected values for NUMERIC_WITH_NULLS."""
+ vals = NON_NULL_VALUES # [1, 3, 5, 7, 9]
+ return {
+ "row_count": 10,
+ "null_count": 5,
+ "null_rate": 0.5,
+ "mean": 5.0,
+ "stddev": statistics.stdev(vals), # sample stddev ≈ 3.1623
+ "min_val": 1.0,
+ "max_val": 9.0,
+ }
+
+
+def _expected_categorical_stats():
+ """Expected values for CATEGORICAL_VALUES."""
+ return {
+ "row_count": 10,
+ "null_count": 0,
+ "null_rate": 0.0,
+ "unique_count": 4,
+ "top_value": "a",
+ "top_count": 4,
+ }
+
+
+# ---------------------------------------------------------------------------
+# Shared assertions: validate any backend's numeric/categorical result
+# ---------------------------------------------------------------------------
+
+
+def assert_numeric_correctness(
+ result: Dict[str, Any], expected: Dict, label: str, approx_percentiles: bool = False
+):
+ """Assert that a numeric result from any backend matches expected values."""
+ assert result["feature_type"] == "numeric", f"{label}: wrong feature_type"
+ assert result["row_count"] == expected["row_count"], f"{label}: wrong row_count"
+ assert result["null_count"] == expected["null_count"], f"{label}: wrong null_count"
+ assert result["null_rate"] == pytest.approx(expected["null_rate"], abs=1e-6), (
+ f"{label}: wrong null_rate"
+ )
+ assert result["mean"] == pytest.approx(expected["mean"], abs=1e-4), (
+ f"{label}: wrong mean"
+ )
+ assert result["stddev"] == pytest.approx(expected["stddev"], abs=0.05), (
+ f"{label}: wrong stddev"
+ )
+ assert result["min_val"] == pytest.approx(expected["min_val"], abs=1e-6), (
+ f"{label}: wrong min_val"
+ )
+ assert result["max_val"] == pytest.approx(expected["max_val"], abs=1e-6), (
+ f"{label}: wrong max_val"
+ )
+
+ if "p50" in expected and not approx_percentiles:
+ assert result["p50"] == pytest.approx(expected["p50"], abs=0.5), (
+ f"{label}: wrong p50"
+ )
+ assert result["p75"] == pytest.approx(expected["p75"], abs=0.5), (
+ f"{label}: wrong p75"
+ )
+ assert result["p90"] == pytest.approx(expected["p90"], abs=0.5), (
+ f"{label}: wrong p90"
+ )
+
+ # Percentile ordering is always enforced
+ assert result["p50"] <= result["p75"], f"{label}: p50 > p75"
+ assert result["p75"] <= result["p90"], f"{label}: p75 > p90"
+ assert result["p90"] <= result["p95"], f"{label}: p90 > p95"
+ assert result["p95"] <= result["p99"], f"{label}: p95 > p99"
+ assert result["p50"] >= result["min_val"], f"{label}: p50 < min"
+ assert result["p99"] <= result["max_val"], f"{label}: p99 > max"
+
+
+def assert_histogram_correctness(
+ result: Dict[str, Any], label: str, expected_bins: int = 5
+):
+ """Assert that a numeric histogram has consistent structure and totals."""
+ hist = result.get("histogram")
+ assert hist is not None, f"{label}: histogram is None"
+ assert len(hist["counts"]) == expected_bins, f"{label}: wrong number of bins"
+ assert len(hist["bins"]) == expected_bins + 1, f"{label}: wrong number of bin edges"
+ total = sum(hist["counts"])
+ non_null = result["row_count"] - result["null_count"]
+ assert total == non_null, f"{label}: histogram total {total} != non_null {non_null}"
+ assert hist["bins"][0] <= result["min_val"], f"{label}: first bin edge > min_val"
+ assert hist["bins"][-1] >= result["max_val"], f"{label}: last bin edge < max_val"
+
+
+def assert_categorical_correctness(result: Dict[str, Any], expected: Dict, label: str):
+ """Assert that a categorical result from any backend matches expected values."""
+ assert result["feature_type"] == "categorical", f"{label}: wrong feature_type"
+ assert result["row_count"] == expected["row_count"], f"{label}: wrong row_count"
+ assert result["null_count"] == expected["null_count"], f"{label}: wrong null_count"
+ assert result["null_rate"] == pytest.approx(expected["null_rate"], abs=1e-6), (
+ f"{label}: wrong null_rate"
+ )
+
+ hist = result["histogram"]
+ assert hist is not None, f"{label}: histogram is None"
+ assert hist["unique_count"] == expected["unique_count"], (
+ f"{label}: wrong unique_count"
+ )
+
+ top_entry = hist["values"][0]
+ assert top_entry["value"] == expected["top_value"], f"{label}: wrong top value"
+ assert top_entry["count"] == expected["top_count"], f"{label}: wrong top count"
+
+ total = sum(e["count"] for e in hist["values"]) + hist["other_count"]
+ expected_total = expected["row_count"] - expected["null_count"]
+ assert total == expected_total, (
+ f"{label}: categorical total {total} != expected {expected_total}"
+ )
+
+
+# ===================================================================
+# DuckDB compute correctness tests (fully in-memory, no external deps)
+# ===================================================================
+
+
+class TestDuckDBComputeCorrectness:
+ """Test DuckDB SQL helper functions produce correct metric values."""
+
+ @pytest.fixture(autouse=True)
+ def setup_duckdb(self):
+ duckdb = pytest.importorskip("duckdb")
+ self.conn = duckdb.connect()
+
+ self.conn.execute("""
+ CREATE TABLE test_data (
+ event_timestamp TIMESTAMP,
+ numeric_col DOUBLE,
+ numeric_with_nulls DOUBLE,
+ categorical_col VARCHAR
+ )
+ """)
+
+ ts = datetime(2025, 1, 15, 12, 0, 0)
+ for i in range(ROW_COUNT):
+ n_val = NUMERIC_VALUES[i]
+ n_null = NUMERIC_WITH_NULLS[i]
+ c_val = CATEGORICAL_VALUES[i]
+ n_null_sql = f"{n_null}" if n_null is not None else "NULL"
+ self.conn.execute(
+ f"INSERT INTO test_data VALUES "
+ f"(TIMESTAMP '{ts.strftime('%Y-%m-%d %H:%M:%S')}', "
+ f"{n_val}, {n_null_sql}, '{c_val}')"
+ )
+ yield
+ self.conn.close()
+
+ def test_numeric_stats_basic(self):
+ from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats
+
+ results = _duckdb_numeric_stats(
+ self.conn,
+ "test_data",
+ ["numeric_col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 1
+ result = results[0]
+ expected = _expected_numeric_stats()
+ assert_numeric_correctness(result, expected, "duckdb_numeric")
+ assert_histogram_correctness(result, "duckdb_numeric", expected_bins=5)
+
+ def test_numeric_stats_with_nulls(self):
+ from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats
+
+ results = _duckdb_numeric_stats(
+ self.conn,
+ "test_data",
+ ["numeric_with_nulls"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 1
+ result = results[0]
+ expected = _expected_numeric_with_nulls_stats()
+ assert_numeric_correctness(result, expected, "duckdb_numeric_nulls")
+ assert_histogram_correctness(result, "duckdb_numeric_nulls", expected_bins=5)
+
+ def test_numeric_multiple_features(self):
+ from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats
+
+ results = _duckdb_numeric_stats(
+ self.conn,
+ "test_data",
+ ["numeric_col", "numeric_with_nulls"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 2
+ assert results[0]["feature_name"] == "numeric_col"
+ assert results[1]["feature_name"] == "numeric_with_nulls"
+ assert results[0]["mean"] == pytest.approx(5.5, abs=1e-4)
+ assert results[1]["mean"] == pytest.approx(5.0, abs=1e-4)
+ assert results[0]["null_count"] == 0
+ assert results[1]["null_count"] == 5
+
+ def test_categorical_stats(self):
+ from feast.infra.offline_stores.duckdb import _duckdb_categorical_stats
+
+ result = _duckdb_categorical_stats(
+ self.conn,
+ "test_data",
+ "categorical_col",
+ "1=1",
+ top_n=10,
+ )
+
+ expected = _expected_categorical_stats()
+ assert_categorical_correctness(result, expected, "duckdb_categorical")
+
+ def test_categorical_top_n_truncation(self):
+ from feast.infra.offline_stores.duckdb import _duckdb_categorical_stats
+
+ result = _duckdb_categorical_stats(
+ self.conn,
+ "test_data",
+ "categorical_col",
+ "1=1",
+ top_n=2,
+ )
+
+ assert len(result["histogram"]["values"]) == 2
+ assert result["histogram"]["other_count"] > 0
+ total = (
+ sum(e["count"] for e in result["histogram"]["values"])
+ + result["histogram"]["other_count"]
+ )
+ assert total == 10
+
+ def test_histogram_bin_edges_cover_range(self):
+ from feast.infra.offline_stores.duckdb import _duckdb_numeric_histogram
+
+ hist = _duckdb_numeric_histogram(
+ self.conn,
+ "test_data",
+ "numeric_col",
+ "1=1",
+ bins=5,
+ min_val=1.0,
+ max_val=10.0,
+ )
+
+ assert hist["bins"][0] == pytest.approx(1.0, abs=1e-6)
+ assert hist["bins"][-1] == pytest.approx(10.0, abs=0.1)
+ assert sum(hist["counts"]) == 10
+ assert hist["bin_width"] == pytest.approx(1.8, abs=0.01)
+
+ def test_histogram_single_value(self):
+ from feast.infra.offline_stores.duckdb import _duckdb_numeric_histogram
+
+ self.conn.execute("""
+ CREATE TABLE single_val (event_timestamp TIMESTAMP, v DOUBLE)
+ """)
+ self.conn.execute(
+ "INSERT INTO single_val VALUES (TIMESTAMP '2025-01-15 12:00:00', 42.0)"
+ )
+
+ hist = _duckdb_numeric_histogram(
+ self.conn,
+ "single_val",
+ "v",
+ "1=1",
+ bins=5,
+ min_val=42.0,
+ max_val=42.0,
+ )
+
+ assert hist["counts"] == [1]
+ assert hist["bin_width"] == 0.0
+
+ def test_empty_table(self):
+ from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats
+
+ self.conn.execute("""
+ CREATE TABLE empty_tbl (event_timestamp TIMESTAMP, v DOUBLE)
+ """)
+ results = _duckdb_numeric_stats(
+ self.conn,
+ "empty_tbl",
+ ["v"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 1
+ assert results[0]["row_count"] == 0
+ assert results[0]["mean"] is None
+ assert results[0]["histogram"] is None
+
+ def test_stddev_with_single_row(self):
+ from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats
+
+ self.conn.execute("""
+ CREATE TABLE one_row (event_timestamp TIMESTAMP, v DOUBLE)
+ """)
+ self.conn.execute(
+ "INSERT INTO one_row VALUES (TIMESTAMP '2025-01-15 12:00:00', 7.0)"
+ )
+ results = _duckdb_numeric_stats(
+ self.conn,
+ "one_row",
+ ["v"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert results[0]["mean"] == pytest.approx(7.0)
+ assert results[0]["min_val"] == 7.0
+ assert results[0]["max_val"] == 7.0
+ # STDDEV_SAMP of a single value is NULL
+ assert results[0]["stddev"] is None
+
+ def test_large_dataset_percentiles(self):
+ from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats
+
+ self.conn.execute("""
+ CREATE TABLE large_tbl (event_timestamp TIMESTAMP, v DOUBLE)
+ """)
+ for i in range(1, 1001):
+ self.conn.execute(
+ f"INSERT INTO large_tbl VALUES "
+ f"(TIMESTAMP '2025-01-15 12:00:00', {float(i)})"
+ )
+
+ results = _duckdb_numeric_stats(
+ self.conn,
+ "large_tbl",
+ ["v"],
+ "1=1",
+ histogram_bins=10,
+ )
+
+ r = results[0]
+ assert r["mean"] == pytest.approx(500.5, abs=0.1)
+ assert r["min_val"] == 1.0
+ assert r["max_val"] == 1000.0
+ assert r["p50"] == pytest.approx(500.5, abs=5.0)
+ assert r["p90"] == pytest.approx(900.0, abs=10.0)
+ assert r["p99"] == pytest.approx(990.0, abs=10.0)
+ assert_histogram_correctness(r, "duckdb_large", expected_bins=10)
+
+
+# ===================================================================
+# Dask (PyArrow) compute correctness tests (no external deps)
+# ===================================================================
+
+
+class TestDaskComputeCorrectness:
+ """Test Dask/PyArrow compute helpers produce correct metric values."""
+
+ def test_numeric_stats_basic(self):
+ from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics
+
+ col = pa.chunked_array([pa.array(NUMERIC_VALUES, type=pa.float64())])
+ result = _dask_compute_numeric_metrics(col, histogram_bins=5)
+
+ expected = _expected_numeric_stats()
+ result["feature_name"] = "test"
+ assert_numeric_correctness(result, expected, "dask_numeric")
+ assert_histogram_correctness(result, "dask_numeric", expected_bins=5)
+
+ def test_numeric_stats_with_nulls(self):
+ from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics
+
+ col = pa.chunked_array([pa.array(NUMERIC_WITH_NULLS, type=pa.float64())])
+ result = _dask_compute_numeric_metrics(col, histogram_bins=5)
+ result["feature_name"] = "test"
+
+ expected = _expected_numeric_with_nulls_stats()
+ assert_numeric_correctness(result, expected, "dask_numeric_nulls")
+ assert_histogram_correctness(result, "dask_numeric_nulls", expected_bins=5)
+
+ def test_numeric_all_nulls(self):
+ from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics
+
+ col = pa.chunked_array([pa.array([None, None, None], type=pa.float64())])
+ result = _dask_compute_numeric_metrics(col, histogram_bins=5)
+
+ assert result["row_count"] == 3
+ assert result["null_count"] == 3
+ assert result["mean"] is None
+ assert result["histogram"] is None
+
+ def test_numeric_empty(self):
+ from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics
+
+ col = pa.chunked_array([pa.array([], type=pa.float64())])
+ result = _dask_compute_numeric_metrics(col, histogram_bins=5)
+
+ assert result["row_count"] == 0
+ assert result["mean"] is None
+
+ def test_numeric_single_value(self):
+ from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics
+
+ col = pa.chunked_array([pa.array([42.0], type=pa.float64())])
+ result = _dask_compute_numeric_metrics(col, histogram_bins=5)
+
+ assert result["mean"] == pytest.approx(42.0)
+ assert result["min_val"] == 42.0
+ assert result["max_val"] == 42.0
+ assert result["stddev"] is None # STDDEV_SAMP of single value
+
+ def test_numeric_large_dataset_percentiles(self):
+ from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics
+
+ vals = list(range(1, 1001))
+ col = pa.chunked_array([pa.array(vals, type=pa.float64())])
+ result = _dask_compute_numeric_metrics(col, histogram_bins=10)
+
+ assert result["mean"] == pytest.approx(500.5, abs=0.1)
+ assert result["p50"] == pytest.approx(500.5, abs=5.0)
+ assert result["p90"] == pytest.approx(900.0, abs=10.0)
+ assert result["p99"] == pytest.approx(990.0, abs=10.0)
+ result["feature_name"] = "test"
+ assert_histogram_correctness(result, "dask_large", expected_bins=10)
+
+ def test_categorical_stats_basic(self):
+ from feast.infra.offline_stores.dask import _dask_compute_categorical_metrics
+
+ col = pa.chunked_array([pa.array(CATEGORICAL_VALUES, type=pa.string())])
+ result = _dask_compute_categorical_metrics(col, top_n=10)
+ result["feature_name"] = "test"
+
+ expected = _expected_categorical_stats()
+ assert_categorical_correctness(result, expected, "dask_categorical")
+
+ def test_categorical_with_nulls(self):
+ from feast.infra.offline_stores.dask import _dask_compute_categorical_metrics
+
+ vals = ["a", None, "b", None, "a", "c"]
+ col = pa.chunked_array([pa.array(vals, type=pa.string())])
+ result = _dask_compute_categorical_metrics(col, top_n=10)
+
+ assert result["row_count"] == 6
+ assert result["null_count"] == 2
+ assert result["null_rate"] == pytest.approx(1 / 3, abs=1e-4)
+ assert result["histogram"]["unique_count"] == 3
+
+ def test_categorical_top_n_truncation(self):
+ from feast.infra.offline_stores.dask import _dask_compute_categorical_metrics
+
+ col = pa.chunked_array([pa.array(CATEGORICAL_VALUES, type=pa.string())])
+ result = _dask_compute_categorical_metrics(col, top_n=2)
+
+ assert len(result["histogram"]["values"]) == 2
+ assert result["histogram"]["other_count"] > 0
+ total = (
+ sum(e["count"] for e in result["histogram"]["values"])
+ + result["histogram"]["other_count"]
+ )
+ assert total == 10
+
+ def test_categorical_all_nulls(self):
+ from feast.infra.offline_stores.dask import _dask_compute_categorical_metrics
+
+ col = pa.chunked_array([pa.array([None, None], type=pa.string())])
+ result = _dask_compute_categorical_metrics(col, top_n=10)
+
+ assert result["null_count"] == 2
+ assert result["histogram"] is None
+
+
+# ===================================================================
+# PostgreSQL compute correctness tests (requires live Postgres)
+# ===================================================================
+
+
+def _pg_available():
+ try:
+ import psycopg # noqa: F401
+
+ return True
+ except ImportError:
+ return False
+
+
+@pytest.mark.skipif(not _pg_available(), reason="psycopg not installed")
+class TestPostgresComputeCorrectness:
+ """Test PostgreSQL SQL helpers produce correct metric values.
+
+ Requires env vars: FEAST_PG_HOST, FEAST_PG_PORT, FEAST_PG_DB,
+ FEAST_PG_USER, FEAST_PG_PASS (or a local Postgres at localhost:5432).
+ """
+
+ @pytest.fixture(autouse=True)
+ def setup_pg(self):
+ import os
+
+ import psycopg
+
+ host = os.environ.get("FEAST_PG_HOST", "localhost")
+ port = os.environ.get("FEAST_PG_PORT", "5432")
+ db = os.environ.get("FEAST_PG_DB", "feast")
+ user = os.environ.get("FEAST_PG_USER", "feast")
+ password = os.environ.get("FEAST_PG_PASS", "feast")
+
+ try:
+ self.conn = psycopg.connect(
+ f"host={host} port={port} dbname={db} user={user} password={password}",
+ autocommit=True,
+ )
+ except psycopg.OperationalError:
+ pytest.skip("PostgreSQL not reachable")
+
+ self.conn.execute("DROP TABLE IF EXISTS feast_test_monitoring_correctness")
+ self.conn.execute("""
+ CREATE TABLE feast_test_monitoring_correctness (
+ event_timestamp TIMESTAMPTZ,
+ numeric_col DOUBLE PRECISION,
+ numeric_with_nulls DOUBLE PRECISION,
+ categorical_col TEXT
+ )
+ """)
+
+ ts = datetime(2025, 1, 15, 12, 0, 0, tzinfo=timezone.utc)
+ for i in range(ROW_COUNT):
+ n_val = NUMERIC_VALUES[i]
+ n_null = NUMERIC_WITH_NULLS[i]
+ c_val = CATEGORICAL_VALUES[i]
+ self.conn.execute(
+ "INSERT INTO feast_test_monitoring_correctness VALUES (%s, %s, %s, %s)",
+ (ts, n_val, n_null, c_val),
+ )
+ yield
+ self.conn.execute("DROP TABLE IF EXISTS feast_test_monitoring_correctness")
+ self.conn.close()
+
+ def test_numeric_stats(self):
+ from feast.infra.offline_stores.contrib.postgres_offline_store.postgres import (
+ _sql_numeric_stats,
+ )
+
+ results = _sql_numeric_stats(
+ self.conn,
+ "feast_test_monitoring_correctness",
+ ["numeric_col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 1
+ expected = _expected_numeric_stats()
+ assert_numeric_correctness(results[0], expected, "pg_numeric")
+ assert_histogram_correctness(results[0], "pg_numeric", expected_bins=5)
+
+ def test_numeric_stats_with_nulls(self):
+ from feast.infra.offline_stores.contrib.postgres_offline_store.postgres import (
+ _sql_numeric_stats,
+ )
+
+ results = _sql_numeric_stats(
+ self.conn,
+ "feast_test_monitoring_correctness",
+ ["numeric_with_nulls"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ expected = _expected_numeric_with_nulls_stats()
+ assert_numeric_correctness(results[0], expected, "pg_numeric_nulls")
+
+ def test_numeric_multiple_features(self):
+ from feast.infra.offline_stores.contrib.postgres_offline_store.postgres import (
+ _sql_numeric_stats,
+ )
+
+ results = _sql_numeric_stats(
+ self.conn,
+ "feast_test_monitoring_correctness",
+ ["numeric_col", "numeric_with_nulls"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 2
+ assert results[0]["mean"] == pytest.approx(5.5, abs=1e-4)
+ assert results[1]["mean"] == pytest.approx(5.0, abs=1e-4)
+
+ def test_categorical_stats(self):
+ from feast.infra.offline_stores.contrib.postgres_offline_store.postgres import (
+ _sql_categorical_stats,
+ )
+
+ result = _sql_categorical_stats(
+ self.conn,
+ "feast_test_monitoring_correctness",
+ "categorical_col",
+ "1=1",
+ top_n=10,
+ )
+
+ expected = _expected_categorical_stats()
+ assert_categorical_correctness(result, expected, "pg_categorical")
+
+
+# ===================================================================
+# Snowflake compute correctness (mocked connection, real parsing)
+# ===================================================================
+
+
+def _snowflake_importable():
+ try:
+ from feast.infra.offline_stores.snowflake import (
+ _snowflake_sql_numeric_stats, # noqa: F401
+ )
+
+ return True
+ except (ImportError, Exception):
+ return False
+
+
+@pytest.mark.skipif(not _snowflake_importable(), reason="Snowflake deps not installed")
+class TestSnowflakeComputeCorrectness:
+ """Tests Snowflake result parsing with mocked cursor.
+
+ The cursor returns exactly the row format Snowflake would produce.
+ This validates column indexing, opt_float, null_count math, and
+ histogram assembly without needing a live Snowflake account.
+ """
+
+ def _make_mock_cursor(self, fetchone_val=None, fetchall_val=None):
+ from unittest.mock import MagicMock
+
+ cursor = MagicMock()
+ cursor.fetchone.return_value = fetchone_val
+ cursor.fetchall.return_value = fetchall_val or []
+ return cursor
+
+ def test_numeric_stats(self):
+ from unittest.mock import MagicMock, patch
+
+ from feast.infra.offline_stores.snowflake import (
+ _snowflake_sql_numeric_stats,
+ )
+
+ vals = NUMERIC_VALUES
+ mean_v = statistics.mean(vals)
+ stddev_v = statistics.stdev(vals)
+ # row: COUNT(*), then per-feature: nn, avg, stddev, min, max, p50..p99
+ stats_row = (
+ 10,
+ 10,
+ mean_v,
+ stddev_v,
+ 1.0,
+ 10.0,
+ 5.5,
+ 7.75,
+ 9.1,
+ 9.55,
+ 9.91,
+ )
+ stats_cursor = self._make_mock_cursor(fetchone_val=stats_row)
+ hist_row_data = [(1, 2), (2, 2), (3, 2), (4, 2), (5, 2)]
+ hist_cursor = self._make_mock_cursor(fetchall_val=hist_row_data)
+
+ call_count = [0]
+
+ def mock_execute(conn, query):
+ call_count[0] += 1
+ return stats_cursor if call_count[0] == 1 else hist_cursor
+
+ with patch(
+ "feast.infra.offline_stores.snowflake.execute_snowflake_statement",
+ side_effect=mock_execute,
+ ):
+ results = _snowflake_sql_numeric_stats(
+ MagicMock(),
+ "test_table",
+ ["numeric_col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 1
+ r = results[0]
+ expected = _expected_numeric_stats()
+ assert_numeric_correctness(r, expected, "snowflake_numeric")
+ assert r["histogram"] is not None
+ assert sum(r["histogram"]["counts"]) == 10
+
+ def test_numeric_stats_with_nulls(self):
+ from unittest.mock import MagicMock, patch
+
+ from feast.infra.offline_stores.snowflake import (
+ _snowflake_sql_numeric_stats,
+ )
+
+ vals = NON_NULL_VALUES
+ stats_row = (
+ 10,
+ 5,
+ statistics.mean(vals),
+ statistics.stdev(vals),
+ 1.0,
+ 9.0,
+ 5.0,
+ 7.0,
+ 8.6,
+ 8.8,
+ 8.96,
+ )
+ stats_cursor = self._make_mock_cursor(fetchone_val=stats_row)
+ hist_cursor = self._make_mock_cursor(
+ fetchall_val=[(1, 1), (2, 1), (3, 1), (4, 1), (5, 1)]
+ )
+
+ call_count = [0]
+
+ def mock_execute(conn, query):
+ call_count[0] += 1
+ return stats_cursor if call_count[0] == 1 else hist_cursor
+
+ with patch(
+ "feast.infra.offline_stores.snowflake.execute_snowflake_statement",
+ side_effect=mock_execute,
+ ):
+ results = _snowflake_sql_numeric_stats(
+ MagicMock(),
+ "t",
+ ["col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ r = results[0]
+ assert r["null_count"] == 5
+ assert r["null_rate"] == pytest.approx(0.5)
+ assert r["mean"] == pytest.approx(5.0, abs=1e-4)
+
+ def test_categorical_stats(self):
+ from unittest.mock import MagicMock, patch
+
+ from feast.infra.offline_stores.snowflake import (
+ _snowflake_sql_categorical_stats,
+ )
+
+ rows = [
+ (10, 0, 4, "a", 4),
+ (10, 0, 4, "b", 3),
+ (10, 0, 4, "c", 2),
+ (10, 0, 4, "d", 1),
+ ]
+ cursor = self._make_mock_cursor(fetchall_val=rows)
+
+ with patch(
+ "feast.infra.offline_stores.snowflake.execute_snowflake_statement",
+ return_value=cursor,
+ ):
+ result = _snowflake_sql_categorical_stats(
+ MagicMock(),
+ "t",
+ "cat_col",
+ "1=1",
+ top_n=10,
+ )
+
+ expected = _expected_categorical_stats()
+ assert_categorical_correctness(result, expected, "snowflake_categorical")
+
+ def test_empty_result(self):
+ from unittest.mock import MagicMock, patch
+
+ from feast.infra.offline_stores.snowflake import (
+ _snowflake_sql_numeric_stats,
+ )
+
+ cursor = self._make_mock_cursor(fetchone_val=None)
+ with patch(
+ "feast.infra.offline_stores.snowflake.execute_snowflake_statement",
+ return_value=cursor,
+ ):
+ results = _snowflake_sql_numeric_stats(
+ MagicMock(),
+ "t",
+ ["col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 1
+ assert results[0]["mean"] is None
+ assert results[0]["row_count"] == 0
+
+
+# ===================================================================
+# BigQuery compute correctness (mocked client, real parsing)
+# ===================================================================
+
+
+class TestBigQueryComputeCorrectness:
+ """Tests BigQuery result parsing with mocked client.
+
+ BigQuery results use dict-like row access (row["column_name"]).
+ """
+
+ def _make_mock_bq_row(self, data: dict):
+ """Create an object supporting both dict-key and index access."""
+
+ class BQRow:
+ def __init__(self, d):
+ self._data = d
+ self._keys = list(d.keys())
+
+ def __getitem__(self, key):
+ if isinstance(key, int):
+ return self._data[self._keys[key]]
+ return self._data[key]
+
+ return BQRow(data)
+
+ def _make_mock_job(self, rows):
+ from unittest.mock import MagicMock
+
+ job = MagicMock()
+ job.result.return_value = None
+ job.__iter__ = lambda self_: iter(rows)
+ return job
+
+ def test_numeric_stats(self):
+ from unittest.mock import MagicMock, patch
+
+ from feast.infra.offline_stores.bigquery import _bq_numeric_stats
+
+ vals = NUMERIC_VALUES
+ row_data = {
+ "_row_count": 10,
+ "c0_nn": 10,
+ "c0_avg": statistics.mean(vals),
+ "c0_stddev": statistics.stdev(vals),
+ "c0_min": 1.0,
+ "c0_max": 10.0,
+ "c0_p50": 5.5,
+ "c0_p75": 7.75,
+ "c0_p90": 9.1,
+ "c0_p95": 9.55,
+ "c0_p99": 9.91,
+ }
+ stats_row = self._make_mock_bq_row(row_data)
+ stats_job = self._make_mock_job([stats_row])
+
+ hist_rows = [
+ self._make_mock_bq_row({"bucket": i + 1, "cnt": 2}) for i in range(5)
+ ]
+ hist_job = self._make_mock_job(hist_rows)
+
+ call_count = [0]
+
+ def mock_query(sql, *args, **kwargs):
+ call_count[0] += 1
+ return stats_job if call_count[0] == 1 else hist_job
+
+ mock_config = MagicMock()
+ mock_config.offline_store.billing_project_id = "proj"
+ mock_config.offline_store.project_id = "proj"
+ mock_config.offline_store.location = "US"
+
+ with patch(
+ "feast.infra.offline_stores.bigquery._get_bigquery_client"
+ ) as mock_client:
+ mock_client.return_value.query = mock_query
+ results = _bq_numeric_stats(
+ mock_config,
+ "test_table",
+ ["numeric_col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 1
+ r = results[0]
+ expected = _expected_numeric_stats()
+ assert_numeric_correctness(r, expected, "bq_numeric")
+ assert r["histogram"] is not None
+ assert sum(r["histogram"]["counts"]) == 10
+
+ def test_numeric_stats_with_nulls(self):
+ from unittest.mock import MagicMock, patch
+
+ from feast.infra.offline_stores.bigquery import _bq_numeric_stats
+
+ vals = NON_NULL_VALUES
+ row_data = {
+ "_row_count": 10,
+ "c0_nn": 5,
+ "c0_avg": statistics.mean(vals),
+ "c0_stddev": statistics.stdev(vals),
+ "c0_min": 1.0,
+ "c0_max": 9.0,
+ "c0_p50": 5.0,
+ "c0_p75": 7.0,
+ "c0_p90": 8.6,
+ "c0_p95": 8.8,
+ "c0_p99": 8.96,
+ }
+ stats_row = self._make_mock_bq_row(row_data)
+ stats_job = self._make_mock_job([stats_row])
+ hist_job = self._make_mock_job(
+ [self._make_mock_bq_row({"bucket": i + 1, "cnt": 1}) for i in range(5)]
+ )
+
+ call_count = [0]
+
+ def mock_query(sql, *args, **kwargs):
+ call_count[0] += 1
+ return stats_job if call_count[0] == 1 else hist_job
+
+ mock_config = MagicMock()
+ mock_config.offline_store.billing_project_id = "proj"
+ mock_config.offline_store.project_id = "proj"
+ mock_config.offline_store.location = "US"
+
+ with patch(
+ "feast.infra.offline_stores.bigquery._get_bigquery_client"
+ ) as mock_client:
+ mock_client.return_value.query = mock_query
+ results = _bq_numeric_stats(
+ mock_config,
+ "t",
+ ["col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ r = results[0]
+ assert r["null_count"] == 5
+ assert r["null_rate"] == pytest.approx(0.5)
+ assert r["mean"] == pytest.approx(5.0, abs=1e-4)
+
+ def test_categorical_stats(self):
+ from unittest.mock import MagicMock, patch
+
+ from feast.infra.offline_stores.bigquery import _bq_categorical_stats
+
+ rows = [
+ self._make_mock_bq_row(
+ {
+ "row_count": 10,
+ "null_count": 0,
+ "unique_count": 4,
+ "value": "a",
+ "cnt": 4,
+ }
+ ),
+ self._make_mock_bq_row(
+ {
+ "row_count": 10,
+ "null_count": 0,
+ "unique_count": 4,
+ "value": "b",
+ "cnt": 3,
+ }
+ ),
+ self._make_mock_bq_row(
+ {
+ "row_count": 10,
+ "null_count": 0,
+ "unique_count": 4,
+ "value": "c",
+ "cnt": 2,
+ }
+ ),
+ self._make_mock_bq_row(
+ {
+ "row_count": 10,
+ "null_count": 0,
+ "unique_count": 4,
+ "value": "d",
+ "cnt": 1,
+ }
+ ),
+ ]
+ job = self._make_mock_job(rows)
+
+ mock_config = MagicMock()
+ mock_config.offline_store.billing_project_id = "proj"
+ mock_config.offline_store.project_id = "proj"
+ mock_config.offline_store.location = "US"
+
+ with patch(
+ "feast.infra.offline_stores.bigquery._get_bigquery_client"
+ ) as mock_client:
+ mock_client.return_value.query.return_value = job
+ result = _bq_categorical_stats(
+ mock_config,
+ "t",
+ "cat_col",
+ "1=1",
+ top_n=10,
+ )
+
+ expected = _expected_categorical_stats()
+ assert_categorical_correctness(result, expected, "bq_categorical")
+
+ def test_multiple_features(self):
+ from unittest.mock import MagicMock, patch
+
+ from feast.infra.offline_stores.bigquery import _bq_numeric_stats
+
+ row_data = {
+ "_row_count": 10,
+ "c0_nn": 10,
+ "c0_avg": 5.5,
+ "c0_stddev": 3.03,
+ "c0_min": 1.0,
+ "c0_max": 10.0,
+ "c0_p50": 5.5,
+ "c0_p75": 7.75,
+ "c0_p90": 9.1,
+ "c0_p95": 9.55,
+ "c0_p99": 9.91,
+ "c1_nn": 5,
+ "c1_avg": 5.0,
+ "c1_stddev": 3.16,
+ "c1_min": 1.0,
+ "c1_max": 9.0,
+ "c1_p50": 5.0,
+ "c1_p75": 7.0,
+ "c1_p90": 8.6,
+ "c1_p95": 8.8,
+ "c1_p99": 8.96,
+ }
+ stats_job = self._make_mock_job([self._make_mock_bq_row(row_data)])
+ hist_job = self._make_mock_job(
+ [self._make_mock_bq_row({"bucket": i + 1, "cnt": 2}) for i in range(5)]
+ )
+
+ call_count = [0]
+
+ def mock_query(sql, *args, **kwargs):
+ call_count[0] += 1
+ return stats_job if call_count[0] == 1 else hist_job
+
+ mock_config = MagicMock()
+ mock_config.offline_store.billing_project_id = "p"
+ mock_config.offline_store.project_id = "p"
+ mock_config.offline_store.location = "US"
+
+ with patch(
+ "feast.infra.offline_stores.bigquery._get_bigquery_client"
+ ) as mock_client:
+ mock_client.return_value.query = mock_query
+ results = _bq_numeric_stats(
+ mock_config,
+ "t",
+ ["col_a", "col_b"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 2
+ assert results[0]["mean"] == pytest.approx(5.5, abs=1e-2)
+ assert results[1]["mean"] == pytest.approx(5.0, abs=1e-2)
+ assert results[0]["null_count"] == 0
+ assert results[1]["null_count"] == 5
+
+
+# ===================================================================
+# Redshift compute correctness (mocked Data API, real parsing)
+# ===================================================================
+
+
+class TestRedshiftComputeCorrectness:
+ """Tests Redshift result parsing with mocked _redshift_execute_fetch_rows.
+
+ Redshift Data API returns rows as lists of field dicts, e.g.
+ [{"longValue": 10}, {"doubleValue": 5.5}, ...].
+ """
+
+ def _long(self, v):
+ return {"longValue": v}
+
+ def _double(self, v):
+ return {"doubleValue": v}
+
+ def _string(self, v):
+ return {"stringValue": v}
+
+ def _null(self):
+ return {"isNull": True}
+
+    def test_numeric_stats(self):
+        """Parse a mocked stats row plus histogram rows for a null-free column."""
+        from unittest.mock import MagicMock, patch
+
+        from feast.infra.offline_stores.redshift import (
+            _redshift_sql_numeric_stats,
+        )
+
+        vals = NUMERIC_VALUES
+        row = [
+            self._long(10),  # COUNT(*)
+            self._long(10),  # COUNT(col)
+            self._double(statistics.mean(vals)),  # AVG
+            self._double(statistics.stdev(vals)),  # STDDEV_SAMP
+            self._double(1.0),  # MIN
+            self._double(10.0),  # MAX
+            self._double(5.5),  # p50
+            self._double(7.75),  # p75
+            self._double(9.1),  # p90
+            self._double(9.55),  # p95
+            self._double(9.91),  # p99
+        ]
+        hist_rows = [[self._long(i + 1), self._long(2)] for i in range(5)]
+
+        call_count = [0]
+
+        def mock_fetch(config, sql):
+            # First call gets the aggregate row; every later call gets histogram rows.
+            call_count[0] += 1
+            return [row] if call_count[0] == 1 else hist_rows
+
+        with patch(
+            "feast.infra.offline_stores.redshift._redshift_execute_fetch_rows",
+            side_effect=mock_fetch,
+        ):
+            results = _redshift_sql_numeric_stats(
+                MagicMock(),
+                "test_table",
+                ["numeric_col"],
+                "1=1",
+                histogram_bins=5,
+            )
+
+        assert len(results) == 1
+        r = results[0]
+        expected = _expected_numeric_stats()
+        assert_numeric_correctness(r, expected, "redshift_numeric")
+        assert r["histogram"] is not None
+        assert sum(r["histogram"]["counts"]) == 10
+
+ def test_numeric_stats_with_nulls(self):
+ from unittest.mock import MagicMock, patch
+
+ from feast.infra.offline_stores.redshift import (
+ _redshift_sql_numeric_stats,
+ )
+
+ vals = NON_NULL_VALUES
+ row = [
+ self._long(10),
+ self._long(5),
+ self._double(statistics.mean(vals)),
+ self._double(statistics.stdev(vals)),
+ self._double(1.0),
+ self._double(9.0),
+ self._double(5.0),
+ self._double(7.0),
+ self._double(8.6),
+ self._double(8.8),
+ self._double(8.96),
+ ]
+ hist_rows = [[self._long(i + 1), self._long(1)] for i in range(5)]
+
+ call_count = [0]
+
+ def mock_fetch(config, sql):
+ call_count[0] += 1
+ return [row] if call_count[0] == 1 else hist_rows
+
+ with patch(
+ "feast.infra.offline_stores.redshift._redshift_execute_fetch_rows",
+ side_effect=mock_fetch,
+ ):
+ results = _redshift_sql_numeric_stats(
+ MagicMock(),
+ "t",
+ ["col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ r = results[0]
+ assert r["null_count"] == 5
+ assert r["null_rate"] == pytest.approx(0.5)
+ assert r["mean"] == pytest.approx(5.0, abs=1e-4)
+
+ def test_categorical_stats(self):
+ from unittest.mock import MagicMock, patch
+
+ from feast.infra.offline_stores.redshift import (
+ _redshift_sql_categorical_stats,
+ )
+
+ rows = [
+ [
+ self._long(10),
+ self._long(0),
+ self._long(4),
+ self._string("a"),
+ self._long(4),
+ ],
+ [
+ self._long(10),
+ self._long(0),
+ self._long(4),
+ self._string("b"),
+ self._long(3),
+ ],
+ [
+ self._long(10),
+ self._long(0),
+ self._long(4),
+ self._string("c"),
+ self._long(2),
+ ],
+ [
+ self._long(10),
+ self._long(0),
+ self._long(4),
+ self._string("d"),
+ self._long(1),
+ ],
+ ]
+
+ with patch(
+ "feast.infra.offline_stores.redshift._redshift_execute_fetch_rows",
+ return_value=rows,
+ ):
+ result = _redshift_sql_categorical_stats(
+ MagicMock(),
+ "t",
+ "cat_col",
+ "1=1",
+ top_n=10,
+ )
+
+ expected = _expected_categorical_stats()
+ assert_categorical_correctness(result, expected, "redshift_categorical")
+
+ def test_empty_result(self):
+ from unittest.mock import MagicMock, patch
+
+ from feast.infra.offline_stores.redshift import (
+ _redshift_sql_numeric_stats,
+ )
+
+ with patch(
+ "feast.infra.offline_stores.redshift._redshift_execute_fetch_rows",
+ return_value=[],
+ ):
+ results = _redshift_sql_numeric_stats(
+ MagicMock(),
+ "t",
+ ["col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 1
+ assert results[0]["mean"] is None
+ assert results[0]["row_count"] == 0
+
+
+# ===================================================================
+# Spark compute correctness tests (requires SparkSession)
+# ===================================================================
+
+
+def _spark_available():
+    """Report whether ``pyspark.sql.SparkSession`` can be imported."""
+    try:
+        from pyspark.sql import SparkSession  # noqa: F401
+    except ImportError:
+        return False
+    return True
+
+
+@pytest.mark.skipif(not _spark_available(), reason="PySpark not installed")
+class TestSparkComputeCorrectness:
+ """Test Spark SQL helpers produce correct metric values.
+
+ Uses a local SparkSession — no external cluster required.
+ """
+
+ @pytest.fixture(autouse=True)
+ def setup_spark(self):
+ from pyspark.sql import SparkSession
+ from pyspark.sql.types import (
+ DoubleType,
+ StringType,
+ StructField,
+ StructType,
+ TimestampType,
+ )
+
+ try:
+ self.spark = (
+ SparkSession.builder.master("local[1]")
+ .appName("feast_monitoring_test")
+ .config("spark.ui.enabled", "false")
+ .config("spark.driver.bindAddress", "127.0.0.1")
+ .getOrCreate()
+ )
+ except Exception as e:
+ pytest.skip(f"SparkSession unavailable: {e}")
+
+ schema = StructType(
+ [
+ StructField("event_timestamp", TimestampType(), False),
+ StructField("numeric_col", DoubleType(), True),
+ StructField("numeric_with_nulls", DoubleType(), True),
+ StructField("categorical_col", StringType(), True),
+ ]
+ )
+
+ ts = datetime(2025, 1, 15, 12, 0, 0)
+ rows = [
+ (ts, NUMERIC_VALUES[i], NUMERIC_WITH_NULLS[i], CATEGORICAL_VALUES[i])
+ for i in range(ROW_COUNT)
+ ]
+ df = self.spark.createDataFrame(rows, schema)
+ df.createOrReplaceTempView("feast_test_monitoring")
+
+ yield
+ self.spark.sql("DROP VIEW IF EXISTS feast_test_monitoring")
+
+ def test_numeric_stats(self):
+ from feast.infra.offline_stores.contrib.spark_offline_store.spark import (
+ _spark_sql_numeric_stats,
+ )
+
+ results = _spark_sql_numeric_stats(
+ self.spark,
+ "feast_test_monitoring",
+ ["numeric_col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 1
+ expected = _expected_numeric_stats()
+ assert_numeric_correctness(
+ results[0],
+ expected,
+ "spark_numeric",
+ approx_percentiles=True,
+ )
+ assert results[0]["histogram"] is not None
+ assert sum(results[0]["histogram"]["counts"]) == 10
+
+ def test_numeric_stats_with_nulls(self):
+ from feast.infra.offline_stores.contrib.spark_offline_store.spark import (
+ _spark_sql_numeric_stats,
+ )
+
+ results = _spark_sql_numeric_stats(
+ self.spark,
+ "feast_test_monitoring",
+ ["numeric_with_nulls"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ expected = _expected_numeric_with_nulls_stats()
+ assert_numeric_correctness(
+ results[0],
+ expected,
+ "spark_numeric_nulls",
+ approx_percentiles=True,
+ )
+
+ def test_categorical_stats(self):
+ from feast.infra.offline_stores.contrib.spark_offline_store.spark import (
+ _spark_sql_categorical_stats,
+ )
+
+ result = _spark_sql_categorical_stats(
+ self.spark,
+ "feast_test_monitoring",
+ "categorical_col",
+ "1=1",
+ top_n=10,
+ )
+
+ expected = _expected_categorical_stats()
+ assert_categorical_correctness(result, expected, "spark_categorical")
+
+ def test_numeric_multiple_features(self):
+ from feast.infra.offline_stores.contrib.spark_offline_store.spark import (
+ _spark_sql_numeric_stats,
+ )
+
+ results = _spark_sql_numeric_stats(
+ self.spark,
+ "feast_test_monitoring",
+ ["numeric_col", "numeric_with_nulls"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 2
+ assert results[0]["mean"] == pytest.approx(5.5, abs=1e-4)
+ assert results[1]["mean"] == pytest.approx(5.0, abs=1e-4)
+
+ def test_categorical_top_n_truncation(self):
+ from feast.infra.offline_stores.contrib.spark_offline_store.spark import (
+ _spark_sql_categorical_stats,
+ )
+
+ result = _spark_sql_categorical_stats(
+ self.spark,
+ "feast_test_monitoring",
+ "categorical_col",
+ "1=1",
+ top_n=2,
+ )
+
+ assert len(result["histogram"]["values"]) == 2
+ assert result["histogram"]["other_count"] > 0
+ total = (
+ sum(e["count"] for e in result["histogram"]["values"])
+ + result["histogram"]["other_count"]
+ )
+ assert total == 10
+
+
+# ===================================================================
+# Oracle compute correctness (mocked Ibis connection, real parsing)
+# ===================================================================
+
+
+def _oracle_importable():
+    """Report whether the Oracle offline-store stats helper can be imported."""
+    try:
+        from feast.infra.offline_stores.contrib.oracle_offline_store.oracle import (
+            _oracle_numeric_stats,  # noqa: F401
+        )
+    except ImportError:
+        return False
+    return True
+
+
+@pytest.mark.skipif(not _oracle_importable(), reason="Oracle deps not installed")
+class TestOracleComputeCorrectness:
+ """Tests Oracle result parsing with mocked Ibis connection.
+
+ _oracle_fetchall returns list of tuples (positional indexing).
+ """
+
+ def test_numeric_stats(self):
+ from unittest.mock import patch
+
+ from feast.infra.offline_stores.contrib.oracle_offline_store.oracle import (
+ _oracle_numeric_stats,
+ )
+
+ vals = NUMERIC_VALUES
+ row = (
+ 10,
+ 10,
+ statistics.mean(vals),
+ statistics.stdev(vals),
+ 1.0,
+ 10.0,
+ 5.5,
+ 7.75,
+ 9.1,
+ 9.55,
+ 9.91,
+ )
+ hist_rows = [(i + 1, 2) for i in range(5)]
+
+ call_count = [0]
+
+ def mock_fetchall(con, sql):
+ call_count[0] += 1
+ return [row] if call_count[0] == 1 else hist_rows
+
+ with patch(
+ "feast.infra.offline_stores.contrib.oracle_offline_store.oracle._oracle_fetchall",
+ side_effect=mock_fetchall,
+ ):
+ results = _oracle_numeric_stats(
+ None,
+ "test_table",
+ ["numeric_col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 1
+ r = results[0]
+ expected = _expected_numeric_stats()
+ assert_numeric_correctness(r, expected, "oracle_numeric")
+ assert r["histogram"] is not None
+ assert sum(r["histogram"]["counts"]) == 10
+
+ def test_numeric_stats_with_nulls(self):
+ from unittest.mock import patch
+
+ from feast.infra.offline_stores.contrib.oracle_offline_store.oracle import (
+ _oracle_numeric_stats,
+ )
+
+ vals = NON_NULL_VALUES
+ row = (
+ 10,
+ 5,
+ statistics.mean(vals),
+ statistics.stdev(vals),
+ 1.0,
+ 9.0,
+ 5.0,
+ 7.0,
+ 8.6,
+ 8.8,
+ 8.96,
+ )
+ hist_rows = [(i + 1, 1) for i in range(5)]
+
+ call_count = [0]
+
+ def mock_fetchall(con, sql):
+ call_count[0] += 1
+ return [row] if call_count[0] == 1 else hist_rows
+
+ with patch(
+ "feast.infra.offline_stores.contrib.oracle_offline_store.oracle._oracle_fetchall",
+ side_effect=mock_fetchall,
+ ):
+ results = _oracle_numeric_stats(
+ None,
+ "t",
+ ["col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ r = results[0]
+ assert r["null_count"] == 5
+ assert r["null_rate"] == pytest.approx(0.5)
+ assert r["mean"] == pytest.approx(5.0, abs=1e-4)
+
+ def test_categorical_stats(self):
+ from unittest.mock import patch
+
+ from feast.infra.offline_stores.contrib.oracle_offline_store.oracle import (
+ _oracle_categorical_stats,
+ )
+
+ rows = [
+ (10, 0, 4, "a", 4),
+ (10, 0, 4, "b", 3),
+ (10, 0, 4, "c", 2),
+ (10, 0, 4, "d", 1),
+ ]
+
+ with patch(
+ "feast.infra.offline_stores.contrib.oracle_offline_store.oracle._oracle_fetchall",
+ return_value=rows,
+ ):
+ result = _oracle_categorical_stats(
+ None,
+ "t",
+ "cat_col",
+ "1=1",
+ top_n=10,
+ )
+
+ expected = _expected_categorical_stats()
+ assert_categorical_correctness(result, expected, "oracle_categorical")
+
+ def test_empty_result(self):
+ from unittest.mock import patch
+
+ from feast.infra.offline_stores.contrib.oracle_offline_store.oracle import (
+ _oracle_numeric_stats,
+ )
+
+ with patch(
+ "feast.infra.offline_stores.contrib.oracle_offline_store.oracle._oracle_fetchall",
+ return_value=[None],
+ ):
+ results = _oracle_numeric_stats(
+ None,
+ "t",
+ ["col"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 1
+ assert results[0]["mean"] is None
+ assert results[0]["row_count"] == 0
+
+ def test_multiple_features(self):
+ from unittest.mock import patch
+
+ from feast.infra.offline_stores.contrib.oracle_offline_store.oracle import (
+ _oracle_numeric_stats,
+ )
+
+ row = (
+ 10,
+ # Feature 0: numeric_col
+ 10,
+ 5.5,
+ 3.03,
+ 1.0,
+ 10.0,
+ 5.5,
+ 7.75,
+ 9.1,
+ 9.55,
+ 9.91,
+ # Feature 1: numeric_with_nulls
+ 5,
+ 5.0,
+ 3.16,
+ 1.0,
+ 9.0,
+ 5.0,
+ 7.0,
+ 8.6,
+ 8.8,
+ 8.96,
+ )
+ hist_rows = [(i + 1, 2) for i in range(5)]
+
+ call_count = [0]
+
+ def mock_fetchall(con, sql):
+ call_count[0] += 1
+ return [row] if call_count[0] == 1 else hist_rows
+
+ with patch(
+ "feast.infra.offline_stores.contrib.oracle_offline_store.oracle._oracle_fetchall",
+ side_effect=mock_fetchall,
+ ):
+ results = _oracle_numeric_stats(
+ None,
+ "t",
+ ["col_a", "col_b"],
+ "1=1",
+ histogram_bins=5,
+ )
+
+ assert len(results) == 2
+ assert results[0]["mean"] == pytest.approx(5.5, abs=1e-2)
+ assert results[1]["mean"] == pytest.approx(5.0, abs=1e-2)
+ assert results[0]["null_count"] == 0
+ assert results[1]["null_count"] == 5
+
+
+# ===================================================================
+# Cross-backend consistency: MetricsCalculator vs DuckDB vs Dask
+# ===================================================================
+
+
+class TestCrossBackendConsistency:
+ """Verify that DuckDB, Dask, and MetricsCalculator produce
+ consistent results for the same dataset."""
+
+    def test_numeric_mean_matches_across_backends(self):
+        """Mean, stddev, min and max agree across PyArrow, Dask and DuckDB."""
+        duckdb = pytest.importorskip("duckdb")
+        from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics
+        from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats
+        from feast.monitoring.metrics_calculator import MetricsCalculator
+
+        calc = MetricsCalculator(histogram_bins=5, top_n=10)
+        arr = pa.array(NUMERIC_VALUES, type=pa.float64())
+        pyarrow_result = calc.compute_numeric(arr)
+
+        col = pa.chunked_array([arr])
+        dask_result = _dask_compute_numeric_metrics(col, histogram_bins=5)
+
+        conn = duckdb.connect()
+        try:  # close the connection even if an insert or the helper raises
+            conn.execute("CREATE TABLE consistency_test (v DOUBLE)")
+            conn.executemany(
+                "INSERT INTO consistency_test VALUES (?)",
+                [(v,) for v in NUMERIC_VALUES],
+            )
+            duckdb_results = _duckdb_numeric_stats(
+                conn, "consistency_test", ["v"], "1=1", histogram_bins=5
+            )
+        finally:
+            conn.close()
+
+        duckdb_result = duckdb_results[0]
+
+        assert pyarrow_result["mean"] == pytest.approx(dask_result["mean"], abs=1e-6)
+        assert pyarrow_result["mean"] == pytest.approx(duckdb_result["mean"], abs=1e-6)
+        assert dask_result["mean"] == pytest.approx(duckdb_result["mean"], abs=1e-6)
+
+        assert pyarrow_result["stddev"] == pytest.approx(
+            dask_result["stddev"], abs=0.01
+        )
+        assert pyarrow_result["stddev"] == pytest.approx(
+            duckdb_result["stddev"], abs=0.01
+        )
+
+        assert pyarrow_result["min_val"] == dask_result["min_val"]
+        assert pyarrow_result["min_val"] == duckdb_result["min_val"]
+        assert pyarrow_result["max_val"] == dask_result["max_val"]
+        assert pyarrow_result["max_val"] == duckdb_result["max_val"]
+
+    def test_categorical_unique_count_matches(self):
+        """Unique count and top value agree across PyArrow, Dask and DuckDB."""
+        duckdb = pytest.importorskip("duckdb")
+        from feast.infra.offline_stores.dask import (
+            _dask_compute_categorical_metrics,
+        )
+        from feast.infra.offline_stores.duckdb import _duckdb_categorical_stats
+        from feast.monitoring.metrics_calculator import MetricsCalculator
+
+        calc = MetricsCalculator(histogram_bins=5, top_n=10)
+        arr = pa.array(CATEGORICAL_VALUES, type=pa.string())
+        pyarrow_result = calc.compute_categorical(arr)
+
+        col = pa.chunked_array([arr])
+        dask_result = _dask_compute_categorical_metrics(col, top_n=10)
+
+        conn = duckdb.connect()
+        try:  # close the connection even if an insert or the helper raises
+            conn.execute("CREATE TABLE cat_consistency (v VARCHAR)")
+            conn.executemany(
+                "INSERT INTO cat_consistency VALUES (?)",
+                [(v,) for v in CATEGORICAL_VALUES],
+            )
+            duckdb_result = _duckdb_categorical_stats(
+                conn, "cat_consistency", "v", "1=1", top_n=10
+            )
+        finally:
+            conn.close()
+
+        assert (
+            pyarrow_result["histogram"]["unique_count"]
+            == dask_result["histogram"]["unique_count"]
+            == duckdb_result["histogram"]["unique_count"]
+            == 4
+        )
+
+        pyarrow_top = pyarrow_result["histogram"]["values"][0]
+        dask_top = dask_result["histogram"]["values"][0]
+        duckdb_top = duckdb_result["histogram"]["values"][0]
+        assert pyarrow_top["value"] == dask_top["value"] == duckdb_top["value"] == "a"
+        assert pyarrow_top["count"] == dask_top["count"] == duckdb_top["count"] == 4
+
+    def test_null_rate_matches_across_backends(self):
+        """Null rate is 0.5 for PyArrow, Dask and DuckDB on the same column."""
+        duckdb = pytest.importorskip("duckdb")
+        from feast.infra.offline_stores.dask import _dask_compute_numeric_metrics
+        from feast.infra.offline_stores.duckdb import _duckdb_numeric_stats
+        from feast.monitoring.metrics_calculator import MetricsCalculator
+
+        calc = MetricsCalculator(histogram_bins=5, top_n=10)
+        arr = pa.array(NUMERIC_WITH_NULLS, type=pa.float64())
+        pyarrow_result = calc.compute_numeric(arr)
+
+        col = pa.chunked_array([arr])
+        dask_result = _dask_compute_numeric_metrics(col, histogram_bins=5)
+
+        conn = duckdb.connect()
+        try:  # close the connection even if an insert or the helper raises
+            conn.execute("CREATE TABLE null_consistency (v DOUBLE)")
+            # Bound parameters let None map to SQL NULL without string building.
+            conn.executemany(
+                "INSERT INTO null_consistency VALUES (?)",
+                [(v,) for v in NUMERIC_WITH_NULLS],
+            )
+            duckdb_results = _duckdb_numeric_stats(
+                conn, "null_consistency", ["v"], "1=1", histogram_bins=5
+            )
+        finally:
+            conn.close()
+
+        assert pyarrow_result["null_rate"] == pytest.approx(0.5, abs=1e-6)
+        assert dask_result["null_rate"] == pytest.approx(0.5, abs=1e-6)
+        assert duckdb_results[0]["null_rate"] == pytest.approx(0.5, abs=1e-6)
diff --git a/sdk/python/tests/unit/monitoring/test_metrics_calculator.py b/sdk/python/tests/unit/monitoring/test_metrics_calculator.py
new file mode 100644
index 00000000000..8124531d765
--- /dev/null
+++ b/sdk/python/tests/unit/monitoring/test_metrics_calculator.py
@@ -0,0 +1,289 @@
+import json
+import math
+
+import pyarrow as pa
+import pytest
+
+from feast.monitoring.metrics_calculator import MetricsCalculator
+from feast.monitoring.monitoring_utils import opt_float
+from feast.types import PrimitiveFeastType
+
+
+def _make_calc(bins=20, top_n=10):
+ return MetricsCalculator(histogram_bins=bins, top_n=top_n)
+
+
+class TestClassifyFeature:
+ @pytest.mark.parametrize(
+ "dtype, expected",
+ [
+ (PrimitiveFeastType.INT32, "numeric"),
+ (PrimitiveFeastType.INT64, "numeric"),
+ (PrimitiveFeastType.FLOAT32, "numeric"),
+ (PrimitiveFeastType.FLOAT64, "numeric"),
+ (PrimitiveFeastType.STRING, "categorical"),
+ (PrimitiveFeastType.BOOL, "categorical"),
+ (PrimitiveFeastType.BYTES, None),
+ (PrimitiveFeastType.UNIX_TIMESTAMP, None),
+ ],
+ )
+ def test_classification(self, dtype, expected):
+ assert MetricsCalculator.classify_feature(dtype) == expected
+
+
+class TestComputeNumeric:
+ def test_basic_stats(self):
+ calc = _make_calc()
+ arr = pa.array([1.0, 2.0, 3.0, 4.0, 5.0])
+ result = calc.compute_numeric(arr)
+
+ assert result["feature_type"] == "numeric"
+ assert result["row_count"] == 5
+ assert result["null_count"] == 0
+ assert result["null_rate"] == 0.0
+ assert result["mean"] == pytest.approx(3.0)
+ assert result["min_val"] == 1.0
+ assert result["max_val"] == 5.0
+ assert result["p50"] is not None
+ assert result["histogram"] is not None
+ assert "bins" in result["histogram"]
+ assert "counts" in result["histogram"]
+
+ def test_with_nulls(self):
+ calc = _make_calc()
+ arr = pa.array([1.0, None, 3.0, None, 5.0])
+ result = calc.compute_numeric(arr)
+
+ assert result["row_count"] == 5
+ assert result["null_count"] == 2
+ assert result["null_rate"] == pytest.approx(0.4)
+ assert result["mean"] == pytest.approx(3.0)
+
+ def test_all_nulls(self):
+ calc = _make_calc()
+ arr = pa.array([None, None, None], type=pa.float64())
+ result = calc.compute_numeric(arr)
+
+ assert result["null_count"] == 3
+ assert result["mean"] is None
+ assert result["histogram"] is None
+
+ def test_empty_array(self):
+ calc = _make_calc()
+ arr = pa.array([], type=pa.float64())
+ result = calc.compute_numeric(arr)
+
+ assert result["row_count"] == 0
+ assert result["null_rate"] == 0.0
+
+ def test_single_value(self):
+ calc = _make_calc()
+ arr = pa.array([42.0])
+ result = calc.compute_numeric(arr)
+
+ assert result["mean"] == 42.0
+ assert result["min_val"] == 42.0
+ assert result["max_val"] == 42.0
+ assert result["stddev"] is None # STDDEV_SAMP of 1 value is NaN → None
+
+ def test_histogram_bin_count(self):
+ calc = _make_calc(bins=5)
+ arr = pa.array(list(range(100)), type=pa.float64())
+ result = calc.compute_numeric(arr)
+
+ assert len(result["histogram"]["counts"]) == 5
+ assert len(result["histogram"]["bins"]) == 6
+
+ def test_percentiles_order(self):
+ calc = _make_calc()
+ arr = pa.array(list(range(1000)), type=pa.float64())
+ result = calc.compute_numeric(arr)
+
+ assert result["p50"] <= result["p75"]
+ assert result["p75"] <= result["p90"]
+ assert result["p90"] <= result["p95"]
+ assert result["p95"] <= result["p99"]
+
+
+class TestComputeCategorical:
+ def test_basic(self):
+ calc = _make_calc()
+ arr = pa.array(["a", "b", "a", "c", "a", "b"])
+ result = calc.compute_categorical(arr)
+
+ assert result["feature_type"] == "categorical"
+ assert result["row_count"] == 6
+ assert result["null_count"] == 0
+ assert result["histogram"] is not None
+ assert result["histogram"]["unique_count"] == 3
+
+ top_values = {v["value"] for v in result["histogram"]["values"]}
+ assert "a" in top_values
+
+ def test_with_nulls(self):
+ calc = _make_calc()
+ arr = pa.array(["a", None, "b", None])
+ result = calc.compute_categorical(arr)
+
+ assert result["null_count"] == 2
+ assert result["null_rate"] == 0.5
+
+ def test_high_cardinality(self):
+ calc = _make_calc(top_n=3)
+ arr = pa.array([f"val_{i}" for i in range(100)])
+ result = calc.compute_categorical(arr)
+
+ assert len(result["histogram"]["values"]) == 3
+ assert result["histogram"]["unique_count"] == 100
+ assert result["histogram"]["other_count"] == 97
+
+ def test_all_nulls(self):
+ calc = _make_calc()
+ arr = pa.array([None, None], type=pa.string())
+ result = calc.compute_categorical(arr)
+
+ assert result["null_count"] == 2
+ assert result["histogram"] is None
+
+
+class TestComputeAll:
+ def test_mixed_features(self):
+ calc = _make_calc()
+ table = pa.table(
+ {
+ "age": [25, 30, 35, 40],
+ "city": ["NYC", "LA", "NYC", "SF"],
+ }
+ )
+ fields = [("age", "numeric"), ("city", "categorical")]
+ results = calc.compute_all(table, fields)
+
+ assert len(results) == 2
+ assert results[0]["feature_name"] == "age"
+ assert results[0]["feature_type"] == "numeric"
+ assert results[1]["feature_name"] == "city"
+ assert results[1]["feature_type"] == "categorical"
+
+ def test_missing_column_skipped(self):
+ calc = _make_calc()
+ table = pa.table({"age": [25, 30]})
+ fields = [("age", "numeric"), ("missing_col", "numeric")]
+ results = calc.compute_all(table, fields)
+
+ assert len(results) == 1
+ assert results[0]["feature_name"] == "age"
+
+
+class TestNaNSanitization:
+ """Verify that NaN/Inf values never leak into metric results."""
+
+ def test_opt_float_none(self):
+ assert opt_float(None) is None
+
+ def test_opt_float_normal(self):
+ assert opt_float(3.14) == pytest.approx(3.14)
+
+ def test_opt_float_nan(self):
+ assert opt_float(float("nan")) is None
+
+ def test_opt_float_inf(self):
+ assert opt_float(float("inf")) is None
+
+ def test_opt_float_neg_inf(self):
+ assert opt_float(float("-inf")) is None
+
+ def test_opt_float_zero(self):
+ assert opt_float(0) == 0.0
+
+ def test_opt_float_integer(self):
+ assert opt_float(42) == 42.0
+
+ def test_single_value_stddev_is_none_not_nan(self):
+ """pc.stddev(ddof=1) on a single value returns NaN; we must convert to None."""
+ calc = _make_calc()
+ arr = pa.array([7.0])
+ result = calc.compute_numeric(arr)
+
+ assert result["stddev"] is None
+ assert result["mean"] == pytest.approx(7.0)
+
+ def test_two_values_stddev_is_valid(self):
+ calc = _make_calc()
+ arr = pa.array([4.0, 6.0])
+ result = calc.compute_numeric(arr)
+
+ assert result["stddev"] is not None
+ assert result["stddev"] == pytest.approx(math.sqrt(2.0))
+
+    def test_all_numeric_results_json_serializable(self):
+        """Every field in a numeric result must be JSON-serializable (no NaN/Inf)."""
+        calc = _make_calc(bins=5)
+        for test_data in [
+            [42.0],  # single value
+            [1.0, 2.0],  # two values
+            [1.0, None, 3.0],  # with nulls
+            list(range(100)),  # many values
+        ]:
+            arr = pa.array(test_data, type=pa.float64())
+            result = calc.compute_numeric(arr)
+            json.dumps(result, allow_nan=False)  # strict: ValueError on NaN/Inf
+
+    def test_all_categorical_results_json_serializable(self):
+        """Categorical results must serialize under strict JSON (no NaN/Inf)."""
+        calc = _make_calc()
+        for test_data in [
+            ["a", "b", "a"],
+            ["x", None, "y"],
+            [None, None],
+        ]:
+            result = calc.compute_categorical(pa.array(test_data, type=pa.string()))
+            json.dumps(result, allow_nan=False)  # strict: ValueError on NaN/Inf
+
+    def test_sanitize_floats_cleans_nan(self):
+        """NaN and infinite floats become None; all other fields are left unchanged."""
+        from feast.monitoring.monitoring_service import _sanitize_floats
+
+        row = {
+            "feature_name": "test",
+            "mean": float("nan"),
+            "stddev": float("inf"),
+            "null_rate": float("-inf"),
+            "min_val": 1.0,
+            "max_val": 10.0,
+            "p50": 5.0,
+            "p75": None,
+            "row_count": 100,
+        }
+        result = _sanitize_floats(row)
+        assert result["mean"] is None
+        assert result["stddev"] is None
+        assert result["null_rate"] is None
+        assert result["min_val"] == 1.0
+        assert result["max_val"] == 10.0
+        assert result["p50"] == 5.0
+        assert result["p75"] is None
+        assert result["row_count"] == 100  # non-float fields untouched
+        assert result["feature_name"] == "test"
+        json.dumps(result, allow_nan=False)  # strict: raises ValueError on NaN/Inf
+
+ def test_sanitize_floats_preserves_valid_values(self):
+ from feast.monitoring.monitoring_service import _sanitize_floats
+
+ row = {
+ "mean": 5.5,
+ "stddev": 2.3,
+ "null_rate": 0.0,
+ "min_val": 0.0,
+ "max_val": 10.0,
+ "p50": 5.0,
+ "p75": 7.5,
+ "p90": 9.0,
+ "p95": 9.5,
+ "p99": 9.9,
+ "avg_null_rate": 0.05,
+ "max_null_rate": 0.1,
+ }
+ result = _sanitize_floats(row)
+
+ for key, val in row.items():
+ assert result[key] == val
diff --git a/sdk/python/tests/unit/test_metrics.py b/sdk/python/tests/unit/test_metrics.py
index bffde73dd91..abf2a35e389 100644
--- a/sdk/python/tests/unit/test_metrics.py
+++ b/sdk/python/tests/unit/test_metrics.py
@@ -18,9 +18,14 @@
import pytest
from feast.metrics import (
+ emit_offline_audit_log,
+ emit_online_audit_log,
feature_freshness_seconds,
materialization_duration_seconds,
materialization_result_total,
+ offline_store_request_latency_seconds,
+ offline_store_request_total,
+ offline_store_row_count,
online_features_entity_count,
online_features_request_count,
online_features_status_total,
@@ -42,13 +47,11 @@
)
-@pytest.fixture(autouse=True)
-def _enable_metrics():
- """Enable all metric categories for each test, then restore."""
+def _all_enabled_flags():
+ """Return a _MetricsFlags with every category enabled."""
import feast.metrics as m
- original = m._config
- m._config = m._MetricsFlags(
+ return m._MetricsFlags(
enabled=True,
resource=True,
request=True,
@@ -56,7 +59,18 @@ def _enable_metrics():
push=True,
materialization=True,
freshness=True,
+ offline_features=True,
+ audit_logging=True,
)
+
+
+@pytest.fixture(autouse=True)
+def _enable_metrics():
+ """Enable all metric categories for each test, then restore."""
+ import feast.metrics as m
+
+ original = m._config
+ m._config = _all_enabled_flags()
yield
m._config = original
@@ -1081,3 +1095,640 @@ def test_separate_from_read_transform_metric(self):
assert abs(read_delta - 0.01) < 0.001
assert abs(write_delta - 0.05) < 0.001
+
+
+class TestOfflineStoreMetrics:
+ """Tests for the offline store Prometheus metrics (RED pattern)."""
+
+ def test_request_total_increments_on_success(self):
+ before = offline_store_request_total.labels(
+ method="to_arrow", status="success"
+ )._value.get()
+
+ offline_store_request_total.labels(method="to_arrow", status="success").inc()
+
+ assert (
+ offline_store_request_total.labels(
+ method="to_arrow", status="success"
+ )._value.get()
+ == before + 1
+ )
+
+ def test_request_total_increments_on_error(self):
+ before = offline_store_request_total.labels(
+ method="to_arrow", status="error"
+ )._value.get()
+
+ offline_store_request_total.labels(method="to_arrow", status="error").inc()
+
+ assert (
+ offline_store_request_total.labels(
+ method="to_arrow", status="error"
+ )._value.get()
+ == before + 1
+ )
+
+ def test_latency_histogram_records(self):
+ before_sum = offline_store_request_latency_seconds.labels(
+ method="to_arrow"
+ )._sum.get()
+
+ offline_store_request_latency_seconds.labels(method="to_arrow").observe(2.5)
+
+ after_sum = offline_store_request_latency_seconds.labels(
+ method="to_arrow"
+ )._sum.get()
+ assert pytest.approx(after_sum - before_sum, abs=0.01) == 2.5
+
+ def test_row_count_histogram_records(self):
+ before_sum = offline_store_row_count.labels(method="to_arrow")._sum.get()
+
+ offline_store_row_count.labels(method="to_arrow").observe(1000)
+
+ after_sum = offline_store_row_count.labels(method="to_arrow")._sum.get()
+ assert pytest.approx(after_sum - before_sum, abs=1) == 1000
+
+ def test_different_methods_tracked_independently(self):
+ before_a = offline_store_request_total.labels(
+ method="to_arrow", status="success"
+ )._value.get()
+ before_b = offline_store_request_total.labels(
+ method="other", status="success"
+ )._value.get()
+
+ offline_store_request_total.labels(method="to_arrow", status="success").inc()
+
+ assert (
+ offline_store_request_total.labels(
+ method="to_arrow", status="success"
+ )._value.get()
+ == before_a + 1
+ )
+ assert (
+ offline_store_request_total.labels(
+ method="other", status="success"
+ )._value.get()
+ == before_b
+ )
+
+
+class TestEmitAuditLogs:
+ """Tests for structured JSON audit log emission."""
+
+ def test_emit_online_audit_log_writes_json(self):
+ import json
+ import logging
+
+ _audit_logger = logging.getLogger("feast.audit")
+ with patch.object(_audit_logger, "info") as mock_info:
+ emit_online_audit_log(
+ requestor_id="user@example.com",
+ entity_keys=["driver_id", "customer_id"],
+ entity_count=10,
+ feature_views=["driver_fv", "order_fv"],
+ feature_count=5,
+ status="success",
+ latency_ms=42.0,
+ )
+
+ mock_info.assert_called_once()
+ logged_json = mock_info.call_args[0][0]
+ record = json.loads(logged_json)
+
+ assert record["event"] == "online_feature_request"
+ assert record["requestor_id"] == "user@example.com"
+ assert record["entity_keys"] == ["driver_id", "customer_id"]
+ assert record["entity_count"] == 10
+ assert record["feature_views"] == ["driver_fv", "order_fv"]
+ assert record["feature_count"] == 5
+ assert record["status"] == "success"
+ assert record["latency_ms"] == pytest.approx(42.0)
+ assert "timestamp" in record
+
+ def test_emit_online_audit_log_noop_when_disabled(self):
+ import logging
+
+ import feast.metrics as m
+
+ m._config = m._MetricsFlags(enabled=True, audit_logging=False)
+ _audit_logger = logging.getLogger("feast.audit")
+ with patch.object(_audit_logger, "info") as mock_info:
+ emit_online_audit_log(
+ requestor_id="user@example.com",
+ entity_keys=["driver_id"],
+ entity_count=1,
+ feature_views=["driver_fv"],
+ feature_count=1,
+ status="success",
+ latency_ms=10.0,
+ )
+ mock_info.assert_not_called()
+
+ def test_emit_offline_audit_log_writes_json(self):
+ import json
+ import logging
+
+ _audit_logger = logging.getLogger("feast.audit")
+ with patch.object(_audit_logger, "info") as mock_info:
+ emit_offline_audit_log(
+ method="to_arrow",
+ feature_views=["driver_fv"],
+ feature_count=3,
+ row_count=500,
+ status="success",
+ start_time="2026-04-27T12:00:00+00:00",
+ end_time="2026-04-27T12:00:01+00:00",
+ duration_ms=1230.0,
+ )
+
+ mock_info.assert_called_once()
+ logged_json = mock_info.call_args[0][0]
+ record = json.loads(logged_json)
+
+ assert record["event"] == "offline_feature_retrieval"
+ assert "timestamp" in record
+ assert record["method"] == "to_arrow"
+ assert record["feature_views"] == ["driver_fv"]
+ assert record["feature_count"] == 3
+ assert record["row_count"] == 500
+ assert record["status"] == "success"
+ assert record["duration_ms"] == pytest.approx(1230.0)
+ assert record["start_time"] == "2026-04-27T12:00:00+00:00"
+ assert record["end_time"] == "2026-04-27T12:00:01+00:00"
+
+ def test_emit_offline_audit_log_noop_when_disabled(self):
+ import logging
+
+ import feast.metrics as m
+
+ m._config = m._MetricsFlags(enabled=True, audit_logging=False)
+ _audit_logger = logging.getLogger("feast.audit")
+ with patch.object(_audit_logger, "info") as mock_info:
+ emit_offline_audit_log(
+ method="to_arrow",
+ feature_views=["fv"],
+ feature_count=1,
+ row_count=10,
+ status="success",
+ start_time="t0",
+ end_time="t1",
+ duration_ms=500.0,
+ )
+ mock_info.assert_not_called()
+
+ def test_emit_online_audit_log_with_error_status(self):
+ import json
+ import logging
+
+ _audit_logger = logging.getLogger("feast.audit")
+ with patch.object(_audit_logger, "info") as mock_info:
+ emit_online_audit_log(
+ requestor_id="unknown",
+ entity_keys=[],
+ entity_count=0,
+ feature_views=[],
+ feature_count=0,
+ status="error",
+ latency_ms=1.0,
+ )
+
+ record = json.loads(mock_info.call_args[0][0])
+ assert record["status"] == "error"
+
+
+class TestBuildMetricsFlagsOfflineAndAudit:
+ """Tests for the new offline_features and audit_logging flags."""
+
+ def test_no_config_defaults_for_new_flags(self):
+ from feast.metrics import build_metrics_flags
+
+ flags = build_metrics_flags(None)
+ assert flags.offline_features is True
+ assert flags.audit_logging is False
+
+ def test_explicit_enable(self):
+ from types import SimpleNamespace
+
+ from feast.metrics import build_metrics_flags
+
+ mc = SimpleNamespace(
+ enabled=True,
+ resource=True,
+ request=True,
+ online_features=True,
+ push=True,
+ materialization=True,
+ freshness=True,
+ offline_features=True,
+ audit_logging=True,
+ )
+ flags = build_metrics_flags(mc)
+ assert flags.offline_features is True
+ assert flags.audit_logging is True
+
+ def test_explicit_disable(self):
+ from types import SimpleNamespace
+
+ from feast.metrics import build_metrics_flags
+
+ mc = SimpleNamespace(
+ enabled=True,
+ resource=True,
+ request=True,
+ online_features=True,
+ push=True,
+ materialization=True,
+ freshness=True,
+ offline_features=False,
+ audit_logging=False,
+ )
+ flags = build_metrics_flags(mc)
+ assert flags.offline_features is False
+ assert flags.audit_logging is False
+
+ def test_missing_new_attrs_fall_back_to_defaults(self):
+ from types import SimpleNamespace
+
+ from feast.metrics import build_metrics_flags
+
+ mc = SimpleNamespace(
+ enabled=True,
+ resource=True,
+ request=True,
+ online_features=True,
+ push=True,
+ materialization=True,
+ freshness=True,
+ )
+ flags = build_metrics_flags(mc)
+ assert flags.offline_features is True
+ assert flags.audit_logging is False
+
+
+class TestExtractRetrievalMetadata:
+ """Tests for _extract_retrieval_metadata helper."""
+
+ def test_extracts_feature_views_and_count(self):
+ from feast.infra.offline_stores.offline_store import (
+ RetrievalMetadata,
+ _extract_retrieval_metadata,
+ )
+
+ job = MagicMock()
+ job.metadata = RetrievalMetadata(
+ features=[
+ "driver_fv:conv_rate",
+ "driver_fv:acc_rate",
+ "vehicle_fv:mileage",
+ ],
+ keys=["driver_id"],
+ )
+
+ fv_names, feat_count = _extract_retrieval_metadata(job)
+ assert feat_count == 3
+ assert set(fv_names) == {"driver_fv", "vehicle_fv"}
+
+ def test_returns_empty_when_no_metadata(self):
+ from feast.infra.offline_stores.offline_store import (
+ _extract_retrieval_metadata,
+ )
+
+ job = MagicMock()
+ job.metadata = None
+
+ fv_names, feat_count = _extract_retrieval_metadata(job)
+ assert fv_names == []
+ assert feat_count == 0
+
+ def test_handles_not_implemented_metadata(self):
+ from feast.infra.offline_stores.offline_store import (
+ _extract_retrieval_metadata,
+ )
+
+ job = MagicMock()
+ type(job).metadata = property(
+ lambda self: (_ for _ in ()).throw(NotImplementedError())
+ )
+
+ fv_names, feat_count = _extract_retrieval_metadata(job)
+ assert fv_names == []
+ assert feat_count == 0
+
+
+class TestRetrievalJobToArrowInstrumentation:
+ """Tests for the metrics/audit instrumentation in RetrievalJob.to_arrow()."""
+
+ def _make_job(
+ self, table, on_demand_fvs=None, metadata=None, raise_on_internal=None
+ ):
+ """Create a concrete RetrievalJob subclass for testing."""
+ from feast.infra.offline_stores.offline_store import RetrievalJob
+
+ class _TestJob(RetrievalJob):
+ def __init__(self):
+ self._table = table
+ self._odfvs = on_demand_fvs or []
+ self._metadata = metadata
+ self._raise = raise_on_internal
+
+ def _to_arrow_internal(self, timeout=None):
+ if self._raise:
+ raise self._raise
+ return self._table
+
+ @property
+ def full_feature_names(self):
+ return False
+
+ @property
+ def on_demand_feature_views(self):
+ return self._odfvs
+
+ @property
+ def metadata(self):
+ return self._metadata
+
+ return _TestJob()
+
+ def test_success_increments_counter_and_records_latency(self):
+ import pyarrow as pa
+
+ table = pa.table({"col": [1, 2, 3]})
+ job = self._make_job(table)
+
+ before_count = offline_store_request_total.labels(
+ method="to_arrow", status="success"
+ )._value.get()
+ before_latency = offline_store_request_latency_seconds.labels(
+ method="to_arrow"
+ )._sum.get()
+
+ result = job.to_arrow()
+
+ assert result.num_rows == 3
+ assert (
+ offline_store_request_total.labels(
+ method="to_arrow", status="success"
+ )._value.get()
+ == before_count + 1
+ )
+ assert (
+ offline_store_request_latency_seconds.labels(method="to_arrow")._sum.get()
+ > before_latency
+ )
+
+ def test_error_increments_error_counter(self):
+ job = self._make_job(None, raise_on_internal=RuntimeError("query failed"))
+
+ before_error = offline_store_request_total.labels(
+ method="to_arrow", status="error"
+ )._value.get()
+
+ with pytest.raises(RuntimeError, match="query failed"):
+ job.to_arrow()
+
+ assert (
+ offline_store_request_total.labels(
+ method="to_arrow", status="error"
+ )._value.get()
+ == before_error + 1
+ )
+
+ def test_row_count_recorded_on_success(self):
+ import pyarrow as pa
+
+ table = pa.table({"a": list(range(500))})
+ job = self._make_job(table)
+
+ before_sum = offline_store_row_count.labels(method="to_arrow")._sum.get()
+
+ job.to_arrow()
+
+ assert (
+ offline_store_row_count.labels(method="to_arrow")._sum.get()
+ >= before_sum + 500
+ )
+
+ def test_row_count_recorded_when_zero(self):
+ import pyarrow as pa
+
+ table = pa.table({"a": pa.array([], type=pa.int64())})
+ job = self._make_job(table)
+
+ hist = offline_store_row_count.labels(method="to_arrow")
+ before_bucket = hist._buckets[0].get()
+
+ job.to_arrow()
+
+ assert hist._buckets[0].get() == before_bucket + 1
+
+ def test_metrics_skipped_when_offline_features_disabled(self):
+ import pyarrow as pa
+
+ import feast.metrics as m
+
+ m._config = m._MetricsFlags(
+ enabled=True, offline_features=False, audit_logging=False
+ )
+
+ table = pa.table({"col": [1, 2]})
+ job = self._make_job(table)
+
+ before_count = offline_store_request_total.labels(
+ method="to_arrow", status="success"
+ )._value.get()
+
+ job.to_arrow()
+
+ assert (
+ offline_store_request_total.labels(
+ method="to_arrow", status="success"
+ )._value.get()
+ == before_count
+ )
+
+ def test_audit_log_emitted_on_success(self):
+ import pyarrow as pa
+
+ from feast.infra.offline_stores.offline_store import RetrievalMetadata
+
+ meta = RetrievalMetadata(
+ features=["driver_fv:conv_rate", "driver_fv:acc_rate"],
+ keys=["driver_id"],
+ )
+ table = pa.table({"col": [1, 2, 3]})
+ job = self._make_job(table, metadata=meta)
+
+ with patch("feast.metrics.emit_offline_audit_log") as mock_audit:
+ job.to_arrow()
+
+ mock_audit.assert_called_once()
+ call_kwargs = mock_audit.call_args[1]
+ assert call_kwargs["method"] == "to_arrow"
+ assert call_kwargs["status"] == "success"
+ assert call_kwargs["row_count"] == 3
+ assert call_kwargs["feature_count"] == 2
+ assert set(call_kwargs["feature_views"]) == {"driver_fv"}
+
+ def test_audit_log_skipped_when_disabled(self):
+ import pyarrow as pa
+
+ import feast.metrics as m
+
+ m._config = m._MetricsFlags(
+ enabled=True, offline_features=True, audit_logging=False
+ )
+
+ table = pa.table({"col": [1]})
+ job = self._make_job(table)
+
+ with patch("feast.metrics.emit_offline_audit_log") as mock_audit:
+ job.to_arrow()
+ mock_audit.assert_not_called()
+
+ def test_instrumentation_failure_does_not_mask_query_error(self):
+ """A broken feast.metrics._config must not stop to_arrow() from returning."""
+ import pyarrow as pa
+
+ table = pa.table({"col": [1]})
+ job = self._make_job(table)
+
+ with patch(
+ "feast.metrics._config",
+ new_callable=lambda: property(
+ lambda self: (_ for _ in ()).throw(RuntimeError("metrics broken"))
+ ),
+ ):
+ result = job.to_arrow()
+ assert result.num_rows == 1
+
+
+class TestParseFeatureInfo:
+ """Tests for _parse_feature_info in feature_server."""
+
+ def test_feature_ref_list(self):
+ from feast.feature_server import _parse_feature_info
+
+ refs = ["driver_fv:conv_rate", "driver_fv:acc_rate", "vehicle_fv:mileage"]
+ fv_names, feat_count = _parse_feature_info(refs)
+ assert feat_count == 3
+ assert set(fv_names) == {"driver_fv", "vehicle_fv"}
+
+ def test_empty_list(self):
+ from feast.feature_server import _parse_feature_info
+
+ fv_names, feat_count = _parse_feature_info([])
+ assert fv_names == []
+ assert feat_count == 0
+
+ def test_feature_service(self):
+ from feast.feature_server import _parse_feature_info
+
+ proj1 = MagicMock()
+ proj1.name = "driver_fv"
+ proj1.features = [MagicMock(), MagicMock()]
+ proj2 = MagicMock()
+ proj2.name = "order_fv"
+ proj2.features = [MagicMock()]
+
+ fs_svc = MagicMock()
+ fs_svc.feature_view_projections = [proj1, proj2]
+
+ from feast.feature_service import FeatureService
+
+ fs_svc.__class__ = FeatureService
+
+ fv_names, feat_count = _parse_feature_info(fs_svc)
+ assert feat_count == 3
+ assert fv_names == ["driver_fv", "order_fv"]
+
+ def test_strips_version_suffix(self):
+ from feast.feature_server import _parse_feature_info
+
+ refs = ["driver_fv@v2:conv_rate"]
+ fv_names, feat_count = _parse_feature_info(refs)
+ assert feat_count == 1
+ assert fv_names == ["driver_fv"]
+
+
+class TestEmitOnlineAudit:
+ """Tests for the _emit_online_audit helper in feature_server."""
+
+ def test_emits_audit_log_with_anonymous_user(self):
+ from feast.feature_server import GetOnlineFeaturesRequest, _emit_online_audit
+
+ request = GetOnlineFeaturesRequest(
+ entities={"driver_id": [1, 2]},
+ features=["driver_fv:conv_rate"],
+ )
+
+ with (
+ patch("feast.feature_server.feast_metrics") as mock_metrics,
+ patch(
+ "feast.permissions.security_manager.get_security_manager",
+ return_value=None,
+ ),
+ ):
+ _emit_online_audit(
+ request=request,
+ features=request.features,
+ entity_count=2,
+ status="success",
+ latency_ms=15.0,
+ )
+
+ mock_metrics.emit_online_audit_log.assert_called_once()
+ kwargs = mock_metrics.emit_online_audit_log.call_args[1]
+ assert kwargs["requestor_id"] == "anonymous"
+ assert kwargs["entity_keys"] == ["driver_id"]
+ assert kwargs["entity_count"] == 2
+ assert kwargs["status"] == "success"
+
+ def test_emits_audit_log_with_authenticated_user(self):
+ from feast.feature_server import GetOnlineFeaturesRequest, _emit_online_audit
+
+ request = GetOnlineFeaturesRequest(
+ entities={"driver_id": [1]},
+ features=["driver_fv:conv_rate"],
+ )
+
+ mock_sm = MagicMock()
+ mock_sm.current_user.username = "jdoe"
+
+ with (
+ patch("feast.feature_server.feast_metrics") as mock_metrics,
+ patch(
+ "feast.permissions.security_manager.get_security_manager",
+ return_value=mock_sm,
+ ),
+ ):
+ _emit_online_audit(
+ request=request,
+ features=request.features,
+ entity_count=1,
+ status="success",
+ latency_ms=10.0,
+ )
+
+ kwargs = mock_metrics.emit_online_audit_log.call_args[1]
+ assert kwargs["requestor_id"] == "jdoe"
+
+ def test_does_not_raise_on_failure(self):
+ from feast.feature_server import GetOnlineFeaturesRequest, _emit_online_audit
+
+ request = GetOnlineFeaturesRequest(
+ entities={"driver_id": [1]},
+ features=["driver_fv:conv_rate"],
+ )
+
+ with patch(
+ "feast.permissions.security_manager.get_security_manager",
+ side_effect=RuntimeError("auth broken"),
+ ):
+ _emit_online_audit(
+ request=request,
+ features=request.features,
+ entity_count=1,
+ status="error",
+ latency_ms=5.0,
+ )
diff --git a/ui/package-lock.json b/ui/package-lock.json
index 1c6ce720e02..2552b4367bc 100644
--- a/ui/package-lock.json
+++ b/ui/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "@feast-dev/feast-ui",
- "version": "0.57.0",
+ "version": "0.63.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@feast-dev/feast-ui",
- "version": "0.57.0",
+ "version": "0.63.0",
"license": "Apache-2.0",
"dependencies": {
"@elastic/datemath": "^5.0.3",
@@ -163,7 +163,6 @@
"integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"@babel/code-frame": "^7.27.1",
"@babel/generator": "^7.28.5",
@@ -860,7 +859,6 @@
"integrity": "sha512-p9OkPbZ5G7UT1MofwYFigGebnrzGJacoBSQM0/6bi/PUMVE+qlWDD/OalvQKbwgQzU6dl0xAv6r4X7Jme0RYxA==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"@babel/helper-plugin-utils": "^7.27.1"
},
@@ -1802,7 +1800,6 @@
"integrity": "sha512-2KH4LWGSrJIkVf5tSiBFYuXDAoWRq2MMwgivCf+93dd0GQi8RXLjKA/0EvRnVV5G0hrHczsquXuD01L8s6dmBw==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"@babel/helper-annotate-as-pure": "^7.27.1",
"@babel/helper-module-imports": "^7.27.1",
@@ -2753,7 +2750,6 @@
"resolved": "https://registry.npmjs.org/@emotion/css/-/css-11.13.5.tgz",
"integrity": "sha512-wQdD0Xhkn3Qy2VNcIzbLP9MR8TafI0MJb7BEAXKp+w4+XqErksWR4OXomuDzPsN4InLdGhVe6EYcn2ZIUCpB8w==",
"license": "MIT",
- "peer": true,
"dependencies": {
"@emotion/babel-plugin": "^11.13.5",
"@emotion/cache": "^11.13.5",
@@ -2794,7 +2790,6 @@
"resolved": "https://registry.npmjs.org/@emotion/react/-/react-11.14.0.tgz",
"integrity": "sha512-O000MLDBDdk/EohJPFUqvnp4qnHeYkVP5B0xEG0D/L7cOKP9kefu2DXn8dj74cQfsEzUqh+sr1RzFqiL1o+PpA==",
"license": "MIT",
- "peer": true,
"dependencies": {
"@babel/runtime": "^7.18.3",
"@emotion/babel-plugin": "^11.13.5",
@@ -4725,7 +4720,6 @@
"integrity": "sha512-8QqtOQT5ACVlmsvKOJNEaWmRPmcojMOzCz4Hs2BGG/toAp/K38LcsMRyLp349glq5AzJbCEeimEoxaX6v/fLrA==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"@babel/core": "^7.21.3",
"@svgr/babel-preset": "8.1.0",
@@ -4834,7 +4828,6 @@
"integrity": "sha512-o4PXJQidqJl82ckFaXUeoAW+XysPLauYI43Abki5hABd853iMhitooc6znOnczgbTYmEP6U6/y1ZyKAIsvMKGg==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"@babel/code-frame": "^7.10.4",
"@babel/runtime": "^7.12.5",
@@ -5524,7 +5517,6 @@
"integrity": "sha512-promo4eFwuiW+TfGxhi+0x3czqTYJkG8qB17ZUJiVF10Xm7NLVRSLUsfRTU/6h1e24VvRnXCx+hG7li58lkzog==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"@types/linkify-it": "^5",
"@types/mdurl": "^2"
@@ -5632,7 +5624,6 @@
"resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.27.tgz",
"integrity": "sha512-cisd7gxkzjBKU2GgdYrTdtQx1SORymWyaAFhaxQPK9bYO9ot3Y5OikQRvY0VYQtvwjeQnizCINJAenh/V7MK2w==",
"license": "MIT",
- "peer": true,
"dependencies": {
"@types/prop-types": "*",
"csstype": "^3.2.2"
@@ -5643,7 +5634,6 @@
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.7.tgz",
"integrity": "sha512-MEe3UeoENYVFXzoXEWsvcpg6ZvlrFNlOQ7EOsvhI3CfAXwzPfO8Qwuxd40nepsYKqyyVQnTdEfv68q91yLcKrQ==",
"license": "MIT",
- "peer": true,
"peerDependencies": {
"@types/react": "^18.0.0"
}
@@ -5833,7 +5823,6 @@
"integrity": "sha512-TiZzBSJja/LbhNPvk6yc0JrX9XqhQ0hdh6M2svYfsHGejaKFIAGd9MQ+ERIMzLGlN/kZoYIgdxFV0PuljTKXag==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"@eslint-community/regexpp": "^4.4.0",
"@typescript-eslint/scope-manager": "5.62.0",
@@ -5889,7 +5878,6 @@
"integrity": "sha512-VlJEV0fOQ7BExOsHYAGrgbEiZoi8D+Bl2+f6V2RrXerRSylnp+ZBHmPvaIa8cz0Ajx7WO7Z5RqfgYg7ED1nRhA==",
"dev": true,
"license": "BSD-2-Clause",
- "peer": true,
"dependencies": {
"@typescript-eslint/scope-manager": "5.62.0",
"@typescript-eslint/types": "5.62.0",
@@ -6289,7 +6277,6 @@
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"dev": true,
"license": "MIT",
- "peer": true,
"bin": {
"acorn": "bin/acorn"
},
@@ -6387,7 +6374,6 @@
"integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"fast-deep-equal": "^3.1.1",
"fast-json-stable-stringify": "^2.0.0",
@@ -7398,7 +7384,6 @@
}
],
"license": "MIT",
- "peer": true,
"dependencies": {
"baseline-browser-mapping": "^2.8.25",
"caniuse-lite": "^1.0.30001754",
@@ -8677,7 +8662,6 @@
"resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz",
"integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
"license": "ISC",
- "peer": true,
"engines": {
"node": ">=12"
}
@@ -9612,7 +9596,6 @@
"deprecated": "This version is no longer supported. Please see https://eslint.org/version-support for other options.",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.2.0",
"@eslint-community/regexpp": "^4.6.1",
@@ -12242,7 +12225,6 @@
"integrity": "sha512-bc4NBHqOqSfRW7POMkHd51LvClaeMXpm8dx0e8oE2GORbq5aRK7Bxl4FyzVLdGtLmvLKL7BTDBG5ACQm4HWjTA==",
"devOptional": true,
"license": "MIT",
- "peer": true,
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/immer"
@@ -13208,7 +13190,6 @@
"integrity": "sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"@jest/core": "^29.7.0",
"@jest/types": "^29.6.3",
@@ -15825,7 +15806,6 @@
"resolved": "https://registry.npmjs.org/moment/-/moment-2.30.1.tgz",
"integrity": "sha512-uEmtNhbDOrWPFS+hdjFCBfy9f2YoyzRpwcl+DqpC6taX21FzsTLQVbMV/W7PzNSX6x/bhC1zA3c2UQ5NzH6how==",
"license": "MIT",
- "peer": true,
"engines": {
"node": "*"
}
@@ -16735,7 +16715,6 @@
}
],
"license": "MIT",
- "peer": true,
"dependencies": {
"nanoid": "^3.3.11",
"picocolors": "^1.1.1",
@@ -17885,7 +17864,6 @@
"integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"cssesc": "^3.0.0",
"util-deprecate": "^1.0.2"
@@ -18162,7 +18140,6 @@
"integrity": "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==",
"hasInstallScript": true,
"license": "BSD-3-Clause",
- "peer": true,
"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
"@protobufjs/base64": "^1.1.2",
@@ -18556,7 +18533,6 @@
"resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
"integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
"license": "MIT",
- "peer": true,
"dependencies": {
"loose-envify": "^1.1.0"
},
@@ -18791,7 +18767,6 @@
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",
"integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==",
"license": "MIT",
- "peer": true,
"dependencies": {
"loose-envify": "^1.1.0",
"scheduler": "^0.23.2"
@@ -18977,7 +18952,6 @@
"integrity": "sha512-F27qZr8uUqwhWZboondsPx8tnC3Ct3SxZA3V5WyEvujRyyNv0VYPhoBg1gZ8/MV5tubQp76Trw8lTv9hzRBa+A==",
"dev": true,
"license": "MIT",
- "peer": true,
"engines": {
"node": ">=0.10.0"
}
@@ -19049,7 +19023,6 @@
"resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-6.30.2.tgz",
"integrity": "sha512-l2OwHn3UUnEVUqc6/1VMmR1cvZryZ3j3NzapC2eUXO1dB0sYp5mvwdjiXhpUbRb21eFow3qSxpP8Yv6oAU824Q==",
"license": "MIT",
- "peer": true,
"dependencies": {
"@remix-run/router": "1.23.1",
"react-router": "6.30.2"
@@ -19225,7 +19198,6 @@
"resolved": "https://registry.npmjs.org/redux/-/redux-4.2.1.tgz",
"integrity": "sha512-LAUYz4lc+Do8/g7aeRa8JkyDErK6ekstQaqWQrNRW//MY1TvCEpMtpTWvlQ+FPbWCx+Xixu/6SHt5N0HR+SB4w==",
"license": "MIT",
- "peer": true,
"dependencies": {
"@babel/runtime": "^7.9.2"
}
@@ -19685,7 +19657,6 @@
"integrity": "sha512-fS6iqSPZDs3dr/y7Od6y5nha8dW1YnbgtsyotCVvoFGKbERG++CVRFv1meyGDE1SNItQA8BrnCw7ScdAhRJ3XQ==",
"dev": true,
"license": "MIT",
- "peer": true,
"bin": {
"rollup": "dist/bin/rollup"
},
@@ -20207,7 +20178,6 @@
"integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"fast-deep-equal": "^3.1.3",
"fast-uri": "^3.0.1",
@@ -21874,7 +21844,6 @@
"integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==",
"dev": true,
"license": "(MIT OR CC0-1.0)",
- "peer": true,
"engines": {
"node": ">=10"
},
@@ -21989,7 +21958,6 @@
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.3.tgz",
"integrity": "sha512-84MVSjMEHP+FQRPy3pX9sTVV/INIex71s9TL2Gm5FG/WG1SqXeKyZ0k7/blY/4FdOzI12CBy1vGc4og/eus0fw==",
"license": "Apache-2.0",
- "peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@@ -22591,7 +22559,6 @@
"integrity": "sha512-HU1JOuV1OavsZ+mfigY0j8d1TgQgbZ6M+J75zDkpEAwYeXjWSqrGJtgnPblJjd/mAyTNQ7ygw0MiKOn6etz8yw==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"@types/eslint-scope": "^3.7.7",
"@types/estree": "^1.0.8",
@@ -22672,7 +22639,6 @@
"integrity": "sha512-0XavAZbNJ5sDrCbkpWL8mia0o5WPOd2YGtxrEiZkBK9FjLppIUK2TgxK6qGD2P3hUXTJNNPVibrerKcx5WkR1g==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"@types/bonjour": "^3.5.9",
"@types/connect-history-api-fallback": "^1.3.5",
@@ -23114,7 +23080,6 @@
"integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
"dev": true,
"license": "MIT",
- "peer": true,
"dependencies": {
"fast-deep-equal": "^3.1.3",
"fast-uri": "^3.0.1",
diff --git a/ui/src/FeastUISansProviders.tsx b/ui/src/FeastUISansProviders.tsx
index 50de27b5944..801229cc40a 100644
--- a/ui/src/FeastUISansProviders.tsx
+++ b/ui/src/FeastUISansProviders.tsx
@@ -26,10 +26,15 @@ import DocumentLabelingPage from "./pages/document-labeling/DocumentLabelingPage
import PermissionsIndex from "./pages/permissions/Index";
import LineageIndex from "./pages/lineage/Index";
import NoProjectGuard from "./components/NoProjectGuard";
+import MonitoringIndex from "./pages/monitoring/Index";
+import FeatureMetricsDetail from "./pages/monitoring/FeatureMetricsDetail";
import TabsRegistryContext, {
FeastTabsRegistryInterface,
} from "./custom-tabs/TabsRegistryContext";
+import MonitoringContext, {
+ MonitoringConfig,
+} from "./contexts/MonitoringContext";
import CurlGeneratorTab from "./pages/feature-views/CurlGeneratorTab";
import FeatureFlagsContext, {
FeatureFlags,
@@ -46,6 +51,7 @@ interface FeastUIConfigs {
featureFlags?: FeatureFlags;
projectListPromise?: Promise;
fetchOptions?: FetchOptions;
+ monitoringConfig?: MonitoringConfig;
}
const defaultProjectListPromise = (basename: string) => {
@@ -134,13 +140,21 @@ const FeastUISansProvidersInner = ({
feastUIConfigs?.tabsRegistry?.DatasetCustomTabs || [],
}}
>
-
-
-
- }>
- } />
+
+
+ }>
+ } />
+ }>
+ } />
+ } />
}
@@ -181,28 +195,33 @@ const FeastUISansProvidersInner = ({
element={ }
/>
- } />
- }
- />
- }
- />
- }
- />
- } />
-
+ } />
+ }
+ />
+ }
+ />
+ } />
+ } />
+ }
+ />
+ }
+ />
- } />
-
-
-
-
-
+
+ } />
+
+
+
+
+
);
diff --git a/ui/src/contexts/MonitoringContext.ts b/ui/src/contexts/MonitoringContext.ts
new file mode 100644
index 00000000000..985f00080e9
--- /dev/null
+++ b/ui/src/contexts/MonitoringContext.ts
@@ -0,0 +1,14 @@
+import React from "react";
+
+interface MonitoringConfig {
+ apiBaseUrl: string;
+ enabled: boolean;
+}
+/** Monitoring settings context; the argument is the default used when no provider is mounted. */
+const MonitoringContext = React.createContext<MonitoringConfig>({
+  apiBaseUrl: "/api/v1",
+  enabled: true,
+});
+
+export default MonitoringContext;
+export type { MonitoringConfig };
diff --git a/ui/src/pages/Sidebar.tsx b/ui/src/pages/Sidebar.tsx
index cf3d64a6816..b5590ffac0c 100644
--- a/ui/src/pages/Sidebar.tsx
+++ b/ui/src/pages/Sidebar.tsx
@@ -156,6 +156,15 @@ const SideNav = () => {
renderItem: (props) => ,
isSelected: useMatchSubpath(`${baseUrl}/data-set`),
},
+ {
+ name: "Monitoring",
+ id: htmlIdGenerator("monitoring")(),
+ icon: ,
+ renderItem: (props) => (
+
+ ),
+ isSelected: useMatchSubpath(`${baseUrl}/monitoring`),
+ },
{
name: "Data Labeling",
id: htmlIdGenerator("dataLabeling")(),
diff --git a/ui/src/pages/features/FeatureInstance.tsx b/ui/src/pages/features/FeatureInstance.tsx
index fe81c6e619f..aa73db7c8c1 100644
--- a/ui/src/pages/features/FeatureInstance.tsx
+++ b/ui/src/pages/features/FeatureInstance.tsx
@@ -3,8 +3,9 @@ import { Route, Routes, useNavigate, useParams } from "react-router-dom";
import { EuiPageTemplate } from "@elastic/eui";
import { FeatureIcon } from "../../graphics/FeatureIcon";
-import { useMatchExact } from "../../hooks/useMatchSubpath";
+import { useMatchExact, useMatchSubpath } from "../../hooks/useMatchSubpath";
import FeatureOverviewTab from "./FeatureOverviewTab";
+import FeatureMonitoringTab from "./FeatureMonitoringTab";
import { useDocumentTitle } from "../../hooks/useDocumentTitle";
import {
useFeatureCustomTabs,
@@ -34,12 +35,20 @@ const FeatureInstance = () => {
navigate("");
},
},
+ {
+ label: "Monitoring",
+ isSelected: useMatchSubpath("monitoring"),
+ onClick: () => {
+ navigate("monitoring");
+ },
+ },
...customNavigationTabs,
]}
/>
} />
+ } />
{CustomTabRoutes}
diff --git a/ui/src/pages/features/FeatureMonitoringTab.tsx b/ui/src/pages/features/FeatureMonitoringTab.tsx
new file mode 100644
index 00000000000..fdf7b38bc86
--- /dev/null
+++ b/ui/src/pages/features/FeatureMonitoringTab.tsx
@@ -0,0 +1,122 @@
+import React from "react";
+import { useParams } from "react-router-dom";
+import {
+ EuiFlexGroup,
+ EuiFlexItem,
+ EuiSpacer,
+ EuiSkeletonText,
+ EuiEmptyPrompt,
+ EuiButton,
+} from "@elastic/eui";
+import {
+ useFeatureMetrics,
+ useBaselineMetrics,
+} from "../../queries/useMonitoringApi";
+import type {
+ NumericHistogram,
+ CategoricalHistogram,
+} from "../../queries/useMonitoringApi";
+import {
+ NumericHistogramChart,
+ CategoricalHistogramChart,
+} from "../monitoring/components/HistogramChart";
+import StatsPanel from "../monitoring/components/StatsPanel";
+
+const FeatureMonitoringTab = () => {
+ const { projectName, FeatureViewName, FeatureName } = useParams();
+
+ const {
+ data: metrics,
+ isLoading,
+ isError,
+ } = useFeatureMetrics({
+ project: projectName || "",
+ feature_view_name: FeatureViewName,
+ feature_name: FeatureName,
+ });
+
+ const { data: baselineMetrics } = useBaselineMetrics(
+ projectName || "",
+ FeatureViewName,
+ FeatureName,
+ );
+
+ if (isLoading) {
+ return ;
+ }
+
+ const latestMetric = (() => {
+ if (!metrics || metrics.length === 0) return null;
+ const withData = metrics.filter((m) => m.row_count > 0);
+ const candidates = withData.length > 0 ? withData : metrics;
+ return candidates.reduce((a, b) =>
+ a.metric_date > b.metric_date ? a : b,
+ );
+ })();
+
+ const baselineMetric =
+ baselineMetrics && baselineMetrics.length > 0
+ ? baselineMetrics[0]
+ : null;
+
+ if (isError || !latestMetric) {
+ return (
+ No Monitoring Data}
+ body={
+
+ No monitoring metrics available for this feature. Run a
+ monitoring compute job to generate data quality metrics.
+
+ }
+ actions={
+
+ Go to Monitoring
+
+ }
+ />
+ );
+ }
+
+ const isNumeric = latestMetric.feature_type === "numeric";
+
+ return (
+ <>
+
+
+ {isNumeric && latestMetric.histogram && (
+
+ )}
+ {!isNumeric && latestMetric.histogram && (
+
+ )}
+ {!latestMetric.histogram && (
+ No Histogram}
+ body={Histogram data is not available.
}
+ />
+ )}
+
+
+
+
+
+
+ >
+ );
+};
+
+export default FeatureMonitoringTab;
diff --git a/ui/src/pages/monitoring/FeatureMetricsDetail.tsx b/ui/src/pages/monitoring/FeatureMetricsDetail.tsx
new file mode 100644
index 00000000000..7ace799742b
--- /dev/null
+++ b/ui/src/pages/monitoring/FeatureMetricsDetail.tsx
@@ -0,0 +1,249 @@
+import React from "react";
+import { useParams, useNavigate } from "react-router-dom";
+import {
+ EuiPageTemplate,
+ EuiFlexGroup,
+ EuiFlexItem,
+ EuiSpacer,
+ EuiSkeletonText,
+ EuiEmptyPrompt,
+ EuiButton,
+ EuiBreadcrumbs,
+} from "@elastic/eui";
+import { FeatureIcon } from "../../graphics/FeatureIcon";
+import {
+ useFeatureMetrics,
+ useBaselineMetrics,
+} from "../../queries/useMonitoringApi";
+import type {
+ NumericHistogram,
+ CategoricalHistogram,
+} from "../../queries/useMonitoringApi";
+import {
+ NumericHistogramChart,
+ CategoricalHistogramChart,
+} from "./components/HistogramChart";
+import StatsPanel from "./components/StatsPanel";
+import { useDocumentTitle } from "../../hooks/useDocumentTitle";
+
+/**
+ * Detail page for one feature's monitoring metrics. Reads `projectName`,
+ * `featureViewName` and `featureName` from the route params, sets the document
+ * title, queries the feature's metrics and baseline metrics, and renders from
+ * the most recent metric entry. Shows the "No Metrics Available" prompt when
+ * the query errored or produced no entry.
+ */
+const FeatureMetricsDetail = () => {
+ const { projectName, featureViewName, featureName } = useParams();
+ const navigate = useNavigate();
+
+ // The route params are interpolated as-is into the title.
+ useDocumentTitle(
+ `${featureName} Monitoring | ${featureViewName} | Feast`,
+ );
+
+ const {
+ data: metrics,
+ isLoading,
+ isError,
+ } = useFeatureMetrics({
+ project: projectName || "",
+ feature_view_name: featureViewName,
+ feature_name: featureName,
+ });
+
+ const { data: baselineMetrics } = useBaselineMetrics(
+ projectName || "",
+ featureViewName,
+ featureName,
+ );
+
+ // Pick the entry to display: prefer entries with row_count > 0, fall back to
+ // all entries when none have rows, then take the greatest metric_date.
+ // NOTE(review): dates are compared with `>`; assumes metric_date values sort
+ // chronologically as plain strings (e.g. ISO dates) — confirm.
+ const latestMetric = (() => {
+ if (!metrics || metrics.length === 0) return null;
+ const withData = metrics.filter((m) => m.row_count > 0);
+ const candidates = withData.length > 0 ? withData : metrics;
+ return candidates.reduce((a, b) =>
+ a.metric_date > b.metric_date ? a : b,
+ );
+ })();
+
+ // Only the first baseline entry is used; null when there is none.
+ const baselineMetric =
+ baselineMetrics && baselineMetrics.length > 0
+ ? baselineMetrics[0]
+ : null;
+
+ // Only the first crumb navigates (back to the project's monitoring page).
+ const breadcrumbs = [
+ {
+ text: "Monitoring",
+ onClick: () => navigate(`/p/${projectName}/monitoring`),
+ },
+ {
+ text: featureViewName || "",
+ },
+ {
+ text: featureName || "",
+ },
+ ];
+
+ if (isLoading) {
+ return (
+
+
+
+
+
+ );
+ }
+
+ if (isError || !latestMetric) {
+ return (
+
+
+
+
+ No Metrics Available}
+ body={
+
+ No monitoring metrics found for feature{" "}
+ {featureName} in feature view{" "}
+ {featureViewName} . Run a monitoring
+ compute job first.
+
+ }
+ actions={
+ navigate(`/p/${projectName}/monitoring`)}
+ >
+ Back to Monitoring
+
+ }
+ />
+
+
+ );
+ }
+
+ // Selects between the numeric and the non-numeric histogram branch below.
+ const isNumeric = latestMetric.feature_type === "numeric";
+
+ return (
+
+ navigate(`/p/${projectName}/monitoring`)}
+ >
+ Back to Monitoring
+ ,
+ ]}
+ />
+
+
+
+
+
+
+ {isNumeric && latestMetric.histogram && (
+
+ )}
+ {!isNumeric && latestMetric.histogram && (
+
+ )}
+ {!latestMetric.histogram && (
+ No Histogram Data}
+ body={Histogram data is not available for this metric.
}
+ />
+ )}
+
+
+
+
+
+
+
+ {metrics && metrics.length > 1 && (
+ <>
+
+
+ >
+ )}
+
+
+ );
+};
+
+/**
+ * Line chart of null rate over time ("Null Rate Over Time").
+ *
+ * Sorts a copy of `metrics` by `metric_date` (string `localeCompare`), maps
+ * each entry to an (x, y) point and joins the points into a polyline string.
+ * The first and last dates are rendered when there is at least one point.
+ *
+ * @param metrics Entries carrying a `metric_date` string and a `null_rate`.
+ */
+const NullRateTimeline = ({
+ metrics,
+}: {
+ metrics: { metric_date: string; null_rate: number }[];
+}) => {
+ const sorted = [...metrics].sort(
+ (a, b) => a.metric_date.localeCompare(b.metric_date),
+ );
+ // The 0.01 floor keeps the divisor below non-zero when every rate is 0.
+ const maxRate = Math.max(...sorted.map((m) => m.null_rate), 0.01);
+ // 50 units per point, never narrower than 200.
+ const chartWidth = Math.max(sorted.length * 50, 200);
+ const chartHeight = 80;
+
+ const points = sorted.map((m, i) => {
+ // x spreads points evenly across the width minus a 10-unit margin per side;
+ // Math.max(..., 1) avoids dividing by zero for a single point.
+ const x = (i / Math.max(sorted.length - 1, 1)) * (chartWidth - 20) + 10;
+ // y is inverted (larger rate -> smaller y); the peak sits 10 units from the top.
+ const y = chartHeight - (m.null_rate / maxRate) * (chartHeight - 10);
+ return { x, y, ...m };
+ });
+
+ // "x,y x,y ..." pairs separated by spaces.
+ const polyline = points.map((p) => `${p.x},${p.y}`).join(" ");
+
+ return (
+
+
+ Null Rate Over Time
+
+
+
+ {points.map((p, i) => (
+
+ ))}
+
+ {points.length > 0 && (
+ <>
+
+ {points[0].metric_date}
+
+
+ {points[points.length - 1].metric_date}
+
+ >
+ )}
+
+
+ );
+};
+
+export default FeatureMetricsDetail;
diff --git a/ui/src/pages/monitoring/FeatureMetricsTable.tsx b/ui/src/pages/monitoring/FeatureMetricsTable.tsx
new file mode 100644
index 00000000000..d0a2e4e9573
--- /dev/null
+++ b/ui/src/pages/monitoring/FeatureMetricsTable.tsx
@@ -0,0 +1,296 @@
+import React, { useState, useMemo, useEffect } from "react";
+import {
+ EuiBasicTable,
+ EuiBasicTableColumn,
+ EuiBadge,
+ EuiHealth,
+ EuiLink,
+ EuiProgress,
+ EuiToolTip,
+ Criteria,
+} from "@elastic/eui";
+import type {
+ FeatureMetric,
+ NumericHistogram,
+ CategoricalHistogram,
+} from "../../queries/useMonitoringApi";
+
+/**
+ * Maps a null rate to a health colour name: "danger" at 0.5 and above,
+ * "warning" at 0.1 and above, otherwise "success".
+ */
+const healthColor = (nullRate: number): string =>
+  nullRate >= 0.5 ? "danger" : nullRate >= 0.1 ? "warning" : "success";
+
+/**
+ * Maps a null rate to a human-readable health label, using the same 0.5 and
+ * 0.1 thresholds as the health colour.
+ */
+const healthLabel = (nullRate: number): string =>
+  nullRate >= 0.5
+    ? "High null rate"
+    : nullRate >= 0.1
+      ? "Moderate null rate"
+      : "Healthy";
+
+/**
+ * Formats a metric value for a table cell.
+ *
+ * @param val Value to format; null, undefined, NaN and ±Infinity all render
+ *   as the "—" placeholder.
+ * @param decimals Fraction digits used for non-integer values (default 2).
+ *   Integers ignore it and are rendered with `toLocaleString()`.
+ * @returns The formatted string.
+ */
+const formatNum = (
+  val: number | null | undefined,
+  decimals = 2,
+): string => {
+  // `== null` matches both null and undefined; the finite check keeps
+  // "NaN"/"Infinity" out of the table.
+  if (val == null || !Number.isFinite(val)) return "—";
+  return Number.isInteger(val) ? val.toLocaleString() : val.toFixed(decimals);
+};
+
+/**
+ * Small inline histogram for a table cell, drawn in a 120x28 area.
+ * Renders a placeholder when the metric has no histogram. When
+ * `feature_type` is "numeric" the histogram is treated as a NumericHistogram
+ * (one bar per count); otherwise as a CategoricalHistogram (first 10 values).
+ */
+const MiniHistogram = ({ metric }: { metric: FeatureMetric }) => {
+ if (!metric.histogram) return — ;
+
+ const width = 120;
+ const height = 28;
+
+ if (metric.feature_type === "numeric") {
+ // The cast relies on feature_type alone; the histogram shape is not checked.
+ const hist = metric.histogram as NumericHistogram;
+ // The floor of 1 avoids dividing by zero when all counts are 0.
+ const maxCount = Math.max(...hist.counts, 1);
+ // Bar width leaves a 1-unit gap and is never below 2.
+ const barW = Math.max(Math.floor(width / hist.counts.length) - 1, 2);
+
+ return (
+
+
+ {hist.counts.map((count, i) => {
+ const h = (count / maxCount) * (height - 2);
+ return (
+
+ );
+ })}
+
+
+ );
+ }
+
+ const hist = metric.histogram as CategoricalHistogram;
+ // maxCount is taken over all values, although only the first 10 are drawn.
+ const maxCount = Math.max(...hist.values.map((v) => v.count), 1);
+ const barW = Math.max(
+ Math.floor(width / Math.min(hist.values.length, 10)) - 1,
+ 6,
+ );
+
+ return (
+
+
+ {hist.values.slice(0, 10).map((v, i) => {
+ const h = (v.count / maxCount) * (height - 2);
+ return (
+
+ );
+ })}
+
+
+ );
+};
+
+/** Props accepted by the feature metrics table. */
+interface FeatureMetricsTableProps {
+ /** Metric entries; the table keeps one entry per feature view + feature. */
+ metrics: FeatureMetric[];
+ /** Switches the empty-table message to "Loading metrics...". */
+ isLoading: boolean;
+ /** Called with the feature view name and feature name of a clicked row. */
+ onFeatureClick: (fvName: string, featureName: string) => void;
+}
+
+/** Page sizes offered by the table's pagination control. */
+const PAGE_SIZE_OPTIONS = [10, 20, 50];
+
+/**
+ * Sortable, paginated table of per-feature metrics. Reduces `metrics` to one
+ * entry per feature view + feature, sorts that list client-side by the
+ * selected column, and slices out the current page.
+ */
+const FeatureMetricsTable = ({
+ metrics,
+ isLoading,
+ onFeatureClick,
+}: FeatureMetricsTableProps) => {
+ const [sortField, setSortField] =
+ useState("feature_view_name");
+ const [sortDirection, setSortDirection] = useState<"asc" | "desc">("asc");
+ const [pageIndex, setPageIndex] = useState(0);
+ const [pageSize, setPageSize] = useState(20);
+
+ // Jump back to the first page whenever the metrics array changes.
+ useEffect(() => {
+ setPageIndex(0);
+ }, [metrics]);
+
+ // One entry per "featureView::feature" key. An entry with rows always beats
+ // one with row_count === 0; otherwise the greater metric_date wins.
+ // NOTE(review): metric_date is compared with `>`; assumes the strings sort
+ // chronologically — confirm.
+ const latestMetrics = useMemo(() => {
+ const byKey = new Map();
+ for (const m of metrics) {
+ const key = `${m.feature_view_name}::${m.feature_name}`;
+ const existing = byKey.get(key);
+ if (!existing) {
+ byKey.set(key, m);
+ } else {
+ const preferNew =
+ m.row_count > 0 && existing.row_count === 0
+ ? true
+ : existing.row_count > 0 && m.row_count === 0
+ ? false
+ : m.metric_date > existing.metric_date;
+ if (preferNew) byKey.set(key, m);
+ }
+ }
+ return Array.from(byKey.values());
+ }, [metrics]);
+
+ // Sorts a copy. null/undefined values go last in both sort directions.
+ const sortedItems = useMemo(() => {
+ return [...latestMetrics].sort((a, b) => {
+ const aVal = a[sortField];
+ const bVal = b[sortField];
+ if (aVal == null && bVal == null) return 0;
+ if (aVal == null) return 1;
+ if (bVal == null) return -1;
+ if (aVal < bVal) return sortDirection === "asc" ? -1 : 1;
+ if (aVal > bVal) return sortDirection === "asc" ? 1 : -1;
+ return 0;
+ });
+ }, [latestMetrics, sortField, sortDirection]);
+
+ // Rows for the current page.
+ const pageOfItems = useMemo(() => {
+ const start = pageIndex * pageSize;
+ return sortedItems.slice(start, start + pageSize);
+ }, [sortedItems, pageIndex, pageSize]);
+
+ const pagination = useMemo(
+ () => ({
+ pageIndex,
+ pageSize,
+ totalItemCount: sortedItems.length,
+ pageSizeOptions: PAGE_SIZE_OPTIONS,
+ }),
+ [pageIndex, pageSize, sortedItems.length],
+ );
+
+ // Copies the table's sort and page criteria into local state.
+ const onTableChange = ({ sort, page }: Criteria) => {
+ if (sort) {
+ setSortField(sort.field as keyof FeatureMetric);
+ setSortDirection(sort.direction);
+ }
+ if (page) {
+ setPageIndex(page.index);
+ setPageSize(page.size);
+ }
+ };
+
+ // "Health" reuses the null_rate field and is not marked sortable.
+ const columns: EuiBasicTableColumn[] = [
+ {
+ field: "feature_name",
+ name: "Feature",
+ sortable: true,
+ render: (name: string, item: FeatureMetric) => (
+ onFeatureClick(item.feature_view_name, name)}
+ >
+ {name}
+
+ ),
+ },
+ {
+ field: "feature_view_name",
+ name: "Feature View",
+ sortable: true,
+ },
+ {
+ field: "feature_type",
+ name: "Type",
+ sortable: true,
+ width: "100px",
+ render: (type: string) => (
+
+ {type}
+
+ ),
+ },
+ {
+ name: "Distribution",
+ width: "140px",
+ render: (item: FeatureMetric) => ,
+ },
+ {
+ field: "row_count",
+ name: "Rows",
+ sortable: true,
+ width: "90px",
+ render: (val: number) => formatNum(val, 0),
+ },
+ {
+ field: "null_rate",
+ name: "Null Rate",
+ sortable: true,
+ width: "150px",
+ render: (val: number) => (
+
+
+ {(val * 100).toFixed(1)}%
+
+ ),
+ },
+ {
+ field: "null_rate",
+ name: "Health",
+ width: "130px",
+ render: (val: number) => (
+ {healthLabel(val)}
+ ),
+ },
+ {
+ field: "mean",
+ name: "Mean",
+ sortable: true,
+ width: "100px",
+ render: (val: number | null) => formatNum(val),
+ },
+ {
+ field: "stddev",
+ name: "Std Dev",
+ sortable: true,
+ width: "100px",
+ render: (val: number | null) => formatNum(val),
+ },
+ {
+ field: "data_source_type",
+ name: "Source",
+ width: "80px",
+ render: (val: string) => {val} ,
+ },
+ ];
+
+ return (
+ ({
+ "data-test-subj": `row-${item.feature_name}`,
+ })}
+ noItemsMessage={
+ isLoading
+ ? "Loading metrics..."
+ : "No metrics found. Run a monitoring compute job to generate metrics."
+ }
+ />
+ );
+};
+
+export default FeatureMetricsTable;
diff --git a/ui/src/pages/monitoring/FeatureServiceMetricsPanel.tsx b/ui/src/pages/monitoring/FeatureServiceMetricsPanel.tsx
new file mode 100644
index 00000000000..c3fa61b25a8
--- /dev/null
+++ b/ui/src/pages/monitoring/FeatureServiceMetricsPanel.tsx
@@ -0,0 +1,224 @@
+import React, { useState, useMemo } from "react";
+import {
+ EuiPanel,
+ EuiTitle,
+ EuiSpacer,
+ EuiFlexGroup,
+ EuiFlexItem,
+ EuiStat,
+ EuiBasicTable,
+ EuiBasicTableColumn,
+ EuiProgress,
+ EuiBadge,
+ EuiSkeletonText,
+ Criteria,
+} from "@elastic/eui";
+import type { FeatureServiceMetric } from "../../queries/useMonitoringApi";
+
+/**
+ * Health colour name for a null rate: "danger" from 0.5 upward, "warning"
+ * from 0.1 upward, "success" otherwise.
+ */
+const healthColor = (nullRate: number): string => {
+  const isSevere = nullRate >= 0.5;
+  const isElevated = nullRate >= 0.1;
+  return isSevere ? "danger" : isElevated ? "warning" : "success";
+};
+
+/** Props accepted by the feature service metrics panel. */
+interface FeatureServiceMetricsPanelProps {
+ /** Metric entries; the panel keeps the latest entry per feature service. */
+ metrics: FeatureServiceMetric[];
+ /** When true the panel renders its loading state instead of the data. */
+ isLoading: boolean;
+}
+
+/**
+ * Panel summarising feature service metrics. Keeps the latest entry per
+ * feature service, derives totals and an average null rate from those
+ * entries, and defines the table columns; the table is rendered only when
+ * there is at least one entry.
+ */
+const FeatureServiceMetricsPanel = ({
+ metrics,
+ isLoading,
+}: FeatureServiceMetricsPanelProps) => {
+ if (isLoading) {
+ return (
+
+
+ Feature Service Metrics
+
+
+
+
+ );
+ }
+
+ // Latest entry per feature service, by greatest metric_date.
+ // NOTE(review): metric_date is compared with `>`; assumes the strings sort
+ // chronologically — confirm.
+ const latestByFS = new Map();
+ for (const m of metrics) {
+ const existing = latestByFS.get(m.feature_service_name);
+ if (!existing || m.metric_date > existing.metric_date) {
+ latestByFS.set(m.feature_service_name, m);
+ }
+ }
+ const latestMetrics = Array.from(latestByFS.values());
+
+ // Sums over the latest entries; missing or zero values count as 0.
+ const totalViews = latestMetrics.reduce(
+ (sum, m) => sum + (m.total_feature_views || 0),
+ 0,
+ );
+ const totalFeatures = latestMetrics.reduce(
+ (sum, m) => sum + (m.total_features || 0),
+ 0,
+ );
+ // Unweighted mean of the per-service averages; 0 when there are no entries.
+ const avgNullRate =
+ latestMetrics.length > 0
+ ? latestMetrics.reduce((sum, m) => sum + (m.avg_null_rate || 0), 0) /
+ latestMetrics.length
+ : 0;
+
+ const columns: EuiBasicTableColumn[] = [
+ {
+ field: "feature_service_name",
+ name: "Feature Service",
+ sortable: true,
+ },
+ {
+ field: "total_feature_views",
+ name: "Feature Views",
+ sortable: true,
+ width: "110px",
+ },
+ {
+ field: "total_features",
+ name: "Features",
+ sortable: true,
+ width: "80px",
+ },
+ {
+ field: "avg_null_rate",
+ name: "Avg Null Rate",
+ sortable: true,
+ render: (val: number) => (
+
+
+ {((val || 0) * 100).toFixed(1)}%
+
+ ),
+ },
+ {
+ field: "max_null_rate",
+ name: "Max Null Rate",
+ sortable: true,
+ width: "110px",
+ render: (val: number) => `${((val || 0) * 100).toFixed(1)}%`,
+ },
+ {
+ field: "metric_date",
+ name: "Date",
+ sortable: true,
+ width: "110px",
+ },
+ {
+ field: "data_source_type",
+ name: "Source",
+ width: "80px",
+ render: (val: string) => (
+ {val}
+ ),
+ },
+ ];
+
+ return (
+
+
+ Feature Service Metrics
+
+
+ Aggregated data quality metrics across feature services.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {latestMetrics.length > 0 && (
+
+ )}
+
+ );
+};
+
+/**
+ * Table wrapper that sorts feature service rows client-side. Holds the sort
+ * field (initially "feature_service_name") and direction (initially "asc")
+ * in local state.
+ */
+const SortableFSTable = ({
+ items,
+ columns,
+}: {
+ items: FeatureServiceMetric[];
+ columns: EuiBasicTableColumn[];
+}) => {
+ const [sortField, setSortField] = useState("feature_service_name");
+ const [sortDirection, setSortDirection] = useState<"asc" | "desc">("asc");
+
+ // Sorts a copy. null/undefined values go last in both sort directions.
+ // The `any` casts allow indexing rows by an arbitrary string field name.
+ const sortedItems = useMemo(() => {
+ return [...items].sort((a, b) => {
+ const aVal = (a as any)[sortField];
+ const bVal = (b as any)[sortField];
+ if (aVal == null && bVal == null) return 0;
+ if (aVal == null) return 1;
+ if (bVal == null) return -1;
+ if (aVal < bVal) return sortDirection === "asc" ? -1 : 1;
+ if (aVal > bVal) return sortDirection === "asc" ? 1 : -1;
+ return 0;
+ });
+ }, [items, sortField, sortDirection]);
+
+ // Copies the table's sort criteria into local state.
+ const onTableChange = ({ sort }: Criteria) => {
+ if (sort) {
+ setSortField(sort.field as string);
+ setSortDirection(sort.direction);
+ }
+ };
+
+ return (
+
+ );
+};
+
+export default FeatureServiceMetricsPanel;
diff --git a/ui/src/pages/monitoring/FeatureViewMetricsPanel.tsx b/ui/src/pages/monitoring/FeatureViewMetricsPanel.tsx
new file mode 100644
index 00000000000..267f1292fe1
--- /dev/null
+++ b/ui/src/pages/monitoring/FeatureViewMetricsPanel.tsx
@@ -0,0 +1,240 @@
+import React, { useState, useMemo } from "react";
+import {
+ EuiPanel,
+ EuiTitle,
+ EuiSpacer,
+ EuiFlexGroup,
+ EuiFlexItem,
+ EuiStat,
+ EuiBasicTable,
+ EuiBasicTableColumn,
+ EuiProgress,
+ EuiBadge,
+ EuiSkeletonText,
+ Criteria,
+} from "@elastic/eui";
+import type { FeatureViewMetric } from "../../queries/useMonitoringApi";
+
+/**
+ * Picks the health colour name for a null rate. Checks are ordered from the
+ * most severe threshold down: 0.5 gives "danger", 0.1 gives "warning", and
+ * anything else gives "success".
+ */
+const healthColor = (nullRate: number): string => {
+  let color = "success";
+  if (nullRate >= 0.1) color = "warning";
+  if (nullRate >= 0.5) color = "danger";
+  return color;
+};
+
+/** Props accepted by the feature view metrics panel. */
+interface FeatureViewMetricsPanelProps {
+ /** Metric entries; the panel keeps the latest entry per feature view. */
+ metrics: FeatureViewMetric[];
+ /** When true the panel renders its loading state instead of the data. */
+ isLoading: boolean;
+ /** Panel heading, shown in both the loading and the loaded state. */
+ title: string;
+ /** Optional text rendered only when provided. */
+ description?: string;
+}
+
+/**
+ * Panel summarising feature view metrics. Keeps the latest entry per feature
+ * view, derives totals, an average null rate and a healthy-view count from
+ * those entries, and defines the table columns; the table is rendered only
+ * when there is at least one entry.
+ */
+const FeatureViewMetricsPanel = ({
+ metrics,
+ isLoading,
+ title,
+ description,
+}: FeatureViewMetricsPanelProps) => {
+ if (isLoading) {
+ return (
+
+
+ {title}
+
+
+
+
+ );
+ }
+
+ // Latest entry per feature view, by greatest metric_date.
+ // NOTE(review): metric_date is compared with `>`; assumes the strings sort
+ // chronologically — confirm.
+ const latestByFV = new Map();
+ for (const m of metrics) {
+ const existing = latestByFV.get(m.feature_view_name);
+ if (!existing || m.metric_date > existing.metric_date) {
+ latestByFV.set(m.feature_view_name, m);
+ }
+ }
+ const latestMetrics = Array.from(latestByFV.values());
+
+ // Sums over the latest entries; missing or zero values count as 0.
+ const totalRows = latestMetrics.reduce(
+ (sum, m) => sum + (m.total_row_count || 0),
+ 0,
+ );
+ const totalFeatures = latestMetrics.reduce(
+ (sum, m) => sum + (m.total_features || 0),
+ 0,
+ );
+ // Unweighted mean of the per-view averages; 0 when there are no entries.
+ const avgNullRate =
+ latestMetrics.length > 0
+ ? latestMetrics.reduce((sum, m) => sum + (m.avg_null_rate || 0), 0) /
+ latestMetrics.length
+ : 0;
+ // Views whose average null rate is below 0.1. Unlike the sums above, this
+ // comparison has no `|| 0` fallback.
+ const healthyViews = latestMetrics.filter(
+ (m) => m.avg_null_rate < 0.1,
+ ).length;
+
+ const columns: EuiBasicTableColumn[] = [
+ {
+ field: "feature_view_name",
+ name: "Feature View",
+ sortable: true,
+ },
+ {
+ field: "total_row_count",
+ name: "Total Rows",
+ sortable: true,
+ render: (val: number) => (val || 0).toLocaleString(),
+ },
+ {
+ field: "total_features",
+ name: "Features",
+ sortable: true,
+ width: "80px",
+ },
+ {
+ field: "features_with_nulls",
+ name: "With Nulls",
+ sortable: true,
+ width: "90px",
+ },
+ {
+ field: "avg_null_rate",
+ name: "Avg Null Rate",
+ sortable: true,
+ render: (val: number) => (
+
+
+ {((val || 0) * 100).toFixed(1)}%
+
+ ),
+ },
+ {
+ field: "max_null_rate",
+ name: "Max Null Rate",
+ sortable: true,
+ width: "110px",
+ render: (val: number) => `${((val || 0) * 100).toFixed(1)}%`,
+ },
+ {
+ field: "metric_date",
+ name: "Date",
+ sortable: true,
+ width: "110px",
+ },
+ {
+ field: "data_source_type",
+ name: "Source",
+ width: "80px",
+ render: (val: string) => (
+ {val}
+ ),
+ },
+ ];
+
+ return (
+
+
+ {title}
+
+ {description && (
+
+ {description}
+
+ )}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {latestMetrics.length > 0 && (
+
+ )}
+
+ );
+};
+
+/**
+ * Table wrapper that sorts feature view rows client-side. Holds the sort
+ * field (initially "feature_view_name") and direction (initially "asc") in
+ * local state.
+ */
+const SortableTable = ({
+ items,
+ columns,
+}: {
+ items: FeatureViewMetric[];
+ columns: EuiBasicTableColumn[];
+}) => {
+ const [sortField, setSortField] = useState("feature_view_name");
+ const [sortDirection, setSortDirection] = useState<"asc" | "desc">("asc");
+
+ // Sorts a copy. null/undefined values go last in both sort directions.
+ // The `any` casts allow indexing rows by an arbitrary string field name.
+ const sortedItems = useMemo(() => {
+ return [...items].sort((a, b) => {
+ const aVal = (a as any)[sortField];
+ const bVal = (b as any)[sortField];
+ if (aVal == null && bVal == null) return 0;
+ if (aVal == null) return 1;
+ if (bVal == null) return -1;
+ if (aVal < bVal) return sortDirection === "asc" ? -1 : 1;
+ if (aVal > bVal) return sortDirection === "asc" ? 1 : -1;
+ return 0;
+ });
+ }, [items, sortField, sortDirection]);
+
+ // Copies the table's sort criteria into local state.
+ const onTableChange = ({ sort }: Criteria) => {
+ if (sort) {
+ setSortField(sort.field as string);
+ setSortDirection(sort.direction);
+ }
+ };
+
+ return (
+
+ );
+};
+
+export default FeatureViewMetricsPanel;
diff --git a/ui/src/pages/monitoring/Index.tsx b/ui/src/pages/monitoring/Index.tsx
new file mode 100644
index 00000000000..1af792b119b
--- /dev/null
+++ b/ui/src/pages/monitoring/Index.tsx
@@ -0,0 +1,265 @@
+import React, { useState, useContext, useMemo } from "react";
+import { useParams, useNavigate } from "react-router-dom";
+import {
+ EuiPageTemplate,
+ EuiSpacer,
+ EuiTabbedContent,
+ EuiTabbedContentTab,
+ EuiEmptyPrompt,
+ EuiButton,
+ EuiCallOut,
+} from "@elastic/eui";
+
+import { useDocumentTitle } from "../../hooks/useDocumentTitle";
+import useLoadRegistry from "../../queries/useLoadRegistry";
+import RegistryPathContext from "../../contexts/RegistryPathContext";
+import {
+ useFeatureMetrics,
+ useFeatureViewMetrics,
+ useFeatureServiceMetrics,
+ useComputeMetrics,
+} from "../../queries/useMonitoringApi";
+import FeatureMetricsTable from "./FeatureMetricsTable";
+import FeatureViewMetricsPanel from "./FeatureViewMetricsPanel";
+import FeatureServiceMetricsPanel from "./FeatureServiceMetricsPanel";
+import MetricsFilters from "./components/MetricsFilters";
+
+/**
+ * Monitoring landing page for a project. Owns the filter state, runs the
+ * feature, feature view and feature service metric queries, exposes a
+ * "Compute Metrics" mutation, and arranges the results into three tabs
+ * (Features, Feature Views, Feature Services).
+ */
+const MonitoringIndex = () => {
+ useDocumentTitle("Monitoring | Feast");
+
+ const { projectName } = useParams();
+ const navigate = useNavigate();
+ const registryUrl = useContext(RegistryPathContext);
+ const { data: registryData } = useLoadRegistry(registryUrl, projectName);
+
+ // Filter state; an empty string means "no filter".
+ const [selectedFV, setSelectedFV] = useState("");
+ const [granularity, setGranularity] = useState("");
+ const [dataSourceType, setDataSourceType] = useState("");
+ const [startDate, setStartDate] = useState("");
+ const [endDate, setEndDate] = useState("");
+
+ // Empty-string filters are turned into undefined before being passed on.
+ const filters = useMemo(
+ () => ({
+ project: projectName || "",
+ feature_view_name: selectedFV || undefined,
+ granularity: granularity || undefined,
+ data_source_type: dataSourceType || undefined,
+ start_date: startDate || undefined,
+ end_date: endDate || undefined,
+ }),
+ [projectName, selectedFV, granularity, dataSourceType, startDate, endDate],
+ );
+
+ const featureQuery = useFeatureMetrics(filters);
+ const fvQuery = useFeatureViewMetrics(filters);
+ // The feature service query gets the same filters minus feature_view_name.
+ const fsQuery = useFeatureServiceMetrics({
+ project: projectName || "",
+ granularity: granularity || undefined,
+ data_source_type: dataSourceType || undefined,
+ start_date: startDate || undefined,
+ end_date: endDate || undefined,
+ });
+ const computeMutation = useComputeMetrics();
+
+ // Feature view names taken from the registry's mergedFVList.
+ const featureViews = useMemo(() => {
+ if (!registryData?.mergedFVList) return [];
+ return registryData.mergedFVList.map((fv: any) => fv.name as string);
+ }, [registryData]);
+
+ // Navigates to the detail route of the clicked feature.
+ const handleFeatureClick = (fvName: string, featureName: string) => {
+ navigate(
+ `/p/${projectName}/monitoring/feature/${fvName}/${featureName}`,
+ );
+ };
+
+ // Number of distinct featureView::feature pairs in the feature query result.
+ const uniqueFeatureCount = useMemo(() => {
+ if (!featureQuery.data) return 0;
+ const seen = new Set();
+ for (const m of featureQuery.data) {
+ seen.add(`${m.feature_view_name}::${m.feature_name}`);
+ }
+ return seen.size;
+ }, [featureQuery.data]);
+
+ // Refetches all three metric queries.
+ const handleRefresh = () => {
+ featureQuery.refetch();
+ fvQuery.refetch();
+ fsQuery.refetch();
+ };
+
+ // Starts a compute run for the project, limited to the selected feature
+ // view when one is chosen.
+ const handleCompute = () => {
+ computeMutation.mutate({
+ project: projectName || "",
+ feature_view_name: selectedFV || undefined,
+ });
+ };
+
+ // True only when all three queries are in the error state.
+ const hasError =
+ featureQuery.isError && fvQuery.isError && fsQuery.isError;
+ // Considers feature and feature view data only; feature service data is
+ // not part of this check.
+ const hasData =
+ (featureQuery.data && featureQuery.data.length > 0) ||
+ (fvQuery.data && fvQuery.data.length > 0);
+
+ // The Features tab name carries the distinct feature count when non-zero.
+ const tabs: EuiTabbedContentTab[] = [
+ {
+ id: "features",
+ name: `Features${uniqueFeatureCount > 0 ? ` (${uniqueFeatureCount})` : ""}`,
+ content: (
+ <>
+
+
+ >
+ ),
+ },
+ {
+ id: "feature-views",
+ name: "Feature Views",
+ content: (
+ <>
+
+
+ >
+ ),
+ },
+ {
+ id: "feature-services",
+ name: "Feature Services",
+ content: (
+ <>
+
+
+ >
+ ),
+ },
+ ];
+
+ return (
+
+
+ Compute Metrics
+ ,
+ ]}
+ />
+
+ {hasError && (
+ <>
+
+
+ Could not connect to the monitoring API. Make sure the Feast
+ registry server is running with monitoring enabled.
+
+
+
+ >
+ )}
+
+
+
+
+
+ {!hasData && !featureQuery.isLoading && !hasError && (
+ No Metrics Yet}
+ body={
+
+ No monitoring data has been computed for this project. Click
+ "Compute Metrics" to run data quality analysis on your
+ feature views, or use the CLI:{" "}
+ feast monitor run --data-source batch
+
+ }
+ actions={
+
+ Compute Metrics
+
+ }
+ />
+ )}
+
+ {(hasData || featureQuery.isLoading) && (
+
+ )}
+
+ {computeMutation.isSuccess && (
+ <>
+
+
+
+ Data quality metrics have been computed. The table above has
+ been refreshed.
+
+
+ >
+ )}
+
+ {computeMutation.isError && (
+ <>
+
+
+ {(computeMutation.error as Error)?.message}
+
+ >
+ )}
+
+
+ );
+};
+
+export default MonitoringIndex;
diff --git a/ui/src/pages/monitoring/components/HistogramChart.tsx b/ui/src/pages/monitoring/components/HistogramChart.tsx
new file mode 100644
index 00000000000..188bcba7c0b
--- /dev/null
+++ b/ui/src/pages/monitoring/components/HistogramChart.tsx
@@ -0,0 +1,245 @@
+import React from "react";
+import {
+ EuiPanel,
+ EuiTitle,
+ EuiSpacer,
+ EuiText,
+} from "@elastic/eui";
+import type {
+ NumericHistogram,
+ CategoricalHistogram,
+} from "../../../queries/useMonitoringApi";
+
+// Colour constants for the histogram charts. The names suggest the main bar
+// fill and a translucent (8-digit hex, with alpha) baseline fill — confirm
+// against the chart markup.
+const BAR_COLOR = "#006BB4";
+const BAR_COLOR_BASELINE = "#BD271E55";
+// Height of the numeric chart's plot area; bar heights and y ticks scale to it.
+const CHART_HEIGHT = 160;
+// Presumably the space reserved under the plot for axis labels — confirm.
+const AXIS_HEIGHT = 24;
+// Horizontal offset of the first bar from the left edge.
+const LEFT_PAD = 50;
+
+/**
+ * Bar chart for a numeric histogram, with optional baseline bars.
+ *
+ * Bar heights are `count / maxCount * CHART_HEIGHT`, where `maxCount` is the
+ * largest count in `histogram` (at least 1). Each bar covers `bins[i]` to
+ * `bins[i + 1]`; the last bar ends at `bins[i] + bin_width`.
+ *
+ * NOTE(review): baseline heights are scaled by the current histogram's
+ * `maxCount`, so a baseline count larger than `maxCount` gives a height above
+ * CHART_HEIGHT. Baseline counts are paired with bars by index only; this
+ * assumes both histograms share the same binning — confirm.
+ *
+ * @param histogram Histogram to draw.
+ * @param baseline Optional baseline histogram; a missing or zero baseline
+ *   count yields no baseline bar for that bin.
+ * @param title Optional heading rendered above the chart.
+ */
+const NumericHistogramChart = ({
+ histogram,
+ baseline,
+ title,
+}: {
+ histogram: NumericHistogram;
+ baseline?: NumericHistogram | null;
+ title?: string;
+}) => {
+ const maxCount = Math.max(...histogram.counts, 1);
+ const numBars = histogram.counts.length;
+ // Bars share a 460-unit budget with a 2-unit gap and are never below 6 wide.
+ const barWidth = Math.max(Math.floor(460 / numBars) - 2, 6);
+ const barsWidth = (barWidth + 2) * numBars;
+ const svgWidth = LEFT_PAD + barsWidth + 20;
+
+ // Five y-axis ticks at 0%, 25%, 50%, 75% and 100% of maxCount.
+ const yTicks = [0, 0.25, 0.5, 0.75, 1].map((f) => ({
+ value: Math.round(maxCount * f),
+ y: CHART_HEIGHT - f * CHART_HEIGHT,
+ }));
+
+ return (
+
+ {title && (
+ <>
+
+ {title}
+
+
+ >
+ )}
+
+
+ {yTicks.map((t) => (
+
+
+
+ {t.value.toLocaleString()}
+
+
+ ))}
+ {histogram.counts.map((count, i) => {
+ const height = (count / maxCount) * CHART_HEIGHT;
+ const x = LEFT_PAD + i * (barWidth + 2);
+ const binStart = histogram.bins[i];
+ const binEnd =
+ i < histogram.bins.length - 1
+ ? histogram.bins[i + 1]
+ : binStart + histogram.bin_width;
+ const baselineHeight =
+ baseline && baseline.counts[i]
+ ? (baseline.counts[i] / maxCount) * CHART_HEIGHT
+ : 0;
+
+ return (
+
+ {baselineHeight > 0 && (
+
+ )}
+
+ {`${binStart.toFixed(2)} – ${binEnd.toFixed(2)}: ${count.toLocaleString()}`}
+
+
+ );
+ })}
+
+
+ {histogram.bins[0]?.toLocaleString(undefined, { maximumFractionDigits: 1 })}
+
+
+ {histogram.bins[histogram.bins.length - 1]?.toLocaleString(undefined, { maximumFractionDigits: 1 })}
+
+
+
+ {baseline && (
+
+
+ Baseline
+
+ )}
+
+ );
+};
+
+// Horizontal layout of the categorical chart. The names suggest a label
+// column, the bar area and a trailing count column — confirm against the
+// chart markup. BAR_MAX_WIDTH is the width given to the largest count.
+const LABEL_WIDTH = 60;
+const BAR_MAX_WIDTH = 320;
+const COUNT_PAD = 80;
+// Total width: the sum of the three parts above.
+const CAT_SVG_WIDTH = LABEL_WIDTH + BAR_MAX_WIDTH + COUNT_PAD;
+
+/**
+ * Horizontal bar chart for a categorical histogram, one row per value.
+ * Bar widths are `count / maxCount * BAR_MAX_WIDTH`, where `maxCount` is the
+ * largest count (at least 1). Labels longer than 8 characters are cut to 8
+ * and suffixed with "…". A footer shows the unique value count and, when
+ * `other_count` is positive, the count in other categories.
+ *
+ * @param histogram Histogram to draw.
+ * @param title Optional heading rendered above the chart.
+ */
+const CategoricalHistogramChart = ({
+ histogram,
+ title,
+}: {
+ histogram: CategoricalHistogram;
+ title?: string;
+}) => {
+ const maxCount = Math.max(
+ ...histogram.values.map((v) => v.count),
+ 1,
+ );
+ const barHeight = 24;
+ // Each row is the bar plus a 6-unit gap; chart height grows with row count.
+ const rowHeight = barHeight + 6;
+ const chartHeight = histogram.values.length * rowHeight;
+
+ return (
+
+ {title && (
+ <>
+
+ {title}
+
+
+ >
+ )}
+
+
+ {histogram.values.map((v, i) => {
+ const width = (v.count / maxCount) * BAR_MAX_WIDTH;
+ const y = i * rowHeight;
+ return (
+
+
+ {v.value.length > 8 ? v.value.slice(0, 8) + "…" : v.value}
+
+
+ {`${v.value}: ${v.count.toLocaleString()}`}
+
+
+ {v.count.toLocaleString()}
+
+
+ );
+ })}
+
+
+
+ {histogram.unique_count} unique values
+ {histogram.other_count > 0 &&
+ ` (${histogram.other_count.toLocaleString()} in other categories)`}
+
+
+ );
+};
+
+export { NumericHistogramChart, CategoricalHistogramChart };
diff --git a/ui/src/pages/monitoring/components/MetricsFilters.tsx b/ui/src/pages/monitoring/components/MetricsFilters.tsx
new file mode 100644
index 00000000000..081e380da74
--- /dev/null
+++ b/ui/src/pages/monitoring/components/MetricsFilters.tsx
@@ -0,0 +1,128 @@
+import React from "react";
+import {
+ EuiFlexGroup,
+ EuiFlexItem,
+ EuiSelect,
+ EuiFieldText,
+ EuiFormRow,
+ EuiButton,
+} from "@elastic/eui";
+
+/**
+ * Props for the metrics filter bar. Every filter is a controlled value paired
+ * with a change callback that receives the new string.
+ */
+interface MetricsFiltersProps {
+ /** Feature view names offered after the "All Feature Views" option. */
+ featureViews: string[];
+ selectedFeatureView: string;
+ onFeatureViewChange: (fv: string) => void;
+ granularity: string;
+ onGranularityChange: (g: string) => void;
+ dataSourceType: string;
+ onDataSourceTypeChange: (ds: string) => void;
+ startDate: string;
+ onStartDateChange: (d: string) => void;
+ endDate: string;
+ onEndDateChange: (d: string) => void;
+ /** Callback for the "Refresh" control. */
+ onRefresh: () => void;
+ isLoading?: boolean;
+}
+
+// Granularity choices; the empty value ("All") stands for no filter.
+const GRANULARITY_OPTIONS = [
+ { value: "", text: "All" },
+ { value: "daily", text: "Daily" },
+ { value: "weekly", text: "Weekly" },
+ { value: "biweekly", text: "Biweekly" },
+ { value: "monthly", text: "Monthly" },
+ { value: "quarterly", text: "Quarterly" },
+];
+
+// Data source choices; the empty value ("All Sources") stands for no filter.
+const DATA_SOURCE_OPTIONS = [
+ { value: "", text: "All Sources" },
+ { value: "batch", text: "Batch" },
+ { value: "log", text: "Log" },
+];
+
+/**
+ * Filter bar for the monitoring page: feature view, granularity, data source
+ * and start/end date inputs plus a "Refresh" control. All values are
+ * controlled by the parent; each input forwards `e.target.value` to its
+ * change callback.
+ */
+const MetricsFilters = ({
+ featureViews,
+ selectedFeatureView,
+ onFeatureViewChange,
+ granularity,
+ onGranularityChange,
+ dataSourceType,
+ onDataSourceTypeChange,
+ startDate,
+ onStartDateChange,
+ endDate,
+ onEndDateChange,
+ onRefresh,
+ isLoading,
+}: MetricsFiltersProps) => {
+ // "All Feature Views" (empty value) first, then one option per feature view.
+ const fvOptions = [
+ { value: "", text: "All Feature Views" },
+ ...featureViews.map((fv) => ({ value: fv, text: fv })),
+ ];
+
+ return (
+
+
+
+ onFeatureViewChange(e.target.value)}
+ compressed
+ />
+
+
+
+
+ onGranularityChange(e.target.value)}
+ compressed
+ />
+
+
+
+
+ onDataSourceTypeChange(e.target.value)}
+ compressed
+ />
+
+
+
+
+ onStartDateChange(e.target.value)}
+ compressed
+ />
+
+
+
+
+ onEndDateChange(e.target.value)}
+ compressed
+ />
+
+
+
+
+ Refresh
+
+
+
+ );
+};
+
+export default MetricsFilters;
diff --git a/ui/src/pages/monitoring/components/StatsPanel.tsx b/ui/src/pages/monitoring/components/StatsPanel.tsx
new file mode 100644
index 00000000000..070b99373e7
--- /dev/null
+++ b/ui/src/pages/monitoring/components/StatsPanel.tsx
@@ -0,0 +1,130 @@
+import React from "react";
+import {
+ EuiPanel,
+ EuiTitle,
+ EuiSpacer,
+ EuiDescriptionList,
+ EuiDescriptionListTitle,
+ EuiDescriptionListDescription,
+ EuiFlexGroup,
+ EuiFlexItem,
+ EuiBadge,
+} from "@elastic/eui";
+import type { FeatureMetric } from "../../../queries/useMonitoringApi";
+
+/**
+ * Formats a statistic for display.
+ *
+ * @param val Value to format; null, undefined, NaN and ±Infinity all render
+ *   as the "—" placeholder.
+ * @param decimals Fraction digits used for non-integer values (default 4).
+ *   Integers ignore it and are rendered with `toLocaleString()`.
+ * @returns The formatted string.
+ */
+const formatNumber = (
+  val: number | null | undefined,
+  decimals = 4,
+): string => {
+  // `== null` matches both null and undefined; the finite check keeps
+  // "NaN"/"Infinity" out of the panel.
+  if (val == null || !Number.isFinite(val)) return "—";
+  return Number.isInteger(val) ? val.toLocaleString() : val.toFixed(decimals);
+};
+
+/**
+ * Formats a 0–1 fraction as a percentage with two decimals (0.1234 becomes
+ * "12.34%").
+ *
+ * @param val Fraction to format; null, undefined, NaN and ±Infinity all
+ *   render as the "—" placeholder.
+ * @returns The formatted string.
+ */
+const formatPercent = (val: number | null | undefined): string => {
+  // Without the finite check NaN would render as "NaN%".
+  if (val == null || !Number.isFinite(val)) return "—";
+  return `${(val * 100).toFixed(2)}%`;
+};
+
+/**
+ * Statistics panel for one feature metric. Always shows row count, null rate,
+ * data source, granularity and computed-at time; mean, standard deviation,
+ * min/max and percentiles are shown only when `feature_type` is "numeric".
+ * When `baseline` is given, its row count, null rate and mean are shown next
+ * to the current values. The null rate is emphasised when it exceeds 0.1.
+ *
+ * @param metric Metric whose statistics are displayed.
+ * @param baseline Optional baseline metric used for the comparisons.
+ */
+const StatsPanel = ({
+ metric,
+ baseline,
+}: {
+ metric: FeatureMetric;
+ baseline?: FeatureMetric | null;
+}) => {
+ const isNumeric = metric.feature_type === "numeric";
+
+ return (
+
+
+
+
+ Statistics
+
+
+
+
+ {metric.feature_type}
+
+
+
+
+
+ Row Count
+
+ {formatNumber(metric.row_count, 0)}
+ {baseline && (
+
+ (baseline: {formatNumber(baseline.row_count, 0)})
+
+ )}
+
+
+ Null Rate
+
+ 0.1 ? "#BD271E" : "inherit",
+ fontWeight: metric.null_rate > 0.1 ? 600 : 400,
+ }}
+ >
+ {formatPercent(metric.null_rate)}
+
+ {baseline && (
+
+ (baseline: {formatPercent(baseline.null_rate)})
+
+ )}
+
+
+ {isNumeric && (
+ <>
+ Mean
+
+ {formatNumber(metric.mean)}
+ {baseline && (
+
+ (baseline: {formatNumber(baseline.mean)})
+
+ )}
+
+
+ Std Dev
+
+ {formatNumber(metric.stddev)}
+
+
+ Min / Max
+
+ {formatNumber(metric.min_val)} / {formatNumber(metric.max_val)}
+
+
+ Percentiles
+
+ P50: {formatNumber(metric.p50)} | P75: {formatNumber(metric.p75)}{" "}
+ | P90: {formatNumber(metric.p90)} | P95:{" "}
+ {formatNumber(metric.p95)} | P99: {formatNumber(metric.p99)}
+
+ >
+ )}
+
+ Data Source
+
+ {metric.data_source_type}
+
+
+ Granularity
+
+ {metric.granularity}
+
+
+ Computed At
+
+ {metric.computed_at
+ ? new Date(metric.computed_at).toLocaleString()
+ : "—"}
+
+
+
+ );
+};
+
+export default StatsPanel;
diff --git a/ui/src/queries/useMonitoringApi.ts b/ui/src/queries/useMonitoringApi.ts
new file mode 100644
index 00000000000..fde01f29d6d
--- /dev/null
+++ b/ui/src/queries/useMonitoringApi.ts
@@ -0,0 +1,250 @@
+import { useContext } from "react";
+import { useQuery, useMutation, useQueryClient } from "react-query";
+import MonitoringContext from "../contexts/MonitoringContext";
+
+ /**
+  * One feature-level metrics record as consumed by the monitoring UI.
+  *
+  * NOTE(review): presumably mirrors the JSON returned by the
+  * `/monitoring/metrics/features` endpoint — confirm against the backend
+  * schema.
+  */
+ interface FeatureMetric {
+ project_id: string;
+ feature_view_name: string;
+ feature_name: string;
+ metric_date: string;
+ granularity: string;
+ data_source_type: string;
+ // Timestamp string; StatsPanel parses it with `new Date(...)`.
+ computed_at: string;
+ is_baseline: boolean;
+ // StatsPanel special-cases the value "numeric"; other values are not
+ // enumerated in this file.
+ feature_type: string;
+ row_count: number;
+ null_count: number;
+ // A fraction, not a percentage: StatsPanel multiplies it by 100 for display.
+ null_rate: number;
+ // Summary statistics are nullable; StatsPanel renders them only when
+ // feature_type is "numeric".
+ mean: number | null;
+ stddev: number | null;
+ min_val: number | null;
+ max_val: number | null;
+ p50: number | null;
+ p75: number | null;
+ p90: number | null;
+ p95: number | null;
+ p99: number | null;
+ // NOTE(review): presumably numeric features carry a NumericHistogram and
+ // other features a CategoricalHistogram — there is no discriminant field, confirm.
+ histogram: NumericHistogram | CategoricalHistogram | null;
+ }
+
+ /**
+  * Histogram payload for a numeric feature.
+  *
+  * NOTE(review): how `bins` pairs with `counts` (bin starts vs. edges) and
+  * whether `bin_width` is uniform is not visible in this file — confirm
+  * against the backend before relying on it.
+  */
+ interface NumericHistogram {
+ bins: number[];
+ counts: number[];
+ bin_width: number;
+ }
+
+ /**
+  * Histogram payload for a categorical feature: a list of value/count pairs
+  * plus two aggregate counters.
+  *
+  * NOTE(review): presumably `other_count` aggregates values not listed in
+  * `values` and `unique_count` is the number of distinct values — confirm.
+  */
+ interface CategoricalHistogram {
+ values: { value: string; count: number }[];
+ other_count: number;
+ unique_count: number;
+ }
+
+ /**
+  * Aggregated metrics record for one feature view.
+  *
+  * NOTE(review): presumably the record shape returned by
+  * `/monitoring/metrics/feature_views` — confirm against the backend schema.
+  */
+ interface FeatureViewMetric {
+ project_id: string;
+ feature_view_name: string;
+ metric_date: string;
+ granularity: string;
+ data_source_type: string;
+ computed_at: string;
+ is_baseline: boolean;
+ total_row_count: number;
+ total_features: number;
+ features_with_nulls: number;
+ // NOTE(review): presumably fractions like FeatureMetric.null_rate — confirm.
+ avg_null_rate: number;
+ max_null_rate: number;
+ }
+
+ /**
+  * Aggregated metrics record for one feature service.
+  *
+  * NOTE(review): presumably the record shape returned by
+  * `/monitoring/metrics/feature_services` — confirm against the backend
+  * schema.
+  */
+ interface FeatureServiceMetric {
+ project_id: string;
+ feature_service_name: string;
+ metric_date: string;
+ granularity: string;
+ data_source_type: string;
+ computed_at: string;
+ is_baseline: boolean;
+ total_feature_views: number;
+ total_features: number;
+ // NOTE(review): presumably fractions like FeatureMetric.null_rate — confirm.
+ avg_null_rate: number;
+ max_null_rate: number;
+ }
+
+ /**
+  * Filters accepted by the monitoring query hooks.
+  *
+  * Only `project` is required. `toQueryParams` copies every field into the
+  * request's query parameters, and `buildQueryString` leaves out the ones
+  * that are undefined or empty.
+  */
+ interface MonitoringFilters {
+ project: string;
+ feature_view_name?: string;
+ feature_name?: string;
+ feature_service_name?: string;
+ granularity?: string;
+ data_source_type?: string;
+ // NOTE(review): the expected date string format is not visible here — confirm.
+ start_date?: string;
+ end_date?: string;
+ }
+
+ /**
+  * Maps monitoring filters onto the query parameters sent to the monitoring
+  * endpoints.
+  *
+  * Only the eight known filter fields are copied, so any extra property on
+  * the argument never reaches the URL. Unset filters are kept as `undefined`
+  * entries; `buildQueryString` is what drops them.
+  *
+  * @param filters - Filters chosen by the caller; only `project` is required.
+  * @returns A flat parameter map keyed by the backend's parameter names.
+  */
+ const toQueryParams = ({
+   project,
+   feature_view_name,
+   feature_name,
+   feature_service_name,
+   granularity,
+   data_source_type,
+   start_date,
+   end_date,
+ }: MonitoringFilters): Record<string, string | undefined> => ({
+   project,
+   feature_view_name,
+   feature_name,
+   feature_service_name,
+   granularity,
+   data_source_type,
+   start_date,
+   end_date,
+ });
+
+ /**
+  * Serialises a parameter map into a URL query string.
+  *
+  * Entries whose value is `undefined` or the empty string are skipped. Both
+  * names and values are percent-encoded.
+  *
+  * @param params - Parameter names mapped to optional string values.
+  * @returns The query string including its leading "?", or "" when no
+  *   parameter is set.
+  */
+ const buildQueryString = (
+   params: Record<string, string | undefined>,
+ ): string => {
+   const pairs: string[] = [];
+   for (const [name, value] of Object.entries(params)) {
+     // Skipping here narrows `value` to string, so no non-null assertion is needed.
+     if (value === undefined || value === "") continue;
+     pairs.push(`${encodeURIComponent(name)}=${encodeURIComponent(value)}`);
+   }
+   return pairs.length > 0 ? `?${pairs.join("&")}` : "";
+ };
+
+ /**
+  * Rewrites the bare `NaN`, `Infinity` and `-Infinity` tokens in a JSON-like
+  * payload to `null` so that `JSON.parse` accepts it.
+  *
+  * String literals are matched first and returned untouched, so text such as
+  * "ratio: NaN" inside a value is preserved. Tokens are replaced wherever
+  * they appear outside strings, including inside arrays.
+  */
+ const sanitizeNonFiniteJson = (text: string): string =>
+   text.replace(
+     /"(?:[^"\\]|\\.)*"|-?\b(?:NaN|Infinity)\b/g,
+     (token) => (token.startsWith('"') ? token : "null"),
+   );
+
+ /**
+  * GETs a monitoring endpoint and parses its JSON body.
+  *
+  * The body is read as text and sanitised before parsing because the
+  * response may contain non-finite number tokens, which are not valid JSON
+  * (presumably emitted by a serializer that allows them — confirm against
+  * the backend).
+  *
+  * @param baseUrl - Base URL of the monitoring API.
+  * @param path - Endpoint path appended to `baseUrl`.
+  * @param params - Query parameters; unset or empty entries are dropped by
+  *   `buildQueryString`.
+  * @returns The parsed body. It is not validated at runtime; `T` is the
+  *   caller's assertion about its shape.
+  * @throws Error when the response status is not OK, or SyntaxError when the
+  *   sanitised body is still not valid JSON.
+  */
+ const fetchMonitoring = async <T = unknown>(
+   baseUrl: string,
+   path: string,
+   params: Record<string, string | undefined>,
+ ): Promise<T> => {
+   const qs = buildQueryString(params);
+   const res = await fetch(`${baseUrl}${path}${qs}`);
+   if (!res.ok) {
+     throw new Error(`Failed to fetch ${path}: ${res.status} ${res.statusText}`);
+   }
+   const text = await res.text();
+   return JSON.parse(sanitizeNonFiniteJson(text)) as T;
+ };
+
+ // Passed as `staleTime` to every monitoring useQuery call in this file;
+ // react-query interprets the value in milliseconds (30 seconds).
+ const STALE_TIME = 30_000;
+
+ /**
+  * Query hook for feature-level monitoring metrics.
+  *
+  * Requests `/monitoring/metrics/features` with the given filters. The
+  * filters are part of the query key, the query runs only while
+  * MonitoringContext reports `enabled`, and a failed request is retried once.
+  */
+ const useFeatureMetrics = (filters: MonitoringFilters) => {
+   const { apiBaseUrl: baseUrl, enabled: isEnabled } =
+     useContext(MonitoringContext);
+   const loadFeatureMetrics = () =>
+     fetchMonitoring(
+       baseUrl,
+       "/monitoring/metrics/features",
+       toQueryParams(filters),
+     );
+   return useQuery(["monitoring-features", filters], loadFeatureMetrics, {
+     staleTime: STALE_TIME,
+     enabled: isEnabled,
+     retry: 1,
+   });
+ };
+
+ /**
+  * Query hook for feature-view-level monitoring metrics.
+  *
+  * Requests `/monitoring/metrics/feature_views` with the given filters. The
+  * filters are part of the query key, the query runs only while
+  * MonitoringContext reports `enabled`, and a failed request is retried once.
+  */
+ const useFeatureViewMetrics = (filters: MonitoringFilters) => {
+   const { apiBaseUrl: baseUrl, enabled: isEnabled } =
+     useContext(MonitoringContext);
+   const loadFeatureViewMetrics = () =>
+     fetchMonitoring(
+       baseUrl,
+       "/monitoring/metrics/feature_views",
+       toQueryParams(filters),
+     );
+   return useQuery(["monitoring-feature-views", filters], loadFeatureViewMetrics, {
+     staleTime: STALE_TIME,
+     enabled: isEnabled,
+     retry: 1,
+   });
+ };
+
+ /**
+  * Query hook for feature-service-level monitoring metrics.
+  *
+  * Requests `/monitoring/metrics/feature_services` with the given filters.
+  * The filters are part of the query key, the query runs only while
+  * MonitoringContext reports `enabled`, and a failed request is retried once.
+  */
+ const useFeatureServiceMetrics = (filters: MonitoringFilters) => {
+   const { apiBaseUrl: baseUrl, enabled: isEnabled } =
+     useContext(MonitoringContext);
+   const loadFeatureServiceMetrics = () =>
+     fetchMonitoring(
+       baseUrl,
+       "/monitoring/metrics/feature_services",
+       toQueryParams(filters),
+     );
+   return useQuery(
+     ["monitoring-feature-services", filters],
+     loadFeatureServiceMetrics,
+     {
+       staleTime: STALE_TIME,
+       enabled: isEnabled,
+       retry: 1,
+     },
+   );
+ };
+
+ /**
+  * Query hook for baseline metrics.
+  *
+  * Requests `/monitoring/metrics/baseline` for the given project, optionally
+  * narrowed by feature view, feature and data source type. The query runs
+  * only while MonitoringContext reports `enabled`, and a failed request is
+  * retried once.
+  *
+  * @param project - Project to load the baseline for.
+  * @param featureViewName - Optional feature view filter.
+  * @param featureName - Optional feature filter.
+  * @param dataSourceType - Optional data source type filter.
+  */
+ const useBaselineMetrics = (
+   project: string,
+   featureViewName?: string,
+   featureName?: string,
+   dataSourceType?: string,
+ ) => {
+   const { apiBaseUrl, enabled } = useContext(MonitoringContext);
+   return useQuery(
+     // Every argument that shapes the request must be in the key. Without
+     // dataSourceType, switching data source would reuse the cache entry
+     // fetched for the previous one.
+     [
+       "monitoring-baseline",
+       project,
+       featureViewName,
+       featureName,
+       dataSourceType,
+     ],
+     () =>
+       fetchMonitoring(
+         apiBaseUrl,
+         "/monitoring/metrics/baseline",
+         {
+           project,
+           feature_view_name: featureViewName,
+           feature_name: featureName,
+           data_source_type: dataSourceType,
+         },
+       ),
+     { staleTime: STALE_TIME, enabled, retry: 1 },
+   );
+ };
+
+ /**
+  * Query hook for time-series monitoring metrics.
+  *
+  * Requests `/monitoring/metrics/timeseries` with the given filters. The
+  * filters are part of the query key, the query runs only while
+  * MonitoringContext reports `enabled`, and a failed request is retried once.
+  */
+ const useTimeseriesMetrics = (filters: MonitoringFilters) => {
+   const { apiBaseUrl: baseUrl, enabled: isEnabled } =
+     useContext(MonitoringContext);
+   const loadTimeseriesMetrics = () =>
+     fetchMonitoring(
+       baseUrl,
+       "/monitoring/metrics/timeseries",
+       toQueryParams(filters),
+     );
+   return useQuery(["monitoring-timeseries", filters], loadTimeseriesMetrics, {
+     staleTime: STALE_TIME,
+     enabled: isEnabled,
+     retry: 1,
+   });
+ };
+
+ /**
+  * Mutation hook that triggers a metrics computation.
+  *
+  * POSTs the request body as JSON to `/monitoring/compute` and resolves with
+  * the parsed response. On success every cached monitoring query is
+  * invalidated so that mounted views refetch.
+  *
+  * @throws Error (from the mutation function) when the response status is
+  *   not OK.
+  */
+ const useComputeMetrics = () => {
+   const { apiBaseUrl } = useContext(MonitoringContext);
+   const queryClient = useQueryClient();
+   return useMutation(
+     async (body: {
+       project: string;
+       feature_view_name?: string;
+       feature_names?: string[];
+       start_date?: string;
+       end_date?: string;
+       granularity?: string;
+       set_baseline?: boolean;
+     }) => {
+       const res = await fetch(`${apiBaseUrl}/monitoring/compute`, {
+         method: "POST",
+         headers: { "Content-Type": "application/json" },
+         body: JSON.stringify(body),
+       });
+       if (!res.ok) {
+         throw new Error(`Failed to trigger compute: ${res.status}`);
+       }
+       return res.json();
+     },
+     {
+       onSuccess: () => {
+         queryClient.invalidateQueries("monitoring-features");
+         queryClient.invalidateQueries("monitoring-feature-views");
+         queryClient.invalidateQueries("monitoring-feature-services");
+         // A compute run can set a new baseline (`set_baseline`), and the
+         // time series is presumably built from the same recomputed metrics,
+         // so those caches must not outlive the run either.
+         queryClient.invalidateQueries("monitoring-baseline");
+         queryClient.invalidateQueries("monitoring-timeseries");
+       },
+     },
+   );
+ };
+
+export {
+ useFeatureMetrics,
+ useFeatureViewMetrics,
+ useFeatureServiceMetrics,
+ useBaselineMetrics,
+ useTimeseriesMetrics,
+ useComputeMetrics,
+};
+export type {
+ FeatureMetric,
+ FeatureViewMetric,
+ FeatureServiceMetric,
+ NumericHistogram,
+ CategoricalHistogram,
+ MonitoringFilters,
+};