code
<skip-when> <condition>Individual test case tracking (we track job-level, not test-level)</condition> <condition>Non-test jobs (build, deploy, lint, security scans)</condition> <condition>Non-OSDU projects or non-GitLab CI systems</condition> <condition>Real-time monitoring (data is from completed pipelines only)</condition> </skip-when>
code
<capabilities> <supported>Test job pass/fail status across multiple pipeline runs</supported> <supported>Flaky test job detection (jobs that intermittently fail)</supported> <supported>Stage-level metrics (unit/integration/acceptance)</supported> <supported>Cloud provider breakdown (azure, aws, gcp, ibm, cimpl)</supported> <unsupported>Individual test results (not tracked)</unsupported> <unsupported>Non-test jobs like build, deploy, lint</unsupported> </capabilities> <example> Pipeline #1: job "unit-tests-azure" → PASS (100/100 tests passed) Pipeline #2: job "unit-tests-azure" → FAIL (99/100 tests passed) Pipeline #3: job "unit-tests-azure" → PASS (100/100 tests passed) Result: This job is FLAKY (unreliable across runs) </example>
code
<progressive-approach mandatory="true">
<step number="1" name="start-light">
<action>Use status.py for quick overview</action>
<command>script_run osdu status.py --format json --pipelines 3 --project {name}</command>
<rationale>Lightweight, fast, safe token usage</rationale>
</step>
<step number="2" name="deep-dive" condition="only-if-needed">
<action>Use analyze.py with strict filters</action>
<command>script_run osdu analyze.py --format markdown --pipelines 5 --project {name} --stage unit</command>
<rationale>Heavy query, use only when status insufficient</rationale>
</step>
<step number="3" name="never-query-all">
<action>ALWAYS specify --project to avoid 30-project scan</action>
<rationale>Prevents token limit exceeded error</rationale>
</step>
</progressive-approach>
<format-selection>
<format type="json">
<use-when>Extracting specific metrics or calculating statistics</use-when>
<use-when>Building summaries or comparisons</use-when>
<use-when>Parsing structured data programmatically</use-when>
<use-when importance="critical">ALWAYS for status.py (lightweight, parseable)</use-when>
</format>
<format type="markdown">
<use-when>analyze.py queries (10x smaller than JSON, still readable)</use-when>
<use-when>Creating reports for sharing</use-when>
<use-when>Need human-readable tables without parsing</use-when>
<use-when>Token budget is tight</use-when>
</format>
<format type="terminal" status="never-use">
<avoid-because>Includes ANSI codes and colors, hard to parse</avoid-because>
<avoid-because>Only for direct human terminal viewing</avoid-because>
</format>
</format-selection>
code
<domain-services> <project name="wellbore-domain-services" description="Wellbore data"/> <project name="well-delivery" description="Well delivery workflows"/> <project name="seismic-store-service" description="Seismic data storage"/> <project name="dataset" description="Dataset management"/> <project name="register" description="Data registration"/> <project name="unit-service" description="Unit conversion"/> </domain-services> <reference-services> <project name="crs-catalog-service" description="Coordinate reference systems"/> <project name="crs-conversion-service" description="CRS conversion"/> </reference-services> <ddms-services> <project name="rafs-ddms-services" description="R&amp;D data management"/> <project name="eds-dms" description="Engineering data management"/> </ddms-services> <workflow-processing> <project name="ingestion-workflow" description="Data ingestion pipelines"/> <project name="indexer-queue" description="Indexing queue management"/> <project name="notification" description="Event notifications"/> <project name="segy-to-mdio-conversion-dag" description="Seismic format conversion"/> </workflow-processing> <infrastructure> <project name="infra-azure-provisioning" description="Azure infra provisioning"/> <project name="os-core-common" description="Shared core libraries"/> <project name="os-core-lib-azure" description="Azure-specific libs"/> </infrastructure> <other-services> <project name="geospatial" description="Geospatial services"/> <project name="policy" description="Policy engine"/> <project name="secret" description="Secret management"/> <project name="open-etp-client" description="ETP protocol client"/> <project name="schema-upgrade" description="Schema migration tools"/> </other-services> <cloud-providers> <provider code="azure" name="Microsoft Azure"/> <provider code="aws" name="Amazon Web Services"/> <provider code="gcp" name="Google Cloud Platform"/> <provider code="ibm" name="IBM Cloud"/> <provider code="cimpl" name="CIMPL (Venus) provider"/> </cloud-providers>
code
<script name="status.py">
<when-to-use>
<scenario>Initial health check ("how is {project} doing?")</scenario>
<scenario>Recent pipeline status</scenario>
<scenario>Quick pass/fail overview</scenario>
<scenario importance="high">Default choice for most queries</scenario>
</when-to-use>
<token-impact>~900 tokens per project (very safe)</token-impact>
<options>
<option name="--pipelines N" default="10" recommended="3-5">Analyze last N pipelines</option>
<option name="--project NAME" required="true">Specify project (see list above)</option>
<option name="--format json" required="true">Structured output for parsing</option>
<option name="--venus">Filter to CIMPL (Venus) provider pipelines only</option>
<option name="--no-release">Exclude release tag pipelines (master/main branch only)</option>
</options>
<examples>
<example description="Quick status check (recommended starting point)">
script_run osdu status.py --format json --pipelines 3 --project partition
</example>
<example description="Check specific project without releases">
script_run osdu status.py --format json --pipelines 5 --project storage --no-release
</example>
<example description="Venus provider status">
script_run osdu status.py --format json --pipelines 3 --project indexer-service --venus
</example>
</examples>
</script>
<script name="analyze.py" recommendation="use-cautiously">
<purpose>In-depth flaky test detection and reliability metrics across many pipeline runs</purpose>
<when-to-use>
<scenario>After status.py shows issues</scenario>
<scenario>Flaky test job detection needed</scenario>
<scenario>Calculating pass rates over time</scenario>
<scenario>Provider comparison analysis</scenario>
<scenario importance="critical">Only with strict filters (project + stage or provider)</scenario>
</when-to-use>
<token-impact>
<impact pipelines="5" projects="1">~35K tokens (moderate)</impact>
<impact pipelines="10" projects="1">~68K tokens (heavy)</impact>
<impact projects="multiple">Can exceed 200K token limit ❌</impact>
</token-impact>
<critical-rules>
<rule priority="1">ALWAYS specify --project (never scan all 30 projects)</rule>
<rule priority="2">Start with --pipelines 5 (not default 10)</rule>
<rule priority="3">Add --stage or --provider for additional filtering</rule>
<rule priority="4">Use --format markdown if token budget is tight (10x smaller than JSON)</rule>
<rule priority="5">Only use if status.py insufficient</rule>
</critical-rules>
<options>
<option name="--pipelines N" default="10" recommended="5">Analyze last N pipelines</option>
<option name="--project NAME" required="true">Specific project (comma-separated for multiple)</option>
<option name="--format FORMAT" required="true" recommended="markdown">Use markdown to save tokens</option>
<option name="--stage STAGE">Filter by test stage (unit/integration/acceptance)</option>
<option name="--provider PROVIDER">Filter by cloud provider (azure/aws/gcp/ibm/cimpl)</option>
</options>
<examples>
<example description="Analyze flaky tests (safe query)">
script_run osdu analyze.py --format markdown --pipelines 5 --project partition --stage unit
</example>
<example description="Provider comparison (focused)">
script_run osdu analyze.py --format markdown --pipelines 5 --project storage --provider azure
</example>
<example description="Multi-project with strict filter (use cautiously)">
script_run osdu analyze.py --format markdown --pipelines 5 --project partition,storage --stage unit
</example>
</examples>
</script>
code
<pattern name="flaky-test-detection"> <step number="1">Check status</step> <command>script_run osdu status.py --format json --pipelines 5 --project partition</command> <step number="2">If issues found, deep dive with analyze.py</step> <command>script_run osdu analyze.py --format markdown --pipelines 5 --project partition --stage unit</command> </pattern> <pattern name="provider-comparison"> <description>Compare Azure vs AWS for specific project/stage</description> <command>script_run osdu analyze.py --format markdown --pipelines 5 --project storage --stage integration --provider azure</command> <command>script_run osdu analyze.py --format markdown --pipelines 5 --project storage --stage integration --provider aws</command> </pattern> <pattern name="stage-specific-analysis"> <description>Focus on unit tests only</description> <command>script_run osdu analyze.py --format markdown --pipelines 5 --project entitlements --stage unit</command> </pattern>
code
<dont-do description="Use high pipeline counts without project filter"> <bad-example>script_run osdu analyze.py --format json --pipelines 20</bad-example> <reason>Takes 3+ minutes, huge output</reason> </dont-do> <dont-do description="Use terminal format in agent context"> <bad-example>script_run osdu status.py --format terminal --project partition</bad-example> <reason>Includes ANSI codes, hard to parse</reason> </dont-do> <dont-do description="Jump straight to analyze.py"> <bad-example>script_run osdu analyze.py --format json --pipelines 10 --project partition</bad-example> <reason>Heavy query when status.py would suffice</reason> </dont-do>