From 91f9fb22dcbe5f81ffb8f928529f7e5c90577132 Mon Sep 17 00:00:00 2001
From: Andrew Kenworthy
Date: Thu, 23 Oct 2025 12:29:28 +0200
Subject: [PATCH 1/2] add troubleshooting items

---
 .../airflow/pages/troubleshooting/index.adoc | 93 +++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/docs/modules/airflow/pages/troubleshooting/index.adoc b/docs/modules/airflow/pages/troubleshooting/index.adoc
index b8c7b4f2..86b35c33 100644
--- a/docs/modules/airflow/pages/troubleshooting/index.adoc
+++ b/docs/modules/airflow/pages/troubleshooting/index.adoc
@@ -50,3 +50,96 @@ webservers:
 ----
 
 TIP: Our strong recommendation is to increase the webserver replicas, with each webserver running a single worker, as this removes the risk of running into timeouts or memory issues.
+
+== Checking DAG syntax (Upgrading to Airflow 3.x+)
+
+DAG files that ran under Airflow 2.x may need to be adjusted to be compatible with Airflow 3.x+.
+The Airflow https://airflow.apache.org/docs/apache-airflow/stable/best-practices.html#installing-and-using-ruff[documentation] shows how such incompatibilities can be detected with the Python `ruff` tool.
+For example, the following DAG was compatible with Airflow 2.x:
+
+[source,python]
+----
+import pendulum
+from airflow import DAG
+from airflow.decorators import task
+from airflow.operators.bash import BashOperator
+
+@task(task_id="run_this")
+def run_this_func(dag_run=None):
+    """
+    Print the payload "message" passed to the DagRun conf attribute.
+
+    :param dag_run: The DagRun object
+    :type dag_run: DagRun
+    """
+    print(f"Remotely received value of {dag_run.conf.get('message')} for key=message")
+
+with DAG(
+    dag_id="example_trigger_target_dag",
+    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
+    catchup=False,
+    schedule_interval=None,
+    tags=['example'],
+) as dag:
+    run_this = run_this_func()
+
+    bash_task = BashOperator(
+        task_id="bash_task",
+        bash_command='echo "Here is the message: $message"',
+        env={'message': '{% raw %}{{ dag_run.conf.get("message") }}{% endraw %}'},
+    )
+----
+
+Assume this DAG is saved as `dag.py` in the `dags` folder in the current directory.
+Testing it with `ruff` flags one incompatibility with Airflow 3.x and two deprecated import paths:
+
+[source,bash]
+----
+$ ruff check dags/ --select AIR3 --preview
+dags/dag.py:6:2: AIR311 `airflow.decorators.task` is removed in Airflow 3.0; It still works in Airflow 3.0 but is expected to be removed in a future version.
+  |
+4 | from airflow.operators.bash import BashOperator
+5 |
+6 | @task(task_id="run_this")
+  |  ^^^^ AIR311
+7 | def run_this_func(dag_run=None):
+8 |     """
+  |
+  = help: Use `airflow.sdk.task` instead
+
+dags/dag.py:20:5: AIR301 [*] `schedule_interval` is removed in Airflow 3.0
+   |
+18 |     start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
+19 |     catchup=False,
+20 |     schedule_interval=None,
+   |     ^^^^^^^^^^^^^^^^^ AIR301
+21 |     tags=['example'],
+22 | ) as dag:
+   |
+   = help: Use `schedule` instead
+
+dags/dag.py:25:17: AIR312 `airflow.operators.bash.BashOperator` is deprecated and moved into `standard` provider in Airflow 3.0; It still works in Airflow 3.0 but is expected to be removed in a future version.
+   |
+23 |     run_this = run_this_func()
+24 |
+25 |     bash_task = BashOperator(
+   |                 ^^^^^^^^^^^^ AIR312
+26 |         task_id="bash_task",
+27 |         bash_command='echo "Here is the message: $message"',
+   |
+   = help: Install `apache-airflow-providers-standard>=0.0.1` and use `airflow.providers.standard.operators.bash.BashOperator` instead.
+
+Found 3 errors.
+[*] 1 fixable with the `--fix` option.
+----
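+
+Applying the three suggestions (importing `task` from `airflow.sdk`, replacing `schedule_interval` with `schedule`, and importing `BashOperator` from the `standard` provider) yields a DAG along the following lines.
+This is a rough sketch based only on the `ruff` hints above; it assumes the `apache-airflow-providers-standard` package is available and has not been verified against a running cluster:
+
+[source,python]
+----
+# Sketch of the DAG above with the adjustments suggested by ruff.
+# The new import locations are taken from the ruff help messages and assume
+# that the apache-airflow-providers-standard package is installed.
+import pendulum
+from airflow import DAG
+from airflow.providers.standard.operators.bash import BashOperator  # AIR312
+from airflow.sdk import task  # AIR311: replaces airflow.decorators.task
+
+@task(task_id="run_this")
+def run_this_func(dag_run=None):
+    """Print the payload "message" passed to the DagRun conf attribute."""
+    print(f"Remotely received value of {dag_run.conf.get('message')} for key=message")
+
+with DAG(
+    dag_id="example_trigger_target_dag",
+    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
+    catchup=False,
+    schedule=None,  # AIR301: `schedule` replaces `schedule_interval`
+    tags=['example'],
+) as dag:
+    run_this = run_this_func()
+
+    bash_task = BashOperator(
+        task_id="bash_task",
+        bash_command='echo "Here is the message: $message"',
+        env={'message': '{% raw %}{{ dag_run.conf.get("message") }}{% endraw %}'},
+    )
+----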
+
+== PYTHONPATH with custom DAGs folder using Python modules
+
+When a custom DAG folder (e.g. `/dags`) is defined with `envOverrides` and some DAGs are organized as Python modules, the `PYTHONPATH` variable should be defined explicitly so that it contains both this folder and the log config location set by the operator. The operator does this automatically when the default DAGs folder or git-sync is used, but not when the DAGs folder is set directly by the user.
+
+[source,yaml]
+----
+  envOverrides: &envOverrides
+    AIRFLOW__CORE__DAGS_FOLDER: "/dags"
+    PYTHONPATH: "/stackable/app/log_config:/dags"
+----

From 618e0100bd85294ff24e4fdb7bf854accc36dfc6 Mon Sep 17 00:00:00 2001
From: Andrew Kenworthy
Date: Mon, 27 Oct 2025 14:06:03 +0100
Subject: [PATCH 2/2] added explanatory note

---
 docs/modules/airflow/pages/troubleshooting/index.adoc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/modules/airflow/pages/troubleshooting/index.adoc b/docs/modules/airflow/pages/troubleshooting/index.adoc
index 86b35c33..c796bb4c 100644
--- a/docs/modules/airflow/pages/troubleshooting/index.adoc
+++ b/docs/modules/airflow/pages/troubleshooting/index.adoc
@@ -143,3 +143,5 @@ When a custom DAG folder (e.g. `/dags`) is defined with `envOverrides` and some
     AIRFLOW__CORE__DAGS_FOLDER: "/dags"
     PYTHONPATH: "/stackable/app/log_config:/dags"
 ----
+
+NOTE: Generally speaking, Airflow https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#configuration-reference[recommends] using the same configuration across all components.
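+
+The `&envOverrides` YAML anchor in the snippet above makes this straightforward: define the overrides once and reference them from the other roles. The following is a minimal sketch; the role names (`webservers`, `schedulers`, `celeryExecutors`) and the nesting under `spec` are assumptions and should be adapted to your own AirflowCluster definition:
+
+[source,yaml]
+----
+# Sketch only: role names and nesting are assumptions, adjust to your cluster spec.
+spec:
+  webservers:
+    envOverrides: &envOverrides
+      AIRFLOW__CORE__DAGS_FOLDER: "/dags"
+      PYTHONPATH: "/stackable/app/log_config:/dags"
+  schedulers:
+    envOverrides: *envOverrides
+  celeryExecutors:
+    envOverrides: *envOverrides
+----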