{"id":3454,"date":"2025-09-09T18:08:14","date_gmt":"2025-09-09T23:08:14","guid":{"rendered":"https:\/\/devsite.datacracy.co\/?p=3454"},"modified":"2025-09-10T08:55:17","modified_gmt":"2025-09-10T13:55:17","slug":"integrating-airflow-with-databricks","status":"publish","type":"post","link":"https:\/\/devsite.datacracy.co\/es\/integrating-airflow-with-databricks\/","title":{"rendered":"Integrating Airflow with Databricks"},"content":{"rendered":"\t\t<div data-elementor-type=\"wp-post\" data-elementor-id=\"3454\" class=\"elementor elementor-3454\">\n\t\t\t\t<div class=\"elementor-element elementor-element-37c54ae9 e-con-full e-flex e-con e-parent\" data-id=\"37c54ae9\" data-element_type=\"container\" data-e-type=\"container\" data-settings=\"{&quot;background_background&quot;:&quot;classic&quot;}\">\n\t\t\t\t<div class=\"elementor-element elementor-element-16198967 elementor-widget elementor-widget-spacer\" data-id=\"16198967\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"spacer.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-spacer\">\n\t\t\t<div class=\"elementor-spacer-inner\"><\/div>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-69952c6e elementor-widget elementor-widget-image\" data-id=\"69952c6e\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img fetchpriority=\"high\" decoding=\"async\" width=\"300\" height=\"168\" src=\"https:\/\/devsite.datacracy.co\/wp-content\/uploads\/2024\/11\/DATACRACY-LOGO-BN-e1732283126752-300x168.png\" class=\"attachment-medium size-medium wp-image-148\" alt=\"\" srcset=\"https:\/\/devsite.datacracy.co\/wp-content\/uploads\/2024\/11\/DATACRACY-LOGO-BN-e1732283126752-300x168.png 300w, 
https:\/\/devsite.datacracy.co\/wp-content\/uploads\/2024\/11\/DATACRACY-LOGO-BN-e1732283126752.png 693w\" sizes=\"(max-width: 300px) 100vw, 300px\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-4d516ba elementor-widget elementor-widget-heading\" data-id=\"4d516ba\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">BLOG<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-19e73c92 elementor-widget elementor-widget-spacer\" data-id=\"19e73c92\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"spacer.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-spacer\">\n\t\t\t<div class=\"elementor-spacer-inner\"><\/div>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t<div class=\"elementor-element elementor-element-d7cbee9 e-flex e-con-boxed e-con e-parent\" data-id=\"d7cbee9\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-c7c3592 elementor-widget elementor-widget-spacer\" data-id=\"c7c3592\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"spacer.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-spacer\">\n\t\t\t<div class=\"elementor-spacer-inner\"><\/div>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t<div class=\"elementor-element elementor-element-d90287b e-flex e-con-boxed e-con e-parent\" data-id=\"d90287b\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-5c1ee72 
elementor-widget elementor-widget-page-title\" data-id=\"5c1ee72\" data-element_type=\"widget\" data-e-type=\"widget\" data-settings=\"{&quot;align&quot;:&quot;center&quot;}\" data-widget_type=\"page-title.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\n\t\t<div class=\"hfe-page-title hfe-page-title-wrapper elementor-widget-heading\">\n\n\t\t\t\t\t\t\t\t\t\t\t<a href=\"https:\/\/devsite.datacracy.co\/es\/\">\n\t\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">\n\t\t\t\t\t\t\t\t\n\t\t\t\tIntegrating Airflow with Databricks\t\t\t\t\t- Overview of Airflow, Databricks, and dbt\t\t\t\t  \n\t\t\t<\/h2 > \n\t\t\t\t\t\t\t\t\t<\/a>\n\t\t\t\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t<div class=\"elementor-element elementor-element-dfb7f24 e-flex e-con-boxed e-con e-parent\" data-id=\"dfb7f24\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-e90e7ec elementor-widget-divider--view-line elementor-widget elementor-widget-divider\" data-id=\"e90e7ec\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"divider.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-divider\">\n\t\t\t<span class=\"elementor-divider-separator\">\n\t\t\t\t\t\t<\/span>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t<div class=\"elementor-element elementor-element-f6f3f09 e-flex e-con-boxed e-con e-parent\" data-id=\"f6f3f09\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-4419c30 elementor-widget elementor-widget-spacer\" data-id=\"4419c30\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"spacer.default\">\n\t\t\t\t<div 
class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-spacer\">\n\t\t\t<div class=\"elementor-spacer-inner\"><\/div>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t<div class=\"elementor-element elementor-element-e12f192 e-flex e-con-boxed e-con e-parent\" data-id=\"e12f192\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-dfb8856 elementor-widget elementor-widget-text-editor\" data-id=\"dfb8856\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>Before diving into the integration steps, let\u2019s quickly overview the technologies we\u2019ll be using.<\/p><ul><li>\u00a0<strong>Apache Airflow<\/strong>: An open-source platform used to programmatically author, schedule, and monitor workflows.<\/li><li><strong>Databricks<\/strong>: A unified data analytics platform that provides a Spark-based processing engine, allowing you to build scalable data pipelines.<\/li><li>\u00a0<strong>dbt (data build tool)<\/strong>: A tool that enables data analysts and engineers to transform data in a warehouse via SQL, with the added benefit of validating the data quality through testing and documentation.<\/li><\/ul><p>By combining these tools, you can orchestrate a full end-to-end data pipeline in which data is processed in Databricks, and dbt is used to validate the quality of that data.<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-c66e7cd elementor-widget elementor-widget-heading\" data-id=\"c66e7cd\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h4 class=\"elementor-heading-title elementor-size-default\">Setting Up 
Airflow<\/h4>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t<div class=\"elementor-element elementor-element-89a762f e-grid e-con-full e-con e-child\" data-id=\"89a762f\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t<div class=\"elementor-element elementor-element-6082371 elementor-widget elementor-widget-text-editor\" data-id=\"6082371\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>Start by installing and setting up Apache Airflow. If you already have it running, you can skip this section.<\/p><ul><li>Install Airflow via pip: <code>pip install apache-airflow<\/code>.<\/li><li>Initialize the database and start the webserver and scheduler:<br \/>airflow db init<br \/>airflow webserver<br \/>airflow scheduler<br \/>Next, configure Airflow to work with Databricks.<br \/>Airflow Configuration for Databricks:<br \/>\u2022 Install the Databricks integration package:<br \/>pip install apache-airflow-providers-databricks<br \/>\u2022 Configure the Databricks connection in Airflow\u2019s UI:<br \/>\u2022 Go to the Airflow Admin tab \u2192 Connections \u2192 Add a new connection.<br \/>\u2022 Set <strong>Conn Type<\/strong> to Databricks.<br \/>\u2022 Add your <strong>Databricks host URL<\/strong> and <strong>token<\/strong>.<\/li><\/ul>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-9bfe234 elementor-widget elementor-widget-image\" data-id=\"9bfe234\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img decoding=\"async\" width=\"900\" height=\"900\" src=\"https:\/\/devsite.datacracy.co\/wp-content\/uploads\/2025\/09\/channels4_profile.jpg\" class=\"attachment-large size-large wp-image-3461\" alt=\"\" srcset=\"https:\/\/devsite.datacracy.co\/wp-content\/uploads\/2025\/09\/channels4_profile.jpg 900w, 
https:\/\/devsite.datacracy.co\/wp-content\/uploads\/2025\/09\/channels4_profile-300x300.jpg 300w, https:\/\/devsite.datacracy.co\/wp-content\/uploads\/2025\/09\/channels4_profile-150x150.jpg 150w, https:\/\/devsite.datacracy.co\/wp-content\/uploads\/2025\/09\/channels4_profile-768x768.jpg 768w\" sizes=\"(max-width: 900px) 100vw, 900px\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t<div class=\"elementor-element elementor-element-b8e7b33 e-flex e-con-boxed e-con e-parent\" data-id=\"b8e7b33\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-1e38a8a elementor-widget elementor-widget-heading\" data-id=\"1e38a8a\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h4 class=\"elementor-heading-title elementor-size-default\">Running a Databricks Job from Airflow<\/h4>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-06b1a60 elementor-widget elementor-widget-spacer\" data-id=\"06b1a60\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"spacer.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-spacer\">\n\t\t\t<div class=\"elementor-spacer-inner\"><\/div>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t<div class=\"elementor-element elementor-element-fe031d6 e-flex e-con-boxed e-con e-parent\" data-id=\"fe031d6\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-22e50ba elementor-widget elementor-widget-text-editor\" data-id=\"22e50ba\" data-element_type=\"widget\" data-e-type=\"widget\" 
data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>To orchestrate a Databricks job in Airflow, you can use the DatabricksSubmitRunOperator. Here\u2019s a simple example of a DAG that triggers a Databricks job.<\/p><p><br \/>from airflow import DAG<br \/>from airflow.providers.databricks.operators.databricks import DatabricksSubmitRunOperator<br \/>from airflow.utils.dates import days_ago<br \/># Define Databricks job parameters<br \/>DATABRICKS_TASK = {<br \/>\u00a0\u00a0\u00a0 'new_cluster': {<br \/>\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 'spark_version': '7.3.x-scala2.12',<br \/>\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 'node_type_id': 'i3.xlarge',<br \/>\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 'num_workers': 2,<br \/>\u00a0\u00a0\u00a0 },<br \/>\u00a0\u00a0\u00a0 'notebook_task': {<br \/>\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 'notebook_path': '\/Shared\/your_notebook_path',<br \/>\u00a0\u00a0\u00a0 },<br \/>}<br \/>default_args = {<br \/>\u00a0\u00a0\u00a0 'owner': 'airflow',<br \/>\u00a0\u00a0\u00a0 'start_date': days_ago(1),<br \/>}<br \/># Define the DAG<br \/>with DAG('databricks_job',<br \/>\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 schedule_interval='@daily',<br \/>\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 default_args=default_args,<br \/>\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 catchup=False) as dag:<br \/>\u00a0\u00a0\u00a0 # Submit Databricks job<br \/>\u00a0\u00a0\u00a0 run_databricks_job = DatabricksSubmitRunOperator(<br \/>\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 task_id='run_databricks_job',<br \/>\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 json=DATABRICKS_TASK,<br \/>\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 
databricks_conn_id='databricks_default'<br \/>\u00a0\u00a0\u00a0 )<br \/>\u00a0\u00a0\u00a0 run_databricks_job<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t","protected":false},"excerpt":{"rendered":"<p>Integrating Airflow with Databricks: Adding Data Quality with dbt<\/p>\n","protected":false},"author":1,"featured_media":1816,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"elementor_canvas","format":"standard","meta":{"_eb_attr":"","site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"default","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"footnotes":""},"categories":[32],"tags":[],"class_list":["post-3454","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-dato"],"_links":{"self":[{"href":"https:\/\/devsite.datacracy.co\/es\/wp-json\/wp\/v2\/posts\/3454","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/devsite.datacracy.co\/es\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/devsite.datacracy.co\/es\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/devsite.datacracy.co\/es\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/devsite.datacracy.co\/es\/wp-json\/wp\/v2\/comments?post=3454"}],"version-history":[{"count":7,"href":"https:\/\/devsite.datacracy.co\/es\/wp-json\/wp\/v2\/posts\/3454\/revisions"}],"predecessor-version":[{"id":3465,"href":"https:\/\/devsite.datacracy.co\/es\/wp-json\/wp\/v2\/posts\/3454\/revisions\/3465"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/devsite.datacracy.co\/es\/wp-json\/wp\/v2\/media\/1816"}],"wp:attachment":[{"href":"https:\/\/devsite.datacracy.co\/es\/wp-json\/wp\/v2\/media?parent=3454"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/devsite.datacracy.co\/es\/wp-json\/wp\/v2\/categories?post=3454"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/devsite.datacracy.co\/es\/wp-json\/wp\/v2\/tags?post=3454"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}