Skip to main content
Skip table of contents

OPT_INSERT_UPDATE_BY_DATAHASH_AS_SWAP

Target database: Snowflake, Amazon Redshift, Azure SQL Database, Azure Synapse SQL, Microsoft Fabric, Google BigQuery, Databricks SQL

OPT_INSERT_UPDATE_BY_DATAHASH_AS_SWAP is a load option that enables 1-1 loading with key and datahash


See also:


Usage

OPTION
OPT_INSERT_UPDATE_BY_DATAHASH_AS_SWAP: Boolean

Default: false


Notes

OPT_INSERT_UPDATE_BY_DATAHASH_AS_SWAP can be used in table loads that have a single entity mapping (i.e. one source entity) and attributes with following attribute types defined:

  • DV_LOAD_TIME

  • DV_DATAHASH

  • either DV_HASHKEY or DV_REFERENCING_HASHKEY

  • DV_UPDATE_TIME (optional)

Hint: Can be used also with Run ID logic.


Examples

Insert, Update Load by DATAHASH as SWAP option

This example presents how an entity load would work with this logic enabled.

Assumptions:

  • Data is staged to table stage.stg_taxi_zone_lookup and it will be loaded to tgt.latest_entries

Feature is enabled for the load by setting option:

OPTION
OPT_INSERT_UPDATE_BY_DATAHASH_AS_SWAP: true

Resulting DML generated by Agile Data Engine:

Snowflake:

CODE
-- Temporary table is created
CREATE TEMPORARY TABLE tgt.Z_TMP_LATEST_ENTRIES AS
 -- Data not existing in the source collected to temporary table based on key
SELECT
  ...
FROM
  tgt.LATEST_ENTRIES trg
WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      (
        SELECT
          DISTINCT <key formula> AS latest_entries_id
        FROM
          stage.STG_TAXI_ZONE_LOOKUP src_entity
      ) src
    WHERE
      trg.latest_entries_id = src.latest_entries_id
  )
UNION ALL
-- Data with unchanged datahash collected from target table
SELECT
  ...
FROM
  tgt.LATEST_ENTRIES trg
WHERE
  EXISTS (
    SELECT
      1
    FROM
      stage.STG_TAXI_ZONE_LOOKUP src
    WHERE
      trg.dv_datahash = src.<datahash formula of src>
  )
UNION ALL
  -- Data with changed datahash collected from source to temporary table
SELECT
  DISTINCT
  ...
FROM
  (
    SELECT
      ...
    FROM
      stage.STG_TAXI_ZONE_LOOKUP src_entity
  ) src
  -- LEFT OUTER JOIN is used if both DV_LOAD_TIME and DV_UPDATE_TIME existing (to store the initial insertion time)
  LEFT OUTER JOIN tgt.LATEST_ENTRIES trg on (
    trg.latest_entries_id = src.latest_entries_id
  )
  WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      tgt.LATEST_ENTRIES trg
    WHERE
      trg.dv_datahash = src.dv_datahash
  );

-- Gatekeeper is called to assure we are not emptying the target table
SELECT
  COUNT() AS gatekeeper
FROM
  tgt.Z_TMP_LATEST_ENTRIES src_entity
HAVING
  COUNT() > 0;

-- Target table is replaced with temporary table
INSERT OVERWRITE INTO tgt.LATEST_ENTRIES
SELECT
  *
FROM
  tgt.Z_TMP_LATEST_ENTRIES
;

Amazon Redshift:

CODE
-- Possible temporary table is dropped
DROP TABLE IF EXISTS tgt.Z_TMP_LATEST_ENTRIES;

-- Temporary table is created
CREATE TABLE tgt.Z_TMP_LATEST_ENTRIES(LIKE tgt.LATEST_ENTRIES);

-- Data is inserted
INSERT INTO
  tgt.Z_TMP_LATEST_ENTRIES (
     ...
  )
  -- Data not existing in the source collected to temporary table based on key
SELECT
  ...
FROM
  tgt.LATEST_ENTRIES trg
WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      (
        SELECT
          DISTINCT <key formula> AS latest_entries_id
        FROM
          stage.STG_TAXI_ZONE_LOOKUP src_entity
      ) src
    WHERE
      trg.latest_entries_id = src.latest_entries_id
  )
UNION ALL
-- Data with unchanged datahash collected from target table
SELECT
  ...
FROM
  tgt.LATEST_ENTRIES trg
WHERE
  EXISTS (
    SELECT
      1
    FROM
      stage.STG_TAXI_ZONE_LOOKUP src
    WHERE
      trg.dv_datahash = src.<datahash formula of src>
  )
UNION ALL
  -- Data with changed datahash collected from source to temporary table
SELECT
  DISTINCT
  ...
FROM
  (
    SELECT
      ...
    FROM
      stage.STG_TAXI_ZONE_LOOKUP src_entity
  ) src
  -- LEFT OUTER JOIN is used if both DV_LOAD_TIME and DV_UPDATE_TIME existing (to store the initial insertion time)
  LEFT OUTER JOIN tgt.LATEST_ENTRIES trg on (
    trg.latest_entries_id = src.latest_entries_id
  )
  WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      tgt.LATEST_ENTRIES trg
    WHERE
      trg.dv_datahash = src.dv_datahash
  );

-- Gatekeeper is called to assure we are not emptying the target table
SELECT
  COUNT() AS gatekeeper
FROM
  tgt.Z_TMP_LATEST_ENTRIES src_entity
HAVING
  COUNT() > 0;

-- Target table is replaced with temporary table
BEGIN;
ALTER TABLE
  tgt.LATEST_ENTRIES RENAME TO LATEST_ENTRIES_TO_BE_DROPPED;
ALTER TABLE
  tgt.Z_TMP_LATEST_ENTRIES RENAME TO LATEST_ENTRIES;
END;

-- And replaced target table will be dropped
DROP TABLE IF EXISTS tgt.LATEST_ENTRIES_TO_BE_DROPPED;


Azure SQL Database:

CODE
-- Possible temporary table is dropped
IF OBJECT_ID('tgt.Z_TMP_LATEST_ENTRIES', 'U') IS NOT NULL DROP TABLE [tgt].[Z_TMP_LATEST_ENTRIES];

-- Possible old version of target table is dropped
IF OBJECT_ID('tgt.LATEST_ENTRIES_TO_BE_DROPPED', 'U') IS NOT NULL DROP TABLE [tgt].[LATEST_ENTRIES_TO_BE_DROPPED];

-- Temporary table is created
SELECT
INTO
  [tgt].[Z_TMP_LATEST_ENTRIES]
FROM
  [tgt].[LATEST_ENTRIES]
WHERE 1 != 1;

-- Data is inserted
INSERT INTO [tgt].[Z_TMP_LATEST_ENTRIES]
  -- Data not existing in the source collected to temporary table
 -- Data not existing in the source collected to temporary table based on key
SELECT
  ...
FROM
  [tgt].[LATEST_ENTRIES] trg
WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      (
        SELECT
          DISTINCT <key formula> AS latest_entries_id
        FROM
          [stage].[STG_TAXI_ZONE_LOOKUP] src_entity
      ) src
    WHERE
      trg.[latest_entries_id] = src.[latest_entries_id]
  )
UNION ALL
-- Data with unchanged datahash collected from target table
SELECT
  ...
FROM
  [tgt].[LATEST_ENTRIES] trg
WHERE
  EXISTS (
    SELECT
      1
    FROM
      [stage].[STG_TAXI_ZONE_LOOKUP] src
    WHERE
      trg.[dv_datahash] = src.<datahash formula of src>
  )
UNION ALL
  -- Data with changed datahash collected from source to temporary table
SELECT
  DISTINCT
  ...
FROM
  (
    SELECT
      ...
    FROM
      [stage].[STG_TAXI_ZONE_LOOKUP] src_entity
  ) src
  -- LEFT OUTER JOIN is used if both DV_LOAD_TIME and DV_UPDATE_TIME existing (to store the initial insertion time)
  LEFT OUTER JOIN [tgt].[LATEST_ENTRIES] trg on (
    trg.[latest_entries_id] = src.[latest_entries_id]
  )
  WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      [tgt].[LATEST_ENTRIES] trg
    WHERE
      trg.[dv_datahash] = src.[dv_datahash]
  );

-- Gatekeeper is called to assure we are not emptying the target table
SELECT
  COUNT() AS gatekeeper
FROM
  [tgt].[Z_TMP_LATEST_ENTRIES] src_entity
HAVING
  COUNT() > 0;

-- Target table is replaced with temporary table
BEGIN;
EXEC sp_rename 'tgt.LATEST_ENTRIES', 'LATEST_ENTRIES_TO_BE_DROPPED'
EXEC sp_rename 'tgt.Z_TMP_LATEST_ENTRIES', 'LATEST_ENTRIES'
END;

-- The replaced target table is dropped
IF OBJECT_ID('tgt.LATEST_ENTRIES_TO_BE_DROPPED', 'U') IS NOT NULL DROP TABLE [tgt].[LATEST_ENTRIES_TO_BE_DROPPED];

Azure Synapse SQL:

CODE
-- Possible temporary table is dropped
IF OBJECT_ID('tgt.Z_TMP_LATEST_ENTRIES', 'U') IS NOT NULL DROP TABLE [tgt].[Z_TMP_LATEST_ENTRIES];

-- Possible old version of target table is dropped
IF OBJECT_ID('tgt.LATEST_ENTRIES_TO_BE_DROPPED', 'U') IS NOT NULL DROP TABLE [tgt].[LATEST_ENTRIES_TO_BE_DROPPED];

-- Temporary table is created
CREATE TABLE [tgt].[Z_TMP_LATEST_ENTRIES]
AS
SELECT
 <attribute list>
FROM
  [tgt].[LATEST_ENTRIES]
WHERE 1 != 1;

-- Data is inserted
INSERT INTO [tgt].[Z_TMP_LATEST_ENTRIES]
 -- Data not existing in the source collected to temporary table based on key
SELECT
  ...
FROM
  [tgt].[LATEST_ENTRIES] trg
WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      (
        SELECT
          DISTINCT <key formula> AS latest_entries_id
        FROM
          [stage].[STG_TAXI_ZONE_LOOKUP] src_entity
      ) src
    WHERE
      trg.[latest_entries_id] = src.[latest_entries_id]
  )
UNION ALL
-- Data with unchanged datahash collected from target table
SELECT
  ...
FROM
  [tgt].[Z_TMP_LATEST_ENTRIES] trg
WHERE
  EXISTS (
    SELECT
      1
    FROM
      [stage].[STG_TAXI_ZONE_LOOKUP] src
    WHERE
      trg.[dv_datahash] = src.<datahash formula of src>
  )
UNION ALL
  -- Data with changed datahash collected from source to temporary table
SELECT
  DISTINCT
  ...
FROM
  (
    SELECT
      ...
    FROM
      [stage].[STG_TAXI_ZONE_LOOKUP] src_entity
  ) src
  -- LEFT OUTER JOIN is used if both DV_LOAD_TIME and DV_UPDATE_TIME existing (to store the initial insertion time)
  LEFT OUTER JOIN [tgt].[LATEST_ENTRIES] trg on (
    trg.[latest_entries_id] = src.[latest_entries_id]
  )
  WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      [tgt].[LATEST_ENTRIES] trg
    WHERE
      trg.[dv_datahash] = src.[dv_datahash]
  );

-- Gatekeeper is called to assure we are not emptying the target table
SELECT
  COUNT() AS gatekeeper
FROM
  [tgt].[Z_TMP_LATEST_ENTRIES] src_entity
HAVING
  COUNT() > 0;

-- Target table is replaced with temporary table
BEGIN;
RENAME OBJECT [tgt].[LATEST_ENTRIES] TO [LATEST_ENTRIES_TO_BE_DROPPED]
RENAME OBJECT [tgt].[Z_TMP_LATEST_ENTRIES] TO [LATEST_ENTRIES]
END;

-- The replaced target table is dropped
IF OBJECT_ID('tgt.LATEST_ENTRIES_TO_BE_DROPPED', 'U') IS NOT NULL DROP TABLE [tgt].[LATEST_ENTRIES_TO_BE_DROPPED];

Azure Synapse DW:

CODE
-- Possible temporary table is dropped
DROP TABLE IF EXISTS [tgt].[Z_TMP_LATEST_ENTRIES];

-- Temporary table is created
CREATE TABLE [tgt].[Z_TMP_LATEST_ENTRIES]
AS
SELECT
 <attribute list>
FROM
  [tgt].[LATEST_ENTRIES]
WHERE 1 != 1;

-- Data is inserted
INSERT INTO [tgt].[Z_TMP_LATEST_ENTRIES]
SELECT
  ...
FROM
  [tgt].[LATEST_ENTRIES] trg
WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      (
        SELECT
          DISTINCT <key formula> AS latest_entries_id
        FROM
          [stage].[STG_TAXI_ZONE_LOOKUP] src_entity
      ) src
    WHERE
      trg.[latest_entries_id] = src.[latest_entries_id]
  )
UNION ALL
-- Data with unchanged datahash collected from target table
SELECT
  ...
FROM
  [tgt].[Z_TMP_LATEST_ENTRIES] trg
WHERE
  EXISTS (
    SELECT
      1
    FROM
      [stage].[STG_TAXI_ZONE_LOOKUP] src
    WHERE
      trg.[dv_datahash] = src.<datahash formula of src>
  )
UNION ALL
  -- Data with changed datahash collected from source to temporary table
SELECT
  DISTINCT
  ...
FROM
  (
    SELECT
      ...
    FROM
      [stage].[STG_TAXI_ZONE_LOOKUP] src_entity
  ) src
  -- LEFT OUTER JOIN is used if both DV_LOAD_TIME and DV_UPDATE_TIME existing (to store the initial insertion time)
  LEFT OUTER JOIN [tgt].[LATEST_ENTRIES] trg on (
    trg.[latest_entries_id] = src.[latest_entries_id]
  )
  WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      [tgt].[LATEST_ENTRIES] trg
    WHERE
      trg.[dv_datahash] = src.[dv_datahash]
  );

-- Gatekeeper is called to assure we are not emptying the target table
SELECT
  COUNT() AS gatekeeper
FROM
  [tgt].[Z_TMP_LATEST_ENTRIES] src_entity
HAVING
  COUNT() > 0;

-- Target table is replaced with temporary table
BEGIN;
DROP TABLE [tgt].[LATEST_ENTRIES];
CREATE TABLE [tgt].[LATEST_ENTRIES] AS CLONE OF [trg].[Z_TMP_LATEST_ENTRIES];
DROP TABLE [trg].[Z_TMP_LATEST_ENTRIES];
END;

Google BigQuery:

CODE
-- Possible temporary table is dropped
DROP TABLE IF EXISTS tgt.Z_TMP_LATEST_ENTRIES;

-- Possible old version of target table is dropped
DROP TABLE IF EXISTS tgt.LATEST_ENTRIES_TO_BE_DROPPED;

-- Temporary table is created
CREATE TABLE tgt.Z_TMP_LATEST_ENTRIES
AS
 -- Data not existing in the source collected to temporary table based on key
SELECT
  ...
FROM
  tgt.LATEST_ENTRIES trg
WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      (
        SELECT
          DISTINCT <key formula> AS latest_entries_id
        FROM
          stage.STG_TAXI_ZONE_LOOKUP src_entity
      ) src
    WHERE
      trg.latest_entries_id = src.latest_entries_id
  )
UNION ALL
-- Data with unchanged datahash collected from target table
SELECT
  ...
FROM
  tgt.LATEST_ENTRIES trg
WHERE
  EXISTS (
    SELECT
      1
    FROM
      stage.STG_TAXI_ZONE_LOOKUP src
    WHERE
      trg.dv_datahash = src.<datahash formula of src>
  )
UNION ALL
  -- Data with changed datahash collected from source to temporary table
SELECT
  DISTINCT
  ...
FROM
  (
    SELECT
      ...
    FROM
      stage.STG_TAXI_ZONE_LOOKUP src_entity
  ) src
  -- LEFT OUTER JOIN is used if both DV_LOAD_TIME and DV_UPDATE_TIME existing (to store the initial insertion time)
  LEFT OUTER JOIN tgt.LATEST_ENTRIES trg on (
    trg.latest_entries_id = src.latest_entries_id
  )
  WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      tgt.LATEST_ENTRIES trg
    WHERE
      trg.dv_datahash = src.dv_datahash
  );

-- Gatekeeper is called to assure we are not emptying the target table
SELECT
  COUNT() AS gatekeeper
FROM
  tgt.Z_TMP_LATEST_ENTRIES src_entity
HAVING
  COUNT() > 0;

-- Target table is replaced with temporary table
BEGIN;
ALTER TABLE tgt.LATEST_ENTRIES RENAME TO tgt.LATEST_ENTRIES_TO_BE_DROPPED;
ALTER TABLE trg.Z_TMP_LATEST_ENTRIES RENAME TO LATEST_ENTRIES;
EXCEPTION WHEN ERROR THEN
CASE
WHEN
EXISTS(SELECT 1 FROM tgt.INFORMATION_SCHEMA.TABLES WHERE table_name = 'LATEST_ENTRIES_TO_BE_DROPPED')
AND
NOT EXISTS(SELECT 1 FROM tgt.INFORMATION_SCHEMA.TABLES WHERE table_name = 'LATEST_ENTRIES')
ALTER TABLE tgt.LATEST_ENTRIES_TO_BE_DROPPED
RENAME TO LATEST_ENTRIES;
END CASE;
SELECT @@error.message;
END;

-- Drop the replaced table
DROP TABLE IF EXISTS tgt.LATEST_ENTRIES_TO_BE_DROPPED;

Databricks SQL:

CODE
-- Temporary table is created
CREATE OR REPLACE TABLE <catalog>.tgt.Z_TMP_LATEST_ENTRIES AS
 -- Data not existing in the source collected to temporary table based on key
SELECT
  ...
FROM
  <catalog>.tgt.LATEST_ENTRIES trg
WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      (
        SELECT
          DISTINCT <key formula> AS latest_entries_id
        FROM
          <catalog>.stage.STG_TAXI_ZONE_LOOKUP src_entity
      ) src
    WHERE
      trg.latest_entries_id = src.latest_entries_id
  )
UNION ALL
-- Data with unchanged datahash collected from target table
SELECT
  ...
FROM
  <catalog>.tgt.LATEST_ENTRIES trg
WHERE
  EXISTS (
    SELECT
      1
    FROM
      <catalog>.stage.STG_TAXI_ZONE_LOOKUP src
    WHERE
      trg.dv_datahash = src.<datahash formula of src>
  )
UNION ALL
  -- Data with changed datahash collected from source to temporary table
SELECT
  DISTINCT
  ...
FROM
  (
    SELECT
      ...
    FROM
      <catalog>.stage.STG_TAXI_ZONE_LOOKUP src_entity
  ) src
  -- LEFT OUTER JOIN is used if both DV_LOAD_TIME and DV_UPDATE_TIME existing (to store the initial insertion time)
  LEFT OUTER JOIN <catalog>.tgt.LATEST_ENTRIES trg on (
    trg.latest_entries_id = src.latest_entries_id
  )
  WHERE
  NOT EXISTS (
    SELECT
      1
    FROM
      <catalog>.tgt.LATEST_ENTRIES trg
    WHERE
      trg.dv_datahash = src.dv_datahash
  );

-- Gatekeeper is called to assure we are not emptying the target table
SELECT
  COUNT() AS gatekeeper
FROM
  <catalog>.tgt.Z_TMP_LATEST_ENTRIES src_entity
HAVING
  COUNT() > 0;

-- Target table is replaced with temporary table
INSERT OVERWRITE INTO <catalog>.tgt.LATEST_ENTRIES
SELECT
  *
FROM
  <catalog>.tgt.Z_TMP_LATEST_ENTRIES
;

-- Drop the temporary table
DROP TABLE IF EXISTS <catalog>.tgt.Z_TMP_LATEST_ENTRIES;
JavaScript errors detected

Please note, these errors can depend on your browser setup.

If this problem persists, please contact our support.