transformation_query

Primarily the content of the TRANSFORM load step, but if the TRANSFORM load step is not defined, resolved as the single source selection query based on entity load mapping and given load options.


See also:


Usage

<transformation_query>

Notes

Supported in all loads, but can not be used inside TRANSFORM load step.

Primarily the content of the TRANSFORM load step, but if the TRANSFORM load step is not defined, resolved as the single source selection query based on entity load mapping and given load options.

Suitable for cases where load has complex source entity selection, yet it required to be enhanced or reused multiple times within the use case.


Examples

Variable used in a OVERRIDE load step

OVERRIDE load step in S_SATELLITE entity:

SQL
WITH processed_source AS (
    <transformation_query>
)
INSERT INTO <target_schema>.<target_entity_name> (
    <target_entity_attribute_list>
)
SELECT 
    <target_entity_attribute_list>
FROM processed_source src
WHERE
    NOT EXISTS (
        SELECT
            1
        FROM (
            SELECT
                t1.<target_entity_attribute_by_dv_datahash>
                , t1.<target_entity_attribute_by_timeslot>
            FROM (
                SELECT
                    s.<target_entity_attribute_by_dv_datahash>
                    , s.<target_entity_attribute_by_timeslot>
                    , ROW_NUMBER()
                        OVER (
                            PARTITION BY
                                s.<target_entity_attribute_by_dv_referencing_hash_key>
                            ORDER BY
                                s.<target_entity_attribute_by_dv_load_time> DESC
                        )
                        AS
                            dv_load_time_last
                FROM
                    <target_schema>.<target_entity_name> s
                WHERE
                    EXISTS (
                        SELECT
                            <target_entity_attribute_by_dv_referencing_hash_key>
                        FROM
                            processed_source stg
                        WHERE
                            stg.<target_entity_attribute_by_dv_referencing_hash_key> = s.<target_entity_attribute_by_dv_referencing_hash_key>
                    )
            ) t1
            WHERE
                t1.dv_load_time_last = 1
        ) trg
        WHERE
            trg.<target_entity_attribute_by_dv_datahash> = src.<target_entity_attribute_by_dv_datahash>
            AND
            trg.<target_entity_attribute_by_timeslot> >= '<timeslot_start>'::timestamp
            AND
            trg.<target_entity_attribute_by_timeslot> <= '<timeslot_end>'::timestamp
    );

Load executed in the target database:

SQL
WITH processed_source AS (
    SELECT 
        dv_id
        , dv_load_time
        , dv_run_id
        , dv_datahash
        ...
        , eventtime
    FROM
         (
            SELECT
                MD5(UPPER(NVL(sdt_stage_id, '-1'))) AS dv_id
                , SYSDATE() AS dv_load_time
                , <targetrunid> AS dv_run_id
                , MD5(UPPER(NVL(CAST(sdt_stage_create_time AS VARCHAR), '-1') || '~' || NVL(sdt_stage_id, '-1') || '~' || NVL(sdt_stage_source_tech, '-1') || '~' || NVL(sdt_stage_source_type, '-1'))) AS dv_datahash
                ...
                , sdt_stage_create_time AS eventtime
                , ROW_NUMBER()
                    OVER (
                        PARTITION BY
                            dv_id
                        ORDER BY
                            sdt_stage_create_time DESC
                    )
                    AS
                        latest_key_entry
            FROM
                staging.STAGE src_entity
            WHERE
                sdt_stage_batch_id IN (<loadablerunids>)
        )
        WHERE
            latest_key_entry = 1
)
INSERT INTO rdv.S_SATELLITE (
    dv_id
    , dv_load_time
    , dv_run_id
    , dv_datahash
    ...
    , eventtime
)
SELECT 
    dv_id
    , dv_load_time
    , dv_run_id
    , dv_datahash
    ...
    , eventtime
FROM processed_source src
WHERE
    NOT EXISTS (
        SELECT
            1
        FROM (
            SELECT
                t1.dv_datahash
                , t1.eventtime
            FROM (
                SELECT
                    s.dv_datahash
                    , s.eventtime
                    , ROW_NUMBER()
                        OVER (
                            PARTITION BY
                                s.dv_id
                            ORDER BY
                                s.dv_load_time DESC
                        )
                        AS
                            dv_load_time_last
                FROM
                    rdv.S_SATELLITE s
                WHERE
                    EXISTS (
                        SELECT
                            dv_id
                        FROM
                            processed_source stg
                        WHERE
                            stg.dv_id = s.dv_id
                    )
            ) t1
            WHERE
                t1.dv_load_time_last = 1
        ) trg
        WHERE
            trg.dv_datahash = src.dv_datahash
            AND
            trg.eventtime >= '<timeslot_start>'::timestamp
            AND
            trg.eventtime <= '<timeslot_end>'::timestamp
    );