CDC with NiFi, Kafka Connect, Flink SQL, Cloudera Data in Motion
https://youtube.com/shorts/dBh8Aig-JVU?feature=share
https://github.com/tspannhw/FLaNK-CDC/blob/main/flinkcdc.MD
https://github.com/tspannhw/FLaNK-CDC/blob/main/kafkacdc.md
- https://medium.com/cloudera-inc/cdc-not-cat-data-capture-e43713879c03
- https://dzone.com/articles/streaming-change-data-capture-data-two-ways
- https://attend.cloudera.com/skillupseriesseptember14
- https://www.slideshare.net/bunkertor/partnerskillupenable-a-streaming-cdc-solution
- https://docs.cloudera.com/cdf-datahub/7.2.17/sa-overview/topics/csa-overview.html
- https://docs.cloudera.com/cdf-datahub/7.2.14/ssb-querying-data/topics/csa-ssb-kafka-kudu-join.html
- https://blog.cloudera.com/projects-in-sql-stream-builder/
- https://github.com/asdaraujo/edge2ai-workshop/blob/trunk/workshop_nifi.adoc
- https://github.com/asdaraujo/edge2ai-workshop/blob/trunk/workshop_cdc.adoc
- https://blog.cloudera.com/using-dead-letter-queues-with-sql-stream-builder/
- https://blog.cloudera.com/materialized-views-in-sql-stream-builder/
- https://blog.cloudera.com/job-notifications-in-sql-stream-builder/
- https://blog.cloudera.com/sql-streambuilder-data-transformations/
- https://blog.cloudera.com/streaming-ingestion-for-apache-iceberg-with-cloudera-stream-processing/
- https://blog.cloudera.com/a-ui-that-makes-you-want-to-stream/
- https://blog.cloudera.com/enriching-streams-with-hive-tables-via-flink-sql/
- https://blog.cloudera.com/getting-started-with-cloudera-stream-processing-community-edition/
- https://blog.cloudera.com/fraud-detection-with-cloudera-stream-processing-part-2-real-time-streaming-analytics/
- https://blog.cloudera.com/fraud-detection-with-cloudera-stream-processing-part-1/
- https://blog.cloudera.com/turning-streams-into-data-products/
- https://github.com/apache/bahir-flink/blob/master/flink-connector-kudu/README.md
- https://docs.cloudera.com/csa/1.11.0/how-to-flink/topics/csa-kudu-catalog.html
- https://docs.cloudera.com/csa/1.11.0/how-to-ssb/topics/csa-ssb-kafka-kudu-join.html
- https://docs.cloudera.com/csa/1.11.0/how-to-flink/topics/csa-kudu-sink.html
- https://blog.cloudera.com/projects-in-sql-stream-builder/
Kudu sink table: `kudu`.`default_database`.`default.sensors`
-- Kafka-backed source table over the `iot_enriched` topic (JSON payloads).
-- Every payload field is declared as BIGINT; `response` is a nested row with a
-- single BIGINT field `result`.
CREATE TABLE `ssb`.`ssb_default`.`iotenriched` (
    `sensor_id` BIGINT,
    `sensor_ts` BIGINT,
    `is_healthy` BIGINT,
    `response` ROW<`result` BIGINT>,
    `sensor_0` BIGINT,
    `sensor_1` BIGINT,
    `sensor_2` BIGINT,
    `sensor_3` BIGINT,
    `sensor_4` BIGINT,
    `sensor_5` BIGINT,
    `sensor_6` BIGINT,
    `sensor_7` BIGINT,
    `sensor_8` BIGINT,
    `sensor_9` BIGINT,
    `sensor_10` BIGINT,
    `sensor_11` BIGINT,
    -- Event time comes from the connector's 'timestamp' metadata, not from the JSON body.
    `eventTimestamp` TIMESTAMP(3) WITH LOCAL TIME ZONE METADATA FROM 'timestamp',
    -- Watermark trails the event time by 3 seconds.
    WATERMARK FOR `eventTimestamp` AS `eventTimestamp` - INTERVAL '3' SECOND
) WITH (
    -- Connector, topic and payload format.
    'connector' = 'kafka',
    'topic' = 'iot_enriched',
    'format' = 'json',
    -- NOTE(review): presumably skips and logs records that fail to deserialize -- confirm against the SSB docs.
    'deserialization.failure.policy' = 'ignore_and_log',
    -- Start from the consumer group's committed offsets; 'earliest' is the reset
    -- policy handed to the Kafka client for when no committed offset exists.
    'scan.startup.mode' = 'group-offsets',
    'properties.auto.offset.reset' = 'earliest',
    'properties.group.id' = 'iotenrichedssbreader1',
    -- Kafka client settings ('properties.*' keys).
    'properties.bootstrap.servers' = 'cdp.52.23.78.5.nip.io:9092',
    'properties.request.timeout.ms' = '120000',
    -- NOTE(review): looks like a producer-side transaction setting; likely unused when this table is only read -- confirm.
    'properties.transaction.timeout.ms' = '900000'
);
-- Continuously copies rows from the Kafka-backed `iotenriched` table into the
-- Kudu `default.sensors` table.
--   * sensor_id and is_healthy are cast from BIGINT down to INT.
--   * sensor_0 .. sensor_11 are cast from BIGINT to DOUBLE.
--   * sensor_ts is passed through unchanged; `response` and `eventTimestamp`
--     are not written to the sink.
-- NOTE(review): no target column list is given, so the selected values are
-- matched to the sink columns by position -- confirm the column order (and the
-- sink type of sensor_ts) against the Kudu table definition.
INSERT INTO `kudu`.`default_database`.`default.sensors`
SELECT
    CAST(src.sensor_id AS INT) AS sensor_id,
    src.sensor_ts,
    CAST(src.sensor_0 AS DOUBLE) AS sensor_0,
    CAST(src.sensor_1 AS DOUBLE) AS sensor_1,
    CAST(src.sensor_2 AS DOUBLE) AS sensor_2,
    CAST(src.sensor_3 AS DOUBLE) AS sensor_3,
    CAST(src.sensor_4 AS DOUBLE) AS sensor_4,
    CAST(src.sensor_5 AS DOUBLE) AS sensor_5,
    CAST(src.sensor_6 AS DOUBLE) AS sensor_6,
    CAST(src.sensor_7 AS DOUBLE) AS sensor_7,
    CAST(src.sensor_8 AS DOUBLE) AS sensor_8,
    CAST(src.sensor_9 AS DOUBLE) AS sensor_9,
    CAST(src.sensor_10 AS DOUBLE) AS sensor_10,
    CAST(src.sensor_11 AS DOUBLE) AS sensor_11,
    CAST(src.is_healthy AS INT) AS is_healthy
-- Source is fully qualified so the statement does not depend on the session's
-- current catalog/database.
FROM `ssb`.`ssb_default`.`iotenriched` AS src;