17、Flink实战:容错机制(七)深入理解KeyedState,使用keyedState实现累加功能

直接上代码如下:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class KeyedStateDeepReview {
    public static void main(String[] args) throws Exception {
        //1.创建一个 flink steam 程序的执行环境
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // 2. 重启相关配置
        // 只有开启了checkpointing,才会有重启策略
        env.enableCheckpointing(5000);    // 5秒为一个周期

//        // 设置最多重启3次,每次间隔两秒
        // 默认的重启策略是  延迟无限重启CheckpointConfig
        env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000));

        //为了实现Exactly_once,必须呀记录偏移量
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        //3. 创建DataStream
        DataStreamSource<String> lines = env.socketTextStream( "192.168.***.***", 8888);

        //Transformation(s) 对数据进行处理操作
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String word) {
                //将每个单词与 1 组合,形成一个元组
                return Tuple2.of(word, 1);
            }
        });

        //4. Transformation 进行分组聚合(keyBy:将key相同的分到一个组中)
        KeyedStream<Tuple2<String, Integer>, Tuple> keyedStream = wordAndOne.keyBy(0);

        // 使用 KeyedState 通过中间状态求和
        SingleOutputStreamOperator<Tuple2<String, Integer>> summed = keyedStream.map(new KeyedStateSum());

        //5.调用Sink (Sink必须调用)
        summed.print();

        //6. 启动
        env.execute("KeyedStateDeepReview");
    }
}
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;

public class KeyedStateSum extends RichMapFunction<Tuple2<String,Integer>, Tuple2<String,Integer>> {

    //状态数据不参与序列化,添加 transient 修饰
    private transient ValueState<Tuple2<String,Integer>> valueState;

    // open方法在完成构造方法后执行一次
    @Override
    public void open(Configuration parameters) throws Exception {
        ValueStateDescriptor<Tuple2<String,Integer>> stateDescriptor =
                new ValueStateDescriptor<>("wc-keyed-state", Types.TUPLE(Types.STRING, Types.INT));
        // 每一个组key都有自己的状态,无需特别指定是哪个word组的状态
        valueState = getRuntimeContext().getState(stateDescriptor);
    }

    @Override
    public Tuple2<String, Integer> map(Tuple2<String, Integer> value) throws Exception {
        //输入的单词
        String word = value.f0;
        //输入的次数
        Integer count = value.f1;
        //根据State获取中间数据
        Tuple2<String,Integer> historyKV = valueState.value();

        //根据State中间数据,进行累加
        if (historyKV != null) {
            historyKV.f1 += count;
            //累加后,更新State数据
            valueState.update(historyKV);
        } else {
            // 更新历史数据
            valueState.update(value);
        }
//                return historyKV;  //这样写会报空指针异常
//                return Tuple2.of(word,valueState.value().f1);
        return valueState.value();
    }

}

测试结果:成功实现累加功能

*

版权声明:本文不是「本站」原创文章,版权归原作者所有 | 原文地址: