Implementing a parallel counter in Flink

1. Flink's counters are inspired by the counter concept of the Hadoop MapReduce computation framework.

Flink implements parallel counting through the Accumulator interface. Managing the parallel instances (and merging their partial results) is handled by Flink itself.

public interface Accumulator<V, R extends Serializable> extends Serializable, Cloneable 

The counting result is obtained through the getAccumulatorResult method of JobExecutionResult.

2. An example: counting empty fields while processing records in an ordinary job, where "empty" covers null, spaces, tabs, and so on. This scenario is quite common.

public class EmptyFieldsCountAccumulator {
    private static final String EMPTY_FIELD_ACCUMULATOR= "empty-fields";

    public static void main(String args[]) throws Exception{
        final ParameterTool params = ParameterTool.fromArgs(args);

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // make parameters available in the web interface
        env.getConfig().setGlobalJobParameters(params);

        // get the data set
        final DataSet<StringTriple> file = getDataSet(env, params);

        // filter lines with empty fields
        final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter());

        // Here, we could do further processing with the filtered lines...
        JobExecutionResult result;
        // output the filtered lines
        if (params.has("output")) {
            filteredLines.writeAsCsv(params.get("output"));
            // execute program
            result = env.execute("Accumulator example");
        } else {
            System.out.println("Printing result to stdout. Use --output to specify output path.");
            filteredLines.print();
            result = env.getLastJobExecutionResult();
        }

        // get the accumulator result via its registration key
        final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR);
        System.out.format("Number of detected empty fields per column: %s\n", emptyFields);
    }

    @SuppressWarnings("unchecked")
    private static DataSet<StringTriple> getDataSet(ExecutionEnvironment env, ParameterTool params) {
        if (params.has("input")) {
            return env.readCsvFile(params.get("input"))
                    .fieldDelimiter(";")
                    .pojoType(StringTriple.class);
        } else {
            System.out.println("Executing EmptyFieldsCountAccumulator example with default input data set.");
            System.out.println("Use --input to specify file input.");
            return env.fromCollection(getExampleInputTuples());
        }
    }

    private static Collection<StringTriple> getExampleInputTuples() {
        Collection<StringTriple> inputTuples = new ArrayList<StringTriple>();
        inputTuples.add(new StringTriple("John", "Doe", "Foo Str."));
        inputTuples.add(new StringTriple("Joe", "Johnson", ""));
        inputTuples.add(new StringTriple(null, "Kate Morn", "Bar Blvd."));
        inputTuples.add(new StringTriple("Tim", "Rinny", ""));
        inputTuples.add(new StringTriple("Alicia", "Jackson", "  "));
        inputTuples.add(new StringTriple("Alicia", "Jackson", "  "));
        inputTuples.add(new StringTriple("Alicia", "Jackson", "  "));
        inputTuples.add(new StringTriple("Tom", "Jackson", "A"));
        inputTuples.add(new StringTriple("Amy", "li", "B  "));
        return inputTuples;
    }

    /**
     * This function filters all incoming tuples that have one or more empty fields.
     * In doing so, it also counts the number of empty fields per attribute with an accumulator (registered under
     * {@link EmptyFieldsCountAccumulator#EMPTY_FIELD_ACCUMULATOR}).
     */
    public static final class EmptyFieldFilter extends RichFilterFunction<StringTriple> {

        // create a new accumulator in each filter function instance
        // accumulators can be merged later on
        private final VectorAccumulator emptyFieldCounter = new VectorAccumulator();

        @Override
        public void open(final Configuration parameters) throws Exception {
            super.open(parameters);

            // register the accumulator instance
            getRuntimeContext().addAccumulator(EMPTY_FIELD_ACCUMULATOR,
                    this.emptyFieldCounter);
        }

        @Override
        public boolean filter(final StringTriple t) {
            boolean containsEmptyFields = false;

            // iterate over the tuple fields looking for empty ones
            for (int pos = 0; pos < t.getArity(); pos++) {

                final String field = t.getField(pos);
                if (field == null ||. field.trim () isEmpty ()) { 
                    containsEmptyFields = to true ; 

                    // IF Field IS AN encountered empty, The Update
                     // ACC with 
                    the this .emptyFieldCounter.add (POS); 
                } 
            } 

            return ! containsEmptyFields; 
        } 
    } 

    / ** 
     * Maintains A vector of ACC with the this Counts. the Calling { @link #add (integer) the Increments} 
     * <I> n-</ I> Component -th vector. the vector of the size of Managed iS Automatically. 
     * the integer counter input vector the output is a List, press position count field, List field count is in the index position, the value is the count result 
     * /
    public static class VectorAccumulator implements Accumulator<Integer,ArrayList<Integer>>{
        //存储计数器向量
        private final ArrayList<Integer> resultVector;

        public VectorAccumulator() {
            this(new ArrayList<>());
        }

        public VectorAccumulator(ArrayList<Integer> resultVector) {
            this.resultVector = resultVector;
        }

        private void updateResultVector(int position,intDelta) {
             // If the position vector is given is not enough to expand the container 
            the while ( the this .resultVector.size () <= position) {
                 the this .resultVector.add (0 ); 
            } 

            Final  int Component = the this .resultVector.get (position );
             the this .resultVector.set (position, Component + Delta); 
        } 

        // adds 1 to the designated position 
        @Override
         public  void the add (Integer position) { 
            updateResultVector (position, 1 ); 
        } 

        @Override 
        publicThe ArrayList <Integer> getLocalValue () {
             return  the this .resultVector; 
        } 

        @Override 
        public  void resetLocal () {
             the this .resultVector.clear (); 
        } 

        @Override 
        public  void Merge (Accumulator <Integer, the ArrayList <Integer >> OTHER) {
             / / merging two vectors counters containers, the container according to the index merging 
            Final the ArrayList <Integer> = otherVector other.getLocalValue ();
             for ( int I = 0; I <otherVector.size (); I ++ ) { 
                updateResultVector (I, otherVector. get (i)); 
            } 
        } 

        @Override
        public Accumulator<Integer, ArrayList<Integer>> clone() {
            return new VectorAccumulator(new ArrayList<>(this.resultVector));
        }

        @Override
        public String toString() {
            return StringUtils.join(this.resultVector,':');
        }
    }


    public static class StringTriple extends Tuple3<String, String, String> {

        public StringTriple() {}

        public StringTriple(String f0, String f1, String f2) {
            super(f0, f1, f2);
        }

    }

}

 

 

 

Guess you like

Origin www.cnblogs.com/asker009/p/11220905.html