Hive的UDTF开发实例.md

案例:
实现一个UDTF:输入两个参数,分别为开始日期和结束日期(格式为YYYYMMDD,实际只使用前6位YYYYMM),输出从开始月份到结束月份之间的每一个月份(col1),以及该月份所属的周期序号(col2):序号从0开始,每满12个月加1。

例如:
输入: UDT_10('20170201','20321001')
输出:
+---------+-------+
| col1 | col2 |
+---------+-------+
| 201702 | 0 |
| 201703 | 0 |
| 201704 | 0 |
| 201705 | 0 |
| 201706 | 0 |
| 201707 | 0 |
| 201708 | 0 |
| 201709 | 0 |
| 201710 | 0 |
| 201711 | 0 |
| 201712 | 0 |
| 201801 | 0 |
| 201802 | 1 |
| 201803 | 1 |
| 201804 | 1 |
| 201805 | 1 |
| 201806 | 1 |
| 201807 | 1 |
| 201808 | 1 |
| 201809 | 1 |
| 201810 | 1 |
+---------+-------+
(以上仅展示前21行,后续月份依此类推,直到203210为止。)

package io.transwarp.udtf;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.List;

import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * Hive UDTF that expands a begin/end date pair into one output row per calendar month.
 *
 * <p>The first two arguments are the begin and end dates. Only their first six characters
 * ({@code yyyyMM}) are used. For every month from the begin month to the end month
 * (both inclusive) one row is forwarded containing:
 * <ul>
 * <li>the month as a {@code yyyyMM} string, and</li>
 * <li>a period counter (as a string) that starts at 0 and grows by 1 every 12 months.</li>
 * </ul>
 *
 * <p>Any arguments after the first two are converted to strings and forwarded unchanged in
 * front of the two generated columns.
 */
public class DateProcess extends GenericUDTF {

	/** Number of leading arguments interpreted as the begin and end dates. */
	private static final int DATE_ARG_COUNT = 2;

	/** Number of months after which the emitted period counter increases by one. */
	private static final int MONTHS_PER_PERIOD = 12;

	/**
	 * Validates the argument count and declares the output schema.
	 *
	 * <p>The schema mirrors exactly what {@link #process(Object[])} forwards: one string column
	 * per pass-through argument ({@code pass1}, {@code pass2}, ...) followed by {@code col1}
	 * (month) and {@code col2} (period counter). With exactly two arguments the schema is just
	 * {@code col1, col2}.
	 *
	 * @param args object inspectors of the call arguments
	 * @return the struct inspector describing the forwarded rows
	 * @throws UDFArgumentException if fewer than two arguments are supplied
	 */
	@Override
	public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
		if (args.length < DATE_ARG_COUNT) {
			throw new UDFArgumentLengthException(
					"DateProcess expects at least 2 arguments (begin date, end date), but got " + args.length);
		}
		List<String> fieldNames = new ArrayList<String>();
		List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
		// Pass-through columns come first, in the same order process() places them in the row;
		// otherwise col1/col2 would be read from the pass-through values.
		for (int i = DATE_ARG_COUNT; i < args.length; i++) {
			fieldNames.add("pass" + (i - DATE_ARG_COUNT + 1));
			fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
		}
		fieldNames.add("col1");
		fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
		fieldNames.add("col2");
		fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
		return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
	}

	/**
	 * Forwards one row per month between the begin date ({@code text[0]}) and the end date
	 * ({@code text[1]}).
	 *
	 * <p>If either date is {@code null} no row is forwarded. A {@code null} pass-through
	 * argument is forwarded as {@code null}.
	 *
	 * @param text the call arguments: begin date, end date, then optional pass-through values
	 * @throws HiveException if a date argument cannot be parsed
	 */
	@Override
	public void process(Object[] text) throws HiveException {
		if (text[0] == null || text[1] == null) {
			return;
		}

		List<String> months;
		try {
			months = getMonthBetween(text[0].toString(), text[1].toString());
		} catch (IllegalArgumentException e) {
			throw new HiveException("DateProcess: invalid date argument: " + e.getMessage(), e);
		}

		// The pass-through prefix is identical for every row, so convert it only once.
		int prefixLength = text.length - DATE_ARG_COUNT;
		String[] prefix = new String[prefixLength];
		for (int i = 0; i < prefixLength; i++) {
			Object value = text[i + DATE_ARG_COUNT];
			prefix[i] = value == null ? null : value.toString();
		}

		for (String entry : months) {
			// Each entry has the form "<yyyyMM> <period>".
			int separator = entry.indexOf(' ');
			Object[] row = new Object[prefixLength + 2];
			System.arraycopy(prefix, 0, row, 0, prefixLength);
			row[prefixLength] = entry.substring(0, separator);
			row[prefixLength + 1] = entry.substring(separator + 1);
			forward(row);
		}
	}

	/** No resources are held, so there is nothing to release. */
	@Override
	public void close() throws HiveException {
	}

	/**
	 * Small manual check that prints the generated month list and its size.
	 *
	 * @param args unused
	 * @throws ParseException never thrown; kept so the signature stays unchanged
	 */
	public static void main(String[] args) throws ParseException {
		List<String> months = getMonthBetween("20170201", "20321001");
		System.out.println(months);
		System.out.println(months.size());
	}

	/**
	 * Lists every month from {@code startDate} to {@code endDate}, both inclusive.
	 *
	 * <p>Only the first six characters ({@code yyyyMM}) of each argument are read. Each element
	 * of the result has the form {@code "<yyyyMM> <period>"}, where {@code period} is the
	 * zero-based index of the month divided by 12 (integer division). The list is empty when
	 * the end month lies before the start month.
	 *
	 * @param startDate begin date, at least six characters, starting with {@code yyyyMM}
	 * @param endDate end date, at least six characters, starting with {@code yyyyMM}
	 * @return the months in chronological order, each paired with its period counter
	 * @throws IllegalArgumentException if an argument is {@code null}, shorter than six
	 *         characters, not numeric, or has a month outside 1..12
	 */
	public static List<String> getMonthBetween(String startDate, String endDate) {
		int startYear = parseYear(startDate, "startDate");
		int startMonth = parseMonth(startDate, "startDate");
		int endYear = parseYear(endDate, "endDate");
		int endMonth = parseMonth(endDate, "endDate");

		List<String> list = new ArrayList<String>();
		int year = startYear;
		int month = startMonth;
		int index = 0;
		while (year < endYear || (year == endYear && month <= endMonth)) {
			StringBuilder entry = new StringBuilder();
			entry.append(year);
			if (month < 10) {
				entry.append('0'); // keep the month two digits wide
			}
			entry.append(month).append(' ').append(index / MONTHS_PER_PERIOD);
			list.add(entry.toString());

			index++;
			if (month == 12) {
				year++;
				month = 1;
			} else {
				month++;
			}
		}
		return list;
	}

	/** Checks that a date argument is present and long enough to contain {@code yyyyMM}. */
	private static void requireYearMonth(String date, String argName) {
		if (date == null || date.length() < 6) {
			throw new IllegalArgumentException(
					argName + " must start with yyyyMM (at least 6 characters), but was: " + date);
		}
	}

	/** Extracts the four-digit year from the start of a date argument. */
	private static int parseYear(String date, String argName) {
		requireYearMonth(date, argName);
		return Integer.parseInt(date.substring(0, 4));
	}

	/**
	 * Extracts the month (characters 5-6) of a date argument and checks it is in 1..12.
	 * A start month above 12 would otherwise never hit the December roll-over and the
	 * generating loop would not terminate.
	 */
	private static int parseMonth(String date, String argName) {
		requireYearMonth(date, argName);
		int month = Integer.parseInt(date.substring(4, 6));
		if (month < 1 || month > 12) {
			throw new IllegalArgumentException(
					argName + " has a month outside 1..12: " + date);
		}
		return month;
	}

}

-- Register the class io.transwarp.udtf.DateProcess from the jar on HDFS as a permanent function named UDTF.
create permanent function UDTF as 'io.transwarp.udtf.DateProcess' using jar 'hdfs://nameservice1/tmp/testjar/udtf_date_process.jar';
-- Call the function with a begin date and an end date, selecting from system.dual.
select UDTF('20170201','20321001') from system.dual;

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/qq_33202508/article/details/82852672
今日推荐