案例:
实现一个UDTF:输入两个参数,分别为开始日期和结束日期(8位字符串YYYYMMDD,只使用前6位YYYYMM),输出从开始月份到结束月份(含)之间的每个月份(col1),以及该月份所属的周期序号(col2):序号从0开始,自开始月份起每满12个月加1.
例如:
输入: UDTF('20170201','20321001')
输出(仅展示前若干行):
+---------+-------+
| col1 | col2 |
+---------+-------+
| 201702 | 0 |
| 201703 | 0 |
| 201704 | 0 |
| 201705 | 0 |
| 201706 | 0 |
| 201707 | 0 |
| 201708 | 0 |
| 201709 | 0 |
| 201710 | 0 |
| 201711 | 0 |
| 201712 | 0 |
| 201801 | 0 |
| 201802 | 1 |
| 201803 | 1 |
| 201804 | 1 |
| 201805 | 1 |
| 201806 | 1 |
| 201807 | 1 |
| 201808 | 1 |
| 201809 | 1 |
| 201810 | 1 |
+---------+-------+
package io.transwarp.udtf;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.List;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
/**
 * Hive UDTF that expands a start/end date pair into one output row per
 * calendar month, tagging every month with a zero-based period counter that
 * increases by one for each full 12 months elapsed since the start month.
 *
 * <p>Only the first six characters ({@code YYYYMM}) of each date argument are
 * read, so {@code '20170201'} and {@code '201702'} are treated the same.
 *
 * <p>Arguments after the first two are passed through unchanged (as strings)
 * and are emitted ahead of the month and counter columns on every row.
 */
public class DateProcess extends GenericUDTF {

    /** Months in a calendar year; used to convert year/month to a month index. */
    private static final int MONTHS_PER_YEAR = 12;

    /** Number of consecutive months that share the same period counter. */
    private static final int MONTHS_PER_PERIOD = 12;

    /**
     * Declares the output schema: one string column per pass-through argument
     * ({@code pre1}, {@code pre2}, ...) followed by {@code col1} (the month as
     * {@code YYYYMM}) and {@code col2} (the period counter).
     *
     * @param args inspectors of the call arguments; at least two are required
     * @return struct inspector whose field order matches the rows forwarded by
     *         {@link #process(Object[])}
     * @throws UDFArgumentException if fewer than two arguments are supplied
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
        if (args == null || args.length < 2) {
            throw new UDFArgumentLengthException(
                    "DateProcess expects at least 2 arguments (startDate, endDate) but got "
                            + (args == null ? 0 : args.length));
        }
        List<String> fieldNames = new ArrayList<String>();
        List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
        // process() puts the pass-through values first, so they must also come
        // first in the schema; otherwise the forwarded row and the declared
        // struct disagree on both length and column positions.
        for (int i = 2; i < args.length; i++) {
            fieldNames.add("pre" + (i - 1));
            fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        }
        fieldNames.add("col1");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("col2");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Emits one row per month between the two date arguments.
     *
     * @param text call arguments: {@code text[0]} is the start date,
     *             {@code text[1]} the end date, any further elements are
     *             pass-through values
     * @throws HiveException if a date argument cannot be parsed, or if
     *                       forwarding a row fails
     */
    @Override
    public void process(Object[] text) throws HiveException {
        // A NULL date cannot describe a range; produce no rows instead of
        // failing the whole query with a NullPointerException.
        if (text == null || text.length < 2 || text[0] == null || text[1] == null) {
            return;
        }

        List<String> months;
        try {
            months = getMonthBetween(text[0].toString(), text[1].toString());
        } catch (IllegalArgumentException e) {
            throw new HiveException("DateProcess: invalid date argument: " + e.getMessage(), e);
        }

        // The pass-through values are identical on every row, so convert them once.
        int extraCount = text.length - 2;
        Object[] passThrough = new Object[extraCount];
        for (int i = 0; i < extraCount; i++) {
            Object value = text[i + 2];
            passThrough[i] = (value == null) ? null : value.toString();
        }

        for (String entry : months) {
            // Each entry has the form "YYYYMM n" with exactly one space.
            int separator = entry.indexOf(' ');
            Object[] row = new Object[extraCount + 2];
            System.arraycopy(passThrough, 0, row, 0, extraCount);
            row[extraCount] = entry.substring(0, separator);
            row[extraCount + 1] = entry.substring(separator + 1);
            forward(row);
        }
    }

    /** Nothing to release: this UDTF keeps no state between rows. */
    @Override
    public void close() throws HiveException {
    }

    /** Manual smoke test: prints the generated list and its size. */
    public static void main(String[] args) throws ParseException {
        System.out.println(getMonthBetween("20170201", "20321001"));
        System.out.println(getMonthBetween("20170201", "20321001").size());
    }

    /**
     * Lists every month from the start month to the end month (both
     * inclusive) together with its period counter.
     *
     * @param startDate start date; the first six characters must be {@code YYYYMM}
     * @param endDate   end date; the first six characters must be {@code YYYYMM}
     * @return entries of the form {@code "YYYYMM n"}, where {@code n} is the
     *         number of complete 12-month periods elapsed since the start
     *         month; empty when the end month precedes the start month
     * @throws IllegalArgumentException if either argument is null, shorter
     *         than six characters, not numeric in its first six characters,
     *         or has a month outside 01-12
     */
    public static List<String> getMonthBetween(String startDate, String endDate) {
        // Working with absolute month indices turns the range check into a
        // plain integer comparison and removes the manual year rollover, which
        // previously never terminated when the start month was greater than 12.
        int first = toMonthIndex(startDate, "startDate");
        int last = toMonthIndex(endDate, "endDate");

        List<String> list = new ArrayList<String>(Math.max(0, last - first + 1));
        for (int index = first; index <= last; index++) {
            int period = (index - first) / MONTHS_PER_PERIOD;
            int year = index / MONTHS_PER_YEAR;
            int month = index % MONTHS_PER_YEAR + 1;
            list.add(formatYearMonth(year, month) + " " + period);
        }
        return list;
    }

    /** Validates the leading {@code YYYYMM} of a date and converts it to {@code year * 12 + (month - 1)}. */
    private static int toMonthIndex(String date, String argumentName) {
        if (date == null || date.length() < 6) {
            throw new IllegalArgumentException(
                    argumentName + " must start with YYYYMM but was: " + date);
        }
        for (int i = 0; i < 6; i++) {
            char c = date.charAt(i);
            if (c < '0' || c > '9') {
                throw new IllegalArgumentException(
                        argumentName + " must start with six digits (YYYYMM) but was: " + date);
            }
        }
        int year = Integer.parseInt(date.substring(0, 4));
        int month = Integer.parseInt(date.substring(4, 6));
        if (month < 1 || month > MONTHS_PER_YEAR) {
            throw new IllegalArgumentException(
                    argumentName + " has month " + month + " outside 01-12: " + date);
        }
        return year * MONTHS_PER_YEAR + (month - 1);
    }

    /** Renders a year and month as a zero-padded six-character {@code YYYYMM} string. */
    private static String formatYearMonth(int year, int month) {
        StringBuilder sb = new StringBuilder(6);
        String yearText = Integer.toString(year);
        for (int i = yearText.length(); i < 4; i++) {
            sb.append('0');
        }
        sb.append(yearText);
        if (month < 10) {
            sb.append('0');
        }
        sb.append(month);
        return sb.toString();
    }
}
-- Register the UDTF as a permanent function backed by the jar stored on HDFS.
CREATE PERMANENT FUNCTION UDTF
  AS 'io.transwarp.udtf.DateProcess'
  USING JAR 'hdfs://nameservice1/tmp/testjar/udtf_date_process.jar';

-- Example call with a start date and an end date.
SELECT UDTF('20170201','20321001')
FROM system.dual;