Vertica自定义函数

Vertica实现mysql函数substring_index:

package com.yy.vertica;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;

import com.vertica.sdk.BlockReader;
import com.vertica.sdk.BlockWriter;
import com.vertica.sdk.ColumnTypes;
import com.vertica.sdk.DestroyInvocation;
import com.vertica.sdk.ScalarFunction;
import com.vertica.sdk.ScalarFunctionFactory;
import com.vertica.sdk.ServerInterface;
import com.vertica.sdk.SizedColumnTypes;
import com.vertica.sdk.UdfException;
import com.vertica.sdk.VerticaType;

/**
 * Vertica scalar UDx factory for {@code substring_index(input, delimiter, count)},
 * modelled on the MySQL function of the same name.
 *
 * <p>The factory declares the SQL signature (VARCHAR, VARCHAR, INTEGER) -> VARCHAR and
 * creates {@link SubStringIndex} instances that do the per-row work.
 */
public class UDFSubStringIndexFactory extends ScalarFunctionFactory
{

	/**
	 * Creates the function object that processes blocks of rows.
	 *
	 * @param arg0 server interface supplied by Vertica (unused here)
	 * @return a new {@link SubStringIndex}
	 */
	@Override
	public ScalarFunction createScalarFunction(ServerInterface arg0) {
		return new SubStringIndex();
	}
	

	/**
	 * Row-level implementation of {@code substring_index}.
	 */
	public class  SubStringIndex extends ScalarFunction{

		/**
		 * Validates the argument list once, then computes one output value for every
		 * row in the block.
		 *
		 * @param serverInterface server interface supplied by Vertica (unused here)
		 * @param argReader reader positioned on the first input row of the block
		 * @param argWriter writer positioned on the first output row of the block
		 * @throws UdfException if the argument count or argument types are wrong
		 * @throws DestroyInvocation propagated from the SDK reader/writer
		 */
		@Override
		public void processBlock(ServerInterface serverInterface, BlockReader argReader, BlockWriter argWriter) throws UdfException, DestroyInvocation {
			// The function takes exactly three arguments.
			if (argReader.getNumCols() != 3) {
				throw new UdfException(0, "Must supply 3 arguments:String input, String stripChars, int index");
			}
			
			// Reject the call when ANY argument has the wrong type (the previous check
			// only fired when all three were wrong at the same time).
			SizedColumnTypes inTypes = argReader.getTypeMetaData();
			boolean typesOk = inTypes.getColumnType(0).isStringType()
					&& inTypes.getColumnType(1).isStringType()
					&& inTypes.getColumnType(2).isInt();
			if (!typesOk) {
				throw new UdfException(0, "make sure input columns is : String input, String stripChars, int index");
			}
			
			// A block can hold many rows: emit one result per input row and advance
			// both cursors, instead of handling only the first row.
			do {
				String paramString = argReader.getString(0);
				String stripChars = argReader.getString(1);
				int index = toCount(argReader.getLong(2));
				// NOTE(review): evaluate() may return null; this relies on setString
				// accepting null, exactly as the previous code did - confirm against the SDK.
				argWriter.setString(evaluate(paramString, stripChars, index));
				argWriter.next();
			} while (argReader.next());
		}
		
		/**
		 * Narrows the SQL integer to an {@code int} count. Values outside the
		 * {@code int} range become 0 (which {@link #evaluate} turns into a null
		 * result), matching the previous parse-with-default-0 conversion.
		 */
		private int toCount(long raw) {
			if (raw < Integer.MIN_VALUE || raw > Integer.MAX_VALUE) {
				return 0;
			}
			return (int) raw;
		}
		
		/**
		 * Returns the part of {@code input} before the {@code index}-th occurrence of
		 * the delimiter (counting from the left) when {@code index} is positive, or the
		 * part after the {@code |index|}-th occurrence counting from the right when it
		 * is negative.
		 *
		 * <p>The delimiter is matched as a whole string, and leading, trailing and
		 * consecutive delimiters are significant. If the delimiter occurs fewer than
		 * {@code |index|} times (or is empty) the whole input is returned.
		 *
		 * @param input string to cut; may be null
		 * @param stripChars delimiter string; may be null
		 * @param index occurrence count; the sign selects the search direction
		 * @return the selected substring, or null if {@code input} or
		 *         {@code stripChars} is null or {@code index} is 0
		 */
		public String evaluate(String input, String stripChars, int index) {
			if (input == null || stripChars == null || index == 0) {
				return null;
			}
			// An empty delimiter never splits anything; it would also make the
			// searches below stand still.
			if (stripChars.isEmpty()) {
				return input;
			}
			
			int delimLen = stripChars.length();
			if (index > 0) {
				// Walk forward over non-overlapping delimiter occurrences.
				int pos = -delimLen;
				for (int found = 0; found < index; found++) {
					pos = input.indexOf(stripChars, pos + delimLen);
					if (pos < 0) {
						return input;
					}
				}
				return input.substring(0, pos);
			}
			
			// Negative count: walk backward from the end. Counting up from the negative
			// value avoids Math.abs, which overflows for Integer.MIN_VALUE.
			int pos = input.length();
			for (int remaining = index; remaining < 0; remaining++) {
				pos = input.lastIndexOf(stripChars, pos - delimLen);
				if (pos < 0) {
					return input;
				}
			}
			return input.substring(pos + delimLen);
		}
		
	}
	
	/**
	 * Declares the SQL signature of the function.
	 *
	 * @param serverInterface server interface supplied by Vertica (unused here)
	 * @param argTypes receives the argument types: VARCHAR, VARCHAR, INTEGER
	 * @param returnType receives the return type: VARCHAR
	 */
	@Override
	public void getPrototype(ServerInterface serverInterface, ColumnTypes argTypes, ColumnTypes returnType) {
		// Fixed signature: (input VARCHAR, delimiter VARCHAR, count INTEGER) -> VARCHAR.
		argTypes.addVarchar();
		argTypes.addVarchar();
		argTypes.addInt();
		returnType.addVarchar();
	}
	
	/**
	 * Declares the size of the returned VARCHAR. The result is always a substring of
	 * the first argument, so that argument's declared length is used as the maximum.
	 *
	 * @param srvInterface server interface supplied by Vertica (unused here)
	 * @param argTypes sized types of the actual arguments
	 * @param returnType receives the sized return type
	 */
	@Override
	public void getReturnType(ServerInterface srvInterface, final SizedColumnTypes argTypes, SizedColumnTypes returnType){
		VerticaType type = argTypes.getColumnType(0);
		returnType.addVarchar(type.getStringLength());
	}
}

 结果输出

Vmart=> select substring_index('a/b/c','/',2);
-[ RECORD 1 ]---+----
substring_index | a/b

Vmart=> select substring_index('a/b/c','/',3);
-[ RECORD 1 ]---+------
substring_index | a/b/c

Vmart=> select substring_index('a/b/c','/',1);
-[ RECORD 1 ]---+--
substring_index | a

注意点:

abstract void com.vertica.sdk.UDXFactory.getReturnType ( ServerInterface srvInterface, SizedColumnTypes argTypes, SizedColumnTypes returnType ) throws UdfException [pure virtual]

Function to tell Vertica what the return types (and length/precision if necessary) of this UDX are.

For CHAR/VARCHAR types, specify the max length,

For NUMERIC types, specify the precision and scale.

For Time types (with or without time zone), specify the precision, -1 means unspecified/don’t care

For IntervalYM/IntervalDS types, specify the precision and range

For all other types, no length/precision specification needed

字符串返回值需要指定返回长度。

创建自定义函数分两个步骤:

1、创建lib

2、创建function

Vmart=> SELECT SET_CONFIG_PARAMETER('JavaBinaryForUDx','/usr/bin/java');
Vmart=> create LIBRARY verticaextlib as '/home/dbadmin/verticaext.jar' language 'Java';
CREATE LIBRARY
Vmart=> create function substring_index as language 'Java' name 'com.yy.vertica.UDFSubStringIndexFactory' LIBRARY verticaextlib;
CREATE FUNCTION

猜你喜欢

转载自yugouai.iteye.com/blog/2092477