一.环境搭建与配置
采用Hadoop-3.2.2+伪分布式环境,在win10操作系统下利用idea进行代码的编写,其中使用maven进行环境配置。
1.配置pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>java_Hadoop</groupId>
    <artifactId>Design_Hadoop</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <!-- Hadoop 3.2.2 client-side APIs: common, HDFS, MapReduce -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.2.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.2.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>3.2.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
            <version>3.2.2</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <!-- HBase 2.3.5 (server + client) -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>2.3.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>2.3.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>3.1.2</version>
        </dependency>
        <!-- JFreeChart (SpringSource repackaged artifacts) for the charts -->
        <dependency>
            <groupId>org.jfree.chart</groupId>
            <artifactId>com.springsource.org.jfree.chart</artifactId>
            <version>1.0.9</version>
        </dependency>
        <dependency>
            <groupId>org.jfree</groupId>
            <artifactId>com.springsource.org.jfree</artifactId>
            <version>1.0.12</version>
        </dependency>
        <dependency>
            <groupId>javax.servlet</groupId>
            <artifactId>com.springsource.javax.servlet</artifactId>
            <version>2.4.0</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <!-- Pin the plugin version: without it Maven warns and the
                     build is not reproducible across environments. -->
                <version>3.8.1</version>
                <configuration>
                    <source>9</source>
                    <target>9</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <!-- NOTE: these properties duplicate the plugin <configuration> above;
         both set Java 9. Keep them in sync if the level changes. -->
    <properties>
        <maven.compiler.source>9</maven.compiler.source>
        <maven.compiler.target>9</maven.compiler.target>
    </properties>
</project>
如果下载太慢或报错,可以尝试更改maven的settings.xml文件,如果不知道该文件的存储位置,可以打开idea的设置查看,如下:
之后根据路径找到settings.xml,在其中添加如下镜像配置:
<mirrors>
    <!-- Aliyun mirror for the central repository. Must be HTTPS with the
         current repository path: Maven 3.8.1+ blocks plain-HTTP mirrors, and
         the old http://maven.aliyun.com/nexus/... URL is deprecated/redirected. -->
    <mirror>
        <id>alimaven</id>
        <name>aliyun maven</name>
        <url>https://maven.aliyun.com/repository/public</url>
        <mirrorOf>central</mirrorOf>
    </mirror>
    <!-- Fallback mirror for every other repository (first matching mirror in
         file order wins, so alimaven still handles central). -->
    <mirror>
        <id>central</id>
        <mirrorOf>*</mirrorOf>
        <name>Maven Repository Switchboard</name>
        <url>https://repo1.maven.org/maven2</url>
    </mirror>
</mirrors>
等待下载完毕。
2.在本机启动hadoop3.2.2并上传蔬菜.txt
具体的配置过程详见其他博主的博客,本文只从启动开始说起(注意前提是已经配置好hadoop以及nn和dn等等)
①.启动hadoop
启动命令行窗口(注意要以管理员身份运行)执行命令start-all.cmd,如果启动成功,则输出如下
生成四个窗口,分别代表nn,dn,nm和rm
②.利用jps命令查看当前进程
③.上传蔬菜.txt至hdfs文件系统
蔬菜.txt是存储各蔬菜售卖信息情况的文本,示例如下:
上传该文本至hdfs,可选的命令很多,如copyFromLocal、put等
打开hadoop提供的html可视化页面localhost:9870/explorer.html#/,查看是否上传成功
至此,我们可以开始编写hadoop程序了!
二.问题与程序编写
Problem 1
统计各地区每一天大白菜、土豆的价格(柱状图)
Problem1代码
BarChart.java
package problem1;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartPanel;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.axis.CategoryAxis;
import org.jfree.chart.axis.ValueAxis;
import org.jfree.chart.labels.ItemLabelAnchor;
import org.jfree.chart.labels.ItemLabelPosition;
import org.jfree.chart.labels.StandardCategoryItemLabelGenerator;
import org.jfree.chart.plot.CategoryPlot;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.chart.renderer.category.BarRenderer3D;
import org.jfree.data.category.CategoryDataset;
import org.jfree.data.category.DefaultCategoryDataset;
import org.jfree.ui.TextAnchor;
import java.awt.*;
/**
 * Thin wrapper around a JFreeChart 3D bar chart: collect (series, category,
 * value) triples via {@link #addItem}, then call {@link #plot()} to build the
 * chart and its Swing panel.
 */
public class BarChart {
    ChartPanel frame1;
    DefaultCategoryDataset dataset;
    JFreeChart chart;

    /** Creates a wrapper backed by an initially empty dataset. */
    public BarChart() {
        dataset = new DefaultCategoryDataset();
    }

    /**
     * Builds the 3D bar chart from the accumulated dataset and wraps it in a
     * ChartPanel. Must be called after the data has been added via addItem().
     */
    public void plot() {
        chart = ChartFactory.createBarChart3D(
                "蔬菜",                    // chart title
                "地区",                    // category (domain) axis label
                "数量",                    // value (range) axis label
                dataset,
                PlotOrientation.VERTICAL,
                true,                      // show the legend
                false,                     // no tooltips
                true);                     // generate URLs
        // Use CJK-capable fonts everywhere so Chinese labels are not garbled.
        CategoryPlot barPlot = chart.getCategoryPlot();
        CategoryAxis categoryAxis = barPlot.getDomainAxis();
        categoryAxis.setLabelFont(new Font("黑体", Font.PLAIN, 10));
        categoryAxis.setTickLabelFont(new Font("宋体", Font.PLAIN, 10));
        ValueAxis valueAxis = barPlot.getRangeAxis();
        valueAxis.setLabelFont(new Font("黑体", Font.BOLD, 15));
        chart.getLegend().setItemFont(new Font("黑体", Font.BOLD, 15));
        chart.getTitle().setFont(new Font("宋体", Font.BOLD, 20));
        // Renderer that prints each bar's value above the bar.
        BarRenderer3D renderer = new BarRenderer3D();
        renderer.setBaseItemLabelGenerator(new StandardCategoryItemLabelGenerator());
        renderer.setBaseItemLabelFont(new Font("黑体", Font.PLAIN, 15));
        renderer.setBasePositiveItemLabelPosition(
                new ItemLabelPosition(ItemLabelAnchor.OUTSIDE12, TextAnchor.BASELINE_CENTER));
        renderer.setBaseItemLabelsVisible(true);
        barPlot.setRenderer(renderer);
        frame1 = new ChartPanel(chart, true);
    }

    /** Records one value under (rowKey = series, columnKey = category). */
    public void addItem(float value, String rowKey, String columnKey) {
        this.dataset.addValue(value, rowKey, columnKey);
    }

    /** Replaces the chart title; only valid after plot() has been called. */
    public void changeMain(String name) {
        this.chart.setTitle(name);
    }

    public ChartPanel getChartPanel() {
        return frame1;
    }
}
CountPrice.java
该类为启动类,运行此文件即可得到最终的运行结果
package problem1;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
import javax.swing.*;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
/**
 * Driver for problem 1: runs the MapReduce job that emits
 * "vegetable area date price" lines for 大白菜/土豆, then reads the job output
 * from HDFS and shows one bar chart per vegetable.
 *
 * Fixes over the original: the HDFS stream and BufferedReader were never
 * closed (resource leak), the boolean result of waitForCompletion() was
 * ignored, and the two identical parsing branches are merged.
 */
public class CountPrice {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance();
        job.setJarByClass(CountPrice.class);
        job.setJobName("Get some Max Price");
        // Input on HDFS; output goes to the default filesystem's working dir.
        FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/test/蔬菜.txt"));
        FileOutputFormat.setOutputPath(job, new Path("./result1"));
        job.setInputFormatClass(TextInputFormat.class);     // default line splits
        job.setPartitionerClass(HashPartitioner.class);     // hash partitioning
        job.setMapperClass(CountPrice_Mapper.class);
        job.setReducerClass(CountPrice_Reducer.class);
        job.setGroupingComparatorClass(GroupingComparator.class);
        job.setMapOutputKeyClass(Date_Area_Vegetable.class);
        job.setMapOutputValueClass(FloatWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FloatWritable.class);
        // Fail fast if the job did not succeed instead of plotting stale data.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }

        BarChart chart_cabbage = new BarChart();
        BarChart chart_potato = new BarChart();
        Configuration conf = new Configuration();
        Path inFile = new Path("/user/95192/result1/part-r-00000");
        FileSystem hdfs = FileSystem.get(conf);
        // try-with-resources closes both the HDFS stream and the reader.
        try (FSDataInputStream inputStream = hdfs.open(inFile);
             BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
            for (String str = reader.readLine(); str != null; str = reader.readLine()) {
                System.out.println(str);
                // Reducer output: [0]=vegetable, [1]=area, [2]=date, [3]=price.
                String[] values = str.split("\\s+");
                String veg = values[0];
                if (veg.equals("大白菜") || veg.equals("土豆")) {
                    String area = values[1];
                    String time = values[2];
                    float price = Float.parseFloat(values[3]);
                    BarChart target = veg.equals("大白菜") ? chart_cabbage : chart_potato;
                    target.addItem(price, time, area);
                }
            }
        }

        chart_cabbage.plot();
        chart_potato.plot();
        chart_cabbage.changeMain("大白菜各地区每天销售量");
        chart_potato.changeMain("土豆各地区每天销售量");
        JFrame cabbage_frame = new JFrame("大白菜条图");
        cabbage_frame.add(chart_cabbage.getChartPanel());
        cabbage_frame.setBounds(50, 10, 1024, 768);
        cabbage_frame.setVisible(true);
        JFrame potato_frame = new JFrame("土豆条图");
        potato_frame.add(chart_potato.getChartPanel());
        potato_frame.setBounds(50, 10, 1024, 768);
        potato_frame.setVisible(true);
    }
}
CountPrice_Mapper.java
package problem1;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
/**
 * Mapper for problem 1: for 大白菜/土豆 rows, emits one
 * (area, date, vegetable) -> price pair per daily price column.
 *
 * Expected line layout after whitespace split: [0]=serial number,
 * [1]=vegetable name, [2..6]=five daily prices, [7]=wholesale-market name
 * (ignored), [8]+[9]=area (two tokens joined).
 */
public class CountPrice_Mapper extends Mapper<LongWritable, Text, Date_Area_Vegetable, FloatWritable> {

    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\\s+");
        // Skip rows whose first token is "1" (mirrors the original filter;
        // presumably drops a header/first row — confirm against the data file).
        if (fields[0].equals("1")) {
            return;
        }
        String vegetable = fields[1];
        if (!vegetable.equals("大白菜") && !vegetable.equals("土豆")) {
            return;
        }
        // Parse all five prices first so a malformed row emits nothing.
        float[] prices = new float[5];
        for (int i = 0; i < 5; i++) {
            prices[i] = Float.parseFloat(fields[2 + i]);
        }
        String area = fields[8] + fields[9];
        String[] dates = {"2014年1月1日", "2014年1月2日", "2014年1月3日", "2014年1月4日", "2014年1月5日"};
        for (int i = 0; i < 5; i++) {
            context.write(new Date_Area_Vegetable(area, dates[i], vegetable),
                    new FloatWritable(prices[i]));
        }
    }
}
CountPrice_Reducer.java
package problem1;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer for problem 1: emits one "vegetable area date -> price" line per
 * incoming value. Keys are grouped by vegetable name (see GroupingComparator).
 */
public class CountPrice_Reducer extends Reducer<Date_Area_Vegetable, FloatWritable, Text, FloatWritable> {
    public void reduce(Date_Area_Vegetable tri, Iterable<FloatWritable> values, Context context)
            throws IOException, InterruptedException {
        for (FloatWritable price : values) {
            // The label must be rebuilt inside the loop: Hadoop appears to
            // update the key instance in place as the value iterator advances,
            // so area/date change per record.
            String label = String.join(" ", tri.getName(), tri.getArea(), tri.getDate());
            context.write(new Text(label), price);
        }
    }
}
Date_Area_Vegetable.java
该类是一个三元组类,也为自定义的一个键
package problem1;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Composite map-output key (area, date, vegetable name) for problem 1.
 *
 * Ordering, equality and hashing are all by vegetable name only, so that every
 * record for one vegetable sorts together and reaches a single reduce() call.
 *
 * Fix over the original: equals()/hashCode() were not overridden, so
 * HashPartitioner used Object's identity hash — records with the same
 * vegetable could land on different reducers, silently breaking the grouping
 * whenever more than one reducer runs.
 */
public class Date_Area_Vegetable implements WritableComparable<Date_Area_Vegetable> {
    String area;
    String date;
    String name;

    public Date_Area_Vegetable(String area, String date, String name) {
        this.area = area;
        this.date = date;
        this.name = name;
    }

    /** No-arg constructor required by Hadoop serialization. */
    public Date_Area_Vegetable() {
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getArea() {
        return area;
    }

    public void setArea(String area) {
        this.area = area;
    }

    public String getDate() {
        return date;
    }

    public void setDate(String date) {
        this.date = date;
    }

    /** Compares by vegetable name only: same vegetable => same sort group. */
    @Override
    public int compareTo(Date_Area_Vegetable o) {
        return name.compareTo(o.name);
    }

    /** Consistent with compareTo: equality by vegetable name. */
    @Override
    public boolean equals(Object obj) {
        return obj instanceof Date_Area_Vegetable
                && name.equals(((Date_Area_Vegetable) obj).name);
    }

    /** Consistent with equals, so HashPartitioner keeps a name on one reducer. */
    @Override
    public int hashCode() {
        return name.hashCode();
    }

    // write/readFields must use the same field order.
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(name);
        dataOutput.writeUTF(area);
        dataOutput.writeUTF(date);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        name = dataInput.readUTF();
        area = dataInput.readUTF();
        date = dataInput.readUTF();
    }
}
GroupingComparator.java
该类为自定义的分组类
package problem1;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
//自定义分组类,重写compare()方法
public class GroupingComparator extends WritableComparator {
protected GroupingComparator() {
super(Date_Area_Vegetable.class, true);
}
public int compare(WritableComparable cc1, WritableComparable cc2) {
Date_Area_Vegetable l1 = (Date_Area_Vegetable) cc1;
Date_Area_Vegetable l2 = (Date_Area_Vegetable) cc2;
String l = l1.getName();
String r = l2.getName();
return l.compareTo(r);// 比较两个蔬菜名字
}
}
运行结果
Problem 2
选取一个城市,统计各蔬菜价格变化曲线(折线图)
Problem2代码
LineChart.java
package problem2;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartPanel;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.axis.DateAxis;
import org.jfree.chart.axis.ValueAxis;
import org.jfree.chart.labels.ItemLabelAnchor;
import org.jfree.chart.labels.ItemLabelPosition;
import org.jfree.chart.labels.StandardXYItemLabelGenerator;
import org.jfree.chart.plot.XYPlot;
import org.jfree.chart.renderer.xy.XYItemRenderer;
import org.jfree.chart.renderer.xy.XYLineAndShapeRenderer;
import org.jfree.data.time.TimeSeries;
import org.jfree.data.time.TimeSeriesCollection;
import org.jfree.data.xy.XYDataset;
import org.jfree.ui.TextAnchor;
import java.awt.*;
import java.text.SimpleDateFormat;
/**
 * Thin wrapper around a JFreeChart time-series (line) chart: add one
 * TimeSeries per vegetable via {@link #addTimeSeries}, then call
 * {@link #plot()} to build the chart and its Swing panel.
 */
public class LineChart {
    TimeSeriesCollection lineDataset = new TimeSeriesCollection();
    JFreeChart chart;
    ChartPanel frame1;

    public LineChart() {
    }

    /**
     * Builds the time-series chart from the accumulated series and wraps it
     * in a ChartPanel. Call after all series have been added.
     */
    public void plot() {
        chart = ChartFactory.createTimeSeriesChart("", "日期", "价格", lineDataset, true, true, true);
        XYPlot timePlot = (XYPlot) chart.getPlot();
        DateAxis dateAxis = (DateAxis) timePlot.getDomainAxis();
        dateAxis.setDateFormatOverride(new SimpleDateFormat("yyyy-MMM-dd"));
        ((XYLineAndShapeRenderer) timePlot.getRenderer()).setBaseShapesVisible(true);
        frame1 = new ChartPanel(chart, true);
        // CJK-capable fonts so Chinese axis labels are not garbled.
        dateAxis.setLabelFont(new Font("黑体", Font.BOLD, 11));
        dateAxis.setTickLabelFont(new Font("宋体", Font.BOLD, 12));
        timePlot.setRangeGridlinesVisible(true);
        timePlot.setRangeGridlinePaint(Color.white);
        ValueAxis priceAxis = timePlot.getRangeAxis();
        XYItemRenderer lineRenderer = timePlot.getRenderer();
        lineRenderer.setSeriesStroke(1, new BasicStroke(2.0f));
        lineRenderer.setBaseStroke(new BasicStroke(2.0f));
        // Print the value at every data point of each curve.
        lineRenderer.setBaseItemLabelsVisible(true);
        lineRenderer.setBasePositiveItemLabelPosition(
                new ItemLabelPosition(ItemLabelAnchor.OUTSIDE12, TextAnchor.BASELINE_CENTER));
        lineRenderer.setBaseItemLabelGenerator(new StandardXYItemLabelGenerator());
        lineRenderer.setBaseItemLabelFont(new Font("Dialog", 1, 12));
        priceAxis.setLabelFont(new Font("黑体", Font.BOLD, 15));
        chart.getLegend().setItemFont(new Font("黑体", Font.BOLD, 15));
        chart.getTitle().setFont(new Font("宋体", Font.BOLD, 20));
    }

    public ChartPanel getChartPanel() {
        return frame1;
    }

    public JFreeChart getChart() {
        return this.chart;
    }

    /** Replaces the chart title; only valid after plot() has been called. */
    public void changeMain(String name) {
        this.chart.setTitle(name);
    }

    public void addTimeSeries(TimeSeries timeSeries) {
        this.lineDataset.addSeries(timeSeries);
    }
}
CityPrice.java
该类为启动类,运行此文件即可得到最终的运行结果,选取的城市是吉林长春
package problem2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.jfree.chart.ChartUtilities;
import org.jfree.chart.JFreeChart;
import org.jfree.data.time.Day;
import org.jfree.data.time.TimeSeries;
import javax.swing.*;
import java.io.*;
import java.util.HashMap;
import java.util.HashSet;
/**
 * Driver for problem 2: runs the MapReduce job that emits
 * "area date vegetable price" lines sorted by area, then reads the result for
 * the 吉林长春 group, builds one time series per vegetable, shows the price
 * line chart and saves it as a PNG.
 *
 * Fixes over the original: the HDFS stream/reader were never closed, the
 * result of waitForCompletion() was ignored, the duplicated Day-parsing code
 * is extracted into a helper, and saveAsFile() uses try-with-resources.
 */
public class CityPrice {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance();
        job.setJarByClass(CityPrice.class);
        job.setJobName("Get some Max Price");
        FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/test/蔬菜.txt"));
        FileOutputFormat.setOutputPath(job, new Path("./result2"));
        job.setMapperClass(CityPrice_Mapper.class);
        job.setReducerClass(CityPrice_Reducer.class);
        job.setMapOutputKeyClass(Date_Area_Vegetable.class);
        job.setMapOutputValueClass(FloatWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FloatWritable.class);
        // Fail fast if the job did not succeed instead of plotting stale data.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }

        LineChart city_Line = new LineChart();
        // One TimeSeries per vegetable name for the selected city.
        HashMap<String, TimeSeries> seriesByName = new HashMap<>();
        String city = "吉林长春";
        Configuration conf = new Configuration();
        Path inFile = new Path("/user/95192/result2/part-r-00000");
        FileSystem hdfs = FileSystem.get(conf);
        // try-with-resources closes both the HDFS stream and the reader.
        try (FSDataInputStream inputStream = hdfs.open(inFile);
             BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
            for (String str = reader.readLine(); str != null; str = reader.readLine()) {
                System.out.println(str);
                // Reducer output: [0]=area, [1]=date, [2]=vegetable, [3]=price.
                String[] values = str.split("\\s+");
                if (!city.equals(values[0])) {
                    // Output is sorted by area, so the first different area
                    // ends the 吉林长春 group: render, save and stop reading.
                    city_Line.plot();
                    city_Line.changeMain(city + "销售量");
                    saveAsFile(city_Line.getChart(), "E:\\JAVA\\测试2021\\Result2\\" + city + "销售折线图.png", 1024, 768);
                    break;
                }
                String name = values[2];
                TimeSeries timeSeries = seriesByName.get(name);
                if (timeSeries == null) {
                    timeSeries = new TimeSeries(name, Day.class);
                    seriesByName.put(name, timeSeries);
                    city_Line.addTimeSeries(timeSeries);
                }
                timeSeries.add(parseDay(values[1]), Float.parseFloat(values[3]));
            }
        }

        JFrame frame = new JFrame("销售折线图");
        frame.add(city_Line.getChartPanel());
        frame.setBounds(50, 10, 1024, 768);
        frame.setVisible(true);
    }

    /**
     * Parses "2014年1月1日"-style text into a JFreeChart Day:
     * chars 0-3 = year, char 5 = month, char 7 = day.
     * NOTE(review): only valid for single-digit month/day, which holds for
     * this dataset (2014-01-01..05) — confirm before reusing elsewhere.
     */
    private static Day parseDay(String dateText) {
        return new Day(Integer.parseInt(dateText.substring(7, 8)),
                Integer.parseInt(dateText.substring(5, 6)),
                Integer.parseInt(dateText.substring(0, 4)));
    }

    /**
     * Writes the chart as a PNG to outputPath, creating parent directories as
     * needed. I/O errors are logged, matching the original best-effort intent.
     */
    public static void saveAsFile(JFreeChart chart, String outputPath, int weight, int height) {
        File outFile = new File(outputPath);
        if (!outFile.getParentFile().exists()) {
            outFile.getParentFile().mkdirs();
        }
        try (FileOutputStream out = new FileOutputStream(outFile)) {
            ChartUtilities.writeChartAsPNG(out, chart, weight, height);
            out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
CityPrice_Mapper.java
package problem2;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
/**
 * Mapper for problem 2: emits one (area, date, vegetable) -> price pair per
 * daily price column, for every vegetable.
 *
 * Expected line layout after whitespace split: [0]=serial number,
 * [1]=vegetable name, [2..6]=five daily prices, [7]=wholesale-market name
 * (ignored), [8]+[9]=area (two tokens joined).
 */
public class CityPrice_Mapper extends Mapper<LongWritable, Text, Date_Area_Vegetable, FloatWritable> {

    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\\s+");
        // Skip rows whose first token is "1" (mirrors the original filter;
        // presumably drops a header/first row — confirm against the data file).
        if (fields[0].equals("1")) {
            return;
        }
        String vegetable = fields[1];
        // Parse all five prices first so a malformed row emits nothing.
        float[] prices = new float[5];
        for (int i = 0; i < 5; i++) {
            prices[i] = Float.parseFloat(fields[2 + i]);
        }
        String area = fields[8] + fields[9];
        String[] dates = {"2014年1月1日", "2014年1月2日", "2014年1月3日", "2014年1月4日", "2014年1月5日"};
        for (int i = 0; i < 5; i++) {
            context.write(new Date_Area_Vegetable(area, dates[i], vegetable),
                    new FloatWritable(prices[i]));
        }
    }
}
CityPrice_Reducer.java
package problem2;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer for problem 2: emits one "area date vegetable -> price" line per
 * incoming value. Keys sort (and therefore group) by area.
 */
public class CityPrice_Reducer extends Reducer<Date_Area_Vegetable, FloatWritable, Text, FloatWritable> {
    public void reduce(Date_Area_Vegetable tri, Iterable<FloatWritable> values, Context context)
            throws IOException, InterruptedException {
        for (FloatWritable price : values) {
            // The label must be rebuilt inside the loop: Hadoop appears to
            // update the key instance in place as the value iterator advances,
            // so date/vegetable change per record.
            String label = String.join(" ", tri.getArea(), tri.getDate(), tri.getName());
            context.write(new Text(label), price);
        }
    }
}
Date_Area_Vegetable.java
该类是一个三元组类,也为自定义的一个键
package problem2;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Composite map-output key (area, date, vegetable name) for problem 2.
 *
 * Ordering, equality and hashing are all by area only, so that every record
 * for one city sorts together and reaches a single reduce() call.
 *
 * Fix over the original: equals()/hashCode() were not overridden, so
 * HashPartitioner used Object's identity hash — records with the same area
 * could land on different reducers, silently breaking the grouping whenever
 * more than one reducer runs.
 */
public class Date_Area_Vegetable implements WritableComparable<Date_Area_Vegetable> {
    String area;
    String date;
    String name;

    public Date_Area_Vegetable(String area, String date, String name) {
        this.area = area;
        this.date = date;
        this.name = name;
    }

    /** No-arg constructor required by Hadoop serialization. */
    public Date_Area_Vegetable() {
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getArea() {
        return area;
    }

    public void setArea(String area) {
        this.area = area;
    }

    public String getDate() {
        return date;
    }

    public void setDate(String date) {
        this.date = date;
    }

    /** Compares by area only: same city => same sort group. */
    @Override
    public int compareTo(Date_Area_Vegetable o) {
        return area.compareTo(o.area);
    }

    /** Consistent with compareTo: equality by area. */
    @Override
    public boolean equals(Object obj) {
        return obj instanceof Date_Area_Vegetable
                && area.equals(((Date_Area_Vegetable) obj).area);
    }

    /** Consistent with equals, so HashPartitioner keeps an area on one reducer. */
    @Override
    public int hashCode() {
        return area.hashCode();
    }

    // write/readFields must use the same field order.
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(name);
        dataOutput.writeUTF(area);
        dataOutput.writeUTF(date);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        name = dataInput.readUTF();
        area = dataInput.readUTF();
        date = dataInput.readUTF();
    }
}
GroupingComparator.java
该类为自定义的分组类
package problem2;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import problem1.Date_Area_Vegetable;
//自定义分组类,重写compare()方法
public class GroupingComparator extends WritableComparator {
protected GroupingComparator() {
super(problem1.Date_Area_Vegetable.class, true);
}
public int compare(WritableComparable cc1, WritableComparable cc2) {
problem1.Date_Area_Vegetable l1 = (problem1.Date_Area_Vegetable) cc1;
problem1.Date_Area_Vegetable l2 = (Date_Area_Vegetable) cc2;
String l = l1.getName();
String r = l2.getName();
return l.compareTo(r);// 比较两个蔬菜名字
}
}
运行结果
Problem 3
统计每种蔬菜价格最高的地区和日期(柱状图)
Problem3代码
BarChart.java
package problem3;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartPanel;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.axis.CategoryAxis;
import org.jfree.chart.axis.ValueAxis;
import org.jfree.chart.labels.ItemLabelAnchor;
import org.jfree.chart.labels.ItemLabelPosition;
import org.jfree.chart.labels.StandardCategoryItemLabelGenerator;
import org.jfree.chart.plot.CategoryPlot;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.chart.renderer.category.BarRenderer3D;
import org.jfree.data.category.CategoryDataset;
import org.jfree.data.category.DefaultCategoryDataset;
import org.jfree.ui.TextAnchor;
import java.awt.*;
/**
 * Thin wrapper around a JFreeChart 3D bar chart for problem 3: collect
 * (series, category, value) triples via {@link #addItem}, then call
 * {@link #plot()} to build the chart and its Swing panel.
 */
public class BarChart {
    ChartPanel frame1;
    DefaultCategoryDataset dataset;

    /** Creates a wrapper backed by an initially empty dataset. */
    public BarChart() {
        dataset = new DefaultCategoryDataset();
    }

    /**
     * Builds the 3D bar chart from the accumulated dataset and wraps it in a
     * ChartPanel. Must be called after the data has been added via addItem().
     */
    public void plot() {
        JFreeChart barChart = ChartFactory.createBarChart3D(
                "蔬菜",                    // chart title
                "蔬菜种类",                // category (domain) axis label
                "数量",                    // value (range) axis label
                dataset,
                PlotOrientation.VERTICAL,
                true,                      // show the legend
                false,                     // no tooltips
                true);                     // generate URLs
        // Use CJK-capable fonts everywhere so Chinese labels are not garbled.
        CategoryPlot barPlot = barChart.getCategoryPlot();
        CategoryAxis categoryAxis = barPlot.getDomainAxis();
        categoryAxis.setLabelFont(new Font("黑体", Font.PLAIN, 10));
        categoryAxis.setTickLabelFont(new Font("宋体", Font.PLAIN, 10));
        barPlot.getRangeAxis().setLabelFont(new Font("黑体", Font.BOLD, 15));
        barChart.getLegend().setItemFont(new Font("黑体", Font.BOLD, 15));
        barChart.getTitle().setFont(new Font("宋体", Font.BOLD, 20));
        // Renderer that prints each bar's value above the bar.
        BarRenderer3D renderer = new BarRenderer3D();
        renderer.setBaseItemLabelGenerator(new StandardCategoryItemLabelGenerator());
        renderer.setBaseItemLabelFont(new Font("黑体", Font.PLAIN, 15));
        renderer.setBasePositiveItemLabelPosition(
                new ItemLabelPosition(ItemLabelAnchor.OUTSIDE12, TextAnchor.BASELINE_CENTER));
        renderer.setBaseItemLabelsVisible(true);
        barPlot.setRenderer(renderer);
        frame1 = new ChartPanel(barChart, true);
    }

    /** Records one value under (rowKey = series, columnKey = category). */
    public void addItem(float value, String rowKey, String columnKey) {
        this.dataset.addValue(value, rowKey, columnKey);
    }

    public ChartPanel getChartPanel() {
        return frame1;
    }
}
MaxPrice.java
该类为启动类,运行此文件即可得到最终的运行结果.
package problem3;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import javax.swing.*;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
/**
 * Driver for problem 3: runs the MapReduce job that computes, per vegetable,
 * the maximum price together with its area and date, then shows two bar
 * charts (max price keyed by area and by date).
 *
 * Fixes over the original: the HDFS stream and BufferedReader were never
 * closed (resource leak) and the boolean result of waitForCompletion() was
 * ignored.
 */
public class MaxPrice {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance();
        job.setJarByClass(MaxPrice.class);
        job.setJobName("Get some Max Price");
        FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/test/蔬菜.txt"));
        FileOutputFormat.setOutputPath(job, new Path("./result3"));
        job.setMapperClass(MaxPrice_Mapper.class);
        job.setReducerClass(MaxPrice_Reducer.class);
        job.setMapOutputKeyClass(Date_Area_Vegetable.class);
        job.setMapOutputValueClass(FloatWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FloatWritable.class);
        // Fail fast if the job did not succeed instead of plotting stale data.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }

        BarChart chart_Area = new BarChart();
        BarChart chart_Time = new BarChart();
        Configuration conf = new Configuration();
        Path inFile = new Path("/user/95192/result3/part-r-00000");
        FileSystem hdfs = FileSystem.get(conf);
        // try-with-resources closes both the HDFS stream and the reader.
        try (FSDataInputStream inputStream = hdfs.open(inFile);
             BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
            for (String str = reader.readLine(); str != null; str = reader.readLine()) {
                System.out.println(str);
                // Reducer output: [0]=vegetable, [1]=area, [2]=date, [3]=price.
                String[] values = str.split("\\s+");
                float price = Float.parseFloat(values[3]);
                chart_Area.addItem(price, values[0], values[1]);
                chart_Time.addItem(price, values[0], values[2]);
            }
        }

        chart_Area.plot();
        chart_Time.plot();
        JFrame Time_frame = new JFrame("时间分类最大值条图");
        Time_frame.add(chart_Time.getChartPanel());
        Time_frame.setBounds(50, 10, 1024, 768);
        Time_frame.setVisible(true);
        JFrame Area_frame = new JFrame("地区分类最大值条图");
        Area_frame.add(chart_Area.getChartPanel());
        Area_frame.setBounds(50, 10, 1024, 768);
        Area_frame.setVisible(true);
    }
}
MaxPrice_Mapper.java
package problem3;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
/**
 * Mapper for problem 3: emits one (area, date, vegetable) -> price pair per
 * daily price column, for every vegetable.
 *
 * Expected line layout after whitespace split: [0]=serial number,
 * [1]=vegetable name, [2..6]=five daily prices, [7]=wholesale-market name
 * (ignored), [8]+[9]=area (two tokens joined).
 */
public class MaxPrice_Mapper extends Mapper<LongWritable, Text, Date_Area_Vegetable, FloatWritable> {

    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\\s+");
        // Skip rows whose first token is "1" (mirrors the original filter;
        // presumably drops a header/first row — confirm against the data file).
        if (fields[0].equals("1")) {
            return;
        }
        String vegetable = fields[1];
        // Parse all five prices first so a malformed row emits nothing.
        float[] prices = new float[5];
        for (int i = 0; i < 5; i++) {
            prices[i] = Float.parseFloat(fields[2 + i]);
        }
        String area = fields[8] + fields[9];
        String[] dates = {"2014年1月1日", "2014年1月2日", "2014年1月3日", "2014年1月4日", "2014年1月5日"};
        for (int i = 0; i < 5; i++) {
            context.write(new Date_Area_Vegetable(area, dates[i], vegetable),
                    new FloatWritable(prices[i]));
        }
    }
}
MaxPrice_Reducer.java
package problem3;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;
/**
 * Reducer for problem 3: emits one "vegetable area date -> max price" line
 * per key group.
 *
 * Fixes over the original: Float.MIN_VALUE is the smallest POSITIVE float
 * (~1.4e-45), not the most negative value, so it is the wrong "no maximum
 * yet" sentinel — Float.NEGATIVE_INFINITY is correct. The AtomicReference
 * added needless Float boxing to what is a plain sequential loop.
 */
public class MaxPrice_Reducer extends Reducer<Date_Area_Vegetable, FloatWritable, Text, FloatWritable> {
    public void reduce(Date_Area_Vegetable tri, Iterable<FloatWritable> values, Context context)
            throws IOException, InterruptedException {
        float max = Float.NEGATIVE_INFINITY;
        for (FloatWritable value : values) {
            max = Math.max(max, value.get());
        }
        // tri reflects the last key of the group here, as in the original.
        context.write(new Text(tri.getName() + " " + tri.getArea() + " " + tri.getDate()),
                new FloatWritable(max));
    }
}
Date_Area_Vegetable.java
该类是一个三元组类,也为自定义的一个键
package problem3;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Composite map-output key (area, date, vegetable name) for problem 3.
 *
 * Ordering, equality and hashing are all by vegetable name only, so that
 * every record for one vegetable sorts together and reaches a single
 * reduce() call (where the maximum price is computed).
 *
 * Fix over the original: equals()/hashCode() were not overridden, so
 * HashPartitioner used Object's identity hash — records with the same
 * vegetable could land on different reducers, producing several "maximums"
 * per vegetable whenever more than one reducer runs.
 */
public class Date_Area_Vegetable implements WritableComparable<Date_Area_Vegetable> {
    String area;
    String date;
    String name;

    public Date_Area_Vegetable(String area, String date, String name) {
        this.area = area;
        this.date = date;
        this.name = name;
    }

    /** No-arg constructor required by Hadoop serialization. */
    public Date_Area_Vegetable() {
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getArea() {
        return area;
    }

    public void setArea(String area) {
        this.area = area;
    }

    public String getDate() {
        return date;
    }

    public void setDate(String date) {
        this.date = date;
    }

    /** Compares by vegetable name only: same vegetable => same sort group. */
    @Override
    public int compareTo(Date_Area_Vegetable o) {
        return name.compareTo(o.name);
    }

    /** Consistent with compareTo: equality by vegetable name. */
    @Override
    public boolean equals(Object obj) {
        return obj instanceof Date_Area_Vegetable
                && name.equals(((Date_Area_Vegetable) obj).name);
    }

    /** Consistent with equals, so HashPartitioner keeps a name on one reducer. */
    @Override
    public int hashCode() {
        return name.hashCode();
    }

    // write/readFields must use the same field order.
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(name);
        dataOutput.writeUTF(area);
        dataOutput.writeUTF(date);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        name = dataInput.readUTF();
        area = dataInput.readUTF();
        date = dataInput.readUTF();
    }
}
GroupingComparator.java
该类为自定义的分组类
package problem2;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import problem1.Date_Area_Vegetable;
//自定义分组类,重写compare()方法
public class GroupingComparator extends WritableComparator {
protected GroupingComparator() {
super(problem1.Date_Area_Vegetable.class, true);
}
public int compare(WritableComparable cc1, WritableComparable cc2) {
problem1.Date_Area_Vegetable l1 = (problem1.Date_Area_Vegetable) cc1;
problem1.Date_Area_Vegetable l2 = (Date_Area_Vegetable) cc2;
String l = l1.getName();
String r = l2.getName();
return l.compareTo(r);// 比较两个蔬菜名字
}
}
运行结果