最近需要统计各种关键词。写入 txt 文件是最基本也是最简单的方式,但对同一类型下的相似统计会生成大量 txt 文件,不太友好,因此选择写入 Excel 工作表的方式,来进行同一类别下不同子类的统计。
使用的是 C# 的 NPOI 库来操作 Excel;关于 NPOI,可以在网上查找更多资料。
需求:统计各个学科下所有期刊的关键词
读取 Excel 表:
/// <summary>
/// Reads the subject/journal workbook and groups journal ids by subject name.
/// </summary>
/// <returns>
/// A dictionary keyed by subject name. Each value is the list of journal ids
/// found for that subject, joined with "+" in the order the rows appear.
/// </returns>
private Dictionary<string,string> readExcle()
{
    // Zero-based column positions read from each data row.
    const int journalIdColumn = 1;
    const int subjectNameColumn = 8;

    Dictionary<string, string> subject_journal_dic = new Dictionary<string, string>(); // subject -> journal ids
    string importExcelPath = "E:\\*****.xlsx"; // subject/journal source workbook
    IWorkbook readWorkbook = WorkbookFactory.Create(importExcelPath);
    ISheet sheet = readWorkbook.GetSheetAt(0); // first worksheet

    // Row 0 holds the column names, so data starts at row 1.
    for (int i = 1; i <= sheet.LastRowNum; i++)
    {
        IRow dataItem = sheet.GetRow(i);

        // GetRow returns null for a row that was never written; treat it like
        // the empty-journal-id sentinel below and stop reading.
        if (dataItem == null)
        {
            break;
        }

        // GetCell addresses the cell by column index. Indexing the Cells list
        // instead would shift positions whenever a cell in the row is missing.
        string journalId = dataItem.GetCell(journalIdColumn)?.ToString();
        if (string.IsNullOrEmpty(journalId))
        {
            // The first row without a journal id marks the end of the data.
            break;
        }

        // A missing subject cell is grouped under the empty name, the same as
        // a cell that exists but is blank.
        string subjectName = dataItem.GetCell(subjectNameColumn)?.ToString() ?? string.Empty;

        string existingJournals;
        if (subject_journal_dic.TryGetValue(subjectName, out existingJournals))
        {
            subject_journal_dic[subjectName] = existingJournals + "+" + journalId;
        }
        else
        {
            subject_journal_dic.Add(subjectName, journalId);
        }
    }

    return subject_journal_dic;
}
写入新表:
//写 Excel
/// <summary>
/// Creates one worksheet per subject returned by <c>readExcle</c>, fills it
/// with the most frequent keywords and their counts, and saves the workbook
/// to the export path. Blocks on a key press before returning.
/// </summary>
public void start()
{
    // Maximum number of keyword rows written to each sheet.
    const int maxKeywordsPerSheet = 30;

    Dictionary<string, string> subject_journal_dic = readExcle();
    string exportExcelPath = "e:\\*****.xlsx";
    var writeWorkbook = new XSSFWorkbook();

    foreach (var item in subject_journal_dic)
    {
        Console.WriteLine($"处理学科:{item.Key}");

        // One sheet per subject; "/" in the subject name is replaced with "-"
        // before it is used as the sheet name.
        var writeSheet = writeWorkbook.CreateSheet(item.Key.Replace("/","-"));

        // Row 0 is the header row.
        IRow writeColumns = writeSheet.CreateRow(0);
        writeColumns.CreateCell(0).SetCellValue("关键词");
        writeColumns.CreateCell(1).SetCellValue("词频");

        var keywordsDic = new Dictionary<string, int>();
        // Collect the keywords of every journal under this subject into
        // keywordsDic (implement this yourself; typically a database query).
        // ·······

        // Write the keywords in descending frequency order, capped per sheet.
        int rowIndex = 1;
        foreach (var keyword in keywordsDic.OrderByDescending(c => c.Value).Take(maxKeywordsPerSheet))
        {
            IRow keywordRow = writeSheet.CreateRow(rowIndex);
            rowIndex++;
            keywordRow.CreateCell(0).SetCellValue(keyword.Key);   // keyword
            keywordRow.CreateCell(1).SetCellValue(keyword.Value); // frequency
        }
    }

    // FileMode.Create truncates an existing file, so a shorter workbook never
    // leaves stale bytes from a previous, larger file behind. The stream is
    // opened only once the workbook is fully built and is released by the
    // using block even if Write throws.
    using (FileStream fs = new FileStream(exportExcelPath, FileMode.Create, FileAccess.ReadWrite))
    {
        writeWorkbook.Write(fs);
    }

    Console.WriteLine("处理完成");
    Console.ReadKey();
}