递归调用搜索和多线程搜索文件效率对比

先上结果:
多线程版本输出:

used time is: 0.014919s
all thread is done
scan completed, there are 10740 file(s) and 211 document(s) total: 10951


递归调用版本输出:

open ../.cache/dconf/ failed, Error: Permission denied
used time is: 0.007569s
scan completed, there are 10740 file(s) and 211 document(s) total: 10951


从上面的输出可以看出,递归调用版本的耗时小于多线程版本。需要注意的是,两个程序都使用 clock() 计时,它统计的是进程消耗的 CPU 时间(多线程时通常是所有线程的 CPU 时间之和),而不是实际经过的时间,因此这个对比结果仅供参考。下边是两个版本的源程序。

多线程版本:

#include <stdio.h>
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <strings.h>
#include <iostream>
#include <vector>
#include <string.h>
#include <pthread.h>
#include <unistd.h>

#include <time.h>

using namespace std;

// A task is the path of one directory that still has to be scanned.
typedef string TASKLISTTYPEDEF;


vector <TASKLISTTYPEDEF> g_task_list;    ///> pending directories, shared by all worker threads
pthread_mutex_t g_mutex;             ///> global mutex; workers hold it while touching the task list, the status table and the totals
const unsigned int g_max_jobs = 64;  ///> number of worker threads that main() tries to start

/*
// Per-thread status table, indexed by the worker id passed to dir_proc_pthread.
// 0: the thread is idle (waiting for work)
// 1: the thread is scanning a directory
// 2: the thread found no work and no busy thread, and is exiting
*/
int g_threads_statu[g_max_jobs] = {0};

int g_total_file = 0;   // running total of non-directory entries found so far
int g_total_dir = 0;    // running total of sub-directories found so far


// Scans one directory; see the definition further down for the contract.
int find_dir_file(const string path, int &file_cnt, int &dir_cnt, vector <TASKLISTTYPEDEF> &new_task_list);
// Worker thread entry point; arg points to the worker's int id.
void *dir_proc_pthread(void *arg);

// Worker thread entry point.
//
// Repeatedly takes one directory from g_task_list, scans it with
// find_dir_file(), appends the sub-directories it found to g_task_list and
// adds the per-directory counts to g_total_file / g_total_dir.
//
// arg points to an int holding this worker's index into g_threads_statu.
// The worker returns NULL once the task list is empty and no worker is
// marked as working, because at that point no new task can appear.
void *dir_proc_pthread(void *arg)
{
    // Values stored in g_threads_statu (same numbering as the table's comment).
    enum { STATE_IDLE = 0, STATE_WORKING = 1, STATE_DONE = 2 };

    const int id = *(int*)arg;   ///> this worker's slot in g_threads_statu
    string dir;
    vector <TASKLISTTYPEDEF> new_task_list;
    int dir_cnt = 0;
    int file_cnt = 0;

    while(true)
    {
        // Decision taken under the lock; kept in a local so the shared table
        // is never read again after the mutex has been released.
        int state = STATE_IDLE;

        pthread_mutex_lock(&g_mutex);
        if(!g_task_list.empty())
        {
            // Processing order does not affect the totals, so take the last
            // element: pop_back() is O(1) while erasing the front shifts
            // every remaining element.
            dir = g_task_list.back();
            g_task_list.pop_back();
            state = STATE_WORKING;
        }
        else
        {
            // This worker has already published the results of its previous
            // task, so clear its own flag first; otherwise it would see
            // itself as "working" and wait one extra cycle for nothing.
            g_threads_statu[id] = STATE_IDLE;
            state = STATE_DONE;
            for(unsigned int i = 0; i < g_max_jobs; i++)
            {
                if(STATE_WORKING == g_threads_statu[i])
                {
                    // Someone may still add sub-directories: keep waiting.
                    state = STATE_IDLE;
                    break;
                }
            }
        }
        g_threads_statu[id] = state;
        pthread_mutex_unlock(&g_mutex);

        if(state == STATE_DONE)
        {
            break;
        }
        if(state == STATE_IDLE)
        {
            usleep(1250);    // poll again after 1.25 ms
            continue;
        }

        // STATE_WORKING: scan outside the lock, then publish the results.
        find_dir_file(dir, file_cnt, dir_cnt, new_task_list);
        pthread_mutex_lock(&g_mutex);
        g_task_list.insert(g_task_list.end(), new_task_list.begin(), new_task_list.end());
        g_total_dir += dir_cnt;
        g_total_file += file_cnt;
        pthread_mutex_unlock(&g_mutex);
        // NOTE(review): this 1 ms pause after every directory is kept from
        // the original pacing; it is not needed for correctness.
        usleep(1000);
    }

    // Return normally instead of calling pthread_exit() so that the local
    // string/vector objects are destroyed by ordinary scope exit.
    return NULL;
}
//查找指定目录下的所有文件和目录
//目录打开出错后会自动结束查找过程
int find_dir_file(const string path, int &file_cnt, int &dir_cnt, vector <TASKLISTTYPEDEF> &new_task_list)
{
    DIR *dir = NULL;                ///>路径信息
    file_cnt = 0;
    dir_cnt = 0;
    new_task_list.clear();
    //取出文件夹列表中的第一个元素,然后移除
    dir = opendir(path.data());
    if(dir == NULL)
    {
        cout << "open " << path << " failed. ";
        fflush(stdout);
        perror("Error");
        return 0;
    }
    //读取目录下所有的文件夹和文件名称
    struct dirent *_dirent = NULL;// = readdir(dir);
    do
    {
        //读取当前目录下的一个文件或文件夹信息
        _dirent = readdir(dir);
        //到达当前目录末尾,跳出循环
        if(NULL == _dirent)
        {
            break;
        }
        //判断类型,如果是文件则打印目录,如果是目录则迭代进入查找
        if(DT_DIR == _dirent->d_type)
        {
            //如果是当前目录或是上级目录则跳过,否则进入目录
            if(0 == strcmp(_dirent->d_name, ".") || 0 == strcmp(_dirent->d_name, ".."))
            {
                continue;
            }
            else
            {
                dir_cnt++;
                string new_path = path + string(_dirent->d_name) + "/";
                new_task_list.push_back(new_path);
            }
        }
        else
        {
            file_cnt++;
        }
    }while(_dirent != NULL);
    if(dir != NULL)
    {
        closedir(dir);
    }
    return 0;
}

//主函数
int main(int arg, char *argc[])
{
    double start = clock();
    string path;    ///> 存放初始路径
    pthread_t threads_ids[g_max_jobs];  ///> 记录各个线程ID号
    int thread_num_table[g_max_jobs] = {0};   ///> 生成各个线程的编号
    //如果传入了指定目录则按照指定目录所搜,如果没有指定目录则按当前目录搜索
    if(argc[1] != NULL)
    {
        path = string(argc[1]);
    }
    else
    {
        path = string("./");
    }

    //初始化互斥锁
    pthread_mutex_init(&g_mutex, NULL);
    g_task_list.push_back(path);
    //创建所需要的线程,作为消费者
    for(int i = 0; i < g_max_jobs; i++)
    {
        thread_num_table[i] = i;
        g_threads_statu[i] = 0;
        pthread_create(&threads_ids[i], NULL, &dir_proc_pthread, (void*)&thread_num_table[i]);
    }

    //等待所有线程结束
    for(int i = 0; i < g_max_jobs; i++)
    {
        pthread_join(threads_ids[i], NULL);
    }
    double end = clock();
    cout << "used time is: " << (end - start) / CLOCKS_PER_SEC << endl;
    cout << "all thread is done" << endl;

    //输出查询结果
    cout << "scan completed, there are " << g_total_file << " file(s) and " << g_total_dir << " document(s)" \
         << " total: " << g_total_file + g_total_dir << endl;

    //销毁互斥锁
    pthread_mutex_destroy(&g_mutex);

    return 0;
}

//end of file

递归调用版本:

#include <stdio.h>
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <string.h>
#include <time.h>

/*
 * Totals collected by the recursive scan.
 * The struct tag no longer starts with an underscore followed by an
 * upper-case letter, since such identifiers are reserved for the
 * implementation; the typedef name FILEDOCNUM is unchanged.
 */
typedef struct FILEDOCNUM{
    unsigned int filenum;   /* entries that are not directories */
    unsigned int docnum;    /* sub-directories, excluding "." and ".." */
}FILEDOCNUM;

/* Global counters updated by find_all_dir_file() and printed by main(). */
FILEDOCNUM filedocnum = {0, 0};

/*
 * Recursively scans base_path and updates the global filedocnum counters:
 * filenum for every entry whose d_type is not DT_DIR, docnum for every
 * sub-directory other than "." and "..".
 *
 * base_path: directory to scan; a trailing '/' is optional.
 * Returns 0 in all cases. A directory that cannot be opened is reported on
 * stdout/stderr and skipped; a sub-directory whose path does not fit into
 * the path buffer is counted, reported on stderr and not descended into.
 */
int find_all_dir_file(const char *base_path)
{
    enum { PATH_BUF_SIZE = 1024 };  /* capacity of the child path buffer */

    DIR *dir = opendir(base_path);
    if(dir == NULL)
    {
        /* printf/fflush may change errno; restore it for perror(). */
        int saved_errno = errno;
        printf("open %s failed, ", base_path);
        fflush(stdout);
        errno = saved_errno;
        perror("Error");
        return 0;
    }

    /*
     * Child paths are built as base + separator + name + "/". Only add the
     * separator when the base does not already end with one, so that both
     * "/tmp" and "/tmp/" work as a starting point.
     */
    size_t base_len = strlen(base_path);
    const char *sep = (base_len > 0 && base_path[base_len - 1] == '/') ? "" : "/";

    /* Read entries until readdir() reports the end of the directory. */
    struct dirent *entry = NULL;
    while((entry = readdir(dir)) != NULL)
    {
        /*
         * NOTE(review): file systems that do not fill in d_type report
         * DT_UNKNOWN, in which case a directory would be counted as a file
         * here and not descended into - confirm for the target file system.
         */
        if(DT_DIR != entry->d_type)
        {
            filedocnum.filenum++;
            continue;
        }
        /* Skip the current and parent directory entries. */
        if(0 == strcmp(entry->d_name, ".") || 0 == strcmp(entry->d_name, ".."))
        {
            continue;
        }

        filedocnum.docnum++;

        /* Bounded formatting: never write past the end of the buffer. */
        char enter_path[PATH_BUF_SIZE];
        int n = snprintf(enter_path, sizeof enter_path, "%s%s%s/", base_path, sep, entry->d_name);
        if(n < 0 || (size_t)n >= sizeof enter_path)
        {
            fprintf(stderr, "skip %s%s%s/: path too long\n", base_path, sep, entry->d_name);
            continue;
        }
        find_all_dir_file(enter_path);
    }

    closedir(dir);
    return 0;
}

/*
 * Program entry point of the recursive scanner.
 *
 * Scans the directory given as the first command-line argument (or "./"
 * when none is given) and prints the time used and the totals.
 */
int main(int argc, char *argv[])
{
    /*
     * NOTE(review): clock() reports processor time used by the program, not
     * elapsed wall-clock time.
     */
    double start = clock();

    /*
     * Pass the argument through directly instead of copying it into a
     * fixed-size buffer, which could overflow for long arguments.
     */
    const char *path = (argc > 1 && argv[1] != NULL) ? argv[1] : "./";

    printf("begin to scan all file and path\r\n");
    find_all_dir_file(path);
    double end = clock();
    printf("used time is: %f\r\n", (end - start) / CLOCKS_PER_SEC);
    /* The counters are unsigned int, so they are printed with %u. */
    printf("scan completed, there are %u file(s) and %u document(s) total: %u\r\n",
           filedocnum.filenum, filedocnum.docnum, filedocnum.filenum + filedocnum.docnum);
    return 0;
}

猜你喜欢

转载自blog.csdn.net/wolf_tong/article/details/80925078