Nginx前端埋点数据采集

参考链接
http://blog.codinglabs.org/articles/how-web-analytics-data-collection-system-work.html

准备工作
1. OpenResty
下载路径: http://openresty.org/cn/download.html

openresty安装
http://openresty.org/cn/installation.html

实验过程
需要收集数据的页面代码

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <script src="jquery-3.4.1.js"></script>
    <title>start页面</title>
    <script type="text/javascript">
        // Global command queue consumed by ma.js; reuse it if it already exists.
        var _maq = _maq || [];
        _maq.push(['_setAccount','u-z1234']);

        // Load ma.js asynchronously by inserting a new <script> element
        // in front of the first script tag of the document.
        (function() {
            var firstScript = document.getElementsByTagName('script')[0];
            var tracker = document.createElement('script');
            tracker.type = 'text/javascript';
            tracker.async = true;
            tracker.src = 'https://[服务器ip]/ma.js';
            firstScript.parentNode.insertBefore(tracker, firstScript);
        })();
    </script>
</head>
<body>
     <h1><a href="/end">点我跳转end页面</a></h1>
</body>
</html>

向服务器发送请求ma.js
ma.js 汇总页面上需要采集的数据,用 encodeURIComponent 方法对每个参数值进行 URI 组件编码,再通过 Image 对象把这些参数附在查询串中,向服务器请求 /log.gif。ma.js 文件放在 Nginx 安装目录下的 html 目录(即站点根目录)里就行。

/**
 * ma.js - page-side collector.
 *
 * Gathers document, screen and navigator values plus the account id pushed
 * onto the global _maq queue, URI-encodes each value, and reports them to the
 * server by requesting /log.gif through an Image object.
 *
 * The key=value pairs are joined with '&' because the server side reads them
 * through nginx $arg_<name> variables, which only recognise '&' as the
 * argument delimiter.
 */
(function () {
    var params = {};

    // Document data. The typeof guards avoid a ReferenceError when a global
    // is not declared at all.
    if (typeof document !== 'undefined' && document) {
        params.domain = document.domain || '';
        params.url = document.URL || '';
        params.title = document.title || '';
        params.referrer = document.referrer || '';
    }

    // Screen data.
    if (typeof window !== 'undefined' && window && window.screen) {
        params.sh = window.screen.height || 0;
        params.sw = window.screen.width || 0;
        params.cd = window.screen.colorDepth || 0;
    }

    // Navigator data.
    if (typeof navigator !== 'undefined' && navigator) {
        params.lang = navigator.language || '';
    }

    // Process the _maq command queue. An indexed loop is used so that only
    // the array elements are visited, not inherited enumerable properties.
    var queue = (typeof _maq !== 'undefined' && _maq) ? _maq : [];
    for (var q = 0; q < queue.length; q++) {
        var command = queue[q];
        if (!command) {
            continue;
        }
        switch (command[0]) {
            case '_setAccount':
                params.account = command[1];
                break;
            default:
                break;
        }
    }

    // Build the query string from the collected parameters.
    var pairs = [];
    for (var key in params) {
        if (Object.prototype.hasOwnProperty.call(params, key)) {
            pairs.push(key + '=' + encodeURIComponent(params[key]));
        }
    }
    var args = pairs.join('&');

    // Send the data by requesting the tracking pixel.
    var img = new Image(1, 1);
    img.src = 'https://[服务器ip地址]/log.gif?' + args;
})();

Nginx.conf 配置
我这里的 Nginx 启用了 HTTPS,所以配置里有一些 SSL 设置;自己做实验时,可以把 HTTPS server 块里的配置移到 HTTP server 块里。整个流程如下:页面请求的 ma.js 最后用 Image 对象携带参数向服务器请求 /log.gif;Nginx 匹配到 location /log.gif 后,先对用户跟踪 cookie 做处理,再向内部的 /i-log 发起子请求;location /i-log 解析各个参数,按照 log_format 定义的格式把日志输出到指定文件,然后返回空字符串。这样就采集到自己所需要的数据了。


#user  nobody;
# Number of worker processes.
worker_processes  2;

#error_log  logs/error.log;
#error_log  logs/error.log  notice;
#error_log  logs/error.log  info;

#pid        logs/nginx.pid;


events {
    # Maximum number of simultaneous connections per worker process.
    worker_connections  1024;
}


http {
    include       mime.types;
    default_type  application/octet-stream;

    #log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
    #                  '$status $body_bytes_sent "$http_referer" '
    #                  '"$http_user_agent" "$http_x_forwarded_for"';

    # "tick" format for the tracking log: "||"-separated fields in the order
    # timestamp, client IP, status, bytes sent, the $u_* page values, the
    # User-Agent header and the account id. escape=json selects JSON escaping
    # for the variable values.
    log_format tick escape=json  
    "$msec||$remote_addr||$status||$body_bytes_sent||$u_domain||$u_url||$u_title||$u_referrer||$u_sh||$u_sw||$u_cd||$u_lang||$http_user_agent||$u_account"; 
    #access_log  logs/access.log  main;

    sendfile        on;
    #tcp_nopush     on;

    #keepalive_timeout  0;
    keepalive_timeout  65;

    #gzip  on;
    # HTTP server
    server {
        listen       80;
        server_name  localhost;
        # Redirect every plain-HTTP request to the same host and URI over
        # HTTPS. "return" is used rather than a catch-all regex "rewrite":
        # no regex has to be evaluated and $request_uri carries the original
        # path together with its query string.
        return 301 https://$host$request_uri;
        #charset koi8-r;

        #access_log  logs/host.access.log  main;
    }
    
    # HTTPS server
    server {
        listen       443 ssl;
        server_name  localhost;
        root html;
        # Index files tried in order for directory requests.
        index  index.html index.htm;

        # SSL settings
        ssl_certificate      /usr/local/openresty/nginx/cert/xxx.crt;
        ssl_certificate_key  /usr/local/openresty/nginx/cert/xxx.key;

        ssl_session_timeout  5m;

        ssl_ciphers  ECDHE-RSA-AES128-GCM-SHA256:ECDHE:ECDH:AES:HIGH:!NULL:!aNULL:!MD5:!ADH:!RC4;
        # TLS 1.0 and 1.1 are deprecated (RFC 8996), so only 1.2 and 1.3 are
        # enabled. TLSv1.3 needs nginx 1.13.0+ built against OpenSSL 1.1.1+;
        # remove it on older builds.
        ssl_protocols TLSv1.2 TLSv1.3;
        ssl_prefer_server_ciphers  on;

	

        location /log.gif {
            # Serve the tracking pixel as a GIF image.
            default_type image/gif;
            # The pixel request itself is not logged; the record is written
            # by the /i-log subrequest issued below.
            access_log off;

            access_by_lua_block {
                -- The visitor-tracking cookie is named __utrace.
                local uid = ngx.var.cookie___utrace
                if not uid then
                    -- No cookie yet: derive an id as
                    -- md5(timestamp .. client IP .. User-Agent).
                    -- The User-Agent header may be absent (nil), so fall back
                    -- to an empty string to keep the concatenation from raising.
                    local agent = ngx.var.http_user_agent or ''
                    uid = ngx.md5(ngx.now() .. ngx.var.remote_addr .. agent)
                end
                ngx.header['Set-Cookie'] = {'__utrace=' .. uid .. '; path=/'}
                if ngx.var.arg_domain then
                    -- Record the hit through a subrequest to /i-log, passing
                    -- the original arguments plus the tracking id. The id can
                    -- come from a client-supplied cookie, so it is URI-escaped.
                    ngx.location.capture('/i-log?' .. ngx.var.args .. '&utrace=' .. ngx.escape_uri(uid))
                end
            }

            # Tell clients not to cache this resource.
            add_header Expires "Fri, 01 Jan 1980 00:00:00 GMT";
            add_header Pragma "no-cache";
            add_header Cache-Control "no-cache, max-age=0, must-revalidate";
            # Respond with a 1x1 empty GIF image.
            empty_gif;
        }
        
        location /i-log {
            # Internal location: cannot be requested directly by clients.
            internal;

            # Log subrequests, writing them to ma.log in the "tick" format.
            log_subrequest on;
            access_log /usr/local/openresty/nginx/nginx_logs/ma.log tick;

            # Decode each URL-encoded query argument into its $u_* variable
            # (set_unescape_uri comes from the ngx_set_misc module).
            # NOTE(review): no variable is set from the "utrace" argument here.
            set_unescape_uri $u_account  $arg_account;
            set_unescape_uri $u_domain   $arg_domain;
            set_unescape_uri $u_url      $arg_url;
            set_unescape_uri $u_title    $arg_title;
            set_unescape_uri $u_referrer $arg_referrer;
            set_unescape_uri $u_lang     $arg_lang;
            set_unescape_uri $u_sh       $arg_sh;
            set_unescape_uri $u_sw       $arg_sw;
            set_unescape_uri $u_cd       $arg_cd;

            # Respond with an empty string.
            echo '';
        }

        #error_page  404              /404.html;

        # redirect server error pages to the static page /50x.html
        #
        error_page   500 502 503 504  /50x.html;
        location = /50x.html {
            # The error page is looked up under the html directory.
            root   html;
        }

        # proxy the PHP scripts to Apache listening on 127.0.0.1:80
        #
        #location ~ \.php$ {
        #    proxy_pass   http://127.0.0.1;
        #}

        # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
        #
        #location ~ \.php$ {
        #    root           html;
        #    fastcgi_pass   127.0.0.1:9000;
        #    fastcgi_index  index.php;
        #    fastcgi_param  SCRIPT_FILENAME  /scripts$fastcgi_script_name;
        #    include        fastcgi_params;
        #}

        # deny access to .htaccess files, if Apache's document root
        # concurs with nginx's one
        #
        #location ~ /\.ht {
        #    deny  all;
        #}
    }
    


    # another virtual host using mix of IP-, name-, and port-based configuration
    #
    #server {
    #    listen       8000;
    #    listen       somename:8080;
    #    server_name  somename  alias  another.alias;

    #    location / {
    #        root   html;
    #        index  index.html index.htm;
    #    }
    #}


    
    #server {
    #    listen       443 ssl;
    #    server_name  localhost;

    #    ssl_certificate      cert.pem;
    #    ssl_certificate_key  cert.key;

    #    ssl_session_cache    shared:SSL:1m;
    #    ssl_session_timeout  5m;

    #    ssl_ciphers  HIGH:!aNULL:!MD5;
    #    ssl_prefer_server_ciphers  on;

    #    location / {
    #        root   html;
    #        index  index.html index.htm;
    #    }
    #}

}

每小时日志切分脚本

#!/bin/bash
# Hourly rotation of the tracking log: move the current ma.log into the
# archive directory under a timestamped name, then signal nginx (USR1) so it
# reopens its log files.

# Log file written by nginx
log_path='/usr/local/openresty/nginx/nginx_logs/ma.log'
# Directory that receives the rotated files
new_log_path='/usr/local/openresty/nginx/nginx_logs_byhour'
# nginx master pid file
pid_file='/usr/local/openresty/nginx/logs/nginx.pid'

# Timestamp of one hour ago (year, month, day, hour, minute, second),
# used as the archive file name. Requires GNU date for -d.
curr_time=$(date -d "1 hour ago" +"%Y%m%d_%H%M%S")

status=0

# Move the current log into the archive directory, creating it if needed.
if [ -f "$log_path" ]; then
    if ! { mkdir -p "$new_log_path" && mv "$log_path" "$new_log_path/$curr_time.log"; }; then
        echo "failed to move $log_path to $new_log_path/$curr_time.log" >&2
        status=1
    fi
else
    echo "log file not found: $log_path" >&2
    status=1
fi

# Ask nginx to reopen its log files; skip the signal when no pid is available
# instead of calling kill with an empty argument.
if [ -s "$pid_file" ]; then
    kill -USR1 "$(cat "$pid_file")" || status=1
else
    echo "nginx pid file missing or empty: $pid_file" >&2
    status=1
fi

exit "$status"

定时任务

0 * * * * /root/nginx_cplog.sh

猜你喜欢

转载自blog.csdn.net/zZsSzss/article/details/103370184