centos7 安装部署airflow

版权声明:本文为博主原创文章,转载请注明出处 https://blog.csdn.net/vkingnew/article/details/86507597
运行环境:
python 3.6.6
apache airflow 1.10.1
mysql 5.7.24


centos7:
--准备工作:
rpm -ivh https://centos7.iuscommunity.org/ius-release.rpm
yum install epel-release

或者
rpm -ivh https://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/e/epel-release-7-11.noarch.rpm
--安装python36版本:
# yum -y  install python36u-pip python36 python36-libs
-- 升级pip版本:
# pip3.6 install --upgrade pip

# pip --version
pip 18.1 from /usr/local/lib/python3.6/site-packages/pip (python 3.6)
# pip list
Package    Version
---------- -------
pip        18.1   
setuptools 39.0.1 

--安装依赖的软件包:
# yum -y install gcc gcc-c++ cyrus-sasl cyrus-sasl-devel cyrus-sasl-lib

--安装mysql:
# wget http://repo.mysql.com/mysql57-community-release-el7-11.noarch.rpm
# rpm -ivh http://repo.mysql.com/mysql57-community-release-el7-11.noarch.rpm

# yum -y install mysql-community-server  mysql-community-devel mysql-community-common
--配置文件:
#cat /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/tmp/mysql.sock
default-storage-engine          =InnoDB
character_set_server            = utf8mb4
character-set-client-handshake    = FALSE
character-set-server            = utf8mb4
collation-server                = utf8mb4_unicode_ci
init_connect                    ='SET NAMES utf8mb4'
lower_case_table_names        = 1
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
#skip-grant-tables 
plugin-load=validate_password.so 
validate-password=OFF

log-error=/var/log/mysqld.log
pid-file=/tmp/mysqld.pid
explicit_defaults_for_timestamp=true
-- 登录数据库:
systemctl start mysqld
#mysql -p -S /tmp/mysql.sock

--安装完成之后创建库和用户密码:

mysql> alter user root@'localhost' identified by 'xyz';
Query OK, 0 rows affected (0.00 sec)

mysql> grant all privileges on *.* to root@'%' identified by 'xyz' with grant option;
Query OK, 0 rows affected, 1 warning (0.00 sec)

mysql> flush privileges;
Query OK, 0 rows affected (0.00 sec)

 create database airflow;
create user 'airflow'@'%' identified by 'airflow';
create user 'airflow'@'localhost' identified by 'airflow';
grant all on airflow.* to 'airflow'@'%';
grant all privileges on *.* to 'airflow'@'%';
flush privileges;

--查询airflow的文件:
# pip show --files apache-airflow 
--airflow安装的路径:
# find / -name airflow
/usr/local/bin/airflow
/usr/local/lib/python3.6/site-packages/airflow
/usr/local/lib/python3.6/site-packages/airflow/bin/airflow
/usr/local/lib/python3.6/site-packages/airflow/www/templates/airflow
/usr/local/lib/python3.6/site-packages/airflow/www_rbac/templates/airflow

一般的安装路径位于:
airflow会被安装到Python下的第三方包目录中,路径一般为${PYTHON_HOME}/lib/python3.6/site-packages/airflow。

---创建airflow的配置文件:
# mkdir -p /etc/airflow
# cat /etc/airflow/airflow.cfg 
sql_alchemy_conn = mysql://airflow:airflow@localhost:3306/airflow

设置airflow的文件路径:
# export AIRFLOW_HOME=/etc/airflow
--安装MySQL服务端及开发包(编译安装mysqlclient等依赖时需要;数据库的初始化见后面的 airflow initdb 步骤):

# yum -y install mysql-community-server  mysql-community-devel


---安装airflow软件:
#export SLUGIFY_USES_TEXT_UNIDECODE=yes
# pip3 install apache-airflow[all]

--版本查询:
# pip list | grep -i airflow
apache-airflow             1.10.1     
--初始化:
export AIRFLOW_HOME=/etc/airflow
# airflow  initdb


--启动airflow:
nohup airflow scheduler &

nohup airflow webserver -p 8080 &

默认使用8080端口。


---附加:
使用用户和密码登录:
0.安装插件:
#pip install apache-airflow[password]
1.配置文件修改 airflow.cfg
[webserver]
authenticate = True
auth_backend = airflow.contrib.auth.backends.password_auth
2.在python交互环境中执行以下代码,创建登录用户:
import airflow  
from airflow import models, settings  
from airflow.contrib.auth.backends.password_auth import PasswordUser  
user = PasswordUser(models.User())  
user.username = 'admin'   
user.email = 'admin@example.com'
user.password = 'admin'    
session = settings.Session()  
session.add(user)  
session.commit()  
session.close()  
exit()

-- 修改时间:
Airflow默认使用GMT(UTC)时间,而北京时间为GMT+8,因此界面上显示的时间比北京时间少8小时。
1.
2.修改webserver界面右上角当前时间:
vim /usr/local/lib/python3.6/site-packages/airflow/www/templates/admin/master.html 
找到下面这一行原有代码(此处已用 // 注释掉),将其改写为后面“如下”所示的形式:
//var UTCseconds = (x.getTime() + x.getTimezoneOffset()*60*1000);
如下:
    var UTCseconds = x.getTime() //+ x.getTimezoneOffset()*60*1000);
3.修改webserver lastRun时间
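在定义get_last_dagrun方法的DAG类中(Airflow 1.10.1 下位于 /usr/local/lib/python3.6/site-packages/airflow/models.py),于get_last_dagrun方法之前加上方法utc2local(注意与类中其他方法保持相同的缩进),如下: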

def utc2local(self,utc):
        import time
        epoch = time.mktime(utc.timetuple())
        offset = datetime.fromtimestamp(epoch) - datetime.utcfromtimestamp(epoch)
        return utc + offset

    @provide_session
    def get_last_dagrun(self, session=None, include_externally_triggered=False):
       DR = DagRun
        qry = session.query(DR).filter(
            DR.dag_id == self.dag_id,
        )
        if not include_externally_triggered:
            qry = qry.filter(DR.external_trigger.__eq__(False))

        qry = qry.order_by(DR.execution_date.desc())

        last = qry.first()

        return last
4.修改文件:/usr/local/lib/python3.6/site-packages/airflow/www/templates/airflow/dags.html,将其中的
 last_run.execution_date.strftime("%Y-%m-%d %H:%M")和last_run.start_date.strftime("%Y-%m-%d %H:%M")分别修改为:

dag.utc2local(last_run.execution_date).strftime("%Y-%m-%d %H:%M")
dag.utc2local(last_run.start_date).strftime("%Y-%m-%d %H:%M")

示例如下:
之前的代码:
<td class="text-nowrap latest_dag_run {{ dag.dag_id }}">
                  {% if dag %}
                    {% set last_run = dag.get_last_dagrun(include_externally_triggered=True) %}
                    {% if last_run and last_run.execution_date %}
                      <a href="{{ url_for('airflow.graph', dag_id=dag.dag_id, execution_date=last_run.execution_date) }}">
                        {{ last_run.execution_date.strftime("%Y-%m-%d %H:%M") }}
                      </a>
                      <span aria-hidden="true" id="statuses_info" title="Start Date: {{ last_run.start_date.strftime("%Y-%m-%d %H:%M") }}" class="glyphicon glyphicon-info-sign"></span>
                    {% endif %}
                  {% endif %}
                </td>

-- 修改后:
<td class="text-nowrap latest_dag_run {{ dag.dag_id }}">
                  {% if dag %}
                    {% set last_run = dag.get_last_dagrun(include_externally_triggered=True) %}
                    {% if last_run and last_run.execution_date %}
                      <a href="{{ url_for('airflow.graph', dag_id=dag.dag_id, execution_date=last_run.execution_date) }}">
                        {{ dag.utc2local(last_run.execution_date).strftime("%Y-%m-%d %H:%M") }}
                      </a>
                      <span aria-hidden="true" id="statuses_info" title="Start Date: {{ dag.utc2local(last_run.start_date).strftime("%Y-%m-%d %H:%M") }}" class="glyphicon glyphicon-info-sign"></span>
                    {% endif %}
                  {% endif %}
                </td>

修改完成之后重启webserver


--若不使用apache-airflow[all]一次性安装,而是按需单独安装,则至少需要安装以下插件:
export SLUGIFY_USES_TEXT_UNIDECODE=yes
pip install apache-airflow
pip install apache-airflow[devel]
pip install apache-airflow[celery]
pip install apache-airflow[jdbc]
pip install apache-airflow[mysql]
pip install apache-airflow[password]
pip install apache-airflow[rabbitmq]
pip install apache-airflow[redis]

--文章参考:
http://airflow.apache.org/installation.html

猜你喜欢

转载自blog.csdn.net/vkingnew/article/details/86507597