pyspark job code complete

大数据平台支持 PySpark 作业开发。为了方便 Python 代码编写，提供代码自动补全、语法检测、代码格式化功能；编辑器使用 ACE，并使用 Tornado 把这三个功能封装成 REST 接口，供编辑器调用。
#!/usr/bin/env python2
#coding=utf-8

import collections
import linecache
import re

import jedi
import pyflakes.api
import pyflakes.reporter
import tornado.httpserver
import tornado.ioloop
import tornado.web
from jedi import settings
from pyspark.sql import SparkSession
from tornado.escape import json_encode
from tornado.options import define, options
from yapf.yapflib.yapf_api import FormatCode

# jedi completion settings: add an opening bracket after completed function
# names and keep completion matching case-sensitive.
settings.add_bracket_after_function = True
settings.case_insensitive_completion = False

# One lint finding collected by Reporter. `line` holds the source text of the
# reported line when it is available.
Issue = collections.namedtuple('Issue', [
  'line_number',
  'column_number',
  'message',
  'line',
])


class Reporter(pyflakes.reporter.Reporter):
    """pyflakes reporter that accumulates findings in ``self.issues``.

    Every finding is stored as an ``Issue`` tuple instead of being written to
    a stream. Messages listed in ``_ignored_issues`` are dropped.
    """

    _ignored_issues = ["undefined name 'info'", "undefined name 'error'", "undefined name 'warn'"]

    def __init__(self):
        self.issues = []

    def unexpectedError(self, filename, msg):
        """Record an error that carries no line/column information."""
        self._register_issue(filename, None, None, msg, None)

    def syntaxError(self, filename, msg, lineno, offset, text):
        """Record a syntax error together with the offending source text."""
        self._register_issue(filename, lineno, offset, msg, text)

    def flake(self, msg):
        """Record a pyflakes message after interpolating its arguments."""
        rendered = msg.message % msg.message_args
        self._register_issue(msg.filename, msg.lineno, msg.col, rendered, None)

    def _is_ignored(self, issue):
        """Return True when the issue text is on the ignore list."""
        return str(issue) in Reporter._ignored_issues

    def _register_issue(self, path, line_number, column_number, issue, line):
        """Append an ``Issue`` unless its message is ignored.

        When no source text was supplied but a path and line number are
        known, the text is looked up through ``linecache``.
        """
        if self._is_ignored(issue):
            return

        if path and line_number and not line:
            line = linecache.getline(path, line_number)

        self.issues.append(Issue(line_number, column_number, issue, line))

def _check_import(source, row):
    for index, item in enumerate(source.split("\n"), 1):
        item = item.strip()
        if index == row and (item.startswith("import ") or item.startswith("from ")):
            return True

    return False;


class StatusHandler(tornado.web.RequestHandler):
    """Liveness endpoint: answers a GET request with the body ``ok``."""
    def get(self):
        self.write("ok")


class SparkSessionHandler(tornado.web.RequestHandler):
    """Complete code against a namespace that exposes ``sparkSession``.

    POST parameters:
        source: Code fragment to complete (required).
        callback: Optional JSONP callback name. When present the response is
            ``callback(<json>);`` served as JavaScript, otherwise plain JSON.

    The response is a list of ``{"context": ..., "name": ...}`` objects.
    """

    # The callback is echoed into executable JavaScript, so only plain
    # (optionally dotted) identifiers are accepted.
    _JSONP_CALLBACK = re.compile(
        r'[A-Za-z_$][0-9A-Za-z_$]*(?:\.[A-Za-z_$][0-9A-Za-z_$]*)*\Z')

    @staticmethod
    def _context_name(comp):
        """Return the name of the scope that owns completion ``comp``."""
        # NOTE(review): relies on jedi-internal attributes (`_name`,
        # `parent_context`); verify against the pinned jedi version.
        parent = comp._name.parent_context
        if hasattr(parent, '_context'):
            return parent._context.name.string_name
        if hasattr(parent, 'instance'):
            return parent.instance.name.string_name
        return parent.name.string_name

    def post(self):
        """Write the completions for ``source`` as JSON or JSONP.

        Raises:
            tornado.web.HTTPError: 400 when ``source`` is missing or the
                ``callback`` value is not a valid identifier.
        """
        # Required argument: a missing value yields Tornado's 400 response
        # instead of passing None to jedi.
        source = self.get_argument("source")
        callback = self.get_argument('callback', default=None)
        if callback and not self._JSONP_CALLBACK.match(callback):
            raise tornado.web.HTTPError(400, "invalid callback name")

        script = jedi.Interpreter(source, [{"sparkSession": SparkSession}])
        items = [{'context': self._context_name(comp), 'name': comp.name}
                 for comp in script.completions()]

        if callback:
            jsonp = "{jsfunc}({json});".format(jsfunc=callback,
                                               json=json_encode(items))
            self.set_header('Content-Type', 'application/javascript')
            self.write(jsonp)
        else:
            self.set_header('Content-Type', 'application/json')
            self.write(json_encode(items))


class ScriptHandler(tornado.web.RequestHandler):
    """Complete code at a cursor position inside a full script.

    POST parameters:
        source: Full text of the script (required).
        row: Line number of the cursor, as an integer (required).
        column: Column of the cursor, as an integer (required).
        callback: Optional JSONP callback name. When present the response is
            ``callback(<json>);`` served as JavaScript, otherwise plain JSON.

    The response is a list of ``{"context": ..., "name": ...}`` objects.
    """

    # The callback is echoed into executable JavaScript, so only plain
    # (optionally dotted) identifiers are accepted.
    _JSONP_CALLBACK = re.compile(
        r'[A-Za-z_$][0-9A-Za-z_$]*(?:\.[A-Za-z_$][0-9A-Za-z_$]*)*\Z')

    @staticmethod
    def _context_name(comp):
        """Return the label describing where completion ``comp`` comes from."""
        # NOTE(review): relies on jedi-internal attributes (`_name`,
        # `parent_context`, `api_type`); verify against the pinned jedi version.
        name = comp._name
        parent = name.parent_context
        if hasattr(parent, '_context'):
            context = parent._context.name.string_name
        elif hasattr(parent, 'instance'):
            context = parent.instance.name.string_name
        elif name.api_type == 'param':
            context = 'param'
        elif name.api_type == 'function':
            context = 'function'
        else:
            # Drop the trailing len(comp.complete) + 1 characters of the full
            # name. An absent full name is treated as an empty context.
            full_name = comp.full_name or ''
            context = full_name[0:-(len(comp.complete) + 1)]

        if context == '__builtin__':
            context = 'builtin'
        return context

    def post(self):
        """Write the completions at (row, column) as JSON or JSONP.

        Raises:
            tornado.web.HTTPError: 400 when a required argument is missing,
                ``row``/``column`` are not integers, or the ``callback``
                value is not a valid identifier.
        """
        # Required arguments: a missing value yields Tornado's 400 response
        # instead of an unhandled error further down.
        source = self.get_argument("source")
        try:
            row = int(self.get_argument("row"))
            column = int(self.get_argument("column"))
        except ValueError:
            raise tornado.web.HTTPError(400, "row and column must be integers")

        callback = self.get_argument('callback', default=None)
        if callback and not self._JSONP_CALLBACK.match(callback):
            raise tornado.web.HTTPError(400, "invalid callback name")

        script = jedi.Script(source, row, column, '')
        completions = script.completions()
        definitions = script.goto_definitions()

        # No suggestions are offered inside string literals.
        if definitions and definitions[0].name == 'str':
            self.write("[]")
            return

        is_import = _check_import(source, row)

        items = []
        for comp in completions:
            if comp.type == 'module':
                # Modules are only suggested on import lines.
                if is_import:
                    items.append({'context': "module", 'name': comp.name})
                continue

            context = self._context_name(comp)

            name = comp.name
            # NOTE(review): none of the fixed labels above is 'function1', so
            # this only fires when the owning scope itself has that name. It
            # looks intentionally disabled; confirm before changing.
            if context == 'function1':
                params = [param.name for param in comp.params]
                name = "{name}({params})".format(name=name, params=",".join(params))

            items.append({'context': context, 'name': name})

        if callback:
            jsonp = "{jsfunc}({json});".format(jsfunc=callback,
                                               json=json_encode(items))
            self.set_header('Content-Type', 'application/javascript;charset=utf-8')
            self.write(jsonp)
        else:
            self.set_header('Content-Type', 'application/json;charset=utf-8')
            self.write(json_encode(items))


class CodeFormatHandler(tornado.web.RequestHandler):
    """Reformat the posted ``source`` with yapf's ``pep8`` style."""

    def post(self):
        """Write the reformatted code back as UTF-8 plain text."""
        raw_code = self.get_argument("source", default=None)
        self.set_header('Content-Type', 'text/plain;charset=utf-8')

        # FormatCode returns a tuple whose first element is the formatted code.
        formatted = FormatCode(raw_code, style_config='pep8')[0]
        self.write(formatted.encode("utf-8"))


# https://programtalk.com/vs2/python/12155/spiderfoot/ext/stem/util/test_tools.py/
class CheckSyntaxHandler(tornado.web.RequestHandler):
    """Run pyflakes over the posted ``source`` and return the findings."""

    def post(self):
        """Write the collected issues as a JSON document."""
        submitted = self.get_argument("source", default=None)

        collector = Reporter()
        pyflakes.api.check(submitted, '<string>', collector)

        self.set_header('Content-Type', 'application/json;charset=utf-8')
        self.write(json_encode(collector.issues))


def _make_app():
    """Build the Tornado application with every editor-support route."""
    routes = [
        (r"/ok.htm", StatusHandler),
        (r"/sparkSession", SparkSessionHandler),
        (r"/script", ScriptHandler),
        (r"/codeFormat", CodeFormatHandler),
        (r"/checkSyntax", CheckSyntaxHandler),
    ]
    return tornado.web.Application(routes)


# Command-line options; both are read in the __main__ block when the server starts.
# Port the HTTP server binds to.
define("port", default=9110, help="server port")
# Passed to HTTPServer.start().
# NOTE(review): 0 presumably means one process per CPU core; confirm against the Tornado docs.
define("num_processes", default=0, help="bind cpu, production env value :0")


# http://beginman.cn/web/2015/04/06/jsonp-tornado/
# for i in `ps waux | grep "code-autocomplete" | awk '{ print $2 }'`; do kill -15 ${i}; done
if __name__ == "__main__":
    # Set the default log level first so the command line is parsed after it.
    tornado.options.options.logging = "warn"
    # This call had lost its indentation, which ended the `if` suite early and
    # made the indented lines below a syntax error.
    tornado.options.parse_command_line()

    app = _make_app()

    # bind() + start() rather than listen() so the process count is configurable.
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.bind(options.port)
    http_server.start(options.num_processes)

    print("server started, port {}".format(options.port))
    tornado.ioloop.IOLoop.current().start()
pyspark job code complete

猜你喜欢