《PG源码学习--4.查询规划》

查询依次经过语法解析、语义分析、规则重写之后,进入查询规划环节。本文学习查询规划部分的代码。

一.入口

postgres\src\backend\optimizer\plan\planner.c

/*
 * planner
 *    Entry point of query planning.
 *
 * When planner_hook is set, planning is delegated entirely to the hook;
 * otherwise standard_planner() does the work.  All three arguments are
 * passed through unchanged, and the chosen callee's result is returned
 * as-is.
 */
PlannedStmt *
planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
   /* Prefer the installed hook; fall back to the built-in planner. */
   return planner_hook
      ? (*planner_hook) (parse, cursorOptions, boundParams)
      : standard_planner(parse, cursorOptions, boundParams);
}

二.数据结构

postgres\include\server\nodes\plannodes.h

/*
 * PlannedStmt
 *
 * Node holding the planner's output for one statement.  In the
 * standard_planner() listing of this article, one is created with
 * makeNode(PlannedStmt) at the very end of planning and filled in partly
 * from the input Query (commandType, queryId, hasModifyingCTE, canSetTag,
 * utilityStmt, stmt_location, stmt_len; hasReturning is derived from
 * Query.returningList) and partly from the PlannerGlobal working state
 * (everything else except planTree, which is the finished top-level Plan).
 */
typedef struct PlannedStmt
{
   NodeTag       type;

   CmdType       commandType;   /* select|insert|update|delete|utility */

   uint32    queryId;      /* query identifier (copied from Query) */

   bool      hasReturning;  /* is it insert|update|delete RETURNING? */

   bool      hasModifyingCTE;   /* has insert|update|delete in WITH? */

   bool      canSetTag;    /* do I set the command result tag? */

   bool      transientPlan; /* redo plan when TransactionXmin changes? */

   bool      dependsOnRole; /* is plan specific to current role? */

   bool      parallelModeNeeded; /* parallel mode required to execute? */

   struct Plan *planTree;    /* tree of Plan nodes (the top-level plan) */

   List      *rtable;          /* list of RangeTblEntry nodes */

   /* rtable indexes of target relations for INSERT/UPDATE/DELETE */
   List      *resultRelations;   /* integer list of RT indexes, or NIL */

   /*
    * rtable indexes of non-leaf target relations for UPDATE/DELETE on all
    * the partitioned tables mentioned in the query.
    */
   List      *nonleafResultRelations;

   /*
    * rtable indexes of root target relations for UPDATE/DELETE; this list
    * maintains a subset of the RT indexes in nonleafResultRelations,
    * indicating the roots of the respective partition hierarchies.
    */
   List      *rootResultRelations;

   List      *subplans;     /* Plan trees for SubPlan expressions; note
                         * that some could be NULL */

   Bitmapset  *rewindPlanIDs; /* indices of subplans that require REWIND */

   List      *rowMarks;     /* a list of PlanRowMark's */

   List      *relationOids;  /* OIDs of relations the plan depends on */

   List      *invalItems;       /* other dependencies, as PlanInvalItems */

   int          nParamExec;       /* number of PARAM_EXEC Params used */

   Node      *utilityStmt;   /* non-null if this is utility stmt */

   /* statement location in source string (copied from Query) */
   int          stmt_location; /* start location, or -1 if unknown */
   int          stmt_len;     /* length in bytes; 0 means "rest of string" */
} PlannedStmt;

三.standard_planner

1.流程

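(原文此处的流程图已缺失。根据下面的源代码,standard_planner 的主要流程为:① 创建并初始化 PlannerGlobal;② 判断本次查询能否使用并行模式;③ 根据 cursorOptions 确定 tuple_fraction;④ 调用 subquery_planner 进行规划,再用 get_cheapest_fractional_path 选出路径,并由 create_plan 转换成 Plan;⑤ 视需要在顶层添加 Material 或 Gather 节点;⑥ 调用 SS_finalize_plan 和 set_plan_references 做收尾处理;⑦ 构造并返回 PlannedStmt。)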

2.源代码

代码明细

/*
 * standard_planner
 *    Built-in planner: turn one Query into a PlannedStmt.
 *
 * Steps, in the order they appear below:
 *   1. Create a PlannerGlobal and reset its bookkeeping fields.
 *   2. Decide whether parallel mode may be used (glob->parallelModeOK)
 *      and whether its restrictions must be imposed
 *      (glob->parallelModeNeeded).
 *   3. Derive tuple_fraction from cursorOptions.
 *   4. Run subquery_planner(), fetch the UPPERREL_FINAL rel, pick a path
 *      with get_cheapest_fractional_path() and convert it with
 *      create_plan().
 *   5. Optionally put a Material node (scrollable cursor) or a Gather node
 *      (force_parallel_mode) on top of the plan.
 *   6. Run SS_finalize_plan() when PARAM_EXEC Params exist, then
 *      set_plan_references() on the main plan and on every subplan.
 *   7. Build the PlannedStmt from the Query and the PlannerGlobal.
 *
 * parse:         the Query to plan.
 * cursorOptions: bitmask; the CURSOR_OPT_PARALLEL_OK, CURSOR_OPT_FAST_PLAN
 *                and CURSOR_OPT_SCROLL bits are examined here.
 * boundParams:   stored into glob->boundParams; not otherwise touched in
 *                this function.
 *
 * Returns the newly made PlannedStmt.
 */
PlannedStmt *
standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
   PlannedStmt *result;
   PlannerGlobal *glob;
   double    tuple_fraction;
   PlannerInfo *root;
   RelOptInfo *final_rel;
   Path      *best_path;
   Plan      *top_plan;
   ListCell   *lp,
            *lr;

   /*
    * Set up global state for this planner invocation.  This data is needed
    * across all levels of sub-Query that might exist in the given command,
    * so we keep it in a separate struct that's linked to by each per-Query
    * PlannerInfo.
    */
   glob = makeNode(PlannerGlobal);

   glob->boundParams = boundParams;
   glob->subplans = NIL;
   glob->subroots = NIL;
   glob->rewindPlanIDs = NULL;
   glob->finalrtable = NIL;
   glob->finalrowmarks = NIL;
   glob->resultRelations = NIL;
   glob->nonleafResultRelations = NIL;
   glob->rootResultRelations = NIL;
   glob->relationOids = NIL;
   glob->invalItems = NIL;
   glob->nParamExec = 0;
   glob->lastPHId = 0;
   glob->lastRowMarkId = 0;
   glob->lastPlanNodeId = 0;
   glob->transientPlan = false;
   glob->dependsOnRole = false;

   /*
    * Assess whether it's feasible to use parallel mode for this query. We
    * can't do this in a standalone backend, or if the command will try to
    * modify any data, or if this is a cursor operation, or if GUCs are set
    * to values that don't permit parallelism, or if parallel-unsafe
    * functions are present in the query tree.
    *
    * For now, we don't try to use parallel mode if we're running inside a
    * parallel worker.  We might eventually be able to relax this
    * restriction, but for now it seems best not to have parallel workers
    * trying to create their own parallel workers.
    *
    * We can't use parallelism in serializable mode because the predicate
    * locking code is not parallel-aware.  It's not catastrophic if someone
    * tries to run a parallel plan in serializable mode; it just won't get
    * any workers and will run serially.  But it seems like a good heuristic
    * to assume that the same serialization level will be in effect at plan
    * time and execution time, so don't generate a parallel plan if we're in
    * serializable mode.
    */
   if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
      IsUnderPostmaster &&
      dynamic_shared_memory_type != DSM_IMPL_NONE &&
      parse->commandType == CMD_SELECT &&
      !parse->hasModifyingCTE &&
      max_parallel_workers_per_gather > 0 &&
      !IsParallelWorker() &&
      !IsolationIsSerializable())
   {
      /* all the cheap tests pass, so scan the query tree */
      glob->maxParallelHazard = max_parallel_hazard(parse);
      glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
   }
   else
   {
      /* skip the query tree scan, just assume it's unsafe */
      glob->maxParallelHazard = PROPARALLEL_UNSAFE;
      glob->parallelModeOK = false;
   }

   /*
    * glob->parallelModeNeeded should tell us whether it's necessary to
    * impose the parallel mode restrictions, but we don't actually want to
    * impose them unless we choose a parallel plan, so it is normally set
    * only if a parallel plan is chosen (see create_gather_plan).  That way,
    * people who mislabel their functions but don't use parallelism anyway
    * aren't harmed.  But when force_parallel_mode is set, we enable the
    * restrictions whenever possible for testing purposes.
    */
   glob->parallelModeNeeded = glob->parallelModeOK &&
      (force_parallel_mode != FORCE_PARALLEL_OFF);

   /* Determine what fraction of the plan is likely to be scanned */
   if (cursorOptions & CURSOR_OPT_FAST_PLAN)
   {
      /*
       * We have no real idea how many tuples the user will ultimately FETCH
       * from a cursor, but it is often the case that he doesn't want 'em
       * all, or would prefer a fast-start plan anyway so that he can
       * process some of the tuples sooner.  Use a GUC parameter to decide
       * what fraction to optimize for.
       */
      tuple_fraction = cursor_tuple_fraction;

      /*
       * We document cursor_tuple_fraction as simply being a fraction, which
       * means the edge cases 0 and 1 have to be treated specially here.  We
       * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
       */
      if (tuple_fraction >= 1.0)
         tuple_fraction = 0.0;
      else if (tuple_fraction <= 0.0)
         tuple_fraction = 1e-10;
   }
   else
   {
      /* Default assumption is we need all the tuples */
      tuple_fraction = 0.0;
   }

   /*
    * primary planning entry point (may recurse for subqueries)
    *
    * NOTE(review): the NULL and false arguments presumably mean "no parent
    * PlannerInfo" and "not a recursive CTE term" for a top-level query;
    * confirm against subquery_planner's declaration, which is not shown
    * here.
    */
   root = subquery_planner(glob, parse, NULL,
                     false, tuple_fraction);

   /* Select best Path and turn it into a Plan */
   final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
   best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);

   top_plan = create_plan(root, best_path);

   /*
    * If creating a plan for a scrollable cursor, make sure it can run
    * backwards on demand.  Add a Material node at the top at need.
    */
   if (cursorOptions & CURSOR_OPT_SCROLL)
   {
      if (!ExecSupportsBackwardScan(top_plan))
         top_plan = materialize_finished_plan(top_plan);
   }

   /*
    * Optionally add a Gather node for testing purposes, provided this is
    * actually a safe thing to do.
    */
   if (force_parallel_mode != FORCE_PARALLEL_OFF && top_plan->parallel_safe)
   {
      Gather    *gather = makeNode(Gather);

      /* The Gather simply passes through the child's target list. */
      gather->plan.targetlist = top_plan->targetlist;
      gather->plan.qual = NIL;
      gather->plan.lefttree = top_plan;
      gather->plan.righttree = NULL;
      gather->num_workers = 1;
      gather->single_copy = true;
      /* invisible is set only under FORCE_PARALLEL_REGRESS */
      gather->invisible = (force_parallel_mode == FORCE_PARALLEL_REGRESS);

      /*
       * Since this Gather has no parallel-aware descendants to signal to,
       * we don't need a rescan Param.
       */
      gather->rescan_param = -1;

      /*
       * Ideally we'd use cost_gather here, but setting up dummy path data
       * to satisfy it doesn't seem much cleaner than knowing what it does.
       */
      gather->plan.startup_cost = top_plan->startup_cost +
         parallel_setup_cost;
      gather->plan.total_cost = top_plan->total_cost +
         parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
      gather->plan.plan_rows = top_plan->plan_rows;
      gather->plan.plan_width = top_plan->plan_width;
      gather->plan.parallel_aware = false;
      gather->plan.parallel_safe = false;

      /* use parallel mode for parallel plans. */
      root->glob->parallelModeNeeded = true;

      /* From here on the Gather is the top of the plan tree. */
      top_plan = &gather->plan;
   }

   /*
    * If any Params were generated, run through the plan tree and compute
    * each plan node's extParam/allParam sets.  Ideally we'd merge this into
    * set_plan_references' tree traversal, but for now it has to be separate
    * because we need to visit subplans before not after main plan.
    */
   if (glob->nParamExec > 0)
   {
      /* subplans and subroots are walked in lockstep, so lengths must match */
      Assert(list_length(glob->subplans) == list_length(glob->subroots));
      forboth(lp, glob->subplans, lr, glob->subroots)
      {
         Plan      *subplan = (Plan *) lfirst(lp);
         PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);

         SS_finalize_plan(subroot, subplan);
      }
      SS_finalize_plan(root, top_plan);
   }

   /*
    * final cleanup of the plan
    *
    * The Asserts check that these PlannerGlobal lists are still empty at
    * this point.  NOTE(review): presumably set_plan_references() is what
    * fills them before they are copied into the PlannedStmt below; confirm
    * in setrefs.c.
    */
   Assert(glob->finalrtable == NIL);
   Assert(glob->finalrowmarks == NIL);
   Assert(glob->resultRelations == NIL);
   Assert(glob->nonleafResultRelations == NIL);
   Assert(glob->rootResultRelations == NIL);
   top_plan = set_plan_references(root, top_plan);
   /* ... and the subplans (both regular subplans and initplans) */
   Assert(list_length(glob->subplans) == list_length(glob->subroots));
   forboth(lp, glob->subplans, lr, glob->subroots)
   {
      Plan      *subplan = (Plan *) lfirst(lp);
      PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);

      /* the returned Plan replaces the entry in glob->subplans in place */
      lfirst(lp) = set_plan_references(subroot, subplan);
   }

   /* build the PlannedStmt result */
   result = makeNode(PlannedStmt);

   /* fields taken from the Query ... */
   result->commandType = parse->commandType;
   result->queryId = parse->queryId;
   result->hasReturning = (parse->returningList != NIL);
   result->hasModifyingCTE = parse->hasModifyingCTE;
   result->canSetTag = parse->canSetTag;
   /* ... and fields taken from the planner's global state */
   result->transientPlan = glob->transientPlan;
   result->dependsOnRole = glob->dependsOnRole;
   result->parallelModeNeeded = glob->parallelModeNeeded;
   result->planTree = top_plan;
   result->rtable = glob->finalrtable;
   result->resultRelations = glob->resultRelations;
   result->nonleafResultRelations = glob->nonleafResultRelations;
   result->rootResultRelations = glob->rootResultRelations;
   result->subplans = glob->subplans;
   result->rewindPlanIDs = glob->rewindPlanIDs;
   result->rowMarks = glob->finalrowmarks;
   result->relationOids = glob->relationOids;
   result->invalItems = glob->invalItems;
   result->nParamExec = glob->nParamExec;
   /* utilityStmt should be null, but we might as well copy it */
   result->utilityStmt = parse->utilityStmt;
   result->stmt_location = parse->stmt_location;
   result->stmt_len = parse->stmt_len;

   return result;
}

四.其他

日拱一卒,加油!

发布了6 篇原创文章 · 获赞 1 · 访问量 137

猜你喜欢

转载自blog.csdn.net/weixin_39939108/article/details/104545330