一、循环渲染
在前面的场景里,箱子和地面都被渲染了多次:
for (int i = 0; i <= 7; i++)
{
model = glm::translate(glm::mat4(1.0f), positions[i]);
model = glm::scale(model, glm::vec3(0.01f));
glUniformMatrix4fv(modelLoc, 1, GL_FALSE, glm::value_ptr(model));
wood.Draw(shaderObj);
}
for (int i = -2; i <= 1; i++)
{
for(int j = -2; j <= 1; j++)
{
model = glm::translate(glm::mat4(1.0f), glm::vec3(i * 3.52f, -0.05f, j * 3.72f));
model = glm::scale(model, glm::vec3(0.1f));
model = glm::rotate(model, glm::radians(90.0f), glm::vec3(-1.0f, 0.0f, 0.0f));
glUniformMatrix4fv(modelLoc, 1, GL_FALSE, glm::value_ptr(model));
ground.Draw(shaderObj);
}
}
不止样例,在很多场景中,都会有不少物体需要重复绘制,常规的有花草、墙壁等,游戏《无人深空》就有很多小行星带的场景,里面的石块动辄几万个,这个时候像上面循环Draw的方式就不太可取了
这些数据都有一个特点:顶点数据完全一样,只是进行了不同的世界空间变换,这个时候倘若循环使用glDrawArrays或glDrawElements方法,那么OpenGL在每一次绘制时都会重复地做一些没有必要的工作,例如告诉GPU从哪个缓冲读取数据,以及在哪里寻找顶点属性等等,这样只要次数足够多,哪怕GPU本身的渲染速度足够快,这些CPU到GPU的调用开销仍然会导致延迟和卡顿
因此,正确的做法是将数据一次发送给GPU,然后告诉OpenGL使用一个绘制函数将这些数据绘制为多个物体
二、实例化
实例化就是一种只调用一次渲染函数却能绘制出很多物体的方法,它节省渲染物体时从CPU到GPU的通信时间,因为只需做一次。想要使用实例化方法生成物体也很简单,只需要用glDrawArraysInstanced和glDrawElementsInstanced方法代替前面的glDrawArrays或glDrawElements方法就可以,它们相对于glDrawArrays或glDrawElements就多了一个参数,代表着渲染的次数,其它参数一致
简单修改一行代码:
- glDrawArraysInstanced():对应glDrawArrays()方法,最后面多一个int类型(GLsizei)的参数,表示要绘制的实例数量
- glDrawElementsInstanced():对应glDrawElements()方法,最后面多一个int类型(GLsizei)的参数,表示要绘制的实例数量
glBindVertexArray(this->VAO);
glBindBufferRange(GL_UNIFORM_BUFFER, 1, UBO, 0, sizeof(Material));
glDrawElementsInstanced(GL_TRIANGLES, this->indices.size(), GL_UNSIGNED_INT, 0, num); //EBO绘制
当然了,如果是绘制的模型,那么需要在对应的mesh类中修改,并且同时修改自己封装的Draw()方法(支持多传一个参数并设置其默认值为1就可以了)
改用了glDrawElementsInstanced()方法后,openGL就会直接生成对应数量的物体
接下来就是处理这些物体的位置了,之前为了让每个物体处于不同的位置,是在每次draw之前动态更改模型矩阵的值,那么现在肯定只需要一次draw了,该如何设置每个物体的位置呢?
内建变量gl_InstanceID:
在通过实例化绘制时,gl_InstanceID的初值是0,它在每个实例渲染时都会增加1,如果用glDrawElementsInstanced()绘制同一个物体100次,那么顶点着色器中gl_InstanceID的值最后就是99
有了gl_InstanceID就好办了,只需要在着色器中定义一个模型矩阵数组,并将gl_InstanceID当作下标取数据就好了:
#version 420 core
// Instanced vertex shader: each instance picks its model matrix from a uniform
// array, indexed by the built-in gl_InstanceID.
layout (location = 0) in vec3 position;
layout (location = 1) in vec3 normal;
layout (location = 2) in vec2 texture;

out VS_OUT
{
    vec2 texIn;
    vec3 normalIn;
    vec3 fragPosIn;
} vs_out;

// Per-instance model matrices; the array size caps one draw at 128 instances.
uniform mat4 model[128];

layout (std140, binding = 0) uniform Matrices
{
    mat4 view;       // view matrix
    mat4 projection; // projection matrix
};

void main()
{
    // Fetch this instance's model matrix once and reuse it below.
    mat4 instanceModel = model[gl_InstanceID];

    gl_Position = projection * view * instanceModel * vec4(position, 1.0);
    vs_out.texIn = texture;
    // World-space position of the vertex.
    vs_out.fragPosIn = vec3(instanceModel * vec4(position, 1.0));
    // Transform the normal by the inverse-transpose of the model matrix.
    vs_out.normalIn = mat3(transpose(inverse(instanceModel))) * normal;
}
这样子的话,就需要在主程序中预先传入所有的模型矩阵至着色器:
// Instanced path using the uniform array `model[...]` in the vertex shader:
// upload every per-instance model matrix first, then issue ONE instanced draw
// per model.
modelLoc = glGetUniformLocation(shaderObj.Program, "model");

// Crates: eight instances, written to model[0..7].
for (GLuint i = 0; i <= 7; i++)
{
    string index = to_string(i);
    model = glm::translate(glm::mat4(1.0f), positions[i]);
    model = glm::scale(model, glm::vec3(0.01f));
    // Each array element is looked up by its own name, e.g. "model[3]".
    GLint location = glGetUniformLocation(shaderObj.Program, ("model[" + index + "]").c_str());
    glUniformMatrix4fv(location, 1, GL_FALSE, glm::value_ptr(model));
}
wood.Draw(shaderObj, 8);

// Ground: a 4 x 4 grid of tiles. The same uniform slots are reused, which is
// fine because the crate draw call has already been issued.
GLint groundIndex = 0;
for (int i = -2; i <= 1; i++)
{
    for (int j = -2; j <= 1; j++)
    {
        string index = to_string(groundIndex++);
        model = glm::translate(glm::mat4(1.0f), glm::vec3(i * 3.52f, -0.05f, j * 3.72f));
        model = glm::scale(model, glm::vec3(0.1f));
        model = glm::rotate(model, glm::radians(90.0f), glm::vec3(-1.0f, 0.0f, 0.0f));
        GLint location = glGetUniformLocation(shaderObj.Program, ("model[" + index + "]").c_str());
        glUniformMatrix4fv(location, 1, GL_FALSE, glm::value_ptr(model));
    }
}
// groundIndex was post-incremented once per tile, so it already equals the
// number of matrices uploaded (16). Passing groundIndex + 1 would draw an extra
// instance that reads model[16], which this pass never set.
ground.Draw(shaderObj, groundIndex);
如果没问题的话,上面的代码就可以生成和之前一样的效果,只不过这次不再是暴力循环绘制
三、实例化数组(Instanced Array)
前面是在着色器中定义了一个数组,但是着色器能接受的uniform数据是有限的,如果有成千上万个物体又会遇到瓶颈,可以换个方式传递数据
和位置和纹理一样,将模型矩阵作为一个顶点属性传入着色器也是个不错的选择:
#version 420 core
// Instanced vertex shader: the model matrix arrives as a per-instance vertex
// attribute instead of a uniform array.
layout (location = 0) in vec3 position;
layout (location = 1) in vec3 normal;
layout (location = 2) in vec2 texture;
// A mat4 attribute takes four consecutive locations (3, 4, 5 and 6).
layout (location = 3) in mat4 model;

out VS_OUT
{
    vec2 texIn;
    vec3 normalIn;
    vec3 fragPosIn;
} vs_out;

layout (std140, binding = 0) uniform Matrices
{
    mat4 view;       // view matrix
    mat4 projection; // projection matrix
};

void main()
{
    vec4 localPos = vec4(position, 1.0);
    // Inverse-transpose of the model matrix, used to transform the normal.
    mat3 normalMatrix = mat3(transpose(inverse(model)));

    gl_Position = projection * view * model * localPos;
    vs_out.texIn = texture;
    // World-space position of the vertex.
    vs_out.fragPosIn = vec3(model * localPos);
    vs_out.normalIn = normalMatrix * normal;
}
然后和其它顶点属性一样处理就好了,不过注意一点:单个顶点属性的最大数据量和vec4相等,因此对于mat4类型的属性,它会被隐式拆成4个vec4,location依次顺延(这里是3、4、5、6),所以在着色器中,必须为mat4属性保留4个location,并需要按下面这种方法配置实例数组:
- glVertexAttribDivisor(GLuint index, GLuint divisor):每隔divisor个实例,就会往顶点着色器中传入buffer中的一个新的属性值,其中index为位置值
// Uploads `size` model matrices into a newly generated array buffer and wires
// them into this mesh's VAO as a per-instance mat4 attribute at locations 3..6.
//
// model: pointer to the first of `size` contiguous glm::mat4 values.
// size:  number of matrices to upload.
//
// NOTE(review): the buffer handle is a local that is neither stored nor
// deleted here, so each call creates another buffer object.
void UpdateModelMatrix(glm::mat4* model, GLint size)
{
    GLuint instanceVBO;
    glGenBuffers(1, &instanceVBO);
    glBindBuffer(GL_ARRAY_BUFFER, instanceVBO);
    glBufferData(GL_ARRAY_BUFFER, size * sizeof(glm::mat4), model, GL_STATIC_DRAW);

    glBindVertexArray(this->VAO);

    // A mat4 attribute is fed as four vec4 columns, one attribute location each.
    const GLuint firstLocation = 3;
    const GLuint columnCount = 4;
    for (GLuint col = 0; col < columnCount; ++col)
    {
        glEnableVertexAttribArray(firstLocation + col);
        glVertexAttribPointer(firstLocation + col, 4, GL_FLOAT, GL_FALSE, sizeof(glm::mat4),
                              (GLvoid*)(col * sizeof(glm::vec4)));
    }
    // Divisor 1: advance each column attribute once per instance.
    for (GLuint col = 0; col < columnCount; ++col)
    {
        glVertexAttribDivisor(firstLocation + col, 1);
    }

    glBindVertexArray(0);
}
这个方法添加为Mesh类的公有方法,当然Model类也要给个入口:
public:
Model(const GLchar* path, const GLchar* texPath = "")
{
this->loadModel(path, texPath);
}
// Draws every mesh of this model, forwarding the shader and `num` to each
// mesh's Draw(). `num` defaults to 1.
void Draw(Shader shader, GLint num = 1)
{
    for (auto& mesh : this->meshes)
    {
        mesh.Draw(shader, num);
    }
}
// Forwards the per-instance model matrices to every mesh of this model.
// Does nothing when `size` is zero or negative.
void UpdateModelMatrix(glm::mat4* model, GLint size)
{
    if (size > 0)
    {
        for (auto& mesh : this->meshes)
        {
            mesh.UpdateModelMatrix(model, size);
        }
    }
}
好了,这个时候只需要传入mat4矩阵就可以,主代码也精简了:
// Instanced-array path: build the per-instance model matrices on the CPU once,
// hand them to each model with UpdateModelMatrix(), then draw every model with
// a single instanced call per frame.
glm::mat4* modelMatrices;
modelMatrices = new glm::mat4[1000]; // scratch space; only the first 8 / 63 entries are used below
glm::mat4 model = glm::mat4(1.0f);

// Crates: eight instances.
for (GLuint i = 0; i <= 7; i++)
{
    model = glm::translate(glm::mat4(1.0f), positions[i]);
    model = glm::scale(model, glm::vec3(0.01f));
    modelMatrices[i] = model;
}
wood.UpdateModelMatrix(modelMatrices, 8);

// Ground: a 9 x 7 grid of tiles (63 instances); the scratch array is reused.
GLint groundIndex = 0;
for (int i = -4; i <= 4; i++)
{
    for (int j = -2; j <= 4; j++)
    {
        model = glm::translate(glm::mat4(1.0f), glm::vec3(i * 3.52f, -0.05f, j * 3.72f));
        model = glm::scale(model, glm::vec3(0.1f));
        model = glm::rotate(model, glm::radians(90.0f), glm::vec3(-1.0f, 0.0f, 0.0f));
        modelMatrices[groundIndex++] = model;
    }
}
ground.UpdateModelMatrix(modelMatrices, groundIndex);
// The matrices have been handed over via UpdateModelMatrix, so the CPU-side
// array is released before the render loop.
delete[] modelMatrices;
// ... (remaining setup omitted)
while (!glfwWindowShouldClose(window))
{
    // ... (per-frame setup omitted)
    wood.Draw(shaderObj, 8);
    // groundIndex already equals the number of matrices uploaded above.
    // Drawing groundIndex + 1 instances would make the last instance read one
    // matrix past the end of the uploaded data.
    ground.Draw(shaderObj, groundIndex);
    // ... (rest of the frame omitted)
}
可以测试了,这个时候会发现相对于之前的写法,生成相同数量的物体会顺畅很多:
也可以实例化更多的地面模型而不担心卡顿
当然了,如果生成的物体过多又或者模型过于复杂,那么当然还是会卡的,这个时候就需要:①用更优秀的渲染算法/优化手段;②换一张更好的显卡,这些后面有机会再说吧