awesome!!!
This commit is contained in:
		
							
								
								
									
										951
									
								
								third-party/stb/tests/caveview/cave_render.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										951
									
								
								third-party/stb/tests/caveview/cave_render.c
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,951 @@ | ||||
| // This file renders vertex buffers, converts raw meshes | ||||
| // to GL meshes, and manages threads that do the raw-mesh | ||||
| // building (found in cave_mesher.c) | ||||
|  | ||||
|  | ||||
| #include "stb_voxel_render.h" | ||||
|  | ||||
| #define STB_GLEXT_DECLARE "glext_list.h" | ||||
| #include "stb_gl.h" | ||||
| #include "stb_image.h" | ||||
| #include "stb_glprog.h" | ||||
|  | ||||
| #include "caveview.h" | ||||
| #include "cave_parse.h" | ||||
| #include "stb.h" | ||||
| #include "sdl.h" | ||||
| #include "sdl_thread.h" | ||||
| #include <math.h> | ||||
| #include <assert.h> | ||||
|  | ||||
| //#define STBVOX_CONFIG_TEX1_EDGE_CLAMP | ||||
|  | ||||
|  | ||||
| // currently no dynamic way to set mesh cache size or view distance | ||||
| //#define SHORTVIEW | ||||
|  | ||||
|  | ||||
| stbvox_mesh_maker g_mesh_maker; | ||||
|  | ||||
| GLuint main_prog; | ||||
| GLint uniform_locations[64]; | ||||
|  | ||||
| //#define MAX_QUADS_PER_DRAW        (65536 / 4) // assuming 16-bit indices, 4 verts per quad | ||||
| //#define FIXED_INDEX_BUFFER_SIZE   (MAX_QUADS_PER_DRAW * 6 * 2)  // 16*1024 * 12 == ~192KB | ||||
|  | ||||
| // while uploading texture data, this holds our each texture | ||||
| #define TEX_SIZE  64 | ||||
| uint32 texture[TEX_SIZE][TEX_SIZE]; | ||||
|  | ||||
| GLuint voxel_tex[2]; | ||||
|  | ||||
| // chunk state | ||||
| enum | ||||
| { | ||||
|    STATE_invalid, | ||||
|    STATE_needed, | ||||
|    STATE_requested, | ||||
|    STATE_abandoned, | ||||
|    STATE_valid, | ||||
| }; | ||||
|  | ||||
| // mesh is 32x32x255 ... this is hardcoded in that | ||||
| // a mesh covers 2x2 minecraft chunks, no #defines for it | ||||
| typedef struct | ||||
| { | ||||
|    int state; | ||||
|    int chunk_x, chunk_y; | ||||
|    int num_quads; | ||||
|    float priority; | ||||
|    int vbuf_size, fbuf_size; | ||||
|  | ||||
|    float transform[3][3]; | ||||
|    float bounds[2][3]; | ||||
|  | ||||
|    GLuint vbuf;// vbuf_tex; | ||||
|    GLuint fbuf, fbuf_tex; | ||||
|  | ||||
| } chunk_mesh; | ||||
|  | ||||
| void scale_texture(unsigned char *src, int x, int y, int w, int h) | ||||
| { | ||||
|    int i,j,k; | ||||
|    assert(w == 256 && h == 256); | ||||
|    for (j=0; j < TEX_SIZE; ++j) { | ||||
|       for (i=0; i < TEX_SIZE; ++i) { | ||||
|          uint32 val=0; | ||||
|          for (k=0; k < 4; ++k) { | ||||
|             val >>= 8; | ||||
|             val += src[ 4*(x+(i>>2)) + 4*w*(y+(j>>2)) + k]<<24; | ||||
|          } | ||||
|          texture[j][i] = val; | ||||
|       } | ||||
|    } | ||||
| } | ||||
|  | ||||
| void build_base_texture(int n) | ||||
| { | ||||
|    int x,y; | ||||
|    uint32 color = stb_rand() | 0x808080; | ||||
|    for (y=0; y<TEX_SIZE; ++y) | ||||
|       for (x=0; x<TEX_SIZE; ++x) { | ||||
|          texture[y][x] = (color + (stb_rand()&0x1f1f1f))|0xff000000; | ||||
|       } | ||||
| } | ||||
|  | ||||
| void build_overlay_texture(int n) | ||||
| { | ||||
|    int x,y; | ||||
|    uint32 color = stb_rand(); | ||||
|    if (color & 16) | ||||
|       color = 0xff000000; | ||||
|    else | ||||
|       color = 0xffffffff; | ||||
|    for (y=0; y<TEX_SIZE; ++y) | ||||
|       for (x=0; x<TEX_SIZE; ++x) { | ||||
|          texture[y][x] = 0; | ||||
|       } | ||||
|  | ||||
|    for (y=0; y < TEX_SIZE/8; ++y) { | ||||
|       for (x=0; x < TEX_SIZE; ++x) { | ||||
|          texture[y][x] = color; | ||||
|          texture[TEX_SIZE-1-y][x] = color; | ||||
|          texture[x][y] = color; | ||||
|          texture[x][TEX_SIZE-1-y] = color; | ||||
|       } | ||||
|    } | ||||
| } | ||||
|  | ||||
| // view radius of about 1024 = 2048 columns / 32 columns-per-mesh = 2^11 / 2^5 = 64x64 | ||||
| // so we need bigger than 64x64 so we can precache, which means we have to be | ||||
| // non-power-of-two, or we have to be pretty huge | ||||
| #define CACHED_MESH_NUM_X   128 | ||||
| #define CACHED_MESH_NUM_Y   128 | ||||
|  | ||||
|  | ||||
| chunk_mesh cached_chunk_mesh[CACHED_MESH_NUM_Y][CACHED_MESH_NUM_X]; | ||||
|  | ||||
| void free_chunk(int slot_x, int slot_y) | ||||
| { | ||||
|    chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x]; | ||||
|    if (cm->state == STATE_valid) { | ||||
|       glDeleteTextures(1, &cm->fbuf_tex); | ||||
|       glDeleteBuffersARB(1, &cm->vbuf); | ||||
|       glDeleteBuffersARB(1, &cm->fbuf); | ||||
|       cached_chunk_mesh[slot_y][slot_x].state = STATE_invalid; | ||||
|    } | ||||
| } | ||||
|  | ||||
| void upload_mesh(chunk_mesh *cm, uint8 *build_buffer, uint8 *face_buffer) | ||||
| { | ||||
|    glGenBuffersARB(1, &cm->vbuf); | ||||
|    glBindBufferARB(GL_ARRAY_BUFFER_ARB, cm->vbuf); | ||||
|    glBufferDataARB(GL_ARRAY_BUFFER_ARB, cm->num_quads*4*sizeof(uint32), build_buffer, GL_STATIC_DRAW_ARB); | ||||
|    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); | ||||
|  | ||||
|    glGenBuffersARB(1, &cm->fbuf); | ||||
|    glBindBufferARB(GL_TEXTURE_BUFFER_ARB, cm->fbuf); | ||||
|    glBufferDataARB(GL_TEXTURE_BUFFER_ARB, cm->num_quads*sizeof(uint32), face_buffer , GL_STATIC_DRAW_ARB); | ||||
|    glBindBufferARB(GL_TEXTURE_BUFFER_ARB, 0); | ||||
|  | ||||
|    glGenTextures(1, &cm->fbuf_tex); | ||||
|    glBindTexture(GL_TEXTURE_BUFFER_ARB, cm->fbuf_tex); | ||||
|    glTexBufferARB(GL_TEXTURE_BUFFER_ARB, GL_RGBA8UI, cm->fbuf); | ||||
|    glBindTexture(GL_TEXTURE_BUFFER_ARB, 0); | ||||
| } | ||||
|  | ||||
| static void upload_mesh_data(raw_mesh *rm) | ||||
| { | ||||
|    int cx = rm->cx; | ||||
|    int cy = rm->cy; | ||||
|    int slot_x = (cx >> 1) & (CACHED_MESH_NUM_X-1); | ||||
|    int slot_y = (cy >> 1) & (CACHED_MESH_NUM_Y-1); | ||||
|    chunk_mesh *cm; | ||||
|  | ||||
|    free_chunk(slot_x, slot_y); | ||||
|  | ||||
|    cm = &cached_chunk_mesh[slot_y][slot_x]; | ||||
|    cm->num_quads = rm->num_quads; | ||||
|  | ||||
|    upload_mesh(cm, rm->build_buffer, rm->face_buffer); | ||||
|    cm->vbuf_size = rm->num_quads*4*sizeof(uint32); | ||||
|    cm->fbuf_size = rm->num_quads*sizeof(uint32); | ||||
|    cm->priority = 100000; | ||||
|    cm->chunk_x = cx; | ||||
|    cm->chunk_y = cy; | ||||
|  | ||||
|    memcpy(cm->bounds, rm->bounds, sizeof(cm->bounds)); | ||||
|    memcpy(cm->transform, rm->transform, sizeof(cm->transform)); | ||||
|  | ||||
|    // write barrier here | ||||
|    cm->state = STATE_valid; | ||||
| } | ||||
|  | ||||
| GLint uniform_loc[16]; | ||||
| float table3[128][3]; | ||||
| float table4[64][4]; | ||||
| GLint tablei[2]; | ||||
|  | ||||
| float step=0; | ||||
|  | ||||
| #ifdef SHORTVIEW | ||||
| int view_dist_in_chunks = 50; | ||||
| #else | ||||
| int view_dist_in_chunks = 80; | ||||
| #endif | ||||
|  | ||||
| void setup_uniforms(float pos[3]) | ||||
| { | ||||
|    int i,j; | ||||
|    step += 1.0f/60.0f; | ||||
|    for (i=0; i < STBVOX_UNIFORM_count; ++i) { | ||||
|       stbvox_uniform_info raw, *ui=&raw; | ||||
|       stbvox_get_uniform_info(&raw, i); | ||||
|       uniform_loc[i] = -1; | ||||
|  | ||||
|       if (i == STBVOX_UNIFORM_texscale || i == STBVOX_UNIFORM_texgen || i == STBVOX_UNIFORM_color_table) | ||||
|          continue; | ||||
|  | ||||
|       if (ui) { | ||||
|          void *data = ui->default_value; | ||||
|          uniform_loc[i] = stbgl_find_uniform(main_prog, ui->name); | ||||
|         switch (i) { | ||||
|             case STBVOX_UNIFORM_face_data: | ||||
|                tablei[0] = 2; | ||||
|                data = tablei; | ||||
|                break; | ||||
|  | ||||
|             case STBVOX_UNIFORM_tex_array: | ||||
|                glActiveTextureARB(GL_TEXTURE0_ARB); | ||||
|                glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[0]); | ||||
|                glActiveTextureARB(GL_TEXTURE1_ARB); | ||||
|                glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]); | ||||
|                glActiveTextureARB(GL_TEXTURE0_ARB); | ||||
|                tablei[0] = 0; | ||||
|                tablei[1] = 1; | ||||
|                data = tablei; | ||||
|                break; | ||||
|  | ||||
|             case STBVOX_UNIFORM_color_table: | ||||
|                data = ui->default_value; | ||||
|                ((float *)data)[63*4+3] = 2.0f; // emissive | ||||
|                break; | ||||
|  | ||||
|             case STBVOX_UNIFORM_camera_pos: | ||||
|                data = table3[0]; | ||||
|                table3[0][0] = pos[0]; | ||||
|                table3[0][1] = pos[1]; | ||||
|                table3[0][2] = pos[2]; | ||||
|                table3[0][3] = stb_max(0,(float)sin(step*2)*0.125f); | ||||
|                break; | ||||
|  | ||||
|             case STBVOX_UNIFORM_ambient: { | ||||
|                float bright = 1.0; | ||||
|                //float bright = 0.75; | ||||
|                float amb[3][3]; | ||||
|  | ||||
|                // ambient direction is sky-colored upwards | ||||
|                // "ambient" lighting is from above | ||||
|                table4[0][0] =  0.3f; | ||||
|                table4[0][1] = -0.5f; | ||||
|                table4[0][2] =  0.9f; | ||||
|  | ||||
|                amb[1][0] = 0.3f; amb[1][1] = 0.3f; amb[1][2] = 0.3f; // dark-grey | ||||
|                amb[2][0] = 1.0; amb[2][1] = 1.0; amb[2][2] = 1.0; // white | ||||
|  | ||||
|                // convert so (table[1]*dot+table[2]) gives | ||||
|                // above interpolation | ||||
|                //     lerp((dot+1)/2, amb[1], amb[2]) | ||||
|                //     amb[1] + (amb[2] - amb[1]) * (dot+1)/2 | ||||
|                //     amb[1] + (amb[2] - amb[1]) * dot/2 + (amb[2]-amb[1])/2 | ||||
|  | ||||
|                for (j=0; j < 3; ++j) { | ||||
|                   table4[1][j] = (amb[2][j] - amb[1][j])/2 * bright; | ||||
|                   table4[2][j] = (amb[1][j] + amb[2][j])/2 * bright; | ||||
|                } | ||||
|  | ||||
|                // fog color | ||||
|                table4[3][0] = 0.6f, table4[3][1] = 0.7f, table4[3][2] = 0.9f; | ||||
|                table4[3][3] = 1.0f / (view_dist_in_chunks * 16); | ||||
|                table4[3][3] *= table4[3][3]; | ||||
|  | ||||
|                data = table4; | ||||
|                break; | ||||
|             } | ||||
|          } | ||||
|  | ||||
|          switch (ui->type) { | ||||
|             case STBVOX_UNIFORM_TYPE_sampler: stbglUniform1iv(uniform_loc[i], ui->array_length, data); break; | ||||
|             case STBVOX_UNIFORM_TYPE_vec2:    stbglUniform2fv(uniform_loc[i], ui->array_length, data); break; | ||||
|             case STBVOX_UNIFORM_TYPE_vec3:    stbglUniform3fv(uniform_loc[i], ui->array_length, data); break; | ||||
|             case STBVOX_UNIFORM_TYPE_vec4:    stbglUniform4fv(uniform_loc[i], ui->array_length, data); break; | ||||
|          } | ||||
|       } | ||||
|    } | ||||
| } | ||||
|  | ||||
| GLuint unitex[64], unibuf[64]; | ||||
| void make_texture_buffer_for_uniform(int uniform, int slot) | ||||
| { | ||||
|    GLenum type; | ||||
|    stbvox_uniform_info raw, *ui=&raw; | ||||
|    GLint uloc; | ||||
|     | ||||
|    stbvox_get_uniform_info(ui, uniform); | ||||
|    uloc = stbgl_find_uniform(main_prog, ui->name); | ||||
|  | ||||
|    if (uniform == STBVOX_UNIFORM_color_table) | ||||
|       ((float *)ui->default_value)[63*4+3] = 2.0f; // emissive | ||||
|  | ||||
|    glGenBuffersARB(1, &unibuf[uniform]); | ||||
|    glBindBufferARB(GL_ARRAY_BUFFER_ARB, unibuf[uniform]); | ||||
|    glBufferDataARB(GL_ARRAY_BUFFER_ARB, ui->array_length * ui->bytes_per_element, ui->default_value, GL_STATIC_DRAW_ARB); | ||||
|    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); | ||||
|  | ||||
|    glGenTextures(1, &unitex[uniform]); | ||||
|    glBindTexture(GL_TEXTURE_BUFFER_ARB, unitex[uniform]); | ||||
|    switch (ui->type) { | ||||
|       case STBVOX_UNIFORM_TYPE_vec2: type = GL_RG32F; break; | ||||
|       case STBVOX_UNIFORM_TYPE_vec3: type = GL_RGB32F; break; | ||||
|       case STBVOX_UNIFORM_TYPE_vec4: type = GL_RGBA32F; break; | ||||
|       default: assert(0); | ||||
|    } | ||||
|    glTexBufferARB(GL_TEXTURE_BUFFER_ARB, type, unibuf[uniform]); | ||||
|    glBindTexture(GL_TEXTURE_BUFFER_ARB, 0); | ||||
|  | ||||
|    glActiveTextureARB(GL_TEXTURE0 + slot); | ||||
|    glBindTexture(GL_TEXTURE_BUFFER_ARB, unitex[uniform]); | ||||
|    glActiveTextureARB(GL_TEXTURE0); | ||||
|  | ||||
|    stbglUseProgram(main_prog); | ||||
|    stbglUniform1i(uloc, slot); | ||||
| } | ||||
|  | ||||
| #define MAX_MESH_WORKERS  8 | ||||
| #define MAX_CHUNK_LOAD_WORKERS 2 | ||||
|  | ||||
| int num_mesh_workers; | ||||
| int num_chunk_load_workers; | ||||
|  | ||||
| typedef struct | ||||
| { | ||||
|    int state; | ||||
|    int request_cx; | ||||
|    int request_cy; | ||||
|    int padding[13]; | ||||
|  | ||||
|    SDL_sem * request_received; | ||||
|  | ||||
|    SDL_sem * chunk_server_done_processing; | ||||
|    int chunk_action; | ||||
|    int chunk_request_x; | ||||
|    int chunk_request_y; | ||||
|    fast_chunk *chunks[4][4]; | ||||
|  | ||||
|    int padding2[16]; | ||||
|    raw_mesh rm; | ||||
|    int padding3[16]; | ||||
|  | ||||
|    uint8 *build_buffer; | ||||
|    uint8 *face_buffer ; | ||||
| } mesh_worker; | ||||
|  | ||||
| enum | ||||
| { | ||||
|    WSTATE_idle, | ||||
|    WSTATE_requested, | ||||
|    WSTATE_running, | ||||
|    WSTATE_mesh_ready, | ||||
| }; | ||||
|  | ||||
| mesh_worker mesh_data[MAX_MESH_WORKERS]; | ||||
| int num_meshes_started; // stats | ||||
|  | ||||
| int request_chunk(int chunk_x, int chunk_y); | ||||
| void update_meshes_from_render_thread(void); | ||||
|  | ||||
| unsigned char tex2_data[64][4]; | ||||
|  | ||||
| void init_tex2_gradient(void) | ||||
| { | ||||
|    int i; | ||||
|    for (i=0; i < 16; ++i) { | ||||
|       tex2_data[i+ 0][0] = 64 + 12*i; | ||||
|       tex2_data[i+ 0][1] = 32; | ||||
|       tex2_data[i+ 0][2] = 64; | ||||
|  | ||||
|       tex2_data[i+16][0] = 255; | ||||
|       tex2_data[i+16][1] = 32 + 8*i; | ||||
|       tex2_data[i+16][2] = 64; | ||||
|  | ||||
|       tex2_data[i+32][0] = 255; | ||||
|       tex2_data[i+32][1] = 160; | ||||
|       tex2_data[i+32][2] = 64 + 12*i; | ||||
|  | ||||
|       tex2_data[i+48][0] = 255; | ||||
|       tex2_data[i+48][1] = 160 + 6*i; | ||||
|       tex2_data[i+48][2] = 255; | ||||
|    } | ||||
| } | ||||
|  | ||||
| void set_tex2_alpha(float fa) | ||||
| { | ||||
|    int i; | ||||
|    int a = (int) stb_lerp(fa, 0, 255); | ||||
|    if (a < 0) a = 0; else if (a > 255) a = 255; | ||||
|    glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]); | ||||
|    for (i=0; i < 64; ++i) { | ||||
|       tex2_data[i][3] = a; | ||||
|       glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, 1,1,1, GL_RGBA, GL_UNSIGNED_BYTE, tex2_data[i]); | ||||
|    } | ||||
| } | ||||
|  | ||||
| void render_init(void) | ||||
| { | ||||
|    int i; | ||||
|    char *binds[] = { "attr_vertex", "attr_face", NULL }; | ||||
|    char *vertex; | ||||
|    char *fragment; | ||||
|    int w=0,h=0; | ||||
|  | ||||
|    unsigned char *texdata = stbi_load("terrain.png", &w, &h, NULL, 4); | ||||
|  | ||||
|    stbvox_init_mesh_maker(&g_mesh_maker); | ||||
|    for (i=0; i < num_mesh_workers; ++i) { | ||||
|       stbvox_init_mesh_maker(&mesh_data[i].rm.mm); | ||||
|    } | ||||
|  | ||||
|    vertex = stbvox_get_vertex_shader(); | ||||
|    fragment = stbvox_get_fragment_shader(); | ||||
|  | ||||
|    { | ||||
|       char error_buffer[1024]; | ||||
|       char *main_vertex[] = { vertex, NULL }; | ||||
|       char *main_fragment[] = { fragment, NULL }; | ||||
|       main_prog = stbgl_create_program(main_vertex, main_fragment, binds, error_buffer, sizeof(error_buffer)); | ||||
|       if (main_prog == 0) { | ||||
|          ods("Compile error for main shader: %s\n", error_buffer); | ||||
|          assert(0); | ||||
|          exit(1); | ||||
|       } | ||||
|    } | ||||
|    //init_index_buffer(); | ||||
|  | ||||
|    make_texture_buffer_for_uniform(STBVOX_UNIFORM_texscale     , 3); | ||||
|    make_texture_buffer_for_uniform(STBVOX_UNIFORM_texgen       , 4); | ||||
|    make_texture_buffer_for_uniform(STBVOX_UNIFORM_color_table  , 5); | ||||
|  | ||||
|    glGenTextures(2, voxel_tex); | ||||
|  | ||||
|    glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[0]); | ||||
|    glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA, | ||||
|                       TEX_SIZE,TEX_SIZE,256, | ||||
|                       0,GL_RGBA,GL_UNSIGNED_BYTE,NULL); | ||||
|    for (i=0; i < 256; ++i) { | ||||
|       if (texdata) | ||||
|          scale_texture(texdata, (i&15)*w/16, (h/16)*(i>>4), w,h); | ||||
|       else | ||||
|          build_base_texture(i); | ||||
|       glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]); | ||||
|    } | ||||
|    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); | ||||
|    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | ||||
|    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAX_ANISOTROPY_EXT, 16); | ||||
|    #ifdef STBVOX_CONFIG_TEX1_EDGE_CLAMP | ||||
|    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | ||||
|    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); | ||||
|    #endif | ||||
|  | ||||
|    glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT); | ||||
|  | ||||
|    glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]); | ||||
|    glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA, | ||||
|                       1,1,64, | ||||
|                       0,GL_RGBA,GL_UNSIGNED_BYTE,NULL); | ||||
|    init_tex2_gradient(); | ||||
|    set_tex2_alpha(0.0); | ||||
|    #if 0 | ||||
|    for (i=0; i < 128; ++i) { | ||||
|       //build_overlay_texture(i); | ||||
|       glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]); | ||||
|    } | ||||
|    #endif | ||||
|    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); | ||||
|    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | ||||
|    glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT); | ||||
| } | ||||
|  | ||||
| void world_init(void) | ||||
| { | ||||
|    int a,b,x,y; | ||||
|  | ||||
|    Uint64 start_time, end_time; | ||||
|    #ifdef NDEBUG | ||||
|    int range = 32; | ||||
|    #else | ||||
|    int range = 12; | ||||
|    #endif | ||||
|  | ||||
|    start_time = SDL_GetPerformanceCounter(); | ||||
|  | ||||
|    // iterate in 8x8 clusters of qchunks at a time to get better converted-chunk-cache reuse | ||||
|    // than a purely row-by-row ordering is (single-threaded this is a bigger win than | ||||
|    // any of the above optimizations were, since it halves zlib/mc-conversion costs) | ||||
|    for (x=-range; x <= range; x += 16) | ||||
|       for (y=-range; y <= range; y += 16) | ||||
|          for (b=y; b < y+16 && b <= range; b += 2) | ||||
|             for (a=x; a < x+16 && a <= range; a += 2) | ||||
|                while (!request_chunk(a, b)) { // if request fails, all threads are busy | ||||
|                   update_meshes_from_render_thread(); | ||||
|                   SDL_Delay(1); | ||||
|                } | ||||
|  | ||||
|    // wait until all the workers are done, | ||||
|    // (this is only needed if we want to time | ||||
|    // when the build finishes, or when we want to reset the | ||||
|    // cache size; otherwise we could just go ahead and | ||||
|    // start rendering whatever we've got) | ||||
|    for(;;) { | ||||
|       int i; | ||||
|       update_meshes_from_render_thread(); | ||||
|       for (i=0; i < num_mesh_workers; ++i) | ||||
|          if (mesh_data[i].state != WSTATE_idle) | ||||
|             break; | ||||
|       if (i == num_mesh_workers) | ||||
|          break; | ||||
|       SDL_Delay(3); | ||||
|    } | ||||
|  | ||||
|    end_time = SDL_GetPerformanceCounter(); | ||||
|    ods("Build time: %7.2fs\n", (end_time - start_time) / (float) SDL_GetPerformanceFrequency()); | ||||
|  | ||||
|    // don't waste lots of storage on chunk caches once it's finished starting-up; | ||||
|    // this was only needed to be this large because we worked in large blocks | ||||
|    // to maximize sharing | ||||
|    reset_cache_size(32); | ||||
| } | ||||
|  | ||||
| extern SDL_mutex * chunk_cache_mutex; | ||||
|  | ||||
| int mesh_worker_handler(void *data) | ||||
| { | ||||
|    mesh_worker *mw = data; | ||||
|    mw->face_buffer = malloc(FACE_BUFFER_SIZE); | ||||
|    mw->build_buffer = malloc(BUILD_BUFFER_SIZE); | ||||
|  | ||||
|    // this loop only works because the compiler can't | ||||
|    // tell that the SDL_calls don't access mw->state; | ||||
|    // really we should barrier that stuff | ||||
|    for(;;) { | ||||
|       int i,j; | ||||
|       int cx,cy; | ||||
|  | ||||
|       // wait for a chunk request | ||||
|       SDL_SemWait(mw->request_received); | ||||
|  | ||||
|       // analyze the chunk request | ||||
|       assert(mw->state == WSTATE_requested); | ||||
|       cx = mw->request_cx; | ||||
|       cy = mw->request_cy; | ||||
|  | ||||
|       // this is inaccurate as it can block while another thread has the cache locked | ||||
|       mw->state = WSTATE_running; | ||||
|  | ||||
|       // get the chunks we need (this takes a lock and caches them) | ||||
|       for (j=0; j < 4; ++j) | ||||
|          for (i=0; i < 4; ++i) | ||||
|             mw->chunks[j][i] = get_converted_fastchunk(cx-1 + i, cy-1 + j); | ||||
|  | ||||
|       // build the mesh based on the chunks | ||||
|       mw->rm.build_buffer = mw->build_buffer; | ||||
|       mw->rm.face_buffer = mw->face_buffer; | ||||
|       build_chunk(cx, cy, mw->chunks, &mw->rm); | ||||
|       mw->state = WSTATE_mesh_ready; | ||||
|       // don't need to notify of this, because it gets polled | ||||
|  | ||||
|       // when done, free the chunks | ||||
|  | ||||
|       // for efficiency we just take the mutex once around the whole thing, | ||||
|       // though this spreads the mutex logic over two files | ||||
|       SDL_LockMutex(chunk_cache_mutex); | ||||
|       for (j=0; j < 4; ++j) | ||||
|          for (i=0; i < 4; ++i) { | ||||
|             deref_fastchunk(mw->chunks[j][i]); | ||||
|             mw->chunks[j][i] = NULL; | ||||
|          } | ||||
|       SDL_UnlockMutex(chunk_cache_mutex); | ||||
|    } | ||||
|    return 0; | ||||
| } | ||||
|  | ||||
| int request_chunk(int chunk_x, int chunk_y) | ||||
| { | ||||
|    int i; | ||||
|    for (i=0; i < num_mesh_workers; ++i) { | ||||
|       mesh_worker *mw = &mesh_data[i]; | ||||
|       if (mw->state == WSTATE_idle) { | ||||
|          mw->request_cx = chunk_x; | ||||
|          mw->request_cy = chunk_y; | ||||
|          mw->state = WSTATE_requested; | ||||
|          SDL_SemPost(mw->request_received); | ||||
|          ++num_meshes_started; | ||||
|          return 1; | ||||
|       } | ||||
|    } | ||||
|    return 0; | ||||
| } | ||||
|  | ||||
| void prepare_threads(void) | ||||
| { | ||||
|    int i; | ||||
|    int num_proc = SDL_GetCPUCount(); | ||||
|  | ||||
|    if (num_proc > 6) | ||||
|       num_mesh_workers = num_proc/2; | ||||
|    else if (num_proc > 4) | ||||
|       num_mesh_workers = 4; | ||||
|    else  | ||||
|       num_mesh_workers = num_proc-1; | ||||
|  | ||||
| // @TODO | ||||
| //   Thread usage is probably pretty terrible; need to make a | ||||
| //   separate queue of needed chunks, instead of just generating | ||||
| //   one request per thread per frame, and a separate queue of | ||||
| //   results. (E.g. If it takes 1.5 frames to build mesh, thread | ||||
| //   is idle for 0.5 frames.) To fake this for now, I've just | ||||
| //   doubled the number of threads to let those serve as a 'queue', | ||||
| //   but that's dumb. | ||||
|  | ||||
|    num_mesh_workers *= 2; // try to get better thread usage | ||||
|  | ||||
|    if (num_mesh_workers > MAX_MESH_WORKERS) | ||||
|       num_mesh_workers = MAX_MESH_WORKERS; | ||||
|  | ||||
|    for (i=0; i < num_mesh_workers; ++i) { | ||||
|       mesh_worker *data = &mesh_data[i]; | ||||
|       data->request_received = SDL_CreateSemaphore(0); | ||||
|       data->chunk_server_done_processing = SDL_CreateSemaphore(0); | ||||
|       SDL_CreateThread(mesh_worker_handler, "mesh worker", data); | ||||
|    } | ||||
| } | ||||
|  | ||||
|  | ||||
| // "better" buffer uploading | ||||
| #if 0 | ||||
|    if (glBufferStorage) { | ||||
|       glDeleteBuffersARB(1, &vb->vbuf); | ||||
|       glGenBuffersARB(1, &vb->vbuf); | ||||
|  | ||||
|       glBindBufferARB(GL_ARRAY_BUFFER_ARB, vb->vbuf); | ||||
|       glBufferStorage(GL_ARRAY_BUFFER_ARB, sizeof(build_buffer), build_buffer, 0); | ||||
|       glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); | ||||
|    } else { | ||||
|       glBindBufferARB(GL_ARRAY_BUFFER_ARB, vb->vbuf); | ||||
|       glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(build_buffer), build_buffer, GL_STATIC_DRAW_ARB); | ||||
|       glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); | ||||
|    } | ||||
| #endif | ||||
|  | ||||
|  | ||||
| typedef struct | ||||
| { | ||||
|    float x,y,z,w; | ||||
| } plane; | ||||
|  | ||||
| static plane frustum[6]; | ||||
|  | ||||
| static void matd_mul(double out[4][4], double src1[4][4], double src2[4][4]) | ||||
| { | ||||
|    int i,j,k; | ||||
|    for (j=0; j < 4; ++j) { | ||||
|       for (i=0; i < 4; ++i) { | ||||
|          double t=0; | ||||
|          for (k=0; k < 4; ++k) | ||||
|             t += src1[k][i] * src2[j][k]; | ||||
|          out[i][j] = t; | ||||
|       } | ||||
|    } | ||||
| } | ||||
|  | ||||
| // https://fgiesen.wordpress.com/2012/08/31/frustum-planes-from-the-projection-matrix/ | ||||
| static void compute_frustum(void) | ||||
| { | ||||
|    int i; | ||||
|    GLdouble mv[4][4],proj[4][4], mvproj[4][4]; | ||||
|    glGetDoublev(GL_MODELVIEW_MATRIX , mv[0]); | ||||
|    glGetDoublev(GL_PROJECTION_MATRIX, proj[0]); | ||||
|    matd_mul(mvproj, proj, mv); | ||||
|    for (i=0; i < 4; ++i) { | ||||
|       (&frustum[0].x)[i] = (float) (mvproj[3][i] + mvproj[0][i]); | ||||
|       (&frustum[1].x)[i] = (float) (mvproj[3][i] - mvproj[0][i]); | ||||
|       (&frustum[2].x)[i] = (float) (mvproj[3][i] + mvproj[1][i]); | ||||
|       (&frustum[3].x)[i] = (float) (mvproj[3][i] - mvproj[1][i]); | ||||
|       (&frustum[4].x)[i] = (float) (mvproj[3][i] + mvproj[2][i]); | ||||
|       (&frustum[5].x)[i] = (float) (mvproj[3][i] - mvproj[2][i]); | ||||
|    }    | ||||
| } | ||||
|  | ||||
| static int test_plane(plane *p, float x0, float y0, float z0, float x1, float y1, float z1) | ||||
| { | ||||
|    // return false if the box is entirely behind the plane | ||||
|    float d=0; | ||||
|    assert(x0 <= x1 && y0 <= y1 && z0 <= z1); | ||||
|    if (p->x > 0) d += x1*p->x; else d += x0*p->x; | ||||
|    if (p->y > 0) d += y1*p->y; else d += y0*p->y; | ||||
|    if (p->z > 0) d += z1*p->z; else d += z0*p->z; | ||||
|    return d + p->w >= 0; | ||||
| } | ||||
|  | ||||
| static int is_box_in_frustum(float *bmin, float *bmax) | ||||
| { | ||||
|    int i; | ||||
|    for (i=0; i < 6; ++i) | ||||
|       if (!test_plane(&frustum[i], bmin[0], bmin[1], bmin[2], bmax[0], bmax[1], bmax[2])) | ||||
|          return 0; | ||||
|    return 1; | ||||
| } | ||||
|  | ||||
| float compute_priority(int cx, int cy, float x, float y) | ||||
| { | ||||
|    float distx, disty, dist2; | ||||
|    distx = (cx*16+8) - x; | ||||
|    disty = (cy*16+8) - y; | ||||
|    dist2 = distx*distx + disty*disty; | ||||
|    return view_dist_in_chunks*view_dist_in_chunks * 16 * 16 - dist2; | ||||
| } | ||||
|  | ||||
| int chunk_locations, chunks_considered, chunks_in_frustum; | ||||
| int quads_considered, quads_rendered; | ||||
| int chunk_storage_rendered, chunk_storage_considered, chunk_storage_total; | ||||
| int update_frustum = 1; | ||||
|  | ||||
| #ifdef SHORTVIEW | ||||
| int max_chunk_storage = 450 << 20; | ||||
| int min_chunk_storage = 350 << 20; | ||||
| #else | ||||
| int max_chunk_storage = 900 << 20; | ||||
| int min_chunk_storage = 800 << 20; | ||||
| #endif | ||||
|  | ||||
| float min_priority = -500; // this really wants to be in unit space, not squared space | ||||
|  | ||||
| int num_meshes_uploaded; | ||||
|  | ||||
| void update_meshes_from_render_thread(void) | ||||
| { | ||||
|    int i; | ||||
|    for (i=0; i < num_mesh_workers; ++i) { | ||||
|       mesh_worker *mw = &mesh_data[i]; | ||||
|       if (mw->state == WSTATE_mesh_ready) { | ||||
|          upload_mesh_data(&mw->rm); | ||||
|          ++num_meshes_uploaded; | ||||
|          mw->state = WSTATE_idle; | ||||
|       } | ||||
|    } | ||||
| } | ||||
|  | ||||
| extern float tex2_alpha; | ||||
| extern int global_hack; | ||||
| int num_threads_active; | ||||
| float chunk_server_activity; | ||||
|  | ||||
| void render_caves(float campos[3]) | ||||
| { | ||||
|    float x = campos[0], y = campos[1]; | ||||
|    int qchunk_x, qchunk_y; | ||||
|    int cam_x, cam_y; | ||||
|    int i,j, rad; | ||||
|  | ||||
|    compute_frustum(); | ||||
|  | ||||
|    chunk_locations = chunks_considered = chunks_in_frustum = 0; | ||||
|    quads_considered = quads_rendered = 0; | ||||
|    chunk_storage_total = chunk_storage_considered = chunk_storage_rendered = 0; | ||||
|  | ||||
|    cam_x = (int) floor(x+0.5); | ||||
|    cam_y = (int) floor(y+0.5); | ||||
|  | ||||
|    qchunk_x = (((int) floor(x)+16) >> 5) << 1; | ||||
|    qchunk_y = (((int) floor(y)+16) >> 5) << 1; | ||||
|  | ||||
|    glEnable(GL_ALPHA_TEST); | ||||
|    glAlphaFunc(GL_GREATER, 0.5); | ||||
|  | ||||
|    stbglUseProgram(main_prog); | ||||
|    setup_uniforms(campos); // set uniforms to default values inefficiently | ||||
|    glActiveTextureARB(GL_TEXTURE2_ARB); | ||||
|    stbglEnableVertexAttribArray(0); | ||||
|  | ||||
|    { | ||||
|       float lighting[2][3] = { { campos[0],campos[1],campos[2] }, { 0.75,0.75,0.65f } }; | ||||
|       float bright = 8; | ||||
|       lighting[1][0] *= bright; | ||||
|       lighting[1][1] *= bright; | ||||
|       lighting[1][2] *= bright; | ||||
|       stbglUniform3fv(stbgl_find_uniform(main_prog, "light_source"), 2, lighting[0]); | ||||
|    } | ||||
|  | ||||
|    if (global_hack) | ||||
|       set_tex2_alpha(tex2_alpha); | ||||
|  | ||||
|    num_meshes_uploaded = 0; | ||||
|    update_meshes_from_render_thread(); | ||||
|  | ||||
|    // traverse all in-range chunks and analyze them | ||||
|    for (j=-view_dist_in_chunks; j <= view_dist_in_chunks; j += 2) { | ||||
|       for (i=-view_dist_in_chunks; i <= view_dist_in_chunks; i += 2) { | ||||
|          float priority; | ||||
|          int cx = qchunk_x + i; | ||||
|          int cy = qchunk_y + j; | ||||
|  | ||||
|          priority = compute_priority(cx, cy, x, y); | ||||
|          if (priority >= min_priority) { | ||||
|             int slot_x = (cx>>1) & (CACHED_MESH_NUM_X-1); | ||||
|             int slot_y = (cy>>1) & (CACHED_MESH_NUM_Y-1); | ||||
|             chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x]; | ||||
|             ++chunk_locations; | ||||
|             if (cm->state == STATE_valid && priority >= 0) { | ||||
|                // check if chunk pos actually matches | ||||
|                if (cm->chunk_x != cx || cm->chunk_y != cy) { | ||||
|                   // we have a stale chunk we need to recreate | ||||
|                   free_chunk(slot_x, slot_y); // it probably will have already gotten freed, but just in case | ||||
|                } | ||||
|             } | ||||
|             if (cm->state == STATE_invalid) { | ||||
|                cm->chunk_x = cx; | ||||
|                cm->chunk_y = cy; | ||||
|                cm->state = STATE_needed; | ||||
|             } | ||||
|             cm->priority = priority; | ||||
|          } | ||||
|       } | ||||
|    } | ||||
|  | ||||
|    // draw front-to-back | ||||
|    for (rad = 0; rad <= view_dist_in_chunks; rad += 2) { | ||||
|       for (j=-rad; j <= rad; j += 2) { | ||||
|          // if j is +- rad, then iterate i through all values | ||||
|          // if j isn't +-rad, then i should be only -rad & rad | ||||
|          int step = 2; | ||||
|          if (abs(j) != rad) | ||||
|             step = 2*rad; | ||||
|          for (i=-rad; i <= rad; i += step) { | ||||
|             int cx = qchunk_x + i; | ||||
|             int cy = qchunk_y + j; | ||||
|             int slot_x = (cx>>1) & (CACHED_MESH_NUM_X-1); | ||||
|             int slot_y = (cy>>1) & (CACHED_MESH_NUM_Y-1); | ||||
|             chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x]; | ||||
|             if (cm->state == STATE_valid && cm->priority >= 0) { | ||||
|                ++chunks_considered; | ||||
|                quads_considered += cm->num_quads; | ||||
|                if (is_box_in_frustum(cm->bounds[0], cm->bounds[1])) { | ||||
|                   ++chunks_in_frustum; | ||||
|  | ||||
|                   // @TODO if in range | ||||
|                   stbglUniform3fv(uniform_loc[STBVOX_UNIFORM_transform], 3, cm->transform[0]); | ||||
|                   glBindBufferARB(GL_ARRAY_BUFFER_ARB, cm->vbuf); | ||||
|                   glVertexAttribIPointer(0, 1, GL_UNSIGNED_INT, 4, (void*) 0); | ||||
|                   glBindTexture(GL_TEXTURE_BUFFER_ARB, cm->fbuf_tex); | ||||
|                   glDrawArrays(GL_QUADS, 0, cm->num_quads*4); | ||||
|                   quads_rendered += cm->num_quads; | ||||
|  | ||||
|                   chunk_storage_rendered += cm->vbuf_size + cm->fbuf_size; | ||||
|                } | ||||
|                chunk_storage_considered += cm->vbuf_size + cm->fbuf_size; | ||||
|             } | ||||
|          } | ||||
|       } | ||||
|    } | ||||
|  | ||||
|    stbglDisableVertexAttribArray(0); | ||||
|    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); | ||||
|    glActiveTextureARB(GL_TEXTURE0_ARB); | ||||
|  | ||||
|    stbglUseProgram(0); | ||||
|    num_meshes_started = 0; | ||||
|  | ||||
|    { | ||||
|       #define MAX_QUEUE  8 | ||||
|       float highest_priority[MAX_QUEUE]; | ||||
|       int highest_i[MAX_QUEUE], highest_j[MAX_QUEUE]; | ||||
|       float lowest_priority = view_dist_in_chunks * view_dist_in_chunks * 16 * 16.0f; | ||||
|       int lowest_i = -1, lowest_j = -1; | ||||
|  | ||||
|       for (i=0; i < MAX_QUEUE; ++i) { | ||||
|          highest_priority[i] = min_priority; | ||||
|          highest_i[i] = -1; | ||||
|          highest_j[i] = -1; | ||||
|       } | ||||
|  | ||||
|       for (j=0; j < CACHED_MESH_NUM_Y; ++j) { | ||||
|          for (i=0; i < CACHED_MESH_NUM_X; ++i) { | ||||
|             chunk_mesh *cm = &cached_chunk_mesh[j][i]; | ||||
|             if (cm->state == STATE_valid) { | ||||
|                cm->priority = compute_priority(cm->chunk_x, cm->chunk_y, x, y); | ||||
|                chunk_storage_total += cm->vbuf_size + cm->fbuf_size; | ||||
|                if (cm->priority < lowest_priority) { | ||||
|                   lowest_priority = cm->priority; | ||||
|                   lowest_i = i; | ||||
|                   lowest_j = j; | ||||
|                } | ||||
|             } | ||||
|             if (cm->state == STATE_needed) { | ||||
|                cm->priority = compute_priority(cm->chunk_x, cm->chunk_y, x, y); | ||||
|                if (cm->priority < min_priority) | ||||
|                   cm->state = STATE_invalid; | ||||
|                else if (cm->priority > highest_priority[0]) { | ||||
|                   int k; | ||||
|                   highest_priority[0] = cm->priority; | ||||
|                   highest_i[0] = i; | ||||
|                   highest_j[0] = j; | ||||
|                   // bubble this up to right place | ||||
|                   for (k=0; k < MAX_QUEUE-1; ++k) { | ||||
|                      if (highest_priority[k] > highest_priority[k+1]) { | ||||
|                         highest_priority[k] = highest_priority[k+1]; | ||||
|                         highest_priority[k+1] = cm->priority; | ||||
|                         highest_i[k] = highest_i[k+1]; | ||||
|                         highest_i[k+1] = i; | ||||
|                         highest_j[k] = highest_j[k+1]; | ||||
|                         highest_j[k+1] = j; | ||||
|                      } else { | ||||
|                         break; | ||||
|                      } | ||||
|                   } | ||||
|                } | ||||
|             } | ||||
|          } | ||||
|       } | ||||
|  | ||||
|  | ||||
|       // I couldn't find any straightforward logic that avoids | ||||
|       // the hysteresis problem of continually creating & freeing | ||||
|       // a block on the margin, so I just don't free a block until | ||||
|       // it's out of range, but this doesn't actually correctly | ||||
|       // handle when the cache is too small for the given range | ||||
|       if (chunk_storage_total >= min_chunk_storage && lowest_i >= 0) { | ||||
|          if (cached_chunk_mesh[lowest_j][lowest_i].priority < -1200) // -1000? 0? | ||||
|             free_chunk(lowest_i, lowest_j); | ||||
|       } | ||||
|  | ||||
|       if (chunk_storage_total < max_chunk_storage && highest_i[0] >= 0) { | ||||
|          for (j=MAX_QUEUE-1; j >= 0; --j) { | ||||
|             if (highest_j[0] >= 0) { | ||||
|                chunk_mesh *cm = &cached_chunk_mesh[highest_j[j]][highest_i[j]]; | ||||
|                if (request_chunk(cm->chunk_x, cm->chunk_y)) { | ||||
|                   cm->state = STATE_requested; | ||||
|                } else { | ||||
|                   // if we couldn't queue this one, skip the remainder | ||||
|                   break; | ||||
|                } | ||||
|             } | ||||
|          } | ||||
|       } | ||||
|    } | ||||
|  | ||||
|    update_meshes_from_render_thread(); | ||||
|  | ||||
|    num_threads_active = 0; | ||||
|    for (i=0; i < num_mesh_workers; ++i) { | ||||
|       num_threads_active += (mesh_data[i].state == WSTATE_running); | ||||
|    } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user