aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBen Skeggs <bskeggs@redhat.com>2018-05-08 20:39:47 +1000
committerBen Skeggs <bskeggs@redhat.com>2018-05-18 15:01:26 +1000
commit5f6474a4e6ce3291abb1843b279a23a0bb050d37 (patch)
treea75e8a6f196cda8fbcaa0c14594420ef713dcdc7
parentdrm/nouveau/gr/gf100-: virtualise trap_mp (diff)
downloadlinux-dev-5f6474a4e6ce3291abb1843b279a23a0bb050d37.tar.xz
linux-dev-5f6474a4e6ce3291abb1843b279a23a0bb050d37.zip
drm/nouveau/gr/gf100-: port tile mapping calculations from NVGPU
There are also a couple of hardcoded tables for a couple of very specific configurations that NVGPU's algorithm didn't work for. Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c118
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c1
22 files changed, 153 insertions, 77 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
index 4c25389fe80a..949e1216b8ba 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -1116,27 +1116,14 @@ gf100_grctx_generate_rop_mapping(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
u32 data[6] = {}, data2[2] = {};
- u8 tpcnr[GPC_MAX];
u8 shift, ntpcv;
- int gpc, tpc, i;
-
- /* calculate first set of magics */
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
- gpc = -1;
- for (tpc = 0; tpc < gr->tpc_total; tpc++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpcnr[gpc]--;
-
- data[tpc / 6] |= gpc << ((tpc % 6) * 5);
- }
+ int i;
- for (; tpc < 32; tpc++)
- data[tpc / 6] |= 7 << ((tpc % 6) * 5);
+ /* Pack tile map into register format. */
+ for (i = 0; i < 32; i++)
+ data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
- /* and the second... */
+ /* Magic. */
shift = 0;
ntpcv = gr->tpc_total;
while (!(ntpcv & (1 << 4))) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
index 4b7b4f8f75fd..bc4e86bbb9d4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -194,27 +194,14 @@ gf117_grctx_generate_rop_mapping(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
u32 data[6] = {}, data2[2] = {};
- u8 tpcnr[GPC_MAX];
u8 shift, ntpcv;
- int gpc, tpc, i;
-
- /* calculate first set of magics */
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
- gpc = -1;
- for (tpc = 0; tpc < gr->tpc_total; tpc++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpcnr[gpc]--;
-
- data[tpc / 6] |= gpc << ((tpc % 6) * 5);
- }
+ int i;
- for (; tpc < 32; tpc++)
- data[tpc / 6] |= 7 << ((tpc % 6) * 5);
+ /* Pack tile map into register format. */
+ for (i = 0; i < 32; i++)
+ data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
- /* and the second... */
+ /* Magic. */
shift = 0;
ntpcv = gr->tpc_total;
while (!(ntpcv & (1 << 4))) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index f05d9d4c6e5c..519b109f40d2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -1652,6 +1652,82 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
return ret;
}
+/*
+ * Calculate gr->screen_tile_row_offset and the tile->GPC mapping held in
+ * gr->tile[], based on the GPC/TPC counts already stored in 'gr'.
+ *
+ * The mapping calculation is ported from NVGPU.  Entries of gr->tile[] are
+ * filled in order, starting at index 0, until gr->tpc_total entries have
+ * been assigned.
+ */
+void
+gf100_gr_oneinit_tiles(struct gf100_gr *gr)
+{
+	static const u8 primes[] = {
+		3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61
+	};
+	int init_frac[GPC_MAX], init_err[GPC_MAX], run_err[GPC_MAX], i, j;
+	/*
+	 * These must be signed.  run_err[] can be negative (init_err[0] is
+	 * -comm_denom/2), and comparing it against an unsigned comm_denom
+	 * would convert the negative value to a huge unsigned one, making
+	 * the ">=" test below succeed when it should fail.
+	 */
+	int mul_factor, comm_denom;
+	u8 gpc_map[GPC_MAX];
+	bool sorted;
+
+	/* Fixed row offsets for specific TPC totals. */
+	switch (gr->tpc_total) {
+	case 15: gr->screen_tile_row_offset = 0x06; break;
+	case 14: gr->screen_tile_row_offset = 0x05; break;
+	case 13: gr->screen_tile_row_offset = 0x02; break;
+	case 11: gr->screen_tile_row_offset = 0x07; break;
+	case 10: gr->screen_tile_row_offset = 0x06; break;
+	case  7:
+	case  5: gr->screen_tile_row_offset = 0x01; break;
+	case  3: gr->screen_tile_row_offset = 0x02; break;
+	case  2:
+	case  1: gr->screen_tile_row_offset = 0x01; break;
+	default:
+		/*
+		 * Otherwise use the first listed prime that does not divide
+		 * the TPC total; 3 remains if every listed prime divides it.
+		 */
+		gr->screen_tile_row_offset = 0x03;
+		for (i = 0; i < ARRAY_SIZE(primes); i++) {
+			if (gr->tpc_total % primes[i]) {
+				gr->screen_tile_row_offset = primes[i];
+				break;
+			}
+		}
+		break;
+	}
+
+	/* Sort GPCs by TPC count, highest-to-lowest (bubble sort). */
+	for (i = 0; i < gr->gpc_nr; i++)
+		gpc_map[i] = i;
+	sorted = false;
+
+	while (!sorted) {
+		for (sorted = true, i = 0; i < gr->gpc_nr - 1; i++) {
+			if (gr->tpc_nr[gpc_map[i + 1]] >
+			    gr->tpc_nr[gpc_map[i + 0]]) {
+				u8 swap = gpc_map[i];
+				gpc_map[i + 0] = gpc_map[i + 1];
+				gpc_map[i + 1] = swap;
+				sorted = false;
+			}
+		}
+	}
+
+	/* Determine tile->GPC mapping */
+
+	/* Scale by two when gpc_nr * tpc_max is odd, so comm_denom is even. */
+	mul_factor = gr->gpc_nr * gr->tpc_max;
+	if (mul_factor & 1)
+		mul_factor = 2;
+	else
+		mul_factor = 1;
+
+	comm_denom = gr->gpc_nr * gr->tpc_max * mul_factor;
+
+	/* Per-GPC increment and starting error, indexed in sorted order. */
+	for (i = 0; i < gr->gpc_nr; i++) {
+		init_frac[i] = gr->tpc_nr[gpc_map[i]] * gr->gpc_nr * mul_factor;
+		init_err[i] = i * gr->tpc_max * mul_factor - comm_denom/2;
+		run_err[i] = init_frac[i] + init_err[i];
+	}
+
+	/*
+	 * Walk the sorted GPCs repeatedly.  A GPC receives the next tile
+	 * whenever its running error reaches half of comm_denom, at which
+	 * point comm_denom is subtracted from it again.
+	 */
+	for (i = 0; i < gr->tpc_total;) {
+		for (j = 0; j < gr->gpc_nr; j++) {
+			if ((run_err[j] * 2) >= comm_denom) {
+				gr->tile[i++] = gpc_map[j];
+				run_err[j] += init_frac[j] - comm_denom;
+			} else {
+				run_err[j] += init_frac[j];
+			}
+		}
+	}
+}
+
static int
gf100_gr_oneinit(struct nvkm_gr *base)
{
@@ -1691,45 +1767,8 @@ gf100_gr_oneinit(struct nvkm_gr *base)
}
}
- /*XXX: these need figuring out... though it might not even matter */
- switch (device->chipset) {
- case 0xc0:
- if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
- gr->screen_tile_row_offset = 0x07;
- } else
- if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
- gr->screen_tile_row_offset = 0x05;
- } else
- if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
- gr->screen_tile_row_offset = 0x06;
- }
- break;
- case 0xc3: /* 450, 4/0/0/0, 2 */
- gr->screen_tile_row_offset = 0x03;
- break;
- case 0xc4: /* 460, 3/4/0/0, 4 */
- gr->screen_tile_row_offset = 0x01;
- break;
- case 0xc1: /* 2/0/0/0, 1 */
- gr->screen_tile_row_offset = 0x01;
- break;
- case 0xc8: /* 4/4/3/4, 5 */
- gr->screen_tile_row_offset = 0x06;
- break;
- case 0xce: /* 4/4/0/0, 4 */
- gr->screen_tile_row_offset = 0x03;
- break;
- case 0xcf: /* 4/0/0/0, 3 */
- gr->screen_tile_row_offset = 0x03;
- break;
- case 0xd7:
- case 0xd9: /* 1/0/0/0, 1 */
- case 0xea: /* gk20a */
- case 0x12b: /* gm20b */
- gr->screen_tile_row_offset = 0x01;
- break;
- }
-
+ memset(gr->tile, 0xff, sizeof(gr->tile));
+ gr->func->oneinit_tiles(gr);
return 0;
}
@@ -2164,6 +2203,7 @@ gf100_gr_gpccs_ucode = {
static const struct gf100_gr_func
gf100_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index c25b93a0cb03..53a173e023b4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -107,12 +107,13 @@ struct gf100_gr {
u8 ppc_tpc_nr[GPC_MAX][4];
u8 ppc_tpc_min;
+ u8 screen_tile_row_offset;
+ u8 tile[TPC_MAX];
+
struct gf100_gr_data mmio_data[4];
struct gf100_gr_mmio mmio_list[4096/8];
u32 size;
u32 *data;
-
- u8 screen_tile_row_offset;
};
int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *,
@@ -123,6 +124,7 @@ void *gf100_gr_dtor(struct nvkm_gr *);
struct gf100_gr_func {
void (*dtor)(struct gf100_gr *);
+ void (*oneinit_tiles)(struct gf100_gr *);
int (*init)(struct gf100_gr *);
void (*init_gpc_mmu)(struct gf100_gr *);
void (*init_r405a14)(struct gf100_gr *);
@@ -164,6 +166,7 @@ struct gf100_gr_func {
};
int gf100_gr_rops(struct gf100_gr *);
+void gf100_gr_oneinit_tiles(struct gf100_gr *);
int gf100_gr_init(struct gf100_gr *);
void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
void gf100_gr_init_zcull(struct gf100_gr *);
@@ -191,6 +194,7 @@ void gm107_gr_init_400054(struct gf100_gr *);
int gk20a_gr_init(struct gf100_gr *);
+void gm200_gr_oneinit_tiles(struct gf100_gr *);
int gm200_gr_rops(struct gf100_gr *);
void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c
index df9cbed7ce50..8b49b8fe6d2c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c
@@ -114,6 +114,7 @@ gf104_gr_pack_mmio[] = {
static const struct gf100_gr_func
gf104_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c
index 8ffa0fd1134f..6432aeba0a14 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c
@@ -111,6 +111,7 @@ gf108_gr_init_r405a14(struct gf100_gr *gr)
static const struct gf100_gr_func
gf108_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_r405a14 = gf108_gr_init_r405a14,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
index 0d4293e3e4ea..4e007c945233 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
@@ -86,6 +86,7 @@ gf110_gr_pack_mmio[] = {
static const struct gf100_gr_func
gf110_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
index e3c1dbbfbf34..2ddb728fb7ca 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
@@ -150,6 +150,7 @@ gf117_gr_init_zcull(struct gf100_gr *gr)
static const struct gf100_gr_func
gf117_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
index 1ed70b93a10a..f0f10a4d8a26 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
@@ -177,6 +177,7 @@ gf119_gr_pack_mmio[] = {
static const struct gf100_gr_func
gf119_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
index 86819ab7f9a4..d57fb5ff1fe9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
@@ -448,6 +448,7 @@ gk104_gr_gpccs_ucode = {
static const struct gf100_gr_func
gk104_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c
index e30d94ff23d7..41997ebda719 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c
@@ -350,6 +350,7 @@ gk110_gr_init_419eb4(struct gf100_gr *gr)
static const struct gf100_gr_func
gk110_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c
index 253b98181ac4..b7a6479c6ec2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c
@@ -102,6 +102,7 @@ gk110b_gr_pack_mmio[] = {
static const struct gf100_gr_func
gk110b_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c
index 702e9094c1c8..5f1e71abe504 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c
@@ -161,6 +161,7 @@ gk208_gr_gpccs_ucode = {
static const struct gf100_gr_func
gk208_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
index 95f7d859e634..ab4e5380eba2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
@@ -282,6 +282,7 @@ gk20a_gr_init(struct gf100_gr *gr)
static const struct gf100_gr_func
gk20a_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gk20a_gr_init,
.init_zcull = gf117_gr_init_zcull,
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
index d67bf9465baa..98f74fe7007d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
@@ -391,6 +391,7 @@ gm107_gr_gpccs_ucode = {
static const struct gf100_gr_func
gm107_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm107_gr_init_gpc_mmu,
.init_bios = gm107_gr_init_bios,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
index 03b255e9b812..3e017fdd23ed 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
@@ -77,6 +77,46 @@ gm200_gr_init_rop_active_fbps(struct gf100_gr *gr)
nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
}
+/*
+ * Hardcoded tile->GPC maps used by gm200_gr_oneinit_tiles() for specific
+ * configurations.  The name suffix is <GPC count>_<total TPC count>; each
+ * table holds one GPC index per tile.  The tables are only ever read, so
+ * they are const.
+ */
+
+/* 6 GPCs, 24 TPCs. */
+static const u8
+gm200_gr_tile_map_6_24[] = {
+	0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2, 0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2,
+};
+
+/* 4 GPCs, 16 TPCs. */
+static const u8
+gm200_gr_tile_map_4_16[] = {
+	0, 1, 2, 3, 2, 3, 0, 1, 3, 0, 1, 2, 1, 2, 3, 0,
+};
+
+/* 2 GPCs, 8 TPCs. */
+static const u8
+gm200_gr_tile_map_2_8[] = {
+	0, 1, 1, 0, 0, 1, 1, 0,
+};
+
+/*
+ * Fill gr->tile[] and gr->screen_tile_row_offset.
+ *
+ * XXX: It is not understood why, but three specific GPC/TPC configurations
+ * (2/8, 4/16 and 6/24) use hardcoded tables here instead of the algorithm
+ * from NVGPU.  Every other configuration is handed to
+ * gf100_gr_oneinit_tiles().
+ */
+void
+gm200_gr_oneinit_tiles(struct gf100_gr *gr)
+{
+	const u8 *map;
+	u8 row_offset;
+
+	if (gr->gpc_nr == 2 && gr->tpc_total == 8) {
+		map = gm200_gr_tile_map_2_8;
+		row_offset = 1;
+	} else if (gr->gpc_nr == 4 && gr->tpc_total == 16) {
+		map = gm200_gr_tile_map_4_16;
+		row_offset = 4;
+	} else if (gr->gpc_nr == 6 && gr->tpc_total == 24) {
+		map = gm200_gr_tile_map_6_24;
+		row_offset = 5;
+	} else {
+		/* No hardcoded table: use the generic calculation. */
+		gf100_gr_oneinit_tiles(gr);
+		return;
+	}
+
+	/* One table entry per TPC. */
+	memcpy(gr->tile, map, gr->tpc_total);
+	gr->screen_tile_row_offset = row_offset;
+}
+
int
gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
int index, struct nvkm_gr **pgr)
@@ -117,6 +157,7 @@ gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
static const struct gf100_gr_func
gm200_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_bios = gm107_gr_init_bios,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
index d2f9c7bf9f03..29d3b9445cf6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
@@ -64,6 +64,7 @@ gm20b_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
static const struct gf100_gr_func
gm20b_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gk20a_gr_init,
.init_zcull = gf117_gr_init_zcull,
.init_gpc_mmu = gm20b_gr_init_gpc_mmu,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
index e5f941f81e07..70d0aa2c1076 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
@@ -64,6 +64,7 @@ gp100_gr_init_rop_active_fbps(struct gf100_gr *gr)
static const struct gf100_gr_func
gp100_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c
index 09e2665e4988..71bfe2d8c3f6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c
@@ -42,6 +42,7 @@ gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr)
static const struct gf100_gr_func
gp102_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c
index 844fc9d63e5c..234c970bb0dc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c
@@ -26,6 +26,7 @@
static const struct gf100_gr_func
gp104_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c
index 674385da3d43..6c5724017c71 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c
@@ -28,6 +28,7 @@
static const struct gf100_gr_func
gp107_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c
index 6103186a3724..aaaa2844ec20 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c
@@ -27,6 +27,7 @@
static const struct gf100_gr_func
gp10b_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,