Mitsubishi published this paper, incidentally.
Not only does it visualize deep-vs-shallow networks, it also provides some preliminary solutions to the problems of rough reward surfaces.
% arXiv preprint, listed by dblp under CoRR; record reformatted from the dblp export.
@article{DBLP:journals/corr/abs-2102-07920,
  author     = {Ota, Kei and
                Jha, Devesh K. and
                Kanezaki, Asako},
  title      = {Training Larger Networks for Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2102.07920},
  year       = {2021},
  eprinttype = {arXiv},
  eprint     = {2102.07920},
  url        = {https://arxiv.org/abs/2102.07920},
  timestamp  = {Thu, 18 Feb 2021 15:26:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-07920.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}