Comparing nextstate to this method (Random Network Distillation, RND; Burda et al., 2018).
RND tries to solve the noisy-TV problem: its fixed random target network (acting as a hash function) has the same representational power as the learner, since the predictor shares its architecture, so there should be no infinite supply of sequences that stay interesting forever. However, it is not clear that this actually works….
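A minimal sketch of the RND mechanism, for concreteness, written in PyTorch; the MLP sizes, variable names, and the toy batch are illustrative assumptions (the paper uses CNNs on Atari frames), not the paper's exact setup.

import torch
import torch.nn as nn

def make_net(obs_dim=64, embed_dim=128):
    # Simple MLP; the RND paper uses CNNs for image observations.
    return nn.Sequential(nn.Linear(obs_dim, 256), nn.ReLU(),
                         nn.Linear(256, embed_dim))

obs_dim, embed_dim = 64, 128
target = make_net(obs_dim, embed_dim)     # fixed, randomly initialized
predictor = make_net(obs_dim, embed_dim)  # same architecture, trained to match the target
for p in target.parameters():
    p.requires_grad_(False)               # the target is never updated

opt = torch.optim.Adam(predictor.parameters(), lr=1e-4)

def intrinsic_reward(obs):
    # Prediction error is high on novel states and shrinks as the
    # predictor sees them more often. Because the predictor can
    # represent anything the target can, even a "noisy TV" state's
    # error can eventually be driven down.
    with torch.no_grad():
        tgt = target(obs)
    return (predictor(obs) - tgt).pow(2).mean(dim=-1)

# One update step on a batch of observations (placeholder data):
obs = torch.randn(32, obs_dim)
loss = intrinsic_reward(obs).mean()
opt.zero_grad()
loss.backward()
opt.step()

In use, the per-state error would be detached and added to the extrinsic reward as an exploration bonus while the predictor is trained on the same observations.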
@article{DBLP:journals/corr/abs-1810-12894,
  author        = {Yuri Burda and Harrison Edwards and Amos J. Storkey and Oleg Klimov},
  title         = {Exploration by Random Network Distillation},
  journal       = {CoRR},
  volume        = {abs/1810.12894},
  year          = {2018},
  url           = {http://arxiv.org/abs/1810.12894},
  archivePrefix = {arXiv},
  eprint        = {1810.12894}
}