<!DOCTYPE html>
<html class="theme-next mist use-motion" lang="zh-Hans">
<head><meta name="generator" content="Hexo 3.8.0">
<meta name="google-site-verification" content="zu-9nWphPjrzXV8v514mkHknIz4dNfHlib56-KNAu44">
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<meta name="theme-color" content="#222">
<script src="/lib/pace/pace.min.js?v=1.0.2"></script>
<link href="/lib/pace/pace-theme-flash.min.css?v=1.0.2" rel="stylesheet">
<meta http-equiv="Cache-Control" content="no-transform">
<meta http-equiv="Cache-Control" content="no-siteapp">
<script>
(function(i,s,o,g,r,a,m){i["DaoVoiceObject"]=r;i[r]=i[r]||function(){(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;a.charset="utf-8";m.parentNode.insertBefore(a,m)})(window,document,"script",('https:' == document.location.protocol ? 'https:' : 'http:') + "//widget.daovoice.io/widget/356f1943.js","daovoice")
daovoice('init', {
app_id: "356f1943"
});
daovoice('update');
</script>
<link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css">
<link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css">
<link href="/css/main.css?v=5.1.4" rel="stylesheet" type="text/css">
<link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon.png?v=5.1.4">
<link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32png?v=5.1.4">
<link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16.png?v=5.1.4">
<link rel="mask-icon" href="/images/logo.svg?v=5.1.4" color="#222">
<meta name="keywords" content="rl,">
<link rel="alternate" href="/atom.xml" title="Keavnn'Blog" type="application/atom+xml">
<script>
(function(){
if(''){
if (prompt('请输入文章密码','') !== ''){
alert('密码错误!');
history.back();
}
}
})();
</script>
<meta name="description" content="本文介绍了一个“事后诸葛亮”的经验池机制,简称为HER,它可以很好地应用于稀疏奖励和二分奖励的问题中,不需要复杂的奖励函数工程设计。 推荐: 稀疏奖励问题的一种解决方案 通俗易懂">
<meta name="keywords" content="rl">
<meta property="og:type" content="article">
<meta property="og:title" content="Hindsight Experience Replay">
<meta property="og:url" content="http://StepNeverStop.github.io/Hindsight-Experience-Replay.html">
<meta property="og:site_name" content="Keavnn'Blog">
<meta property="og:description" content="本文介绍了一个“事后诸葛亮”的经验池机制,简称为HER,它可以很好地应用于稀疏奖励和二分奖励的问题中,不需要复杂的奖励函数工程设计。 推荐: 稀疏奖励问题的一种解决方案 通俗易懂">
<meta property="og:locale" content="zh-Hans">
<meta property="og:image" content="http://stepneverstop.github.io/Hindsight-Experience-Replay/hindsight.png">
<meta property="og:image" content="http://stepneverstop.github.io/Hindsight-Experience-Replay/Her.png">
<meta property="og:image" content="http://stepneverstop.github.io/Hindsight-Experience-Replay/pseudo.png">
<meta property="og:image" content="http://stepneverstop.github.io/Hindsight-Experience-Replay/tasks.png">
<meta property="og:image" content="http://stepneverstop.github.io/Hindsight-Experience-Replay/finalvsfuture.png">
<meta property="og:image" content="http://stepneverstop.github.io/Hindsight-Experience-Replay/singlegoal.png">
<meta property="og:image" content="http://stepneverstop.github.io/Hindsight-Experience-Replay/rewardshape.png">
<meta property="og:image" content="http://stepneverstop.github.io/Hindsight-Experience-Replay/fourmodel.png">
<meta property="og:updated_time" content="2019-05-30T09:52:24.494Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Hindsight Experience Replay">
<meta name="twitter:description" content="本文介绍了一个“事后诸葛亮”的经验池机制,简称为HER,它可以很好地应用于稀疏奖励和二分奖励的问题中,不需要复杂的奖励函数工程设计。 推荐: 稀疏奖励问题的一种解决方案 通俗易懂">
<meta name="twitter:image" content="http://stepneverstop.github.io/Hindsight-Experience-Replay/hindsight.png">
<script type="text/javascript" id="hexo.configurations">
var NexT = window.NexT || {};
var CONFIG = {
root: '/',
scheme: 'Mist',
version: '5.1.4',
sidebar: {"position":"left","display":"post","offset":12,"b2t":false,"scrollpercent":true,"onmobile":true},
fancybox: true,
tabs: true,
motion: {"enable":true,"async":true,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},
duoshuo: {
userId: '0',
author: '博主'
},
algolia: {
applicationID: '',
apiKey: '',
indexName: '',
hits: {"per_page":10},
labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
}
};
</script>
<link rel="canonical" href="http://StepNeverStop.github.io/Hindsight-Experience-Replay.html">
<title>Hindsight Experience Replay | Keavnn'Blog</title>
</head>
<body itemscope="" itemtype="http://schema.org/WebPage" lang="zh-Hans">
<div class="container sidebar-position-left page-post-detail">
<div class="headband"></div>
<a href="https://github.com/StepNeverStop" class="github-corner" aria-label="View source on GitHub" rel="external nofollow" target="_blank"><svg width="80" height="80" viewbox="0 0 250 250" style="fill:#151513; color:#fff; position: absolute; top: 0; border: 0; right: 0;" aria-hidden="true"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"/><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"/><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"/></svg></a><style>.github-corner:hover .octo-arm{animation:octocat-wave 560ms ease-in-out}@keyframes octocat-wave{0%,100%{transform:rotate(0)}20%,60%{transform:rotate(-25deg)}40%,80%{transform:rotate(10deg)}}@media (max-width:500px){.github-corner:hover .octo-arm{animation:none}.github-corner .octo-arm{animation:octocat-wave 560ms ease-in-out}}</style>
<header id="header" class="header" itemscope="" itemtype="http://schema.org/WPHeader">
<div class="header-inner"><div class="site-brand-wrapper">
<div class="site-meta ">
<div class="custom-logo-site-title">
<a href="/" class="brand" rel="start">
<span class="logo-line-before"><i></i></span>
<span class="site-title">Keavnn'Blog</span>
<span class="logo-line-after"><i></i></span>
</a>
</div>
<h1 class="site-subtitle" itemprop="description">If it is to be, it is up to me.</h1>
</div>
<div class="site-nav-toggle">
<button>
<span class="btn-bar"></span>
<span class="btn-bar"></span>
<span class="btn-bar"></span>
</button>
</div>
</div>
<nav class="site-nav">
<ul id="menu" class="menu">
<li class="menu-item menu-item-home">
<a href="/" rel="section">
<i class="menu-item-icon fa fa-fw fa-home"></i> <br>
首页
</a>
</li>
<li class="menu-item menu-item-about">
<a href="/about/" rel="section">
<i class="menu-item-icon fa fa-fw fa-user"></i> <br>
关于
</a>
</li>
<li class="menu-item menu-item-tags">
<a href="/tags/" rel="section">
<i class="menu-item-icon fa fa-fw fa-tags"></i> <br>
标签
</a>
</li>
<li class="menu-item menu-item-categories">
<a href="/categories/" rel="section">
<i class="menu-item-icon fa fa-fw fa-th"></i> <br>
分类
</a>
</li>
<li class="menu-item menu-item-archives">
<a href="/archives/" rel="section">
<i class="menu-item-icon fa fa-fw fa-archive"></i> <br>
归档
</a>
</li>
<li class="menu-item menu-item-search">
<a href="javascript:;" class="popup-trigger">
<i class="menu-item-icon fa fa-search fa-fw"></i> <br>
搜索
</a>
</li>
</ul>
<div class="site-search">
<div class="popup search-popup local-search-popup">
<div class="local-search-header clearfix">
<span class="search-icon">
<i class="fa fa-search"></i>
</span>
<span class="popup-btn-close">
<i class="fa fa-times-circle"></i>
</span>
<div class="local-search-input-wrapper">
<input autocomplete="off" placeholder="搜索..." spellcheck="false" type="text" id="local-search-input">
</div>
</div>
<div id="local-search-result"></div>
</div>
</div>
</nav>
</div>
</header>
<main id="main" class="main">
<div class="main-inner">
<div class="content-wrap">
<div id="content" class="content">
<div id="posts" class="posts-expand">
<article class="post post-type-normal" itemscope="" itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="http://StepNeverStop.github.io/Hindsight-Experience-Replay.html">
<span hidden itemprop="author" itemscope="" itemtype="http://schema.org/Person">
<meta itemprop="name" content="Keavnn">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/Kicon.jpg">
</span>
<span hidden itemprop="publisher" itemscope="" itemtype="http://schema.org/Organization">
<meta itemprop="name" content="Keavnn'Blog">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">Hindsight Experience Replay</h2>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建于" itemprop="dateCreated datePublished" datetime="2019-05-28T18:38:56+08:00">
2019-05-28
</time>
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-calendar-check-o"></i>
</span>
<span class="post-meta-item-text">更新于:</span>
<time title="更新于" itemprop="dateModified" datetime="2019-05-30T17:52:24+08:00">
2019-05-30
</time>
</span>
<span class="post-category">
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-folder-o"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope="" itemtype="http://schema.org/Thing">
<a href="/categories/ReinforcementLearning/" itemprop="url" rel="index">
<span itemprop="name">ReinforcementLearning</span>
</a>
</span>
</span>
<div class="post-wordcount">
<span class="post-meta-item-icon">
<i class="fa fa-file-word-o"></i>
</span>
<span class="post-meta-item-text">字数统计:</span>
<span title="字数统计">
3.4k
</span>
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-clock-o"></i>
</span>
<span class="post-meta-item-text">阅读时长 ≈</span>
<span title="阅读时长">
14
</span>
</div>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<p>This post introduces a "hindsight" replay-buffer mechanism, <strong>HER</strong> for short, which works well on <strong>sparse-reward</strong> and <strong>binary-reward</strong> problems without any elaborate reward-function engineering.</p>
<p>Recommended:</p>
<ul>
<li>One solution to the sparse-reward problem</li>
<li>Clear and easy to understand</li>
</ul>
<a id="more"></a>
<h1 id="简介"><a href="#简介" class="headerlink" title="简介"></a>简介</h1><p>论文地址:<a href="https://papers.nips.cc/paper/7090-hindsight-experience-replay.pdf" rel="external nofollow" target="_blank">https://papers.nips.cc/paper/7090-hindsight-experience-replay.pdf</a></p>
<blockquote>
<p>Dealing with sparse rewards is one of the biggest challenges in Reinforcement Learning (RL). </p>
</blockquote>
<p>Dealing with sparse rewards is one of the thorniest problems in reinforcement learning.</p>
<p>The paper proposes a novel technique, Hindsight Experience Replay (HER), that enables sample-efficient learning from sparse and binary rewards and can be combined with <strong>any off-policy</strong> algorithm.</p>
<p><img src="./Hindsight-Experience-Replay/hindsight.png" alt=""></p>
<p>"Hindsight" means looking back after the fact. Given the sequential nature of decision making in RL, it is natural to guess that "after the fact" refers either to the moment after action a has been taken in state s, or to the moment after an episode has ended. Indeed, the paper's modification of the ordinary replay buffer uses exactly this notion.</p>
<blockquote>
<p>HER lets an agent learn from undesired outcomes and tackles the problem of sparse rewards in Reinforcement Learning (RL).——Zhao, R., & Tresp, V. (2018). Energy-Based Hindsight Experience Prioritization. <em>CoRL</em>.</p>
</blockquote>
<p>HER lets the agent learn from outcomes it did not intend to reach, tackling the sparse-reward problem in RL.</p>
<h2 id="二分奖励-binary-reward"><a href="#二分奖励-binary-reward" class="headerlink" title="Binary reward"></a>Binary reward</h2><p>In short, the reward takes one value when the goal is achieved and another value when it is not, for example (a minimal code sketch follows the list):</p>
<ul>
<li>$S_{T}=Goal,\ r=0$</li>
<li>$S \neq Goal,\ r=-1 \ \text{for} \ S \in \mathbb{S}$</li>
</ul>
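<p>A minimal Python sketch of this binary reward (the function name and the array representation are my own, not from the paper):</p>
<pre><code>import numpy as np

def binary_reward(state, goal):
    # r = -[s != g]: 0 once the goal is reached, -1 otherwise
    return 0.0 if np.array_equal(state, goal) else -1.0
</code></pre>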
<h2 id="稀疏奖励-sparse-reward"><a href="#稀疏奖励-sparse-reward" class="headerlink" title="稀疏奖励 sparse reward"></a>稀疏奖励 sparse reward</h2><p>简言之,完成目标的episode太少或者完成目标的步数太长,导致负奖励的样本数过多</p>
<h1 id="文中精要"><a href="#文中精要" class="headerlink" title="文中精要"></a>文中精要</h1><p>在机器人领域,要想使强化学习训练它完美执行某任务,往往需要设计合理的奖励函数,但是设计这样的奖励函数工程师不仅需要懂得强化学习的领域知识,也需要懂得机器人、运动学等领域的知识。而且,有这些知识也未必能设计出很好的奖励函数供智能体进行学习。因此,如果可以从简单的奖励函数(如二分奖励)学习到可完成任务的模型,那就不需要费心设计复杂的奖励函数了。</p>
<p>文中介绍了一个例子来引入HER:</p>
<ul>
<li>Name: bit-flipping environment</li>
<li>State space $\mathcal{S}=\left \{ 0,1 \right \}^{n}$</li>
<li>Action space $\mathcal{A}=\left \{ 0,1,\cdots,n-1 \right \}$</li>
<li>Rules: for every episode, uniformly sample an initial state $s_{0}$ of length $n$ (e.g. $n=5,s_{0}=10101$) and a goal state $s_{g}$; at every step pick an action $a$ from the action space and flip the bit at position $a$ of the current state, e.g. $a=1\Rightarrow s_{1}=11101$; the episode ends when the step budget runs out or the flipped state equals $s_{g}$</li>
<li>Reward function: $r_{g}(s,a)=-\left [ s \neq g \right ]$, i.e. 0 once the goal state is reached and -1 otherwise. This is easy to read off: $s \neq g \Rightarrow true \doteq 1$, $s = g \Rightarrow false \doteq 0$</li>
</ul>
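<p>A minimal sketch of the bit-flipping environment described above (class and method names are my own; only the rules and reward come from the paper):</p>
<pre><code>import numpy as np

class BitFlipEnv:
    """Flip one bit per step; reward is -[s != g]."""

    def __init__(self, n=5):
        self.n = n

    def reset(self):
        # uniformly sample an initial state s_0 and a goal state s_g
        self.state = np.random.randint(0, 2, size=self.n)
        self.goal = np.random.randint(0, 2, size=self.n)
        return self.state.copy(), self.goal.copy()

    def step(self, action):
        # flip the bit at position `action`
        self.state[action] = 1 - self.state[action]
        done = np.array_equal(self.state, self.goal)
        reward = 0.0 if done else -1.0
        return self.state.copy(), reward, done
</code></pre>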
<p><em>Note: unless otherwise stated, $g$ below denotes the goal state $s_{g}$.</em></p>
<blockquote>
<p>Standard RL algorithms are bound to fail in this environment for n > 40 because they will never experience any reward other than -1. Notice that using techniques for improving exploration (e.g. VIME (Houthooft et al., 2016), count-based exploration (Ostrovski et al., 2017) or bootstrapped DQN (Osband et al., 2016)) does not help here because the real problem is not in lack of diversity of states being visited, rather it is simply impractical to explore such a large state space. </p>
</blockquote>
<p>Once the sequence length $n$ exceeds 40, standard RL algorithms cannot learn to solve this problem even with various exploration bonuses, because the real issue is not a lack of exploration: <strong>the state space is simply too large to explore exhaustively</strong>, rewards become extremely sparse, and the algorithm never sees the objective it is supposed to optimize.</p>
<p>To address this, the authors point out two approaches:</p>
<ol>
<li>Use a shaped reward (in short, make the reward a function of some quantities, e.g. $r_{g}(s,a)=-\left \| s-g \right \|^{2}$, the negative squared Euclidean distance between the current state and the goal), which gradually steers the algorithm toward regions of the decision space with higher reward. This can be hard to apply to complex problems, though.</li>
<li>Use HER, the hindsight experience replay mechanism</li>
</ol>
<h2 id="HER"><a href="#HER" class="headerlink" title="HER"></a>HER</h2><blockquote>
<p>The pivotal idea behind our approach is to re-examine this trajectory with a different goal — while this trajectory may not help us learn how to achieve the state g, it definitely tells us something about how to achieve the state $s_{T}$ .</p>
</blockquote>
<p>The core idea of HER: <strong>why must we judge a trajectory only against the goal we originally set? Suppose we want an agent to learn to move to some position; is an episode in which it fails to reach that position simply a failure? For a trajectory $s_{0},s_{1},s_{2}, \cdots ,s_{T}$ with goal $g$, we can flip the perspective: had we set the goal to $g=s_{T}$ before the episode started, the agent would have achieved its goal after all.</strong></p>
<p><img src="./Hindsight-Experience-Replay/Her.png" alt=""></p>
<p>HER applies this idea to augment the replay buffer, turning a sparse-reward problem into a non-sparse one and greatly increasing the amount of goal-achieving experience in the buffer.</p>
<p>Main features of HER:</p>
<ul>
<li>A conventional replay buffer stores the state $s$; HER stores $s||g$, i.e. <code>tf.concat(s,g)</code> (see the small sketch after this list)</li>
<li>The training algorithm likewise takes $s||g$ as input, i.e. the goal state of <strong>each episode</strong> is concatenated onto the current state, and the goal may differ between episodes</li>
<li>HER augments the buffer: besides the actually sampled transition/experience $\left ( s_{t}||g,a_{t},r_{t},s_{t+1}||g \right )$, at the end of each episode it <strong>relabels the goal</strong>, recomputes the corresponding reward (with a binary reward, only transitions where $s=g$ need their reward changed), and stores the "hindsight" experience ("if only that had been the goal!") $\left ( s_{t}||g’,a_{t},r_{t}’,s_{t+1}||g’ \right )$; see the pseudocode. How many hindsight copies are stored, and of which kind, is controlled by the hyperparameter $k$, explained below.</li>
<li>HER is best suited to multi-goal problems, i.e. the goal is not fixed and may differ from episode to episode; see the experiments section</li>
</ul>
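<p>The $s||g$ concatenation is nothing more than joining the two vectors before feeding the networks; a tiny sketch (NumPy here, though <code>tf.concat</code> works the same way):</p>
<pre><code>import numpy as np

def concat_state_goal(state, goal):
    # policy and value networks take the concatenated vector as input
    return np.concatenate([state, goal], axis=-1)

# e.g. two length-5 vectors become one length-10 network input
assert concat_state_goal(np.zeros(5), np.ones(5)).shape == (10,)
</code></pre>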
<p>HER comes in several goal-sampling variants (a sampling sketch follows the list):</p>
<blockquote>
<p>future — replay with k random states which come from the same episode as the transition being replayed and were observed after it,<br>episode — replay with k random states coming from the same episode as the transition being replayed,<br>random — replay with k random states encountered so far in the whole training procedure.</p>
</blockquote>
<ul>
<li>future: in a trajectory $s_{0},s_{1},s_{2},\cdots,s_{T}$, when the transition at $s_{2}$ is replayed, randomly pick $k$ states from $s_{3},\cdots,s_{T}$ as substitute goals $g’$ and, for each, store $\left ( s_{2}||g’,a_{2},r_{2}’,s_{3}||g’ \right )$ in the buffer. <strong>Characteristic: the later part of the same episode</strong></li>
<li>episode: in a trajectory $s_{0},s_{1},s_{2},\cdots,s_{T}$, when the transition at $s_{2}$ is replayed, randomly pick $k$ states from the whole trajectory as substitute goals $g’$ and store $\left ( s_{2}||g’,a_{2},r_{2}’,s_{3}||g’ \right )$ for each. <strong>Characteristic: anywhere in the same episode</strong></li>
<li>random: in a trajectory $s_{0},s_{1},s_{2},\cdots,s_{T}$, when the transition at $s_{2}$ is replayed, randomly pick $k$ states from several trajectories $\tau_{0},\tau_{1},\tau_{2},\cdots$ as substitute goals $g’$ and store $\left ( s_{2}||g’,a_{2},r_{2}’,s_{3}||g’ \right )$ for each. <strong>Characteristic: across multiple episodes</strong></li>
<li>final: in a trajectory $s_{0},s_{1},s_{2},\cdots,s_{T}$, when the transition at $s_{2}$ is replayed, directly set $g’=s_{T}$ and store $\left ( s_{2}||g’,a_{2},r_{2}’,s_{3}||g’ \right )$. <strong>Characteristic: the last state of the episode; if $k$ is set, $k$ identical experiences are stored</strong></li>
</ul>
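<p>A sketch of the four goal-sampling strategies for a recorded episode (the function signature and the episode representation are assumptions, not from the paper):</p>
<pre><code>import random

def sample_goals(episode_states, t, k, strategy, all_episodes=None):
    """Pick k substitute goals g' for the transition taken at time step t."""
    if strategy == 'final':
        return [episode_states[-1]] * k
    if strategy == 'future':
        pool = episode_states[t + 1:]      # later states of the same episode
    elif strategy == 'episode':
        pool = list(episode_states)        # anywhere in the same episode
    elif strategy == 'random':
        pool = [s for ep in all_episodes for s in ep]  # states from any episode so far
    else:
        raise ValueError(strategy)
    if not pool:                           # e.g. 'future' at the last time step
        return []
    return [random.choice(pool) for _ in range(k)]
</code></pre>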
<h2 id="伪代码"><a href="#伪代码" class="headerlink" title="伪代码"></a>伪代码</h2><p><img src="./Hindsight-Experience-Replay/pseudo.png" alt=""></p>
<p>解析:</p>
<ol>
<li>The pseudocode never mentions the hyperparameter $k$; in fact the loop $\textbf{for} \ g’ \in G \ \textbf{do}$ runs $k$ times</li>
<li>The $||$ operator is concatenation; in short, it joins two length-5 vectors into one length-10 vector</li>
<li>$G:=\mathbb{S}(\textbf{current episode})$ corresponds to the four goal-sampling strategies described above: future, episode, random, final.</li>
<li>The reward function $r(s,a,g)=-\left [ f_{g}(s)=0 \right ]$ is the $r_{g}(s,a)=-\left [ s \neq g \right ]$ from before, i.e. 0 on success and -1 otherwise; the concrete reward can be designed for the environment at hand</li>
<li>$a_{t} \leftarrow \pi_{b}(s_{t}||g)$ means the network input is the concatenation of the current state and the goal state</li>
</ol>
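<p>Putting the pieces together, a compact sketch of the replay-buffer filling loop from the pseudocode (all names are mine; the reward recomputation assumes the binary reward above):</p>
<pre><code>import numpy as np

def store_episode(buffer, episode, goal, k, strategy, reward_fn, sample_goals):
    """episode: list of (s, a, r, s_next) tuples collected while pursuing `goal`."""
    states = [tr[0] for tr in episode] + [episode[-1][3]]   # s_0 ... s_T
    for t, (s, a, r, s_next) in enumerate(episode):
        # standard experience, with the original goal concatenated
        buffer.append((np.concatenate([s, goal]), a, r,
                       np.concatenate([s_next, goal])))
        # hindsight experiences with substituted goals g'
        for g_new in sample_goals(states, t, k, strategy):
            r_new = reward_fn(s_next, g_new)   # recompute the reward under g'
            buffer.append((np.concatenate([s, g_new]), a, r_new,
                           np.concatenate([s_next, g_new])))
</code></pre>
<p>A real implementation would also track terminal flags and sample minibatches from <code>buffer</code> for the off-policy learner, but the control flow mirrors the pseudocode above.</p>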
<h2 id="HER的优点"><a href="#HER的优点" class="headerlink" title="HER的优点"></a>HER的优点</h2><ol>
<li>可解决稀疏奖励、二分奖励问题</li>
<li>可适用于所有的Off-Policy算法</li>
<li>提升了数据采样效率</li>
</ol>
<h1 id="实验部分"><a href="#实验部分" class="headerlink" title="实验部分"></a>实验部分</h1><p>文中实验结果:<a href="https://goo.gl/SMrQnI" rel="external nofollow" target="_blank">https://goo.gl/SMrQnI</a></p>
<p>实验部分的完整细节请参考论文原文。</p>
<h2 id="环境"><a href="#环境" class="headerlink" title="环境"></a>环境</h2><ul>
<li>7自由度机械臂</li>
<li>模拟环境:MuJoCo</li>
<li>任务分为3种<ul>
<li>Pushing,推:锁定机械臂的钳子,移动机械臂将物体推到目标点</li>
<li>Sliding,滑动:类似于冰球运动,锁定机械臂的钳子,移动机械臂给与物体一个力,使物体可以在较光滑的桌面上滑动并且达到目标位置</li>
<li>Pick-and-place,摆放:解锁钳子,使用机械臂夹起物体并移动至空中目标点</li>
</ul>
</li>
</ul>
<p><img src="./Hindsight-Experience-Replay/tasks.png" alt=""></p>
<h2 id="算法"><a href="#算法" class="headerlink" title="算法"></a>算法</h2><ul>
<li>DDPG</li>
<li>Adam优化器</li>
<li>多层感知机MLPs</li>
<li>ReLU激活函数</li>
<li>8核并行,更新参数后取平均</li>
<li>A-C网络都是3个隐藏层,每层64个隐节点,Actor输出层用tanh激活函数</li>
<li>经验池大小为$10^{6}$,折扣因子$\gamma=0.98$,学习率$\alpha=0.001$,探索因子$\epsilon = 0.2$</li>
</ul>
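<p>For reference, the reported settings gathered in one place (the dictionary layout is mine; the values are the ones listed above):</p>
<pre><code># DDPG + HER settings reported in the paper
DDPG_HER_CONFIG = dict(
    buffer_size=int(1e6),
    gamma=0.98,
    learning_rate=1e-3,
    epsilon=0.2,                    # random-action probability
    hidden_layers=(64, 64, 64),     # both actor and critic
    activation='relu',
    actor_output_activation='tanh',
    optimizer='adam',
    num_workers=8,                  # parameters averaged after updates
)
</code></pre>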
<blockquote>
<p>With probability 20% we sample (uniformly) a random action from the hypercube of valid actions. </p>
</blockquote>
<p>That is, DDPG uses a random exploration mechanism: with probability 20% the action is sampled uniformly from the hypercube of valid actions.</p>
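<p>A sketch of that exploration rule (the actor call and action bounds are placeholders; only the 20% uniform sampling comes from the paper):</p>
<pre><code>import numpy as np

def select_action(actor, state_goal, action_low, action_high, epsilon=0.2):
    if np.random.rand() > epsilon:
        return actor(state_goal)   # exploit: deterministic policy action
    # explore: sample uniformly from the hypercube of valid actions
    return np.random.uniform(low=action_low, high=action_high)
</code></pre>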
<h2 id="训练结果"><a href="#训练结果" class="headerlink" title="训练结果"></a>训练结果</h2><h3 id="final模式与future模式对比"><a href="#final模式与future模式对比" class="headerlink" title="final模式与future模式对比"></a>final模式与future模式对比</h3><p><img src="./Hindsight-Experience-Replay/finalvsfuture.png" alt=""></p>
<ul>
<li>Red curve: future strategy; blue: final strategy; green: DDPG with <a href="https://arxiv.org/pdf/1703.01310.pdf" rel="external nofollow" target="_blank">count-based</a> exploration; dark-red dashed: vanilla DDPG</li>
<li>From left to right: Pushing, Sliding, Pick-and-place</li>
<li>Hyperparameter $k=4$</li>
<li>In this experiment the goal state varies, i.e. it is a multi-goal setting</li>
</ul>
<p>Analysis:</p>
<ul>
<li>The future strategy works better than final</li>
<li>DDPG with count-based exploration can only make slight progress on the Sliding task</li>
<li>DDPG with HER is fully capable of solving all three tasks</li>
<li>This demonstrates that HER is the crucial ingredient that makes learning from sparse, binary rewards possible</li>
</ul>
<h3 id="单个目标状态的实验"><a href="#单个目标状态的实验" class="headerlink" title="单个目标状态的实验"></a>单个目标状态的实验</h3><p><img src="./Hindsight-Experience-Replay/singlegoal.png" alt=""></p>
<ul>
<li>Blue curve: DDPG with HER; the paper does not say which strategy is used, but <strong>presumably</strong> final, since final is used in all the examples before the experiments section</li>
<li>Green curve: DDPG with count-based exploration; dark-red dashed: vanilla DDPG</li>
<li>In this experiment every episode uses the same goal state $g$</li>
</ul>
<p>Analysis:</p>
<ul>
<li>DDPG+HER performs far better than vanilla DDPG</li>
<li><strong>Compared with the multi-goal experiment, DDPG trains faster when goals vary across episodes</strong>, so in practice it is better to train with multiple goals even if we only care about a single one</li>
</ul>
<h3 id="HER应用于reward-shaping问题中"><a href="#HER应用于reward-shaping问题中" class="headerlink" title="HER应用于reward shaping问题中"></a>HER应用于reward shaping问题中</h3><p>前文已经说过,reward shaping可以简单理解为将奖励函数设置为某些变量的函数,如$r_{g}(s,a)=-\left || s-g \right ||^{2}$,即奖励函数为当前状态与目标状态的欧氏距离的负数</p>
<p><img src="./Hindsight-Experience-Replay/rewardshape.png" alt=""></p>
<ul>
<li>Reward function: $r_{g}(s,a)=-\left \| s-g \right \|^{2}$ (sketched below)</li>
</ul>
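<p>The shaped reward used in this experiment, as a one-line sketch (negative squared Euclidean distance):</p>
<pre><code>import numpy as np

def shaped_reward(state, goal):
    # r_g(s, a) = -||s - g||^2
    diff = np.asarray(state) - np.asarray(goal)
    return -float(np.dot(diff, diff))
</code></pre>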
<p>Analysis:</p>
<ul>
<li><p>No matter which reward-shaping function is used, neither DDPG nor DDPG+HER can solve these tasks</p>
</li>
<li><p>The authors give two reasons:</p>
<ul>
<li><blockquote>
<p>There is a huge discrepancy between what we optimize (i.e. a shaped reward function) and the success condition (i.e.: is the object within some radius from the goal at the end of the episode); </p>
</blockquote>
<p>There is a huge mismatch between what is being optimized (the shaped reward, at every step) and the success condition (whether the object ends up within some radius of the goal at the end of the episode).</p>
</li>
<li><blockquote>
<p>Shaped rewards penalize for inappropriate behaviour (e.g. moving the box in a wrong direction) which may hinder exploration. It can cause the agent to learn not to touch the box at all if it can not manipulate it precisely and we noticed such behaviour in some of our experiments. </p>
</blockquote>
<p>Shaped rewards penalize undesired behaviour and thereby hinder exploration; the agent may even learn not to touch the box at all.</p>
</li>
</ul>
</li>
<li><blockquote>
<p>Our results suggest that domain-agnostic reward shaping does not work well (at least in the simple forms we have tried). Of course for every problem there exists a reward which makes it easy (Ng et al., 1999) but designing such shaped rewards requires a lot of domain knowledge and may in some cases not be much easier than directly scripting the policy. This strengthens our belief that learning from sparse, binary rewards is an important problem. </p>
</blockquote>
<p>The results suggest that domain-agnostic reward shaping does not work well.</p>
</li>
</ul>
<h3 id="四种模式比较"><a href="#四种模式比较" class="headerlink" title="四种模式比较"></a>四种模式比较</h3><p><img src="./Hindsight-Experience-Replay/fourmodel.png" alt=""></p>
<ul>
<li>Red: future; blue: final; green: episode; purple: random; dark-red dashed: vanilla DDPG</li>
<li>The x-axis is the hyperparameter $k$; in the top row of plots the y-axis is the highest score, in the bottom row the average score</li>
</ul>
<p>Analysis:</p>
<ul>
<li><p>Performance: future > final > episode > random > no HER</p>
</li>
<li><p>Stability: final (performs well) = no HER (performs poorly) > future > episode > random</p>
</li>
<li><p>future is the only strategy that can solve the Sliding task; it works best at $k=4$ or $k=8$</p>
</li>
<li><p>Increasing $k$ beyond 8 hurts performance somewhat, mainly because too large a $k$ leaves too small a fraction of original, real experience in the buffer</p>
</li>
<li><blockquote>
<p>It confirms that the most valuable goals for replay are the ones which are going to be achieved in the near future </p>
</blockquote>
<p>This confirms that the most valuable goals for replay are those that will be achieved in the near future.</p>
</li>
</ul>
<p><em>Note: building on the future strategy, the authors also tried a nearest-neighbour variant that sets $g’=s_{t+1}$; in their experiments it performed worse than future.</em></p>
</div>
<div>
<div>
<div style="text-align:center;color: #ccc;font-size:14px;">-------------本文结束<i class="fa fa-heart"></i>感谢您的阅读-------------</div>
</div>
</div>
<div>
<div class="my_post_copyright">
<script src="//cdn.bootcss.com/clipboard.js/1.5.10/clipboard.min.js"></script>
<!-- JS库 sweetalert 可修改路径 -->
<script src="https://cdn.bootcss.com/jquery/2.0.0/jquery.min.js"></script>
<script src="https://unpkg.com/sweetalert/dist/sweetalert.min.js"></script>
<p><span>本文标题:</span><a href="/Hindsight-Experience-Replay.html">Hindsight Experience Replay</a></p>
<p><span>文章作者:</span><a href="/" title="访问 Keavnn 的个人博客">Keavnn</a></p>
<p><span>发布时间:</span>2019年05月28日 - 18:05</p>
<p><span>最后更新:</span>2019年05月30日 - 17:05</p>
<p><span>原始链接:</span><a href="/Hindsight-Experience-Replay.html" title="Hindsight Experience Replay">http://StepNeverStop.github.io/Hindsight-Experience-Replay.html</a>
<span class="copy-path" title="点击复制文章链接"><i class="fa fa-clipboard" data-clipboard-text="http://StepNeverStop.github.io/Hindsight-Experience-Replay.html" aria-label="复制成功!"></i></span>
</p>
<p><span>许可协议:</span><i class="fa fa-creative-commons"></i> <a rel="external nofollow" href="https://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank" title="Attribution-NonCommercial-NoDerivatives 4.0 International (CC BY-NC-ND 4.0)">署名-非商业性使用-相同方式共享 4.0 国际</a> 转载请保留原文链接及作者。</p>
</div>
<script>
var clipboard = new Clipboard('.fa-clipboard');
$(".fa-clipboard").click(function(){
clipboard.on('success', function(){
swal({
title: "",
text: '复制成功',
icon: "success",
showConfirmButton: true
});
});
});
</script>
</div>
<div>
<div style="padding: 10px 0; margin: 20px auto; width: 90%; text-align: center;">
<div>如果您获得了帮助,也可以资助一下小的啦~</div>
<button id="rewardButton" disable="enable" onclick="var qr = document.getElementById('QR'); if (qr.style.display === 'none') {qr.style.display='block';} else {qr.style.display='none'}">
<span>打赏啦</span>
</button>
<div id="QR" style="display: none;">
<div id="wechat" style="display: inline-block">
<img id="wechat_qr" src="/images/wechatpay.jpg" alt="Keavnn 微信">
<p>微信</p>
</div>
<div id="alipay" style="display: inline-block">
<img id="alipay_qr" src="/images/alipay.jpg" alt="Keavnn 支付宝">
<p>支付宝</p>
</div>
</div>
</div>
</div>
<footer class="post-footer">
<div class="post-tags">
<a href="/tags/rl/" rel="tag"> <i class="fa fa-tag"></i> rl</a>
</div>
<div class="post-nav">
<div class="post-nav-next post-nav-item">
<a href="/Prioritized-Experience-Replay.html" rel="next" title="Prioritized Experience Replay">
<i class="fa fa-chevron-left"></i> Prioritized Experience Replay
</a>
</div>
<span class="post-nav-divider"></span>
<div class="post-nav-prev post-nav-item">
<a href="/energy-based-hindsight-experience-prioritization.html" rel="prev" title="Energy-Based Hindsight Experience Prioritization">
Energy-Based Hindsight Experience Prioritization <i class="fa fa-chevron-right"></i>
</a>
</div>
</div>
</footer>
</div>
</article>
<div class="post-spread">
<!-- Go to www.addthis.com/dashboard to customize your tools -->
<div class="addthis_inline_share_toolbox">
<script type="text/javascript" src="//s7.addthis.com/js/300/addthis_widget.js#pubid=ra-5cefbfc88c13b0e7" async="async"></script>
</div>
</div>
</div>
</div>
<div class="comments" id="comments">
<div id="lv-container" data-id="city" data-uid="MTAyMC80MTk0NS8xODQ5MQ=="></div>
</div>
</div>
<div class="sidebar-toggle">
<div class="sidebar-toggle-line-wrap">
<span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
<span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
<span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
</div>
</div>
<aside id="sidebar" class="sidebar">
<div id="sidebar-dimmer"></div>
<div class="sidebar-inner">
<ul class="sidebar-nav motion-element">
<li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap">
文章目录
</li>
<li class="sidebar-nav-overview" data-target="site-overview-wrap">
站点概览
</li>
</ul>
<section class="site-overview-wrap sidebar-panel">
<div class="site-overview">
<div class="site-author motion-element" itemprop="author" itemscope="" itemtype="http://schema.org/Person">
<img class="site-author-image" itemprop="image" src="/images/Kicon.jpg" alt="Keavnn">
<p class="site-author-name" itemprop="name">Keavnn</p>
<p class="site-description motion-element" itemprop="description">If it is to be, it is up to me.</p>
</div>
<nav class="site-state motion-element">
<div class="site-state-item site-state-posts">
<a href="/archives/">
<span class="site-state-item-count">51</span>
<span class="site-state-item-name">日志</span>
</a>
</div>
<div class="site-state-item site-state-categories">
<a href="/categories/index.html">
<span class="site-state-item-count">11</span>
<span class="site-state-item-name">分类</span>
</a>
</div>
<div class="site-state-item site-state-tags">
<a href="/tags/index.html">
<span class="site-state-item-count">26</span>
<span class="site-state-item-name">标签</span>
</a>
</div>
</nav>
<div class="feed-link motion-element">
<a href="/atom.xml" rel="alternate">
<i class="fa fa-rss"></i>
RSS
</a>
</div>
<div class="links-of-author motion-element">
<span class="links-of-author-item">
<a href="https://github.com/StepNeverStop" target="_blank" title="GitHub" rel="external nofollow">
<i class="fa fa-fw fa-github"></i>GitHub</a>
</span>
<span class="links-of-author-item">
<a href="mailto:[email protected]" target="_blank" title="E-Mail" rel="external nofollow">
<i class="fa fa-fw fa-envelope"></i>E-Mail</a>
</span>
</div>
<div class="cc-license motion-element" itemprop="license">
<a href="https://creativecommons.org/licenses/by-nc-sa/4.0/" class="cc-opacity" target="_blank" rel="external nofollow">
<img src="/images/cc-by-nc-sa.svg" alt="Creative Commons">
</a>
</div>
<div class="links-of-blogroll motion-element links-of-blogroll-inline">
<div class="links-of-blogroll-title">
<i class="fa fa-fw fa-link"></i>
推荐阅读
</div>
<ul class="links-of-blogroll-list">
<li class="links-of-blogroll-item">
<a href="https://bluefisher.github.io" title="Fisher Chang" target="_blank" rel="external nofollow">Fisher Chang</a>
</li>
</ul>
</div>
</div>
</section>
<!--noindex-->
<section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
<div class="post-toc">
<div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-1"><a class="nav-link" href="#简介"><span class="nav-number">1.</span> <span class="nav-text">简介</span></a><ol class="nav-child"><li class="nav-item nav-level-2"><a class="nav-link" href="#二分奖励-binary-reward"><span class="nav-number">1.1.</span> <span class="nav-text">二分奖励 binary reward</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#稀疏奖励-sparse-reward"><span class="nav-number">1.2.</span> <span class="nav-text">稀疏奖励 sparse reward</span></a></li></ol></li><li class="nav-item nav-level-1"><a class="nav-link" href="#文中精要"><span class="nav-number">2.</span> <span class="nav-text">文中精要</span></a><ol class="nav-child"><li class="nav-item nav-level-2"><a class="nav-link" href="#HER"><span class="nav-number">2.1.</span> <span class="nav-text">HER</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#伪代码"><span class="nav-number">2.2.</span> <span class="nav-text">伪代码</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#HER的优点"><span class="nav-number">2.3.</span> <span class="nav-text">HER的优点</span></a></li></ol></li><li class="nav-item nav-level-1"><a class="nav-link" href="#实验部分"><span class="nav-number">3.</span> <span class="nav-text">实验部分</span></a><ol class="nav-child"><li class="nav-item nav-level-2"><a class="nav-link" href="#环境"><span class="nav-number">3.1.</span> <span class="nav-text">环境</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#算法"><span class="nav-number">3.2.</span> <span class="nav-text">算法</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#训练结果"><span class="nav-number">3.3.</span> <span class="nav-text">训练结果</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#final模式与future模式对比"><span class="nav-number">3.3.1.</span> <span class="nav-text">final模式与future模式对比</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#单个目标状态的实验"><span class="nav-number">3.3.2.</span> <span class="nav-text">单个目标状态的实验</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#HER应用于reward-shaping问题中"><span class="nav-number">3.3.3.</span> <span class="nav-text">HER应用于reward shaping问题中</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#四种模式比较"><span class="nav-number">3.3.4.</span> <span class="nav-text">四种模式比较</span></a></li></ol></li></ol></li></ol></div>
</div>
</section>
<!--/noindex-->
</div>
</aside>
</div>
</main>
<footer id="footer" class="footer">
<div class="footer-inner">
<script async src="https://busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script>
<div class="copyright">© <span itemprop="copyrightYear">2020</span>
<span class="with-love">
<i class="fa fa-heart"></i>
</span>
<span class="author" itemprop="copyrightHolder">Keavnn</span>
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-area-chart"></i>
</span>
<span class="post-meta-item-text">Site words total count:</span>
<span title="Site words total count">80.3k</span>
</div>