From 9937c077100df2735aa3967691315a45e958d4df Mon Sep 17 00:00:00 2001
From: shixiaowen03
Date: Sun, 23 Dec 2018 13:46:34 +0800
Subject: [PATCH] rainbow
---
 .idea/workspace.xml | 333 +++++++++++++++----------
 RL/Basic-DisRL-Demo/Categorical_DQN.py | 6 +-
 RL/Basic-Rainbow-Net/agent.py | 294 ++++++++++++++++++++++
 RL/Basic-Rainbow-Net/batchEnv.py | 157 ++++++++++++
 RL/Basic-Rainbow-Net/huberLoss.py | 71 ++++++
 RL/Basic-Rainbow-Net/main.py | 162 ++++++++++++
 RL/Basic-Rainbow-Net/model.py | 199 +++++++++++++++
 RL/Basic-Rainbow-Net/preprocessor.py | 126 ++++++++++
 RL/Basic-Rainbow-Net/replayMemory.py | 85 +++++++
 RL/Basic-Rainbow-Net/sumTree.py | 56 +++++
 10 files changed, 1351 insertions(+), 138 deletions(-)
 create mode 100644 RL/Basic-Rainbow-Net/agent.py
 create mode 100644 RL/Basic-Rainbow-Net/batchEnv.py
 create mode 100644 RL/Basic-Rainbow-Net/huberLoss.py
 create mode 100644 RL/Basic-Rainbow-Net/main.py
 create mode 100644 RL/Basic-Rainbow-Net/model.py
 create mode 100644 RL/Basic-Rainbow-Net/preprocessor.py
 create mode 100644 RL/Basic-Rainbow-Net/replayMemory.py
 create mode 100644 RL/Basic-Rainbow-Net/sumTree.py

diff --git a/RL/Basic-DisRL-Demo/Categorical_DQN.py b/RL/Basic-DisRL-Demo/Categorical_DQN.py
index 1f98aeb1..35646218 100644
--- a/RL/Basic-DisRL-Demo/Categorical_DQN.py
+++ b/RL/Basic-DisRL-Demo/Categorical_DQN.py
@@ -13,7 +13,7 @@ def __init__(self,env,config): self.config = config self.v_max = self.config.v_max self.v_min = self.config.v_min - self.atoms = self.config.atoms + self.atoms = self.config.atoms # 价值采样点的个数 self.epsilon = self.config.INITIAL_EPSILON self.state_shape = env.observation_space.shape
@@ -23,10 +23,8 @@ def __init__(self,env,config): target_state_shape = [1] target_state_shape.extend(self.state_shape) - self.state_input = tf.placeholder(tf.float32,target_state_shape) self.action_input = tf.placeholder(tf.int32,[1,1]) - self.m_input = tf.placeholder(tf.float32,[self.atoms]) self.delta_z = (self.v_max - self.v_min) / (self.atoms - 1)
@@ -80,7 +78,6 @@ def build_cate_dqn_net(self): self.q_target = tf.reduce_sum(self.z_target * self.z) self.cross_entropy_loss = -tf.reduce_sum(self.m_input * tf.log(self.z_eval)) - self.optimizer = tf.train.AdamOptimizer(self.config.LEARNING_RATE).minimize(self.cross_entropy_loss) eval_params = tf.get_collection("eval_net_params")
@@ -97,6 +94,7 @@ def train(self,s,r,action,s_,gamma): a_ = tf.argmax(list_q_).eval() m = np.zeros(self.atoms) p = self.sess.run(self.z_target,feed_dict = {self.state_input:[s_],self.action_input:[[a_]]})[0] + m = np.zeros(self.atoms) for j in
range(self.atoms): Tz = min(self.v_max,max(self.v_min,r+gamma * self.z[j])) bj = (Tz - self.v_min) / self.delta_z # 分在第几个块里 diff --git a/RL/Basic-Rainbow-Net/agent.py b/RL/Basic-Rainbow-Net/agent.py new file mode 100644 index 00000000..176e3d97 --- /dev/null +++ b/RL/Basic-Rainbow-Net/agent.py @@ -0,0 +1,294 @@ +"""Main DQN agent.""" + +import numpy as np +import tensorflow as tf +from PIL import Image +import random +from huberLoss import mean_huber_loss, weighted_huber_loss + +EPSILON_BEGIN = 1.0 +EPSILON_END = 0.1 +BETA_BEGIN = 0.5 +BETA_END = 1.0 + +class DQNAgent(): + def __init__(self, + eval_model, + target_model, + memory, + num_actions, + gamma, + update_freq, + target_update_freq, + update_target_params_ops, + batch_size, + is_double_dqn, + is_per, + is_distributional, + num_step, + is_noisy, + learning_rate, + rmsp_decay, + rmsp_momentum, + rmsp_epsilon): + + self._eval_model = eval_model + self._target_model = target_model + self._memory = memory + self._num_actions = num_actions + self._gamma = gamma + self._update_freq = update_freq + self._target_update_freq = target_update_freq + self._update_target_params_ops = update_target_params_ops + self._batch_size = batch_size + self._is_double_dqn = is_double_dqn + self._is_per = is_per + self._is_distributional = is_distributional + self._num_step = num_step + self._is_noisy = is_noisy + self._learning_rate = learning_rate + self._rmsp_decay = rmsp_decay + self._rmsp_momentum = rmsp_momentum + self._rmsp_epsilon = rmsp_epsilon + self._update_times = 0 + self._beta = EPSILON_BEGIN + self._beta_increment = (EPSILON_END-BETA_BEGIN)/2000000.0 + self._epsilon = EPSILON_BEGIN if is_noisy else 0. + self._epsilon_increment = (EPSILON_END - EPSILON_BEGIN)/2000000.0 if is_noisy==0 else 0. + self._action_ph = tf.placeholder(tf.int32,[None,2],'action_ph') + self._reward_ph = tf.placeholder(tf.float32,name='reward_ph') + self._is_terminal_ph = tf.placeholder(tf.float32,name='is_terminal_ph') + self._action_chosen_by_eval_ph = tf.placeholder(tf.int32,[None,2],'action_chosen_by_eval_ph') + self._loss_weight_ph = tf.placeholder(tf.float32,name='loss_weight_ph') + self._error_op,self._train_op = self._get_error_and_train_op(self._reward_ph,self._is_terminal_ph, + self._action_ph,self._action_chosen_by_eval_ph, + self._loss_weight_ph) + + + def _get_error_and_train_op(self,reward_ph, + is_terminal_ph, + action_ph, + action_chosen_by_eval_ph, + loss_weight_ph): + + if self._is_distributional == 0: + q_values_target = self._target_model['q_values'] + q_values_eval = self._eval_model['q_values'] + + if self._is_double_dqn: + max_q = tf.gather_nd(q_values_target,action_chosen_by_eval_ph) # 如果是double-dqn,动作由eval-net选出,q值由target-net得到 + else: + max_q = tf.reduce_max(q_values_target,axis=1) + + target = reward_ph + (1.0 - is_terminal_ph) * (self._gamma ** self._num_step) * max_q # 这里是多步的dqn + gathered_outputs = tf.gather_nd(q_values_eval,action_ph,name='gathered_outputs') + + if self._is_per == 1: + loss = weighted_huber_loss(target,gathered_outputs,loss_weight_ph) + else: + loss = mean_huber_loss(target,gathered_outputs) + train_op = tf.train.RMSPropOptimizer(self._learning_rate,decay=self._rmsp_decay, + momentum=self._rmsp_momentum,epsilon=self._rmsp_epsilon).minimize(loss) + + error_op = tf.abs(gathered_outputs - target,name='abs_error') + return train_op,error_op + + else: + N_atoms = 51 + V_Max = 20.0 + V_Min = 0.0 + Delta_z = (V_Max - V_Min) / (N_atoms - 1) + z_list = tf.constant([V_Min + i * Delta_z for i in range(N_atoms)], dtype=tf.float32) + + 
q_distributional_values_target = self._target_model['q_distributional_network'] # batch_size * num_actions * N_atoms + tmp_batch_size = tf.shape(q_distributional_values_target)[0] # batch_size + + if self._is_double_dqn: + q_distributional_chosen_by_action_target = tf.gather_nd(q_distributional_values_target,action_chosen_by_eval_ph) + else: + action_chosen_by_target_q = tf.cast(tf.argmax(self._target_model['q_values'], axis=1), tf.int32) + q_distributional_chosen_by_action_target = tf.gather_nd(q_distributional_values_target, + tf.concat([tf.reshape(tf.range(tmp_batch_size),[-1,1]), + tf.reshape(action_chosen_by_target_q,[-1,1])],axis=1)) + + + target = tf.tile(tf.reshape(reward_ph,[-1,1]),[1,N_atoms]) + \ + (self._gamma * self._num_step) * \ + tf.multiply(tf.reshape(z_list,[1,N_atoms]),(1.0 - tf.tile(tf.reshape(is_terminal_ph,[-1,1]),[1,N_atoms]))) + + target = tf.clip_by_value(target,V_Min,V_Max) + + b = (target - V_Min) / Delta_z + + u,l = tf.ceil(b),tf.floor(b) + + u_id,l_id = tf.cast(u,tf.int32),tf.cast(l,tf.int32) + + u_minus_b,b_minus_l = u - b,b - l + q_distributional_values_eval = self._eval_model['q_distributional_network'] + + q_distributional_chosen_by_action_eval = tf.gather_nd(q_distributional_values_eval,action_ph) + + index_help = tf.tile(tf.reshape(tf.range(tmp_batch_size),[-1,1]),[1,N_atoms]) + + index_help = tf.expand_dims(index_help,-1) # batch * N_atoms * 1 + u_id = tf.concat([index_help,tf.expand_dims(u_id,-1)],axis=2) + l_id = tf.concat([index_help,tf.expand_dims(l_id,-1)],axis=2) + + error = q_distributional_chosen_by_action_target * u_minus_b * \ + tf.log(tf.gather_nd(q_distributional_chosen_by_action_eval, l_id)) \ + + q_distributional_chosen_by_action_target * b_minus_l * \ + tf.log(tf.gather_nd(q_distributional_chosen_by_action_eval, u_id)) + error = tf.reduce_sum(error, axis=1) + + if self._is_per == 1: + loss = tf.negative(error * loss_weight_ph) + else: + loss = tf.negative(error) + + train_op = tf.train.RMSPropOptimizer(self._learning_rate, + decay=self._rmsp_decay, momentum=self._rmsp_momentum, + epsilon=self._rmsp_epsilon).minimize(loss) + error_op = tf.abs(error, name='abs_error') + return error_op, train_op + + def select_action(self,sess,state,epsilon,model): + batch_size = len(state) + if np.random.rand() < epsilon: + action = np.random.randint(0,self._num_actions,size=(batch_size,)) + else: + state = state.astype(np.float32) / 255.0 + feed_dict = {model['input_frames'] :state} + action = sess.run(model['action'],feed_dict=feed_dict) + return action + + def get_multi_step_sample(self,env,sess,num_step,epsilon): + old_state,action,reward,new_state,is_terminal = env.get_state() + total_reward = np.sign(reward) + total_is_terminal = is_terminal + + next_action = self.select_action(sess,new_state,epsilon,self._eval_model) + env.take_action(next_action) + + for i in range(1,num_step): + _,_,reward,new_state,is_terminal = env.get_state() + total_reward += self._gamma ** i * np.sign(reward) + total_is_terminal += is_terminal + next_action = self.select_action(sess,new_state,epsilon,self._eval_model) + env.take_action(next_action) + + return old_state,action,total_reward,new_state,np.sign(total_is_terminal) + + def fit(self,sess,env,num_iterations,do_train=True): + + num_environment = env.num_process + env.reset() + + for t in range(0,num_iterations,num_environment): + # 准备数据 + old_state,action,reward,new_state,is_terminal = self.get_multi_step_sample(env,sess,self._num_step,self._epsilon) + self._memory.append(old_state,action,reward,new_state,is_terminal) # 
插入数据 + if self._epsilon > EPSILON_END: + self._epsilon += num_environment * self._epsilon_increment + if do_train: + num_update = sum([1 if i % self._update_freq == 0 else 0 for i in range(t, t + num_environment)]) + # 抽取数据 + for _ in range(num_update): + if self._is_per == 1: + (old_state_list, action_list, reward_list, new_state_list, is_terminal_list), \ + idx_list, p_list, sum_p, count = self._memory.sample(self._batch_size) + else: + old_state_list, action_list, reward_list, new_state_list, is_terminal_list \ + = self._memory.sample(self._batch_size) + + feed_dict = {self._target_model['input_frames']: new_state_list.astype(np.float32) / 255.0, + self._eval_model['input_frames']: old_state_list.astype(np.float32) / 255.0, + self._action_ph: list(enumerate(action_list)), + self._reward_ph: np.array(reward_list).astype(np.float32), + self._is_terminal_ph: np.array(is_terminal_list).astype(np.float32), + } + + if self._is_double_dqn: + action_chosen_by_online = sess.run(self._eval_model['action'], feed_dict={ + self._eval_model['input_frames']: new_state_list.astype(np.float32)/255.0}) + feed_dict[self._action_chosen_by_eval_ph] = list(enumerate(action_chosen_by_online)) + + if self._is_per == 1: + # Annealing weight beta + feed_dict[self._loss_weight_ph] = (np.array(p_list)*count/sum_p)**(-self._beta) + error, _ = sess.run([self._error_op, self._train_op], feed_dict=feed_dict) + self._memory.update(idx_list, error) + else: + sess.run(self._train_op, feed_dict=feed_dict) + + self._update_times += 1 + if self._beta < BETA_END: + self._beta += self._beta_increment + + if self._update_times%self._target_update_freq == 0: + sess.run(self._update_target_params_ops) + + + def _get_error(self, sess, old_state, action, reward, new_state, is_terminal): + ''' + Get TD error for Prioritized Experience Replay + ''' + feed_dict = {self._target_model['input_frames']: new_state.astype(np.float32)/255.0, + self._eval_model['input_frames']: old_state.astype(np.float32)/255.0, + self._action_ph: list(enumerate(action)), + self._reward_ph: np.array(reward).astype(np.float32), + self._is_terminal_ph: np.array(is_terminal).astype(np.float32), + } + + if self._is_double_dqn: + action_chosen_by_online = sess.run(self._eval_model['action'], feed_dict={ + self._eval_model['input_frames']: new_state.astype(np.float32)/255.0}) + feed_dict[self._action_chosen_by_eval_ph] = list(enumerate(action_chosen_by_online)) + + error = sess.run(self._error_op, feed_dict=feed_dict) + return error + + def get_mean_max_Q(self, sess, samples): + mean_max = [] + INCREMENT = 1000 + for i in range(0, len(samples), INCREMENT): + feed_dict = {self._eval_model['input_frames']: + samples[i: i + INCREMENT].astype(np.float32)/255.0} + mean_max.append(sess.run(self._eval_model['mean_max_Q'], + feed_dict = feed_dict)) + return np.mean(mean_max) + + + def evaluate(self, sess, env, num_episode): + """Evaluate num_episode games by online model. + Parameters + ---------- + sess: tf.Session + env: batchEnv.BatchEnvironment + This is your paralleled Atari environment. 
+ num_episode: int + This is the number of episode of games to evaluate + Returns + ------- + reward list for each episode + """ + num_environment = env.num_process + env.reset() + reward_of_each_environment = np.zeros(num_environment) + rewards_list = [] + + num_finished_episode = 0 + + while num_finished_episode < num_episode: + old_state, action, reward, new_state, is_terminal = env.get_state() + action = self.select_action(sess, new_state, 0, self._eval_model) + env.take_action(action) + for i, r, is_t in zip(range(num_environment), reward, is_terminal): + if not is_t: + reward_of_each_environment[i] += r + else: + rewards_list.append(reward_of_each_environment[i]) + reward_of_each_environment[i] = 0 + num_finished_episode += 1 + return np.mean(rewards_list), np.std(rewards_list) + diff --git a/RL/Basic-Rainbow-Net/batchEnv.py b/RL/Basic-Rainbow-Net/batchEnv.py new file mode 100644 index 00000000..3caccc9f --- /dev/null +++ b/RL/Basic-Rainbow-Net/batchEnv.py @@ -0,0 +1,157 @@ +import multiprocessing as mp +from multiprocessing import Pool, sharedctypes +import numpy as np +import gym +import sys +from preprocessor import Preprocessor +import time + +SKIP_START_FRAME_NUM = 42 + +class Atari_ParallelWorker(mp.Process): + def __init__(self, + i, + env_name, + pipe, + window_size, + input_shape, + num_frame_per_action, + max_episode_length, + state, + lock): + super(Atari_ParallelWorker,self).__init__() + self.id = i + self.pipe = pipe + self.window_size = window_size + self.num_frame_per_action = num_frame_per_action + self.max_episode_length = max_episode_length + self.state = state + self.lock = lock + self.env = gym.make(env_name) + self.env.seed(np.random.randint(222)) + self.preprocessor = Preprocessor(window_size,input_shape) + self.time = 0 + self._reset() + + def _take_action(self,action): + reward = 0 + old_state = self.preprocessor.get_state() # 得到4帧的画面 + for _ in range(self.num_frame_per_action): + self.time += 1 + state,intermediate_reward,is_terminal,_ = self.env.step(action) + reward += intermediate_reward + self.preprocessor.process_state_for_memory(state) # 将每一帧的画面进行压缩,同时将HistoryPreprocessor中存放的历史记录往前覆盖 + if is_terminal: + self._reset() + break + new_state = self.preprocessor.get_state() # 拿到新的state,这里的新的state是old_state后的四帧动画,reward是这四帧动画的奖励和 + if self.time > self.max_episode_length: + is_terminal = True + self._reset() + + # write 'sara' into mp.Array + np.ctypeslib.as_array(self.state['old_state'])[self.id] = old_state + np.ctypeslib.as_array(self.state['action'])[self.id] = action + np.ctypeslib.as_array(self.state['reward'])[self.id] = reward + np.ctypeslib.as_array(self.state['new_state'])[self.id] = new_state + np.ctypeslib.as_array(self.state['is_terminal'])[self.id] = is_terminal + + def run(self): + print('Environment worker %d: run'%(self.id,)) + # This lock to ensure all the process prepared before take actions + self.lock.release() + while True: + command, context = self.pipe.recv() + if command == 'CLOSE': + self.env.close() + self.pipe.close() + break + elif command == 'ACTION': + self._take_action(action=context) + self.lock.release() + elif command == 'RESET': + self._reset() + self.lock.release() + else: + raise NotImplementedError() + + + def _reset(self): + self.env.reset() + self.preprocessor.reset() + self.time = 0 + for _ in range(SKIP_START_FRAME_NUM - self.window_size): + self.env.step(0) + for _ in range(self.window_size): + state,_,_,_ = self.env.step(0) + self.preprocessor.process_state_for_memory(state) + + + + +class BatchEnvironment(): + def 
__init__(self,env_name, + num_process, + window_size, + input_shape, + num_frame_per_action, + max_episode_length): + self.num_process = num_process # 并行的游戏数量 + self.env_name = env_name + self.workers = [] + self.pipes = [] + self.locks = [] + + def get_multiprocess_numpy(dtype,shape): + tmp = np.ctypeslib.as_ctypes(np.zeros(shape,dtype=dtype)) + return sharedctypes.Array(tmp._type_,tmp,lock=False) + + self.state = { + 'old_state':get_multiprocess_numpy(np.uint8,shape=(num_process,input_shape[0],input_shape[1],window_size)), + 'action':get_multiprocess_numpy(np.uint8,shape=(num_process,)), + 'reward':get_multiprocess_numpy(np.int16,shape=(num_process,)), + 'new_state':get_multiprocess_numpy(np.uint8,shape=(num_process,input_shape[0],input_shape[1],window_size)), + 'is_terminal':get_multiprocess_numpy(np.uint8,shape=(num_process,)) + } + + for i in range(num_process): + parent_pipe,child_pipe = mp.Pipe() + lock = mp.Lock() + self.pipes.append(parent_pipe) + self.locks.append(lock) + + lock.acquire() + worker = Atari_ParallelWorker(i, env_name, child_pipe, window_size, + input_shape, num_frame_per_action, max_episode_length, self.state, lock) + + worker.start() + self.workers.append(worker) + + def take_action(self, action_list): + assert len(action_list) == self.num_process + for pipe, action, lock in zip(self.pipes, action_list, self.locks): + lock.acquire() + pipe.send(('ACTION', action)) + + def reset(self): + for pipe, lock in zip(self.pipes, self.locks): + lock.acquire() + pipe.send(('RESET', None)) + + def get_state(self): + for lock in self.locks: + lock.acquire() + + old_state = np.ctypeslib.as_array(self.state['old_state']) + action = np.ctypeslib.as_array(self.state['action']) + reward = np.ctypeslib.as_array(self.state['reward']) + new_state = np.ctypeslib.as_array(self.state['new_state']) + is_terminal = np.ctypeslib.as_array(self.state['is_terminal']) + + for lock in self.locks: + lock.release() + return np.copy(old_state), action, reward, np.copy(new_state), is_terminal + + def close(self): + for worker in self.workers: + worker.terminate() \ No newline at end of file diff --git a/RL/Basic-Rainbow-Net/huberLoss.py b/RL/Basic-Rainbow-Net/huberLoss.py new file mode 100644 index 00000000..12a1b1a7 --- /dev/null +++ b/RL/Basic-Rainbow-Net/huberLoss.py @@ -0,0 +1,71 @@ +"""Loss functions.""" + +import tensorflow as tf + + +def huber_loss(y_true, y_pred, max_grad=1.): + """Calculate the huber loss. + See https://en.wikipedia.org/wiki/Huber_loss + Parameters + ---------- + y_true: np.array, tf.Tensor + Target value. + y_pred: np.array, tf.Tensor + Predicted value. + max_grad: float, optional + Positive floating point value. Represents the maximum possible + gradient magnitude. + Returns + ------- + tf.Tensor + The huber loss. + """ + a = tf.abs(y_true - y_pred) + less_than_max = 0.5 * tf.square(a) + greater_than_max = max_grad * (a - 0.5 * max_grad) + return tf.where(a <= max_grad, x=less_than_max, y=greater_than_max) + + + +def mean_huber_loss(y_true, y_pred, max_grad=1.): + """Return mean huber loss. + Same as huber_loss, but takes the mean over all values in the + output tensor. + Parameters + ---------- + y_true: np.array, tf.Tensor + Target value. + y_pred: np.array, tf.Tensor + Predicted value. + max_grad: float, optional + Positive floating point value. Represents the maximum possible + gradient magnitude. + Returns + ------- + tf.Tensor + The mean huber loss. 
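+        Examples
+        --------
+        With max_grad=1., an element-wise error of 0.5 falls on the quadratic
+        branch of huber_loss (0.5 * 0.5**2 = 0.125), while an error of 2.0
+        falls on the linear branch (1. * (2.0 - 0.5) = 1.5); the returned
+        scalar is the mean of these element-wise losses over the whole tensor.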
+ """ + return tf.reduce_mean(huber_loss(y_true, y_pred, max_grad=max_grad)) + + +def weighted_huber_loss(y_true, y_pred, weights, max_grad=1.): + """Return mean huber loss. + Same as huber_loss, but takes the mean over all values in the + output tensor. + Parameters + ---------- + y_true: np.array, tf.Tensor + Target value. + y_pred: np.array, tf.Tensor + Predicted value. + weights: np.array, tf.Tensor + weights value. + max_grad: float, optional + Positive floating point value. Represents the maximum possible + gradient magnitude. + Returns + ------- + tf.Tensor + The mean huber loss. + """ + return tf.reduce_mean(weights*huber_loss(y_true, y_pred, max_grad=max_grad)) \ No newline at end of file diff --git a/RL/Basic-Rainbow-Net/main.py b/RL/Basic-Rainbow-Net/main.py new file mode 100644 index 00000000..6162475c --- /dev/null +++ b/RL/Basic-Rainbow-Net/main.py @@ -0,0 +1,162 @@ +import argparse +import gym + +import numpy as np +import random +import tensorflow as tf +import PIL + +from batchEnv import BatchEnvironment +from replayMemory import ReplayMemory, PriorityExperienceReplay +from model import create_deep_q_network, create_duel_q_network, create_model, create_distributional_model +from agent import DQNAgent + + +NUM_FRAME_PER_ACTION = 4 +UPDATE_FREQUENCY = 4 # do one batch update when UPDATE_FREQUENCY number of new samples come +TARGET_UPDATE_FREQENCY = 10000 # target-net更新频率 +REPLAYMEMORY_SIZE = 500000 # 经验池的大小 +MAX_EPISODE_LENGTH = 100000 # 最大的序列长度 +RMSP_EPSILON = 0.01 +RMSP_DECAY = 0.95 +RMSP_MOMENTUM =0.95 +NUM_FIXED_SAMPLES = 10000 +NUM_BURN_IN = 50000 +LINEAR_DECAY_LENGTH = 4000000 +NUM_EVALUATE_EPSIODE = 20 + +def get_fixed_samples(env,num_actions,num_samples): + fixed_samples = [] + num_environment = env.num_process + env.reset() + + for _ in range(0,num_samples,num_environment): + old_state,action,reward,new_state,is_terminal = env.get_state() + action = np.random.randint(0,num_actions,size = (num_environment,)) + env.take_action(action) + for state in new_state: + fixed_samples.append(state) + return np.array(fixed_samples) + + +def main(): + parser = argparse.ArgumentParser(description='Run DQN on Atari Space Invaders') + parser.add_argument('--env', default='SpaceInvaders-v0', help='Atari env name') + parser.add_argument('--seed', default=10703, type=int, help='Random seed') + parser.add_argument('--input_shape', default=(84,84), help='Input shape') + parser.add_argument('--gamma', default=0.99, help='Discount factor') + parser.add_argument('--epsilon', default=0.1, help='Exploration probability in epsilon-greedy') + parser.add_argument('--learning_rate', default=0.00025, help='Training learning rate.') + parser.add_argument('--window_size', default=4, type = int, help= + 'Number of frames to feed to the Q-network') + parser.add_argument('--batch_size', default=32, type = int, help= + 'Batch size of the training part') + parser.add_argument('--num_process', default=3, type = int, help= + 'Number of parallel environment') + parser.add_argument('--num_iteration', default=20000000, type = int, help= + 'number of iterations to train') + parser.add_argument('--eval_every', default=0.001, type = float, help= + 'What fraction of num_iteration to run between evaluations.') + parser.add_argument('--is_duel', default=1, type = int, help= + 'Whether use duel DQN, 0 means no, 1 means yes.') + parser.add_argument('--is_double', default=1, type = int, help= + 'Whether use double DQN, 0 means no, 1 means yes.') + parser.add_argument('--is_per', default=1, type = int, help= + 
'Whether use PriorityExperienceReplay, 0 means no, 1 means yes.') + parser.add_argument('--is_distributional', default=1, type = int, help= + 'Whether use distributional DQN, 0 means no, 1 means yes.') + parser.add_argument('--num_step', default=1, type = int, help= + 'Num Step for multi-step DQN, 3 is recommended') + parser.add_argument('--is_noisy', default=1, type = int, help= + 'Whether use NoisyNet, 0 means no, 1 means yes.') + + + args = parser.parse_args() + args.input_shape = tuple(args.input_shape) + print('Environment : %s.' % (args.env,)) + + env = gym.make(args.env) + num_actions = env.action_space.n + print('number actions %d' % (num_actions,)) + + env.close() + + random.seed(args.seed) + np.random.seed(args.seed) + tf.set_random_seed(args.seed) + + batch_environment = BatchEnvironment(args.env, args.num_process, + args.window_size, args.input_shape, NUM_FRAME_PER_ACTION, MAX_EPISODE_LENGTH) + + # 是否使用优先经验回放 + if args.is_per == 1: + replay_memory = PriorityExperienceReplay(REPLAYMEMORY_SIZE,args.window_size,args.input_shape) + else: + replay_memory = ReplayMemory(REPLAYMEMORY_SIZE,args.window_size,args.input_shape) + + create_network_fn = create_deep_q_network if args.is_duel == 0 else create_duel_q_network + + create_model_fn = create_model if args.is_distributional == 0 else create_distributional_model + + noisy = True if args.is_noisy == 1 else False + + eval_model,eval_params = create_model_fn(args.window_size,args.input_shape,num_actions, + 'eval_model',create_network_fn,trainable=True,noisy=noisy) + target_model,target_params = create_model_fn(args.window_size,args.input_shape,num_actions, + 'target_model',create_network_fn,trainable=False,noisy=noisy) + + update_target_params_ops = [t.assign(s) for s,t in zip(eval_params,target_params)] + + agent = DQNAgent(eval_model, + target_model, + replay_memory, + num_actions, + args.gamma, + UPDATE_FREQUENCY, + TARGET_UPDATE_FREQENCY, + update_target_params_ops, + args.batch_size, + args.is_double, + args.is_per, + args.is_distributional, + args.num_step, + args.is_noisy, + args.learning_rate, + RMSP_DECAY, + RMSP_MOMENTUM, + RMSP_EPSILON) + + + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + + sess.run(update_target_params_ops) + + print('prepare fixed samples for mean max q') + + fixed_samples = get_fixed_samples(batch_environment, num_actions, NUM_FIXED_SAMPLES) + + agent.fit(sess,batch_environment,NUM_BURN_IN,do_train=False) + + # Begin to train: + fit_iteration = int(args.num_iteration * args.eval_every) + + for i in range(0, args.num_iteration, fit_iteration): + # Evaluate: + reward_mean, reward_var = agent.evaluate(sess, batch_environment, NUM_EVALUATE_EPSIODE) + mean_max_Q = agent.get_mean_max_Q(sess, fixed_samples) + print("%d, %f, %f, %f" % (i, mean_max_Q, reward_mean, reward_var)) + # Train: + agent.fit(sess, batch_environment, fit_iteration, do_train=True) + + batch_environment.close() + + + + + + + +if __name__ == '__main__': + main() + diff --git a/RL/Basic-Rainbow-Net/model.py b/RL/Basic-Rainbow-Net/model.py new file mode 100644 index 00000000..0d12176c --- /dev/null +++ b/RL/Basic-Rainbow-Net/model.py @@ -0,0 +1,199 @@ +import tensorflow as tf +import numpy as np + +def create_conv_network(input_frames,trainable): + conv1_W = tf.get_variable(shape=[8,8,4,16],name='conv1_W', + trainable=trainable,initializer=tf.contrib.layers.xavier_initializer()) + conv1_b = tf.Variable(tf.zeros([16], dtype=tf.float32), + name='conv1_b', trainable=trainable) + conv1 = tf.nn.conv2d(input_frames, conv1_W, 
strides=[1, 4, 4, 1], + padding='VALID', name='conv1') + # (batch size, 20, 20, 16) + output1 = tf.nn.relu(conv1 + conv1_b, name='output1') + + conv2_W = tf.get_variable(shape=[4, 4, 16, 32], name='conv2_W', + trainable=trainable, initializer=tf.contrib.layers.xavier_initializer()) + conv2_b = tf.Variable(tf.zeros([32], dtype=tf.float32), name='conv2_b', + dtype=tf.float32, trainable=trainable) + conv2 = tf.nn.conv2d(output1, conv2_W, strides=[1, 2, 2, 1], + padding='VALID', name='conv2') + # (batch size, 9, 9, 32) + output2 = tf.nn.relu(conv2 + conv2_b, name='output2') + + flat_output2_size = 9 * 9 * 32 + flat_output2 = tf.reshape(output2, [-1, flat_output2_size], name='flat_output2') + + return flat_output2, flat_output2_size, [conv1_W, conv1_b, conv2_W, conv2_b] + + + +def create_deep_q_network(input_frames,num_actions,trainable,noisy): + flat_output,flat_output_size,parameter_list = create_conv_network(input_frames,trainable) + + if noisy == False: + fc1_W = tf.get_variable(shape=[flat_output_size,256],name='fc1_W', + trainable=trainable,initializer=tf.contrib.layers.xavier_initializer()) + + fc1_b = tf.Variable(tf.zeros([256],dtype=tf.float32),name='fc1_b', + trainable=trainable) + + output3 = tf.nn.relu(tf.matmul(flat_output,fc1_W)+fc1_b,name='output3') + + fc2_W = tf.get_variable(shape=[256,num_actions],name='fc2_W',trainable=trainable, + initializer=tf.contrib.layers.xavier_initializer()) + + fc2_b = tf.Variable(tf.zeros([num_actions],dtype=tf.float32),name='fc2_b',trainable=trainable) + + q_network = tf.nn.relu(tf.matmul(output3,fc2_W) + fc2_b,name='q_network') + + parameter_list += [fc1_W,fc1_b,fc2_W,fc2_b] + + else: + + output3, parameter_list_output3 = noisy_dense(flat_output, name='noisy_fc1', + input_size=flat_output_size, output_size=256, + activation_fn=tf.nn.relu, trainable=trainable) + q_network, parameter_list_q_network = noisy_dense(output3, name='noisy_fc2', + input_size=256, output_size=num_actions, trainable=trainable) + parameter_list += parameter_list_output3 + parameter_list_q_network + return q_network, parameter_list + + +def create_duel_q_network(input_frames,num_actions,trainable,noisy): + flat_output, flat_output_size, parameter_list = create_conv_network(input_frames, trainable) + + if noisy == False: + fcV_W = tf.get_variable(shape=[flat_output_size, 512], name='fcV_W', + trainable=trainable, initializer=tf.contrib.layers.xavier_initializer()) + fcV_b = tf.Variable(tf.zeros([512], dtype=tf.float32), name='fcV_b', + dtype=tf.float32, trainable=trainable) + outputV = tf.nn.relu(tf.matmul(flat_output, fcV_W) + fcV_b, name='outputV') + + fcV2_W = tf.get_variable(shape=[512, 1], name='fcV2_W', + trainable=trainable, initializer=tf.contrib.layers.xavier_initializer()) + fcV2_b = tf.Variable(tf.zeros([1], dtype=tf.float32), name='fcV2_b', + trainable=trainable) + outputV2 = tf.matmul(outputV, fcV2_W) + fcV2_b # V + + + fcA_W = tf.get_variable(shape=[flat_output_size, 512], name='fcA_W', + trainable=trainable, initializer=tf.contrib.layers.xavier_initializer()) + fcA_b = tf.Variable(tf.zeros([512], dtype=tf.float32), name='fcA_b', + trainable=trainable) + outputA = tf.nn.relu(tf.matmul(flat_output, fcA_W) + fcA_b, name='outputA') + + fcA2_W = tf.get_variable(shape=[512, num_actions], name='fcA2_W', + trainable=trainable, initializer=tf.contrib.layers.xavier_initializer()) + fcA2_b = tf.Variable(tf.zeros([num_actions], dtype=tf.float32), name='fcA2_b', + trainable=trainable) + outputA2 = tf.matmul(outputA, fcA2_W) + fcA2_b # 优势函数 + + parameter_list += [fcV_W, fcV_b, 
fcV2_W, fcV2_b, fcA_W, fcA_b, fcA2_W, fcA2_b] + else: + outputV, parameter_list_outputV = noisy_dense(flat_output, name='fcV', + input_size=flat_output_size, output_size=512, trainable=trainable, + activation_fn=tf.nn.relu) + outputV2, parameter_list_outputV2 = noisy_dense(outputV, name='fcV2', + input_size=512, output_size=1, trainable=trainable) + ouputA, parameter_list_outputA = noisy_dense(flat_output, name='fcA', + input_size=flat_output_size, output_size=512, trainable=trainable, + activation_fn=tf.nn.relu) + outputA2, parameter_list_outputA2 = noisy_dense(ouputA, name='fcA2', + input_size=512, output_size=num_actions, trainable=trainable) + parameter_list += parameter_list_outputA + parameter_list_outputA2 + \ + parameter_list_outputV + parameter_list_outputV2 + + q_network = tf.nn.relu(outputV2 + outputA2 - tf.reduce_mean(outputA2), name='q_network') + + return q_network, parameter_list + + +def create_model(window,input_shape,num_actions,model_name,create_network_fn,trainable,noisy): + """创建Q网络""" + with tf.variable_scope(model_name): + input_frames = tf.placeholder(tf.float32,[None,input_shape[0],input_shape[1],window],name='input_frames') + q_network,parameter_list = create_network_fn(input_frames,num_actions,trainable,noisy) + + mean_max_q = tf.reduce_mean(tf.reduce_max(q_network,axis=[1]),name='mean_max_q') + action = tf.argmax(q_network,axis=1) + + model = { + 'q_values':q_network, + 'input_frames':input_frames, + 'mean_max_q':mean_max_q, + 'action':action, + } + + return model,parameter_list + + +def create_distributional_model(window,input_shape,num_actions,model_name,create_network_fn,trainable,noisy): + N_atoms = 51 + V_Max = 20.0 + V_Min = 0.0 + Delta_z = (V_Max - V_Min) / (N_atoms - 1) + z_list = tf.constant([V_Min + i * Delta_z for i in range(N_atoms)],dtype=tf.float32) + z_list_broadcasted = tf.tile(tf.reshape(z_list,[1,N_atoms]),[num_actions,1]) # batch * num_actions * N_atoms + + with tf.variable_scope(model_name): + input_frames = tf.placeholder(tf.float32,[None,input_shape[0],input_shape[1],window],name='input_frames') + q_distributional_network,parameter_list = create_network_fn(input_frames,num_actions * N_atoms,trainable,noisy) + + q_distributional_network = tf.reshape(q_distributional_network,[-1,num_actions,N_atoms]) + + q_distributional_network = tf.nn.softmax(q_distributional_network,dim=2) + # 防止NAN + q_distributional_network = tf.clip_by_value(q_distributional_network, 1e-8, 1.0 - 1e-8) + q_network = tf.multiply(q_distributional_network ,z_list_broadcasted) + q_network = tf.reduce_sum(q_network,axis=2,name='q_values') + + mean_max_q = tf.reduce_mean(tf.reduce_max(q_network,axis=[1]),name='mean_max_q') + action = tf.argmax(q_network,axis=1) + + model = { + 'q_distributional_network' : q_distributional_network, + 'q_values':q_network, + 'input_frames':input_frames, + 'mean_max_q':mean_max_q, + 'action':action + } + + return model,parameter_list + + +def noisy_dense(x,input_size,output_size,name,trainable,activation_fn=tf.identity): + + def f(x): + return tf.multiply(tf.sign(x),tf.pow(tf.abs(x),0.5)) + + mu_init = tf.random_uniform_initializer(minval=-1*1/np.power(input_size, 0.5), + maxval=1*1/np.power(input_size, 0.5)) + + sigma_init = tf.constant_initializer(0.4 / np.power(input_size,0.5)) + + p = tf.random_normal([input_size,1]) + q = tf.random_normal([1,output_size]) + + f_p = f(p) + f_q = f(q) + + w_epsilon = f_p * f_q + b_epsilon = tf.squeeze(f_q) + + w_mu = tf.get_variable(name + "/w_mu", [input_size, output_size], + initializer=mu_init, 
trainable=trainable) + w_sigma = tf.get_variable(name + "/w_sigma", [input_size, output_size], + initializer=sigma_init, trainable=trainable) + + w = w_mu + tf.multiply(w_sigma, w_epsilon) + ret = tf.matmul(x,w) + + b_mu = tf.get_variable(name + "/b_mu", [output_size], + initializer=mu_init, trainable=trainable) + b_sigma = tf.get_variable(name + "/b_sigma", [output_size], + initializer=sigma_init, trainable=trainable) + b = b_mu + tf.multiply(b_sigma, b_epsilon) + return activation_fn(ret + b), [w_mu, w_sigma, b_mu, b_sigma] + + + diff --git a/RL/Basic-Rainbow-Net/preprocessor.py b/RL/Basic-Rainbow-Net/preprocessor.py new file mode 100644 index 00000000..ef280c7d --- /dev/null +++ b/RL/Basic-Rainbow-Net/preprocessor.py @@ -0,0 +1,126 @@ +"""Preprocessors for Atari pixel output.""" + +import numpy as np +from PIL import Image + +class HistoryPreprocessor: + """Keeps the last k states. + Useful for domains where you need velocities, but the state + contains only positions. + When the environment starts, this will just fill the initial + sequence values with zeros k times. + Parameters + ---------- + history_length: int + Number of previous states to prepend to state being processed. + """ + + def __init__(self, input_shape, history_length=1): + self._WIDTH = input_shape[0] + self._HEIGHT = input_shape[1] + self.history = np.zeros(shape=(1, self._WIDTH, self._HEIGHT, history_length+1), dtype = np.uint8) + self.history_length = history_length + + + def process_state_for_memory(self, state): + """You only want history when you're deciding the current action to take. + Returns last history_length processed states, where each is the max of + the raw state and the previous raw state. + """ + self.history[0,:,:,1:]=self.history[0,:,:,:self.history_length] + self.history[0,:,:,0]=state + + def reset(self): + """Reset the history sequence. + Useful when you start a new episode. + """ + self.history = np.zeros(shape=(1, self._WIDTH, self._HEIGHT, self.history_length+1), dtype = np.uint8) + + def get_state(self): + result = np.zeros(shape=(1, self._WIDTH, self._HEIGHT, self.history_length), dtype = np.uint8) + + for i in range(self.history_length): + result[0,:,:,i]=np.maximum(self.history[0,:,:,i], self.history[0,:,:,i+1]) + return result + + + +class AtariPreprocessor: + """Converts images to greyscale and downscales. + Based on the preprocessing step described in: + @article{mnih15_human_level_contr_throug_deep_reinf_learn, + author = {Volodymyr Mnih and Koray Kavukcuoglu and David + Silver and Andrei A. Rusu and Joel Veness and Marc + G. Bellemare and Alex Graves and Martin Riedmiller + and Andreas K. Fidjeland and Georg Ostrovski and + Stig Petersen and Charles Beattie and Amir Sadik and + Ioannis Antonoglou and Helen King and Dharshan + Kumaran and Daan Wierstra and Shane Legg and Demis + Hassabis}, + title = {Human-Level Control Through Deep Reinforcement + Learning}, + journal = {Nature}, + volume = 518, + number = 7540, + pages = {529-533}, + year = 2015, + doi = {10.1038/nature14236}, + url = {http://dx.doi.org/10.1038/nature14236}, + } + You may also want to max over frames to remove flickering. Some + games require this (based on animations and the limited sprite + drawing capabilities of the original Atari). + Parameters + ---------- + new_size: 2 element tuple + The size that each image in the state should be scaled to. e.g + (84, 84) will make each image in the output have shape (84, 84). 
+ """ + + def __init__(self, input_shape): + self._WIDTH = input_shape[0] + self._HEIGHT = input_shape[1] + + + def process_state_for_memory(self, state): + """Scale, convert to greyscale and store as uint8. + We don't want to save floating point numbers in the replay + memory. We get the same resolution as uint8, but use a quarter + to an eigth of the bytes (depending on float32 or float64) + """ + image = Image.fromarray(state) + image = image.convert('L') + image = image.resize((self._WIDTH, self._HEIGHT), Image.LANCZOS) + #image.show() + image = np.array(image,dtype=np.uint8) + return image.reshape((1, self._WIDTH, self._HEIGHT)) + + +class Preprocessor: + """Combination of both an Atari preprocessor and history preprocessor.""" + def __init__(self, window_size, input_shape): + self._atari_preprocessor = AtariPreprocessor(input_shape) + self._history_preprocessor = HistoryPreprocessor(input_shape, history_length=window_size) + + def process_state_for_memory(self, state): + if state.shape == (210, 160, 3): + state = self._atari_preprocessor.process_state_for_memory(state) + state = self._history_preprocessor.process_state_for_memory(state) + else: + raise Exception("Shape Error in preprocessor"+str(state.shape)) + return state + + def reset(self): + self._history_preprocessor.reset() + + def process_reward(self, reward): + """Get sign of reward: -1, 0 or 1.""" + return np.sign(reward) + + def get_state(self): + return self._history_preprocessor.get_state() + + def state2float(self, state): + if state.dtype != np.uint8: + raise Exception("Error, State should be in np.unit8") + return state.astype(np.float16) / 255.0 \ No newline at end of file diff --git a/RL/Basic-Rainbow-Net/replayMemory.py b/RL/Basic-Rainbow-Net/replayMemory.py new file mode 100644 index 00000000..3a3e5432 --- /dev/null +++ b/RL/Basic-Rainbow-Net/replayMemory.py @@ -0,0 +1,85 @@ +import numpy as np +import random +from sumTree import SumTree + +class ReplayMemory(): + + def __init__(self,max_size,window_size,input_shape): + + self._max_size = max_size + self._window_size = window_size + self._WIDTH = input_shape[0] + self._HEIGHT = input_shape[1] + self._memory = [] + + + def append(self,old_state,action,reward,new_state,is_terminal): + + num_sample = len(old_state) + if len(self._memory) >= self._max_size: + del(self._memory[0:num_sample]) + + for o_s,a,r,n_s,i_t in zip(old_state,action,reward,new_state,is_terminal): + self._memory.append((o_s,a,r,n_s,i_t)) + + def sample(self,batch_size,indexes=None): + + samples = random.sample(self._memory,min(batch_size,len(self._memory))) + zipped = list(zip(*samples)) + zipped[0] = np.reshape(zipped[0],(-1,self._WIDTH,self._HEIGHT,self._window_size)) + zipped[3] = np.reshape(zipped[3],(-1,self._WIDTH,self._HEIGHT,self._window_size)) + return zipped + + +class PriorityExperienceReplay(): + + def __init__(self,max_size, + window_size, + input_shape): + self.tree = SumTree(max_size) + self._max_size = max_size + self._window_size = window_size + self._WIDTH = input_shape[0] + self._HEIGHT = input_shape[1] + self.e = 0.01 + self.a = 0.6 + + + def _getPriority(self,error): + return (error + self.e) ** self.a + + def append(self,old_state,action,reward,new_state,is_terminal): + for o_s,a,r,n_s,i_t in zip(old_state,action,reward,new_state,is_terminal): + p = self._getPriority(0.5) + self.tree.add(p,data=(o_s,a,r,n_s,i_t)) + + def sample(self,batch_size,indexes=None): + data_batch = [] + idx_batch = [] + p_batch = [] + segment = self.tree.total_and_count()[0] / batch_size + + for i in 
range(batch_size): + a = segment * i + b = segment * (i + 1) + + s = random.uniform(a,b) + (idx,p,data) = self.tree.get(s) + data_batch.append(data) + idx_batch.append(idx) + p_batch.append(p) + + zipped = list(zip(*data_batch)) + zipped[0] = np.reshape(zipped[0], (-1, self._WIDTH, self._HEIGHT, self._window_size)) + zipped[3] = np.reshape(zipped[3], (-1, self._WIDTH, self._HEIGHT, self._window_size)) + + sum_p,count = self.tree.total_and_count() + + return zipped,idx_batch,p_batch,sum_p,count + + def update(self,idx_list,error_list): + for idx,error in zip(idx_list,error_list): + p = self._getPriority(error) + self.tree.update(idx,p) + + diff --git a/RL/Basic-Rainbow-Net/sumTree.py b/RL/Basic-Rainbow-Net/sumTree.py new file mode 100644 index 00000000..7919b9a4 --- /dev/null +++ b/RL/Basic-Rainbow-Net/sumTree.py @@ -0,0 +1,56 @@ +import numpy + +class SumTree(): + write = 0 + count = 0 + + def __init__(self,capacity): + self.capacity = capacity + self.tree = numpy.zeros( 2 * capacity - 1) + self.data = numpy.zeros( capacity ,dtype = object) + + + def _propagate(self,idx,change): + parent = (idx - 1) // 2 + self.tree[parent] += change + + if parent!=0: + self._propagate(parent,change) + + def _retrieve(self, idx, s): + left = 2 * idx + 1 + right = left + 1 + + if left >= len(self.tree): + return idx + + if s <= self.tree[left]: + return self._retrieve(left, s) + else: + return self._retrieve(right, s-self.tree[left]) + + def total_and_count(self): + return self.tree[0], self.count + + + def update(self,idx,p): + change = p - self.tree[idx] # 得到变化的数值, + self.tree[idx] = p + self._propagate(idx,change) # 根据叶子结点的变化,不断修改父节点的值 + + def add(self,p,data): + idx = self.write + self.capacity - 1 # 得到叶子结点在树中的位置 + self.data[self.write] = data + self.update(idx,p) + + self.write += 1 + if self.write >= self.capacity: + self.write = 0 + if self.count < self.capacity: + self.count += 1 + + def get(self, s): + idx = self._retrieve(0, s) + dataIdx = idx - self.capacity + 1 + + return (idx, self.tree[idx], self.data[dataIdx]) \ No newline at end of file
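
A minimal usage sketch of the SumTree above, showing the proportional sampling that PriorityExperienceReplay.sample performs: priorities are accumulated in the tree, the total mass is split into equal-width segments, and one leaf is drawn per segment. This assumes sumTree.py from RL/Basic-Rainbow-Net is importable from the working directory; the transitions and TD errors below are dummy values for illustration only.

import random
from sumTree import SumTree  # assumes RL/Basic-Rainbow-Net is on the Python path

tree = SumTree(capacity=8)

# Store dummy transitions with priority p = (|TD error| + e) ** a,
# mirroring PriorityExperienceReplay._getPriority with e=0.01, a=0.6.
for i, td_error in enumerate([0.5, 0.1, 2.0, 0.05]):
    tree.add((td_error + 0.01) ** 0.6, data='transition-%d' % i)

total_p, count = tree.total_and_count()

# Draw one leaf per equal-width segment of the cumulative priority mass,
# exactly as PriorityExperienceReplay.sample does for a batch.
batch_size = 2
segment = total_p / batch_size
for i in range(batch_size):
    s = random.uniform(segment * i, segment * (i + 1))
    idx, p, data = tree.get(s)
    print(idx, p, data)
    # After a training step, the stored priority would be refreshed with
    # tree.update(idx, new_priority) using the fresh TD error.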