Update the filtering pattern for candidate keyphrases

Update the test cases for each language and the examples in the Jupyter notebook.
NC0DER · Oct 12, 2023 · 9868626 · 9868626
1 parent 713fbb3
commit 9868626
Show file tree

Hide file tree

Showing 3 changed files with 59 additions and 59 deletions.
diff --git a/LMRank/model.py b/LMRank/model.py
@@ -151,7 +151,7 @@ def extract_candidate_keyphrases(
 
             keep_nouns_adjs: (bool)
                 A boolean flag that controls if the candidate keyphrases
-                are composed only from nouns, proper nouns and adjectives.
+                are composed only from nouns and adjectives.
 
         Output: 
             <object>: (List[Tuple[str, int]])
@@ -182,7 +182,7 @@ def extract_candidate_keyphrases(
                 if chunk.text.lower() not in nlp.Defaults.stop_words
                 and chunk[0].pos_ not in {'PRON', 'PART'}
                 and all(
-                    term.pos_ in {'PROPN','NOUN', 'ADJ'} 
+                    term.pos_ in {'NOUN', 'ADJ'}
                     if keep_nouns_adjs else True for term in chunk
                 )
                 and len(chunk.text) > 2

diff --git a/examples/LMRank.ipynb b/examples/LMRank.ipynb
@@ -117,24 +117,24 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "z__eUDiRqvn1",
-        "outputId": "7334bae6-6bb0-457c-f791-3f5b31d4883f"
+        "outputId": "a1a11575-ad0d-405c-9b2c-2c351737d632"
       },
       "execution_count": 3,
       "outputs": [
         {
           "output_type": "execute_result",
           "data": {
             "text/plain": [
-              "[('conventional algorithms', 0.03220074744562463),\n",
-              " ('machine learning', 0.0320379078219184),\n",
-              " ('training data', 0.02651275416153127),\n",
-              " ('artificial intelligence', 0.023564133570545886),\n",
-              " ('computational statistics', 0.018363250279455255),\n",
-              " ('speech recognition', 0.017827318362436336),\n",
-              " ('computer vision', 0.017721180700768415),\n",
-              " ('data', 0.01647833767159313),\n",
-              " ('sample data', 0.014187748325602852),\n",
-              " ('predictions', 0.014133139194664955)]"
+              "[('conventional algorithms', 0.0339121588687132),\n",
+              " ('machine learning', 0.033740664613849336),\n",
+              " ('training data', 0.027921859040422804),\n",
+              " ('artificial intelligence', 0.02481652460388809),\n",
+              " ('computational statistics', 0.019339223782752442),\n",
+              " ('speech recognition', 0.018774808054718494),\n",
+              " ('computer vision', 0.018663029368508974),\n",
+              " ('data', 0.01735413148266217),\n",
+              " ('sample data', 0.014941801460342798),\n",
+              " ('predictions', 0.014884289952972495)]"
             ]
           },
           "metadata": {},
@@ -274,24 +274,24 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "ZMFpRBm-zdtB",
-        "outputId": "060eb4af-ee82-48c0-e0ca-ddbeee7d6fbf"
+        "outputId": "4259b42e-5ff4-4d3b-cde0-e9b3c8523872"
       },
       "execution_count": 7,
       "outputs": [
         {
           "output_type": "execute_result",
           "data": {
             "text/plain": [
-              "[('intelligenza artificiale', 0.034458516187358144),\n",
-              " ('sistema intelligente', 0.030585801686949892),\n",
-              " ('intelligenza umana', 0.028680306043423823),\n",
-              " ('pensiero umano', 0.02519935951215235),\n",
-              " ('ragionamento', 0.02082366775859126),\n",
-              " ('filosofi', 0.020529689835492422),\n",
-              " ('sistemi informatici', 0.02007152977482128),\n",
-              " ('umanità', 0.01979030713503242),\n",
-              " ('esseri umani', 0.01853905189982051),\n",
-              " ('logica', 0.017243187892945246)]"
+              "[('intelligenza artificiale', 0.03616688087824202),\n",
+              " ('sistema intelligente', 0.03210216714390278),\n",
+              " ('intelligenza umana', 0.030102201922570942),\n",
+              " ('pensiero umano', 0.02644867900662448),\n",
+              " ('ragionamento', 0.02185605169932884),\n",
+              " ('sistemi informatici', 0.021066624646953618),\n",
+              " ('umanità', 0.0207714597112899),\n",
+              " ('esseri umani', 0.019458170456635688),\n",
+              " ('filosofi', 0.018742693130278605),\n",
+              " ('logica', 0.018233726824707544)]"
             ]
           },
           "metadata": {},
@@ -309,9 +309,9 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "0Qxh1wD7zjZn",
-        "outputId": "122863cc-2950-4fb9-9dde-b686fdc4200b"
+        "outputId": "cfaa726a-32b1-4540-aea1-8b8168221541"
       },
-      "execution_count": 11,
+      "execution_count": 12,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -330,7 +330,7 @@
             ]
           },
           "metadata": {},
-          "execution_count": 11
+          "execution_count": 12
         }
       ]
     },
@@ -344,24 +344,24 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "CVaI5zPTzmp2",
-        "outputId": "6329f9a9-8693-4da0-e0cb-8a4def6e29ec"
+        "outputId": "97d061f4-960b-4410-d48e-63ce86c464e2"
       },
       "execution_count": 14,
       "outputs": [
         {
           "output_type": "execute_result",
           "data": {
             "text/plain": [
-              "[('人工知能研究', 0.04668628289446463),\n",
-              " ('artificial intelligence）', 0.04656258182323882),\n",
-              " ('情報工学科', 0.04152230442364311),\n",
-              " ('ai教育研究', 0.03392002682114533),\n",
-              " ('手順（アルゴリズム', 0.02920999737583826),\n",
-              " ('研究分野', 0.02885210634012013),\n",
-              " ('ai（エーアイ）', 0.02758989330102413),\n",
-              " ('応用例', 0.026380280664661655),\n",
-              " ('専門家', 0.02523460291693004),\n",
-              " ('データ（事前情報', 0.024605315035618417)]"
+              "[('言語運用', 0.06911519908036695),\n",
+              " ('人工知能研究', 0.045180491059296994),\n",
+              " ('人工知能（じんこうちのう', 0.04397241175715518),\n",
+              " ('ai教育研究', 0.032825995759342204),\n",
+              " ('手順（アルゴリズム', 0.028267877338899624),\n",
+              " ('研究分野', 0.027921529485176774),\n",
+              " ('応用例', 0.02551845421616052),\n",
+              " ('専門家', 0.024380506026640082),\n",
+              " ('データ（事前情報', 0.023811712994498947),\n",
+              " ('コンピュータ上', 0.023366504106138903)]"
             ]
           },
           "metadata": {},
@@ -424,24 +424,24 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "WUDdFX_aAEY_",
-        "outputId": "7e275875-69e7-4a2e-a73a-abfe12d72733"
+        "outputId": "fd01f6e4-17d9-476c-a0aa-cd154d4b9d1a"
       },
       "execution_count": 17,
       "outputs": [
         {
           "output_type": "execute_result",
           "data": {
             "text/plain": [
-              "[('machine learning', 0.03719560323413626),\n",
-              " ('artificial intelligence', 0.022943256543643164),\n",
-              " ('speech recognition', 0.022299272421197006),\n",
-              " ('training data', 0.019498117324721367),\n",
-              " ('computer vision', 0.018394674218814935),\n",
-              " ('computational statistics', 0.012852583117731707),\n",
-              " ('conventional algorithms', 0.012310507678891275),\n",
-              " ('agriculture', 0.009199340093852123),\n",
-              " ('data', 0.009049208308237789),\n",
-              " ('predictions', 0.008900549505358414)]"
+              "[('machine learning', 0.0391724821985474),\n",
+              " ('artificial intelligence', 0.0241626490871842),\n",
+              " ('speech recognition', 0.023484438374647136),\n",
+              " ('training data', 0.020534411682555247),\n",
+              " ('computer vision', 0.01937232100451887),\n",
+              " ('computational statistics', 0.013535673062125213),\n",
+              " ('conventional algorithms', 0.01296478887224714),\n",
+              " ('agriculture', 0.009688268363237202),\n",
+              " ('data', 0.009530157345050614),\n",
+              " ('predictions', 0.009373598385901035)]"
             ]
           },
           "metadata": {},