Skip to content

Commit

Permalink
Update the filtering pattern for candidate keyphrases
Browse files Browse the repository at this point in the history
Update the test cases for each language and the examples in the Jupyter notebook.
  • Loading branch information
NC0DER committed Oct 12, 2023
1 parent 713fbb3 commit 9868626
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 59 deletions.
4 changes: 2 additions & 2 deletions LMRank/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def extract_candidate_keyphrases(
keep_nouns_adjs: (bool)
A boolean flag that controls if the candidate keyphrases
are composed only from nouns, proper nouns and adjectives.
are composed only from nouns and adjectives.
Output:
<object>: (List[Tuple[str, int]])
Expand Down Expand Up @@ -182,7 +182,7 @@ def extract_candidate_keyphrases(
if chunk.text.lower() not in nlp.Defaults.stop_words
and chunk[0].pos_ not in {'PRON', 'PART'}
and all(
term.pos_ in {'PROPN','NOUN', 'ADJ'}
term.pos_ in {'NOUN', 'ADJ'}
if keep_nouns_adjs else True for term in chunk
)
and len(chunk.text) > 2
Expand Down
94 changes: 47 additions & 47 deletions examples/LMRank.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -117,24 +117,24 @@
"base_uri": "https://localhost:8080/"
},
"id": "z__eUDiRqvn1",
"outputId": "7334bae6-6bb0-457c-f791-3f5b31d4883f"
"outputId": "a1a11575-ad0d-405c-9b2c-2c351737d632"
},
"execution_count": 3,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[('conventional algorithms', 0.03220074744562463),\n",
" ('machine learning', 0.0320379078219184),\n",
" ('training data', 0.02651275416153127),\n",
" ('artificial intelligence', 0.023564133570545886),\n",
" ('computational statistics', 0.018363250279455255),\n",
" ('speech recognition', 0.017827318362436336),\n",
" ('computer vision', 0.017721180700768415),\n",
" ('data', 0.01647833767159313),\n",
" ('sample data', 0.014187748325602852),\n",
" ('predictions', 0.014133139194664955)]"
"[('conventional algorithms', 0.0339121588687132),\n",
" ('machine learning', 0.033740664613849336),\n",
" ('training data', 0.027921859040422804),\n",
" ('artificial intelligence', 0.02481652460388809),\n",
" ('computational statistics', 0.019339223782752442),\n",
" ('speech recognition', 0.018774808054718494),\n",
" ('computer vision', 0.018663029368508974),\n",
" ('data', 0.01735413148266217),\n",
" ('sample data', 0.014941801460342798),\n",
" ('predictions', 0.014884289952972495)]"
]
},
"metadata": {},
Expand Down Expand Up @@ -274,24 +274,24 @@
"base_uri": "https://localhost:8080/"
},
"id": "ZMFpRBm-zdtB",
"outputId": "060eb4af-ee82-48c0-e0ca-ddbeee7d6fbf"
"outputId": "4259b42e-5ff4-4d3b-cde0-e9b3c8523872"
},
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[('intelligenza artificiale', 0.034458516187358144),\n",
" ('sistema intelligente', 0.030585801686949892),\n",
" ('intelligenza umana', 0.028680306043423823),\n",
" ('pensiero umano', 0.02519935951215235),\n",
" ('ragionamento', 0.02082366775859126),\n",
" ('filosofi', 0.020529689835492422),\n",
" ('sistemi informatici', 0.02007152977482128),\n",
" ('umanità', 0.01979030713503242),\n",
" ('esseri umani', 0.01853905189982051),\n",
" ('logica', 0.017243187892945246)]"
"[('intelligenza artificiale', 0.03616688087824202),\n",
" ('sistema intelligente', 0.03210216714390278),\n",
" ('intelligenza umana', 0.030102201922570942),\n",
" ('pensiero umano', 0.02644867900662448),\n",
" ('ragionamento', 0.02185605169932884),\n",
" ('sistemi informatici', 0.021066624646953618),\n",
" ('umanità', 0.0207714597112899),\n",
" ('esseri umani', 0.019458170456635688),\n",
" ('filosofi', 0.018742693130278605),\n",
" ('logica', 0.018233726824707544)]"
]
},
"metadata": {},
Expand All @@ -309,9 +309,9 @@
"base_uri": "https://localhost:8080/"
},
"id": "0Qxh1wD7zjZn",
"outputId": "122863cc-2950-4fb9-9dde-b686fdc4200b"
"outputId": "cfaa726a-32b1-4540-aea1-8b8168221541"
},
"execution_count": 11,
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
Expand All @@ -330,7 +330,7 @@
]
},
"metadata": {},
"execution_count": 11
"execution_count": 12
}
]
},
Expand All @@ -344,24 +344,24 @@
"base_uri": "https://localhost:8080/"
},
"id": "CVaI5zPTzmp2",
"outputId": "6329f9a9-8693-4da0-e0cb-8a4def6e29ec"
"outputId": "97d061f4-960b-4410-d48e-63ce86c464e2"
},
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[('人工知能研究', 0.04668628289446463),\n",
" ('artificial intelligence)', 0.04656258182323882),\n",
" ('情報工学科', 0.04152230442364311),\n",
" ('ai教育研究', 0.03392002682114533),\n",
" ('手順(アルゴリズム', 0.02920999737583826),\n",
" ('研究分野', 0.02885210634012013),\n",
" ('ai(エーアイ)', 0.02758989330102413),\n",
" ('応用例', 0.026380280664661655),\n",
" ('専門家', 0.02523460291693004),\n",
" ('データ(事前情報', 0.024605315035618417)]"
"[('言語運用', 0.06911519908036695),\n",
" ('人工知能研究', 0.045180491059296994),\n",
" ('人工知能(じんこうちのう', 0.04397241175715518),\n",
" ('ai教育研究', 0.032825995759342204),\n",
" ('手順(アルゴリズム', 0.028267877338899624),\n",
" ('研究分野', 0.027921529485176774),\n",
" ('応用例', 0.02551845421616052),\n",
" ('専門家', 0.024380506026640082),\n",
" ('データ(事前情報', 0.023811712994498947),\n",
" ('コンピュータ上', 0.023366504106138903)]"
]
},
"metadata": {},
Expand Down Expand Up @@ -424,24 +424,24 @@
"base_uri": "https://localhost:8080/"
},
"id": "WUDdFX_aAEY_",
"outputId": "7e275875-69e7-4a2e-a73a-abfe12d72733"
"outputId": "fd01f6e4-17d9-476c-a0aa-cd154d4b9d1a"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[('machine learning', 0.03719560323413626),\n",
" ('artificial intelligence', 0.022943256543643164),\n",
" ('speech recognition', 0.022299272421197006),\n",
" ('training data', 0.019498117324721367),\n",
" ('computer vision', 0.018394674218814935),\n",
" ('computational statistics', 0.012852583117731707),\n",
" ('conventional algorithms', 0.012310507678891275),\n",
" ('agriculture', 0.009199340093852123),\n",
" ('data', 0.009049208308237789),\n",
" ('predictions', 0.008900549505358414)]"
"[('machine learning', 0.0391724821985474),\n",
" ('artificial intelligence', 0.0241626490871842),\n",
" ('speech recognition', 0.023484438374647136),\n",
" ('training data', 0.020534411682555247),\n",
" ('computer vision', 0.01937232100451887),\n",
" ('computational statistics', 0.013535673062125213),\n",
" ('conventional algorithms', 0.01296478887224714),\n",
" ('agriculture', 0.009688268363237202),\n",
" ('data', 0.009530157345050614),\n",
" ('predictions', 0.009373598385901035)]"
]
},
"metadata": {},
Expand Down
Loading

0 comments on commit 9868626

Please sign in to comment.