Skip to content
This repository has been archived by the owner on Jul 29, 2024. It is now read-only.

Commit

Permalink
Version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Alessandro Greco committed Nov 27, 2022
1 parent 7f18309 commit e9854dc
Show file tree
Hide file tree
Showing 6 changed files with 317 additions and 0 deletions.
211 changes: 211 additions & 0 deletions PDF_Parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
import sys
import requests
import base64
import json
from prettytable import PrettyTable
import sys
from termcolor import colored
import os


log_output = ""

def argv_error():
    """Print the command-line usage text.

    The text lists the supported options (path, API key, verbose, log)
    followed by three example invocations. Nothing is returned.
    """
    usage_lines = (
        "./script [option] [value] ...",
        "-p\t\tUse this argoument for set your PDF path\n or --path\n",
        "-A\t\tUse this argoument for set your VirusTotal API Key\n or --API-Key\n",
        "-v\t\tUse this argoument for view a lot more information\n or --verbose\n",
        "-l\t\tUse this argoument for save in a log file all verbose information\n or --log\n",
        "Examples:",
        "$ python3 script.py -p <PDF_DOCUMENT_PATH> -A <VirusTotal_API_Key>",
        "\tGeneric example",
        "$ python3 script.py -p malicious.pdf -A abcdefg123456789876543234567899876543456789876543456789876543 --verbose",
        "\tIt will print everything in output",
        "$ python3 script.py -p malicious.pdf -A abcdefg123456789876543234567899876543456789876543456789876543 --log",
        "\tIt will print everything in a log file in the same directory where is the script PDF_Parser.py",
    )
    # One print of the joined lines emits the same bytes as one print per line.
    print("\n".join(usage_lines))

def generic_error(error: str) -> None:
    """Print a generic failure hint followed by the specific error text.

    Args:
        error: Details of the failure; printed after a blank line below the hint.
    """
    hint = "\nSomething went wrong... check your private key and your pdf path :-/"
    # sep="\n" reproduces the line break that separate print calls would emit.
    print(hint, f"\n{error}", sep="\n")

def upload_file(File_Path: str, VirusTotal_API_Key: str, verbose: bool) -> str:
    """Upload a PDF to VirusTotal and return the raw JSON response text.

    On success the response body has this shape::

        {
            "data": {
                "type": "analysis",
                "id": "<base64_file_id>"
            }
        }

    The body is printed when ``verbose`` is true and appended to the
    module-level ``log_output`` buffer when that buffer is already non-empty
    (the script only fills the buffer when logging was requested).

    Args:
        File_Path: Path of the PDF to upload.
        VirusTotal_API_Key: API key sent in the ``x-apikey`` header.
        verbose: Print the response body to stdout when true.

    Returns:
        The response body on HTTP 200. Otherwise the sentinel string ``"-1"``,
        after the problem has been reported through ``generic_error``.
    """
    global log_output

    url = "https://www.virustotal.com/api/v3/files"
    headers = {
        "accept": "application/json",
        "x-apikey": VirusTotal_API_Key
    }

    try:
        # The context manager closes the PDF handle even when the request fails.
        with open(File_Path, "rb") as pdf_file:
            files = {"file": (File_Path, pdf_file, "application/pdf")}
            response = requests.post(url, files=files, headers=headers)
    except OSError as error:
        # Covers a missing/unreadable file; the exceptions raised by requests
        # for network failures derive from IOError (OSError) as well.
        generic_error(f"[*] Error occurred in upload_file function.\n{error}")
        return "-1"

    if response.status_code != 200:
        generic_error(f"[*] Error occurred in upload_file function.\nError code: {response.status_code}")
        return "-1"

    if verbose:
        print(response.text)
    if log_output != "":
        log_output += "\n"+response.text

    return response.text

def json_load(json_in_string: str):
    """Parse a JSON document held in a string and return the decoded object.

    For the VirusTotal responses handled in this file the result is a dict.
    """
    decoded = json.loads(json_in_string)
    return decoded

def get_base64_file_id_from_response(response: str) -> str:
    """Return the ``data`` -> ``id`` field of a VirusTotal JSON response.

    Args:
        response: Response body as a JSON string.

    Returns:
        The value stored under ``["data"]["id"]``.
    """
    payload = json.loads(response)
    data_section = payload["data"]
    return data_section["id"]

def decrypt_from_base64(encrypted_string: str) -> str:
    """Decode a base64 string and return the result as ASCII text.

    Despite the name, no cryptography is involved: base64 is an encoding.
    """
    raw_bytes = base64.b64decode(encrypted_string)
    return str(raw_bytes, 'ascii')


def check_file(FileMD5: str, VirusTotal_API_Key: str) -> str:
    """Fetch the VirusTotal report of a previously uploaded file.

    Args:
        FileMD5: MD5 of a file uploaded to VirusTotal.
        VirusTotal_API_Key: Your VirusTotal private key, sent as ``x-apikey``.

    Returns:
        The response body on HTTP 200. Otherwise the sentinel string ``"-1"``,
        after the status code has been reported through ``generic_error``.
    """
    report = requests.get(
        f"https://www.virustotal.com/api/v3/files/{FileMD5}",
        headers={
            "accept": "application/json",
            "x-apikey": VirusTotal_API_Key
        },
    )

    # Guard clause: anything other than 200 is reported and signalled with "-1".
    if report.status_code != 200:
        generic_error(f"[*] Error occurred in check_file function.\nError code: {report.status_code}")
        return "-1"

    return report.text

def _malicious_detections(analysis_results: dict) -> dict:
    """Map every engine whose category is "malicious" to its reported result."""
    return {
        engine: report["result"]
        for engine, report in analysis_results.items()
        if report["category"] == "malicious"
    }


def _verdict(malicious: int) -> tuple:
    """Return the ``(message, color)`` pair for a number of malicious detections."""
    if malicious >= 2:
        return 'This document is most likely malicious!!!', "red"
    if malicious == 1:
        return "A malicious control has been detected but it could be a false positive.", "yellow"
    return "This file is safe. :-)", "yellow"


def response_parser(response: str, verbose: bool):
    """Parse a VirusTotal file report and print the resulting verdict.

    Reads ``data`` -> ``attributes`` -> ``last_analysis_results`` from the
    report and counts each engine whose ``category`` is ``"malicious"``
    exactly once. The flagged engines and their results are put in a table,
    which is printed when ``verbose`` is true. The table and the verdict are
    appended to the module-level ``log_output`` buffer when that buffer is
    already non-empty (the script only fills it when logging was requested).

    Verdict: two or more detections -> most likely malicious; exactly one ->
    possible false positive; none -> safe.

    Args:
        response: Body of the file report as a JSON string.
        verbose: Print the table of detections when true.
    """
    global log_output

    response_in_dict = json_load(response)
    analysis_results = response_in_dict["data"]["attributes"]["last_analysis_results"]

    # Count once, independently of the verbose/log flags, so that the verdict
    # never depends on how the script was invoked.
    detections = _malicious_detections(analysis_results)
    malicious = len(detections)

    tab = PrettyTable(["Result", "CVE"])
    for engine, result in detections.items():
        tab.add_row([f"{engine}", f"{result}"])

    if verbose:
        print(tab)
    if log_output != "":
        log_output += "\n"+str(tab)

    print_string, color = _verdict(malicious)
    print(colored(f'\n{print_string}', color, attrs=['reverse', 'blink']))

    if log_output != "":
        log_output += "\n"+print_string

if __name__ == "__main__":
    # Command-line entry point: parse the options, upload the PDF, fetch its
    # report, print the verdict and, only when requested, save the log file.

    File_Path = ""
    VirusTotal_API_Key = ""
    verbose = False
    log = False

    arguments = sys.argv[1:]
    for position, argument in enumerate(arguments):
        # Options taking a value are honoured only when a next argument exists.
        has_value = position + 1 < len(arguments)
        if argument in ("-p", "--path") and has_value:
            File_Path = arguments[position + 1]
        elif argument in ("-A", "--API-Key") and has_value:
            VirusTotal_API_Key = arguments[position + 1]
        elif argument in ("-v", "--verbose"):
            verbose = True
        elif argument in ("-l", "--log"):
            log = True
        elif argument in ("-h", "--help"):
            argv_error()
            sys.exit(0)

    if File_Path == "" or VirusTotal_API_Key == "":
        # Both the path and the API key are required.
        argv_error()
        sys.exit(1)

    # Only the first five characters of the key are echoed.
    init_print = f"Your File: {File_Path}\nYour API: {VirusTotal_API_Key[:5]}***"
    if verbose:
        print(init_print)
    if log:
        # A non-empty log_output is what the helper functions use as the
        # "logging enabled" flag.
        log_output += "\n"+init_print

    response = upload_file(File_Path, VirusTotal_API_Key, verbose)

    if response == "-1":
        # Non-zero status so that callers of the script can detect the failure.
        sys.exit(1)

    encrypted_FileMD5 = get_base64_file_id_from_response(response)

    plaintext_FileMD5 = decrypt_from_base64(encrypted_FileMD5)

    # The decoded id is colon-separated; the part before the first ":" is
    # used as the file's MD5.
    FileMD5 = plaintext_FileMD5.split(":")[0]
    MD5_print = f"MD5: {FileMD5}"
    if verbose:
        print(MD5_print)
    if log:
        log_output += "\n"+MD5_print

    VirusTotal_response = check_file(FileMD5, VirusTotal_API_Key)

    if VirusTotal_response == "-1":
        sys.exit(1)

    response_parser(VirusTotal_response, verbose)

    if log:
        # Write the log only when it was requested, so that a run without
        # --log neither creates nor truncates PDF_Parser.log.
        with open("PDF_Parser.log", "w", encoding="utf-8") as log_file:
            log_file.write(log_output)
53 changes: 53 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,55 @@
# Parser-PDF-VirusTotal-Based
Parser PDF Based on VirusTotal API

![](docs/log%20file.png)
<center>Log file example</center>


![](docs/terminal%20verbose.png)
<center>Terminal output example</center>


## How to use

Use the `-h` or `--help` option to display the following output:

```
./script [option] [value] ...
-p Use this argoument for set your PDF path
or --path
-A Use this argoument for set your VirusTotal API Key
or --API-Key
-v Use this argoument for view a lot more information
or --verbose
-l Use this argoument for save in a log file all verbose information
or --log
Examples:
$ python3 script.py -p <PDF_DOCUMENT_PATH> -A <VirusTotal_API_Key>
Generic example
$ python3 script.py -p malicious.pdf -A abcdefg123456789876543234567899876543456789876543456789876543 --verbose
It will print everything in output
$ python3 script.py -p malicious.pdf -A abcdefg123456789876543234567899876543456789876543456789876543 --log
It will print everything in a log file in the same directory where is the script PDF_Parser.py
```

## Arguments

|Argument|Required|Format|
|--|--|--|
|--path|yes|--path /home/aleff/Documents/malicious.pdf|
|--API-Key|yes|--API-Key abcdefg123456789876543234567899876...|
|--verbose|no|--verbose|
|--log|no|--log|

## VirusTotal API

Signup to [VirusTotal Website](https://www.virustotal.com/gui/join-us) and go to Account -> API Key

## FAQs

### Why?
- Developed for the Network Security course at UNICAL University
53 changes: 53 additions & 0 deletions docs/PDF_Parser.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@

Your File: malicious.pdf
Your API: 6788d***
{
"data": {
"type": "analysis",
"id": "YjI1Y2E0NGU2NjIwMzlmNjY0MDFkZDliMzMzYjA3MDE6MTY2OTU2NDAzMw=="
}
}
MD5: b25ca44e662039f66401dd9b333b0701
+-------------------+------------------------------------------+
| Result | CVE |
+-------------------+------------------------------------------+
| Lionic | Trojan.PDF.Pdfka.4!c |
| MicroWorld-eScan | Exploit.PDF-Name.2.Gen |
| FireEye | Exploit.PDF-Name.2.Gen |
| CAT-QuickHeal | PDF.JS.Gen.A |
| McAfee | Exploit-PDF.bk.gen |
| Cyren | ShellCode.AX.gen |
| Symantec | Bloodhound.Exploit.213 |
| ESET-NOD32 | JS/Exploit.Pdfka.NOO |
| Baidu | JS.Exploit.Pdfka.adb |
| Avast | JS:Pdfka-AK [Expl] |
| ClamAV | Heuristics.PDF.ObfuscatedNameObject |
| Kaspersky | Exploit.JS.Pdfka.cil |
| BitDefender | Exploit.PDF-Name.2.Gen |
| NANO-Antivirus | Exploit.Script.IframeBof.gqjs |
| Cynet | Malicious (score: 99) |
| Tencent | Heur:Trojan.Script.LS_Gencirc.7033944.72 |
| Ad-Aware | Exploit.PDF-Name.2.Gen |
| Emsisoft | Exploit.PDF-Name.2.Gen (B) |
| VIPRE | Exploit.PDF-Name.2.Gen |
| TrendMicro | HEUR_PDFF.SPACE |
| McAfee-GW-Edition | BehavesLike.PDF.Trojan.zb |
| Sophos | Mal/PDFEx-D |
| SentinelOne | Static AI - Malicious PDF |
| GData | Exploit.PDF-Name.2.Gen |
| Avira | HTML/Malicious.PDF.Gen3 |
| Arcabit | Exploit.PDF-Name.2.Gen |
| ViRobot | PDF.Exploit.CVE-2008-2992.A |
| ZoneAlarm | HEUR:Exploit.Script.Generic |
| Microsoft | Exploit:JS/ShellCode.gen |
| Google | Detected |
| AhnLab-V3 | Exploit/PDF.Generic.S1213 |
| ALYac | Exploit.PDF-Name.2.Gen |
| MAX | malware (ai score=80) |
| Rising | Hack.Exploit.MalPDF.a (CLASSIC) |
| Ikarus | PDF.Exploit.PDF-JS |
| MaxSecure | Virus.PDF.Pidief.zm |
| Fortinet | PDF/Script.JSS!exploit |
| AVG | JS:Pdfka-AK [Expl] |
+-------------------+------------------------------------------+
This document is most likely malicious!!!
Binary file added docs/log file.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/malicious.pdf
Binary file not shown.
Binary file added docs/terminal verbose.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit e9854dc

Please sign in to comment.