-
Notifications
You must be signed in to change notification settings - Fork 0
/
gnip-historical-powertrack.ps1
149 lines (124 loc) · 5.54 KB
/
gnip-historical-powertrack.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# PowerShell script for working with Gnip's Historical PowerTrack service
# http://support.gnip.com/apis/historical_api/
[CmdletBinding()]
Param(
# Name of the sub-command to run (for example list-jobs or accept-job). The switch
# statement at the end of the script maps it onto a method of $powertrack; any
# value it does not recognise prints the usage text instead.
[Parameter(Mandatory=$True,Position=1)]
[string]$command,
# Job identifier handed to the list-job, accept-job, reject-job and download-files commands.
[string]$jobid,
# Path of the file that the request-job command sends as the request body.
[string]$file
)
# Usage text printed when the command is not recognised.
# The configuration section must name the files exactly as the script opens them:
# account.txt and authorization.txt.
$help = @"
USAGE:
.\gnip-historical-powertrack.ps1 <command> <options>
AVAILABLE COMMANDS:
... list-jobs
... list-job -jobid <jobid>
... request-job -file <filename>
... accept-job -jobid <jobid>
... reject-job -jobid <jobid>
... download-files -jobid <jobid>
... accept-all-quoted
... download-all-delivered
CONFIGURATION:
Make sure there is an authorization.txt file in this directory with one line that has
your gnip username & password in the format username:password.
Also create an account.txt file which contains your account name for Gnip.
"@
# Build the $powertrack object. Every Function declared inside this script block
# becomes a method on it, and $account / $headers are shared by all of them.
$powertrack = New-Module -AsCustomObject -ScriptBlock `
{
# Gnip account name, read from account.txt relative to the current PowerShell location.
# Get-Item | Resolve-Path turns the relative name into a full provider path before it is
# handed to the .NET file API.
[String] $account = `
    [IO.File]::ReadAllText((Get-Item account.txt | Resolve-Path).ProviderPath).Trim()
# HTTP Basic authentication header built from the "username:password" line in authorization.txt.
# UTF-8 is used instead of ASCII: the ASCII encoder replaces every non-ASCII character with '?',
# which corrupts such credentials, while UTF-8 yields identical bytes for pure-ASCII ones.
[Hashtable] $headers = @{
    Authorization = "Basic " + [System.Convert]::ToBase64String(
        [System.Text.Encoding]::UTF8.GetBytes(
            [IO.File]::ReadAllText((Get-Item authorization.txt | Resolve-Path).ProviderPath).Trim()
        )
    )
}
# Fetches the account's job listing and emits its "jobs" collection.
Function ListJobs {
    $request = @{
        Method      = "Get"
        Uri         = "https://historical.gnip.com/accounts/$account/jobs.json"
        Headers     = $headers
        ContentType = "application/json"
    }
    $listing = Invoke-RestMethod @request
    Write-Output $listing.jobs
}
# Fetches a single job and emits the service's reply.
#   $jobid - identifier of the job to look up.
Function ListJob($jobid) {
    $jobUri = "https://historical.gnip.com/accounts/$account/jobs/${jobid}.json"
    $details = Invoke-RestMethod -Uri $jobUri -Method Get -Headers $headers -ContentType "application/json"
    Write-Output $details
}
# Submits a new job by POSTing the contents of a file to the account's jobs endpoint.
#   $file - path of the file sent as the request body (declared as application/json).
# Emits the service's reply on success. When the service answers with an HTTP error,
# the body of that error response is emitted instead so the caller can see why the
# request was refused. Failures that carry no HTTP response are rethrown unchanged.
Function RequestJob($file) {
    try {
        $response = Invoke-RestMethod -Method POST -Uri "https://historical.gnip.com/accounts/$account/jobs.json" `
            -Headers $headers -ContentType "application/json" -InFile $file
        # Emit the reply like every other command does; previously a successful
        # request printed nothing at all.
        echo $response
    } catch {
        $errorResponse = $_.Exception.Response
        if ($null -eq $errorResponse) {
            # No HTTP response to read (e.g. unreadable -file or a connection failure):
            # surface the original error instead of failing on a null method call.
            throw
        }
        $stream = $errorResponse.GetResponseStream()
        # NOTE(review): the cmdlet may already have read this stream on some PowerShell
        # versions; rewinding a seekable stream is harmless and keeps the body readable.
        if ($stream.CanSeek) {
            $stream.Position = 0
        }
        $reader = New-Object System.IO.StreamReader($stream)
        try {
            echo $reader.ReadToEnd()
        } finally {
            # Disposing the reader also closes the underlying response stream.
            $reader.Dispose()
        }
    }
}
# Accepts a job by PUTting a status of "accept" to the job resource, then emits the reply.
#   $jobid - identifier of the job to accept.
Function AcceptJob($jobid) {
    $request = @{
        Method      = "PUT"
        Uri         = "https://historical.gnip.com/accounts/$account/publishers/twitter/historical/track/jobs/${jobid}.json"
        Headers     = $headers
        ContentType = "application/json"
        Body        = '{ "status": "accept" }'
    }
    $reply = Invoke-RestMethod @request
    Write-Output $reply
}
# Rejects a job by PUTting a status of "reject" to the job resource, then emits the reply.
#   $jobid - identifier of the job to reject.
Function RejectJob($jobid) {
    $request = @{
        Method      = "PUT"
        Uri         = "https://historical.gnip.com/accounts/$account/publishers/twitter/historical/track/jobs/${jobid}.json"
        Headers     = $headers
        ContentType = "application/json"
        Body        = '{ "status": "reject" }'
    }
    $reply = Invoke-RestMethod @request
    Write-Output $reply
}
# Downloads every result file of a job into a dated local folder.
#   $jobid - identifier of the job whose results are fetched.
# The results document is parsed with JavaScriptSerializer and a raised MaxJsonLength.
# Files are stored under "jobfiles-<jobid>-<yyyy-MM-dd>" using the directory layout of
# each URL's path. Files that already exist at their destination are skipped, so the
# command can be re-run to resume an interrupted download.
Function DownloadFiles($jobid) {
    # Add-Type replaces the obsolete Assembly.LoadWithPartialName call.
    Add-Type -AssemblyName System.Web.Extensions
    $jsonserial = New-Object -TypeName System.Web.Script.Serialization.JavaScriptSerializer
    $jsonserial.MaxJsonLength = 10000000
    $responseRaw = Invoke-WebRequest -Method Get -Uri "https://historical.gnip.com/accounts/$account/publishers/twitter/historical/track/jobs/$jobid/results.json" `
        -Headers $headers -ContentType "application/json"
    $response = $jsonserial.DeserializeObject($responseRaw.Content)
    echo "Job contains $($response.urlCount) files with total size of $([math]::Round($response.totalFileSizeBytes / 1024 / 1024, 2)) MB"
    $outputFolder = "jobfiles-$jobid-$(Get-Date -Format yyyy-MM-dd)"
    New-Item -ItemType directory -Path $outputFolder -Force | Out-Null
    foreach ($url in $response.urlList) {
        # Split the URL path into its directory part and the file name.
        $abspath = ([System.URI]$url).AbsolutePath
        $split = $abspath.LastIndexOf("/")
        $filepath = $abspath.Substring(0, $split)
        $filename = $abspath.Substring($split + 1)
        $destdir = "$outputFolder$filepath"
        $destpath = "$destdir/$filename"
        if (!(Test-Path -Path $destdir)) {
            New-Item -ItemType directory -Path $destdir -Force | Out-Null
        }
        if (Test-Path -Path $destpath) {
            # Already downloaded by an earlier run.
            continue
        }
        echo "Downloading file to $destpath"
        # Download to a temporary name and rename only on success. Otherwise an
        # interrupted transfer would leave a truncated file at $destpath that the
        # existence check above would treat as complete on every later run.
        $partpath = "${destpath}.part"
        try {
            Invoke-WebRequest -Uri $url -OutFile $partpath -ErrorAction Stop
            Move-Item -LiteralPath $partpath -Destination $destpath -Force
        } catch {
            Remove-Item -LiteralPath $partpath -Force -ErrorAction SilentlyContinue
            Write-Warning "Failed to download ${url}: $($_.Exception.Message)"
        }
    }
}
# Retrieves the job listing and calls AcceptJob for every job whose status is "quoted".
Function AcceptAllQuoted() {
    $jobsUri = "https://historical.gnip.com/accounts/$account/jobs.json"
    $listing = Invoke-RestMethod -Uri $jobsUri -Method Get -Headers $headers -ContentType "application/json"
    foreach ($entry in $listing.jobs) {
        if ($entry.status -ne "quoted") {
            continue
        }
        AcceptJob -jobid $entry.uuid
    }
}
# Retrieves the job listing and calls DownloadFiles for every job whose status is "delivered".
Function DownloadAllDelivered() {
    $jobsUri = "https://historical.gnip.com/accounts/$account/jobs.json"
    $listing = Invoke-RestMethod -Uri $jobsUri -Method Get -Headers $headers -ContentType "application/json"
    foreach ($entry in $listing.jobs) {
        if ($entry.status -ne "delivered") {
            continue
        }
        DownloadFiles -jobid $entry.uuid
    }
}
}
# Validate per-command options before dispatching. Without these checks a missing
# -jobid or -file is passed on as an empty string and produces a malformed request
# (for example ".../jobs/.json") instead of a clear message.
$jobCommands = "list-job", "accept-job", "reject-job", "download-files"
if (($jobCommands -contains $command) -and -not $jobid) {
    Write-Error "The '$command' command requires -jobid <jobid>."
    echo $help
    return
}
if (($command -eq "request-job") -and -not $file) {
    Write-Error "The '$command' command requires -file <filename>."
    echo $help
    return
}
# Map the command name onto the matching $powertrack method; anything unrecognised
# prints the usage text.
switch ($command)
{
    "list-jobs" { $powertrack.ListJobs() }
    "list-job" { $powertrack.ListJob($jobid) }
    "request-job" { $powertrack.RequestJob($file) }
    "accept-job" { $powertrack.AcceptJob($jobid) }
    "reject-job" { $powertrack.RejectJob($jobid) }
    "download-files" { $powertrack.DownloadFiles($jobid) }
    "accept-all-quoted" { $powertrack.AcceptAllQuoted() }
    "download-all-delivered" { $powertrack.DownloadAllDelivered() }
    default { echo $help }
}