Skip to content

Commit

Permalink
First version of session caching
Browse files Browse the repository at this point in the history
  • Loading branch information
raiym committed Mar 20, 2017
1 parent 9c1a759 commit 4682e97
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 62 deletions.
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
],
"require": {
"php": ">=5.4.0",
"mashape/unirest-php": "3.0.*"
"mashape/unirest-php": "3.0.*",
"phpFastCache/phpFastCache": "5.0.*"

This comment has been minimized.

Copy link
@Geolim4

Geolim4 Jun 6, 2017

Contributor

What a good choice ;)

},
"require-dev": {
"phpunit/phpunit": "5.5.*"
Expand Down
9 changes: 5 additions & 4 deletions index.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//require_once 'src/InstagramScraper.php';

use InstagramScraper\Instagram;
use Unirest\Request;

//const MAIN_URL = "https://www.instagram.com",
//LOGIN_URL = "https://www.instagram.com/accounts/login/ajax/";
////echo 'Hello </br>';
Expand All @@ -32,7 +32,7 @@
//
////echo $crfs;
//$headers = ['cookie' => $h, 'referer' => 'https://www.instagram.com/', 'x-csrftoken' => $crfs];
//$response = Request::post(LOGIN_URL, $headers, ['username' => 'debugposter8', 'password' => 'vilka121']);
//$response = Request::post(LOGIN_URL, $headers, ['username' => 'debugposter8', 'password' => 'wrong']);
//echo json_encode($response);

//echo Media::getIdFromCode('z-arAqi4DP') . '<br/>';
Expand All @@ -42,7 +42,8 @@

//echo json_encode($instagram->getMediaById('936303077400215759_123123123'));

$instagram = Instagram::withCredentials('debugposter8', 'vilka121');
$instagram = Instagram::withCredentials('PASTE_LOGIN', 'PASTE_PASSWORD');
$instagram->login();
echo __DIR__;

echo json_encode($instagram->getTopMediasByTagName('hello'));
//echo json_encode($instagram->getTopMediasByTagName('hello'));
181 changes: 128 additions & 53 deletions src/InstagramScraper/Instagram.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
use InstagramScraper\Model\Location;
use InstagramScraper\Model\Media;
use InstagramScraper\Model\Tag;
use phpFastCache\CacheManager;
use Unirest\Request;

class Instagram
{
const MAX_COMMENTS_PER_REQUEST = 300;

private static $instanceCache;
public $sessionUsername;
public $sessionPassword;
public $sessionId;
Expand All @@ -26,8 +27,16 @@ public function __construct()
{
}

public static function withCredentials($username, $password)
public static function withCredentials($username, $password, $sessionFolder = null)
{
if (is_null($sessionFolder)) {
$sessionFolder = __DIR__ . DIRECTORY_SEPARATOR . 'sessions' . DIRECTORY_SEPARATOR;
}
// load cache
CacheManager::setDefaultConfig([
'path' => $sessionFolder
]);
self::$instanceCache = CacheManager::getInstance('files');
$instance = new self();
$instance->sessionUsername = $username;
$instance->sessionPassword = $password;
Expand Down Expand Up @@ -274,24 +283,29 @@ private function getContentsFromUrl($parameters)
return $output;
}

/**
 * Build a random string made of digits and ASCII letters.
 *
 * Characters are drawn one at a time with rand(), so the result is fine for
 * throw-away identifiers but is not a cryptographically secure token.
 *
 * @param int $length Number of characters to produce (defaults to 10).
 *
 * @return string The generated string; empty when $length is zero or negative.
 */
private static function generateRandomString($length = 10)
{
    $alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    $lastIndex = strlen($alphabet) - 1;
    $picked = [];
    // Count down instead of up: one rand() draw per requested character.
    for ($remaining = $length; $remaining > 0; $remaining--) {
        $picked[] = $alphabet[rand(0, $lastIndex)];
    }
    return implode('', $picked);
}

public function getAccountById($id)
{
if (!is_numeric($id)) {
throw new \InvalidArgumentException('User id must be integer or integer wrapped in string');
}
$cachedString = self::$instanceCache->getItem($this->sessionUsername);
$session = $cachedString->get();

$parameters = Endpoints::getAccountJsonInfoLinkByAccountId($id);
$userArray = json_decode(self::getContentsFromUrl($parameters), true);

$response = Request::post(Endpoints::INSTAGRAM_QUERY_URL, $this->generateHeaders($session), ['q' => $parameters]);

if ($response->code !== 200) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');
}

$cookies = self::parseCookies($response->headers['Set-Cookie']);
$this->csrfToken = $cookies['csrftoken'];
$session['csrftoken'] = $this->csrfToken;
$cachedString->set($session);
self::$instanceCache->save($cachedString);

$userArray = json_decode($response->raw_body, true);
if ($userArray['status'] === 'fail') {
throw new InstagramException($userArray['message']);
}
Expand All @@ -301,21 +315,47 @@ public function getAccountById($id)
return Account::fromAccountPage($userArray);
}

/**
 * Build the HTTP headers sent with a session-authenticated request.
 *
 * Every key/value pair of $session is serialised into the `cookie` header as
 * "key=value; ". The referer is the site base URL followed by "/", and
 * `x-csrftoken` carries the token currently stored on this instance.
 *
 * @param array|null $session Cookie name => value map; may be null when no
 *                            session has been stored yet (isLoggedIn() checks
 *                            for the same case).
 *
 * @return array Header name => header value.
 */
private function generateHeaders($session)
{
    // Without this guard a null session makes foreach emit an
    // "Invalid argument supplied" warning; the resulting header is the same
    // empty cookie string either way.
    if (is_null($session)) {
        $session = [];
    }
    $cookies = '';
    foreach ($session as $key => $value) {
        $cookies .= "$key=$value; ";
    }
    return [
        'cookie' => $cookies,
        'referer' => Endpoints::BASE_URL . '/',
        'x-csrftoken' => $this->csrfToken,
    ];
}

/**
 * Turn raw Set-Cookie header value(s) into a name => value map.
 *
 * Only the leading "name=value" pair of each header is kept; attributes after
 * the first ';' (Path, Expires, HttpOnly, ...) are discarded. Entries without
 * an '=' are skipped.
 *
 * @param array|string|null $rawCookies One Set-Cookie value, a list of them,
 *                                      or null when the header is absent.
 *
 * @return array Cookie name => cookie value.
 */
private static function parseCookies($rawCookies)
{
    $cookies = [];
    // The cast lets a lone header string or a missing header (null) go through
    // the same loop: a string becomes a one-element list, null becomes [].
    foreach ((array) $rawCookies as $rawCookie) {
        $pair = trim(explode(';', $rawCookie)[0]);
        // Limit of 2: split on the first '=' only, so values that themselves
        // contain '=' (e.g. base64 padding) are not truncated.
        $parts = explode('=', $pair, 2);
        if (count($parts) === 2) {
            $cookies[$parts[0]] = $parts[1];
        }
    }
    return $cookies;
}

public function getMediasByTag($tag, $count = 12, $maxId = '')
{
$cachedString = self::$instanceCache->getItem($this->sessionUsername);
$session = $cachedString->get();
$index = 0;
$medias = [];
$mediaIds = [];
$hasNextPage = true;
while ($index < $count && $hasNextPage) {
$headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken];
$response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId), $headers);
$response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId), $this->generateHeaders($session));
if ($response->code !== 200) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');
}
$cookies = self::parseCookies($response->headers['Set-Cookie']);
$this->csrfToken = $cookies['csrftoken'];

$session['csrftoken'] = $this->csrfToken;
$cachedString->set($session);
self::$instanceCache->save($cachedString);
$arr = json_decode($response->raw_body, true);
if (!is_array($arr)) {
throw new InstagramException('Response decoding failed. Returned data corrupted or this library outdated. Please report issue');
Expand Down Expand Up @@ -345,17 +385,6 @@ public function getMediasByTag($tag, $count = 12, $maxId = '')
return $medias;
}

/**
 * Turn raw Set-Cookie header value(s) into a name => value map.
 *
 * Only the leading "name=value" pair of each header is kept; everything after
 * the first ';' is discarded. Entries without an '=' are skipped instead of
 * triggering an undefined-offset notice.
 *
 * @param array|string|null $rawCookies One Set-Cookie value, a list of them,
 *                                      or null when the header is absent.
 *
 * @return array Cookie name => cookie value.
 */
private static function parseCookies($rawCookies)
{
    $cookies = [];
    // Normalise a lone header string to a one-element list and null to [].
    foreach ((array) $rawCookies as $rawCookie) {
        $pair = trim(explode(';', $rawCookie)[0]);
        // Split on the first '=' only so '=' inside the value is preserved.
        $parts = explode('=', $pair, 2);
        if (count($parts) !== 2) {
            continue;
        }
        $cookies[$parts[0]] = $parts[1];
    }
    return $cookies;
}

public function getPaginateMediasByTag($tag, $maxId = '')
{
$hasNextPage = true;
Expand All @@ -366,15 +395,19 @@ public function getPaginateMediasByTag($tag, $maxId = '')
'maxId' => $maxId,
'hasNextPage' => $hasNextPage
];
$headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken];
$response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId), $headers);
$cachedString = self::$instanceCache->getItem($this->sessionUsername);
$session = $cachedString->get();
$response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId), $this->generateHeaders($session));

if ($response->code !== 200) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');
}

$cookies = self::parseCookies($response->headers['Set-Cookie']);
$this->csrfToken = $cookies['csrftoken'];
$session['csrftoken'] = $this->csrfToken;
$cachedString->set($session);
self::$instanceCache->save($cachedString);

$arr = json_decode($response->raw_body, true);

Expand Down Expand Up @@ -412,8 +445,9 @@ public function getPaginateMediasByTag($tag, $maxId = '')

public function getTopMediasByTagName($tagName)
{
$headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken];
$response = Request::get(Endpoints::getMediasJsonByTagLink($tagName, ''), $headers);
$cachedString = self::$instanceCache->getItem($this->sessionUsername);
$session = $cachedString->get();
$response = Request::get(Endpoints::getMediasJsonByTagLink($tagName, ''), $this->generateHeaders($session));
if ($response->code === 404) {
throw new InstagramNotFoundException('Account with given username does not exist.');
}
Expand All @@ -422,6 +456,9 @@ public function getTopMediasByTagName($tagName)
}
$cookies = self::parseCookies($response->headers['Set-Cookie']);
$this->csrfToken = $cookies['csrftoken'];
$session['csrftoken'] = $this->csrfToken;
$cachedString->set($session);
self::$instanceCache->save($cachedString);
$jsonResponse = json_decode($response->raw_body, true);
$medias = [];
foreach ($jsonResponse['tag']['top_posts']['nodes'] as $mediaArray) {
Expand All @@ -432,8 +469,9 @@ public function getTopMediasByTagName($tagName)

public function getLocationTopMediasById($facebookLocationId)
{
$headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken];
$response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId), $headers);
$cachedString = self::$instanceCache->getItem($this->sessionUsername);
$session = $cachedString->get();
$response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId), $this->generateHeaders($session));
if ($response->code === 404) {
throw new InstagramNotFoundException('Location with this id doesn\'t exist');
}
Expand All @@ -442,6 +480,9 @@ public function getLocationTopMediasById($facebookLocationId)
}
$cookies = self::parseCookies($response->headers['Set-Cookie']);
$this->csrfToken = $cookies['csrftoken'];
$session['csrftoken'] = $this->csrfToken;
$cachedString->set($session);
self::$instanceCache->save($cachedString);
$jsonResponse = json_decode($response->raw_body, true);
$nodes = $jsonResponse['location']['top_posts']['nodes'];
$medias = [];
Expand All @@ -453,17 +494,22 @@ public function getLocationTopMediasById($facebookLocationId)

public function getLocationMediasById($facebookLocationId, $quantity = 12, $offset = '')
{
$cachedString = self::$instanceCache->getItem($this->sessionUsername);
$session = $cachedString->get();

$index = 0;
$medias = [];
$hasNext = true;
while ($index < $quantity && $hasNext) {
$headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken];
$response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId, $offset), $headers);
$response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId, $offset), $this->generateHeaders($session));
if ($response->code !== 200) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');
}
$cookies = self::parseCookies($response->headers['Set-Cookie']);
$this->csrfToken = $cookies['csrftoken'];
$session['csrftoken'] = $this->csrfToken;
$cachedString->set($session);
self::$instanceCache->save($cachedString);
$arr = json_decode($response->raw_body, true);
$nodes = $arr['location']['media']['nodes'];
foreach ($nodes as $mediaArray) {
Expand All @@ -484,8 +530,10 @@ public function getLocationMediasById($facebookLocationId, $quantity = 12, $offs

public function getLocationById($facebookLocationId)
{
$headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken];
$response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId), $headers);
$cachedString = self::$instanceCache->getItem($this->sessionUsername);
$session = $cachedString->get();

$response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId), $this->generateHeaders($session));
if ($response->code === 404) {
throw new InstagramNotFoundException('Location with this id doesn\'t exist');
}
Expand All @@ -494,32 +542,59 @@ public function getLocationById($facebookLocationId)
}
$cookies = self::parseCookies($response->headers['Set-Cookie']);
$this->csrfToken = $cookies['csrftoken'];
$session['csrftoken'] = $this->csrfToken;
$cachedString->set($session);
self::$instanceCache->save($cachedString);
$jsonResponse = json_decode($response->raw_body, true);
return Location::makeLocation($jsonResponse['location']);
}

public function login()
public function login($force = false)
{
// TODO: cache session
// TODO: load cached session
// TODO: Force login flag
if ($this->sessionUsername == null || $this->sessionPassword == null) {
throw new InstagramAuthException("User credentials not provided");
}
$response = Request::get(Endpoints::BASE_URL);
if ($response->code !== 200) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');

$cachedString = self::$instanceCache->getItem($this->sessionUsername);
$session = $cachedString->get();
if ($force || !$this->isLoggedIn($session)) {
$response = Request::get(Endpoints::BASE_URL);
if ($response->code !== 200) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');
}
$cookies = self::parseCookies($response->headers['Set-Cookie']);
$this->csrfToken = $cookies['csrftoken'];
$this->mid = $cookies['mid'];
$headers = ['cookie' => "csrftoken=$this->csrfToken; mid=$this->mid;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken];
$response = Request::post(Endpoints::LOGIN_URL, $headers, ['username' => $this->sessionUsername, 'password' => $this->sessionPassword]);
if ($response->code !== 200) {
throw new InstagramAuthException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');
}
$cookies = self::parseCookies($response->headers['Set-Cookie']);
$cookies['mid'] = $this->mid;
$this->csrfToken = $cookies['csrftoken'];
$this->sessionId = $cookies['sessionid'];
$cachedString->set($cookies);
self::$instanceCache->save($cachedString);
}
$cookies = self::parseCookies($response->headers['Set-Cookie']);
$this->csrfToken = $cookies['csrftoken'];
$this->mid = $cookies['mid'];
$headers = ['cookie' => "csrftoken=$this->csrfToken; mid=$this->mid;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken];
$response = Request::post(Endpoints::LOGIN_URL, $headers, ['username' => $this->sessionUsername, 'password' => $this->sessionPassword]);
}

public function isLoggedIn($session)
{
if (is_null($session) || !isset($session['sessionid'])) {
return false;
}
$this->sessionId = $session['sessionid'];
$this->csrfToken = $session['csrftoken'];
$headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken];
$response = Request::get(Endpoints::BASE_URL, $headers);
if ($response->code !== 200) {
throw new InstagramAuthException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');
return false;
}
$cookies = self::parseCookies($response->headers['Set-Cookie']);
$this->csrfToken = $cookies['csrftoken'];
$this->sessionId = $cookies['sessionid'];
if (!isset($cookies['ds_user_id'])) {
return false;
}
return true;
}
}
8 changes: 4 additions & 4 deletions tests/InstagramTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class InstagramTest extends TestCase

public static function setUpBeforeClass()
{
self::$instagram = Instagram::withCredentials('PASTE USERNAME', 'PASTE PASSWORD');
self::$instagram = Instagram::withCredentials('PASTE_LOGIN', 'PASTE_PASSWORD');
self::$instagram->login();

}
Expand All @@ -39,10 +39,10 @@ public function testGetMedias()
$this->assertEquals(80, sizeof($medias));
}

public function testGet1000Medias()
public function testGet100Medias()
{
$medias = Instagram::getMedias('kevin', 1000);
$this->assertEquals(1000, sizeof($medias));
$medias = Instagram::getMedias('kevin', 100);
$this->assertEquals(100, sizeof($medias));
}

public function testGetMediaByCode()
Expand Down

0 comments on commit 4682e97

Please sign in to comment.