From 4682e97eba524f9ce8d749bf1ad54f5563ab0a28 Mon Sep 17 00:00:00 2001 From: raiym Date: Tue, 21 Mar 2017 02:29:16 +0300 Subject: [PATCH] First version of session caching --- composer.json | 3 +- index.php | 9 +- src/InstagramScraper/Instagram.php | 181 ++++++++++++++++++++--------- tests/InstagramTest.php | 8 +- 4 files changed, 139 insertions(+), 62 deletions(-) diff --git a/composer.json b/composer.json index 39b0cc64..fd98b466 100644 --- a/composer.json +++ b/composer.json @@ -16,7 +16,8 @@ ], "require": { "php": ">=5.4.0", - "mashape/unirest-php": "3.0.*" + "mashape/unirest-php": "3.0.*", + "phpFastCache/phpFastCache": "5.0.*" }, "require-dev": { "phpunit/phpunit": "5.5.*" diff --git a/index.php b/index.php index cab2c0db..12b26629 100644 --- a/index.php +++ b/index.php @@ -6,7 +6,7 @@ //require_once 'src/InstagramScraper.php'; use InstagramScraper\Instagram; -use Unirest\Request; + //const MAIN_URL = "https://www.instagram.com", //LOGIN_URL = "https://www.instagram.com/accounts/login/ajax/"; ////echo 'Hello
'; @@ -32,7 +32,7 @@ // ////echo $crfs; //$headers = ['cookie' => $h, 'referer' => 'https://www.instagram.com/', 'x-csrftoken' => $crfs]; -//$response = Request::post(LOGIN_URL, $headers, ['username' => 'debugposter8', 'password' => 'vilka121']); +//$response = Request::post(LOGIN_URL, $headers, ['username' => 'debugposter8', 'password' => 'wrong']); //echo json_encode($response); //echo Media::getIdFromCode('z-arAqi4DP') . '
'; @@ -42,7 +42,8 @@ //echo json_encode($instagram->getMediaById('936303077400215759_123123123')); -$instagram = Instagram::withCredentials('debugposter8', 'vilka121'); +$instagram = Instagram::withCredentials('PASTE_LOGIN', 'PASTE_PASSWORD'); $instagram->login(); +echo __DIR__; -echo json_encode($instagram->getTopMediasByTagName('hello')); \ No newline at end of file +//echo json_encode($instagram->getTopMediasByTagName('hello')); \ No newline at end of file diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 94eb370d..c51afc05 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -10,12 +10,13 @@ use InstagramScraper\Model\Location; use InstagramScraper\Model\Media; use InstagramScraper\Model\Tag; +use phpFastCache\CacheManager; use Unirest\Request; class Instagram { const MAX_COMMENTS_PER_REQUEST = 300; - + private static $instanceCache; public $sessionUsername; public $sessionPassword; public $sessionId; @@ -26,8 +27,16 @@ public function __construct() { } - public static function withCredentials($username, $password) + public static function withCredentials($username, $password, $sessionFolder = null) { + if (is_null($sessionFolder)) { + $sessionFolder = __DIR__ . DIRECTORY_SEPARATOR . 'sessions' . DIRECTORY_SEPARATOR; + } + // load cache + CacheManager::setDefaultConfig([ + 'path' => $sessionFolder + ]); + self::$instanceCache = CacheManager::getInstance('files'); $instance = new self(); $instance->sessionUsername = $username; $instance->sessionPassword = $password; @@ -274,24 +283,29 @@ private function getContentsFromUrl($parameters) return $output; } - private static function generateRandomString($length = 10) - { - $characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'; - $charactersLength = strlen($characters); - $randomString = ''; - for ($i = 0; $i < $length; $i++) { - $randomString .= $characters[rand(0, $charactersLength - 1)]; - } - return $randomString; - } - public function getAccountById($id) { if (!is_numeric($id)) { throw new \InvalidArgumentException('User id must be integer or integer wrapped in string'); } + $cachedString = self::$instanceCache->getItem($this->sessionUsername); + $session = $cachedString->get(); + $parameters = Endpoints::getAccountJsonInfoLinkByAccountId($id); - $userArray = json_decode(self::getContentsFromUrl($parameters), true); + + $response = Request::post(Endpoints::INSTAGRAM_QUERY_URL, $this->generateHeaders($session), ['q' => $parameters]); + + if ($response->code !== 200) { + throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); + } + + $cookies = self::parseCookies($response->headers['Set-Cookie']); + $this->csrfToken = $cookies['csrftoken']; + $session['csrftoken'] = $this->csrfToken; + $cachedString->set($session); + self::$instanceCache->save($cachedString); + + $userArray = json_decode($response->raw_body, true); if ($userArray['status'] === 'fail') { throw new InstagramException($userArray['message']); } @@ -301,21 +315,47 @@ public function getAccountById($id) return Account::fromAccountPage($userArray); } + private function generateHeaders($session) + { + $cookies = ''; + foreach ($session as $key => $value) { + $cookies .= "$key=$value; "; + } + $headers = ['cookie' => $cookies, 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; + return $headers; + } + + private static function parseCookies($rawCookies) + { + $cookies = []; + foreach ($rawCookies as $c) { + $c = explode(';', $c)[0]; + $parts = explode('=', $c); + if (sizeof($parts) >= 2 && !is_null($parts[1])) { + $cookies[$parts[0]] = $parts[1]; + } + } + return $cookies; + } + public function getMediasByTag($tag, $count = 12, $maxId = '') { + $cachedString = self::$instanceCache->getItem($this->sessionUsername); + $session = $cachedString->get(); $index = 0; $medias = []; $mediaIds = []; $hasNextPage = true; while ($index < $count && $hasNextPage) { - $headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; - $response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId), $headers); + $response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId), $this->generateHeaders($session)); if ($response->code !== 200) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); } $cookies = self::parseCookies($response->headers['Set-Cookie']); $this->csrfToken = $cookies['csrftoken']; - + $session['csrftoken'] = $this->csrfToken; + $cachedString->set($session); + self::$instanceCache->save($cachedString); $arr = json_decode($response->raw_body, true); if (!is_array($arr)) { throw new InstagramException('Response decoding failed. Returned data corrupted or this library outdated. Please report issue'); @@ -345,17 +385,6 @@ public function getMediasByTag($tag, $count = 12, $maxId = '') return $medias; } - private static function parseCookies($rawCookies) - { - $cookies = []; - foreach ($rawCookies as $c) { - $c = explode(';', $c)[0]; - $parts = explode('=', $c); - $cookies[$parts[0]] = $parts[1]; - } - return $cookies; - } - public function getPaginateMediasByTag($tag, $maxId = '') { $hasNextPage = true; @@ -366,8 +395,9 @@ public function getPaginateMediasByTag($tag, $maxId = '') 'maxId' => $maxId, 'hasNextPage' => $hasNextPage ]; - $headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; - $response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId), $headers); + $cachedString = self::$instanceCache->getItem($this->sessionUsername); + $session = $cachedString->get(); + $response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId), $this->generateHeaders($session)); if ($response->code !== 200) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); @@ -375,6 +405,9 @@ public function getPaginateMediasByTag($tag, $maxId = '') $cookies = self::parseCookies($response->headers['Set-Cookie']); $this->csrfToken = $cookies['csrftoken']; + $session['csrftoken'] = $this->csrfToken; + $cachedString->set($session); + self::$instanceCache->save($cachedString); $arr = json_decode($response->raw_body, true); @@ -412,8 +445,9 @@ public function getPaginateMediasByTag($tag, $maxId = '') public function getTopMediasByTagName($tagName) { - $headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; - $response = Request::get(Endpoints::getMediasJsonByTagLink($tagName, ''), $headers); + $cachedString = self::$instanceCache->getItem($this->sessionUsername); + $session = $cachedString->get(); + $response = Request::get(Endpoints::getMediasJsonByTagLink($tagName, ''), $this->generateHeaders($session)); if ($response->code === 404) { throw new InstagramNotFoundException('Account with given username does not exist.'); } @@ -422,6 +456,9 @@ public function getTopMediasByTagName($tagName) } $cookies = self::parseCookies($response->headers['Set-Cookie']); $this->csrfToken = $cookies['csrftoken']; + $session['csrftoken'] = $this->csrfToken; + $cachedString->set($session); + self::$instanceCache->save($cachedString); $jsonResponse = json_decode($response->raw_body, true); $medias = []; foreach ($jsonResponse['tag']['top_posts']['nodes'] as $mediaArray) { @@ -432,8 +469,9 @@ public function getTopMediasByTagName($tagName) public function getLocationTopMediasById($facebookLocationId) { - $headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; - $response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId), $headers); + $cachedString = self::$instanceCache->getItem($this->sessionUsername); + $session = $cachedString->get(); + $response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId), $this->generateHeaders($session)); if ($response->code === 404) { throw new InstagramNotFoundException('Location with this id doesn\'t exist'); } @@ -442,6 +480,9 @@ public function getLocationTopMediasById($facebookLocationId) } $cookies = self::parseCookies($response->headers['Set-Cookie']); $this->csrfToken = $cookies['csrftoken']; + $session['csrftoken'] = $this->csrfToken; + $cachedString->set($session); + self::$instanceCache->save($cachedString); $jsonResponse = json_decode($response->raw_body, true); $nodes = $jsonResponse['location']['top_posts']['nodes']; $medias = []; @@ -453,17 +494,22 @@ public function getLocationTopMediasById($facebookLocationId) public function getLocationMediasById($facebookLocationId, $quantity = 12, $offset = '') { + $cachedString = self::$instanceCache->getItem($this->sessionUsername); + $session = $cachedString->get(); + $index = 0; $medias = []; $hasNext = true; while ($index < $quantity && $hasNext) { - $headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; - $response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId, $offset), $headers); + $response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId, $offset), $this->generateHeaders($session)); if ($response->code !== 200) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); } $cookies = self::parseCookies($response->headers['Set-Cookie']); $this->csrfToken = $cookies['csrftoken']; + $session['csrftoken'] = $this->csrfToken; + $cachedString->set($session); + self::$instanceCache->save($cachedString); $arr = json_decode($response->raw_body, true); $nodes = $arr['location']['media']['nodes']; foreach ($nodes as $mediaArray) { @@ -484,8 +530,10 @@ public function getLocationMediasById($facebookLocationId, $quantity = 12, $offs public function getLocationById($facebookLocationId) { - $headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; - $response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId), $headers); + $cachedString = self::$instanceCache->getItem($this->sessionUsername); + $session = $cachedString->get(); + + $response = Request::get(Endpoints::getMediasJsonByLocationIdLink($facebookLocationId), $this->generateHeaders($session)); if ($response->code === 404) { throw new InstagramNotFoundException('Location with this id doesn\'t exist'); } @@ -494,32 +542,59 @@ public function getLocationById($facebookLocationId) } $cookies = self::parseCookies($response->headers['Set-Cookie']); $this->csrfToken = $cookies['csrftoken']; + $session['csrftoken'] = $this->csrfToken; + $cachedString->set($session); + self::$instanceCache->save($cachedString); $jsonResponse = json_decode($response->raw_body, true); return Location::makeLocation($jsonResponse['location']); } - public function login() + public function login($force = false) { - // TODO: cache session - // TODO: load cached session - // TODO: Force login flag if ($this->sessionUsername == null || $this->sessionPassword == null) { throw new InstagramAuthException("User credentials not provided"); } - $response = Request::get(Endpoints::BASE_URL); - if ($response->code !== 200) { - throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); + + $cachedString = self::$instanceCache->getItem($this->sessionUsername); + $session = $cachedString->get(); + if ($force || !$this->isLoggedIn($session)) { + $response = Request::get(Endpoints::BASE_URL); + if ($response->code !== 200) { + throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); + } + $cookies = self::parseCookies($response->headers['Set-Cookie']); + $this->csrfToken = $cookies['csrftoken']; + $this->mid = $cookies['mid']; + $headers = ['cookie' => "csrftoken=$this->csrfToken; mid=$this->mid;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; + $response = Request::post(Endpoints::LOGIN_URL, $headers, ['username' => $this->sessionUsername, 'password' => $this->sessionPassword]); + if ($response->code !== 200) { + throw new InstagramAuthException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); + } + $cookies = self::parseCookies($response->headers['Set-Cookie']); + $cookies['mid'] = $this->mid; + $this->csrfToken = $cookies['csrftoken']; + $this->sessionId = $cookies['sessionid']; + $cachedString->set($cookies); + self::$instanceCache->save($cachedString); } - $cookies = self::parseCookies($response->headers['Set-Cookie']); - $this->csrfToken = $cookies['csrftoken']; - $this->mid = $cookies['mid']; - $headers = ['cookie' => "csrftoken=$this->csrfToken; mid=$this->mid;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; - $response = Request::post(Endpoints::LOGIN_URL, $headers, ['username' => $this->sessionUsername, 'password' => $this->sessionPassword]); + } + + public function isLoggedIn($session) + { + if (is_null($session) || !isset($session['sessionid'])) { + return false; + } + $this->sessionId = $session['sessionid']; + $this->csrfToken = $session['csrftoken']; + $headers = ['cookie' => "csrftoken=$this->csrfToken; sessionid=$this->sessionId;", 'referer' => Endpoints::BASE_URL . '/', 'x-csrftoken' => $this->csrfToken]; + $response = Request::get(Endpoints::BASE_URL, $headers); if ($response->code !== 200) { - throw new InstagramAuthException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); + return false; } $cookies = self::parseCookies($response->headers['Set-Cookie']); - $this->csrfToken = $cookies['csrftoken']; - $this->sessionId = $cookies['sessionid']; + if (!isset($cookies['ds_user_id'])) { + return false; + } + return true; } } \ No newline at end of file diff --git a/tests/InstagramTest.php b/tests/InstagramTest.php index 00d0f7ef..c0639cb4 100644 --- a/tests/InstagramTest.php +++ b/tests/InstagramTest.php @@ -13,7 +13,7 @@ class InstagramTest extends TestCase public static function setUpBeforeClass() { - self::$instagram = Instagram::withCredentials('PASTE USERNAME', 'PASTE PASSWORD'); + self::$instagram = Instagram::withCredentials('PASTE_LOGIN', 'PASTE_PASSWORD'); self::$instagram->login(); } @@ -39,10 +39,10 @@ public function testGetMedias() $this->assertEquals(80, sizeof($medias)); } - public function testGet1000Medias() + public function testGet100Medias() { - $medias = Instagram::getMedias('kevin', 1000); - $this->assertEquals(1000, sizeof($medias)); + $medias = Instagram::getMedias('kevin', 100); + $this->assertEquals(100, sizeof($medias)); } public function testGetMediaByCode()