Skip to content

Commit

Permalink
Merge pull request #350 from mr-molodoy/master
Browse files Browse the repository at this point in the history
Optimize and update
  • Loading branch information
raiym authored Apr 20, 2018
2 parents c441ac5 + 237ea0b commit 817932a
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 58 deletions.
6 changes: 2 additions & 4 deletions examples/getAccountById.php
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
<?php
require __DIR__ . '/../vendor/autoload.php';

$instagram = \InstagramScraper\Instagram::withCredentials('username', 'password', '/path/to/cache/folder');
$instagram->login();
$account = $instagram->getAccountById('3');
$account = (new \InstagramScraper\Instagram())->getAccountById('3');

// Available fields
echo "Account info:\n";
Expand All @@ -17,4 +15,4 @@
echo "Number of followers: {$account->getFollowedByCount()}\n";
echo "Number of follows: {$account->getFollowsCount()}\n";
echo "Is private: {$account->isPrivate()}\n";
echo "Is verified: {$account->isVerified()}\n";
echo "Is verified: {$account->isVerified()}\n";
31 changes: 31 additions & 0 deletions examples/getPaginateMediasByUsername.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<?php
require __DIR__ . '/../vendor/autoload.php';

// Example: fetch one page of media for the "kevin" account and print each item
// together with its owner. No login call is made in this script.
$instagram = new \InstagramScraper\Instagram();
$response = $instagram->getPaginateMedias('kevin');

foreach ($response['medias'] as $media) {
    /** @var \InstagramScraper\Model\Media $media */

    echo "Media info:" . PHP_EOL;
    echo "Id: {$media->getId()}" . PHP_EOL;
    echo "Shortcode: {$media->getShortCode()}" . PHP_EOL;
    echo "Created at: {$media->getCreatedTime()}" . PHP_EOL;
    echo "Caption: {$media->getCaption()}" . PHP_EOL;
    echo "Number of comments: {$media->getCommentsCount()}" . PHP_EOL;
    echo "Number of likes: {$media->getLikesCount()}" . PHP_EOL;
    echo "Get link: {$media->getLink()}" . PHP_EOL;
    echo "High resolution image: {$media->getImageHighResolutionUrl()}" . PHP_EOL;
    echo "Media type (video or image): {$media->getType()}" . PHP_EOL . PHP_EOL;
    $account = $media->getOwner();

    echo "Account info:" . PHP_EOL;
    echo "Id: {$account->getId()}" . PHP_EOL;
    echo "Username: {$account->getUsername()}" . PHP_EOL;
    echo "Full name: {$account->getFullName()}" . PHP_EOL;
    echo "Profile pic url: {$account->getProfilePicUrl()}" . PHP_EOL;
    echo PHP_EOL . PHP_EOL;
}

// Interpolating the raw flag would print "1" or an empty string, so spell it out.
$hasNextPage = $response['hasNextPage'] ? 'true' : 'false';
echo "HasNextPage: {$hasNextPage}" . PHP_EOL;
// Pass this value as the second argument of getPaginateMedias() to request the next page.
echo "MaxId: {$response['maxId']}" . PHP_EOL;
6 changes: 6 additions & 0 deletions src/InstagramScraper/Endpoints.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class Endpoints
const USER_FEED2 = 'https://www.instagram.com/?__a=1';
const INSTAGRAM_QUERY_URL = 'https://www.instagram.com/query/';
const INSTAGRAM_CDN_URL = 'https://scontent.cdninstagram.com/';
const ACCOUNT_JSON_PRIVATE_INFO_BY_ID = 'https://i.instagram.com/api/v1/users/{userId}/info/';

const ACCOUNT_MEDIAS2 = 'https://www.instagram.com/graphql/query/?query_id=17880160963012870&id={{accountId}}&first=10&after=';

Expand Down Expand Up @@ -64,6 +65,11 @@ public static function getAccountJsonInfoLinkByAccountId($id)
return str_replace('{userId}', urlencode($id), static::ACCOUNT_JSON_INFO_BY_ID);
}

/**
 * Build the account-info URL from ACCOUNT_JSON_PRIVATE_INFO_BY_ID for one account id.
 *
 * @param mixed $id account id; it is URL-encoded before being placed in the template
 *
 * @return string the template with its `{userId}` placeholder filled in
 */
public static function getAccountJsonPrivateInfoLinkByAccountId($id)
{
    $encodedId = urlencode($id);
    $template = static::ACCOUNT_JSON_PRIVATE_INFO_BY_ID;

    return str_replace('{userId}', $encodedId, $template);
}

public static function getAccountMediasJsonLink($variables)
{
return str_replace('{variables}', urlencode($variables), static::ACCOUNT_MEDIAS);
Expand Down
147 changes: 93 additions & 54 deletions src/InstagramScraper/Instagram.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
use InstagramScraper\Model\Story;
use InstagramScraper\Model\Tag;
use InstagramScraper\Model\UserStories;
use InvalidArgumentException;
use phpFastCache\Cache\ExtendedCacheItemPoolInterface;
use phpFastCache\CacheManager;
use Unirest\Request;

Expand All @@ -26,7 +28,9 @@ class Instagram
const PAGING_DELAY_MINIMUM_MICROSEC = 1000000; // 1 sec min delay to simulate browser
const PAGING_DELAY_MAXIMUM_MICROSEC = 3000000; // 3 sec max delay to simulate browser

/** @var ExtendedCacheItemPoolInterface $instanceCache */
private static $instanceCache;

public $pagingTimeLimitSec = self::PAGING_TIME_LIMIT_SEC;
public $pagingDelayMinimumMicrosec = self::PAGING_DELAY_MINIMUM_MICROSEC;
public $pagingDelayMaximumMicrosec = self::PAGING_DELAY_MAXIMUM_MICROSEC;
Expand All @@ -42,6 +46,7 @@ class Instagram
* @param null $sessionFolder
*
* @return Instagram
* @throws \phpFastCache\Exceptions\phpFastCacheDriverCheckException
*/
public static function withCredentials($username, $password, $sessionFolder = null)
{
Expand Down Expand Up @@ -254,11 +259,9 @@ public function setUserAgent($userAgent)
}

/**
* @param $userAgent
*
* @return null
*/
public function resetUserAgent($userAgent)
public function resetUserAgent()
{
    // Clear any user agent previously stored on this instance.
    $this->userAgent = null;

    return null;
}
Expand All @@ -270,6 +273,7 @@ public function resetUserAgent($userAgent)
*
* @return Media[]
* @throws InstagramException
* @throws InstagramNotFoundException
*/
public function getMedias($username, $count = 20, $maxId = '')
{
Expand All @@ -295,39 +299,37 @@ public function getAccount($username)
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.');
}

preg_match_all('#\_sharedData \= (.*?)\;\<\/script\>#', $response->raw_body, $out);
$userArray = json_decode($out[1][0], true, 512, JSON_BIGINT_AS_STRING);

$userArray = self::extractSharedDataFromBody($response->raw_body);

if (!isset($userArray['entry_data']['ProfilePage'][0]['graphql']['user'])) {
throw new InstagramNotFoundException('Account with this username does not exist', 404);
}
return Account::create($userArray['entry_data']['ProfilePage'][0]['graphql']['user']);
}

/**
 * Request a page and decode the `_sharedData` JSON object embedded in its markup.
 *
 * @param string $url page address; exactly one trailing slash is used for the request
 *
 * @return mixed|null decoded data as an associative array, or null when the body has no
 *                    `_sharedData` assignment or its JSON cannot be decoded
 * @throws InstagramNotFoundException when the response code equals HTTP_NOT_FOUND
 * @throws InstagramException when the response code is anything other than HTTP_OK
 */
private function getSharedDataFromPage($url = Endpoints::BASE_URL)
{
    $response = Request::get(rtrim($url, '/') . '/', $this->generateHeaders($this->userSession));
    if (static::HTTP_NOT_FOUND === $response->code) {
        throw new InstagramNotFoundException('Account with given username does not exist.');
    }
    if (static::HTTP_OK !== $response->code) {
        throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.');
    }

    // Only the first assignment is used, so stop at the first match. Bail out explicitly
    // when nothing matches instead of indexing into an empty match array.
    if (preg_match('#\_sharedData \= (.*?)\;\<\/script\>#', $response->raw_body, $out) !== 1) {
        return null;
    }

    return json_decode($out[1], true, 512, JSON_BIGINT_AS_STRING);
}

/**
 * Return the `rhx_gis` value from the shared page data, looking it up when the
 * instance does not hold one yet.
 *
 * A non-null value is kept on the instance, so later calls skip the page request.
 *
 * @return mixed|null the stored `rhx_gis` value; null when the shared data has none
 * @throws InstagramException when fetching the shared data throws
 */
private function getRhxGis()
{
    if ($this->rhxGis === null) {
        try {
            // Fetch once, inside the guard, so every failure is reported as InstagramException.
            $sharedData = $this->getSharedDataFromPage();
            $this->rhxGis = $sharedData['rhx_gis'];
        } catch (\Exception $exception) {
            throw new InstagramException('Could not extract gis from page');
        }
    }

    return $this->rhxGis;
}

/**
* @param $variables
* @return string
* @throws InstagramException
*/
private function generateGisToken($variables)
{
return md5(implode(':', [$this->getRhxGis(), $variables ]));
Expand Down Expand Up @@ -358,7 +360,9 @@ public function getMediasByUserId($id, $count = 12, $maxId = '')
if (static::HTTP_OK !== $response->code) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.');
}

$arr = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING);

if (!is_array($arr)) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.');
}
Expand Down Expand Up @@ -422,12 +426,14 @@ public function getMediasFromFeed($username, $count = 20)
}

return $medias;
}
}

/**
* @param $mediaId
*
* @return Media
* @throws InstagramException
* @throws InstagramNotFoundException
*/
public function getMediaById($mediaId)
{
Expand All @@ -445,7 +451,7 @@ public function getMediaById($mediaId)
public function getMediaByUrl($mediaUrl)
{
if (filter_var($mediaUrl, FILTER_VALIDATE_URL) === false) {
throw new \InvalidArgumentException('Malformed media url');
throw new InvalidArgumentException('Malformed media url');
}
$response = Request::get(rtrim($mediaUrl, '/') . '/?__a=1', $this->generateHeaders($this->userSession));
// use a raw constant in the code is not a good idea!!
Expand Down Expand Up @@ -486,6 +492,7 @@ public function getMediaByCode($mediaCode)
*
* @return array
* @throws InstagramException
* @throws InstagramNotFoundException
*/
public function getPaginateMedias($username, $maxId = '')
{
Expand All @@ -499,8 +506,16 @@ public function getPaginateMedias($username, $maxId = '')
'hasNextPage' => $hasNextPage,
];

$response = Request::get(Endpoints::getAccountMediasJsonLink($account->getId(), $maxId),
$this->generateHeaders($this->userSession));
$variables = json_encode([
'id' => (string) $account->getId(),
'first' => (string) Endpoints::getAccountMediasRequestCount(),
'after' => (string) $maxId
]);

$response = Request::get(
Endpoints::getAccountMediasJsonLink($variables),
$this->generateHeaders($this->userSession, $this->generateGisToken($variables))
);

// use a raw constant in the code is not a good idea!!
//if ($response->code !== 200) {
Expand Down Expand Up @@ -539,11 +554,12 @@ public function getPaginateMedias($username, $maxId = '')
}

/**
* @param $mediaId
* @param $mediaId
* @param int $count
* @param null $maxId
*
* @return Comment[]
* @throws InstagramException
*/
public function getMediaCommentsById($mediaId, $count = 10, $maxId = null)
{
Expand Down Expand Up @@ -704,7 +720,8 @@ public function getMediaLikesByCode($code, $count = 10, $maxId = null)
*
* @return Account
* @throws InstagramException
* @throws \InvalidArgumentException
* @throws InvalidArgumentException
* @throws InstagramNotFoundException
*/
public function getAccountById($id)
{
Expand All @@ -714,44 +731,31 @@ public function getAccountById($id)

/**
* @param string $id
*
* @return string
* @throws InstagramException
* @throws \InvalidArgumentException
* @throws InstagramNotFoundException
*/
public function getUsernameById($id)
{
// Use the follow page to get the account. The follow url will redirect to the home page for the user,
// which has the username embedded in the url.
$response = Request::get(Endpoints::getAccountJsonPrivateInfoLinkByAccountId($id), $this->generateHeaders($this->userSession));

if (!is_numeric($id)) {
throw new \InvalidArgumentException('User id must be integer or integer wrapped in string');
if (static::HTTP_NOT_FOUND === $response->code) {
throw new InstagramNotFoundException('Account with given username does not exist.');
}

$url = Endpoints::getFollowUrl($id);

// Cut a request by disabling redirects.
Request::curlOpt(CURLOPT_FOLLOWLOCATION, FALSE);
$response = Request::get($url, $this->generateHeaders($this->userSession));
Request::curlOpt(CURLOPT_FOLLOWLOCATION, TRUE);

if ($response->code === 400) {
throw new InstagramException('Account with this id does not exist.');
if (static::HTTP_OK !== $response->code) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.');
}

if ($response->code !== 302) {
throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->raw_body) . ' Something went wrong. Please report issue.');
if (!($responseArray = json_decode($response->raw_body, true))) {
throw new InstagramException('Response does not JSON');
}

$cookies = static::parseCookies($response->headers['Set-Cookie']);
$this->userSession['csrftoken'] = $cookies['csrftoken'];

// Get the username from the response url.
$responseUrl = $response->headers['Location'];
$urlParts = explode('/', rtrim($responseUrl, '/'));
$username = end($urlParts);
if ($responseArray['status'] !== 'ok') {
throw new InstagramException((isset($responseArray['message']) ? $responseArray['message'] : 'Unknown Error'));
}

return $username;
return $responseArray['user']['username'];
}

/**
Expand Down Expand Up @@ -1262,6 +1266,12 @@ public function isLoggedIn($session)
return true;
}

/**
* @param $response
* @param $cookies
* @return \Unirest\Response
* @throws InstagramAuthException
*/
private function verifyTwoStep($response, $cookies)
{
$new_cookies = static::parseCookies($response->headers['Set-Cookie']);
Expand Down Expand Up @@ -1351,4 +1361,33 @@ public function saveSession()
$cachedString = static::$instanceCache->getItem($this->sessionUsername);
$cachedString->set($this->userSession);
}

/**
 * Pull the `_sharedData` JSON object out of a page body and decode it.
 *
 * @param string $body raw page markup
 *
 * @return mixed|null decoded data as an associative array (big integers are kept as
 *                    strings), or null when no assignment is found or the JSON is invalid
 */
private static function extractSharedDataFromBody($body)
{
    // Only the first occurrence is ever used, so stop scanning at the first match
    // instead of collecting every match in the body.
    if (preg_match('#\_sharedData \= (.*?)\;\<\/script\>#', $body, $matches) !== 1) {
        return null;
    }

    return json_decode($matches[1], true, 512, JSON_BIGINT_AS_STRING);
}

/**
 * Download a page and hand its body to extractSharedDataFromBody().
 *
 * @param string $url page address; exactly one trailing slash is used for the request
 * @return mixed|null whatever extractSharedDataFromBody() yields for the response body
 * @throws InstagramException when the response code is anything other than HTTP_OK
 * @throws InstagramNotFoundException when the response code equals HTTP_NOT_FOUND
 */
private function getSharedDataFromPage($url = Endpoints::BASE_URL)
{
    $pageUrl = rtrim($url, '/') . '/';
    $headers = $this->generateHeaders($this->userSession);
    $response = Request::get($pageUrl, $headers);
    $status = $response->code;

    if ($status === static::HTTP_NOT_FOUND) {
        throw new InstagramNotFoundException("Page {$url} not found");
    }

    if ($status !== static::HTTP_OK) {
        $message = 'Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.';
        throw new InstagramException($message);
    }

    return self::extractSharedDataFromBody($response->raw_body);
}
}

0 comments on commit 817932a

Please sign in to comment.