Skip to content

Commit

Permalink
Fix retry to get data
Browse files Browse the repository at this point in the history
  • Loading branch information
Menighin committed Oct 25, 2020
1 parent 02edde2 commit 938b6fd
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 80 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "cei-crawler",
"version": "2.0.1",
"version": "2.0.2",
"description": "Crawler para pegar dados do Canal Eletronico do Investidor",
"main": "src/app.js",
"repository": {
Expand Down
76 changes: 48 additions & 28 deletions src/lib/DividendsCrawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,6 @@ class DividendsCrawler {
})).get()
.filter(institution => institution.value > 0);


// Iterate over institutions, accounts, processing the stocks
for (const institution of institutions) {

Expand Down Expand Up @@ -187,34 +186,8 @@ class DividendsCrawler {
console.log(`Selecting account ${account}`);

domPage(PAGE.SELECT_ACCOUNT).attr('value', account);

const formDataHistory = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.DIVIDENDS_ACCOUNT, {
ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar',
__EVENTARGUMENT: '',
__LASTFOCUS: ''
});

const historyRequest = await cookieManager.fetch(PAGE.URL, {
...FETCH_OPTIONS.DIVIDENDS_ACCOUNT,
body: formDataHistory
});

const dividendsText = normalizeWhitespace(await historyRequest.text());
const errorMessage = CeiUtils.extractMessagePostResponse(dividendsText);

if (errorMessage && errorMessage.type === 2) {
throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message);
}

const dividendsDOM = cheerio.load(dividendsText);

// Process the page
/* istanbul ignore next */
if (traceOperations)
console.log(`Processing dividends data`);

const futureEvents = this._processEvents(dividendsDOM, PAGE.FUTURE_EVENTS_TITLE);
const pastEvents = this._processEvents(dividendsDOM, PAGE.PAST_EVENTS_TITLE);
const { futureEvents, pastEvents } = await this._getDataPage(domPage, cookieManager, traceOperations);

// Save the result
result.push({
Expand Down Expand Up @@ -278,6 +251,53 @@ class DividendsCrawler {
}
}

/**
* Returns the data from the page after trying more than once
* @param {cheerio.Root} dom DOM of page
* @param {FetchCookieManager} cookieManager - FetchCookieManager to work with
* @param {Boolean} traceOperations - Whether to trace operations or not
*/
static async _getDataPage(dom, cookieManager, traceOperations) {
while(true) {
const formDataHistory = CeiUtils.extractFormDataFromDOM(dom, FETCH_FORMS.DIVIDENDS_ACCOUNT, {
ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar',
__EVENTARGUMENT: '',
__LASTFOCUS: ''
});

const dividendsRequest = await cookieManager.fetch(PAGE.URL, {
...FETCH_OPTIONS.DIVIDENDS_ACCOUNT,
body: formDataHistory
});

const dividendsText = normalizeWhitespace(await dividendsRequest.text());
const errorMessage = CeiUtils.extractMessagePostResponse(dividendsText);

if (errorMessage && errorMessage.type === 2) {
throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message);
}

const dividendsDOM = cheerio.load(dividendsText);

// Process the page
/* istanbul ignore next */
if (traceOperations)
console.log(`Processing dividends data`);

const futureEvents = this._processEvents(dividendsDOM, PAGE.FUTURE_EVENTS_TITLE);
const pastEvents = this._processEvents(dividendsDOM, PAGE.PAST_EVENTS_TITLE);

if (errorMessage.type !== undefined || futureEvents.length > 0 || pastEvents.length > 0) {
return {
futureEvents,
pastEvents
};
}

const updtForm = CeiUtils.extractUpdateForm(dividendsText);
CeiUtils.updateFieldsDOM(dom, updtForm);
}
}

/**
* Process the events given the parameters
Expand Down
67 changes: 43 additions & 24 deletions src/lib/StockHistoryCrawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -209,31 +209,8 @@ class StockHistoryCrawler {
console.log(`Selecting account ${account}`);

domPage(PAGE.SELECT_ACCOUNT).attr('value', account);

const formDataHistory = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.STOCK_HISTORY_ACCOUNT, {
ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar',
__EVENTARGUMENT: ''
});

const historyRequest = await cookieManager.fetch(PAGE.URL, {
...FETCH_OPTIONS.STOCK_HISTORY_ACCOUNT,
body: formDataHistory
});

const historyText = normalizeWhitespace(await historyRequest.text());
const errorMessage = CeiUtils.extractMessagePostResponse(historyText);

if (errorMessage && errorMessage.type === 2) {
throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message);
}

const historyDOM = cheerio.load(historyText);

/* istanbul ignore next */
if (traceOperations)
console.log(`Processing stock history data`);

const stockHistory = this._processStockHistory(historyDOM);
const stockHistory = await this._getDataPage(domPage, cookieManager, traceOperations);

/* istanbul ignore next */
if (traceOperations) {
Expand Down Expand Up @@ -300,6 +277,48 @@ class StockHistoryCrawler {
}
}

/**
* Returns the data from the page after trying more than once
* @param {cheerio.Root} dom DOM of page
* @param {FetchCookieManager} cookieManager - FetchCookieManager to work with
* @param {Boolean} traceOperations - Whether to trace operations or not
*/
static async _getDataPage(dom, cookieManager, traceOperations) {
while(true) {
const formDataHistory = CeiUtils.extractFormDataFromDOM(dom, FETCH_FORMS.STOCK_HISTORY_ACCOUNT, {
ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar',
__EVENTARGUMENT: ''
});

const historyRequest = await cookieManager.fetch(PAGE.URL, {
...FETCH_OPTIONS.STOCK_HISTORY_ACCOUNT,
body: formDataHistory
});

const historyText = normalizeWhitespace(await historyRequest.text());
const errorMessage = CeiUtils.extractMessagePostResponse(historyText);

if (errorMessage && errorMessage.type === 2) {
throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message);
}

const historyDOM = cheerio.load(historyText);

/* istanbul ignore next */
if (traceOperations)
console.log(`Processing stock history data`);

const stockHistory = this._processStockHistory(historyDOM);

if (errorMessage.type !== undefined || stockHistory.length > 0) {
return stockHistory;
}

const updtForm = CeiUtils.extractUpdateForm(historyText);
CeiUtils.updateFieldsDOM(dom, updtForm);
}
}

/**
* Process the stock history to a DTO
* @param {cheerio.Root} dom DOM table stock history
Expand Down
76 changes: 49 additions & 27 deletions src/lib/WalletCrawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -201,33 +201,7 @@ class WalletCrawler {

domPage(PAGE.SELECT_ACCOUNT).attr('value', account);

const formDataHistory = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.WALLET_ACCOUNT, {
ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar',
__EVENTARGUMENT: '',
__LASTFOCUS: ''
});

const historyRequest = await cookieManager.fetch(PAGE.URL, {
...FETCH_OPTIONS.WALLET_ACCOUNT,
body: formDataHistory
});

const walletText = normalizeWhitespace(await historyRequest.text());
const errorMessage = CeiUtils.extractMessagePostResponse(walletText);

if (errorMessage && errorMessage.type === 2) {
throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message);
}

const walletDOM = cheerio.load(walletText);

// Process the page
/* istanbul ignore next */
if (traceOperations)
console.log(`Processing wallet data`);

const stockWallet = this._processStockWallet(walletDOM);
const nationalTreasuryWallet = this._processNationalTreasuryWallet(walletDOM);
const { stockWallet, nationalTreasuryWallet } = await this._getDataPage(domPage, cookieManager, traceOperations);

// Save the result
result.push({
Expand Down Expand Up @@ -291,6 +265,54 @@ class WalletCrawler {
}
}

/**
* Returns the data from the page after trying more than once
* @param {cheerio.Root} dom DOM of page
* @param {FetchCookieManager} cookieManager - FetchCookieManager to work with
* @param {Boolean} traceOperations - Whether to trace operations or not
*/
static async _getDataPage(dom, cookieManager, traceOperations) {
while(true) {
const formDataWallet = CeiUtils.extractFormDataFromDOM(dom, FETCH_FORMS.WALLET_ACCOUNT, {
ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar',
__EVENTARGUMENT: '',
__LASTFOCUS: ''
});

const walletRequest = await cookieManager.fetch(PAGE.URL, {
...FETCH_OPTIONS.WALLET_ACCOUNT,
body: formDataWallet
});

const walletText = normalizeWhitespace(await walletRequest.text());
const errorMessage = CeiUtils.extractMessagePostResponse(walletText);

if (errorMessage && errorMessage.type === 2) {
throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message);
}

const walletDOM = cheerio.load(walletText);

// Process the page
/* istanbul ignore next */
if (traceOperations)
console.log(`Processing wallet data`);

const stockWallet = this._processStockWallet(walletDOM);
const nationalTreasuryWallet = this._processNationalTreasuryWallet(walletDOM);

if (errorMessage.type !== undefined || stockWallet.length > 0 || nationalTreasuryWallet.length > 0) {
return {
stockWallet,
nationalTreasuryWallet
};
}

const updtForm = CeiUtils.extractUpdateForm(historyText);
CeiUtils.updateFieldsDOM(dom, updtForm);
}
}

/**
* Process the stock wallet to a DTO
* @param {cheerio.Root} dom DOM table stock history
Expand Down

0 comments on commit 938b6fd

Please sign in to comment.