From db89e1530f6bdf41ec17808c1af3e66271bf81a0 Mon Sep 17 00:00:00 2001 From: Menighin Date: Sun, 25 Jul 2021 23:45:47 -0300 Subject: [PATCH 01/17] Login in the new portal --- README.md | 4 +- package.json | 1 + src/lib/CeiCrawler.js | 86 +++++----------------------- src/lib/CeiCrawlerError.js | 3 +- src/lib/CeiLoginService.js | 102 ++++++++++++++++++++++++++++++++++ src/lib/CeiUtils.js | 16 +++--- src/lib/FetchCookieManager.js | 2 +- src/lib/certificate.crt | 39 ------------- src/lib/typedefs.js | 9 ++- 9 files changed, 139 insertions(+), 123 deletions(-) create mode 100644 src/lib/CeiLoginService.js delete mode 100644 src/lib/certificate.crt diff --git a/README.md b/README.md index 59da40b..ae8b07d 100644 --- a/README.md +++ b/README.md @@ -461,7 +461,7 @@ Na criação de um `CeiCrawler` é possivel especificar alguns valores para o pa |-----------------------|-----------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | **capDates** | _Boolean_ | _false_ | Se `true`, as datas utilizadas de input para buscas serão limitadas ao range de datas válidas do CEI, impedindo que ocorra um erro caso o usuário passe uma data maior ou menor. | | **navigationTimeout** | _Number_ | 30000 | Tempo, em ms, que o crawler espera por uma ação antes de considerar timeout. | -| **loginTimeout** | _Number_ | 180000 | Tempo, em ms, que o crawler espera para realizar login antes de considerar timeout. Diversas vezes, como a noite e aos fins de semana, o sistema do CEI parece ficar muito instavél e causa diversos timeouts no login. | +| **timeout** | _Number_ | 180000 | Tempo, em ms, que o crawler espera para realizar login antes de considerar timeout. Diversas vezes, como a noite e aos fins de semana, o sistema do CEI parece ficar muito instavél e causa diversos timeouts no login. 
| | **trace** | _Boolean_ | _false_ | Printa mensagens de debug no log. Útil para desenvolvimento. | Exemplo: @@ -471,7 +471,7 @@ const ceiCrawlerOptions = { trace: false, capEndDate: true, navigationTimeout: 60000, - loginTimeout: 240000, + timeout: 240000, }; diff --git a/package.json b/package.json index 811a0d4..a4ef7f5 100644 --- a/package.json +++ b/package.json @@ -28,6 +28,7 @@ "cheerio": "^1.0.0-rc.3", "node-fetch": "^2.6.1", "normalize-html-whitespace": "^1.0.0", + "puppeteer-core": "^10.1.0", "tough-cookie": "^4.0.0" }, "devDependencies": { diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index a811672..40e2f6c 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -8,15 +8,13 @@ const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); const FetchCookieManager = require('./FetchCookieManager'); const cheerio = require('cheerio'); const CeiUtils = require('./CeiUtils'); +const CeiLoginService = require('./CeiLoginService'); class CeiCrawler { /** @type {boolean} */ _isLogged = false; - /** @type {FetchCookieManager} */ - _cookieManager = null; - get username() { return this._username; } set username(username) { this._username = username; } @@ -27,6 +25,9 @@ class CeiCrawler { get options() { return this._options; } set options(options) { this._options = options; } + /** @type {CeiLoginService} */ + _ceiLoginService = null; + /** * * @param {String} username - Username to login at CEI @@ -39,18 +40,16 @@ class CeiCrawler { this.options = options; this._setDefaultOptions(); - this._cookieManager = new FetchCookieManager({ - 'Host': 'cei.b3.com.br', - 'Origin': 'https://cei.b3.com.br', - 'Referer': 'https://ceiapp.b3.com.br/CEI_Responsivo/login.aspx', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36' - }, this.options.navigationTimeout); + this._ceiLoginService = new CeiLoginService(username, password, this.options.loginOptions); + } 
_setDefaultOptions() { if (!this.options.trace) this.options.trace = false; if (!this.options.navigationTimeout) this.options.navigationTimeout = 30000; - if (!this.options.loginTimeout) this.options.loginTimeout = 150000; + if (!this.options.loginOptions) this.options.loginOptions = {}; + if (!this.options.loginOptions.timeout) this.options.loginOptions.timeout = 150000; + if (!this.options.loginOptions.strategy) this.options.loginOptions.strategy = 'user-input'; } async login() { @@ -63,68 +62,11 @@ class CeiCrawler { /* istanbul ignore next */ if ((this.options && this.options.trace) || false) - console.log('Logging at CEI...'); - - const getPageLogin = await this._cookieManager.fetch("https://ceiapp.b3.com.br/CEI_Responsivo/login.aspx"); - const doomLoginPage = cheerio.load(await getPageLogin.text()); - - doomLoginPage('#ctl00_ContentPlaceHolder1_txtLogin').attr('value', this.username); - doomLoginPage('#ctl00_ContentPlaceHolder1_txtSenha').attr('value', this.password); - - const formData = CeiUtils.extractFormDataFromDOM(doomLoginPage, [ - 'ctl00$ContentPlaceHolder1$smLoad', - '__EVENTTARGET', - '__EVENTARGUMENT', - '__VIEWSTATE', - '__VIEWSTATEGENERATOR', - '__EVENTVALIDATION', - 'ctl00$ContentPlaceHolder1$txtLogin', - 'ctl00$ContentPlaceHolder1$txtSenha', - '__ASYNCPOST', - 'ctl00$ContentPlaceHolder1$btnLogar' - ], { - ctl00$ContentPlaceHolder1$smLoad: 'ctl00$ContentPlaceHolder1$UpdatePanel1|ctl00$ContentPlaceHolder1$btnLogar', - __EVENTTARGET: '', - __EVENTARGUMENT: '' - }); - - await CeiUtils.retry(async () => { - const postLogin = await this._cookieManager.fetch("https://ceiapp.b3.com.br/CEI_Responsivo/login.aspx", { - "headers": { - "accept": "*/*", - "accept-language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7", - "cache-control": "no-cache", - "content-type": "application/x-www-form-urlencoded; charset=UTF-8", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-microsoftajax": "Delta=true", - "x-requested-with": 
"XMLHttpRequest", - 'Connection': 'keep-alive' - }, - "referrer": "https://ceiapp.b3.com.br/CEI_Responsivo/login.aspx", - "referrerPolicy": "strict-origin-when-cross-origin", - "body": formData, - "method": "POST", - "mode": "cors", - "credentials": "include" - }, this._options.loginTimeout); - - const accessCookie = ((postLogin.headers.raw()['set-cookie'] || []).find(str => str.includes('Acesso=')) || ''); - - if (accessCookie.includes('Acesso=0')) { - /* istanbul ignore next */ - if ((this.options && this.options.trace) || false) - console.log('Login success'); - this._isLogged = true; - } else if (accessCookie.includes('Acesso=1')) { - throw new CeiCrawlerError(CeiErrorTypes.WRONG_PASSWORD, 'Senha inválida'); - } else { - const loginText = await postLogin.text(); - const info = CeiUtils.extractMessagePostResponse(loginText); - throw new CeiCrawlerError(CeiErrorTypes.LOGIN_FAILED, info.message || 'Login falhou'); - } - }, e => e.type === CeiErrorTypes.LOGIN_FAILED && e.message.includes('could not be activated')); + console.log(`Logging at CEI using ${this.options.loginOptions.strategy}...`); + + await this._ceiLoginService.getToken(); + console.log('FOOOOOOOOOOOOOOOOOOOOOI'); + } /** diff --git a/src/lib/CeiCrawlerError.js b/src/lib/CeiCrawlerError.js index e7ebd0a..2a8550d 100644 --- a/src/lib/CeiCrawlerError.js +++ b/src/lib/CeiCrawlerError.js @@ -11,7 +11,8 @@ const CeiErrorTypes = Object.freeze({ WRONG_PASSWORD: 'WRONG_PASSWORD', SUBMIT_ERROR: 'SUBMIT_ERROR', SESSION_HAS_EXPIRED: 'SESSION_HAS_EXPIRED', - NAVIGATION_TIMEOUT: 'NAVIGATION_TIMEOUT' + NAVIGATION_TIMEOUT: 'NAVIGATION_TIMEOUT', + INVALID_LOGIN_STRATEGY: 'INVALID_LOGIN_STRATEGY' }); module.exports = { diff --git a/src/lib/CeiLoginService.js b/src/lib/CeiLoginService.js new file mode 100644 index 0000000..4e7a208 --- /dev/null +++ b/src/lib/CeiLoginService.js @@ -0,0 +1,102 @@ +const puppeteer = require('puppeteer-core'); +const typedefs = require("./typedefs"); +const CeiUtils = require('./CeiUtils') 
+const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); + +class CeiLoginService { + + /** @type {String} - Username to fill in at CEI page */ + _username = null; + + /** @type {String} - Password to fill in at CEI page */ + _password = null; + + /** @type {typedefs.LoginOptions} - Options for CEI Crawler and Fetch */ + _options = null; + + constructor(username, password, options) { + this._username = username; + this._password = password; + this._options = options; + } + + async getToken() { + switch(this._options.strategy) { + case 'user-input': + return await this._getTokenByUserInput(); + default: + throw CeiCrawlerError(CeiErrorTypes.INVALID_LOGIN_STRATEGY, `Invalid login strategy: ${this._options.strategy}`); + } + } + + async _getTokenByUserInput() { + const browser = await puppeteer.launch({ + headless: false, + executablePath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe', + args: ['--start-maximized'] + }); + const homePage = await browser.newPage(); + let mainPage = null; + await homePage.goto('https://www.investidor.b3.com.br/nova-area-logada'); + + // All this controlling is done like this because CEI and Puppeteer do not go so well together. + // If anything fails, the browser will hang and wait for the user to resolve the login himself + do { + try { + await CeiUtils.retry(async () => { await homePage.waitForNavigation({ timeout: 1000 }); }, 2, 1000, true); + await homePage.click('.cabecalho a'); + + mainPage = await this._getMainPage(browser); + } catch (e) { + console.log('Failed to click login button. 
Will reload to try again.'); + } + await homePage.reload(); + } while (mainPage === null); + + try { + await mainPage.waitForTimeout(4000); + await CeiUtils.retry(async () => { await mainPage.focus('#extension_DocInput'); }, 10, 1000, true); + await mainPage.keyboard.type(this._username); + await CeiUtils.retry(async () => { await mainPage.click('#continue'); }, 8, 301); + await CeiUtils.retry(async () => { await mainPage.focus('#password'); }, 10, 1000, true); + await mainPage.keyboard.type(this._password); + } catch (e) { + console.log('Failed trying to fill user info. Will wait for the user to resolve the login himself'); + } + + await mainPage.waitForFunction(() => { + return document.querySelector('.saudacao') !== null; + }, { timeout: 0 }); + + const sessionStorage = await mainPage.evaluate(() => { + const json = {}; + for (let i = 0; i < sessionStorage.length; i++) { + const key = sessionStorage.key(i); + json[key] = sessionStorage.getItem(key); + } + return json; + }); + + console.log(JSON.stringify(sessionStorage)); + + return sessionStorage; + } + + /** + * Function to be sure we have a tab opened before moving on. 
See: https://github.com/puppeteer/puppeteer/issues/1992 + * @param {puppeteer.Browser} browser Puppeteer browser running + * @returns {puppeteer.Page} The main page to keep crawling + */ + async _getMainPage(browser) { + while (true) { + for (const p of (await browser.pages())) { + if (p.url().indexOf('b3investidor.b2clogin.com') !== -1) + return p; + }; + await CeiUtils.sleep(200); + } + } + +} + +module.exports = CeiLoginService; \ No newline at end of file diff --git a/src/lib/CeiUtils.js b/src/lib/CeiUtils.js index f53ff4d..7bc9498 100644 --- a/src/lib/CeiUtils.js +++ b/src/lib/CeiUtils.js @@ -45,11 +45,11 @@ class CeiUtils { } /** - * @param {Number} timestamp - Time to sleep in miliseconds + * @param {Number} ms - Time to sleep in miliseconds * @returns {Promise} - Promise */ - static async sleep(timestamp) { - return new Promise((resolve) => setTimeout(resolve, timestamp)); + static async sleep(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); } /** @@ -61,11 +61,13 @@ class CeiUtils { /** * @param {Promise|Function} callback - Time to sleep in miliseconds + * @param {Number} [attempts=5] - Number of attempts before throw exception + * @param {Number} [delayBetween=100] - Delay in miliseconds before retrying + * @param {Number} [failSilently=false] - If set to true and the callback still fails, only null will be returned and no error will be thrown * @param {CheckRetryCallback} [checkRetryCallback] - Filter when need attempt again on error - * @param {Number} [attempts] - Number of attempts before throw exception * @returns {Promise} - Result of callback */ - static async retry(callback, checkRetryCallback = () => true, attempts = 3) { + static async retry(callback, attempts = 5, delayBetween = 100, failSilently = false, checkRetryCallback = () => true) { let result = null; while (true) { @@ -76,8 +78,8 @@ class CeiUtils { if (checkRetryCallback(e)) { attempts--; if (attempts === 0) throw e; - await CeiUtils.sleep(100); - } else { + await 
CeiUtils.sleep(delayBetween); + } else if (!failSilently) { throw e; } } diff --git a/src/lib/FetchCookieManager.js b/src/lib/FetchCookieManager.js index cc24cf0..39fc968 100644 --- a/src/lib/FetchCookieManager.js +++ b/src/lib/FetchCookieManager.js @@ -8,7 +8,7 @@ const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); const { time } = require('console'); const certs = [ - readFileSync(__dirname + '/certificate.crt') + // readFileSync(__dirname + '/certificate.crt') ]; const agent = new https.Agent({ diff --git a/src/lib/certificate.crt b/src/lib/certificate.crt deleted file mode 100644 index 34197b7..0000000 --- a/src/lib/certificate.crt +++ /dev/null @@ -1,39 +0,0 @@ ------BEGIN CERTIFICATE----- -MIIGwzCCBaugAwIBAgIMdw8BHYBkKgFvz5NxMA0GCSqGSIb3DQEBCwUAMFAxCzAJ -BgNVBAYTAkJFMRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMSYwJAYDVQQDEx1H -bG9iYWxTaWduIFJTQSBPViBTU0wgQ0EgMjAxODAeFw0xOTA5MjMxMzUxMDhaFw0y -MTA5MjMxMzUxMDhaMIGlMQswCQYDVQQGEwJCUjELMAkGA1UECBMCU1AxEjAQBgNV -BAcTCVNBTyBQQVVMTzEzMDEGA1UECxMqREktR1NJIC0gR2VzdGFvIGRlIFNlZ3Vy -YW5jYSBkYSBJbmZvcm1hY2FvMSgwJgYDVQQKEx9CMyBTLkEuIC0gQlJBU0lMLCBC -T0xTQSwgQkFMQ0FPMRYwFAYDVQQDEw1jZWkuYjMuY29tLmJyMIIBIjANBgkqhkiG -9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyjRX1Pv5RL13AyEohbTsf5ZhDqi/ptSjlQR2 -AJBbE0gx/PSbPO/J7a5JDC3wr1eYHVNy3og0TvRz/gXHl8C+m4kORR2QAD/ZDfyb -GCIBSU19AV8WU2v5vic+SN3JuKxm53QFQKRVMMFMyOQfag1RjGFvHHmbD01J93Ne -KVHIPimQbxhr62vQOXZEH6+qPJGKGbrPuLbMkN91GjBAB1dVLKehWYCrIXLlprJ+ -cEfMacSdaRBhtqw0WPEf7R+6UpdsTZm4jNuaXcxUwy7I0Vyppgzj0U5cqvo5QO4J -u3rat5em/aYPc9kUj81x7frkdMGoSsTmqg3qd1kYN8cr3VY9JwIDAQABo4IDRTCC -A0EwDgYDVR0PAQH/BAQDAgWgMIGOBggrBgEFBQcBAQSBgTB/MEQGCCsGAQUFBzAC -hjhodHRwOi8vc2VjdXJlLmdsb2JhbHNpZ24uY29tL2NhY2VydC9nc3JzYW92c3Ns -Y2EyMDE4LmNydDA3BggrBgEFBQcwAYYraHR0cDovL29jc3AuZ2xvYmFsc2lnbi5j -b20vZ3Nyc2FvdnNzbGNhMjAxODBWBgNVHSAETzBNMEEGCSsGAQQBoDIBFDA0MDIG -CCsGAQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5 -LzAIBgZngQwBAgIwCQYDVR0TBAIwADA/BgNVHR8EODA2MDSgMqAwhi5odHRwOi8v 
-Y3JsLmdsb2JhbHNpZ24uY29tL2dzcnNhb3Zzc2xjYTIwMTguY3JsMBgGA1UdEQQR -MA+CDWNlaS5iMy5jb20uYnIwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMC -MB8GA1UdIwQYMBaAFPjvf/LNeGeo3m+PJI2I8YcDArPrMB0GA1UdDgQWBBS5bT/5 -axeASSKCgPQ4bA5slkUtATCCAX8GCisGAQQB1nkCBAIEggFvBIIBawFpAHcAXNxD -kv7mq0VEsV6a1FbmEDf71fpH3KFzlLJe5vbHDsoAAAFtXmPtbgAABAMASDBGAiEA -nhoOH8aB1NCs2PNcEEDtpGTJX9EkOjdFw+L+RL4f7WMCIQDSW7Wv7PH61f1Rl2yZ -KBMBAaVk13st3zVUk8JZkXzQLgB2AG9Tdqwx8DEZ2JkApFEV/3cVHBHZAsEAKQaN -sgiaN9kTAAABbV5j7Y0AAAQDAEcwRQIgV0hfySi/U6LQ/6FoQQWXtEqSC2INKvbL -uKCUuoNxgesCIQCaoMiKsNO8JjlXMnxo4BYhx92W7LhaGNq4CkI5SX1NqgB2AO5L -vbd1zmC64UJpH6vhnmajD35fsHLYgwDEe4l6qP3LAAABbV5j8OMAAAQDAEcwRQIg -cclQJuCTQ1S+Tcdy/veapq7c4sY+HL4KbRnGP+BXmmcCIQDeecG0CMu9EzNoJbw7 -14e83sFJUubzrGLsT+uYSQBJeTANBgkqhkiG9w0BAQsFAAOCAQEAa2cn4L8vY705 -AR5qDkqO5aYSMDQcBN4zowYZLqMGAMdEy3t/4Nw60AjkTzNs+R9M6ksGBU1Xk65s -S8nSFrKcWwrgYzn0QFFfGI6bjtdPUEfrileii/Y9Nh+Jq1KVCxePDq26QfPFeyjJ -QfDdEtZobf0T+Z8jeiN8Y6sVk3SZi4J/qmeiLIKb12x75y1GRymbPyZM0jwKzdad -w5PFAE7e7KkcbSnMIkujG7bBAubGCYOLMEan/Ara5ZSjKiffTMM4n5U+vVclDa01 -eMu64eZtpMLN9rvqSJIQ4n9aWIh08NJgzhUHs+v+4+CTtcqFQbnqsEVs/jqTdsL1 -LfGnbThZpw== ------END CERTIFICATE----- diff --git a/src/lib/typedefs.js b/src/lib/typedefs.js index 259a41c..196a4fa 100644 --- a/src/lib/typedefs.js +++ b/src/lib/typedefs.js @@ -2,12 +2,19 @@ * @namespace typedefs */ +/** + * @typedef LoginOptions + * @property {String} strategy - The strategy the crawler will use to make the login. Options are: `user-input` + * @property {Number} timeout - Login timeout + * @property {String} browserPath - Path of the browser to run puppeteer + */ + /** * @typedef CeiCrawlerOptions * @property {boolean} trace - Indicates if it should print trace messages. Helpful for debugging. 
* @property {boolean} capDates - Prevent crawling with an invalid date in CEI * @property {Number} navigationTimeout - Fetch timeout - * @property {Number} loginTimeout - Login timeout + * @property {LoginOptions} loginOptions - The strategy the crawler will use to make the login. Options are: `user-input` * @memberof typdefs */ From d36fffd4260172ee1eb6d972789bdce53aa430fe Mon Sep 17 00:00:00 2001 From: Menighin Date: Mon, 26 Jul 2021 21:22:46 -0300 Subject: [PATCH 02/17] Crawling for position --- package.json | 1 + src/lib/AxiosWrapper.js | 41 +++ src/lib/CeiCrawler.js | 107 +------- src/lib/CeiLoginService.js | 4 +- src/lib/CeiUtils.js | 8 + src/lib/DividendsCrawler.js | 350 -------------------------- src/lib/FetchCookieManager.js | 93 ------- src/lib/IPOCrawler.js | 260 ------------------- src/lib/LastExecutionCrawler.js | 5 + src/lib/PositionCrawler.js | 37 +++ src/lib/StockHistoryCrawler.js | 347 ------------------------- src/lib/TreasureCrawler.js | 434 -------------------------------- src/lib/WalletCrawler.js | 374 --------------------------- src/lib/typedefs.js | 7 + 14 files changed, 108 insertions(+), 1960 deletions(-) create mode 100644 src/lib/AxiosWrapper.js delete mode 100644 src/lib/DividendsCrawler.js delete mode 100644 src/lib/FetchCookieManager.js delete mode 100644 src/lib/IPOCrawler.js create mode 100644 src/lib/LastExecutionCrawler.js create mode 100644 src/lib/PositionCrawler.js delete mode 100644 src/lib/StockHistoryCrawler.js delete mode 100644 src/lib/TreasureCrawler.js delete mode 100644 src/lib/WalletCrawler.js diff --git a/package.json b/package.json index a4ef7f5..1ed23e3 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ "dependencies": { "@babel/runtime": "^7.9.6", "abort-controller": "^3.0.0", + "axios": "^0.21.1", "cheerio": "^1.0.0-rc.3", "node-fetch": "^2.6.1", "normalize-html-whitespace": "^1.0.0", diff --git a/src/lib/AxiosWrapper.js b/src/lib/AxiosWrapper.js new file mode 100644 index 0000000..c45bcb0 --- /dev/null 
+++ b/src/lib/AxiosWrapper.js @@ -0,0 +1,41 @@ +const axios = require('axios').default; +const https = require('https'); + +class AxiosWrapper { + + static setup(options) { + const httpsAgent = new https.Agent({ + rejectUnauthorized: false, + }); + + axios.interceptors.request.use(config => { + config.headers = { + 'Authorization': `Bearer ${options.auth.token}` + }; + config.params['cache-guid'] = options.auth['cache-guid']; + config.httpsAgent = httpsAgent; + return config; + }); + } + + static async request(url, queryParams = {}, pathParams = {}) { + try { + const urlWithParams = Object.keys(pathParams) + .reduce((p, v) => { + return p.replace(`:${v}`, pathParams[v]); + }, url); + + const response = await axios.get(urlWithParams, { + params: { + ...queryParams + } + }); + return response; + } catch(e) { + console.log('ERROR ON AXIOS: ' + e.message); + throw e; + } + } +} + +module.exports = AxiosWrapper; \ No newline at end of file diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index 40e2f6c..4cbee67 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -1,14 +1,9 @@ -const StockHistoryCrawler = require('./StockHistoryCrawler'); -const DividendsCrawler = require('./DividendsCrawler'); -const IPOCrawler = require('./IPOCrawler'); -const WalletCrawler = require('./WalletCrawler'); -const TreasureCrawler = require('./TreasureCrawler'); +const PositionCrawler = require('./PositionCrawler'); const typedefs = require("./typedefs"); const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); -const FetchCookieManager = require('./FetchCookieManager'); -const cheerio = require('cheerio'); const CeiUtils = require('./CeiUtils'); const CeiLoginService = require('./CeiLoginService'); +const AxiosWrapper = require('./AxiosWrapper'); class CeiCrawler { @@ -41,6 +36,7 @@ class CeiCrawler { this._setDefaultOptions(); this._ceiLoginService = new CeiLoginService(username, password, this.options.loginOptions); + 
AxiosWrapper.setup(this.options); } @@ -64,106 +60,17 @@ class CeiCrawler { if ((this.options && this.options.trace) || false) console.log(`Logging at CEI using ${this.options.loginOptions.strategy}...`); - await this._ceiLoginService.getToken(); - console.log('FOOOOOOOOOOOOOOOOOOOOOI'); - + this.options.auth = await this._ceiLoginService.getToken(); } /** * Returns the stock history - * @param {Date} [startDate] - The start date of the history - * @param {Date} [endDate] - The end date of the history + * @param {Date} [date] - The date of the position * @returns {Promise} - List of Stock histories */ - async getStockHistory(startDate, endDate) { - await this._login(); - return await StockHistoryCrawler.getStockHistory(this._cookieManager, this.options, startDate, endDate); - } - - /** - * Returns the options for the stock history - * @returns {Promise} - Options for stock history - */ - async getStockHistoryOptions() { - await this._login(); - return await StockHistoryCrawler.getStockHistoryOptions(this._cookieManager, this.options); - } - - /** - * Returns the dividends data for each account in CEI - * @param {Date} [date] - The date to get the dividends - * @returns {Promise} - Options for dividends - */ - async getDividendsOptions() { - await this._login(); - return await DividendsCrawler.getDividendsOptions(this._cookieManager, this._options); - } - - /** - * Returns the dividends data for each account in CEI - * @param {Date} [startDate] - The start date to get the IPO transactions - * @param {Date} [endDate] - The end date to get the IPO transactions - * @returns {Promise} - Options for dividends - */ - async getIPOOptions() { - await this._login(); - return await IPOCrawler.getIPOOptions(this._cookieManager, this._options); - } - - /** - * Returns the wallets for each account in CEI - * @param {Date} [date] - The date to get the wallet - * @returns {Promise} - List of available Dividends information - */ - async getWallet(date) { - await this._login(); - 
return await WalletCrawler.getWallet(this._cookieManager, this.options, date); - } - - /** - * Returns the options for the wallet - * @returns {Promise} - Options for wallet - */ - async getWalletOptions() { - await this._login(); - return await WalletCrawler.getWalletOptions(this._cookieManager, this._options); - } - - /** - * Returns the treasure for each account in CEI - * @param {Date} [date] - The date to get the wallet - * @returns {Promise} - List of available Treasure information - */ - async getTreasures(date) { - await this._login(); - return await TreasureCrawler.getTreasure(this._cookieManager, this.options, date); - } - - /** - * Returns the options for the treasure - * @returns {Promise} - Options for treasure - */ - async getTreasureOptions() { + async getPosition(date = new Date()) { await this._login(); - return await TreasureCrawler.getTreasureOptions(this._cookieManager, this._options); + return await PositionCrawler.getPosition(this.options, date); } } diff --git a/src/lib/CeiLoginService.js b/src/lib/CeiLoginService.js index 4e7a208..15e85e9 100644 --- a/src/lib/CeiLoginService.js +++ b/src/lib/CeiLoginService.js @@ -76,9 +76,9 @@ class CeiLoginService { } return json; }); - - console.log(JSON.stringify(sessionStorage)); + await browser.close(); + return sessionStorage; } diff --git a/src/lib/CeiUtils.js b/src/lib/CeiUtils.js index 7bc9498..e698091 100644 --- a/src/lib/CeiUtils.js +++ b/src/lib/CeiUtils.js @@ -9,6 +9,14 @@ class CeiUtils { .padStart(2, "0")}/${date.getFullYear()}`; } + /** + * Returns a date in the format yyyy-MM-dd for input at CEI + * @param {Date} date - Date to be parsed + */ + static getDateForQueryParam(date) { + return date.toISOString().slice(0,10); + } + /** * Return a date object given a date string * @param {String} dateStr Date string in dd/MM/yyyy format diff --git a/src/lib/DividendsCrawler.js b/src/lib/DividendsCrawler.js deleted file mode 100644 index 54fb12f..0000000 --- a/src/lib/DividendsCrawler.js +++ 
/dev/null @@ -1,350 +0,0 @@ -const typedefs = require("./typedefs"); -const CeiUtils = require('./CeiUtils'); -const FetchCookieManager = require('./FetchCookieManager'); -const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError') -const cheerio = require('cheerio'); -const normalizeWhitespace = require('normalize-html-whitespace'); - -const PAGE = { - URL: 'https://ceiapp.b3.com.br/CEI_Responsivo/ConsultarProventos.aspx', - SUBMIT_BUTTON: '#ctl00_ContentPlaceHolder1_btnConsultar', - TABLE_CLASS: '.responsive tbody', - TABLE_CLASS_ROWS: '.responsive tbody tr', - DATE_MIN_VALUE: '#ctl00_ContentPlaceHolder1_lblPeriodoInicial', - DATE_MAX_VALUE: '#ctl00_ContentPlaceHolder1_lblPeriodoFinal', - DATE_INPUT: '#ctl00_ContentPlaceHolder1_txtData', - SELECT_INSTITUTION: '#ctl00_ContentPlaceHolder1_ddlAgentes', - SELECT_INSTITUTION_OPTIONS: '#ctl00_ContentPlaceHolder1_ddlAgentes option', - SELECT_ACCOUNT: '#ctl00_ContentPlaceHolder1_ddlContas', - SELECT_ACCOUNT_OPTIONS: '#ctl00_ContentPlaceHolder1_ddlContas option', - PAGE_ALERT_ERROR: '.alert-box.alert', - PAGE_ALERT_SUCCESS: '.alert-box.success', - TABLE_TITLE_SELECTOR: 'p.title', - PAST_EVENTS_TITLE: 'Eventos em Dinheiro Creditado', - FUTURE_EVENTS_TITLE: 'Eventos em Dinheiro Provisionado', - SPLIT_EVENTS_TITLE: 'Eventos em Ativos Creditado' -} - -const DIVIDENDS_TABLE_HEADERS = { - stock: 'string', - stockType: 'string', - code: 'string', - date: 'date', - type: 'string', - quantity: 'int', - factor: 'int', - grossValue: 'float', - netValue: 'float' -}; - -const SPLITS_TABLE_HEADERS = { - stock: 'string', - stockType: 'string', - code: 'string', - type: 'string', - date: 'date', - baseQuantity: 'int', - factor: 'int', - destinationCode:'string', - quantity: 'int', - eventValue: 'float', - exerciseValue: 'float' -} - -const FETCH_OPTIONS = { - DIVIDENDS_INSTITUTION: { - "headers": { - "accept": "*/*", - "accept-language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7", - "cache-control": "no-cache", - "content-type": 
"application/x-www-form-urlencoded; charset=UTF-8", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-microsoftajax": "Delta=true", - "x-requested-with": "XMLHttpRequest" - }, - "referrer": "https://ceiapp.b3.com.br/CEI_Responsivo/ConsultarProventos.aspx", - "referrerPolicy": "strict-origin-when-cross-origin", - "body": null, - "method": "POST", - "mode": "cors", - "credentials": "include" - }, - DIVIDENDS_ACCOUNT: { - "headers": { - "accept": "*/*", - "accept-language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7", - "cache-control": "no-cache", - "content-type": "application/x-www-form-urlencoded; charset=UTF-8", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-microsoftajax": "Delta=true", - "x-requested-with": "XMLHttpRequest" - }, - "referrer": "https://ceiapp.b3.com.br/CEI_Responsivo/ConsultarProventos.aspx", - "referrerPolicy": "strict-origin-when-cross-origin", - "body": null, - "method": "POST", - "mode": "cors", - "credentials": "include" - } -}; - -const FETCH_FORMS = { - DIVIDENDS_INSTITUTION: [ - 'ctl00$ContentPlaceHolder1$ToolkitScriptManager1', - 'ctl00_ContentPlaceHolder1_ToolkitScriptManager1_HiddenField', - '__EVENTTARGET', - '__EVENTARGUMENT', - '__LASTFOCUS', - '__VIEWSTATE', - '__VIEWSTATEGENERATOR', - '__EVENTVALIDATION', - 'ctl00$ContentPlaceHolder1$ddlAgentes', - 'ctl00$ContentPlaceHolder1$ddlContas', - 'ctl00$ContentPlaceHolder1$txtData', - '__ASYNCPOST' - ], - DIVIDENDS_ACCOUNT: [ - 'ctl00$ContentPlaceHolder1$ToolkitScriptManager1', - 'ctl00_ContentPlaceHolder1_ToolkitScriptManager1_HiddenField', - 'ctl00$ContentPlaceHolder1$ddlAgentes', - 'ctl00$ContentPlaceHolder1$ddlContas', - 'ctl00$ContentPlaceHolder1$txtData', - '__EVENTTARGET', - '__EVENTARGUMENT', - '__LASTFOCUS', - '__VIEWSTATE', - '__VIEWSTATEGENERATOR', - '__EVENTVALIDATION', - '__ASYNCPOST', - 'ctl00$ContentPlaceHolder1$btnConsultar' - ] -} - -class DividendsCrawler { - - /** - * Gets 
dividends data available on CEI page. - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {Date} [date] - The date of the history. If none passed, the default of CEI will be used - * @returns {Promise} - List of available Dividends information - */ - static async getDividends(cookieManager, options = null, date = null) { - const getPage = await cookieManager.fetch(PAGE.URL); - const domPage = cheerio.load(await getPage.text()); - - const traceOperations = (options && options.trace) || false; - - const result = []; - - // Set date - if (date !== null) { - const minDateStr = domPage(PAGE.DATE_MIN_VALUE).text().trim(); - const minDate = CeiUtils.getDateFromInput(minDateStr); - - const maxDateStr = domPage(PAGE.DATE_MAX_VALUE).text().trim(); - const maxDate = CeiUtils.getDateFromInput(maxDateStr); - - // Prevent date out of bound if parameter is set - if (options.capDates && date < minDate) - date = minDate; - - if (options.capDates && date > maxDate) - date = maxDate; - - domPage(PAGE.DATE_INPUT).attr('value', CeiUtils.getDateForInput(date)); - } - - // Get all institutions to iterate - const institutions = domPage(PAGE.SELECT_INSTITUTION_OPTIONS) - .map((_, option) => ({ - value: option.attribs.value, - label: domPage(option).text() - })).get() - .filter(institution => institution.value > 0); - - // Iterate over institutions, accounts, processing the stocks - for (const institution of institutions) { - - /* istanbul ignore next */ - if (traceOperations) - console.log(`Selecting institution ${institution.label} (${institution.value})`) - - domPage(PAGE.SELECT_INSTITUTION).attr('value', institution.value); - - const formDataInstitution = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.DIVIDENDS_INSTITUTION, { - ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$ddlAgentes', - __EVENTTARGET: 'ctl00$ContentPlaceHolder1$ddlAgentes' - }); - - const req = await 
cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.DIVIDENDS_INSTITUTION, - body: formDataInstitution - }); - - const reqInstitutionText = await req.text(); - const reqInstitutionDOM = cheerio.load(reqInstitutionText); - - const updtForm = CeiUtils.extractUpdateForm(reqInstitutionText); - CeiUtils.updateFieldsDOM(domPage, updtForm); - - const accounts = reqInstitutionDOM(PAGE.SELECT_ACCOUNT_OPTIONS) - .map((_, option) => option.attribs.value).get() - .filter(account => account > 0); - - for (const account of accounts) { - /* istanbul ignore next */ - if (traceOperations) - console.log(`Selecting account ${account}`); - - domPage(PAGE.SELECT_ACCOUNT).attr('value', account); - - const { futureEvents, pastEvents, splitEvents } = await this._getDataPage(domPage, cookieManager, traceOperations); - - // Save the result - result.push({ - institution: institution.label, - account: account, - futureEvents: futureEvents, - pastEvents: pastEvents, - splitEvents: splitEvents - }); - } - } - - return result; - } - - /** - * Returns the available options to get Dividends data - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @returns {Promise} - Options to get data from dividends - */ - static async getDividendsOptions(cookieManager, options = null) { - const getPage = await cookieManager.fetch(PAGE.URL); - const domPage = cheerio.load(await getPage.text()); - - const minDateStr = domPage(PAGE.DATE_MIN_VALUE).text().trim(); - const maxDateStr = domPage(PAGE.DATE_MAX_VALUE).text().trim(); - - // Get all institutions to iterate - const institutions = domPage(PAGE.SELECT_INSTITUTION_OPTIONS) - .map((_, option) => ({ - value: option.attribs.value, - label: domPage(option).text() - })) - .get() - .filter(institution => institution.value > 0); - - for (const institution of institutions) { - domPage(PAGE.SELECT_INSTITUTION).attr('value', institution.value); - const formDataStr 
= CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.DIVIDENDS_INSTITUTION); - - const getAcountsPage = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.DIVIDENDS_INSTITUTION, - body: formDataStr - }); - - const getAcountsPageTxt = await getAcountsPage.text(); - - const getAcountsPageDom = cheerio.load(getAcountsPageTxt); - - const accounts = getAcountsPageDom(PAGE.SELECT_ACCOUNT_OPTIONS) - .map((_, option) => option.attribs.value).get() - .filter(accountId => accountId > 0); - - institution.accounts = accounts; - } - - return { - minDate: minDateStr, - maxDate: maxDateStr, - institutions: institutions - } - } - - /** - * Returns the data from the page after trying more than once - * @param {cheerio.Root} dom DOM of page - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {Boolean} traceOperations - Whether to trace operations or not - */ - static async _getDataPage(dom, cookieManager, traceOperations) { - while(true) { - const formDataHistory = CeiUtils.extractFormDataFromDOM(dom, FETCH_FORMS.DIVIDENDS_ACCOUNT, { - ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar', - __EVENTARGUMENT: '', - __LASTFOCUS: '' - }); - - const dividendsRequest = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.DIVIDENDS_ACCOUNT, - body: formDataHistory - }); - - const dividendsText = normalizeWhitespace(await dividendsRequest.text()); - const errorMessage = CeiUtils.extractMessagePostResponse(dividendsText); - - if (errorMessage && errorMessage.type === 2) { - throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message); - } - - const dividendsDOM = cheerio.load(dividendsText); - - // Process the page - /* istanbul ignore next */ - if (traceOperations) - console.log(`Processing dividends data`); - - const futureEvents = this._processEvents(dividendsDOM, PAGE.FUTURE_EVENTS_TITLE); - const pastEvents = this._processEvents(dividendsDOM, 
PAGE.PAST_EVENTS_TITLE); - const splitEvents = this._processEvents(dividendsDOM, PAGE.SPLIT_EVENTS_TITLE); - - if (errorMessage.type !== undefined || futureEvents.length > 0 || pastEvents.length > 0 || splitEvents.length > 0) { - return { - futureEvents, - pastEvents, - splitEvents - }; - } - - const updtForm = CeiUtils.extractUpdateForm(dividendsText); - CeiUtils.updateFieldsDOM(dom, updtForm); - } - } - - /** - * Process the events given the parameters - * @param {cheerio.Root} dom DOM table stock history - * @param {String} tableTitle The title of the table to process the events - */ - static _processEvents(dom, tableTitle) { - // The header for dividends is the same, but for the split events is different - const headerKey = tableTitle === PAGE.SPLIT_EVENTS_TITLE ? SPLITS_TABLE_HEADERS : DIVIDENDS_TABLE_HEADERS; - - const headers = Object.keys(headerKey); - - const data = dom(PAGE.TABLE_TITLE_SELECTOR) - .filter((_, el) => dom(el).text().includes(tableTitle)) - .first() - .map((_, el) => dom(el).parent()) - .map((_, el) => dom(PAGE.TABLE_CLASS_ROWS, el).get()) - .map((_, tr) => dom('td', tr) - .map((_, td) => dom(td).text().trim()) - .get() - .reduce((dict, txt, idx) => { - dict[headers[idx]] = txt; - return dict; - }, {}) - ) - .get(); - - return CeiUtils.parseTableTypes(data, headerKey); - } -} - -module.exports = DividendsCrawler; \ No newline at end of file diff --git a/src/lib/FetchCookieManager.js b/src/lib/FetchCookieManager.js deleted file mode 100644 index 39fc968..0000000 --- a/src/lib/FetchCookieManager.js +++ /dev/null @@ -1,93 +0,0 @@ -const https = require('https'); -const { readFileSync } = require('fs'); -const nodeFetch = require('node-fetch'); -const AbortController = require('abort-controller'); -const tough = require('tough-cookie'); -const CeiUtils = require('./CeiUtils'); -const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); -const { time } = require('console'); - -const certs = [ - // readFileSync(__dirname + 
'/certificate.crt') -]; - -const agent = new https.Agent({ - ca: certs, - keepAlive: true, - rejectUnauthorized: false -}); - - -class FetchCookieManager { - /** @type {tough.CookieJar} */ - _jar = false; - _navigationTimeout = 30000; - - constructor(defaultHeaders = {}, navigationTimeout = 30000) { - this._jar = new tough.CookieJar(); - this._navigationTimeout = navigationTimeout; - this._defaultHeaders = defaultHeaders; - } - - /** - * - * @param {String} url - URL - * @param {Object} opts - fetch options - * @param {Number} fetchTimeout - fetch fetchTimeout - * @returns {Promise} - Response - */ - async fetch(url, opts = {}, fetchTimeout = null) { - const cookie = await this._jar.getCookieString(url); - - const newOpts = { - ...opts, - headers: { - ...this._defaultHeaders, - ...(opts.headers || {}), - cookie - }, - agent - } - - const response = await CeiUtils.retry( - async () => { - const controller = new AbortController(); - const timeout = setTimeout(() => { - controller.abort(); - }, fetchTimeout || this._navigationTimeout); - - let resp; - try { - resp = await nodeFetch(url, { - ...newOpts, - signal: controller.signal - }); - } catch (error) { - clearTimeout(timeout); - if (error.name === 'AbortError') - throw new CeiCrawlerError(CeiErrorTypes.NAVIGATION_TIMEOUT, `Requisição estourou o tempo limite em: ${url}`); - throw error; - } finally { - clearTimeout(timeout); - } - - return resp; - }, - e => e.type === 'system' && e.errno === 'ECONNRESET' && e.code === 'ECONNRESET' - ); - - const newCookies = response.headers.raw()['set-cookie'] || []; - - await Promise.all( - newCookies.map(newCookie => this._jar.setCookie(newCookie, response.url, { ignoreError: true })) - ); - - if (response.status === 302) { - throw new CeiCrawlerError(CeiErrorTypes.SESSION_HAS_EXPIRED, 'Sessão expirou, faça login novamente'); - } - - return response - } -}; - -module.exports = FetchCookieManager; diff --git a/src/lib/IPOCrawler.js b/src/lib/IPOCrawler.js deleted file mode 100644 
index e852b71..0000000 --- a/src/lib/IPOCrawler.js +++ /dev/null @@ -1,260 +0,0 @@ -const typedefs = require("./typedefs"); -const CeiUtils = require('./CeiUtils'); -const FetchCookieManager = require('./FetchCookieManager'); -const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError') -const cheerio = require('cheerio'); -const normalizeWhitespace = require('normalize-html-whitespace'); - -const PAGE = { - URL: 'https://ceiapp.b3.com.br/CEI_Responsivo/ofertas-publicas.aspx', - SUBMIT_BUTTON: '#ctl00_ContentPlaceHolder1_btnConsultar', - TABLE_CLASS: '.responsive tbody', - TABLE_CLASS_ROWS: '.responsive tbody tr', - DATE_MIN_VALUE: '#ctl00_ContentPlaceHolder1_lblPeriodoInicial', - DATE_MAX_VALUE: '#ctl00_ContentPlaceHolder1_lblPeriodoFinal', - DATE_INPUT: '.datepicker', - SELECT_INSTITUTION: '#ctl00_ContentPlaceHolder1_ddlAgentes', - SELECT_INSTITUTION_OPTIONS: '#ctl00_ContentPlaceHolder1_ddlAgentes option', -} - -const IPO_TABLE_HEADERS = { - company: 'string', - offerName: 'string', - code: 'string', - isin: 'string', - type: 'string', - buyMethod: 'string', - reservedAmount: 'int', - reservedValue: 'float', - maxPrice: 'float', - price: 'float', - allocAmount: 'int', - allocValue: 'float', - date: 'date' -}; - -const FETCH_OPTIONS = { - IPO_INSTITUTION: { - "headers": { - "accept": "*/*", - "accept-language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7", - "cache-control": "no-cache", - "content-type": "application/x-www-form-urlencoded; charset=UTF-8", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-microsoftajax": "Delta=true", - "x-requested-with": "XMLHttpRequest" - }, - "referrer": "https://ceiapp.b3.com.br/CEI_Responsivo/ofertas-publicas.aspx", - "referrerPolicy": "strict-origin-when-cross-origin", - "body": null, - "method": "POST", - "mode": "cors", - "credentials": "include" - } -}; - -const FETCH_FORMS = { - IPO_INSTITUTION: [ - 'ctl00$ContentPlaceHolder1$ToolkitScriptManager1', - 
'ctl00_ContentPlaceHolder1_ToolkitScriptManager1_HiddenField', - '__EVENTTARGET', - '__EVENTARGUMENT', - '__LASTFOCUS', - '__VIEWSTATE', - '__VIEWSTATEGENERATOR', - '__EVENTVALIDATION', - 'ctl00$ContentPlaceHolder1$ddlAgentes', - 'ctl00$ContentPlaceHolder1$txtDatePickerFiltro', - '__ASYNCPOST', - 'ctl00$ContentPlaceHolder1$btnConsultar' - ] -} - -class IPOCrawler { - - /** - * Gets ipo data available on CEI page. - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {Date} [startDate] - The start date of the history of ipo data. If none passed, the mininum available date will be used. - * @param {Date} [endDate] - The end date of the history of ipo data. If none passed, the maximum available date will be used. - * @returns {Promise} - List of available ipo information - */ - static async getIPOTransactions(cookieManager, options = null, startDate = null, endDate = null) { - const getPage = await cookieManager.fetch(PAGE.URL); - const domPage = cheerio.load(await getPage.text()); - - const traceOperations = (options && options.trace) || false; - - const result = []; - - // Set minimum and maximum date - const minDateStr = domPage(PAGE.DATE_MIN_VALUE).text().trim(); - const maxDateStr = domPage(PAGE.DATE_MAX_VALUE).text().trim(); - const minDate = CeiUtils.getDateFromInput(minDateStr); - const maxDate = CeiUtils.getDateFromInput(maxDateStr); - - startDate = startDate || minDate; - // Prevent date out of bound if parameter is set - if (startDate < minDate) - startDate = minDate; - - endDate = endDate || maxDate; - // Prevent date out of bound if parameter is set - if (endDate > maxDate) - endDate = maxDate; - - // Iterate over the range of dates and fetch the IPO transactions - for (let date = new Date(startDate); date <= endDate; date.setDate(date.getDate() + 1)) - { - if (traceOperations) - console.log(`Fetching operations from ${date}...`); - let dateResult = await this._getIPOTransactions(cookieManager, options, date); - 
dateResult.forEach(el => result.push(el)); - } - - return result; - } - - /** - * Gets ipo data available on CEI page. - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {Date} [date] - The date of the IPO transactions. - * @returns {Promise} - List of available ipo transactions. - */ - static async _getIPOTransactions(cookieManager, options, date) { - const getPage = await cookieManager.fetch(PAGE.URL); - const domPage = cheerio.load(await getPage.text()); - - const traceOperations = (options && options.trace) || false; - - const result = []; - - // Set date - domPage(PAGE.DATE_INPUT).attr('value', CeiUtils.getDateForInput(date)); - - // Get all institutions to iterate - const institutions = domPage(PAGE.SELECT_INSTITUTION_OPTIONS) - .map((_, option) => ({ - value: option.attribs.value, - label: domPage(option).text() - })).get() - .filter(institution => institution.value > 0); - - // Iterate over institutions, processing the transactions - for (const institution of institutions) { - - /* istanbul ignore next */ - if (traceOperations) - console.log(`Selecting data of institution ${institution.label} (${institution.value}) in date ${date}`); - - domPage(PAGE.SELECT_INSTITUTION).attr('value', institution.value); - - const formDataInstitution = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.IPO_INSTITUTION, { - ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$ddlAgentes', - __EVENTTARGET: 'ctl00$ContentPlaceHolder1$ddlAgentes', - ctl00$ContentPlaceHolder1$btnConsultar: 'Consultar' - }); - - const req = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.IPO_INSTITUTION, - body: formDataInstitution - }); - - const transactions = await this._getDataPage(req, cookieManager, traceOperations); - result.push( - { - institution : institution.label, - date : new Date(date.setHours(12, 0, 0, 0)), - transactions : transactions - } - ); - } - - return result; - } - 
- /** - * Returns the available options to get ipo data - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @returns {Promise} - Options to get data from ipo - */ - static async getIPOOptions(cookieManager, options = null) { - const getPage = await cookieManager.fetch(PAGE.URL); - const domPage = cheerio.load(await getPage.text()); - - const minDateStr = domPage(PAGE.DATE_MIN_VALUE).text().trim(); - const maxDateStr = domPage(PAGE.DATE_MAX_VALUE).text().trim(); - - // Get all institutions to iterate - const institutions = domPage(PAGE.SELECT_INSTITUTION_OPTIONS) - .map((_, option) => ({ - value: option.attribs.value, - label: domPage(option).text() - })) - .get() - .filter(institution => institution.value > 0); - - return { - minDate: minDateStr, - maxDate: maxDateStr, - institutions: institutions - } - } - - /** - * Returns the data from the page after trying more than once - * @param {cheerio.Root} dom DOM of page - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {Boolean} traceOperations - Whether to trace operations or not - */ - static async _getDataPage(req, cookieManager, traceOperations) { - while(true) { - - const domText = normalizeWhitespace(await req.text()); - const errorMessage = CeiUtils.extractMessagePostResponse(domText); - - if (errorMessage && errorMessage.type === 2) { - throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message); - } - - if( errorMessage.message === 'Não foram encontrados resultados para esta pesquisa.') - { - return []; - } - - const dom = cheerio.load(domText); - - // Process the page - /* istanbul ignore next */ - if (traceOperations) - console.log(`Processing ipo transactions`); - - return this._processTable(dom); - - } - } - - /** - * Process the table given the parameters - * @param {cheerio.Root} dom DOM table of ipo transactions - */ - static 
_processTable(dom) { - const headers = Object.keys(IPO_TABLE_HEADERS); - - const data = dom(PAGE.TABLE_CLASS_ROWS).get() - .map((tr) => dom('td', tr).get() - .map((td) => dom(td).text().trim()) - .reduce((dict, txt, idx) => { - dict[headers[idx]] = txt; - return dict; - }, {}) - ); - - return CeiUtils.parseTableTypes(data, IPO_TABLE_HEADERS); - } -} - -module.exports = IPOCrawler; \ No newline at end of file diff --git a/src/lib/LastExecutionCrawler.js b/src/lib/LastExecutionCrawler.js new file mode 100644 index 0000000..a5a220d --- /dev/null +++ b/src/lib/LastExecutionCrawler.js @@ -0,0 +1,5 @@ + + +const URLS = { + LAST_EXECUTION: 'https://investidor.b3.com.br/api/v1/sistema/carga/ultima-execucao' +} \ No newline at end of file diff --git a/src/lib/PositionCrawler.js b/src/lib/PositionCrawler.js new file mode 100644 index 0000000..db13234 --- /dev/null +++ b/src/lib/PositionCrawler.js @@ -0,0 +1,37 @@ +const typedefs = require("./typedefs"); +const CeiUtils = require('./CeiUtils'); +const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); +const AxiosWrapper = require('./AxiosWrapper'); + +const URLS = { + GET_DATA: 'https://investidor.b3.com.br/api/extrato/v1/posicao/:page' +}; + +class PositionCrawler { + + /** + * Get data from the position screen + * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler + * @param {Date} [date] - The date of the wallet. 
If none passed, the default of CEI will be used + * @returns {Promise} - List of Stock histories + */ + static async getPosition(options = null, date = new Date()) { + const traceOperations = (options && options.trace) || false; + + const dateStr = CeiUtils.getDateForQueryParam(date); + try { + const response = await AxiosWrapper.request(URLS.GET_DATA, { + data: dateStr + }, { + page: 1 + }); + + console.log(JSON.stringify(response.data)); + } catch(e) { + console.log('ERROR ON AXIOS: ' + e.message); + throw e; + } + } +} + +module.exports = PositionCrawler; \ No newline at end of file diff --git a/src/lib/StockHistoryCrawler.js b/src/lib/StockHistoryCrawler.js deleted file mode 100644 index fccb761..0000000 --- a/src/lib/StockHistoryCrawler.js +++ /dev/null @@ -1,347 +0,0 @@ -const typedefs = require("./typedefs"); -const CeiUtils = require('./CeiUtils'); -const FetchCookieManager = require('./FetchCookieManager'); -const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError') -const cheerio = require('cheerio'); -const normalizeWhitespace = require('normalize-html-whitespace'); - -const PAGE = { - URL: 'https://ceiapp.b3.com.br/CEI_Responsivo/negociacao-de-ativos.aspx', - SELECT_INSTITUTION: '#ctl00_ContentPlaceHolder1_ddlAgentes', - SELECT_INSTITUTION_OPTIONS: '#ctl00_ContentPlaceHolder1_ddlAgentes option', - SELECT_ACCOUNT: '#ctl00_ContentPlaceHolder1_ddlContas', - SELECT_ACCOUNT_OPTIONS: '#ctl00_ContentPlaceHolder1_ddlContas option', - START_DATE_INPUT: '#ctl00_ContentPlaceHolder1_txtDataDeBolsa', - END_DATE_INPUT: '#ctl00_ContentPlaceHolder1_txtDataAteBolsa', - ALERT_BOX: '.alert-box', - SUBMIT_BUTTON: '#ctl00_ContentPlaceHolder1_btnConsultar', - STOCKS_DIV: '#ctl00_ContentPlaceHolder1_rptAgenteBolsa_ctl00_rptContaBolsa_ctl00_pnAtivosNegociados', - STOCKS_TABLE: '#ctl00_ContentPlaceHolder1_rptAgenteBolsa_ctl00_rptContaBolsa_ctl00_pnAtivosNegociados table tbody', - STOCKS_TABLE_ROWS: 
'#ctl00_ContentPlaceHolder1_rptAgenteBolsa_ctl00_rptContaBolsa_ctl00_pnAtivosNegociados table tbody tr', - PAGE_ALERT_ERROR: '.alert-box.alert', - PAGE_ALERT_SUCCESS: '.alert-box.success' -} - -const STOCK_TABLE_HEADERS = { - date: 'date', - operation: 'string', - market: 'string', - expiration: 'string', - code: 'string', - name: 'string', - quantity: 'int', - price: 'float', - totalValue: 'float', - quotationFactor: 'float' -}; - -const SUMMARY_STOCK_TABLE_HEADERS = { - code: 'string', - period: 'string', - buyAmount: 'int', - saleAmount: 'int', - averageBuyPrice: 'float', - averageSalePrice: 'float', - quantityNet: 'int', - position: 'string', -}; - -const FETCH_OPTIONS = { - STOCK_HISTORY_INSTITUTION: { - "headers": { - "accept": "*/*", - "accept-language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7", - "cache-control": "no-cache", - "content-type": "application/x-www-form-urlencoded; charset=UTF-8", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-microsoftajax": "Delta=true", - "x-requested-with": "XMLHttpRequest", - "Connection": "keep-alive" - }, - "referrer": "https://ceiapp.b3.com.br/CEI_Responsivo/negociacao-de-ativos.aspx", - "referrerPolicy": "strict-origin-when-cross-origin", - "body": null, - "method": "POST", - "mode": "cors", - "credentials": "include" - }, - STOCK_HISTORY_ACCOUNT: { - "headers": { - "accept": "*/*", - "accept-language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7", - "cache-control": "no-cache", - "content-type": "application/x-www-form-urlencoded; charset=UTF-8", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-microsoftajax": "Delta=true", - "x-requested-with": "XMLHttpRequest" - }, - "referrer": "https://ceiapp.b3.com.br/CEI_Responsivo/negociacao-de-ativos.aspx", - "referrerPolicy": "strict-origin-when-cross-origin", - "body": null, - "method": "POST", - "mode": "cors", - "credentials": "include" - } -}; - -const FETCH_FORMS = { - 
STOCK_HISTORY_INSTITUTION: [ - 'ctl00$ContentPlaceHolder1$ToolkitScriptManager1', - 'ctl00_ContentPlaceHolder1_ToolkitScriptManager1_HiddenField', - '__EVENTTARGET', - '__EVENTARGUMENT', - '__LASTFOCUS', - '__VIEWSTATE', - '__VIEWSTATEGENERATOR', - '__EVENTVALIDATION', - 'ctl00$ContentPlaceHolder1$hdnPDF_EXCEL', - 'ctl00$ContentPlaceHolder1$ddlAgentes', - 'ctl00$ContentPlaceHolder1$ddlContas', - 'ctl00$ContentPlaceHolder1$txtDataDeBolsa', - 'ctl00$ContentPlaceHolder1$txtDataAteBolsa', - '__ASYNCPOST' - ], - STOCK_HISTORY_ACCOUNT: [ - 'ctl00$ContentPlaceHolder1$ToolkitScriptManager1', - 'ctl00_ContentPlaceHolder1_ToolkitScriptManager1_HiddenField', - 'ctl00$ContentPlaceHolder1$hdnPDF_EXCEL', - 'ctl00$ContentPlaceHolder1$ddlAgentes', - 'ctl00$ContentPlaceHolder1$ddlContas', - 'ctl00$ContentPlaceHolder1$txtDataDeBolsa', - 'ctl00$ContentPlaceHolder1$txtDataAteBolsa', - '__EVENTTARGET', - '__EVENTARGUMENT', - '__LASTFOCUS', - '__VIEWSTATE', - '__VIEWSTATEGENERATOR', - '__EVENTVALIDATION', - '__ASYNCPOST', - 'ctl00$ContentPlaceHolder1$btnConsultar' - ] -} - -const ALERT_VALIDATION = { - HISTORY_ACCOUNT: 'CEIWeb.IncluirMensagem' -} - -class StockHistoryCrawler { - /** - * Get the stock history from CEI - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @param {Date} [startDate] - The start date of the history. If none passed, the default of CEI will be used - * @param {Date} [endDate] - The end date of the history. 
If none passed, the default of CEI will be used - * @returns {Promise} - List of Stock histories - */ - static async getStockHistory(cookieManager, options = null, startDate = null, endDate = null) { - const traceOperations = (options && options.trace) || false; - - const result = []; - - const getPage = await cookieManager.fetch(PAGE.URL); - const domPage = cheerio.load(await getPage.text()); - - if (startDate !== null) { - const minDateStr = domPage(PAGE.START_DATE_INPUT).attr('value'); - const minDate = CeiUtils.getDateFromInput(minDateStr); - - // Prevent date out of bound if parameter is set - if (options.capDates && startDate < minDate) - startDate = minDate; - - domPage(PAGE.START_DATE_INPUT).attr('value', CeiUtils.getDateForInput(startDate)); - } - - if (endDate !== null) { - const maxDateStr = domPage(PAGE.END_DATE_INPUT).attr('value'); - const maxDate = CeiUtils.getDateFromInput(maxDateStr); - - // Prevent date out of bound if parameter is set - if (options.capDates && endDate > maxDate) - endDate = maxDate; - - domPage(PAGE.END_DATE_INPUT).attr('value', CeiUtils.getDateForInput(endDate)); - } - - // Get all institutions to iterate - const institutions = domPage(PAGE.SELECT_INSTITUTION_OPTIONS) - .map((_, option) => ({ - value: option.attribs.value, - label: domPage(option).text() - })).get() - .filter(institution => institution.value > 0); - - // Iterate over institutions, accounts, processing the stocks - for (const institution of institutions) { - /* istanbul ignore next */ - if (traceOperations) - console.log(`Selecting institution ${institution.label} (${institution.value})`); - - domPage(PAGE.SELECT_INSTITUTION).attr('value', institution.value); - - const formDataInstitution = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.STOCK_HISTORY_INSTITUTION, { - ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$ddlAgentes', - __EVENTTARGET: 'ctl00$ContentPlaceHolder1$ddlAgentes' - }); - - 
const req = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.STOCK_HISTORY_INSTITUTION, - body: formDataInstitution - }); - - const reqInstitutionText = await req.text(); - const reqInstitutionDOM = cheerio.load(reqInstitutionText); - - const updtForm = CeiUtils.extractUpdateForm(reqInstitutionText); - CeiUtils.updateFieldsDOM(domPage, updtForm); - - const accounts = reqInstitutionDOM(PAGE.SELECT_ACCOUNT_OPTIONS) - .map((_, option) => option.attribs.value).get() - .filter(account => account > 0); - - for (const account of accounts) { - /* istanbul ignore next */ - if (traceOperations) - console.log(`Selecting account ${account}`); - - domPage(PAGE.SELECT_ACCOUNT).attr('value', account); - - const stockHistory = await this._getDataPage(domPage, cookieManager, traceOperations); - - /* istanbul ignore next */ - if (traceOperations) { - console.log (`Found ${stockHistory.length} stockHistory operations`); - } - - // Save the result - result.push({ - institution: institution.label, - account: account, - stockHistory - }); - } - } - - return result; - } - - /** - * Returns the available options to get Stock History data - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @returns {Promise} - Options to get data from stock history - */ - static async getStockHistoryOptions(cookieManager, options = null) { - const getPage = await cookieManager.fetch(PAGE.URL); - const domPage = cheerio.load(await getPage.text()); - - const minDateStr = domPage(PAGE.START_DATE_INPUT).attr('value'); - const maxDateStr = domPage(PAGE.END_DATE_INPUT).attr('value'); - - // Get all institutions to iterate - const institutions = domPage(PAGE.SELECT_INSTITUTION_OPTIONS) - .map((_, option) => ({ - value: option.attribs.value, - label: domPage(option).text() - })).get() - .filter(institution => institution.value > 0); - - for (const institution of institutions) { - 
domPage(PAGE.SELECT_INSTITUTION).attr('value', institution.value); - const formDataStr = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.STOCK_HISTORY_INSTITUTION); - - const getAcountsPage = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.STOCK_HISTORY_INSTITUTION, - body: formDataStr - }); - - const getAcountsPageTxt = await getAcountsPage.text(); - - const getAcountsPageDom = cheerio.load(getAcountsPageTxt); - - const accounts = getAcountsPageDom(PAGE.SELECT_ACCOUNT_OPTIONS) - .map((_, option) => option.attribs.value).get() - .filter(accountId => accountId > 0); - - institution.accounts = accounts; - } - - return { - minDate: minDateStr, - maxDate: maxDateStr, - institutions: institutions - } - } - - /** - * Returns the data from the page after trying more than once - * @param {cheerio.Root} dom DOM of page - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {Boolean} traceOperations - Whether to trace operations or not - */ - static async _getDataPage(dom, cookieManager, traceOperations) { - while(true) { - const formDataHistory = CeiUtils.extractFormDataFromDOM(dom, FETCH_FORMS.STOCK_HISTORY_ACCOUNT, { - ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar', - __EVENTARGUMENT: '' - }); - - const historyRequest = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.STOCK_HISTORY_ACCOUNT, - body: formDataHistory - }); - - const historyText = normalizeWhitespace(await historyRequest.text()); - const errorMessage = CeiUtils.extractMessagePostResponse(historyText); - - if (errorMessage && errorMessage.type === 2) { - throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message); - } - - const historyDOM = cheerio.load(historyText); - - /* istanbul ignore next */ - if (traceOperations) - console.log(`Processing stock history data`); - - const stockHistory = this._processStockHistory(historyDOM); - - if (errorMessage.type !== 
undefined || stockHistory.length > 0) { - return stockHistory; - } - - const updtForm = CeiUtils.extractUpdateForm(historyText); - CeiUtils.updateFieldsDOM(dom, updtForm); - } - } - - /** - * Process the stock history to a DTO - * @param {cheerio.Root} dom DOM table stock history - * @param {boolean} isSummary Get Summary Stock History - */ - static _processStockHistory(dom) { - const tableHeaders = STOCK_TABLE_HEADERS; - const tableRowsSelector = PAGE.STOCKS_TABLE_ROWS; - const headers = Object.keys(tableHeaders); - - const data = dom(tableRowsSelector) - .map((_, tr) => dom('td', tr) - .map((_, td) => dom(td).text().trim()) - .get() - .reduce((dict, txt, idx) => { - dict[headers[idx]] = txt; - return dict; - }, {}) - ).get(); - - return CeiUtils.parseTableTypes(data, tableHeaders); - } - -} - -module.exports = StockHistoryCrawler; diff --git a/src/lib/TreasureCrawler.js b/src/lib/TreasureCrawler.js deleted file mode 100644 index 021f8b5..0000000 --- a/src/lib/TreasureCrawler.js +++ /dev/null @@ -1,434 +0,0 @@ -const typedefs = require("./typedefs"); -const CeiUtils = require('./CeiUtils'); -const FetchCookieManager = require('./FetchCookieManager'); -const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); -const cheerio = require('cheerio'); -const normalizeWhitespace = require('normalize-html-whitespace'); - -const PAGE = { - URL: 'https://ceiapp.b3.com.br/CEI_Responsivo/extrato-tesouro-direto.aspx', - SELECT_INSTITUTION: '#ctl00_ContentPlaceHolder1_ddlAgentes', - SELECT_INSTITUTION_OPTIONS: '#ctl00_ContentPlaceHolder1_ddlAgentes option', - SELECT_ACCOUNT: '#ctl00_ContentPlaceHolder1_ddlContas', - SELECT_ACCOUNT_OPTIONS: '#ctl00_ContentPlaceHolder1_ddlContas option', - DATE_INPUT: '#ctl00_ContentPlaceHolder1_txtDatePickerFiltro', - ALERT_BOX: '.alert-box', - SUBMIT_BUTTON: '#ctl00_ContentPlaceHolder1_btnConsultar', - AGENT_TITLE: '#ctl00_ContentPlaceHolder1_lblTituloAgente', - TREASURE_TABLE: 'table', - TREASURE_TABLE_BODY: 'table tbody', - 
TREASURE_TABLE_BODY_ROWS: 'table tbody tr', - TREASURE_DETAIL_TABLE: '.reveal-modal table', - TREASURE_DETAIL_TABLE_BODY: '.reveal-modal table tbody', - TREASURE_DETAIL_TABLE_BODY_ROWS: '.reveal-modal table tbody tr', - RESULT_FOOTER_TREASURE: 'table tfoot', - RESULT_FOOTER_TREASURE_DETAIL: '.reveal-modal table tfoot', - PAGE_ALERT_ERROR: '.alert-box.alert', - PAGE_ALERT_SUCCESS: '.alert-box.success' -}; - -const TREASURE_TABLE_HEADER = { - code: 'string', - expirationDate: 'date', - investedValue: 'float', - grossValue: 'float', - netValue: 'float', - quantity: 'float', - blocked: 'float' -}; - -const TREASURE_DETAIL_TABLE_HEADER = { - tradeDate: 'date', - quantity: 'float', - price: 'float', - notional: 'float', - profitability: 'string', - grossProfitability: 'string', - grossProfitabilityPercent: 'float', - grossValue: 'float', - investmentTerm: 'float', - taxBracket: 'float', - taxIrValue: 'float', - taxIofValue: 'float', - feeB3Value: 'float', - feeInstitutionValue: 'float', - netValue: 'float', -}; - -const FETCH_OPTIONS = { - TREASURE_INSTITUTION: { - "headers": { - "accept": "*/*", - "accept-language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7", - "cache-control": "no-cache", - "content-type": "application/x-www-form-urlencoded; charset=UTF-8", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-microsoftajax": "Delta=true", - "x-requested-with": "XMLHttpRequest" - }, - "referrer": "https://ceiapp.b3.com.br/CEI_Responsivo/extrato-tesouro-direto.aspx", - "referrerPolicy": "strict-origin-when-cross-origin", - "body": null, - "method": "POST", - "mode": "cors", - "credentials": "include" - }, - TREASURE_ACCOUNT: { - "headers": { - "accept": "*/*", - "accept-language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7", - "cache-control": "no-cache", - "content-type": "application/x-www-form-urlencoded; charset=UTF-8", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-microsoftajax": 
"Delta=true", - "x-requested-with": "XMLHttpRequest" - }, - "referrer": "https://ceiapp.b3.com.br/CEI_Responsivo/extrato-tesouro-direto.aspx", - "referrerPolicy": "strict-origin-when-cross-origin", - "body": null, - "method": "POST", - "mode": "cors", - "credentials": "include" - }, - TREASURE_DETAIL: { - "headers": { - "accept": "*/*", - "accept-language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7", - "cache-control": "no-cache", - "content-type": "application/x-www-form-urlencoded; charset=UTF-8", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-microsoftajax": "Delta=true", - "x-requested-with": "XMLHttpRequest" - }, - "referrer": "https://ceiapp.b3.com.br/CEI_Responsivo/extrato-tesouro-direto.aspx", - "referrerPolicy": "strict-origin-when-cross-origin", - "body": null, - "method": "POST", - "mode": "cors", - "credentials": "include" - } -}; - -const FETCH_FORMS = { - TREASURE_INSTITUTION: [ - 'ctl00$ContentPlaceHolder1$smAlgumaCoisa', - '__EVENTTARGET', - '__EVENTARGUMENT', - '__LASTFOCUS', - '__VIEWSTATE', - '__VIEWSTATEGENERATOR', - '__EVENTVALIDATION', - 'ctl00$ContentPlaceHolder1$ddlAgentes', - 'ctl00$ContentPlaceHolder1$txtDatePickerFiltro', - '__ASYNCPOST' - ], - TREASURE_ACCOUNT: [ - 'ctl00$ContentPlaceHolder1$smAlgumaCoisa', - 'ctl00$ContentPlaceHolder1$ddlAgentes', - 'ctl00$ContentPlaceHolder1$ddlContas', - 'ctl00$ContentPlaceHolder1$txtDatePickerFiltro', - '__EVENTTARGET', - '__EVENTARGUMENT', - '__LASTFOCUS', - '__VIEWSTATE', - '__VIEWSTATEGENERATOR', - '__EVENTVALIDATION', - '__ASYNCPOST', - 'ctl00$ContentPlaceHolder1$btnConsultar' - ], - TREASURE_DETAIL: [ - 'ctl00$ContentPlaceHolder1$smAlgumaCoisa', - 'ctl00$ContentPlaceHolder1$ddlAgentes', - 'ctl00$ContentPlaceHolder1$ddlContas', - 'ctl00$ContentPlaceHolder1$txtDatePickerFiltro', - '__EVENTTARGET', - '__EVENTARGUMENT', - '__LASTFOCUS', - '__VIEWSTATE', - '__VIEWSTATEGENERATOR', - '__EVENTVALIDATION', - '__ASYNCPOST' - ] -}; - -class TreasureCrawler { - - 
/** - * Get the treasure data from CEI - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @param {Date} [date] - The date of the treasure. If none passed, the default of CEI will be used - * @returns {Promise} - List of treasures - */ - static async getTreasure(cookieManager, options = null, date = null) { - const traceOperations = (options && options.trace) || false; - - const result = []; - - const getPage = await cookieManager.fetch(PAGE.URL); - const domPage = cheerio.load(await getPage.text()); - - // Set date - if (date !== null) { - /* istanbul ignore next */ - const maxDate = new Date(); - maxDate.setDate(maxDate.getDate() - 1); - - if (options.capDates && date > maxDate) { - date = maxDate; - } - - domPage(PAGE.DATE_INPUT).attr('value', CeiUtils.getDateForInput(date)); - } - - // Get all institutions to iterate - const institutions = domPage(PAGE.SELECT_INSTITUTION_OPTIONS) - .map((_, option) => ({ - value: option.attribs.value, - label: domPage(option).text() - })).get() - .filter(institution => institution.value > 0); - - for (const institution of institutions) { - - /* istanbul ignore next */ - if (traceOperations) - console.log(`Selecting institution ${institution.label} (${institution.value})`) - - domPage(PAGE.SELECT_INSTITUTION).attr('value', institution.value); - - const formDataInstitution = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.TREASURE_INSTITUTION, { - ctl00$ContentPlaceHolder1$smAlgumaCoisa: 'ctl00$ContentPlaceHolder1$pnlPanel|ctl00$ContentPlaceHolder1$ddlAgentes', - __EVENTTARGET: 'ctl00$ContentPlaceHolder1$ddlAgentes' - }); - - const req = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.TREASURE_INSTITUTION, - body: formDataInstitution - }); - - const reqInstitutionText = await req.text(); - const reqInstitutionDOM = cheerio.load(reqInstitutionText); - - const updtForm = 
CeiUtils.extractUpdateForm(reqInstitutionText); - CeiUtils.updateFieldsDOM(domPage, updtForm); - - const accounts = reqInstitutionDOM(PAGE.SELECT_ACCOUNT_OPTIONS) - .map((_, option) => option.attribs.value).get() - .filter(account => account > 0); - - for (const account of accounts) { - /* istanbul ignore next */ - if (traceOperations) - console.log(`Selecting account ${account}`); - - domPage(PAGE.SELECT_ACCOUNT).attr('value', account); - - const treasures = await this._getDataPage(domPage, cookieManager, traceOperations); - - const updtForm = CeiUtils.extractUpdateForm(reqInstitutionText); - CeiUtils.updateFieldsDOM(domPage, updtForm); - - // Save the result - result.push({ - institution: institution.label, - account: account, - treasures, - }); - } - } - - return result; - } - - /** - * Returns the available options to get Treasure data - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @returns {Promise - Options to get data from treasure - */ - static async getTreasureOptions(cookieManager, options = null) { - const getPage = await cookieManager.fetch(PAGE.URL); - const domPage = cheerio.load(await getPage.text()); - - const institutions = domPage(PAGE.SELECT_INSTITUTION_OPTIONS) - .map((_, option) => ({ - value: option.attribs.value, - label: domPage(option).text() - })) - .get() - .filter(institution => institution.value > 0); - - for (const institution of institutions) { - domPage(PAGE.SELECT_INSTITUTION).attr('value', institution.value); - const formDataStr = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.TREASURE_INSTITUTION); - - const getAcountsPage = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.TREASURE_INSTITUTION, - body: formDataStr - }); - - const getAcountsPageTxt = await getAcountsPage.text(); - - const getAcountsPageDom = cheerio.load(getAcountsPageTxt); - - const accounts = getAcountsPageDom(PAGE.SELECT_ACCOUNT_OPTIONS) 
- .map((_, option) => option.attribs.value).get() - .filter(accountId => accountId > 0); - - institution.accounts = accounts; - } - - return { - institutions: institutions - } - } - - /** - * Returns the data from the page after trying more than once - * @param {cheerio.Root} dom DOM of page - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {Boolean} traceOperations - Whether to trace operations or not - */ - static async _getDataPage(dom, cookieManager, traceOperations) { - while(true) { - const formDataWallet = CeiUtils.extractFormDataFromDOM(dom, FETCH_FORMS.TREASURE_ACCOUNT, { - ctl00$ContentPlaceHolder1$smAlgumaCoisa: 'ctl00$ContentPlaceHolder1$pnlPanel|ctl00$ContentPlaceHolder1$btnConsultar', - __EVENTARGUMENT: '' - }); - - const treasureRequest = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.TREASURE_ACCOUNT, - body: formDataWallet - }); - - const treasureText = normalizeWhitespace(await treasureRequest.text()); - const errorMessage = CeiUtils.extractMessagePostResponse(treasureText); - - if (errorMessage && errorMessage.type === 2) { - throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message); - } - - const treasureDOM = cheerio.load(treasureText); - - const updtForm = CeiUtils.extractUpdateForm(treasureText); - CeiUtils.updateFieldsDOM(dom, updtForm); - - // Process the page - /* istanbul ignore next */ - if (traceOperations) - console.log(`Processing treasure data`); - - if (errorMessage.type !== undefined || this._hasLoadedData(treasureDOM, PAGE.RESULT_FOOTER_TREASURE)) { - const treasures = this._processTableData(treasureDOM, TREASURE_TABLE_HEADER, PAGE.TREASURE_TABLE_BODY_ROWS); - return await this._getDataPageDetail(dom, cookieManager, traceOperations, treasures); - } - - if(this._hasEmptyData(treasureDOM)) { - return []; - } - } - } - - /** - * Returns the data from the modal page after trying more than once - * @param {cheerio.Root} dom DOM of page - * @param {FetchCookieManager} 
cookieManager - FetchCookieManager to work with - * @param {Boolean} traceOperations - Whether to trace operations or not - * @param {TreasureTransactionItem[]} treasures - List of treasures - */ - static async _getDataPageDetail(dom, cookieManager, traceOperations, treasures) { - for(const row in treasures) { - const treasure = treasures[row]; - const key = String(+row + 1).padStart(2, '0'); - - /* istanbul ignore next */ - if (traceOperations) - console.log(`Process treasure detail data for ${treasure.code}`); - - dom('#__EVENTTARGET') - .attr('value', `ctl00$ContentPlaceHolder1$repTabela$ctl${key}$LinkButton2`); - - dom('#__EVENTARGUMENT') - .attr('value', ''); - - dom('#__LASTFOCUS') - .attr('value', ''); - - const formDataTreasureDetail = CeiUtils.extractFormDataFromDOM(dom, FETCH_FORMS.TREASURE_DETAIL, { - ctl00$ContentPlaceHolder1$smAlgumaCoisa: `ctl00$ContentPlaceHolder1$pnlPanel|ctl00$ContentPlaceHolder1$repTabela$ctl${key}$LinkButton2`, - }); - - const treasureDetailRequest = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.TREASURE_DETAIL, - body: formDataTreasureDetail - }); - - const treasureDetailText = await treasureDetailRequest.text(); - const errorMessage = CeiUtils.extractMessagePostResponse(treasureDetailText); - - if (errorMessage && errorMessage.type === 2) { - throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message); - } - - const treasureDetailDOM = cheerio.load(treasureDetailText); - - treasures[row].transactions = []; - if (errorMessage.type !== undefined || this._hasLoadedData(treasureDetailDOM, PAGE.RESULT_FOOTER_TREASURE_DETAIL)) { - treasures[row].transactions = this._processTableData(treasureDetailDOM, TREASURE_DETAIL_TABLE_HEADER, PAGE.TREASURE_DETAIL_TABLE_BODY_ROWS); - } - } - - return treasures; - } - - /** - * Process the treasure table to a DTO - * @param {cheerio.Root} dom DOM table stock history - * @param {Array} header List of fields in table header - * @param {String} rows Name of element for 
table rows - */ - static _processTableData(dom, header, rows) { - const headers = Object.keys(header); - - const data = dom(rows) - .map((_, tr) => dom('td', tr) - .map((_, td) => dom(td).text().trim()) - .get() - .reduce((dict, txt, idx) => { - dict[headers[idx]] = txt; - return dict; - }, {}) - ).get(); - - return CeiUtils.parseTableTypes(data, header); - } - - /** - * Check wheter the table was rendered on the screen to stop trying to get data - * @param {cheerio.Root} dom DOM table treasure - * @param {String} field Name of element for check if the result is rendered - */ - static _hasLoadedData(dom, field) { - const query = dom(field); - return query.length > 0; - } - - /** - * Check if the result is empty - * @param {cheerio.Root} dom DOM table treasure - */ - static _hasEmptyData(dom) { - const query = dom(PAGE.AGENT_TITLE).text().trim(); - return query === ''; - } - -} - -module.exports = TreasureCrawler; diff --git a/src/lib/WalletCrawler.js b/src/lib/WalletCrawler.js deleted file mode 100644 index 8276266..0000000 --- a/src/lib/WalletCrawler.js +++ /dev/null @@ -1,374 +0,0 @@ -const typedefs = require("./typedefs"); -const CeiUtils = require('./CeiUtils'); -const FetchCookieManager = require('./FetchCookieManager'); -const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError') -const cheerio = require('cheerio'); -const normalizeWhitespace = require('normalize-html-whitespace'); - -const PAGE = { - URL: 'https://ceiapp.b3.com.br/CEI_Responsivo/ConsultarCarteiraAtivos.aspx', - SELECT_INSTITUTION: '#ctl00_ContentPlaceHolder1_ddlAgentes', - SELECT_INSTITUTION_OPTIONS: '#ctl00_ContentPlaceHolder1_ddlAgentes option', - SELECT_ACCOUNT: '#ctl00_ContentPlaceHolder1_ddlContas', - SELECT_ACCOUNT_OPTIONS: '#ctl00_ContentPlaceHolder1_ddlContas option', - DATE_INPUT: '#ctl00_ContentPlaceHolder1_txtData', - DATE_MIN_VALUE: '#ctl00_ContentPlaceHolder1_lblPeriodoInicial', - DATE_MAX_VALUE: '#ctl00_ContentPlaceHolder1_lblPeriodoFinal', - ALERT_BOX: 
'.alert-box', - SUBMIT_BUTTON: '#ctl00_ContentPlaceHolder1_btnConsultar', - STOCK_WALLET_TABLE: '#ctl00_ContentPlaceHolder1_rptAgenteContaMercado_ctl00_rptContaMercado_ctl00_rprCarteira_ctl00_grdCarteira', - STOCK_WALLET_TABLE_BODY: '#ctl00_ContentPlaceHolder1_rptAgenteContaMercado_ctl00_rptContaMercado_ctl00_rprCarteira_ctl00_grdCarteira tbody', - STOCK_WALLET_TABLE_BODY_ROWS: '#ctl00_ContentPlaceHolder1_rptAgenteContaMercado_ctl00_rptContaMercado_ctl00_rprCarteira_ctl00_grdCarteira tbody tr', - STOCK_GUARANTEES_WALLET_TABLE_BODY_ROWS: '#ctl00_ContentPlaceHolder1_rptAgenteContaMercado_ctl00_rptContaMercado_ctl00_rprCarteira_ctl01_grdCarteira tbody tr', - TREASURE_WALLET_TABLE: '#ctl00_ContentPlaceHolder1_rptAgenteContaMercado_ctl00_rptContaMercado_ctl00_trBodyTesouroDireto', - TREASURE_WALLET_TABLE_BODY: '#ctl00_ContentPlaceHolder1_rptAgenteContaMercado_ctl00_rptContaMercado_ctl00_trBodyTesouroDireto tbody', - TREASURE_WALLET_TABLE_BODY_ROWS: '#ctl00_ContentPlaceHolder1_rptAgenteContaMercado_ctl00_rptContaMercado_ctl00_trBodyTesouroDireto tbody tr', - RESULT_FOOTER_100: '#ctl00_ContentPlaceHolder1_rptAgenteContaMercado_ctl00_rptContaMercado_ctl00_divTotalCarteira', - RESULT_FOOTER_101: '#ctl00_ContentPlaceHolder1_rptAgenteContaMercado_ctl00_rptContaMercado_ctl01_divTotalCarteira', - PAGE_ALERT_ERROR: '.alert-box.alert', - PAGE_ALERT_SUCCESS: '.alert-box.success' -} - -const STOCK_WALLET_TABLE_HEADER = { - company: 'string', - stockType: 'string', - code: 'string', - isin: 'string', - price: 'float', - quantity: 'int', - quotationFactor: 'float', - totalValue: 'float' -}; - -const TREASURE_WALLET_TABLE_HEADER = { - code: 'string', - expirationDate: 'date', - investedValue: 'float', - grossValue: 'float', - netValue: 'float', - quantity: 'float', - blocked: 'float' -}; - -const FETCH_OPTIONS = { - WALLET_INSTITUTION: { - "headers": { - "accept": "*/*", - "accept-language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7", - "cache-control": "no-cache", - "content-type": 
"application/x-www-form-urlencoded; charset=UTF-8", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-microsoftajax": "Delta=true", - "x-requested-with": "XMLHttpRequest" - }, - "referrer": "https://ceiapp.b3.com.br/CEI_Responsivo/ConsultarCarteiraAtivos.aspx", - "referrerPolicy": "strict-origin-when-cross-origin", - "body": null, - "method": "POST", - "mode": "cors", - "credentials": "include" - }, - WALLET_ACCOUNT: { - "headers": { - "accept": "*/*", - "accept-language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7", - "cache-control": "no-cache", - "content-type": "application/x-www-form-urlencoded; charset=UTF-8", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-microsoftajax": "Delta=true", - "x-requested-with": "XMLHttpRequest" - }, - "referrer": "https://ceiapp.b3.com.br/CEI_Responsivo/ConsultarCarteiraAtivos.aspx", - "referrerPolicy": "strict-origin-when-cross-origin", - "body": null, - "method": "POST", - "mode": "cors", - "credentials": "include" - } -}; - -const FETCH_FORMS = { - WALLET_INSTITUTION: [ - 'ctl00$ContentPlaceHolder1$ToolkitScriptManager1', - 'ctl00_ContentPlaceHolder1_ToolkitScriptManager1_HiddenField', - '__EVENTTARGET', - '__EVENTARGUMENT', - '__LASTFOCUS', - '__VIEWSTATE', - '__VIEWSTATEGENERATOR', - '__EVENTVALIDATION', - 'ctl00$ContentPlaceHolder1$ddlAgentes', - 'ctl00$ContentPlaceHolder1$ddlContas', - 'ctl00$ContentPlaceHolder1$txtData', - '__ASYNCPOST' - ], - WALLET_ACCOUNT: [ - 'ctl00$ContentPlaceHolder1$ToolkitScriptManager1', - 'ctl00_ContentPlaceHolder1_ToolkitScriptManager1_HiddenField', - 'ctl00$ContentPlaceHolder1$ddlAgentes', - 'ctl00$ContentPlaceHolder1$ddlContas', - 'ctl00$ContentPlaceHolder1$txtData', - '__EVENTTARGET', - '__EVENTARGUMENT', - '__LASTFOCUS', - '__VIEWSTATE', - '__VIEWSTATEGENERATOR', - '__EVENTVALIDATION', - '__ASYNCPOST', - 'ctl00$ContentPlaceHolder1$btnConsultar' - ] -} - -class WalletCrawler { - - /** - * Get the 
wallet data from CEI - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @param {Date} [date] - The date of the wallet. If none passed, the default of CEI will be used - * @returns {Promise} - List of Stock histories - */ - static async getWallet(cookieManager, options = null, date = null) { - const traceOperations = (options && options.trace) || false; - - const result = []; - - const getPage = await cookieManager.fetch(PAGE.URL); - const domPage = cheerio.load(await getPage.text()); - - // Set date - if (date !== null) { - /* istanbul ignore next */ - const minDateStr = domPage(PAGE.DATE_MIN_VALUE).text().trim(); - const minDate = CeiUtils.getDateFromInput(minDateStr); - - /* istanbul ignore next */ - const maxDateStr = domPage(PAGE.DATE_MAX_VALUE).text().trim(); - const maxDate = CeiUtils.getDateFromInput(maxDateStr); - - // Prevent date out of bound if parameter is set - if (options.capDates && date < minDate) { - date = minDate; - } - - if (options.capDates && date > maxDate) { - date = maxDate; - } - domPage(PAGE.DATE_INPUT).attr('value', CeiUtils.getDateForInput(date)); - } - - // Get all institutions to iterate - const institutions = domPage(PAGE.SELECT_INSTITUTION_OPTIONS) - .map((_, option) => ({ - value: option.attribs.value, - label: domPage(option).text() - })).get() - .filter(institution => institution.value > 0); - - for (const institution of institutions) { - - /* istanbul ignore next */ - if (traceOperations) - console.log(`Selecting institution ${institution.label} (${institution.value})`) - - domPage(PAGE.SELECT_INSTITUTION).attr('value', institution.value); - - const formDataInstitution = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.WALLET_INSTITUTION, { - ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$ddlAgentes', - __EVENTTARGET: 
'ctl00$ContentPlaceHolder1$ddlAgentes' - }); - - const req = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.WALLET_INSTITUTION, - body: formDataInstitution - }); - - const reqInstitutionText = await req.text(); - const reqInstitutionDOM = cheerio.load(reqInstitutionText); - - const updtForm = CeiUtils.extractUpdateForm(reqInstitutionText); - CeiUtils.updateFieldsDOM(domPage, updtForm); - - const accounts = reqInstitutionDOM(PAGE.SELECT_ACCOUNT_OPTIONS) - .map((_, option) => option.attribs.value).get() - .filter(account => account > 0); - - for (const account of accounts) { - /* istanbul ignore next */ - if (traceOperations) - console.log(`Selecting account ${account}`); - - domPage(PAGE.SELECT_ACCOUNT).attr('value', account); - - const { stockWallet, stockGuaranteesWallet, nationalTreasuryWallet } = await this._getDataPage(domPage, cookieManager, traceOperations); - - // Save the result - result.push({ - institution: institution.label, - account: account, - stockWallet: stockWallet, - stockGuaranteesWallet: stockGuaranteesWallet, - nationalTreasuryWallet: nationalTreasuryWallet - }); - } - } - - return result; - } - - /** - * Returns the available options to get Wallet data - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @returns {Promise - Options to get data from wallet - */ - static async getWalletOptions(cookieManager, options = null) { - const getPage = await cookieManager.fetch(PAGE.URL); - const domPage = cheerio.load(await getPage.text()); - - - const minDateStr = domPage(PAGE.DATE_MIN_VALUE).text().trim(); - const maxDateStr = domPage(PAGE.DATE_MAX_VALUE).text().trim(); - - const institutions = domPage(PAGE.SELECT_INSTITUTION_OPTIONS) - .map((_, option) => ({ - value: option.attribs.value, - label: domPage(option).text() - })) - .get() - .filter(institution => institution.value > 0); - - for (const institution of institutions) { - 
domPage(PAGE.SELECT_INSTITUTION).attr('value', institution.value); - const formDataStr = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.WALLET_INSTITUTION); - - const getAcountsPage = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.WALLET_INSTITUTION, - body: formDataStr - }); - - const getAcountsPageTxt = await getAcountsPage.text(); - - const getAcountsPageDom = cheerio.load(getAcountsPageTxt); - - const accounts = getAcountsPageDom(PAGE.SELECT_ACCOUNT_OPTIONS) - .map((_, option) => option.attribs.value).get() - .filter(accountId => accountId > 0); - - institution.accounts = accounts; - } - - return { - minDate: minDateStr, - maxDate: maxDateStr, - institutions: institutions - } - } - - /** - * Returns the data from the page after trying more than once - * @param {cheerio.Root} dom DOM of page - * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with - * @param {Boolean} traceOperations - Whether to trace operations or not - */ - static async _getDataPage(dom, cookieManager, traceOperations) { - while(true) { - const formDataWallet = CeiUtils.extractFormDataFromDOM(dom, FETCH_FORMS.WALLET_ACCOUNT, { - ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar', - __EVENTARGUMENT: '', - __LASTFOCUS: '' - }); - - const walletRequest = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.WALLET_ACCOUNT, - body: formDataWallet - }); - - const walletText = normalizeWhitespace(await walletRequest.text()); - const errorMessage = CeiUtils.extractMessagePostResponse(walletText); - - if (errorMessage && errorMessage.type === 2) { - throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message); - } - - const walletDOM = cheerio.load(walletText); - - // Process the page - /* istanbul ignore next */ - if (traceOperations) - console.log(`Processing wallet data`); - - const stockWallet = this._processStockWallet(walletDOM, PAGE.STOCK_WALLET_TABLE_BODY_ROWS); 
- const stockGuaranteesWallet = this._processStockWallet(walletDOM, PAGE.STOCK_GUARANTEES_WALLET_TABLE_BODY_ROWS); - const nationalTreasuryWallet = this._processNationalTreasuryWallet(walletDOM); - - if (errorMessage.type !== undefined || this._hasLoadedData(walletDOM)) { - return { - stockWallet, - stockGuaranteesWallet, - nationalTreasuryWallet - }; - } - - const updtForm = CeiUtils.extractUpdateForm(walletText); - CeiUtils.updateFieldsDOM(dom, updtForm); - } - } - - /** - * Process the stock wallet to a DTO - * @param {cheerio.Root} dom DOM table stock history - * @param {string} table selector - */ - static _processStockWallet(dom, tableSelector) { - const headers = Object.keys(STOCK_WALLET_TABLE_HEADER); - - const data = dom(tableSelector) - .map((_, tr) => dom('td', tr) - .map((_, td) => dom(td).text().trim()) - .get() - .reduce((dict, txt, idx) => { - dict[headers[idx]] = txt; - return dict; - }, {}) - ).get(); - - return CeiUtils.parseTableTypes(data, STOCK_WALLET_TABLE_HEADER); - } - - /** - * Process the stock wallet to a DTO - * @param {cheerio.Root} dom DOM table stock history - */ - static _processNationalTreasuryWallet(dom) { - const headers = Object.keys(TREASURE_WALLET_TABLE_HEADER); - - const data = dom(PAGE.TREASURE_WALLET_TABLE_BODY_ROWS) - .map((_, tr) => dom('td', tr) - .map((_, td) => dom(td).text().trim()) - .get() - .reduce((dict, txt, idx) => { - dict[headers[idx]] = txt; - return dict; - }, {}) - ).get(); - - return CeiUtils.parseTableTypes(data, TREASURE_WALLET_TABLE_HEADER); - } - - /** - * Check wheter the table was rendered on the screen to stop trying to get data - * @param {cheerio.Root} dom DOM table stock history - */ - static _hasLoadedData(dom) { - const query = dom(`${PAGE.RESULT_FOOTER_100}, ${PAGE.RESULT_FOOTER_101}`); - return query.length > 0; - } - -} - -module.exports = WalletCrawler; \ No newline at end of file diff --git a/src/lib/typedefs.js b/src/lib/typedefs.js index 196a4fa..17560d9 100644 --- a/src/lib/typedefs.js 
+++ b/src/lib/typedefs.js @@ -9,12 +9,19 @@ * @property {String} browserPath - Path of the browser to run puppeteer */ +/** + * @typedef CeiAuth + * @property {String} token - Bearer token used in CEI + * @property {String} cache-guid - Cache GUID for the requests + */ + /** * @typedef CeiCrawlerOptions * @property {boolean} trace - Indicates if it should print trace messages. Helpful for debugging. * @property {boolean} capDates - Prevent crawling with an invalid date in CEI * @property {Number} navigationTimeout - Fetch timeout * @property {LoginOptions} loginOptions - The strategy the crawler will use to make the login. Options are: `user-input` + * @property {CeiAuth} auth - Auth logged info * @memberof typdefs */ From 03aa90bbc7ef7b079cc38fb161bf5fbf9d3566a3 Mon Sep 17 00:00:00 2001 From: Menighin Date: Tue, 27 Jul 2021 23:28:13 -0300 Subject: [PATCH 03/17] Crawling last execution --- src/lib/CeiCrawler.js | 4 ++++ src/lib/LastExecutionCrawler.js | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index 4cbee67..2e6e4e8 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -1,4 +1,5 @@ const PositionCrawler = require('./PositionCrawler'); +const LastExecutionCrawler = require('./LastExecutionCrawler'); const typedefs = require("./typedefs"); const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); const CeiUtils = require('./CeiUtils'); @@ -61,6 +62,9 @@ class CeiCrawler { console.log(`Logging at CEI using ${this.options.loginOptions.strategy}...`); this.options.auth = await this._ceiLoginService.getToken(); + this.options.lastExecutionInfo = await LastExecutionCrawler.getLastExecutionInfo(); + + console.log(JSON.stringify(this.options)); } /** diff --git a/src/lib/LastExecutionCrawler.js b/src/lib/LastExecutionCrawler.js index a5a220d..711c138 100644 --- a/src/lib/LastExecutionCrawler.js +++ b/src/lib/LastExecutionCrawler.js @@ -1,5 +1,21 @@ - +const 
AxiosWrapper = require('./AxiosWrapper'); const URLS = { LAST_EXECUTION: 'https://investidor.b3.com.br/api/v1/sistema/carga/ultima-execucao' -} \ No newline at end of file +}; + + +class LastExecutionCrawler { + + static async getLastExecutionInfo() { + const data = (await AxiosWrapper.request(URLS.LAST_EXECUTION)).data; + return { + generalDate: new Date(data.dataGeral), + stockDate: new Date(data.dataRendaVariavel), + treasuryDirectDate: new Date(data.dataTesouroDireto) + } + } + +} + +module.exports = LastExecutionCrawler; \ No newline at end of file From 417b1ef6aed5d701184f82d25d533c74b5eab1fe Mon Sep 17 00:00:00 2001 From: Menighin Date: Wed, 28 Jul 2021 23:04:11 -0300 Subject: [PATCH 04/17] Axios error handling + Crawling options --- src/lib/AxiosWrapper.js | 13 ++++++++----- src/lib/CeiCrawler.js | 14 ++++++-------- src/lib/CeiCrawlerError.js | 6 ++++-- src/lib/CeiLoginService.js | 12 +++++++----- src/lib/LastExecutionCrawler.js | 2 +- src/lib/PositionCrawler.js | 26 +++++++++++++------------- src/lib/typedefs.js | 12 ++++++++++-- 7 files changed, 49 insertions(+), 36 deletions(-) diff --git a/src/lib/AxiosWrapper.js b/src/lib/AxiosWrapper.js index c45bcb0..cfa8f0c 100644 --- a/src/lib/AxiosWrapper.js +++ b/src/lib/AxiosWrapper.js @@ -1,5 +1,6 @@ const axios = require('axios').default; const https = require('https'); +const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); class AxiosWrapper { @@ -18,7 +19,9 @@ class AxiosWrapper { }); } - static async request(url, queryParams = {}, pathParams = {}) { + static async request(url, opts = {}) { + const pathParams = opts.pathParams || {}; + const queryParams = opts.queryParams || {}; try { const urlWithParams = Object.keys(pathParams) .reduce((p, v) => { @@ -30,10 +33,10 @@ class AxiosWrapper { ...queryParams } }); - return response; - } catch(e) { - console.log('ERROR ON AXIOS: ' + e.message); - throw e; + return response.data; + } catch (e) { + const msg = e.response.data == null || 
e.response.data.trim() == '' ? e.message : e.response.data; + throw new CeiCrawlerError(CeiErrorTypes.BAD_REQUEST, msg, e.response.status); } } } diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index 2e6e4e8..29e2cc0 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -36,17 +36,17 @@ class CeiCrawler { this.options = options; this._setDefaultOptions(); - this._ceiLoginService = new CeiLoginService(username, password, this.options.loginOptions); + this._ceiLoginService = new CeiLoginService(username, password, this.options); AxiosWrapper.setup(this.options); } _setDefaultOptions() { - if (!this.options.trace) this.options.trace = false; + if (!this.options.debug) this.options.debug = false; if (!this.options.navigationTimeout) this.options.navigationTimeout = 30000; if (!this.options.loginOptions) this.options.loginOptions = {}; if (!this.options.loginOptions.timeout) this.options.loginOptions.timeout = 150000; - if (!this.options.loginOptions.strategy) this.options.loginOptions.strategy = 'user-input'; + if (!this.options.loginOptions.strategy) this.options.loginOptions.strategy = 'user-resolve'; } async login() { @@ -58,13 +58,11 @@ class CeiCrawler { if (this._isLogged) return; /* istanbul ignore next */ - if ((this.options && this.options.trace) || false) + if (this.options.debug) console.log(`Logging at CEI using ${this.options.loginOptions.strategy}...`); this.options.auth = await this._ceiLoginService.getToken(); this.options.lastExecutionInfo = await LastExecutionCrawler.getLastExecutionInfo(); - - console.log(JSON.stringify(this.options)); } /** @@ -72,9 +70,9 @@ class CeiCrawler { * @param {Date} [date] - The date of the position * @returns {Promise} - List of Stock histories */ - async getPosition(date = new Date()) { + async getPosition(date = null) { await this._login(); - return await PositionCrawler.getPosition(this.options, date); + return await PositionCrawler.getPosition(date, this.options); } } diff --git 
a/src/lib/CeiCrawlerError.js b/src/lib/CeiCrawlerError.js index 2a8550d..1c49d5c 100644 --- a/src/lib/CeiCrawlerError.js +++ b/src/lib/CeiCrawlerError.js @@ -1,8 +1,9 @@ class CeiCrawlerError extends Error { - constructor(type, message) { + constructor(type, message, status = null) { super(message); this.type = type; this.name = 'CeiCrawlerError'; + this.status = null; } } @@ -12,7 +13,8 @@ const CeiErrorTypes = Object.freeze({ SUBMIT_ERROR: 'SUBMIT_ERROR', SESSION_HAS_EXPIRED: 'SESSION_HAS_EXPIRED', NAVIGATION_TIMEOUT: 'NAVIGATION_TIMEOUT', - INVALID_LOGIN_STRATEGY: 'INVALID_LOGIN_STRATEGY' + INVALID_LOGIN_STRATEGY: 'INVALID_LOGIN_STRATEGY', + BAD_REQUEST: 'BAD_REQUEST' }); module.exports = { diff --git a/src/lib/CeiLoginService.js b/src/lib/CeiLoginService.js index 15e85e9..1f9152e 100644 --- a/src/lib/CeiLoginService.js +++ b/src/lib/CeiLoginService.js @@ -11,7 +11,7 @@ class CeiLoginService { /** @type {String} - Password to fill in at CEI page */ _password = null; - /** @type {typedefs.LoginOptions} - Options for CEI Crawler and Fetch */ + /** @type {typedefs.CeiCrawlerOptions} - Options for CEI Crawler and Fetch */ _options = null; constructor(username, password, options) { @@ -21,15 +21,17 @@ class CeiLoginService { } async getToken() { - switch(this._options.strategy) { - case 'user-input': - return await this._getTokenByUserInput(); + switch(this._options.loginOptions.strategy) { + case 'user-resolve': + return await this._getTokenByUserResolve(); + case 'raw-token': + return this._options.auth; default: throw CeiCrawlerError(CeiErrorTypes.INVALID_LOGIN_STRATEGY, `Invalid login strategy: ${this._options.strategy}`); } } - async _getTokenByUserInput() { + async _getTokenByUserResolve() { const browser = await puppeteer.launch({ headless: false, executablePath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe', diff --git a/src/lib/LastExecutionCrawler.js b/src/lib/LastExecutionCrawler.js index 711c138..78a4275 100644 --- 
a/src/lib/LastExecutionCrawler.js +++ b/src/lib/LastExecutionCrawler.js @@ -8,7 +8,7 @@ const URLS = { class LastExecutionCrawler { static async getLastExecutionInfo() { - const data = (await AxiosWrapper.request(URLS.LAST_EXECUTION)).data; + const data = (await AxiosWrapper.request(URLS.LAST_EXECUTION)); return { generalDate: new Date(data.dataGeral), stockDate: new Date(data.dataRendaVariavel), diff --git a/src/lib/PositionCrawler.js b/src/lib/PositionCrawler.js index db13234..90a672d 100644 --- a/src/lib/PositionCrawler.js +++ b/src/lib/PositionCrawler.js @@ -15,22 +15,22 @@ class PositionCrawler { * @param {Date} [date] - The date of the wallet. If none passed, the default of CEI will be used * @returns {Promise} - List of Stock histories */ - static async getPosition(options = null, date = new Date()) { - const traceOperations = (options && options.trace) || false; + static async getPosition(date = null, options = null) { + const dateStr = CeiUtils.getDateForQueryParam(date || options.lastExecutionInfo.generalDate); - const dateStr = CeiUtils.getDateForQueryParam(date); - try { - const response = await AxiosWrapper.request(URLS.GET_DATA, { + if (options.debug) + console.log(`[PositionCrawler] Crawling wallet position on date ${dateStr}`); + + const response = await AxiosWrapper.request(URLS.GET_DATA, { + queryParams: { data: dateStr - }, { + }, + pathParams: { page: 1 - }); - - console.log(JSON.stringify(response.data)); - } catch(e) { - console.log('ERROR ON AXIOS: ' + e.message); - throw e; - } + } + }); + + return response; } } diff --git a/src/lib/typedefs.js b/src/lib/typedefs.js index 17560d9..423feac 100644 --- a/src/lib/typedefs.js +++ b/src/lib/typedefs.js @@ -2,9 +2,16 @@ * @namespace typedefs */ +/** + * @typedef LastExecutionInfo + * @property {Date} generalDate - new Date(data.dataGeral), + * @property {Date} stockDate - new Date(data.dataRendaVariavel), + * @property {Date} treasuryDirectDate - new Date(data.dataTesouroDireto) + */ + /** * 
@typedef LoginOptions - * @property {String} strategy - The strategy the crawler will use to make the login. Options are: `user-input` + * @property {String} strategy - The strategy the crawler will use to make the login. Options are: `user-resolve`, `raw-token` * @property {Number} timeout - Login timeout * @property {String} browserPath - Path of the browser to run puppeteer */ @@ -17,11 +24,12 @@ /** * @typedef CeiCrawlerOptions - * @property {boolean} trace - Indicates if it should print trace messages. Helpful for debugging. + * @property {boolean} debug - Indicates if it should print debug messages. Helpful for debugging. * @property {boolean} capDates - Prevent crawling with an invalid date in CEI * @property {Number} navigationTimeout - Fetch timeout * @property {LoginOptions} loginOptions - The strategy the crawler will use to make the login. Options are: `user-input` * @property {CeiAuth} auth - Auth logged info + * @property {LastExecutionInfo} lastExecutionInfo - CEI info about the last execution * @memberof typdefs */ From 0bc5aba870994beea670636e237f98e009bddd41 Mon Sep 17 00:00:00 2001 From: Menighin Date: Fri, 30 Jul 2021 22:23:09 -0300 Subject: [PATCH 05/17] Page parameter --- src/lib/CeiCrawler.js | 5 +++-- src/lib/PositionCrawler.js | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index 29e2cc0..a8c60fd 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -68,11 +68,12 @@ class CeiCrawler { /** * Returns the stock history * @param {Date} [date] - The date of the position + * @param {Number} [page=1] - The page of the data * @returns {Promise} - List of Stock histories */ - async getPosition(date = null) { + async getPosition(date = null, page = 1) { await this._login(); - return await PositionCrawler.getPosition(date, this.options); + return await PositionCrawler.getPosition(date, page, this.options); } } diff --git a/src/lib/PositionCrawler.js 
b/src/lib/PositionCrawler.js index 90a672d..ee2746d 100644 --- a/src/lib/PositionCrawler.js +++ b/src/lib/PositionCrawler.js @@ -11,11 +11,12 @@ class PositionCrawler { /** * Get data from the position screen + * @param {Date} date - The date of the wallet. If none passed, the default of CEI will be used + * @param {Number} page - The page of the data * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @param {Date} [date] - The date of the wallet. If none passed, the default of CEI will be used * @returns {Promise} - List of Stock histories */ - static async getPosition(date = null, options = null) { + static async getPosition(date, page, options = {}) { const dateStr = CeiUtils.getDateForQueryParam(date || options.lastExecutionInfo.generalDate); if (options.debug) @@ -26,7 +27,7 @@ class PositionCrawler { data: dateStr }, pathParams: { - page: 1 + page: page } }); From 312caba794121d418c0cde146fdf87f2eb8d7559 Mon Sep 17 00:00:00 2001 From: Menighin Date: Sat, 31 Jul 2021 17:11:42 -0300 Subject: [PATCH 06/17] Position detail crawler --- src/lib/AxiosWrapper.js | 4 ++++ src/lib/CeiCrawler.js | 14 ++++++++++++++ src/lib/CeiUtils.js | 13 +++++++++++++ src/lib/PositionCrawler.js | 39 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 68 insertions(+), 2 deletions(-) diff --git a/src/lib/AxiosWrapper.js b/src/lib/AxiosWrapper.js index cfa8f0c..ab38c03 100644 --- a/src/lib/AxiosWrapper.js +++ b/src/lib/AxiosWrapper.js @@ -15,6 +15,10 @@ class AxiosWrapper { }; config.params['cache-guid'] = options.auth['cache-guid']; config.httpsAgent = httpsAgent; + + if (options.debug) + console.log(`[AxiosWrapper] ${config.method.toUpperCase()} ${config.url} ${JSON.stringify(config.params)}`); + return config; }); } diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index a8c60fd..823ef18 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -63,6 +63,7 @@ class CeiCrawler { this.options.auth = await this._ceiLoginService.getToken(); 
this.options.lastExecutionInfo = await LastExecutionCrawler.getLastExecutionInfo(); + this._isLogged = true; } /** @@ -76,6 +77,19 @@ class CeiCrawler { return await PositionCrawler.getPosition(date, page, this.options); } + /** + * Returns the detail of a position given by CEI. The format of the position differs from type to type + * @param {String} id - The UUID of the position given by CEI + * @param {String} category - The category of the position + * @param {String} type - The type of the position + * @param {typedefs.CeiCrawlerOptions} options - Options for the crawler + * @returns {Any} + */ + async getPositionDetail(id, category, type) { + await this._login(); + return await PositionCrawler.getPositionDetail(id, category, type, this.options); + } + } module.exports = CeiCrawler; diff --git a/src/lib/CeiUtils.js b/src/lib/CeiUtils.js index e698091..a862dbd 100644 --- a/src/lib/CeiUtils.js +++ b/src/lib/CeiUtils.js @@ -199,6 +199,19 @@ class CeiUtils { return {}; } } + + /** + * Convert the string to kebab-case + * @param {String} str Text to be kebabize'd + * @returns The @param str in kebab-case + */ + static kebabize(str) { + return str.split('').map((letter, idx) => { + return letter.toUpperCase() === letter + ? `${idx !== 0 ? 
'-' : ''}${letter.toLowerCase()}` + : letter; + }).join(''); + } } module.exports = CeiUtils; diff --git a/src/lib/PositionCrawler.js b/src/lib/PositionCrawler.js index ee2746d..537a0c3 100644 --- a/src/lib/PositionCrawler.js +++ b/src/lib/PositionCrawler.js @@ -4,7 +4,9 @@ const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); const AxiosWrapper = require('./AxiosWrapper'); const URLS = { - GET_DATA: 'https://investidor.b3.com.br/api/extrato/v1/posicao/:page' + LIST_DATA: 'https://investidor.b3.com.br/api/extrato/v1/posicao/:page', + DETAIL_1: 'https://investidor.b3.com.br/api/extrato/v1/posicao/detalhes/:category/:type/:id', + DETAIL_2: 'https://investidor.b3.com.br/api/extrato/v1/posicao/detalhes/:category/:id' }; class PositionCrawler { @@ -22,7 +24,7 @@ class PositionCrawler { if (options.debug) console.log(`[PositionCrawler] Crawling wallet position on date ${dateStr}`); - const response = await AxiosWrapper.request(URLS.GET_DATA, { + const response = await AxiosWrapper.request(URLS.LIST_DATA, { queryParams: { data: dateStr }, @@ -33,6 +35,39 @@ class PositionCrawler { return response; } + + /** + * Returns the detail of the given position + * @param {String} id - The UUID of the position given by CEI + * @param {String} category - The category of the position + * @param {String} type - The type of the position + * @param {typedefs.CeiCrawlerOptions} options - Options for the crawler + * @returns {Any} + */ + static async getPositionDetail(id, category, type, options = {}) { + if (options.debug) + console.log(`[PositionCrawler] Crawling wallet position detail for ${id} (${category}, ${type})`); + + const pathParams = { + id: id, + category: CeiUtils.kebabize(category), + type: CeiUtils.kebabize(type) + }; + + // Try to get the detail with type + try { + return await AxiosWrapper.request(URLS.DETAIL_1, { + pathParams: pathParams + }); + } catch (e) { + if (options.debug) + console.log(`[PositionCrawler] Failed getting detail for type and 
category ${type}, ${category}`); + } + + return await AxiosWrapper.request(URLS.DETAIL_2, { + pathParams: pathParams + }); + } } module.exports = PositionCrawler; \ No newline at end of file From d67dd2f22cfc979fcb3e4bfa901749ae7613505e Mon Sep 17 00:00:00 2001 From: Menighin Date: Sun, 1 Aug 2021 00:03:08 -0300 Subject: [PATCH 07/17] Account statement crawler --- src/lib/AccountStatementCrawler.js | 41 ++++++++++++++++++++++++++++++ src/lib/CeiCrawler.js | 14 ++++++++++ 2 files changed, 55 insertions(+) create mode 100644 src/lib/AccountStatementCrawler.js diff --git a/src/lib/AccountStatementCrawler.js b/src/lib/AccountStatementCrawler.js new file mode 100644 index 0000000..0deac76 --- /dev/null +++ b/src/lib/AccountStatementCrawler.js @@ -0,0 +1,41 @@ +const typedefs = require("./typedefs"); +const CeiUtils = require('./CeiUtils'); +const AxiosWrapper = require('./AxiosWrapper'); + +const URLS = { + LIST_DATA: 'https://investidor.b3.com.br/api/extrato/v1/movimentacao/:page', +}; + +class AccountStatementCrawler { + + /** + * Get data from the position screen + * @param {Date} startDate - The start date of the range + * @param {Date} endDate - The end date of the range + * @param {Number} page - The page of the data + * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler + * @returns {Promise} - List of Stock histories + */ + static async getAccountStatement(startDate, endDate, page, options = {}) { + const startDateStr = CeiUtils.getDateForQueryParam(startDate || new Date(options.lastExecutionInfo.generalDate.getTime() - 1000 * 60 * 60 * 24)); + const endDateStr = CeiUtils.getDateForQueryParam(endDate || options.lastExecutionInfo.generalDate); + + if (options.debug) + console.log(`[AccountStatementCrawler] Crawling statement for period ${startDateStr} - ${endDateStr}`); + + const response = await AxiosWrapper.request(URLS.LIST_DATA, { + queryParams: { + dataInicio: startDateStr, + dataFim: endDateStr, + }, + pathParams: { + page: page + } + 
}); + + return response; + } + +} + +module.exports = AccountStatementCrawler; \ No newline at end of file diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index 823ef18..03ec873 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -1,4 +1,5 @@ const PositionCrawler = require('./PositionCrawler'); +const AccountStatementCrawler = require('./AccountStatementCrawler'); const LastExecutionCrawler = require('./LastExecutionCrawler'); const typedefs = require("./typedefs"); const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); @@ -90,6 +91,19 @@ class CeiCrawler { return await PositionCrawler.getPositionDetail(id, category, type, this.options); } + + /** + * Returns the stock history + * @param {Date} [startDate] - The start date to filter + * @param {Date} [endDate] - The end date to filter + * @param {Number} [page=1] - The page of the data + * @returns {Promise} - List of Stock histories + */ + async getAccountStatement(startDate = null, endDate = null, page = 1) { + await this._login(); + return await AccountStatementCrawler.getAccountStatement(startDate, endDate, page, this.options); + } + } module.exports = CeiCrawler; From ddf54a9ae97ee39235171dc8988960a78608b1f8 Mon Sep 17 00:00:00 2001 From: Menighin Date: Mon, 2 Aug 2021 00:00:16 -0300 Subject: [PATCH 08/17] StockTransactionsCrawler --- src/lib/AccountStatementCrawler.js | 7 +- src/lib/AxiosWrapper.js | 4 + src/lib/CeiCrawler.js | 21 +++- src/lib/CeiCrawlerError.js | 3 +- src/lib/CeiUtils.js | 166 ++-------------------------- src/lib/StockTransactionsCrawler.js | 42 +++++++ 6 files changed, 79 insertions(+), 164 deletions(-) create mode 100644 src/lib/StockTransactionsCrawler.js diff --git a/src/lib/AccountStatementCrawler.js b/src/lib/AccountStatementCrawler.js index 0deac76..b8b033a 100644 --- a/src/lib/AccountStatementCrawler.js +++ b/src/lib/AccountStatementCrawler.js @@ -17,9 +17,10 @@ class AccountStatementCrawler { * @returns {Promise} - List of Stock histories 
*/ static async getAccountStatement(startDate, endDate, page, options = {}) { - const startDateStr = CeiUtils.getDateForQueryParam(startDate || new Date(options.lastExecutionInfo.generalDate.getTime() - 1000 * 60 * 60 * 24)); - const endDateStr = CeiUtils.getDateForQueryParam(endDate || options.lastExecutionInfo.generalDate); - + const lastExecution = options.lastExecutionInfo.generalDate; + const startDateStr = CeiUtils.getDateForQueryParam(startDate || CeiUtils.subtractMonth(lastExecution)); + const endDateStr = CeiUtils.getDateForQueryParam(endDate || lastExecution); + if (options.debug) console.log(`[AccountStatementCrawler] Crawling statement for period ${startDateStr} - ${endDateStr}`); diff --git a/src/lib/AxiosWrapper.js b/src/lib/AxiosWrapper.js index ab38c03..f3f1f95 100644 --- a/src/lib/AxiosWrapper.js +++ b/src/lib/AxiosWrapper.js @@ -40,6 +40,10 @@ class AxiosWrapper { return response.data; } catch (e) { const msg = e.response.data == null || e.response.data.trim() == '' ? e.message : e.response.data; + + if (e.response.status === 401) + throw new CeiCrawlerError(CeiErrorTypes.UNAUTHORIZED, msg, e.response.status); + throw new CeiCrawlerError(CeiErrorTypes.BAD_REQUEST, msg, e.response.status); } } diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index 03ec873..c8c16b9 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -3,9 +3,9 @@ const AccountStatementCrawler = require('./AccountStatementCrawler'); const LastExecutionCrawler = require('./LastExecutionCrawler'); const typedefs = require("./typedefs"); const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); -const CeiUtils = require('./CeiUtils'); const CeiLoginService = require('./CeiLoginService'); const AxiosWrapper = require('./AxiosWrapper'); +const StockTransactionsCrawler = require('./StockTransactionsCrawler'); class CeiCrawler { @@ -68,7 +68,7 @@ class CeiCrawler { } /** - * Returns the stock history + * Returns the wallet position * @param {Date} 
[date] - The date of the position * @param {Number} [page=1] - The page of the data * @returns {Promise} - List of Stock histories @@ -91,9 +91,8 @@ class CeiCrawler { return await PositionCrawler.getPositionDetail(id, category, type, this.options); } - - /** - * Returns the stock history + /** + * Returns the account statement * @param {Date} [startDate] - The start date to filter * @param {Date} [endDate] - The end date to filter * @param {Number} [page=1] - The page of the data @@ -104,6 +103,18 @@ class CeiCrawler { return await AccountStatementCrawler.getAccountStatement(startDate, endDate, page, this.options); } + /** + * Returns the stock transactions + * @param {Date} [startDate] - The start date to filter + * @param {Date} [endDate] - The end date to filter + * @param {Number} [page=1] - The page of the data + * @returns {Promise} - List of Stock histories + */ + async getStockTransactions(startDate = null, endDate = null, page = 1) { + await this._login(); + return await StockTransactionsCrawler.getStockTransactions(startDate, endDate, page, this.options); + } + } module.exports = CeiCrawler; diff --git a/src/lib/CeiCrawlerError.js b/src/lib/CeiCrawlerError.js index 1c49d5c..02cf2ba 100644 --- a/src/lib/CeiCrawlerError.js +++ b/src/lib/CeiCrawlerError.js @@ -14,7 +14,8 @@ const CeiErrorTypes = Object.freeze({ SESSION_HAS_EXPIRED: 'SESSION_HAS_EXPIRED', NAVIGATION_TIMEOUT: 'NAVIGATION_TIMEOUT', INVALID_LOGIN_STRATEGY: 'INVALID_LOGIN_STRATEGY', - BAD_REQUEST: 'BAD_REQUEST' + BAD_REQUEST: 'BAD_REQUEST', + UNAUTHORIZED: 'UNAUTHORIZED' }); module.exports = { diff --git a/src/lib/CeiUtils.js b/src/lib/CeiUtils.js index a862dbd..de47c11 100644 --- a/src/lib/CeiUtils.js +++ b/src/lib/CeiUtils.js @@ -1,13 +1,4 @@ class CeiUtils { - /** - * Returns a date in the format dd/MM/yyyy for input at CEI - * @param {Date} date - Date to be parsed - */ - static getDateForInput(date) { - return `${date.getDate().toString().padStart(2, "0")}/${(date.getMonth() + 1) - 
.toString() - .padStart(2, "0")}/${date.getFullYear()}`; - } /** * Returns a date in the format yyyy-MM-dd for input at CEI @@ -17,41 +8,6 @@ class CeiUtils { return date.toISOString().slice(0,10); } - /** - * Return a date object given a date string - * @param {String} dateStr Date string in dd/MM/yyyy format - */ - static getDateFromInput(dateStr) { - const [day, month, year] = dateStr.split("/").map((o) => parseInt(o)); - return new Date(year, month - 1, day); - } - - /** - * Parse the table data to its type configuration - * @param {Array} tableData - The data of the table, an array of objects - * @param {Object} tableDefinition - Object defining the table types in format (column, type) - */ - static parseTableTypes(tableData, tableDefinition) { - // Helper function - const parseValue = (value, type) => { - if (type === "string") return value; - if (type === "int") return parseInt(value.replace(".", "")); - if (type === "float") - return parseFloat(value.replace(".", "").replace(",", ".")); - if (type === "date") - return value === "01/01/0001" - ? null - : new Date(value.split("/").reverse()); - }; - - return tableData.map((row) => - Object.keys(tableDefinition).reduce((p, c) => { - p[c] = parseValue(row[c], tableDefinition[c]); - return p; - }, {}) - ); - } - /** * @param {Number} ms - Time to sleep in miliseconds * @returns {Promise} - Promise @@ -60,13 +16,6 @@ class CeiUtils { return new Promise((resolve) => setTimeout(resolve, ms)); } - /** - * This callback is displayed as part of the Requester class. 
- * @callback CheckRetryCallback - * @param {Error} e - Exception error - * @returns {Boolean} True if need retry or False if not - */ - /** * @param {Promise|Function} callback - Time to sleep in miliseconds * @param {Number} [attempts=5] - Number of attempts before throw exception @@ -96,110 +45,6 @@ class CeiUtils { return result; } - /** - * Returns FormData in string format from DOM - * @param {cheerio.Root} dom - DOM of the page - * @param {string[]} filterFields - List of fields to be selected - * @param {Object} [extraFormValues] - Extra fields or overlapping values - * @returns {string} - FormData in string format - */ - static extractFormDataFromDOM( - dom, - filterFields, - extraFormValues = {}, - debugg = false - ) { - const allFields = dom("input, select") - .map((_, el) => ({ - name: el.attribs.name, - value: el.attribs.value || "", - })) - .get() - .reduce( - (form, item) => { - form[item.name] = item.value; - return form; - }, - { __ASYNCPOST: true, ...extraFormValues } - ); - - const form = filterFields.reduce((dict, field) => { - if (field in allFields) { - dict[field] = allFields[field]; - } - return dict; - }, {}); - - if (debugg) console.log(form); - - return new URLSearchParams(form).toString(); - } - - /** - * Update value fields of the DOM - * @param {cheerio.Root} dom - DOM of the page - * @param {object[]} fieldsValue - List of fields to be changed - */ - static updateFieldsDOM(dom, fields) { - fields.forEach((field) => { - const i = dom(`#${field.id}`); - if (i && field.value !== "0") { - i.attr("value", field.value); - } - }); - } - - /** - * Returns FormData in string format from DOM - * @param {string} responseTxt - Response in text format - * @returns {Array} - List of fields and their respective values - */ - static extractUpdateForm(responseTxt) { - return responseTxt - .split("\n") - .slice(-1)[0] - .trim() - .replace(/\|\|/g, "|") - .split("|") - .map((str, idx, array) => { - if (str.includes("hiddenField")) { - return { - id: 
array[idx + 1], - value: array[idx + 2], - }; - } - - return null; - }) - .filter((it) => it); - } - - /** - * Returns message post response - * @param {String} responseTxt - Response in text format - * @returns {Object} - Status: {type, message} - Type: 0: Info; 1: Warning; 2: Error - */ - static extractMessagePostResponse(responseTxt) { - try { - const parameters = responseTxt - .split("\n") - .slice(-1)[0] - .split("CEIWeb.IncluirMensagem")[1] - .split(";")[0] - .trim(); - const arrayStr = `[${parameters.slice(1).slice(0, -1)}]` - .replace(/"/g, '\\"') - .replace(/'/g, '"'); - const args = JSON.parse(arrayStr); - return { - type: args[0], - message: args[1], - }; - } catch { - return {}; - } - } - /** * Convert the string to kebab-case * @param {String} str Text to be kebabize'd @@ -212,6 +57,17 @@ class CeiUtils { : letter; }).join(''); } + + /** + * Subtract months from a given date + * @param {Date} date The date to subtract months from + * @param {Number} [qtyMonth=1] The amount of months to be subtracted + */ + static subtractMonth(date, qtyMonth = 1) { + const newDate = new Date(date.getTime()); + newDate.setMonth(newDate.getMonth() - qtyMonth); + return newDate; + } } module.exports = CeiUtils; diff --git a/src/lib/StockTransactionsCrawler.js b/src/lib/StockTransactionsCrawler.js new file mode 100644 index 0000000..0e95647 --- /dev/null +++ b/src/lib/StockTransactionsCrawler.js @@ -0,0 +1,42 @@ +const typedefs = require("./typedefs"); +const CeiUtils = require('./CeiUtils'); +const AxiosWrapper = require('./AxiosWrapper'); + +const URLS = { + LIST_DATA: 'https://investidor.b3.com.br/api/extrato/v1/negociacao-ativos/:page', +}; + +class StockTransactionsCrawler { + + /** + * Get data from the position screen + * @param {Date} startDate - The start date of the range + * @param {Date} endDate - The end date of the range + * @param {Number} page - The page of the data + * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler + * @returns 
{Promise} - List of Stock histories + */ + static async getStockTransactions(startDate, endDate, page, options = {}) { + const lastExecution = options.lastExecutionInfo.generalDate; + const startDateStr = CeiUtils.getDateForQueryParam(startDate || CeiUtils.subtractMonth(lastExecution)); + const endDateStr = CeiUtils.getDateForQueryParam(endDate || lastExecution); + + if (options.debug) + console.log(`[StockTransactionsCrawler] Crawling statement for period ${startDateStr} - ${endDateStr}`); + + const response = await AxiosWrapper.request(URLS.LIST_DATA, { + queryParams: { + dataInicio: startDateStr, + dataFim: endDateStr, + }, + pathParams: { + page: page + } + }); + + return response; + } + +} + +module.exports = StockTransactionsCrawler; \ No newline at end of file From 7e6d4e77cffd93e3540dd0236f631c694beaef95 Mon Sep 17 00:00:00 2001 From: Menighin Date: Mon, 2 Aug 2021 21:22:59 -0300 Subject: [PATCH 09/17] Provisioned Events crawler --- src/lib/AxiosWrapper.js | 3 +- src/lib/CeiCrawler.js | 23 ++++++++++++ src/lib/CeiCrawlerError.js | 3 +- src/lib/PositionCrawler.js | 6 ++- src/lib/ProvisionedEventsCrawler.js | 57 +++++++++++++++++++++++++++++ 5 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 src/lib/ProvisionedEventsCrawler.js diff --git a/src/lib/AxiosWrapper.js b/src/lib/AxiosWrapper.js index f3f1f95..0b1e48d 100644 --- a/src/lib/AxiosWrapper.js +++ b/src/lib/AxiosWrapper.js @@ -39,7 +39,8 @@ class AxiosWrapper { }); return response.data; } catch (e) { - const msg = e.response.data == null || e.response.data.trim() == '' ? e.message : e.response.data; + const msgStr = e.response.data != null ? (e.response.data.message || e.response.data.trim()) : e.message; + const msg = msgStr === '' ? 
e.message : msgStr; if (e.response.status === 401) throw new CeiCrawlerError(CeiErrorTypes.UNAUTHORIZED, msg, e.response.status); diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index c8c16b9..3722f13 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -6,6 +6,7 @@ const { CeiCrawlerError, CeiErrorTypes } = require('./CeiCrawlerError'); const CeiLoginService = require('./CeiLoginService'); const AxiosWrapper = require('./AxiosWrapper'); const StockTransactionsCrawler = require('./StockTransactionsCrawler'); +const ProvisionedEventsCrawler = require('./ProvisionedEventsCrawler'); class CeiCrawler { @@ -115,6 +116,28 @@ class CeiCrawler { return await StockTransactionsCrawler.getStockTransactions(startDate, endDate, page, this.options); } + /** + * Returns the provisioned events for the given date + * @param {Date} [date] - The date for the provisioned events + * @param {Number} [page=1] - The page of the data + * @returns {Promise} - List of Stock histories + */ + async getProvisionedEvents(date = null, page = 1) { + await this._login(); + return await ProvisionedEventsCrawler.getProvisionedEvents(date, page, this.options); + } + + /** + * Returns the detail of a provisioned event + * @param {String} id - The UUID of the provisioned event + * @param {typedefs.CeiCrawlerOptions} options - Options for the crawler + * @returns {Any} + */ + async getProvisionedEventDetail(id) { + await this._login(); + return await ProvisionedEventsCrawler.getProvisionedEventDetails(id, this.options); + } + } module.exports = CeiCrawler; diff --git a/src/lib/CeiCrawlerError.js b/src/lib/CeiCrawlerError.js index 02cf2ba..ea177d6 100644 --- a/src/lib/CeiCrawlerError.js +++ b/src/lib/CeiCrawlerError.js @@ -15,7 +15,8 @@ const CeiErrorTypes = Object.freeze({ NAVIGATION_TIMEOUT: 'NAVIGATION_TIMEOUT', INVALID_LOGIN_STRATEGY: 'INVALID_LOGIN_STRATEGY', BAD_REQUEST: 'BAD_REQUEST', - UNAUTHORIZED: 'UNAUTHORIZED' + UNAUTHORIZED: 'UNAUTHORIZED', + TOO_MANY_REQUESTS: 
'TOO_MANY_REQUESTS' }); module.exports = { diff --git a/src/lib/PositionCrawler.js b/src/lib/PositionCrawler.js index 537a0c3..abfe032 100644 --- a/src/lib/PositionCrawler.js +++ b/src/lib/PositionCrawler.js @@ -16,7 +16,7 @@ class PositionCrawler { * @param {Date} date - The date of the wallet. If none passed, the default of CEI will be used * @param {Number} page - The page of the data * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @returns {Promise} - List of Stock histories + * @returns {Promise<{}>} - List of Stock histories */ static async getPosition(date, page, options = {}) { const dateStr = CeiUtils.getDateForQueryParam(date || options.lastExecutionInfo.generalDate); @@ -57,9 +57,11 @@ class PositionCrawler { // Try to get the detail with type try { return await AxiosWrapper.request(URLS.DETAIL_1, { - pathParams: pathParams + peathParams: pathParams }); } catch (e) { + if (e.type === CeiErrorTypes.TOO_MANY_REQUESTS) + throw e; if (options.debug) console.log(`[PositionCrawler] Failed getting detail for type and category ${type}, ${category}`); } diff --git a/src/lib/ProvisionedEventsCrawler.js b/src/lib/ProvisionedEventsCrawler.js new file mode 100644 index 0000000..23e9a36 --- /dev/null +++ b/src/lib/ProvisionedEventsCrawler.js @@ -0,0 +1,57 @@ +const typedefs = require("./typedefs"); +const CeiUtils = require('./CeiUtils'); +const AxiosWrapper = require('./AxiosWrapper'); + +const URLS = { + LIST_DATA: 'https://investidor.b3.com.br/api/extrato/v1/eventos-provisionados/:page', + DETAIL: 'https://investidor.b3.com.br/api/extrato/v1/eventos-provisionados/detalhes/:id', +}; + +class ProvisionedEventsCrawler { + + /** + * Get data from the position screen + * @param {Date} date - The date of the wallet. 
If none passed, the default of CEI will be used + * @param {Number} page - The page of the data + * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler + * @returns {Any} - List of Stock histories + */ + static async getProvisionedEvents(date, page, options = {}) { + const dateStr = CeiUtils.getDateForQueryParam(date || options.lastExecutionInfo.generalDate); + + if (options.debug) + console.log(`[ProvisionedEventsCrawler] Crawling on date ${dateStr}`); + + const response = await AxiosWrapper.request(URLS.LIST_DATA, { + queryParams: { + data: dateStr + }, + pathParams: { + page: page + } + }); + + return response; + } + + /** + * Returns the detail of the given position + * @param {String} id - The UUID of the position given by CEI + * @param {String} category - The category of the position + * @param {String} type - The type of the position + * @param {typedefs.CeiCrawlerOptions} options - Options for the crawler + * @returns {Any} + */ + static async getProvisionedEventDetails(id, options = {}) { + if (options.debug) + console.log(`[ProvisionedEventsCrawler] Crawling detail for ${id}`); + + return await AxiosWrapper.request(URLS.DETAIL, { + pathParams: { + id: id + } + }); + } +} + +module.exports = ProvisionedEventsCrawler; \ No newline at end of file From 7a7439335db433ed75f8ae6b96a8fe97c74b8677 Mon Sep 17 00:00:00 2001 From: Menighin Date: Tue, 3 Aug 2021 18:41:07 -0300 Subject: [PATCH 10/17] IPO Crawler --- src/lib/CeiCrawler.js | 23 +++++++++++++++++ src/lib/IpoCrawler.js | 57 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 src/lib/IpoCrawler.js diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index 3722f13..d0ff41e 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -7,6 +7,7 @@ const CeiLoginService = require('./CeiLoginService'); const AxiosWrapper = require('./AxiosWrapper'); const StockTransactionsCrawler = require('./StockTransactionsCrawler'); const 
ProvisionedEventsCrawler = require('./ProvisionedEventsCrawler'); +const IpoCrawler = require('./IpoCrawler'); class CeiCrawler { @@ -138,6 +139,28 @@ class CeiCrawler { return await ProvisionedEventsCrawler.getProvisionedEventDetails(id, this.options); } + /** + * Returns the IPOs + * @param {Date} [date] - The date for the provisioned events + * @param {Number} [page=1] - The page of the data + * @returns {Promise} - List of Stock histories + */ + async getIPOs(date = null, page = 1) { + await this._login(); + return await IpoCrawler.getIPOs(date, page, this.options); + } + + /** + * Returns the detail of an IPO + * @param {String} id - The UUID of the IPO event + * @param {typedefs.CeiCrawlerOptions} options - Options for the crawler + * @returns {Any} + */ + async getIPODetail(id) { + await this._login(); + return await IpoCrawler.getIPODetail(id, this.options); + } + } module.exports = CeiCrawler; diff --git a/src/lib/IpoCrawler.js b/src/lib/IpoCrawler.js new file mode 100644 index 0000000..01c5088 --- /dev/null +++ b/src/lib/IpoCrawler.js @@ -0,0 +1,57 @@ +const typedefs = require("./typedefs"); +const CeiUtils = require('./CeiUtils'); +const AxiosWrapper = require('./AxiosWrapper'); + +const URLS = { + LIST_DATA: 'https://investidor.b3.com.br/api/extrato/v1/ofertas-publicas/:page', + DETAIL: 'https://investidor.b3.com.br/api/extrato/v1/ofertas-publicas/detalhes/:id', +}; + +class IpoCrawler { + + /** + * Get data from the position screen + * @param {Date} date - The date of the wallet. 
If none passed, the default of CEI will be used + * @param {Number} page - The page of the data + * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler + * @returns {Any} - List of Stock histories + */ + static async getIPOs(date, page, options = {}) { + const dateStr = CeiUtils.getDateForQueryParam(date || options.lastExecutionInfo.generalDate); + + if (options.debug) + console.log(`[IpoCrawler] Crawling on date ${dateStr}`); + + const response = await AxiosWrapper.request(URLS.LIST_DATA, { + queryParams: { + data: dateStr + }, + pathParams: { + page: page + } + }); + + return response; + } + + /** + * Returns the detail of the given position + * @param {String} id - The UUID of the position given by CEI + * @param {String} category - The category of the position + * @param {String} type - The type of the position + * @param {typedefs.CeiCrawlerOptions} options - Options for the crawler + * @returns {Any} + */ + static async getIPODetail(id, options = {}) { + if (options.debug) + console.log(`[IpoCrawler] Crawling detail for ${id}`); + + return await AxiosWrapper.request(URLS.DETAIL, { + pathParams: { + id: id + } + }); + } +} + +module.exports = IpoCrawler; \ No newline at end of file From 9c0f4d60163d9885831326a7ae0c75371da378d3 Mon Sep 17 00:00:00 2001 From: Menighin Date: Tue, 3 Aug 2021 20:31:23 -0300 Subject: [PATCH 11/17] Consolidated Values crawler --- src/lib/CeiCrawler.js | 12 ++++++++++++ src/lib/ConsolidatedValueCrawler.js | 28 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 src/lib/ConsolidatedValueCrawler.js diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index d0ff41e..74f8c50 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -8,6 +8,7 @@ const AxiosWrapper = require('./AxiosWrapper'); const StockTransactionsCrawler = require('./StockTransactionsCrawler'); const ProvisionedEventsCrawler = require('./ProvisionedEventsCrawler'); const IpoCrawler = require('./IpoCrawler'); 
+const ConsolidatedValueCrawler = require('./ConsolidatedValueCrawler'); class CeiCrawler { @@ -69,6 +70,17 @@ class CeiCrawler { this._isLogged = true; } + /** + * Returns the consolidated values + * @param {Date} [date] - The date of the position + * @param {Number} [page=1] - The page of the data + * @returns {Promise} - List of Stock histories + */ + async getConsolidatedValues() { + await this._login(); + return await ConsolidatedValueCrawler.getConsolidatedValues(this.options); + } + /** * Returns the wallet position * @param {Date} [date] - The date of the position diff --git a/src/lib/ConsolidatedValueCrawler.js b/src/lib/ConsolidatedValueCrawler.js new file mode 100644 index 0000000..cf56ee8 --- /dev/null +++ b/src/lib/ConsolidatedValueCrawler.js @@ -0,0 +1,28 @@ +const typedefs = require("./typedefs"); +const CeiUtils = require('./CeiUtils'); +const AxiosWrapper = require('./AxiosWrapper'); + +const URLS = { + DATA: 'https://investidor.b3.com.br/api/investidor/v1/posicao/total-acumulado', +}; + +class ConsolidatedValueCrawler { + + /** + * Get consolidated data + * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler + * @returns {Promise} - List of Stock histories + */ + static async getConsolidatedValues(options = {}) { + + if (options.debug) + console.log(`[ConsolidatedValueCrawler] Crawling the consolidated values`); + + const response = await AxiosWrapper.request(URLS.DATA); + + return response; + } + +} + +module.exports = ConsolidatedValueCrawler; \ No newline at end of file From a547c7131a8b73f18778311fe437a301dd37e77e Mon Sep 17 00:00:00 2001 From: Menighin Date: Wed, 4 Aug 2021 09:04:57 -0300 Subject: [PATCH 12/17] Documenting --- README.md | 507 ++++++++-------------------- src/lib/AccountStatementCrawler.js | 4 +- src/lib/CeiCrawler.js | 19 +- src/lib/ConsolidatedValueCrawler.js | 3 +- src/lib/PositionCrawler.js | 6 +- src/lib/typedefs.js | 209 ++---------- 6 files changed, 200 insertions(+), 548 deletions(-) diff --git 
a/README.md b/README.md index ae8b07d..f847235 100644 --- a/README.md +++ b/README.md @@ -49,410 +49,203 @@ ceiCrawler.login(); // Login é opcional, pois antes de cada método o cei-crawl ``` ### Métodos disponíveis -#### getWallet(_date_) -Retorna os dados das carteiras no CEI. As carteiras contém as posições consolidades de ativos e tesouro direto. -O retorno será uma lista com cada item representando os dados de uma instituição e conta. -O método recebe uma data como parâmetro para pegar a foto das carteiras no dia escolhido. Se nenhuma data for passada, será utilizada a data padrao do CEI que é o dia corrente. O CEI disponibiliza datas somente em um range de 2 meses, aparentemente. +#### getConsolidatedValues() +Retorna os investimentos consolidados num valor total e divididos em subcategorias ```javascript -let wallets = await ceiCrawler.getWallet(date); -``` -Resultado: -```javascript -[ - { - "institution": "1111 - INTER DTVM LTDA", - "account": "111111", - "stockWallet": [ - { - "company": "BANCO INTER", - "stockType": "PN N2", - "code": "BIDI4", - "isin": "BRBIDIACNPR0", - "price": 11.43, - "quantity": 100, - "quotationFactor": 1, - "totalValue": 1143 - }, - { - "company": "CENTAURO", - "stockType": "ON NM", - "code": "CNTO3", - "isin": "BRCNTOACNOR5", - "price": 29, - "quantity": 100, - "quotationFactor": 1, - "totalValue": 2900 - } - ], - "stockGuaranteesWallet": [ - { - "company": "BANCO INTER", - "stockType": "PN N2", - "code": "BIDI4", - "isin": "BRBIDIACNPR0", - "price": 11.43, - "quantity": 100, - "quotationFactor": 1, - "totalValue": 1143 - } - ], - "nationalTreasureWallet": [] - }, - { - "institution": "222222 - RICO INVESTIMENTOS - GRUPO XP", - "account": "2222222", - "stockWallet": [ - { - "company": "TENDA", - "stockType": "ON NM", - "code": "TEND3", - "isin": "BRTENDACNOR4", - "price": 25.14, - "quantity": 100, - "quotationFactor": 1, - "totalValue": 2514 - } - ], - "nationalTreasureWallet": [ - { - "code": "Tesouro IPCA+ 2024", - 
"expirationDate": "2019-06-12T03:00:00.000Z", - "investedValue": 1000.00, - "grossValue": 1500.00, - "netValue": 1400.00, - "quantity": 0.25, - "blocked": 0 - } - ] - } -] -``` - -#### getWalletOptions() -Retorna as opções dos formulários da página de carteira de ativos -```javascript -const walletOptions = await ceiCrawler.getWalletOptions(); +let consolidated = await ceiCrawler.getConsolidatedValues(); ``` Resultado: ```javascript { - "minDate": "02/06/2020", - "maxDate": "31/07/2020", - "institutions": [ + "total": 10000, + "subTotais": [ { - "value": "123", - "label": "123 - RICO INVESTIMENTOS - GRUPO XP", - "accounts": [ - "12345" - ] + "categoriaProduto": "Renda Variável", + "totalPosicao": 5000, + "percentual": 0.5 }, { - "value": "321", - "label": "321 - INTER DTVM LTDA", - "accounts": [ - "54321" - ] + "categoriaProduto": "Tesouro Direto", + "totalPosicao": 5000, + "percentual": 0.5 } ] } ``` -#### getStockHistory(_startDate_, _endDate_) -Método que processa o histórico e o resumo do histórico de compra e venda de ações. O retorno será um uma lista com todas operações de compra ou venda efetuadas dentro do período informado, se nenhuma data for passada o método retornará todo o histórico disponível. -```javascript -let stockHistory = await ceiCrawler.getStockHistory(startDate, endDate); -``` -Resultado: -```javascript -[ - { - institution: 'Banco Inter', - account: 12345, - stockHistory: [ - { - date: "2019-06-12T03:00:00.000Z", - operation: "C", // C (Compra) ou V (Venda), - market: "Mercado a Vista", - expiration: "", - code: "BTOW3", - name: "B2W DIGITAL ON NM", - quantity: 200, - price: 32.2, - totalValue: 6440, - cotation: 1 - } - ] - } -] -``` -#### getStockHistoryOptions() -Retorna as opções dos formulários da página de negociações de ativos +#### getPosition(_date_, _page_) +Retorna as posições da tela "Posição" em todas as categorias de investimentos. 
+ +| Parâmetro | Tipo | Default | Descrição | +|------------|--------|---------|--------------------------------------------------------------------------------------------------------------| +| **_date_** | Date | _null_ | Data da posição. Caso seja passado _null_ ou nenhum valor, será usada a ultima data de processamento do CEI. | +| **_page_** | Number | 1 | Paginação dos dados. Por default retorna a primeira página. | + ```javascript -const stockHistoryOptions = await ceiCrawler.getStockHistoryOptions(); +let position = await ceiCrawler.getPosition(); ``` Resultado: ```javascript { - "minDate": "08/02/2019", - "maxDate": "31/07/2020", - "institutions": [ + "paginaAtual": 1, + "totalPaginas": 1, + "itens": [ { - "value": "123", - "label": "123 - RICO INVESTIMENTOS - GRUPO XP", - "accounts": [ - "12345" - ] + "categoriaProduto": "RendaVariavel", + "tipoProduto": "Acao", + "descricaoTipoProduto": "Ações", + "posicoes": [ + { + "id": "gfw2455-8a79-4127-990b-587sa37", + "temBloqueio": false, + "instituicao": "INTER DISTRIBUIDORA DE TITULOS E VALORES MOBILIARIOS LTDA", + "quantidade": 100, + "valorAtualizado": 2377.00, + "precoFechamento": 23.77, + "produto": "BIDI4 - BANCO INTER S.A.", + "tipo": "PN", + "marcacoes": [], + "codigoNegociacao": "BIDI4", + "documentoInstituicao": "358743882", + "existeLogotipo": false, + "disponivel": 100, + "documento": "48377283492", + "razaoSocial": "BANCO INTER S.A.", + "codigoIsin": "BRBRHEU2", + "distribuicao": "114", + "escriturador": "BANCO BRADESCO S/A", + "valorBruto": 0 + } + ], + "totalPosicao": 2377.00, + "totalItemsPagina": 1 }, { - "value": "321", - "label": "321 - INTER DTVM LTDA", - "accounts": [ - "54321" - ] + "categoriaProduto": "TesouroDireto", + "tipoProduto": "TesouroDireto", + "descricaoTipoProduto": "Tesouro Direto", + "posicoes": [ + { + "id": "hfd4564-e70a-4596-93fd-987654dvbhw", + "temBloqueio": false, + "instituicao": "XP INVESTIMENTOS CCTVM S/A", + "quantidade": 1.01, + "valorAtualizado": 2200, + 
"vencimento": "2024-08-15T00:00:00", + "valorAplicado": 2000, + "produto": "Tesouro IPCA+ 2024", + "marcacoes": [], + "documentoInstituicao": "8573938583", + "existeLogotipo": false, + "indexador": "IPCA", + "disponivel": 1.01, + "documento": "7658493485", + "codigoIsin": "VRSIYASU@", + "valorBruto": 2038, + "nomeTituloPublico": "Tesouro IPCA+ 2024", + "valorLiquido": 29882, + "percRentabilidadeContratada": 4.71 + } + ], + "totalPosicao": 22000, + "totalItemsPagina": 5 } - ] + ], + "detalheStatusCode": 0, + "excecoes": [] } ``` +#### getPositionDetail(_id_, _category_, _type_) +Retorna o detalhe de uma posição da lista anterior. -#### getDividends(_date_) -Método que processa todos os dados disponíveis sobre proventos recebidos em um período e retorna como uma lista. Usualmente os proventos disponíveis na página do CEI são os creditados no mês atual e os já anunciados pela empresas com e sem data definida. Registros com date igual `null` são de proventos anunciados mas sem data definida de pagamento. +| Parâmetro | Tipo | Default | Descrição | +|----------------|--------|--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **_id_** | String | _undefined_ | UUID da posição. Foi observado que o UUID de uma mesma posição pode mudar ao longo do tempo e essa requisição falhar após pega a lista com o `getPosition()` | +| **_category_** | String | _undefined_ | Categoria da posição informada no método `getPosition()`. | +| **_type_** | String | _undefined_ | Tipo da posição informada no método `getPosition()`. | -Além disso, caso existam eventos de desdobramento de ações, eles serão retornados em uma propriedade específica. 
-```javascript -let dividends = await ceiCrawler.getDividends(date); -``` -Resultado: -```javascript -[ - { - "institution": "1099 - INTER DTVM LTDA", - "account": "12345", - "futureEvents": [ - { - "stock": "BANCO INTER", - "stockType": "PN N2", - "code": "BIDI4", - "date": "2020-08-20T03:00:00.000Z", - "type": "JUROS SOBRE CAPITAL PRÓPRIO", - "quantity": 200, - "factor": 1, - "grossValue": 7.88, - "netValue": 5.8 - }, - { - "stock": "CIA HERING", - "stockType": "ON NM", - "code": "HGTX3", - "date": null, - "type": "JUROS SOBRE CAPITAL PRÓPRIO", - "quantity": 100, - "factor": 1, - "grossValue": 21.96, - "netValue": 18.67 - }, - ], - "pastEvents": [ - { - "stock": "ITAUSA", - "stockType": "PN N1", - "code": "ITSA4", - "date": "2020-07-01T03:00:00.000Z", - "type": "DIVIDENDO", - "quantity": 300, - "factor": 1, - "grossValue": 6, - "netValue": 6 - } - ] - }, - { - "institution": "386 - RICO INVESTIMENTOS - GRUPO XP", - "account": "12345", - "futureEvents": [], - "pastEvents": [ - { - "stock": "FII CSHG LOG", - "stockType": "CI", - "code": "HGLG11", - "date": "2020-07-14T03:00:00.000Z", - "type": "RENDIMENTO", - "quantity": 100, - "factor": 1, - "grossValue": 78, - "netValue": 78 - } - ], - "splitEvents": [ - { - "stock": "B3", - "stockType": "ON NM", - "code": "B3SA3", - "type": "DESDOBRAMENTO DE AÇÕES", - "date": "2021-05-18T03:00:00.000Z", - "baseQuantity": 49, - "factor": 1, - "destinationCode": "B3SA3", - "quantity": 98, - "eventValue": 200, - "exerciseValue": 0 - } - ] - } -] -``` -#### getDividendsOptions() -Retorna as opções dos formulários da página de proventos ```javascript -const dividendsOptions = await ceiCrawler.getDividendsOptions(); +let positionDetail = await ceiCrawler.getPositionDetail('gfw2455-8a79-4127-990b-587sa37', 'RendaVariavel', 'Acao'); ``` Resultado: ```javascript { - "minDate": "27/07/2020", - "maxDate": "31/07/2020", - "institutions": [ - { - "value": "123", - "label": "123 - RICO INVESTIMENTOS - GRUPO XP", - "accounts": [ - "12345" - ] 
- }, - { - "value": "321", - "label": "321 - INTER DTVM LTDA", - "accounts": [ - "54321" - ] - } - ] + "codigoIsin": "BRBIDIACNPR0", + "distribuicao": "114", + "empresa": "BANCO INTER S.A.", + "escriturador": "BANCO BRADESCO S/A", + "codigoNegociacao": "BIDI4", + "disponivel": 100, + "indisponivel": 0, + "quantidade": 100, + "marcacoes": [], + "possuiMarcacoes": false, + "existeLogotipo": false, + "documentoInstituicao": "358743882" } ``` -#### getTreasure(_date_) -Método que processa todos os dados disponíveis sobre Tesouro Direto em um período e retorna como uma lista e também uma lista das transações. -```javascript -let treasures = await ceiCrawler.getTreasures(date); -``` -Resultado: -```javascript -[ - { - "institution": "3 - XP INVESTIMENTOS CCTVM S/A", - "account": "123456", - "treasures": [ - { - "code": "Tesouro IPCA+ 2045", - "expirationDate": "2045-05-15T03:00:00.000Z", - "investedValue": 12.34, - "grossValue": 13.43, - "netValue": 10.12, - "quantity": 0.01, - "blocked": 0, - "transactions": [ - { - "tradeDate": "2020-11-27T03:00:00.000Z", - "quantity": 0.01, - "price": 1234.56, - "notional": 12.34, - "profitability": "IPCA + 4,05%", - "grossProfitability": "IPCA + 566,89%", - "grossProfitabilityPercent": 12.34, - "grossValue": 45.67, - "investmentTerm": 18, - "taxBracket": 23.4, - "taxIrValue": 0.12, - "taxIofValue": 1.94, - "feeB3Value": 0, - "feeInstitutionValue": 0, - "netValue": 42.67 - } - ] - } - ] - } -] -``` -#### getTreasureOptions() -Retorna as opções dos formulários da página de tesouro direto +#### getAccountStatement(_startDate_, _endDate_, _page_) +Retorna as movimentações da aba "Movimentação" no CEI. + +| Parâmetro | Tipo | Default | Descrição | +|-----------------|--------|---------|-----------------------------------------------------------------------------------------------------------------------------| +| **_startDate_** | Date | _null_ | Data de inicio para trazer as movimentações. 
Caso `null`, será utilizada a ultima data de processamento do CEI menos 1 mês. | +| **_endDate_** | Date | _null_ | Data fim para trazer as movimentações. Caso `null`, será utilizada a ultima data de processamento do CEI. | +| **_page_** | Number | 1 | Paginação dos dados. Por default retorna a primeira página. | + ```javascript -const treasureOptions = await ceiCrawler.getTreasureOptions(); +let accountStatement = await ceiCrawler.getAccountStatement(); ``` Resultado: ```javascript { - "institutions": [ + "paginaAtual": 1, + "totalPaginas": 2, + "itens": [ { - "value": "123", - "label": "123 - RICO INVESTIMENTOS - GRUPO XP", - "accounts": [ - "12345" - ] + "data": "2021-08-02T00:00:00", + "movimentacoes": [ + { + "tipoOperacao": "Credito", + "tipoMovimentacao": "Juros Sobre Capital Próprio", + "nomeProduto": "BIDI4 - BANCO INTER S.A.", + "instituicao": "INTER DISTRIBUIDORA DE TITULOS E VALORES MOBILIARIOS LTDA", + "quantidade": 100, + "valorOperacao": 1.49, + "precoUnitario": 0.01 + } + ], + "totalItemsPagina": 1 }, { - "value": "321", - "label": "321 - INTER DTVM LTDA", - "accounts": [ - "54321" - ] + "data": "2021-07-30T00:00:00", + "movimentacoes": [ + { + "tipoOperacao": "Debito", + "tipoMovimentacao": "Transferência", + "nomeProduto": "ALZR11 - ALIANZA TRUST RENDA IMOBILIARIA FDO INV IMOB", + "instituicao": "RICO INVESTIMENTOS - GRUPO XP", + "quantidade": 5 + }, + { + "tipoOperacao": "Credito", + "tipoMovimentacao": "Transferência", + "nomeProduto": "ALZR11 - ALIANZA TRUST RENDA IMOBILIARIA FDO INV IMOB", + "instituicao": "XP INVESTIMENTOS CCTVM S/A", + "quantidade": 5 + } + ], + "totalItemsPagina": 2 } - ] + ], + "detalheStatusCode": 0, + "excecoes": [] } ``` -#### getIPOTransactions(startDate,endDate) -Retorna as transações de participação de ofertas públicas no período \[startDate,endDate\] -```javascript -const transactions = await getIPOTransactions(CeiUtils.getDateFromInput('03/09/2020'),CeiUtils.getDateFromInput('14/09/2020')); -``` -Resultado: 
-```javascript -[ - { - "institution": "114 - ITAU CV S/A", - "date": "2020-09-03T15:00:00.000Z", - "transactions": [ - { - "company": "EMPREENDIMEN", - "offerName": "PAGUE MENOS CVM 400", - "code": "PGMN3L", - "isin": "BRPGMNACNOR8", - "type": "OUTRO", - "buyMethod": "Varejo sem alocação prioritária", - "reservedAmount": 0, - "reservedValue": 10000, - "maxPrice": 0, - "price": 8.5, - "allocAmount": 55, - "allocValue": 467.5, - "date": "2020-09-02T03:00:00.000Z" - } - ] - }, - { - "institution": "114 - ITAU CV S/A", - "date": "2020-09-14T15:00:00.000Z", - "transactions": [ - { - "company": "PET CENTER C", - "offerName": "PET CENTER CVM 400", - "code": "PETZ3L", - "isin": "BRPETZACNOR2", - "type": "OUTRO", - "buyMethod": "Varejo com Lock up", - "reservedAmount": 0, - "reservedValue": 10000, - "maxPrice": 0, - "price": 13.75, - "allocAmount": 233, - "allocValue": 3203.75, - "date": "2020-09-11T03:00:00.000Z" - } - ] - } -] -``` + ## Opções Na criação de um `CeiCrawler` é possivel especificar alguns valores para o parâmetro `options` que modificam a forma que o crawler funciona. 
As opções são: diff --git a/src/lib/AccountStatementCrawler.js b/src/lib/AccountStatementCrawler.js index b8b033a..7c04867 100644 --- a/src/lib/AccountStatementCrawler.js +++ b/src/lib/AccountStatementCrawler.js @@ -9,12 +9,12 @@ const URLS = { class AccountStatementCrawler { /** - * Get data from the position screen + * Crawls the tab "Movimentacao" at CEI * @param {Date} startDate - The start date of the range * @param {Date} endDate - The end date of the range * @param {Number} page - The page of the data * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @returns {Promise} - List of Stock histories + * @returns {Promise} - The account statement */ static async getAccountStatement(startDate, endDate, page, options = {}) { const lastExecution = options.lastExecutionInfo.generalDate; diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index 74f8c50..0ea9cb7 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -72,9 +72,7 @@ class CeiCrawler { /** * Returns the consolidated values - * @param {Date} [date] - The date of the position - * @param {Number} [page=1] - The page of the data - * @returns {Promise} - List of Stock histories + * @returns {Promise} - The consolidated values */ async getConsolidatedValues() { await this._login(); @@ -82,18 +80,19 @@ class CeiCrawler { } /** - * Returns the wallet position + * Crawls the tab "Posição" at CEI * @param {Date} [date] - The date of the position * @param {Number} [page=1] - The page of the data - * @returns {Promise} - List of Stock histories + * @returns {Promise} - Wallet positions */ async getPosition(date = null, page = 1) { await this._login(); + return await PositionCrawler.getPosition(date, page, this.options); } /** - * Returns the detail of a position given by CEI. 
The format of the position differs from type to type + * Crawls the detail of an item on tab "Posição" * @param {String} id - The UUID of the position given by CEI * @param {String} category - The category of the position * @param {String} type - The type of the position @@ -106,11 +105,11 @@ class CeiCrawler { } /** - * Returns the account statement + * Crawls the tab "Movimentacao" at CEI * @param {Date} [startDate] - The start date to filter * @param {Date} [endDate] - The end date to filter * @param {Number} [page=1] - The page of the data - * @returns {Promise} - List of Stock histories + * @returns {Promise} - The account statement */ async getAccountStatement(startDate = null, endDate = null, page = 1) { await this._login(); @@ -169,10 +168,14 @@ class CeiCrawler { * @returns {Any} */ async getIPODetail(id) { + var a = this.test() + a.itens[0].name await this._login(); return await IpoCrawler.getIPODetail(id, this.options); } + + } module.exports = CeiCrawler; diff --git a/src/lib/ConsolidatedValueCrawler.js b/src/lib/ConsolidatedValueCrawler.js index cf56ee8..d8fb36f 100644 --- a/src/lib/ConsolidatedValueCrawler.js +++ b/src/lib/ConsolidatedValueCrawler.js @@ -1,5 +1,4 @@ const typedefs = require("./typedefs"); -const CeiUtils = require('./CeiUtils'); const AxiosWrapper = require('./AxiosWrapper'); const URLS = { @@ -11,7 +10,7 @@ class ConsolidatedValueCrawler { /** * Get consolidated data * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @returns {Promise} - List of Stock histories + * @returns {Promise} - The consolidated values */ static async getConsolidatedValues(options = {}) { diff --git a/src/lib/PositionCrawler.js b/src/lib/PositionCrawler.js index abfe032..f12fba4 100644 --- a/src/lib/PositionCrawler.js +++ b/src/lib/PositionCrawler.js @@ -16,7 +16,7 @@ class PositionCrawler { * @param {Date} date - The date of the wallet. 
If none passed, the default of CEI will be used * @param {Number} page - The page of the data * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @returns {Promise<{}>} - List of Stock histories + * @returns {Promise} - Wallet positions */ static async getPosition(date, page, options = {}) { const dateStr = CeiUtils.getDateForQueryParam(date || options.lastExecutionInfo.generalDate); @@ -37,12 +37,12 @@ class PositionCrawler { } /** - * Returns the detail of the given position + * Crawls the detail of an item on tab "Posição" * @param {String} id - The UUID of the position given by CEI * @param {String} category - The category of the position * @param {String} type - The type of the position * @param {typedefs.CeiCrawlerOptions} options - Options for the crawler - * @returns {Any} + * @returns {Any} */ static async getPositionDetail(id, category, type, options = {}) { if (options.debug) diff --git a/src/lib/typedefs.js b/src/lib/typedefs.js index 423feac..0eb78e8 100644 --- a/src/lib/typedefs.js +++ b/src/lib/typedefs.js @@ -34,197 +34,54 @@ */ /** - * @typedef StockOperation - * @property {Date} date - Date of the operation - * @property {String} operation - The operation C (buy) or V (sell) - * @property {String} market - The market the operation happened - * @property {String} expiration - The expiration of the operation - * @property {String} code - The code of the stock - * @property {String} name - Full name of the stock - * @property {Number} quantity - Quantity of stock bought - * @property {Number} price - Price payed for each stock - * @property {Number} totalValue - The total value payed - * @property {Number} quotationFactor - The quotation factor for the stock - * @memberof typdefs + * @typedef ConsolidatedSubValues + * @property {String} categoriaProduto - The category of the product + * @property {Number} totalPosicao - The total amount allocated in that category + * @property {Number} percentual - The percentage of that 
category over the total */ /** -* @typedef StockHistory -* @property {String} institution - Name of the institution -* @property {String} account - The institution's account number -* @property {StockOperation[]} stockHistory - List of operations for this institution and account -* @property {SummaryStockOperation[]} summaryStockHistory - List of operations for this institution and account -* @memberof typdefs -*/ - -/** - * @typedef SummaryStockOperation - * @property {String} code - The code of the summary stock - * @property {String} period - The period of the summary stock - * @property {Number} buyAmount - Purchase amount of the summary stock - * @property {Number} saleAmount - Sale amount of the summary stock - * @property {Number} averageBuyPrice - Average buy price of the summary stock - * @property {Number} averageSalePrice - Average sale price of the summary stock - * @property {Number} quantityNet - Quantity net of the summary stock - * @property {String} position - The position of the summary stock - * @memberof typdefs + * @typedef ConsolidatedValues + * @property {Number} total - The total amount allocated + * @property {ConsolidatedSubValues[]} subTotais - The total drilled into categories */ /** - * @typedef StockHistoryOptions - * @property {String} minDate - Minimum date which data is available - * @property {String} maxDate - Maximum date which data is available - * @property {InstitutionOption[]} institutions - Array of available institutions with its accounts + * @template T + * @typedef CeiListData + * @property {T[]} itens - The itens of the data + * @property {Number} detalheStatusCode - The status code of details + * @property {Any[]} excecoes - Exceptions + * @property {Number} paginaAtual - The number of the actual page + * @property {Number} totalPaginas - The total amount of pages to query */ /** -* @typedef InstitutionOption -* @property {String} label - Label for the institution option -* @property {String} value - Option value for 
institution -* @property {String[]} accounts - Accounts available for the given institution -*/ - -/** - * @typedef DividendData - * @property {String} institution - Name of the institution - * @property {String} account - The institution's account number - * @property {DividendEvent[]} futureEvents - List of future dividend events - * @property {DividendEvent[]} pastEvents - List of past dividend events - * @memberof typdefs + * @typedef PositionCategory + * @property {String} categoriaProduto - The category of the positions + * @property {String} tipoProduto - The type of the positions + * @property {String} descricaoTipoProduto - Positions description + * @property {Number} totalItemsPagina - The total of items in this category + * @property {Number} totalPosicao - The total amount of this category + * @property {Any[]} posicoes - The positions in this category */ /** - * @typedef DividendEvent - * @property {String} stockType - Type of Stock (ON, PN, CI) - * @property {String} code - The code of the stock - * @property {Date} date - Dividend payment date (can be a future date for scheduled payment) - * @property {String} type - Dividend type (Rendimento, JPC, Dividendo) - * @property {Number} quantity - Quantity of stock dividend is based - * @property {Number} factor - Multiply factor for each stock unit - * @property {Number} grossValue - Dividend value before taxes - * @property {Number} netValue - Dividend value after taxes - * @memberof typdefs + * @typedef AccountStatementEntry + * @property {String} tipoOperacao - The type of operation: "Credito" or "Debito" + * @property {String} tipoMovimentacao - The type of the transaction + * @property {String} nomeProduto - The name of the product related to the entry + * @property {String} instituicao - The broker where the entry happened + * @property {Number} quantidade - The quantity related to the entry + * @property {Number} valorOperacao - The value of the entry + * @property {number} precoUnitario - The 
unit price for the entry */ /** -* @typedef DividendsOptions -* @property {String} minDate - Minimum date which data is available -* @property {String} maxDate - Maximum date which data is available -* @property {InstitutionOption[]} institutions - Array of available institutions with its accounts -*/ - -/** -* @typedef AccountWallet -* @property {String} institution - Name of the institution -* @property {String} account - The institution's account number -* @property {StockWalletItem[]} stockWallet - List of stocks in the wallet in the given account and institution -* @property {NationalTreasuryItem[]} nationalTreasuryWallet - List of stocks in the wallet in the given account and institution -* @memberof typdefs -*/ - -/** -* @typedef StockWalletItem -* @property {String} company - The name of the company for the given stock -* @property {String} stockType - The type of the stock -* @property {String} code - The code of the stock -* @property {String} isin - The ISIN code of the stock -* @property {Number} price - The last price reported for the stock in the last day -* @property {String} quantity - The quantitu in the wallet for the stock -* @property {Number} quotationFactor - The quotation factor for the stock -* @property {Number} totalValue - The total value of that stock in your wallet, given the last price -* @memberof typdefs -*/ - -/** -* @typedef NationalTreasuryItem -* @property {String} code - The code of the national treasury -* @property {String} expirationDate - The expiration date of that item -* @property {String} investedValue - The value invested -* @property {String} grossValue - The gross value now -* @property {Number} netValue - The net value now -* @property {Number} quantity - The quantity of that treasury -* @property {Number} blocked - The quantity blocked of that treasury -* @memberof typdefs -*/ - -/** - * @typedef WalletOptions - * @property {String} minDate - Minimum date which data is available - * @property {String} maxDate - 
Maximum date which data is available - * @property {InstitutionOption[]} institutions - Array of available institutions with its accounts - */ - -/** - * @typedef TreasureTransactionItem - * @property {String} tradeDate - The application date of that transaction - * @property {String} quantity - The quantity date of that transaction - * @property {String} price - The price of that transaction - * @property {String} notional - The notional that transaction - * @property {Number} profitability - The profitability that transaction - * @property {Number} grossProfitability - The gross profitability that transaction - * @property {Number} grossProfitabilityPercent - The gross profitability in percent that transaction - * @property {Number} grossValue - The gross value that transaction - * @property {Number} investmentTerm - The investment term that transaction - * @property {Number} taxBracket - The tax bracket that transaction - * @property {Number} taxIrValue - The tax IR value that transaction - * @property {Number} taxIofValue - The tax IOF value that transaction - * @property {Number} feeB3Value - The fee B3 value that transaction - * @property {Number} feeInstitutionValue - The fee Finance Institution that transaction - * @property {Number} netValue - The new value that transaction - * @memberof typdefs - */ - -/** - * @typedef TreasureItem - * @property {String} code - The code of the national treasury - * @property {String} expirationDate - The expiration date of that item - * @property {String} investedValue - The value invested - * @property {String} grossValue - The gross value now - * @property {Number} netValue - The net value now - * @property {Number} quantity - The quantity of that treasury - * @property {Number} blocked - The quantity blocked of that treasury - * @property {TreasureTransactionItem[]} transactions - The transactions of that treasury - * @memberof typdefs - */ - -/** - * @typedef TreasureOptions - * @property {String} minDate - Minimum 
date which data is available - * @property {String} maxDate - Maximum date which data is available - * @property {InstitutionOption[]} institutions - Array of available institutions with its accounts - */ - - /** - * @typedef IPOTransaction - * @property {String} company - The name of the company - * @property {String} offerName - The IPO offer name - * @property {String} code - The company code in the IPO - * @property {String} isin - The IPO ISIN - * @property {String} type - The type of the IPO - * @property {String} buyMethod - The buy method of the operation - * @property {Number} reservedAmount - The reserved amount in the IPO - * @property {Number} reservedValue - The reserved value - * @property {Number} maxPrice - Max price the IPO got - * @property {Number} price - Current price - * @property {Number} allocAmount - The amount allocated - * @property {Number} allocValue - The value allocated - * @property {Date} date - The date of the reservation - * @memberof typdefs - */ - - /** - * @typedef IPOData - * @property {String} institution - The name of the institution the operation happened - * @property {Date} date - The date of the operations - * @property {IPOTransaction[]} transactions - The IPO transactions for this institution and date - * @memberof typdefs - */ - - /** - * @typedef IPOOptions - * @property {String} minDate - Minimum date which data is available - * @property {String} maxDate - Maximum date which data is available - * @property {InstitutionOption[]} institutions - Array of available institutions with its accounts + * @typedef AccountStatement + * @property {String} data - The date of the statement entries + * @property {Number} totalItemsPagina - The number of items at the date + * @property {AccountStatementEntry[]} movimentacoes - The entries for this date */ exports.unused = {}; From 1a85a7c824064c3723face7750190c5a986dfb10 Mon Sep 17 00:00:00 2001 From: Menighin Date: Thu, 5 Aug 2021 22:40:03 -0300 Subject: [PATCH 13/17] Documenting 
more... --- README.md | 198 +++++++++++++++++++++++++++- src/lib/CeiCrawler.js | 46 +++---- src/lib/IpoCrawler.js | 9 +- src/lib/ProvisionedEventsCrawler.js | 17 ++- src/lib/StockTransactionsCrawler.js | 10 +- src/lib/typedefs.js | 110 ++++++++++++++++ 6 files changed, 340 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index f847235..fb64813 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ ceiCrawler.login(); // Login é opcional, pois antes de cada método o cei-crawl ``` ### Métodos disponíveis -#### getConsolidatedValues() +#### `getConsolidatedValues()` Retorna os investimentos consolidados num valor total e divididos em subcategorias ```javascript @@ -74,7 +74,7 @@ Resultado: } ``` -#### getPosition(_date_, _page_) +#### `getPosition(_date_, _page_)` Retorna as posições da tela "Posição" em todas as categorias de investimentos. | Parâmetro | Tipo | Default | Descrição | @@ -157,12 +157,12 @@ Resultado: } ``` -#### getPositionDetail(_id_, _category_, _type_) +#### `getPositionDetail(_id_, _category_, _type_)` Retorna o detalhe de uma posição da lista anterior. | Parâmetro | Tipo | Default | Descrição | |----------------|--------|--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| -| **_id_** | String | _undefined_ | UUID da posição. Foi observado que o UUID de uma mesma posição pode mudar ao longo do tempo e essa requisição falhar após pega a lista com o `getPosition()` | +| **_id_** | String | _undefined_ | UUID da posição. Foi observado que o UUID de uma mesma posição pode mudar ao longo do tempo e essa requisição falhar após pegar a lista com o `getPosition()` | | **_category_** | String | _undefined_ | Categoria da posição informada no método `getPosition()`. | | **_type_** | String | _undefined_ | Tipo da posição informada no método `getPosition()`. 
| @@ -187,7 +187,7 @@ Resultado: } ``` -#### getAccountStatement(_startDate_, _endDate_, _page_) +#### `getAccountStatement(_startDate_, _endDate_, _page_)` Retorna as movimentações da aba "Movimentação" no CEI. | Parâmetro | Tipo | Default | Descrição | @@ -246,6 +246,194 @@ Resultado: } ``` +#### `getIpos(_date_, _page_)` +Retorna os IPOs da tela "Ofertas Públicas" no CEI. + +| Parâmetro | Tipo | Default | Descrição | +|------------|--------|---------|--------------------------------------------------------------------------------------------------------------| +| **_date_** | Date | _null_ | Data de consulta. Caso seja passado _null_ ou nenhum valor, será usada a ultima data de processamento do CEI.| +| **_page_** | Number | 1 | Paginação dos dados. Por default retorna a primeira página. | + +```javascript +let ipos = await ceiCrawler.getIPOs(); +``` +Resultado: +```javascript +{ + "paginaAtual": 1, + "totalPaginas": 1, + "itens": [ + { + "data": "2021-07-27T00:00:00", + "ofertasPublicas": [ + { + "id": "c80c8b8f-62d2-4b48-b242-b0f310cfa95a", + "dataLiquidacao": "2021-07-27T00:00:00", + "nomeEmpresa": "INVESTO ETF MSCI US TECHNOLOGY FDO INV IND INV EXT", + "tipoOferta": "OUTRO", + "oferta": "ETF INVESTO", + "nomeInstituicao": "INTER DISTRIBUIDORA DE TITULOS E VALORES MOBILIARIOS LTDA", + "quantidade": 10, + "preco": 10, + "valor": 100 + } + ], + "totalItemsPagina": 1 + } + ], + "detalheStatusCode": 0, + "excecoes": [] +} +``` + +#### `getIPODetail(_id_)` +Retorna o detalhe de uma posição da lista anterior. + +| Parâmetro | Tipo | Default | Descrição | +|----------------|--------|--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **_id_** | String | _undefined_ | UUID do IPO. 
Foi observado que o UUID de um mesmo IPO pode mudar ao longo do tempo e essa requisição falhar após pegar a lista com o `getIPOs()` | + +```javascript +let ipoDetail = await ceiCrawler.getIPODetail('c80c8b8f-62d2-4b48-b242-b0f310cfa95a'); +``` +Resultado: +```javascript +{ + "nomeProduto": "OUTRO INVESTO ETF MSCI US TECHNOLOGY FDO INV IND INV EXT", + "nomeInstituicao": "INTER DISTRIBUIDORA DE TITULOS E VALORES MOBILIARIOS LTDA", + "ativo": { + "nomeEmpresa": "INVESTO ETF MSCI US TECHNOLOGY FDO INV IND INV EXT", + "ticker": "USTK11L", + "oferta": "ETF INVESTO", + "codigoIsin": "BRUSTKCTF007" + }, + "valores": { + "preco": 10, + "precoMaximo": 0, + "valor": 100 + }, + "reserva": { + "modalidade": "Compra/Integralização de cotas do ETF INVESTO", + "quantidade": 10, + "valor": 0 + }, + "quantidadeAlocada": 10, + "dataLiquidacao": "2021-07-27T00:00:00" +} +``` + +#### `getStockTransactions(_startDate_, _endDate_, _page_)` +Retorna os dados da aba "Negociação" no CEI. + +| Parâmetro | Tipo | Default | Descrição | +|-----------------|--------|---------|-----------------------------------------------------------------------------------------------------------------------------| +| **_startDate_** | Date | _null_ | Data de inicio para trazer as negociações. Caso `null`, será utilizada a ultima data de processamento do CEI menos 1 mês. | +| **_endDate_** | Date | _null_ | Data fim para trazer as negociações. Caso `null`, será utilizada a ultima data de processamento do CEI. | +| **_page_** | Number | 1 | Paginação dos dados. Por default retorna a primeira página. 
| + +```javascript +let stockTransactions = await ceiCrawler.getStockTransactions(); +``` +Resultado: +```javascript +{ + "paginaAtual": 1, + "totalPaginas": 1, + "itens": [ + { + "data": "2021-07-20T00:00:00", + "totalCompra": 1000, + "totalVenda": 0, + "negociacaoAtivos": [ + { + "tipoMovimentacao": "Compra", + "mercado": "Mercado à Vista", + "nomeInstituicao": "RICO INVESTIMENTOS - GRUPO XP", + "codigoNegociacao": "PNVL3", + "quantidade": 100, + "preco": 10.00, + "valor": 1000.00 + } + ], + "totalItemsPagina": 1 + } + ], + "detalheStatusCode": 0, + "excecoes": [] +} +``` + +#### `getProvisionedEvents(_date_, _page_)` +Retorna os eventos da tela "Eventos Provisionados" no CEI. + +| Parâmetro | Tipo | Default | Descrição | +|------------|--------|---------|--------------------------------------------------------------------------------------------------------------| +| **_date_** | Date | _null_ | Data de consulta. Caso seja passado _null_ ou nenhum valor, será usada a ultima data de processamento do CEI.| +| **_page_** | Number | 1 | Paginação dos dados. Por default retorna a primeira página. | + +```javascript +let events = await ceiCrawler.getProvisionedEvents(); +``` +Resultado: +```javascript +{ + "totalValorLiquido": 8.62, + "paginaAtual": 1, + "totalPaginas": 1, + "itens": [ + { + "id": "9cc87804-f9ae-143a-acfb-c953f38c72dd", + "produto": "WEGE3 - WEG S/A", + "tipo": "ON", + "tipoEvento": "JUROS SOBRE CAPITAL PRÓPRIO", + "previsaoPagamento": "2021-08-11T00:00:00", + "instituicao": "INTER DISTRIBUIDORA DE TITULOS E VALORES MOBILIARIOS LTDA", + "quantidade": 300, + "precoUnitario": 0.03, + "valorLiquido": 8.62, + "totalItemsPagina": 1 + } + ], + "detalheStatusCode": 0, + "excecoes": [] +} +``` + +#### `getProvisionedEventDetail(_id_)` +Retorna o detalhe de um evento provisionado da lista anterior. 
+ +| Parâmetro | Tipo | Default | Descrição | +|----------------|--------|--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **_id_** | String | _undefined_ | UUID do evento. Foi observado que o UUID de um mesmo evento pode mudar ao longo do tempo e essa requisição falhar após pegar a lista com o `getProvisionedEvents()`| + +```javascript +let eventDetail = await ceiCrawler.getProvisionedEventDetail('9cc87804-f9ae-143a-acfb-c953f38c72dd'); +``` +Resultado: +```javascript +{ + "codigoNegociacao": "WEGE3", + "codigoIsin": "BRWEGEACNOR0", + "distribuicao": "202", + "escriturador": "BANCO BRADESCO S/A", + "empresa": "WEG S/A", + "dataAprovacao": "2021-03-23T00:00:00", + "dataAtualizacao": "2021-03-30T00:00:00", + "dataEx": "2021-03-29T00:00:00", + "impostoRenda": 15, + "valorImpostoRenda": 1.52, + "valorBruto": 10.14, + "disponivel": 100, + "indisponivel": 0, + "produto": "WEGE3 - WEG S/A", + "tipo": "ON", + "tipoEvento": "JUROS SOBRE CAPITAL PRÓPRIO", + "previsaoPagamento": "2021-08-11T00:00:00", + "quantidade": 100, + "precoUnitario": 0.03, + "valorLiquido": 8.62 +} +``` ## Opções Na criação de um `CeiCrawler` é possivel especificar alguns valores para o parâmetro `options` que modificam a forma que o crawler funciona. 
As opções são: diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index 0ea9cb7..2f2a157 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -117,11 +117,11 @@ class CeiCrawler { } /** - * Returns the stock transactions + * Crawls the tab "Negociacao" * @param {Date} [startDate] - The start date to filter * @param {Date} [endDate] - The end date to filter * @param {Number} [page=1] - The page of the data - * @returns {Promise} - List of Stock histories + * @returns {Promise} - Stock transactions */ async getStockTransactions(startDate = null, endDate = null, page = 1) { await this._login(); @@ -129,53 +129,47 @@ class CeiCrawler { } /** - * Returns the provisioned events for the given date + * Crawls the tab "Ofertas Públicas" * @param {Date} [date] - The date for the provisioned events * @param {Number} [page=1] - The page of the data - * @returns {Promise} - List of Stock histories + * @returns {Promise} - List of IPO operations */ - async getProvisionedEvents(date = null, page = 1) { + async getIPOs(date = null, page = 1) { await this._login(); - return await ProvisionedEventsCrawler.getProvisionedEvents(date, page, this.options); + return await IpoCrawler.getIPOs(date, page, this.options); } /** - * Returns the detail of a provisioned event - * @param {String} id - The UUID of the provisioned event - * @param {typedefs.CeiCrawlerOptions} options - Options for the crawler - * @returns {Any} + * Crawls the detail of a line at the tab "Ofertas Públicas" + * @param {String} id - The UUID of the IPO event + * @returns {Promise} - The detailed information of the IPO */ - async getProvisionedEventDetail(id) { + async getIPODetail(id) { await this._login(); - return await ProvisionedEventsCrawler.getProvisionedEventDetails(id, this.options); + return await IpoCrawler.getIPODetail(id, this.options); } /** - * Returns the IPOs + * Crawls the tab "Eventos provisionados" * @param {Date} [date] - The date for the provisioned events * @param {Number} 
[page=1] - The page of the data - * @returns {Promise} - List of Stock histories + * @returns {Promise} - List of provisioned events */ - async getIPOs(date = null, page = 1) { + async getProvisionedEvents(date = null, page = 1) { await this._login(); - return await IpoCrawler.getIPOs(date, page, this.options); + return await ProvisionedEventsCrawler.getProvisionedEvents(date, page, this.options); } /** - * Returns the detail of an IPO - * @param {String} id - The UUID of the IPO event - * @param {typedefs.CeiCrawlerOptions} options - Options for the crawler - * @returns {Any} + * Crawls the detail of a line at the tab "Eventos provisionados" + * @param {String} id - The UUID of the provisioned event + * @returns {Promise} - The detailed information of the event */ - async getIPODetail(id) { - var a = this.test() - a.itens[0].name + async getProvisionedEventDetail(id) { await this._login(); - return await IpoCrawler.getIPODetail(id, this.options); + return await ProvisionedEventsCrawler.getProvisionedEventDetails(id, this.options); } - - } module.exports = CeiCrawler; diff --git a/src/lib/IpoCrawler.js b/src/lib/IpoCrawler.js index 01c5088..729b98d 100644 --- a/src/lib/IpoCrawler.js +++ b/src/lib/IpoCrawler.js @@ -34,13 +34,12 @@ class IpoCrawler { return response; } + /** - * Returns the detail of the given position - * @param {String} id - The UUID of the position given by CEI - * @param {String} category - The category of the position - * @param {String} type - The type of the position + * Crawls the detail of a line at the tab "Ofertas Públicas" + * @param {String} id - The UUID of the IPO event * @param {typedefs.CeiCrawlerOptions} options - Options for the crawler - * @returns {Any} + * @returns {Promise} - The detailed information of the IPO */ static async getIPODetail(id, options = {}) { if (options.debug) diff --git a/src/lib/ProvisionedEventsCrawler.js b/src/lib/ProvisionedEventsCrawler.js index 23e9a36..e507cea 100644 --- 
a/src/lib/ProvisionedEventsCrawler.js +++ b/src/lib/ProvisionedEventsCrawler.js @@ -10,11 +10,11 @@ const URLS = { class ProvisionedEventsCrawler { /** - * Get data from the position screen - * @param {Date} date - The date of the wallet. If none passed, the default of CEI will be used - * @param {Number} page - The page of the data + * Crawls the tab "Eventos provisionados" + * @param {Date} [date] - The date for the provisioned events + * @param {Number} [page=1] - The page of the data * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @returns {Any} - List of Stock histories + * @returns {Promise} - List of provisioned events */ static async getProvisionedEvents(date, page, options = {}) { const dateStr = CeiUtils.getDateForQueryParam(date || options.lastExecutionInfo.generalDate); @@ -34,13 +34,12 @@ class ProvisionedEventsCrawler { return response; } + /** - * Returns the detail of the given position - * @param {String} id - The UUID of the position given by CEI - * @param {String} category - The category of the position - * @param {String} type - The type of the position + * Crawls the detail of a line at the tab "Eventos provisionados" + * @param {String} id - The UUID of the provisioned event * @param {typedefs.CeiCrawlerOptions} options - Options for the crawler - * @returns {Any} + * @returns {Promise} - The detailed information of the event */ static async getProvisionedEventDetails(id, options = {}) { if (options.debug) diff --git a/src/lib/StockTransactionsCrawler.js b/src/lib/StockTransactionsCrawler.js index 0e95647..a6cb2b2 100644 --- a/src/lib/StockTransactionsCrawler.js +++ b/src/lib/StockTransactionsCrawler.js @@ -9,12 +9,12 @@ const URLS = { class StockTransactionsCrawler { /** - * Get data from the position screen - * @param {Date} startDate - The start date of the range - * @param {Date} endDate - The end date of the range - * @param {Number} page - The page of the data + * Crawls the tab "Negociacao" + * @param 
{Date} [startDate] - The start date to filter + * @param {Date} [endDate] - The end date to filter + * @param {Number} [page=1] - The page of the data * @param {typedefs.CeiCrawlerOptions} [options] - Options for the crawler - * @returns {Promise} - List of Stock histories + * @returns {Promise} - Stock transactions */ static async getStockTransactions(startDate, endDate, page, options = {}) { const lastExecution = options.lastExecutionInfo.generalDate; diff --git a/src/lib/typedefs.js b/src/lib/typedefs.js index 0eb78e8..d90b39a 100644 --- a/src/lib/typedefs.js +++ b/src/lib/typedefs.js @@ -84,4 +84,114 @@ * @property {AccountStatementEntry[]} movimentacoes - The entries for this date */ +/** + * @typedef IPO + * @property {String} id - The ID of the entry at CEI + * @property {String} dataLiquidacao - The date which the operation was liquidated + * @property {String} nomeEmpresa - Name of the company offer + * @property {String} nomeInstituicao - Name of the broker + * @property {String} oferta - Name of the company offering + * @property {Number} preco - The unit price + * @property {Number} quantidade - The quantity requested + * @property {String} tipoOferta - The type of the offer + * @property {Number} valor - The total amount of the operation + */ + +/** + * @typedef IPODaily + * @property {String} data - The date of the transactions + * @property {IPO[]} ofertasPublicas - The IPOs for that date + * @property {Number} totalItemsPagina - The total quantity of items in the page + */ + +/** + * @typedef IPOAsset + * @property {String} nomeEmpresa - The name of the company + * @property {String} ticker + * @property {String} oferta + * @property {String} codigoIsin + */ + +/** + * @typedef IPOValues + * @property {Number} preco - The unit price for the asset + * @property {Number} precoMaximo - The max price set + * @property {Number} valor - The value of the operation + */ + +/** + * @typedef IPOReservation + * @property {String} modalidade + * @property 
{Number} quantidade + * @property {Number} valor + */ + +/** + * @typedef IPODetail + * @property {String} nomeProduto + * @property {String} nomeInstituicao - The name of the broker used + * @property {IPOAsset} ativo - Information regarding the IPO asset + * @property {IPOValues} valores + * @property {IPOReservation} reserva - Information regarding the reservation + * @property {Number} quantidadeAlocada - The amount allocated + * @property {String} dataLiquidacao + */ + +/** + * @typedef StockTransaction + * @property {String} codigoNegociacao - The code of the stock + * @property {String} mercado - The market where it was negotiated + * @property {String} nomeInstituicao - The broker used in the negotiation + * @property {Number} preco - The stock unit price + * @property {Number} quantidade - The quantity negotiated + * @property {String} tipoMovimentacao - The type of the transaction: "Compra" or "Venda" + * @property {Number} valor - The total amount of the transaction + */ + +/** + * @typedef StockTransactionsDaily + * @property {String} data - The date of the transactions + * @property {StockTransaction[]} negociacaoAtivos - The transactions for that date + * @property {Number} totalCompra - The total amount bought on that day + * @property {Number} totalVenda - The total amount sold on that day + * @property {Number} totalItemsPagina - The total quantity of items in the page + */ + +/** + * @typedef ProvisionedEvent + * @property {String} id - The UUID of this event on CEI + * @property {String} instituicao - Broker related to this event + * @property {Number} precoUnitario + * @property {String} previsaoPagamento - Date which the event should happen + * @property {String} produto - Name of the asset related to the event + * @property {Number} quantidade - Quantity related to this event + * @property {String} tipo + * @property {String} tipoEvento - Type of the event. 
E.g.: Dividendo, Rendimento, etc + * @property {Number} valorLiquido - Value related to this event + */ + +/** + * @typedef ProvisionedEventDetail + * @property {String} codigoIsin + * @property {String} codigoNegociacao - The code of the asset + * @property {String} dataAprovacao + * @property {String} dataAtualizacao + * @property {String} dataEx + * @property {Number} disponivel + * @property {String} distribuicao + * @property {String} empresa + * @property {String} escriturador + * @property {Number} impostoRenda + * @property {Number} indisponivel + * @property {Number} precoUnitario + * @property {String} previsaoPagamento + * @property {String} produto + * @property {Number} quantidade + * @property {String} tipo + * @property {String} tipoEvento + * @property {Number} valorBruto + * @property {Number} valorImpostoRenda + * @property {Number} valorLiquido + */ + exports.unused = {}; From 3ca3c695b42154c2eeeedeedae1e066d5b6319cd Mon Sep 17 00:00:00 2001 From: Menighin Date: Fri, 6 Aug 2021 00:07:19 -0300 Subject: [PATCH 14/17] Documenting yet... --- README.md | 79 ++++++++++++++++++++++++++++++-------- package.json | 7 +--- src/lib/CeiLoginService.js | 2 +- 3 files changed, 64 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index fb64813..358de25 100644 --- a/README.md +++ b/README.md @@ -4,15 +4,19 @@ Crawler para ler dados do Canal Eletrônico do Investidor +## __Importante__ +Para versão antiga do CEI que não possui captcha obrigatório (por enquanto), utilize o [cei-crawler v2](https://github.com/Menighin/cei-crawler/tree/v2) + ## Descrição -O `cei-crawler` utiliza as seguintes dependências: -* [cheerio](https://github.com/cheeriojs/cheerio) para fazer o parse do HTML. 
-* [node-fetch](https://github.com/node-fetch/node-fetch) para fazer as requisições -* [abort-controller](https://github.com/mysticatea/abort-controller) para controlar o timeout das requisições -* [tough-cookie](https://github.com/salesforce/tough-cookie) para auxiliar no gerenciamento dos cookies -* [normalize-html-whitespace](https://www.npmjs.com/package/normalize-html-whitespace) para fazer a limpeza do HTML do CEI +Essa versão do crawler varre a [Nova Area Logada do CEI](https://www.investidor.b3.com.br/nova-area-logada?utm_source=cei&utm_medium=banner&utm_campaign=lancamento). +Essa área logada possui um captcha para que seja feito o login e por isso existem algumas estratégias de implementação para fazer o bypass do mesmo. +Além disso, o CEI agora possui uma API. Tudo que o crawler faz basicamente é encapsular as chamadas dessas API's. +Portanto, o formato dos dados vem direto do CEI, *não há* transformação feita por esse crawler. +Sendo assim, caso haja algo estranho ou errado nos dados retornados, provavelmente é o próprio CEI que está retornando. -Cada instância do `CeiCrawler` roda em um contexto separado, portante é possível realizar operações em usuários diferentes de forma simultânea +O `cei-crawler` utiliza as seguintes dependências: +* [puppeteer-core](https://www.npmjs.com/package/puppeteer-core) para navegar com o browser e resolver o captcha. +* [axios](https://www.npmjs.com/package/axios) para fazer as requisições http. ## Sponsor @@ -48,7 +52,48 @@ ceiCrawler.login(); // Login é opcional, pois antes de cada método o cei-crawl // A vantagem em realizar o login em um passo diferente é para o tratamento de erros ``` -### Métodos disponíveis +## Login & Captcha +A nova área logada do CEI possui validação por captcha. Não há forma simples de resolver e por isso algumas estratégias de resolução são implementadas. +Essas estratégias são setadas na instanciação do crawler, com o objeto de `options`. 
As disponíveis são: + +#### `raw-token` +Nessa estratégia de login, não é necessário informar usuário e senha porém deve-se informar o `token` e o `cache-guid` do usuário logado. +Essa estratégia é útil caso você possua algum serviço terceiro que faça o login no CEI e pegue o token pra você. + +Exemplo: +```javascript +const ceiCrawler = new CeiCrawler(_, _, { + loginOptions: { + strategy: 'raw-token' + }, + auth: { + "cache-guid": "cache-guid do usuário logado", + token: "JWT do usuário logado" + } +}); + +const values = await ceiCrawler.getConsolidatedValues(); +``` + +#### `user-resolve` +Nessa estratégia de login, o usuário será promptado para fazer o login ele mesmo em uma janela de browser que será aberta. +O crawler tenta preencher usuário e senha para você de forma que o input manual é somente para resolução do Captcha. +Uma vez feito o login, o crawler trata de pegar as credencias e seguir adiante chamando os métodos. +Nas opções do login deve-se também ser informado um caminho do browser para que o puppeteer o controle. + +Exemplo: +```javascript +const ceiCrawler = new CeiCrawler('user', 'password', { + loginOptions: { + strategy: 'user-resolve', + browserPath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe' + } +}); + +const values = await ceiCrawler.getConsolidatedValues(); +``` + +## Métodos disponíveis #### `getConsolidatedValues()` Retorna os investimentos consolidados num valor total e divididos em subcategorias @@ -74,12 +119,12 @@ Resultado: } ``` -#### `getPosition(_date_, _page_)` +#### `getPosition(date, page)` Retorna as posições da tela "Posição" em todas as categorias de investimentos. | Parâmetro | Tipo | Default | Descrição | |------------|--------|---------|--------------------------------------------------------------------------------------------------------------| -| **_date_** | Date | _null_ | Data da posição. Caso seja passado _null_ ou nenhum valor, será usada a ultima data de processamento do CEI. 
| +| **date**| Date | _null_ | Data da posição. Caso seja passado _null_ ou nenhum valor, será usada a ultima data de processamento do CEI. | | **_page_** | Number | 1 | Paginação dos dados. Por default retorna a primeira página. | ```javascript @@ -157,7 +202,7 @@ Resultado: } ``` -#### `getPositionDetail(_id_, _category_, _type_)` +#### `getPositionDetail(id, category, type)` Retorna o detalhe de uma posição da lista anterior. | Parâmetro | Tipo | Default | Descrição | @@ -187,7 +232,7 @@ Resultado: } ``` -#### `getAccountStatement(_startDate_, _endDate_, _page_)` +#### `getAccountStatement(startDate, endDate, page)` Retorna as movimentações da aba "Movimentação" no CEI. | Parâmetro | Tipo | Default | Descrição | @@ -246,7 +291,7 @@ Resultado: } ``` -#### `getIpos(_date_, _page_)` +#### `getIPOs(date, page)` Retorna os IPOs da tela "Ofertas Públicas" no CEI. | Parâmetro | Tipo | Default | Descrição | @@ -286,7 +331,7 @@ Resultado: } ``` -#### `getIPODetail(_id_)` +#### `getIPODetail(id)` Retorna o detalhe de uma posição da lista anterior. | Parâmetro | Tipo | Default | Descrição | @@ -322,7 +367,7 @@ Resultado: } ``` -#### `getStockTransactions(_startDate_, _endDate_, _page_)` +#### `getStockTransactions(startDate, endDate, page)` Retorna os dados da aba "Negociação" no CEI. | Parâmetro | Tipo | Default | Descrição | @@ -363,7 +408,7 @@ Resultado: } ``` -#### `getProvisionedEvents(_date_, _page_)` +#### `getProvisionedEvents(date, page)` Retorna os eventos da tela "Eventos Provisionados" no CEI. | Parâmetro | Tipo | Default | Descrição | @@ -399,7 +444,7 @@ Resultado: } ``` -#### `getProvisionedEventDetail(_id_)` +#### `getProvisionedEventDetail(id)` Retorna o detalhe de um evento provisionado da lista anterior. 
| Parâmetro | Tipo | Default | Descrição | diff --git a/package.json b/package.json index 1ed23e3..eda9d16 100644 --- a/package.json +++ b/package.json @@ -24,13 +24,8 @@ "license": "MIT", "dependencies": { "@babel/runtime": "^7.9.6", - "abort-controller": "^3.0.0", "axios": "^0.21.1", - "cheerio": "^1.0.0-rc.3", - "node-fetch": "^2.6.1", - "normalize-html-whitespace": "^1.0.0", - "puppeteer-core": "^10.1.0", - "tough-cookie": "^4.0.0" + "puppeteer-core": "^10.1.0" }, "devDependencies": { "@babel/core": "^7.9.0", diff --git a/src/lib/CeiLoginService.js b/src/lib/CeiLoginService.js index 1f9152e..52defca 100644 --- a/src/lib/CeiLoginService.js +++ b/src/lib/CeiLoginService.js @@ -34,7 +34,7 @@ class CeiLoginService { async _getTokenByUserResolve() { const browser = await puppeteer.launch({ headless: false, - executablePath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe', + executablePath: this._options.loginOptions.browserPath, args: ['--start-maximized'] }); const homePage = await browser.newPage(); From 93975c56c5c44b52f593f759a9ccbe4eb8ecc067 Mon Sep 17 00:00:00 2001 From: Menighin Date: Fri, 6 Aug 2021 22:04:04 -0300 Subject: [PATCH 15/17] Documentation... Done I guess --- README.md | 60 +++++++++++++++++-------------------------- src/lib/CeiCrawler.js | 1 - src/lib/typedefs.js | 4 --- 3 files changed, 24 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 358de25..b7c1ad6 100644 --- a/README.md +++ b/README.md @@ -483,22 +483,23 @@ Resultado: ## Opções Na criação de um `CeiCrawler` é possivel especificar alguns valores para o parâmetro `options` que modificam a forma que o crawler funciona. 
As opções são: -| Propriedade | Tipo | Default | Descrição | -|-----------------------|-----------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| **capDates** | _Boolean_ | _false_ | Se `true`, as datas utilizadas de input para buscas serão limitadas ao range de datas válidas do CEI, impedindo que ocorra um erro caso o usuário passe uma data maior ou menor. | -| **navigationTimeout** | _Number_ | 30000 | Tempo, em ms, que o crawler espera por uma ação antes de considerar timeout. | -| **timeout** | _Number_ | 180000 | Tempo, em ms, que o crawler espera para realizar login antes de considerar timeout. Diversas vezes, como a noite e aos fins de semana, o sistema do CEI parece ficar muito instavél e causa diversos timeouts no login. | -| **trace** | _Boolean_ | _false_ | Printa mensagens de debug no log. Útil para desenvolvimento. | +| Propriedade | Tipo | Default | Descrição | +|---------------------------|-----------|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **debug** | _Boolean_ | _false_ | Se `true`, printa mensages de debug no log. | +| **loginOptions.strategy** | _String_ | `user-resolve` | Estratégia utilizada no login. Veja [Login & Captcha](https://github.com/Menighin/cei-crawler/tree/v3#login--captcha) para mais informações. | +| **login.browserPath** | _String_ | `undefined` | Caminho para o executavél do browser que será controlado para resolucao do Captcha. Veja [Login & Captcha](https://github.com/Menighin/cei-crawler/tree/v3#login--captcha) para mais informações. | +| **auth.token** | _String_ | `undefined` | Token JWT do usuário logado no CEI. 
Utilizado quando a estratégia de login é `raw-token` | +| **auth.cache-guid** | _String_ | `undefined` | UUID da sessão do usuário logado no CEI. Utilizado quando a estratégia de login é `raw-token` | Exemplo: ```javascript const ceiCrawlerOptions = { - trace: false, - capEndDate: true, - navigationTimeout: 60000, - timeout: 240000, - + debug: true, + loginOptions: { + strategy: 'user-resolve', + browserPath: 'path/to/browser.exe' + } }; let ceiCrawler = new CeiCrawler('username', 'password', ceiCrawlerOptions); @@ -507,13 +508,12 @@ let ceiCrawler = new CeiCrawler('username', 'password', ceiCrawlerOptions); ## Error Handling O CEI Crawler possui um exceção própria, `CeiCrawlerError`, que é lançada em alguns cenários. Essa exceção possui um atributo `type` para te direcionar no tratamento: -| type | Descrição | -|----------------|---------------------------------------------------------------------------------------------------------------------------| -| LOGIN_FAILED | Lançada quando o login falha por timeout ou por CPF errado digitado | -| WRONG_PASSWORD | Lançada quando a senha passada está errada | -| SUBMIT_ERROR | Lançada quando acontece um erro ao submeter um formulario de pesquisa em alguma página do CEI. Por exemplo: data inválida | -| SESSION_HAS_EXPIRED | Lançada quando a sessão do usuário expira, nesse caso é necessário realizar o login novamente `ceiCrawler.login()` | -| NAVIGATION_TIMEOUT | Lançada quando a requisição estoura o tempo limite definida na opção `navigationTimeout` | +| type | Descrição | +|------------------------|----------------------------------------------------------------------------------------------------------------------------------| +| UNAUTHORIZED | Lançada quando uma request retorna 401. Isso pode significar que o token utiliza é inválido ou expirou. | +| BAD_REQUEST | Lançada quando uma requisição falha por má formação. Pode ser um parâmetro errado, uma data menor que o limite minimo, etc. 
| +| TOO_MANY_REQUESTS | O CEI faz throttling de requisições. Se ao usar o crawler você fizer muitas requisições rapidamente esse erro pode ser retornado | +| INVALID_LOGIN_STRATEGY | Lançada quando informada uma estratégia de login inválida. | Exemplo de como fazer um bom tratamento de erros: @@ -525,31 +525,19 @@ const { CeiErrorTypes } = require('cei-crawler') const ceiCrawler = new CeiCrawler('usuario', 'senha', { navigationTimeout: 20000 }); try { - const wallet = ceiCrawler.getWallet(); + const positions = ceiCrawler.getPositions(); } catch (err) { if (err.name === 'CeiCrawlerError') { - if (err.type === CeiErrorTypes.LOGIN_FAILED) - // Handle login failed - else if (err.type === CeiErrorTypes.WRONG_PASSWORD) - // Handle wrong password - else if (err.type === CeiErrorTypes.SUBMIT_ERROR) - // Handle submit error - else if (err.type === CeiErrorTypes.SESSION_HAS_EXPIRED) - // Handle session expired - else if (err.type === CeiErrorTypes.NAVIGATION_TIMEOUT) - // Handle request timeout + if (err.type === CeiErrorTypes.UNAUTHORIZED) + // Handle unauthorized + else if (err.type === CeiErrorTypes.TOO_MANY_REQUESTS) + // Handle too many requests + // else ... 
} else { // Handle generic errors } } ``` -## Features -- [x] Histórico de ações -- [x] Dividendos -- [x] Carteira de ações -- [x] Tesouro Direto (Resumido) -- [x] Tesouro Direto (Analítico) - ## Licença MIT diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index 2f2a157..b02ccd6 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -49,7 +49,6 @@ class CeiCrawler { if (!this.options.debug) this.options.debug = false; if (!this.options.navigationTimeout) this.options.navigationTimeout = 30000; if (!this.options.loginOptions) this.options.loginOptions = {}; - if (!this.options.loginOptions.timeout) this.options.loginOptions.timeout = 150000; if (!this.options.loginOptions.strategy) this.options.loginOptions.strategy = 'user-resolve'; } diff --git a/src/lib/typedefs.js b/src/lib/typedefs.js index d90b39a..e327297 100644 --- a/src/lib/typedefs.js +++ b/src/lib/typedefs.js @@ -12,7 +12,6 @@ /** * @typedef LoginOptions * @property {String} strategy - The strategy the crawler will use to make the login. Options are: `user-resolve`, `raw-token` - * @property {Number} timeout - Login timeout * @property {String} browserPath - Path of the browser to run puppeteer */ @@ -25,12 +24,9 @@ /** * @typedef CeiCrawlerOptions * @property {boolean} debug - Indicates if it should print debug messages. Helpful for debugging. - * @property {boolean} capDates - Prevent crawling with an invalid date in CEI - * @property {Number} navigationTimeout - Fetch timeout * @property {LoginOptions} loginOptions - The strategy the crawler will use to make the login. 
Options are: `user-input` * @property {CeiAuth} auth - Auth logged info * @property {LastExecutionInfo} lastExecutionInfo - CEI info about the last execution - * @memberof typdefs */ /** From c51ee9ec7c109bb373a6530c2adc34059091db97 Mon Sep 17 00:00:00 2001 From: Menighin Date: Sun, 8 Aug 2021 01:26:58 -0300 Subject: [PATCH 16/17] Testing --- README.md | 14 +-- src/lib/CeiCrawler.js | 25 ++-- src/lib/CeiCrawlerError.js | 5 - src/lib/CeiLoginService.js | 2 +- src/lib/PositionCrawler.js | 3 +- test/app.test.js | 252 +++++++++++++++++++------------------ 6 files changed, 154 insertions(+), 147 deletions(-) diff --git a/README.md b/README.md index b7c1ad6..5258b7a 100644 --- a/README.md +++ b/README.md @@ -483,13 +483,13 @@ Resultado: ## Opções Na criação de um `CeiCrawler` é possivel especificar alguns valores para o parâmetro `options` que modificam a forma que o crawler funciona. As opções são: -| Propriedade | Tipo | Default | Descrição | -|---------------------------|-----------|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| **debug** | _Boolean_ | _false_ | Se `true`, printa mensages de debug no log. | -| **loginOptions.strategy** | _String_ | `user-resolve` | Estratégia utilizada no login. Veja [Login & Captcha](https://github.com/Menighin/cei-crawler/tree/v3#login--captcha) para mais informações. | -| **login.browserPath** | _String_ | `undefined` | Caminho para o executavél do browser que será controlado para resolucao do Captcha. Veja [Login & Captcha](https://github.com/Menighin/cei-crawler/tree/v3#login--captcha) para mais informações. | -| **auth.token** | _String_ | `undefined` | Token JWT do usuário logado no CEI. Utilizado quando a estratégia de login é `raw-token` | -| **auth.cache-guid** | _String_ | `undefined` | UUID da sessão do usuário logado no CEI. 
Utilizado quando a estratégia de login é `raw-token` | +| Propriedade | Tipo | Default | Descrição | +|-----------------------------|-----------|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **debug** | _Boolean_ | _false_ | Se `true`, printa mensages de debug no log. | +| **loginOptions.strategy** | _String_ | `user-resolve` | Estratégia utilizada no login. Veja [Login & Captcha](https://github.com/Menighin/cei-crawler/tree/v3#login--captcha) para mais informações. | +| **loginOptions.browserPath**| _String_ | `undefined` | Caminho para o executavél do browser que será controlado para resolucao do Captcha. Veja [Login & Captcha](https://github.com/Menighin/cei-crawler/tree/v3#login--captcha) para mais informações. | +| **auth.token** | _String_ | `undefined` | Token JWT do usuário logado no CEI. Utilizado quando a estratégia de login é `raw-token` | +| **auth.cache-guid** | _String_ | `undefined` | UUID da sessão do usuário logado no CEI. 
Utilizado quando a estratégia de login é `raw-token` | Exemplo: diff --git a/src/lib/CeiCrawler.js b/src/lib/CeiCrawler.js index b02ccd6..3848650 100644 --- a/src/lib/CeiCrawler.js +++ b/src/lib/CeiCrawler.js @@ -86,7 +86,6 @@ class CeiCrawler { */ async getPosition(date = null, page = 1) { await this._login(); - return await PositionCrawler.getPosition(date, page, this.options); } @@ -115,18 +114,6 @@ class CeiCrawler { return await AccountStatementCrawler.getAccountStatement(startDate, endDate, page, this.options); } - /** - * Crawls the tab "Negociacao" - * @param {Date} [startDate] - The start date to filter - * @param {Date} [endDate] - The end date to filter - * @param {Number} [page=1] - The page of the data - * @returns {Promise} - Stock transactions - */ - async getStockTransactions(startDate = null, endDate = null, page = 1) { - await this._login(); - return await StockTransactionsCrawler.getStockTransactions(startDate, endDate, page, this.options); - } - /** * Crawls the tab "Ofertas Públicas" * @param {Date} [date] - The date for the provisioned events @@ -148,6 +135,18 @@ class CeiCrawler { return await IpoCrawler.getIPODetail(id, this.options); } + /** + * Crawls the tab "Negociacao" + * @param {Date} [startDate] - The start date to filter + * @param {Date} [endDate] - The end date to filter + * @param {Number} [page=1] - The page of the data + * @returns {Promise} - Stock transactions + */ + async getStockTransactions(startDate = null, endDate = null, page = 1) { + await this._login(); + return await StockTransactionsCrawler.getStockTransactions(startDate, endDate, page, this.options); + } + /** * Crawls the tab "Eventos provisionados" * @param {Date} [date] - The date for the provisioned events diff --git a/src/lib/CeiCrawlerError.js b/src/lib/CeiCrawlerError.js index ea177d6..faf37b0 100644 --- a/src/lib/CeiCrawlerError.js +++ b/src/lib/CeiCrawlerError.js @@ -8,11 +8,6 @@ class CeiCrawlerError extends Error { } const CeiErrorTypes = Object.freeze({ 
- LOGIN_FAILED: 'LOGIN_FAILED', - WRONG_PASSWORD: 'WRONG_PASSWORD', - SUBMIT_ERROR: 'SUBMIT_ERROR', - SESSION_HAS_EXPIRED: 'SESSION_HAS_EXPIRED', - NAVIGATION_TIMEOUT: 'NAVIGATION_TIMEOUT', INVALID_LOGIN_STRATEGY: 'INVALID_LOGIN_STRATEGY', BAD_REQUEST: 'BAD_REQUEST', UNAUTHORIZED: 'UNAUTHORIZED', diff --git a/src/lib/CeiLoginService.js b/src/lib/CeiLoginService.js index 52defca..40c216c 100644 --- a/src/lib/CeiLoginService.js +++ b/src/lib/CeiLoginService.js @@ -21,7 +21,7 @@ class CeiLoginService { } async getToken() { - switch(this._options.loginOptions.strategy) { + switch (this._options.loginOptions.strategy) { case 'user-resolve': return await this._getTokenByUserResolve(); case 'raw-token': diff --git a/src/lib/PositionCrawler.js b/src/lib/PositionCrawler.js index f12fba4..be110ec 100644 --- a/src/lib/PositionCrawler.js +++ b/src/lib/PositionCrawler.js @@ -45,6 +45,7 @@ class PositionCrawler { * @returns {Any} */ static async getPositionDetail(id, category, type, options = {}) { + if (options.debug) console.log(`[PositionCrawler] Crawling wallet position detail for ${id} (${category}, ${type})`); @@ -57,7 +58,7 @@ class PositionCrawler { // Try to get the detail with type try { return await AxiosWrapper.request(URLS.DETAIL_1, { - peathParams: pathParams + pathParams: pathParams }); } catch (e) { if (e.type === CeiErrorTypes.TOO_MANY_REQUESTS) diff --git a/test/app.test.js b/test/app.test.js index 977e43c..608f68f 100644 --- a/test/app.test.js +++ b/test/app.test.js @@ -1,5 +1,6 @@ const test = require('ava') const CeiCrawler = require('../src/app') +const CeiUtils = require('./../src/lib/CeiUtils'); const { CeiErrorTypes } = require('../src/lib/CeiCrawlerError'); const dotenv = require('dotenv'); @@ -7,136 +8,147 @@ const dotenv = require('dotenv'); dotenv.config(); test.before(t => { - if (!process.env.CEI_USERNAME || !process.env.CEI_PASSWORD) { - throw Error('You should set environment variables CEI_USERNAME and CEI_PASSWORD in order to run tests'); + if 
(!process.env.TOKEN || !process.env.GUID) { + throw Error('You should set environment variables TOKEN and GUID in order to run tests'); } - t.context.ceiCrawler = new CeiCrawler(process.env.CEI_USERNAME, process.env.CEI_PASSWORD, { navigationTimeout: 60000 }); - t.context.ceiCrawlerCap = new CeiCrawler(process.env.CEI_USERNAME, process.env.CEI_PASSWORD, { capDates: true, navigationTimeout: 60000 }); - t.context.emptyOptionsCeiCrawler = new CeiCrawler(process.env.CEI_USERNAME, process.env.CEI_PASSWORD, { navigationTimeout: 60000 }); - t.context.wrongPasswordCeiCrawler = new CeiCrawler(process.env.CEI_USERNAME, process.env.CEI_PASSWORD + 'wrong', { navigationTimeout: 60000 }); - t.context.ceiCrawlerTimeout = new CeiCrawler(process.env.CEI_USERNAME, process.env.CEI_PASSWORD, { navigationTimeout: 1 }); -}); - -test.serial('login', async t => { - await t.context.ceiCrawler.login(); - t.is(t.context.ceiCrawler._isLogged, true); -}); - -test.serial('stock-history', async t => { - const result = await t.context.ceiCrawler.getStockHistory(); - t.true(result.length > 0); - - let hasAnyStock = false; - for (const r of result) { - if (r.stockHistory.length > 0) { - hasAnyStock = true; - break; - } - } - t.true(hasAnyStock); -}); - -test.serial('stock-history-empty', async t => { - const saturday = new Date(2020, 0, 4); - const sunday = new Date(2020, 0, 5); - const result = await t.context.ceiCrawler.getStockHistory(saturday, sunday); - t.true(result.length > 0); - - let hasAnyStock = false; - for (const r of result) { - if (r.stockHistory.length > 0) { - hasAnyStock = true; - break; + t.context.ceiCrawler = new CeiCrawler('', '', { + debug: true, + loginOptions: { + strategy: 'raw-token' + }, + auth: { + "cache-guid": process.env.GUID, + token: process.env.TOKEN } - } - t.false(hasAnyStock); -}); - -test.serial('invalid-dates', async t => { - const errorGetStockHistory = await t.throwsAsync(async () => t.context.ceiCrawler.getStockHistory(new Date(0), new Date(10000))); - 
const errorGetDividends = await t.throwsAsync(async () => t.context.ceiCrawler.getDividends(new Date(0))); - const errorGetWallet = await t.throwsAsync(async () => t.context.ceiCrawler.getWallet(new Date(0))); - const errorGetTreasure = await t.throwsAsync(async () => t.context.ceiCrawler.getTreasures(new Date(0))); - - t.true(errorGetStockHistory.type === CeiErrorTypes.SUBMIT_ERROR); - t.true(errorGetDividends.type === CeiErrorTypes.SUBMIT_ERROR); - t.true(errorGetWallet.type === CeiErrorTypes.SUBMIT_ERROR); - t.true(errorGetTreasure.type === CeiErrorTypes.SUBMIT_ERROR); -}); - -test.serial('stock-history-invalid-dates-with-cap-on', async t => { - const result = await t.context.ceiCrawlerCap.getStockHistory(new Date(0), new Date(10000)); - t.true(result.length > 0); -}); - -test.serial('dividends', async t => { - const nextWeek = new Date(new Date().getTime() + 1000 * 60 * 60 * 24 * 7); - const result = await t.context.ceiCrawlerCap.getDividends(nextWeek); - t.true(result.length > 0); -}); - -test.serial('wallet', async t => { - const nextWeek = new Date(new Date().getTime() + 1000 * 60 * 60 * 24 * 7); - const result = await t.context.ceiCrawlerCap.getWallet(nextWeek); - t.true(result.length > 0); -}); - -test.serial('treasure', async t => { - const nextWeek = new Date(new Date().getTime() + 1000 * 60 * 60 * 24 * 7); - const result = await t.context.ceiCrawlerCap.getTreasures(nextWeek); - t.true(result.length > 0); -}); - -test.serial('stock-history-options', async t => { - const result = await t.context.ceiCrawlerCap.getStockHistoryOptions(); - t.true(result.minDate.length > 0); -}); - -test.serial('wallet-options', async t => { - const result = await t.context.ceiCrawlerCap.getWalletOptions(); - t.true(result.minDate.length > 0); + }); }); -test.serial('dividends-options', async t => { - const result = await t.context.ceiCrawlerCap.getDividendsOptions(); - t.true(result.minDate.length > 0); -}); +// test.serial('consolidated-values', async t => { +// const 
consolidatedValues= await t.context.ceiCrawler.getConsolidatedValues(); +// t.true(consolidatedValues.total > 0); +// }); + +// test.serial('get-position', async t => { +// const positions = await t.context.ceiCrawler.getPosition(); +// t.true(positions.paginaAtual === 1); + +// const position = { +// category: positions.itens[0].categoriaProduto, +// type: positions.itens[0].tipoProduto, +// id: positions.itens[0].posicoes[0].id +// }; +// const positionDetail = await t.context.ceiCrawler.getPositionDetail(position.id, position.category, position.type); +// t.true(positionDetail.quantidade > 0); +// }); + +// test.serial('account-statement', async t => { +// const statement = await t.context.ceiCrawler.getAccountStatement(); +// t.true(statement.paginaAtual === 1); +// }); + +// test.serial('ipo-and-ipo-detail', async t => { +// const ipo = await t.context.ceiCrawler.getIPOs(); +// t.true(ipo.paginaAtual === 1); + +// const id = ipo.itens[0].ofertasPublicas[0].id; +// const ipoDetail = await t.context.ceiCrawler.getIPODetail(id); +// t.true(ipoDetail !== undefined); +// }); + +// test.serial('get-stock-transactions', async t => { +// const stockTransactions = await t.context.ceiCrawler.getStockTransactions(); +// t.true(stockTransactions.paginaAtual === 1); +// }); + +// test.serial('provisioned-events-detail', async t => { +// const provisionedEvents = await t.context.ceiCrawler.getProvisionedEvents(); +// t.true(provisionedEvents.paginaAtual === 1); + +// const id = provisionedEvents.itens[0].id; +// const eventDetail = await t.context.ceiCrawler.getProvisionedEventDetail(id); +// t.true(eventDetail !== undefined); +// }); + +// test.serial('invalid-strategy', async t => { +// await t.throwsAsync(async () => { +// const crawler = new CeiCrawler('', '', { +// loginOptions: { +// strategy: 'invalid-strategy' +// } +// }); +// await crawler.login(); +// }); +// }); + +// test.serial('invalid-browser-path', async t => { +// await t.throwsAsync(async () => { +// 
const crawler = new CeiCrawler('', '', { +// loginOptions: { +// strategy: 'user-resolve', +// browserPath: 'C:/invalid/path' +// } +// }); +// await crawler.login(); +// }); +// }); + +// test.serial('invalid-token', async t => { +// await t.throwsAsync(async () => { +// const crawler = new CeiCrawler('', '', { +// loginOptions: { +// strategy: 'raw-token', +// }, +// auth: { +// "cache-guid": "invalid", +// "token": "invalid" +// } +// }); +// await crawler.login(); +// }); +// }); + +// test.serial('invalid-position-call', async t => { +// await t.throwsAsync(async () => { +// const crawler = new CeiCrawler('', '', { +// loginOptions: { +// strategy: 'raw-token', +// }, +// auth: { +// "cache-guid": "invalid", +// "token": "invalid" +// }, +// debug: true +// }); +// crawler._isLogged = true; +// await crawler.getPositionDetail('id-91', 'cat', 'type'); +// }); +// }); + +test.serial('cei-utils', async t => { + let n = 0; + const callback = () => { + if (n++ < 2) + throw new Error('test'); + }; + + await t.notThrowsAsync(async () => { + await CeiUtils.retry(callback); + }); -test.serial('treasure-options', async t => { - const result = await t.context.ceiCrawlerCap.getTreasureOptions(); - t.true(result.institutions.length > 0); -}); + n = 0; -test.serial('login-fail', async t => { - const error = await t.throwsAsync(async () => { - const wrongCeiCrawler = new CeiCrawler('1234', 'invalidPassword'); - await wrongCeiCrawler.login(); + await t.throwsAsync(async () => { + await CeiUtils.retry(callback, 1); }); - t.true(error.type === CeiErrorTypes.LOGIN_FAILED); -}); -test.serial('wrong-password', async t => { - const error = await t.throwsAsync(async () => { - await t.context.wrongPasswordCeiCrawler.login(); - }); - t.true(error.type === CeiErrorTypes.WRONG_PASSWORD); -}); + t.true(CeiUtils.kebabize('WillKebabThis') === 'will-kebab-this'); -test.serial('request-timeout', async t => { - const error = await t.throwsAsync(async () => { - await 
t.context.ceiCrawlerTimeout.login(); - }); - t.true(error.type === CeiErrorTypes.NAVIGATION_TIMEOUT); -}); + const date = new Date(2021, 7, 8); + const dateLastMonth = CeiUtils.subtractMonth(date); + t.true(dateLastMonth.getMonth() === date.getMonth() - 1 && dateLastMonth.getDate() === date.getDate()); + + t.true(CeiUtils.getDateForQueryParam(date) === '2021-08-08'); -test.serial('ipo-operations', async t => { - const lastThreeDays = new Date(new Date().getTime() - 1000 * 60 * 60 * 24 * 3); - const result = await t.context.ceiCrawlerCap.getIPOTransactions(lastThreeDays); - t.true(result.length > 0); -}); -test.serial('ipo-options', async t => { - const result = await t.context.ceiCrawlerCap.getIPOOptions(); - t.true(result.minDate.length > 0); }); \ No newline at end of file From 5897eb0a0aa7a8ebd4da2f3e285859c4b36a78b0 Mon Sep 17 00:00:00 2001 From: Menighin Date: Sat, 14 Aug 2021 14:03:57 -0300 Subject: [PATCH 17/17] Release v3 --- README.md | 2 +- package.json | 2 +- src/lib/CeiCrawlerError.js | 2 + src/lib/CeiLoginService.js | 2 + test/app.test.js | 197 ++++++++++++++++++------------------- 5 files changed, 104 insertions(+), 101 deletions(-) diff --git a/README.md b/README.md index 5258b7a..e48da27 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Crawler para ler dados do Canal Eletrônico do Investidor -## __Importante__ +## __Importante__ (Versão sem captcha) Para versão antiga do CEI que não possui captcha obrigatório (por enquanto), utilize o [cei-crawler v2](https://github.com/Menighin/cei-crawler/tree/v2) ## Descrição diff --git a/package.json b/package.json index eda9d16..09e5c87 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "cei-crawler", - "version": "2.5.0", + "version": "3.0.0", "description": "Crawler para pegar dados do Canal Eletronico do Investidor", "main": "src/app.js", "repository": { diff --git a/src/lib/CeiCrawlerError.js b/src/lib/CeiCrawlerError.js index faf37b0..36294f3 100644 --- 
a/src/lib/CeiCrawlerError.js +++ b/src/lib/CeiCrawlerError.js @@ -1,3 +1,4 @@ +/* istanbul ignore next */ class CeiCrawlerError extends Error { constructor(type, message, status = null) { super(message); @@ -7,6 +8,7 @@ class CeiCrawlerError extends Error { } } +/* istanbul ignore next */ const CeiErrorTypes = Object.freeze({ INVALID_LOGIN_STRATEGY: 'INVALID_LOGIN_STRATEGY', BAD_REQUEST: 'BAD_REQUEST', diff --git a/src/lib/CeiLoginService.js b/src/lib/CeiLoginService.js index 40c216c..3bc14f0 100644 --- a/src/lib/CeiLoginService.js +++ b/src/lib/CeiLoginService.js @@ -31,6 +31,7 @@ class CeiLoginService { } } + /* istanbul ignore next */ async _getTokenByUserResolve() { const browser = await puppeteer.launch({ headless: false, @@ -89,6 +90,7 @@ class CeiLoginService { * @param {puppeteer.Browser} browser Puppeteer browser running * @returns {puppeteer.Page} The main page to keep crawling */ + /* istanbul ignore next */ async _getMainPage(browser) { while (true) { for (const p of (await browser.pages())) { diff --git a/test/app.test.js b/test/app.test.js index 608f68f..ba9ead4 100644 --- a/test/app.test.js +++ b/test/app.test.js @@ -1,7 +1,6 @@ const test = require('ava') const CeiCrawler = require('../src/app') const CeiUtils = require('./../src/lib/CeiUtils'); -const { CeiErrorTypes } = require('../src/lib/CeiCrawlerError'); const dotenv = require('dotenv'); @@ -24,106 +23,106 @@ test.before(t => { }); }); -// test.serial('consolidated-values', async t => { -// const consolidatedValues= await t.context.ceiCrawler.getConsolidatedValues(); -// t.true(consolidatedValues.total > 0); -// }); - -// test.serial('get-position', async t => { -// const positions = await t.context.ceiCrawler.getPosition(); -// t.true(positions.paginaAtual === 1); - -// const position = { -// category: positions.itens[0].categoriaProduto, -// type: positions.itens[0].tipoProduto, -// id: positions.itens[0].posicoes[0].id -// }; -// const positionDetail = await 
t.context.ceiCrawler.getPositionDetail(position.id, position.category, position.type); -// t.true(positionDetail.quantidade > 0); -// }); - -// test.serial('account-statement', async t => { -// const statement = await t.context.ceiCrawler.getAccountStatement(); -// t.true(statement.paginaAtual === 1); -// }); - -// test.serial('ipo-and-ipo-detail', async t => { -// const ipo = await t.context.ceiCrawler.getIPOs(); -// t.true(ipo.paginaAtual === 1); +test.serial('consolidated-values', async t => { + const consolidatedValues= await t.context.ceiCrawler.getConsolidatedValues(); + t.true(consolidatedValues.total > 0); +}); + +test.serial('get-position', async t => { + const positions = await t.context.ceiCrawler.getPosition(); + t.true(positions.paginaAtual === 1); + + const position = { + category: positions.itens[0].categoriaProduto, + type: positions.itens[0].tipoProduto, + id: positions.itens[0].posicoes[0].id + }; + const positionDetail = await t.context.ceiCrawler.getPositionDetail(position.id, position.category, position.type); + t.true(positionDetail.quantidade > 0); +}); + +test.serial('account-statement', async t => { + const statement = await t.context.ceiCrawler.getAccountStatement(); + t.true(statement.paginaAtual === 1); +}); + +test.serial('ipo-and-ipo-detail', async t => { + const ipo = await t.context.ceiCrawler.getIPOs(); + t.true(ipo.paginaAtual === 1); -// const id = ipo.itens[0].ofertasPublicas[0].id; -// const ipoDetail = await t.context.ceiCrawler.getIPODetail(id); -// t.true(ipoDetail !== undefined); -// }); - -// test.serial('get-stock-transactions', async t => { -// const stockTransactions = await t.context.ceiCrawler.getStockTransactions(); -// t.true(stockTransactions.paginaAtual === 1); -// }); - -// test.serial('provisioned-events-detail', async t => { -// const provisionedEvents = await t.context.ceiCrawler.getProvisionedEvents(); -// t.true(provisionedEvents.paginaAtual === 1); + const id = ipo.itens[0].ofertasPublicas[0].id; + const 
ipoDetail = await t.context.ceiCrawler.getIPODetail(id); + t.true(ipoDetail !== undefined); +}); + +test.serial('get-stock-transactions', async t => { + const stockTransactions = await t.context.ceiCrawler.getStockTransactions(); + t.true(stockTransactions.paginaAtual === 1); +}); + +test.serial('provisioned-events-detail', async t => { + const provisionedEvents = await t.context.ceiCrawler.getProvisionedEvents(); + t.true(provisionedEvents.paginaAtual === 1); -// const id = provisionedEvents.itens[0].id; -// const eventDetail = await t.context.ceiCrawler.getProvisionedEventDetail(id); -// t.true(eventDetail !== undefined); -// }); - -// test.serial('invalid-strategy', async t => { -// await t.throwsAsync(async () => { -// const crawler = new CeiCrawler('', '', { -// loginOptions: { -// strategy: 'invalid-strategy' -// } -// }); -// await crawler.login(); -// }); -// }); - -// test.serial('invalid-browser-path', async t => { -// await t.throwsAsync(async () => { -// const crawler = new CeiCrawler('', '', { -// loginOptions: { -// strategy: 'user-resolve', -// browserPath: 'C:/invalid/path' -// } -// }); -// await crawler.login(); -// }); -// }); - -// test.serial('invalid-token', async t => { -// await t.throwsAsync(async () => { -// const crawler = new CeiCrawler('', '', { -// loginOptions: { -// strategy: 'raw-token', -// }, -// auth: { -// "cache-guid": "invalid", -// "token": "invalid" -// } -// }); -// await crawler.login(); -// }); -// }); - -// test.serial('invalid-position-call', async t => { -// await t.throwsAsync(async () => { -// const crawler = new CeiCrawler('', '', { -// loginOptions: { -// strategy: 'raw-token', -// }, -// auth: { -// "cache-guid": "invalid", -// "token": "invalid" -// }, -// debug: true -// }); -// crawler._isLogged = true; -// await crawler.getPositionDetail('id-91', 'cat', 'type'); -// }); -// }); + const id = provisionedEvents.itens[0].id; + const eventDetail = await t.context.ceiCrawler.getProvisionedEventDetail(id); + 
t.true(eventDetail !== undefined); +}); + +test.serial('invalid-strategy', async t => { + await t.throwsAsync(async () => { + const crawler = new CeiCrawler('', '', { + loginOptions: { + strategy: 'invalid-strategy' + } + }); + await crawler.login(); + }); +}); + +test.serial('invalid-browser-path', async t => { + await t.throwsAsync(async () => { + const crawler = new CeiCrawler('', '', { + loginOptions: { + strategy: 'user-resolve', + browserPath: 'C:/invalid/path' + } + }); + await crawler.login(); + }); +}); + +test.serial('invalid-token', async t => { + await t.throwsAsync(async () => { + const crawler = new CeiCrawler('', '', { + loginOptions: { + strategy: 'raw-token', + }, + auth: { + "cache-guid": "invalid", + "token": "invalid" + } + }); + await crawler.login(); + }); +}); + +test.serial('invalid-position-call', async t => { + await t.throwsAsync(async () => { + const crawler = new CeiCrawler('', '', { + loginOptions: { + strategy: 'raw-token', + }, + auth: { + "cache-guid": "invalid", + "token": "invalid" + }, + debug: true + }); + crawler._isLogged = true; + await crawler.getPositionDetail('id-91', 'cat', 'type'); + }); +}); test.serial('cei-utils', async t => { let n = 0;