Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow to anonymize csv #333

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .dev.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
MONGO_DB_URI=mongodb://root:example@localhost
POSTGRES_DB_URI=postgresql://postgres:example@localhost/test
MARIADB_DB_URI=mariadb://example:[email protected]/test
NODE_ENV=development
NODE_ENV=development
DEBUG="dbzar,dbzar:*"
2 changes: 2 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ on:
branches:
- master
- beta
- "milestone-**"
pull_request:
branches:
- master
- beta
- "milestone-**"

jobs:
lint:
Expand Down
10 changes: 10 additions & 0 deletions jest-mongodb-config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/**
 * Options for the in-memory MongoDB server used by the test suite.
 * NOTE(review): presumably picked up by the jest-mongodb preset via this
 * file's name — confirm against the Jest configuration.
 */
module.exports = {
mongodbMemoryServerOptions: {
binary: {
// MongoDB binary version requested for the test server.
version: '6.0.6',
// NOTE(review): presumably disables the MD5 check of the downloaded
// binary — confirm against the mongodb-memory-server documentation.
skipMD5: true,
},
// NOTE(review): looks like the server is not started automatically and is
// expected to be started by the test harness — confirm.
autoStart: false,
// No per-instance overrides are set.
instance: {},
},
};
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"mongodb",
"mariadb",
"mysql",
"csv",
"mock",
"fake"
],
Expand Down Expand Up @@ -93,6 +94,7 @@
"mysql": "^2.18.1",
"node-emoji": "^2.1.0",
"ora": "5.4.1",
"papaparse": "^5.4.1",
"pg": "^8.7.1"
},
"packageManager": "[email protected]"
Expand Down
4 changes: 2 additions & 2 deletions src/api/anon-column.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {type Anonymizer} from '../anonymizers/types';
import {createAnonymizer} from '../anonymizers/utils/create-anonymizer';
import {getProcessor} from '../cli/commands/anon-col/helpers/get-processor';
import {type Processor} from '../processors/base-processor/processor';
import {getDatabaseProcessor} from '../processors/get-db-processor';
import {createLogger} from '../services/loggers/debug-logger';
import {type Provider} from '../types/types';

Expand Down Expand Up @@ -29,7 +29,7 @@ export async function anonColumn(
throw new Error('No uri in configuration');
}

const processor: Processor | undefined = getProcessor(uri);
const processor: Processor | undefined = getDatabaseProcessor(uri);

logger(`processing ${dbName}`);
if (processor) {
Expand Down
6 changes: 3 additions & 3 deletions src/api/anon-db.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {getProcessor} from '../cli/commands/anon-col/helpers/get-processor';
import {type Config} from '../config/types';
import {type Processor} from '../processors/base-processor/processor';
import {getCollections} from '../processors/utils/get-collections';
import {getCollections} from '../processors/databases/utils/get-collections';
import {getDatabaseProcessor} from '../processors/get-db-processor';
import {createLogger} from '../services/loggers/debug-logger';

const logger = createLogger(__filename);
Expand All @@ -20,7 +20,7 @@ export async function anonDb(config: Config): Promise<void> {
throw new Error('No uri in configuration');
}

const processor: Processor | undefined = getProcessor(uri);
const processor: Processor | undefined = getDatabaseProcessor(uri);

logger(`processing ${dbName}`);
if (processor) {
Expand Down
33 changes: 0 additions & 33 deletions src/cli/commands/anon-col/helpers/get-processor.ts

This file was deleted.

5 changes: 3 additions & 2 deletions src/cli/commands/anon-col/helpers/process-column.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import ora from 'ora';
import {type Anonymizer} from '../../../../anonymizers/types';
import {type Processor} from '../../../../processors/base-processor/processor';
import {getProcessor} from './get-processor';
import {getDatabaseProcessor} from '../../../../processors/get-db-processor';
import {isUserConfirmed} from './is-user-confirmed';

export async function processColumn(
Expand All @@ -12,7 +12,8 @@ export async function processColumn(
columnName: string,
checkConfirm = true,
) {
const processor: Processor | undefined = getProcessor(connectionString);
const processor: Processor | undefined =
getDatabaseProcessor(connectionString);

if (processor) {
// Check confirm
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import {type Command} from 'commander';
import {type Config} from '../../../config/types';
import {type Processor} from '../../../processors/base-processor/processor';
import {getCollections} from '../../../processors/utils/get-collections';
import {getCollections} from '../../../processors/databases/utils/get-collections';
import {getDatabaseProcessor} from '../../../processors/get-db-processor';
import {createLogger} from '../../../services/loggers/debug-logger';
import {getProcessor} from '../anon-col/helpers/get-processor';
import {processDb} from './process-db';
import {loadDbzarConfig} from './utils/load-dbzar-config';

Expand All @@ -25,7 +25,7 @@ export async function anonDbAction(this: Command) {

logger(`uri = ${uri}`);

const processor: Processor | undefined = getProcessor(uri);
const processor: Processor | undefined = getDatabaseProcessor(uri);
const collections = getCollections(config);

if (processor && collections) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import {Command} from 'commander';
import {anonDbAction} from './anon-db-action';
import {anonDbAction} from './anon-db.action';

export const anonDbCommand = new Command('anon-db');

Expand Down
2 changes: 1 addition & 1 deletion src/cli/commands/anon-db/process-db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import ora from 'ora';
import {providerEmoji} from '../../../anonymizers/consts/provider-emoji';
import {providerVerb} from '../../../anonymizers/consts/provider-verb';
import {type Processor} from '../../../processors/base-processor/processor';
import {type Collection} from '../../../processors/types/collection';
import {type Collection} from '../../../processors/databases/types/collection';
import {type ProviderType} from '../../../types/types';

async function processCollection(
Expand Down
22 changes: 22 additions & 0 deletions src/cli/commands/anon-file/anon-file.action.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import {extname} from 'path';
import {type Command} from 'commander';
import {createLogger} from '../../../services/loggers/debug-logger';

// Module-level debug logger, created from this file's path.
const logger = createLogger(__filename);

/**
 * Commander action handler for the `anon-file` command.
 *
 * Takes the first positional argument as the file path, logs it, and derives
 * the lower-cased extension (without the leading dot). Only `csv` is
 * recognised at the moment, and the only effect is a debug log line; any
 * other extension is silently ignored.
 *
 * @throws Error with message `no file path` when no path argument was given.
 */
export async function anonFileAction(this: Command) {
  logger('processing file');

  const filePath = this.args[0];
  logger(`filePath = ${filePath}`);

  // Guard clause: without a path there is nothing to inspect.
  if (!filePath) {
    throw new Error('no file path');
  }

  // `extname` returns '' when there is no extension, so slicing is always safe.
  const extension = extname(filePath).slice(1).toLowerCase();
  if (extension !== 'csv') {
    return;
  }

  logger('parsing csv file');
}
9 changes: 9 additions & 0 deletions src/cli/commands/anon-file/anon-file.command.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import {Command} from 'commander';
import {anonFileAction} from './anon-file.action';

/**
 * `anon-file` sub-command: takes an optional `filePath` positional argument
 * and delegates to `anonFileAction`.
 */
export const anonFileCommand = new Command('anon-file')
  .description('Anonymize a single file')
  .argument('[filePath]', 'file path')
  .action(anonFileAction);
4 changes: 3 additions & 1 deletion src/cli/dbzar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
import process from 'node:process';
import {Command} from 'commander';
import {anonColCommand} from './commands/anon-col/anon-col';
import {anonDbCommand} from './commands/anon-db/anon-db-command';
import {anonDbCommand} from './commands/anon-db/anon-db.command';
import {anonFileCommand} from './commands/anon-file/anon-file.command';

const program = new Command();

program.addCommand(anonColCommand);
program.addCommand(anonDbCommand);
program.addCommand(anonFileCommand);

program.parse(process.argv);
2 changes: 1 addition & 1 deletion src/config/types.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import {type Provider, type ProviderType} from '../types/types';

export enum EngineType {
export enum DatabaseEngineType {
PostGres = 'postgres',
Mongo = 'mongo',
MariaDB = 'mariadb',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import knex, {type Knex} from 'knex';
import {type Anonymizer} from '../../anonymizers/types';
import {debugLogger} from '../../services/loggers/debug-logger';
import {BaseProcessor} from '../base-processor/base-processor';
import {type Processor} from '../base-processor/processor';
import {type Anonymizer} from '../../../anonymizers/types';
import {debugLogger} from '../../../services/loggers/debug-logger';
import {BaseProcessor} from '../../base-processor/base-processor';
import {type Processor} from '../../base-processor/processor';

const logger = debugLogger.extend('mariadb-processor');

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import process from 'process';
import {type Db, MongoClient, type MongoClientOptions} from 'mongodb';
import {MaskAnonymizer} from '../../../anonymizers/mask/mask-anonymizer';
import {type Anonymizer} from '../../../anonymizers/types';
import {MongoClient, type Db, type MongoClientOptions} from 'mongodb';
import {MaskAnonymizer} from '../../../../anonymizers/mask/mask-anonymizer';
import {type Anonymizer} from '../../../../anonymizers/types';
import {MongoProcessor} from '../mongo-processor';

describe('mongo-processor', () => {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import {type Db, MongoClient} from 'mongodb';
import {type Anonymizer} from '../../anonymizers/types';
import {debugLogger} from '../../services/loggers/debug-logger';
import {BaseProcessor} from '../base-processor/base-processor';
import {type Processor} from '../base-processor/processor';
import {MongoClient, type Db} from 'mongodb';
import {type Anonymizer} from '../../../anonymizers/types';
import {debugLogger} from '../../../services/loggers/debug-logger';
import {BaseProcessor} from '../../base-processor/base-processor';
import {type Processor} from '../../base-processor/processor';

const logger = debugLogger.extend('mongo-processor');
export class MongoProcessor extends BaseProcessor implements Processor {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import {type Knex} from 'knex';
import {newDb} from 'pg-mem';
import {MaskAnonymizer} from '../../../../anonymizers/mask/mask-anonymizer';
import {type Anonymizer} from '../../../../anonymizers/types';
import {PostgresProcessor} from '../postgres/postgres-processor';

/**
 * Integration-style tests for `PostgresProcessor.processColumn`, run against
 * an in-memory Postgres (`pg-mem`) exposed through a Knex adapter.
 */
describe('PostgresProcessor', () => {
  let knex: Knex;

  beforeEach(async () => {
    // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
    knex = newDb().adapters.createKnex();

    // Both tests work on the same schema, so create it once per fresh db.
    await knex.schema.createTable('users', (table) => {
      table.increments('id');
      table.string('firstName');
    });
  });

  afterEach(async () => {
    await knex.destroy();
  });

  /**
   * Runs a `MaskAnonymizer` over `users.firstName` through a
   * `PostgresProcessor` that is wired to the in-memory Knex instance.
   *
   * The spies are restored in a `finally` block so a failing `processColumn`
   * cannot leave `buildClient` / `destroy` mocked for subsequent tests.
   */
  async function maskFirstNames(): Promise<void> {
    // Stubbed once so that a destroy call made during processing (if any)
    // does not tear down the in-memory connection before the assertions.
    const destroySpy = jest
      .spyOn(knex.client, 'destroy')
      .mockImplementationOnce(async () => {
        // Do nothing
      });
    // Make the processor use the in-memory Knex instead of a real connection.
    const buildClientSpy = jest
      .spyOn(PostgresProcessor.prototype as any, 'buildClient')
      .mockImplementationOnce(() => knex);

    try {
      const processor: PostgresProcessor = new PostgresProcessor(
        'postgresql://localhost',
      );
      const anonymizer: Anonymizer = new MaskAnonymizer();
      await processor.processColumn('users', 'firstName', anonymizer);
    } finally {
      destroySpy.mockRestore();
      buildClientSpy.mockRestore();
    }
  }

  it('should process a single doc', async () => {
    await knex('users').insert({firstName: 'John'});

    // Sanity check: the value is stored in clear text before processing.
    const rowsBefore = await knex('users').select('firstName');
    expect(rowsBefore[0].firstName).toBe('John');

    await maskFirstNames();

    const rowsAfter = await knex('users').select('firstName');
    expect(rowsAfter[0].firstName).toBe('****');
  });

  it('should process multiple rows', async () => {
    await knex('users').insert([{firstName: 'test1'}, {firstName: 'test2'}]);

    await maskFirstNames();

    const rowsAfter = await knex('users').select('firstName');
    expect(rowsAfter[0].firstName).toBe('*****');
    expect(rowsAfter[1].firstName).toBe('*****');
  });
});
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import {type Knex} from 'knex';
import {newDb} from 'pg-mem';
import {MaskAnonymizer} from '../../../anonymizers/mask/mask-anonymizer';
import {type Anonymizer} from '../../../anonymizers/types';

import {MaskAnonymizer} from '../../../../../anonymizers/mask/mask-anonymizer';
import {type Anonymizer} from '../../../../../anonymizers/types';
import {PostgresProcessor} from '../postgres-processor';

describe('PostgresProcessor', () => {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import knex, {type Knex} from 'knex';
import {type Anonymizer} from '../../anonymizers/types';
import {debugLogger} from '../../services/loggers/debug-logger';
import {BaseProcessor} from '../base-processor/base-processor';
import {type Processor} from '../base-processor/processor';
import {type Anonymizer} from '../../../../anonymizers/types';
import {debugLogger} from '../../../../services/loggers/debug-logger';
import {BaseProcessor} from '../../../base-processor/base-processor';
import {type Processor} from '../../../base-processor/processor';

const logger = debugLogger.extend('postgress-processor');

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {type Anonymizer} from '../../anonymizers/types';
import {type Anonymizer} from '../../../anonymizers/types';

export type Collection = {
dbName: string;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import {type Anonymizer} from '../../anonymizers/types';
import {createAnonymizer} from '../../anonymizers/utils/create-anonymizer';
import {type Config} from '../../config/types';
import {type Anonymizer} from '../../../anonymizers/types';
import {createAnonymizer} from '../../../anonymizers/utils/create-anonymizer';
import {type Config} from '../../../config/types';
import {type Collection} from '../types/collection';

export function getCollections(config: Config): Collection[] {
Expand Down
Empty file added src/processors/files/index.ts
Empty file.
Loading
Loading