Skip to content

Commit

Permalink
Fix extra columns excluded in DataView CSV export (#2727)
Browse files Browse the repository at this point in the history
* fix extra columns excluded in DataView CSV export

* only include data view columns specified

* refactor code

* refactor code

* refactor code

* fix all extra columns present in filtered dataset exports

Previously, all extra columns — including those not selected — were included in exports for filtered datasets.

* fix failing test

* add test

* fix deprecated syntax, refactor test

* refactor tests

* fix failing test

* refactor tests
  • Loading branch information
kelvin-muchiri authored Oct 23, 2024
1 parent 4c8cba7 commit 7c1c41d
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 21 deletions.
15 changes: 9 additions & 6 deletions onadata/apps/api/tests/viewsets/test_dataview_viewset.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
# -*- coding: utf-8 -*-
"""Test DataViewViewSet"""

import csv
import json
import os
from datetime import datetime, timedelta
from unittest.mock import patch


from django.conf import settings
from django.core.cache import cache
from django.core.files.storage import default_storage
from django.test.utils import override_settings
from django.utils.timezone import utc

from openpyxl import load_workbook

from flaky import flaky
from openpyxl import load_workbook

from onadata.apps.api.tests.viewsets.test_abstract_viewset import TestAbstractViewSet
from onadata.apps.api.viewsets.attachment_viewset import AttachmentViewSet
Expand Down Expand Up @@ -1050,7 +1049,11 @@ def test_xlsx_export_with_choice_labels(self, async_result):
"name": "My DataView",
"xform": f"http://testserver/api/v1/forms/{xform.pk}",
"project": f"http://testserver/api/v1/projects/{project.pk}",
"columns": '["name", "age", "gender", "pizza_type"]',
"columns": (
'["name", "age", "gender", "pizza_type", "_id", "_uuid", '
'"_submission_time", "_index", "_parent_table_name", "_parent_index", '
'"_tags", "_notes", "_version", "_duration","_submitted_by"]'
),
"query": ('[{"column":"age","filter":"=","value":"28"}]'),
}
self._create_dataview(data=data)
Expand Down Expand Up @@ -1089,8 +1092,8 @@ def test_xlsx_export_with_choice_labels(self, async_result):
self.assertTrue(export.is_successful)
workbook = load_workbook(export.full_filepath)
workbook.iso_dates = True
sheet_name = workbook.get_sheet_names()[0]
main_sheet = workbook.get_sheet_by_name(sheet_name)
sheet_name = workbook.sheetnames[0]
main_sheet = workbook[sheet_name]
sheet_headers = list(main_sheet.values)[0]
sheet_data = list(main_sheet.values)[1]
inst = self.xform.instances.get(id=sheet_data[4])
Expand Down
59 changes: 58 additions & 1 deletion onadata/libs/tests/utils/test_csv_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
"""
Test CSVDataFrameBuilder
"""

import csv
import os
from builtins import chr, open
from tempfile import NamedTemporaryFile

from builtins import chr, open
from django.test.utils import override_settings
from django.utils.dateparse import parse_datetime

from onadata.apps.logger.models import DataView
from onadata.apps.logger.models.entity_list import EntityList
from onadata.apps.logger.models.xform import XForm
from onadata.apps.logger.xform_instance_parser import xform_instance_to_dict
Expand Down Expand Up @@ -2086,3 +2088,58 @@ def test_entity_list_dataset(self):
]
row = next(csv_reader)
self.assertCountEqual(row, expected_row)

def test_extra_columns_dataview(self):
    """Extra columns are exported for a dataview only when it lists them.

    For every extra column, a dataview selecting ``age`` plus that column
    is exported to CSV and the header row must be exactly those two
    columns, in that order.
    """
    md_xform = """
| survey |
| | type | name | label |
| | text | name | Name |
| | integer | age | Age |
| | select_multiple fruits | fruit | Fruit |
| | | | |
| choices | list name | name | label |
| | fruits | 1 | Mango |
| | fruits | 2 | Orange |
| | fruits | 3 | Apple |
"""
    xform = self._publish_markdown(md_xform, self.user, id_string="b")
    cursor = [{"name": "Maria", "age": 25, "fruit": "1 2"}]
    csv_df_builder = CSVDataFrameBuilder(
        self.user.username,
        xform.id_string,
        split_select_multiples=False,
        include_images=False,
        show_choice_labels=True,
    )
    extra_cols = [
        "_id",
        "_uuid",
        "_submission_time",
        "_date_modified",
        "_tags",
        "_notes",
        "_version",
        "_duration",
        "_submitted_by",
        "_total_media",
        "_media_count",
        "_media_all_received",
    ]

    for extra_col in extra_cols:
        dataview = DataView.objects.create(
            xform=xform,
            name="test",
            columns=["age", extra_col],
            project=self.project,
        )

        # Only a path is needed here: release our own handle immediately
        # so no file descriptor stays open across loop iterations.
        with NamedTemporaryFile(suffix=".csv", delete=False) as temp_file:
            csv_path = temp_file.name

        try:
            csv_df_builder.export_to(csv_path, cursor, dataview=dataview)

            with open(csv_path, "r") as csv_file:
                header = next(csv.reader(csv_file))
        finally:
            # delete=False leaves removal of the file to the caller.
            os.unlink(csv_path)

        self.assertEqual(header, ["age", extra_col])
54 changes: 45 additions & 9 deletions onadata/libs/tests/utils/test_export_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
Tests Export Builder Functionality
"""

from __future__ import unicode_literals

import csv
Expand All @@ -23,6 +24,7 @@
from savReaderWriter import SavHeaderReader, SavReader

from onadata.apps.logger.import_tools import django_file
from onadata.apps.logger.models import DataView
from onadata.apps.main.tests.test_base import TestBase
from onadata.apps.viewer.models.data_dictionary import DataDictionary
from onadata.apps.viewer.models.parsed_instance import _encode_for_mongo, query_data
Expand Down Expand Up @@ -1622,10 +1624,10 @@ def test_to_xlsx_export_works(self):
"children_cartoons",
"children_cartoons_characters",
]
self.assertEqual(list(workbook.get_sheet_names()), expected_sheet_names)
self.assertEqual(list(workbook.sheetnames), expected_sheet_names)

# check header columns
main_sheet = workbook.get_sheet_by_name("childrens_survey")
main_sheet = workbook["childrens_survey"]
expected_column_headers = [
"name",
"age",
Expand Down Expand Up @@ -1654,7 +1656,7 @@ def test_to_xlsx_export_works(self):
sorted(list(column_headers)), sorted(expected_column_headers)
)

childrens_sheet = workbook.get_sheet_by_name("children")
childrens_sheet = workbook["children"]
expected_column_headers = [
"children/name",
"children/age",
Expand Down Expand Up @@ -1683,7 +1685,7 @@ def test_to_xlsx_export_works(self):
sorted(list(column_headers)), sorted(expected_column_headers)
)

cartoons_sheet = workbook.get_sheet_by_name("children_cartoons")
cartoons_sheet = workbook["children_cartoons"]
expected_column_headers = [
"children/cartoons/name",
"children/cartoons/why",
Expand All @@ -1704,9 +1706,7 @@ def test_to_xlsx_export_works(self):
sorted(list(column_headers)), sorted(expected_column_headers)
)

characters_sheet = workbook.get_sheet_by_name(
"children_cartoons_characters"
)
characters_sheet = workbook["children_cartoons_characters"]
expected_column_headers = [
"children/cartoons/characters/name",
"children/cartoons/characters/good_or_evil",
Expand Down Expand Up @@ -1740,7 +1740,7 @@ def test_to_xlsx_export_respects_custom_field_delimiter(self):
workbook = load_workbook(filename)

# check header columns
main_sheet = workbook.get_sheet_by_name("childrens_survey")
main_sheet = workbook["childrens_survey"]
expected_column_headers = [
"name",
"age",
Expand Down Expand Up @@ -1819,7 +1819,7 @@ def test_to_xlsx_export_generates_valid_sheet_names(self):
"childrens_survey_with_a_very_l2",
"childrens_survey_with_a_very_l3",
]
self.assertEqual(list(workbook.get_sheet_names()), expected_sheet_names)
self.assertEqual(list(workbook.sheetnames), expected_sheet_names)

# pylint: disable=invalid-name
def test_child_record_parent_table_is_updated_when_sheet_is_renamed(self):
Expand Down Expand Up @@ -3668,3 +3668,39 @@ def test_sav_export_with_duplicate_metadata(self, mock_uuid):
rows[1] = list(map(_str_if_bytes, rows[1]))
self.assertEqual(expected_data, rows)
shutil.rmtree(temp_dir)

def test_extra_columns_dataview(self):
    """A dataview's fields contain an extra column only if it selects it.

    Each extra column is paired with ``name`` in its own dataview; the
    fields resolved for the first section must be exactly that pair.
    """
    fixture_path = _logger_fixture_path("childrens_survey.xlsx")
    self._publish_xls_file_and_set_xform(fixture_path)

    builder = ExportBuilder()
    builder.set_survey(self.xform.survey)

    candidate_columns = (
        "_id",
        "_uuid",
        "_submission_time",
        "_index",
        "_parent_table_name",
        "_parent_index",
        "_tags",
        "_notes",
        "_version",
        "_duration",
        "_submitted_by",
    )

    for candidate in candidate_columns:
        expected_fields = ["name", candidate]
        view = DataView.objects.create(
            xform=self.xform,
            name="test",
            # Hand the model its own list so the expectation stays untouched.
            columns=list(expected_fields),
            project=self.project,
        )
        first_section = builder.sections[0]
        resolved_fields = builder.get_fields(view, first_section, "title")
        self.assertEqual(resolved_fields, expected_fields)
12 changes: 9 additions & 3 deletions onadata/libs/utils/csv_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
CSV export utility functions.
"""

from collections import OrderedDict
from itertools import chain, tee

Expand All @@ -10,11 +11,10 @@

import unicodecsv as csv
from pyxform.question import Question
from pyxform.section import RepeatingSection, Section, GroupedSection
from pyxform.section import GroupedSection, RepeatingSection, Section
from six import iteritems

from onadata.apps.logger.models import EntityList
from onadata.apps.logger.models import OsmData
from onadata.apps.logger.models import EntityList, OsmData
from onadata.apps.logger.models.xform import XForm, question_types_to_exclude
from onadata.apps.viewer.models.data_dictionary import DataDictionary
from onadata.libs.utils.common_tags import (
Expand Down Expand Up @@ -852,6 +852,12 @@ def export_to(self, path, cursor, dataview=None):
]
)
)

# add extra columns
for column in filter(lambda col: col not in columns, dataview.columns):
if column in self.extra_columns:
columns.append(column)

else:
columns = list(
chain.from_iterable(
Expand Down
12 changes: 10 additions & 2 deletions onadata/libs/utils/export_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
ExportBuilder
"""

from __future__ import unicode_literals

import csv
Expand Down Expand Up @@ -1480,15 +1481,22 @@ def get_fields(self, dataview, section, key):
Return list of element value with the key in section['elements'].
"""
if dataview:
return [
columns = [
(
element.get("_label_xpath") or element[key]
if self.SHOW_CHOICE_LABELS
else element[key]
)
for element in section["elements"]
if element["title"] in dataview.columns
] + self.extra_columns
]

# add extra columns
for column in filter(lambda col: col not in columns, dataview.columns):
if column in self.extra_columns:
columns.append(column)

return columns

return [
(
Expand Down

0 comments on commit 7c1c41d

Please sign in to comment.