From 758d2d8e367ee6f655b650c59604cd69c8cf1b93 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 13 Mar 2024 11:57:33 -0600
Subject: [PATCH 01/70] first draft of survey_responses notebook

The notebook currently uses the survey_info part of the config.
It uses the config to get the surveys, and uses the xlsx files to translate between the data and readable labels.
It generates a plot for every question present in the data.

This should hopefully work with multiple surveys, but it has only been tested with one so far.
---
 viz_scripts/survey_responses.ipynb | 269 +++++++++++++++++++++++++++++
 1 file changed, 269 insertions(+)
 create mode 100644 viz_scripts/survey_responses.ipynb

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
new file mode 100644
index 00000000..7c8f5ecb
--- /dev/null
+++ b/viz_scripts/survey_responses.ipynb
@@ -0,0 +1,269 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a7fa9a20",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %conda install openpyxl"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7c7fbf97",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "year = None\n",
+    "month = None\n",
+    "program = \"washingtoncommons\"\n",
+    "study_type = \"study\"\n",
+    "mode_of_interest = None\n",
+    "include_test_users = False\n",
+    "dynamic_labels = {  }\n",
+    "use_imperial = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "585410e0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from collections import defaultdict\n",
+    "\n",
+    "import urllib.request\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "from plots import *\n",
+    "import scaffolding\n",
+    "\n",
+    "sns.set_style(\"whitegrid\")\n",
+    "sns.set()\n",
+    "%matplotlib inline\n",
+    "\n",
+    "# get metric vs imperial vars\n",
+    "label_units, short_label, label_units_lower, distance_col, weight_unit = scaffolding.get_units(use_imperial)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9a85ca35",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#probably going to end up passing this in\n",
+    "survey_info = {\n",
+    "      \"surveys\": {\n",
+    "        \"UserProfileSurvey\": {\n",
+    "          \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/washingtoncommons/washingtoncommons-onboarding-survey-v4.xml\",\n",
+    "          \"version\": 1.3,\n",
+    "          \"compatibleWith\": 1,\n",
+    "          \"dataKey\": \"manual/demographic_survey\",\n",
+    "          \"labelTemplate\": {\n",
+    "            \"en\": \"Answered\",\n",
+    "            \"es\": \"Contestada\"\n",
+    "          }\n",
+    "        },\n",
+    "        \"TripConfirmSurvey\": {\n",
+    "            \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/washingtoncommons/washingtoncommons-trip-survey-v2.json\",\n",
+    "            \"version\": 1.2,\n",
+    "            \"compatibleWith\": 1,\n",
+    "            \"dataKey\": \"manual/trip_user_input\",\n",
+    "            \"labelVars\": {\n",
+    "              \"modes\": {\n",
+    "                \"key\": \"What_was_the_main_pu_f_this_trip_trip_leg\",\n",
+    "                \"type\": \"length\"\n",
+    "              },\n",
+    "              \"purposes\": {\n",
+    "                \"key\": \"_2_What_was_the_mode_of_transp\",\n",
+    "                \"type\": \"length\"\n",
+    "              }\n",
+    "            },\n",
+    "            \"labelTemplate\": {\n",
+    "              \"en\": \"{ purposes, plural, =0 {No purposes} one {1 purpose} other {# purposes} }, { modes, plural, =0 {No modes} one {1 mode} other {# modes} }\",\n",
+    "              \"es\": \"{ purposes, plural, =0 {No propósitos} one {1 propósito} other {# propósitos} }, { modes, plural, =0 {No modos} one {1 modo} other {# modos} }\"\n",
+    "            }\n",
+    "          }\n",
+    "      },\n",
+    "      \"trip-labels\": \"ENKETO\"\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf0b2f08",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#input: list of survey names from the config\n",
+    "#output: list of links to the sheets where questions/answers are\n",
+    "#will run n surveys times\n",
+    "def get_sheet_links(survey_list):\n",
+    "    sheet_list = []\n",
+    "    for name in survey_list:\n",
+    "        form_path = survey_info['surveys'][name]['formPath']\n",
+    "        #THIS ASSUMES THE FILENAME IS THE SAME AS THE FORM PATH BUT WITH XLSX FILE TYPE\n",
+    "        l_path = form_path.split('.')\n",
+    "        l_path[-1] = 'xlsx'\n",
+    "        s = '.'\n",
+    "        sheet_path = s.join(l_path)\n",
+    "        sheet_list.append(sheet_path)\n",
+    "    return sheet_list\n",
+    "\n",
+    "#input: list of urls for the survey xlsx files\n",
+    "#output: two dictionaries to translate the ?s/ans\n",
+    "#will run n surveys times\n",
+    "def build_dictionaries(url_list):\n",
+    "    opt_dicts = {}\n",
+    "    quest_dicts = {}\n",
+    "    \n",
+    "    for url in sheet_list:\n",
+    "        result = urllib.request.urlopen(url).read()\n",
+    "        xls = pd.ExcelFile(result)\n",
+    "        questions = pd.read_excel(xls, 'survey')\n",
+    "        options = pd.read_excel(xls, 'choices')\n",
+    "\n",
+    "        opt_dict.update(dict(zip(options.name, options.label)))\n",
+    "        quest_dict.update(dict(zip(questions.name, questions.label)))\n",
+    "    \n",
+    "    return opt_dict, quest_dict\n",
+    "\n",
+    "#input: dataframe containing all trips that have non-blank user_input\n",
+    "#output: dataframe with questions in the columns and answers in the rows\n",
+    "#for loop will run n survey responses times (this could get big!)\n",
+    "def create_dataframe(df_trips_w_surveys):\n",
+    "    df = df_trips_w_surveys.reset_index()\n",
+    "    rows = []\n",
+    "    for i in range(len(df)):\n",
+    "        row = pd.json_normalize(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse']['data'])\n",
+    "        rows.append(row)\n",
+    "\n",
+    "    df = pd.concat(rows)\n",
+    "    #drop the non-question columns, should leave behind all the questions\n",
+    "    df = df.drop(columns = ['end', 'start', 'attrid', 'attrxmlns:orx', 'attrxmlns:orx', 'attrxmlns:jr', 'meta.instanceID', 'meta.deprecatedID'])\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "#input: list of labels that will end up on the chart\n",
+    "#output: translated to readable list, with multiples handled\n",
+    "#the for loop will run n times, where num_options <= n < all possible combinations of options\n",
+    "#if people are selecting many different combinations, could be large\n",
+    "def traslate_options(labels):\n",
+    "    for i in range(len(labels)):\n",
+    "        l_labels = labels[i].split(\" \")\n",
+    "        for k in range(len(l_labels)):\n",
+    "            print()\n",
+    "            l_labels[k] = opt_dict[l_labels[k]]\n",
+    "        sep = \"\\n\"\n",
+    "        labels[i] = sep.join(l_labels)\n",
+    "    \n",
+    "    return labels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4b9db890",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#list of all surveys that are not a \"UserProfileSurvey\"\n",
+    "survey_list = list(survey_info['surveys'].keys())\n",
+    "survey_list.remove('UserProfileSurvey')\n",
+    "sheet_list = get_sheet_links(survey_list)\n",
+    "\n",
+    "print('survey sheets: ', sheet_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "caeb880b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#load all of the confirmed trips\n",
+    "tq = scaffolding.get_time_query(year, month)\n",
+    "all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)\n",
+    "\n",
+    "#remove blank inputs\n",
+    "survey_trips = all_confirmed_trips[all_confirmed_trips['user_input'] != {}]\n",
+    "print(len(survey_trips))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c00da0a7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#create translation dictionaries\n",
+    "opt_dicts, quest_dicts = build_dictionaries(sheet_list)\n",
+    "\n",
+    "#format survey trips into responses dataframe\n",
+    "df_responses = create_dataframe(survey_trips)\n",
+    "\n",
+    "#replace questions\n",
+    "df_responses = df_responses.rename(columns = quest_dict)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "08d04b39",
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "#create one plot per question\n",
+    "for col in df.columns:\n",
+    "    quest_frame = df.copy()\n",
+    "    quest_frame = quest_frame[quest_frame[col] != \"\"] #could have blank responses for non-mandatory ?s\n",
+    "    labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist())\n",
+    "    values = quest_frame[col].value_counts(dropna=True).tolist()\n",
+    "     \n",
+    "    pie_chart_purpose(col, labels, values, \"howdy\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "132e00a1",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From a5262573d73df89ea5c4495a334942f437f84e9a Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Thu, 14 Mar 2024 18:39:56 -0600
Subject: [PATCH 02/70] add quality text to pie charts

---
 viz_scripts/survey_responses.ipynb | 42 ++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 7c8f5ecb..4a5888a2 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -123,8 +123,8 @@
     "#output: two dictionaries to translate the ?s/ans\n",
     "#will run n surveys times\n",
     "def build_dictionaries(url_list):\n",
-    "    opt_dicts = {}\n",
-    "    quest_dicts = {}\n",
+    "    opt_dict = {}\n",
+    "    quest_dict = {}\n",
     "    \n",
     "    for url in sheet_list:\n",
     "        result = urllib.request.urlopen(url).read()\n",
@@ -157,7 +157,7 @@
     "#output: translated to readable list, with multiples handled\n",
     "#the for loop will run n times, where num_options <= n < all possible combinations of options\n",
     "#if people are selecting many different combinations, could be large\n",
-    "def traslate_options(labels):\n",
+    "def traslate_options(labels, opt_dict):\n",
     "    for i in range(len(labels)):\n",
     "        l_labels = labels[i].split(\" \")\n",
     "        for k in range(len(l_labels)):\n",
@@ -166,7 +166,17 @@
     "        sep = \"\\n\"\n",
     "        labels[i] = sep.join(l_labels)\n",
     "    \n",
-    "    return labels"
+    "    return labels\n",
+    "\n",
+    "#input: all of the responses to a single survey\n",
+    "#output: text with num responses and users\n",
+    "#THIS SHOULD GO IN SCAFFOLDING AND INCLUDE WHOLE POOL AT SOME POINT\n",
+    "def get_text(responses):\n",
+    "    num_resp = len(responses)\n",
+    "    num_users = responses.user_id.nunique()\n",
+    "    quality_text = f\"Based on {num_resp} responses from {num_users} users\"\n",
+    "    print(quality_text)\n",
+    "    return quality_text"
    ]
   },
   {
@@ -194,10 +204,20 @@
     "#load all of the confirmed trips\n",
     "tq = scaffolding.get_time_query(year, month)\n",
     "all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)\n",
-    "\n",
     "#remove blank inputs\n",
     "survey_trips = all_confirmed_trips[all_confirmed_trips['user_input'] != {}]\n",
-    "print(len(survey_trips))"
+    "print(len(survey_trips))\n",
+    "\n",
+    "#survey counts df\n",
+    "survey_trips = survey_trips.reset_index()\n",
+    "survey_trips['survey_name'] = survey_trips.user_input.apply(lambda sr: sr['trip_user_input']['data']['name'])\n",
+    "\n",
+    "#gather the cols needed for charts and text\n",
+    "survey_trips = survey_trips[['survey_name', 'user_id', 'user_input']]\n",
+    "survey_trips.head()\n",
+    "\n",
+    "#get quality text\n",
+    "qual_text = get_text(survey_trips)"
    ]
   },
   {
@@ -208,7 +228,7 @@
    "outputs": [],
    "source": [
     "#create translation dictionaries\n",
-    "opt_dicts, quest_dicts = build_dictionaries(sheet_list)\n",
+    "opt_dict, quest_dict = build_dictionaries(sheet_list)\n",
     "\n",
     "#format survey trips into responses dataframe\n",
     "df_responses = create_dataframe(survey_trips)\n",
@@ -227,13 +247,13 @@
    "outputs": [],
    "source": [
     "#create one plot per question\n",
-    "for col in df.columns:\n",
-    "    quest_frame = df.copy()\n",
+    "for col in df_responses.columns:\n",
+    "    quest_frame = df_responses.copy()\n",
     "    quest_frame = quest_frame[quest_frame[col] != \"\"] #could have blank responses for non-mandatory ?s\n",
-    "    labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist())\n",
+    "    labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
     "    values = quest_frame[col].value_counts(dropna=True).tolist()\n",
     "     \n",
-    "    pie_chart_purpose(col, labels, values, \"howdy\")"
+    "    pie_chart_purpose(col+'\\n'+qual_text, labels, values, \"howdy\")"
    ]
   },
   {

From c4cfdfaa0266a88532f0dcbbbf7426b65d660f26 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 20 Mar 2024 16:21:33 -0600
Subject: [PATCH 03/70] revise the way surveys are read

Starting to read the surveys from the xml form instead of the spreadsheet; this gives more support across languages.
---
 viz_scripts/survey_responses.ipynb | 187 +++++++++++++++--------------
 1 file changed, 98 insertions(+), 89 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 4a5888a2..5c8100e3 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -1,15 +1,5 @@
 {
  "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a7fa9a20",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# %conda install openpyxl"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -19,7 +9,7 @@
    "source": [
     "year = None\n",
     "month = None\n",
-    "program = \"washingtoncommons\"\n",
+    "program = \"dfc-fermata\"\n",
     "study_type = \"study\"\n",
     "mode_of_interest = None\n",
     "include_test_users = False\n",
@@ -27,6 +17,35 @@
     "use_imperial = False"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ce0dcc9f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#probably going to end up passing this in\n",
+    "survey_info =  {\n",
+    "    \"surveys\": {\n",
+    "      \"UserProfileSurvey\": {\n",
+    "        \"formPath\": \"https://raw.githubusercontent.com/JGreenlee/nrel-openpath-deploy-configs/fermata-demo/survey_resources/dfc-fermata/fermata-onboarding-v0.xml\",\n",
+    "        \"version\": 1,\n",
+    "        \"compatibleWith\": 1,\n",
+    "        \"dataKey\": \"manual/demographic_survey\",\n",
+    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
+    "      },\n",
+    "      \"TripConfirmSurvey\": {\n",
+    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/fermata-ev-return-trip-v0.xml\",\n",
+    "        \"version\": 1,\n",
+    "        \"compatibleWith\": 1,\n",
+    "        \"dataKey\": \"manual/trip_user_input\",\n",
+    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
+    "      }\n",
+    "    },\n",
+    "    \"trip-labels\": \"ENKETO\"\n",
+    "  }"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -35,11 +54,10 @@
    "outputs": [],
    "source": [
     "from collections import defaultdict\n",
-    "\n",
     "import urllib.request\n",
-    "\n",
     "import numpy as np\n",
     "import pandas as pd\n",
+    "from xml.dom import minidom\n",
     "\n",
     "from plots import *\n",
     "import scaffolding\n",
@@ -55,52 +73,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9a85ca35",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#probably going to end up passing this in\n",
-    "survey_info = {\n",
-    "      \"surveys\": {\n",
-    "        \"UserProfileSurvey\": {\n",
-    "          \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/washingtoncommons/washingtoncommons-onboarding-survey-v4.xml\",\n",
-    "          \"version\": 1.3,\n",
-    "          \"compatibleWith\": 1,\n",
-    "          \"dataKey\": \"manual/demographic_survey\",\n",
-    "          \"labelTemplate\": {\n",
-    "            \"en\": \"Answered\",\n",
-    "            \"es\": \"Contestada\"\n",
-    "          }\n",
-    "        },\n",
-    "        \"TripConfirmSurvey\": {\n",
-    "            \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/washingtoncommons/washingtoncommons-trip-survey-v2.json\",\n",
-    "            \"version\": 1.2,\n",
-    "            \"compatibleWith\": 1,\n",
-    "            \"dataKey\": \"manual/trip_user_input\",\n",
-    "            \"labelVars\": {\n",
-    "              \"modes\": {\n",
-    "                \"key\": \"What_was_the_main_pu_f_this_trip_trip_leg\",\n",
-    "                \"type\": \"length\"\n",
-    "              },\n",
-    "              \"purposes\": {\n",
-    "                \"key\": \"_2_What_was_the_mode_of_transp\",\n",
-    "                \"type\": \"length\"\n",
-    "              }\n",
-    "            },\n",
-    "            \"labelTemplate\": {\n",
-    "              \"en\": \"{ purposes, plural, =0 {No purposes} one {1 purpose} other {# purposes} }, { modes, plural, =0 {No modes} one {1 mode} other {# modes} }\",\n",
-    "              \"es\": \"{ purposes, plural, =0 {No propósitos} one {1 propósito} other {# propósitos} }, { modes, plural, =0 {No modos} one {1 modo} other {# modos} }\"\n",
-    "            }\n",
-    "          }\n",
-    "      },\n",
-    "      \"trip-labels\": \"ENKETO\"\n",
-    "    }"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "bf0b2f08",
+   "id": "b18bc854",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -111,9 +84,9 @@
     "    sheet_list = []\n",
     "    for name in survey_list:\n",
     "        form_path = survey_info['surveys'][name]['formPath']\n",
-    "        #THIS ASSUMES THE FILENAME IS THE SAME AS THE FORM PATH BUT WITH XLSX FILE TYPE\n",
+    "        #THIS ASSUMES THE FILENAME IS THE SAME AS THE FORM PATH BUT WITH xml FILE TYPE\n",
     "        l_path = form_path.split('.')\n",
-    "        l_path[-1] = 'xlsx'\n",
+    "        l_path[-1] = 'xml'\n",
     "        s = '.'\n",
     "        sheet_path = s.join(l_path)\n",
     "        sheet_list.append(sheet_path)\n",
@@ -127,16 +100,38 @@
     "    quest_dict = {}\n",
     "    \n",
     "    for url in sheet_list:\n",
-    "        result = urllib.request.urlopen(url).read()\n",
-    "        xls = pd.ExcelFile(result)\n",
-    "        questions = pd.read_excel(xls, 'survey')\n",
-    "        options = pd.read_excel(xls, 'choices')\n",
+    "        result = urllib.request.urlopen(url)\n",
+    "        doc = minidom.parse(result) \n",
     "\n",
-    "        opt_dict.update(dict(zip(options.name, options.label)))\n",
-    "        quest_dict.update(dict(zip(questions.name, questions.label)))\n",
+    "        labels = doc.getElementsByTagName(\"label\") \n",
+    "        for label in labels:\n",
+    "            if(bool(label.parentNode.getAttribute(\"ref\"))):\n",
+    "                print(label.parentNode.getAttribute(\"ref\").split('/')[-1])\n",
+    "                print(label.firstChild.data)\n",
+    "                \n",
+    "                quest_dict[str(label.parentNode.getAttribute(\"ref\").split('/')[-1])] = label.firstChild.data\n",
+    "    \n",
+    "    return opt_dict, quest_dict\n",
+    "\n",
+    "def build_dictionaries(url_list):\n",
+    "    opt_dict = {}\n",
+    "    quest_dict = {}\n",
+    "    \n",
+    "    for url in sheet_list:\n",
+    "        result = urllib.request.urlopen(url)\n",
+    "        tree = ET.parse(result)\n",
+    "        root = tree.getroot()\n",
+    "        \n",
+    "        print(root.findall(\".\"))\n",
+    "    \n",
+    "        for child in root:\n",
+    "            print(child.tag, child.attrib)\n",
+    "            \n",
+    "        print(root.findall(\".//label\"))\n",
     "    \n",
     "    return opt_dict, quest_dict\n",
     "\n",
+    "\n",
     "#input: dataframe containing all trips that have non-blank user_input\n",
     "#output: dataframe with questions in the columns and answers in the rows\n",
     "#for loop will run n survey responses times (this could get big!)\n",
@@ -144,12 +139,22 @@
     "    df = df_trips_w_surveys.reset_index()\n",
     "    rows = []\n",
     "    for i in range(len(df)):\n",
-    "        row = pd.json_normalize(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse']['data'])\n",
+    "        data_key = list(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse'].keys())[0]\n",
+    "        row = pd.json_normalize(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse'][data_key])\n",
     "        rows.append(row)\n",
-    "\n",
     "    df = pd.concat(rows)\n",
+    "#     print(df.head())\n",
+    "    \n",
+    "    rename_nests = {}\n",
+    "    for col in df.columns:\n",
+    "        rename_nests[col] = col.split('.')[-1]\n",
+    "    \n",
+    "    print(rename_nests)\n",
+    "    df = df.rename(columns=rename_nests)\n",
+    "    \n",
     "    #drop the non-question columns, should leave behind all the questions\n",
-    "    df = df.drop(columns = ['end', 'start', 'attrid', 'attrxmlns:orx', 'attrxmlns:orx', 'attrxmlns:jr', 'meta.instanceID', 'meta.deprecatedID'])\n",
+    "    #need to do this better, won't always be the same\n",
+    "    df = df.drop(columns = ['end', 'start', 'attrid', 'attrxmlns:orx', 'attrxmlns:orx', 'attrxmlns:jr', 'instanceID'])\n",
     "\n",
     "    return df\n",
     "\n",
@@ -182,7 +187,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4b9db890",
+   "id": "3b355efd",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -197,7 +202,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "caeb880b",
+   "id": "854b3070",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -206,7 +211,6 @@
     "all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)\n",
     "#remove blank inputs\n",
     "survey_trips = all_confirmed_trips[all_confirmed_trips['user_input'] != {}]\n",
-    "print(len(survey_trips))\n",
     "\n",
     "#survey counts df\n",
     "survey_trips = survey_trips.reset_index()\n",
@@ -223,46 +227,51 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c00da0a7",
+   "id": "7fe65f88",
    "metadata": {},
    "outputs": [],
    "source": [
+    "\n",
+    "\n",
     "#create translation dictionaries\n",
     "opt_dict, quest_dict = build_dictionaries(sheet_list)\n",
+    "print(opt_dict)\n",
+    "print(quest_dict)\n",
     "\n",
     "#format survey trips into responses dataframe\n",
-    "df_responses = create_dataframe(survey_trips)\n",
-    "\n",
-    "#replace questions\n",
-    "df_responses = df_responses.rename(columns = quest_dict)"
+    "df_responses = create_dataframe(survey_trips)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "08d04b39",
+   "id": "7bcebeef",
    "metadata": {
     "scrolled": false
    },
    "outputs": [],
    "source": [
+    "file_suffix = scaffolding.get_file_suffix(year, month, program)\n",
+    "\n",
     "#create one plot per question\n",
     "for col in df_responses.columns:\n",
+    "    filename = col + file_suffix\n",
+    "    print(filename)\n",
+    "    \n",
     "    quest_frame = df_responses.copy()\n",
     "    quest_frame = quest_frame[quest_frame[col] != \"\"] #could have blank responses for non-mandatory ?s\n",
-    "    labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
+    "    labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n",
+    "#     labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
     "    values = quest_frame[col].value_counts(dropna=True).tolist()\n",
+    "    \n",
+    "    try:\n",
+    "        label = quest_dict[col]\n",
+    "    except:\n",
+    "        label = col\n",
     "     \n",
-    "    pie_chart_purpose(col+'\\n'+qual_text, labels, values, \"howdy\")"
+    "    #if other is 0 don't display it :)\n",
+    "    pie_chart_purpose(label+'\\n'+qual_text, labels, values, filename)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "132e00a1",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

From 427f44b5d997b244cb3f73dcd1a6e445a1337b0d Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 20 Mar 2024 16:33:06 -0600
Subject: [PATCH 04/70] update dictionary building

There was a bug (duplicate code) in how I was creating the dictionaries; this removes the duplicate build_dictionaries definition.
---
 viz_scripts/survey_responses.ipynb | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 5c8100e3..759d0469 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -113,24 +113,6 @@
     "    \n",
     "    return opt_dict, quest_dict\n",
     "\n",
-    "def build_dictionaries(url_list):\n",
-    "    opt_dict = {}\n",
-    "    quest_dict = {}\n",
-    "    \n",
-    "    for url in sheet_list:\n",
-    "        result = urllib.request.urlopen(url)\n",
-    "        tree = ET.parse(result)\n",
-    "        root = tree.getroot()\n",
-    "        \n",
-    "        print(root.findall(\".\"))\n",
-    "    \n",
-    "        for child in root:\n",
-    "            print(child.tag, child.attrib)\n",
-    "            \n",
-    "        print(root.findall(\".//label\"))\n",
-    "    \n",
-    "    return opt_dict, quest_dict\n",
-    "\n",
     "\n",
     "#input: dataframe containing all trips that have non-blank user_input\n",
     "#output: dataframe with questions in the columns and answers in the rows\n",
@@ -231,10 +213,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "\n",
-    "\n",
     "#create translation dictionaries\n",
     "opt_dict, quest_dict = build_dictionaries(sheet_list)\n",
+    "\n",
     "print(opt_dict)\n",
     "print(quest_dict)\n",
     "\n",

From 94f48c5af9945c17b0bb5a5952ce5d330045dd13 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 20 Mar 2024 16:33:46 -0600
Subject: [PATCH 05/70] only display questions currently in the survey

We only want to display questions that are still in the survey, as those are the questions we will actually have a response for.
---
 viz_scripts/survey_responses.ipynb | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 759d0469..128f3b48 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -244,14 +244,11 @@
     "    labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n",
     "#     labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
     "    values = quest_frame[col].value_counts(dropna=True).tolist()\n",
-    "    \n",
-    "    try:\n",
-    "        label = quest_dict[col]\n",
-    "    except:\n",
-    "        label = col\n",
     "     \n",
     "    #if other is 0 don't display it :)\n",
-    "    pie_chart_purpose(label+'\\n'+qual_text, labels, values, filename)"
+    "    #will only show questions in the current survey (not older versions) and that have at least 1 response\n",
+    "    if col in quest_dict and len(quest_frame[col]) != 0:\n",
+    "        pie_chart_purpose(quest_dict[col]+'\\n'+qual_text, labels, values, filename)"
    ]
   }
  ],

From 7f3d4779446f91bb9428408018fdb37de28db942 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Fri, 22 Mar 2024 12:16:49 -0600
Subject: [PATCH 06/70] connect charts to frontend

Added code to the index.html file to fetch the surveys, add an option for each of the survey questions, and display the charts by default on the dashboard.
---
 frontend/index.html                 | 60 ++++++++++++++++++++++++++++-
 frontend/metrics_study_surveys.html |  4 ++
 viz_scripts/survey_responses.ipynb  |  7 ++--
 3 files changed, 67 insertions(+), 4 deletions(-)
 create mode 100644 frontend/metrics_study_surveys.html

diff --git a/frontend/index.html b/frontend/index.html
index a7e3ac11..94e116ff 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -344,6 +344,12 @@
       const end_year = date.getFullYear();
       var current_month = start_month;
       var current_year = start_year;
+
+      //testing dfc fermata .. doesn't start until April...
+      // if ((current_month >= end_month) && (current_year >= end_year)) {
+      //   current_month = current_month - 2; //dfc has not started yet...
+      // }
+
       dates.push([current_month, current_year]);
       while (!(current_month == end_month && current_year == end_year)) {
         current_month += 1;
@@ -356,6 +362,25 @@
       };
       return dates;
     };
+
+    function getDictionaryList(form_list) {
+      var quest_dict = {};
+        return new Promise(async (resolve) => {
+          for (i in form_list) {
+            response = await fetch(form_list[i]);
+            text = await response.text();
+            const parser = new DOMParser();
+            const doc = parser.parseFromString(text, "text/xml");
+            labels = doc.getElementsByTagName("label");
+              for (i in labels) {
+                try {
+                  quest_dict[labels[i].parentNode.getAttribute("ref").split('/').slice(-1)] = labels[i].firstChild.data;
+                } catch (e) { }
+              } 
+            }
+          resolve(quest_dict);
+        })
+    };
   </script>
 
   <script type="text/javascript">
@@ -405,7 +430,40 @@
           console.log("Units for display are", data.display_config.use_imperial, dist_units);
           // Load list of plots corresponding to study/program
           dynamic_labels = data.label_options
-          if (data.intro.program_or_study == 'program') {
+          surveys = data.survey_info.surveys 
+          console.log(data.survey_info['trip-labels'])
+          if (data.survey_info['trip-labels'] === 'ENKETO') {
+            survey_list = Object.keys(surveys)
+            survey_list = survey_list.filter(name => name !== 'UserProfileSurvey')
+
+            sheet_list = []
+            for (name in survey_list) {
+              form_path = data.survey_info.surveys[survey_list[name]].formPath;
+              //hard code the old survey
+              // form_path = 'https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/fermata-ev-return-trip-v0.xml';
+              //THIS ASSUMES THE FILENAME IS THE SAME AS THE FORM PATH BUT WITH xml FILE TYPE
+              l_path = form_path.split('.')
+              l_path[-1] = 'xml'
+              sheet_path = l_path.join('.')
+              sheet_list.push(sheet_path)
+            }
+
+            getDictionaryList(sheet_list).then((quest_dict) => {
+              console.log(quest_dict);
+              load_file = "metrics_study_surveys.html"
+              $.get(load_file, function (file) {
+                Object.entries(quest_dict).forEach(([key, value]) => {
+                  var text = '<option ' + 'value="' + key + '" data-sizex="4" data-sizey="4">' + value + '</option>';
+                  file = file.concat('\n', text);
+                });
+                console.log(file);
+                $('#metric').append(file);
+                addPreconfiguredMetrics(Object.keys(quest_dict).slice(0, 5)); //only adding the first 6 elements
+              });
+            });         
+          
+          }
+          else if (data.intro.program_or_study == 'program') {
             // Note: We're disabling energy metrics on public dashboard when dynamic labels are available.
             // TODO: Remove the if (data.label_options) in future when energy computation is handled properly.
             if (dynamic_labels) {
diff --git a/frontend/metrics_study_surveys.html b/frontend/metrics_study_surveys.html
new file mode 100644
index 00000000..322c81e4
--- /dev/null
+++ b/frontend/metrics_study_surveys.html
@@ -0,0 +1,4 @@
+<!-- htmnl options should be 1 per chart question -->
+<!-- <option value="question key" data-sizex="4" data-sizey="4">translated question</option> -->
+
+<option value="ntrips_mode_confirm" data-sizex="4" data-sizey="4">Number of trips</option>
\ No newline at end of file
diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 128f3b48..d9d4c469 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -7,9 +7,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "year = None\n",
-    "month = None\n",
-    "program = \"dfc-fermata\"\n",
+    "year = 2024\n",
+    "month = 3\n",
+    "program = \"default\"\n",
     "study_type = \"study\"\n",
     "mode_of_interest = None\n",
     "include_test_users = False\n",
@@ -233,6 +233,7 @@
    "outputs": [],
    "source": [
     "file_suffix = scaffolding.get_file_suffix(year, month, program)\n",
+    "print(file_suffix)\n",
     "\n",
     "#create one plot per question\n",
     "for col in df_responses.columns:\n",

From 74768f7e6c90f5603dff8cc86029762e29329aff Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Fri, 22 Mar 2024 15:04:29 -0600
Subject: [PATCH 07/70] translate answers, drop zero other

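Translate answer values to their option labels, except for likert questions, which keep their raw values. Also skip adding an "Other" entry when its count is zero.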
---
 viz_scripts/plots.py               |  4 +++-
 viz_scripts/survey_responses.ipynb | 32 +++++++++++++-----------------
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/viz_scripts/plots.py b/viz_scripts/plots.py
index c27e94f0..9d633ef4 100644
--- a/viz_scripts/plots.py
+++ b/viz_scripts/plots.py
@@ -39,13 +39,15 @@ def merge_small_entries(labels, values):
     # We could have already had a non-zero other, and it could be small or large
     if "Other" not in v2l_df.index:
         # zero other will end up with misc_count
-        v2l_df.loc["Other"] = misc_count
+        if misc_count.vals > 0:
+            v2l_df.loc["Other"] = misc_count
     elif "Other" in small_chunk.index:
         # non-zero small other will already be in misc_count
         v2l_df.loc["Other"] = misc_count
     else:
         # non-zero large other, will not already be in misc_count
         v2l_df.loc["Other"] = v2l_df.loc["Other"] + misc_count
+    
     disp.display(v2l_df)
 
     return (v2l_df.index.to_list(),v2l_df.vals.to_list())
diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index d9d4c469..5081e892 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -106,11 +106,14 @@
     "        labels = doc.getElementsByTagName(\"label\") \n",
     "        for label in labels:\n",
     "            if(bool(label.parentNode.getAttribute(\"ref\"))):\n",
-    "                print(label.parentNode.getAttribute(\"ref\").split('/')[-1])\n",
-    "                print(label.firstChild.data)\n",
-    "                \n",
     "                quest_dict[str(label.parentNode.getAttribute(\"ref\").split('/')[-1])] = label.firstChild.data\n",
-    "    \n",
+    "            \n",
+    "            if label.parentNode.nodeName == 'item':\n",
+    "                if label.parentNode.parentNode.getAttribute(\"appearance\") == \"likert\":\n",
+    "                    opt_dict[label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data] = label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data\n",
+    "                else:\n",
+    "                    opt_dict[label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data] = label.firstChild.data  \n",
+    "\n",
     "    return opt_dict, quest_dict\n",
     "\n",
     "\n",
@@ -125,13 +128,11 @@
     "        row = pd.json_normalize(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse'][data_key])\n",
     "        rows.append(row)\n",
     "    df = pd.concat(rows)\n",
-    "#     print(df.head())\n",
     "    \n",
     "    rename_nests = {}\n",
     "    for col in df.columns:\n",
     "        rename_nests[col] = col.split('.')[-1]\n",
-    "    \n",
-    "    print(rename_nests)\n",
+    "\n",
     "    df = df.rename(columns=rename_nests)\n",
     "    \n",
     "    #drop the non-question columns, should leave behind all the questions\n",
@@ -146,9 +147,8 @@
     "#if people are selecting many different combinations, could be large\n",
     "def traslate_options(labels, opt_dict):\n",
     "    for i in range(len(labels)):\n",
-    "        l_labels = labels[i].split(\" \")\n",
+    "        l_labels = str(labels[i]).split(\" \")\n",
     "        for k in range(len(l_labels)):\n",
-    "            print()\n",
     "            l_labels[k] = opt_dict[l_labels[k]]\n",
     "        sep = \"\\n\"\n",
     "        labels[i] = sep.join(l_labels)\n",
@@ -209,18 +209,17 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "7fe65f88",
+   "id": "7b7001a9",
    "metadata": {},
    "outputs": [],
    "source": [
     "#create translation dictionaries\n",
     "opt_dict, quest_dict = build_dictionaries(sheet_list)\n",
     "\n",
-    "print(opt_dict)\n",
-    "print(quest_dict)\n",
-    "\n",
     "#format survey trips into responses dataframe\n",
-    "df_responses = create_dataframe(survey_trips)"
+    "df_responses = create_dataframe(survey_trips)\n",
+    "\n",
+    "file_suffix = scaffolding.get_file_suffix(year, month, program)"
    ]
   },
   {
@@ -232,9 +231,6 @@
    },
    "outputs": [],
    "source": [
-    "file_suffix = scaffolding.get_file_suffix(year, month, program)\n",
-    "print(file_suffix)\n",
-    "\n",
     "#create one plot per question\n",
     "for col in df_responses.columns:\n",
     "    filename = col + file_suffix\n",
@@ -243,7 +239,7 @@
     "    quest_frame = df_responses.copy()\n",
     "    quest_frame = quest_frame[quest_frame[col] != \"\"] #could have blank responses for non-mandatory ?s\n",
     "    labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n",
-    "#     labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
+    "    labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
     "    values = quest_frame[col].value_counts(dropna=True).tolist()\n",
     "     \n",
     "    #if other is 0 don't display it :)\n",

From 99e0cc80a9415722ab2bea9b04b392080252548f Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Fri, 22 Mar 2024 17:26:52 -0600
Subject: [PATCH 08/70] updates while working with washingtoncommons

uncovered a few bugs
---
 frontend/index.html                |  3 +-
 viz_scripts/survey_responses.ipynb | 62 ++++++++++++++++++++----------
 2 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/frontend/index.html b/frontend/index.html
index 94e116ff..7689ffc1 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -443,7 +443,8 @@
               // form_path = 'https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/fermata-ev-return-trip-v0.xml';
               //THIS ASSUMES THE FILENAME IS THE SAME AS THE FORM PATH BUT WITH xml FILE TYPE
               l_path = form_path.split('.')
-              l_path[-1] = 'xml'
+              l_path.splice(l_path.length -1, 1, 'xml');
+              console.log(l_path);
               sheet_path = l_path.join('.')
               sheet_list.push(sheet_path)
             }
diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 5081e892..01a55e10 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -26,24 +26,40 @@
    "source": [
     "#probably going to end up passing this in\n",
     "survey_info =  {\n",
-    "    \"surveys\": {\n",
-    "      \"UserProfileSurvey\": {\n",
-    "        \"formPath\": \"https://raw.githubusercontent.com/JGreenlee/nrel-openpath-deploy-configs/fermata-demo/survey_resources/dfc-fermata/fermata-onboarding-v0.xml\",\n",
-    "        \"version\": 1,\n",
-    "        \"compatibleWith\": 1,\n",
-    "        \"dataKey\": \"manual/demographic_survey\",\n",
-    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
+    "      \"surveys\": {\n",
+    "        \"UserProfileSurvey\": {\n",
+    "          \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/washingtoncommons/washingtoncommons-onboarding-survey-v4.xml\",\n",
+    "          \"version\": 1.3,\n",
+    "          \"compatibleWith\": 1,\n",
+    "          \"dataKey\": \"manual/demographic_survey\",\n",
+    "          \"labelTemplate\": {\n",
+    "            \"en\": \"Answered\",\n",
+    "            \"es\": \"Contestada\"\n",
+    "          }\n",
+    "        },\n",
+    "        \"TripConfirmSurvey\": {\n",
+    "            \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/washingtoncommons/washingtoncommons-trip-survey-v2.json\",\n",
+    "            \"version\": 1.2,\n",
+    "            \"compatibleWith\": 1,\n",
+    "            \"dataKey\": \"manual/trip_user_input\",\n",
+    "            \"labelVars\": {\n",
+    "              \"modes\": {\n",
+    "                \"key\": \"What_was_the_main_pu_f_this_trip_trip_leg\",\n",
+    "                \"type\": \"length\"\n",
+    "              },\n",
+    "              \"purposes\": {\n",
+    "                \"key\": \"_2_What_was_the_mode_of_transp\",\n",
+    "                \"type\": \"length\"\n",
+    "              }\n",
+    "            },\n",
+    "            \"labelTemplate\": {\n",
+    "              \"en\": \"{ purposes, plural, =0 {No purposes} one {1 purpose} other {# purposes} }, { modes, plural, =0 {No modes} one {1 mode} other {# modes} }\",\n",
+    "              \"es\": \"{ purposes, plural, =0 {No propósitos} one {1 propósito} other {# propósitos} }, { modes, plural, =0 {No modos} one {1 modo} other {# modos} }\"\n",
+    "            }\n",
+    "          }\n",
     "      },\n",
-    "      \"TripConfirmSurvey\": {\n",
-    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/fermata-ev-return-trip-v0.xml\",\n",
-    "        \"version\": 1,\n",
-    "        \"compatibleWith\": 1,\n",
-    "        \"dataKey\": \"manual/trip_user_input\",\n",
-    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
-    "      }\n",
-    "    },\n",
-    "    \"trip-labels\": \"ENKETO\"\n",
-    "  }"
+    "      \"trip-labels\": \"ENKETO\"\n",
+    "    }"
    ]
   },
   {
@@ -111,9 +127,11 @@
     "            if label.parentNode.nodeName == 'item':\n",
     "                if label.parentNode.parentNode.getAttribute(\"appearance\") == \"likert\":\n",
     "                    opt_dict[label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data] = label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data\n",
-    "                else:\n",
+    "                elif label.parentNode.getElementsByTagName(\"value\"):\n",
     "                    opt_dict[label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data] = label.firstChild.data  \n",
-    "\n",
+    "                elif label.parentNode.getElementsByTagName(\"name\"):\n",
+    "                    opt_dict[label.parentNode.getElementsByTagName(\"name\")[0].firstChild.data] = label.firstChild.data  \n",
+    "                    \n",
     "    return opt_dict, quest_dict\n",
     "\n",
     "\n",
@@ -137,7 +155,7 @@
     "    \n",
     "    #drop the non-question columns, should leave behind all the questions\n",
     "    #need to do this better, won't always be the same\n",
-    "    df = df.drop(columns = ['end', 'start', 'attrid', 'attrxmlns:orx', 'attrxmlns:orx', 'attrxmlns:jr', 'instanceID'])\n",
+    "    df = df.drop(columns = ['end', 'start', 'attrid', 'attrxmlns:orx', 'attrxmlns:orx', 'attrxmlns:jr', 'instanceID', 'deprecatedID'])\n",
     "\n",
     "    return df\n",
     "\n",
@@ -174,6 +192,7 @@
    "outputs": [],
    "source": [
     "#list of all surveys that are not a \"UserProfileSurvey\"\n",
+    "print(survey_info['surveys'])\n",
     "survey_list = list(survey_info['surveys'].keys())\n",
     "survey_list.remove('UserProfileSurvey')\n",
     "sheet_list = get_sheet_links(survey_list)\n",
@@ -215,6 +234,8 @@
    "source": [
     "#create translation dictionaries\n",
     "opt_dict, quest_dict = build_dictionaries(sheet_list)\n",
+    "print(opt_dict)\n",
+    "print(quest_dict)\n",
     "\n",
     "#format survey trips into responses dataframe\n",
     "df_responses = create_dataframe(survey_trips)\n",
@@ -232,6 +253,7 @@
    "outputs": [],
    "source": [
     "#create one plot per question\n",
+    "print(df_responses.head())\n",
     "for col in df_responses.columns:\n",
     "    filename = col + file_suffix\n",
     "    print(filename)\n",

From eb14800cec9da3a9ea4e4dc994c2fc74994000c2 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Mon, 25 Mar 2024 17:07:00 -0600
Subject: [PATCH 09/70] add check to prevent showing "label" questions

These will never have answers, so no need to chart them. Example was "Please rate the following statements: "
---
 frontend/index.html                | 5 ++++-
 viz_scripts/survey_responses.ipynb | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/frontend/index.html b/frontend/index.html
index 7689ffc1..8bf81f1a 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -374,7 +374,10 @@
             labels = doc.getElementsByTagName("label");
               for (i in labels) {
                 try {
-                  quest_dict[labels[i].parentNode.getAttribute("ref").split('/').slice(-1)] = labels[i].firstChild.data;
+                  if ((labels[i].parentNode.getAttribute("appearance") !== "label")) //label type questions don't ever have answers
+                  {
+                    quest_dict[labels[i].parentNode.getAttribute("ref").split('/').slice(-1)] = labels[i].firstChild.data;
+                  }
                 } catch (e) { }
               } 
             }
diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 01a55e10..82765101 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -122,7 +122,8 @@
     "        labels = doc.getElementsByTagName(\"label\") \n",
     "        for label in labels:\n",
     "            if(bool(label.parentNode.getAttribute(\"ref\"))):\n",
-    "                quest_dict[str(label.parentNode.getAttribute(\"ref\").split('/')[-1])] = label.firstChild.data\n",
+    "                if label.parentNode.getAttribute(\"appearance\") != \"label\": #label appearance = not a question?\n",
+    "                    quest_dict[str(label.parentNode.getAttribute(\"ref\").split('/')[-1])] = label.firstChild.data\n",
     "            \n",
     "            if label.parentNode.nodeName == 'item':\n",
     "                if label.parentNode.parentNode.getAttribute(\"appearance\") == \"likert\":\n",

From 3a3533dd5ba2bf55a944d859d825c01cef08f75e Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Mon, 25 Mar 2024 17:07:15 -0600
Subject: [PATCH 10/70] clean up some print statements

---
 viz_scripts/survey_responses.ipynb | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 82765101..d33f3237 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -193,7 +193,6 @@
    "outputs": [],
    "source": [
     "#list of all surveys that are not a \"UserProfileSurvey\"\n",
-    "print(survey_info['surveys'])\n",
     "survey_list = list(survey_info['surveys'].keys())\n",
     "survey_list.remove('UserProfileSurvey')\n",
     "sheet_list = get_sheet_links(survey_list)\n",
@@ -235,8 +234,8 @@
    "source": [
     "#create translation dictionaries\n",
     "opt_dict, quest_dict = build_dictionaries(sheet_list)\n",
-    "print(opt_dict)\n",
-    "print(quest_dict)\n",
+    "print(\"Questions dictionary:\\n\",quest_dict)\n",
+    "print(\"Options dictionary:\\n\", opt_dict)\n",
     "\n",
     "#format survey trips into responses dataframe\n",
     "df_responses = create_dataframe(survey_trips)\n",
@@ -257,7 +256,6 @@
     "print(df_responses.head())\n",
     "for col in df_responses.columns:\n",
     "    filename = col + file_suffix\n",
-    "    print(filename)\n",
     "    \n",
     "    quest_frame = df_responses.copy()\n",
     "    quest_frame = quest_frame[quest_frame[col] != \"\"] #could have blank responses for non-mandatory ?s\n",

From f832c8387d18996dd82f57aa51e83495d0f6d713 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Mon, 25 Mar 2024 17:11:35 -0600
Subject: [PATCH 11/70] drop columns that don't represent questions

all question names begin with a capital letter, so we can safely drop all columns that begin lowercase

had to change from a hardcoded list because different survey data had different extra columns, and we need this to be as general as possible to prevent extra maintenance in the future
---
 viz_scripts/survey_responses.ipynb | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index d33f3237..c96f2c70 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -154,9 +154,13 @@
     "\n",
     "    df = df.rename(columns=rename_nests)\n",
     "    \n",
-    "    #drop the non-question columns, should leave behind all the questions\n",
-    "    #need to do this better, won't always be the same\n",
-    "    df = df.drop(columns = ['end', 'start', 'attrid', 'attrxmlns:orx', 'attrxmlns:orx', 'attrxmlns:jr', 'instanceID', 'deprecatedID'])\n",
+    "    #drop the non-question columns, questions start with capital letters\n",
+    "    drop_cols = []\n",
+    "    for col_name in df.columns:\n",
+    "        if not col_name[0].isupper():\n",
+    "            drop_cols.append(col_name)\n",
+    "            \n",
+    "    df = df.drop(columns = drop_cols)\n",
     "\n",
     "    return df\n",
     "\n",

From dc3bab84333d9267e3ce781142db2dd8c376359f Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Tue, 26 Mar 2024 11:23:53 -0600
Subject: [PATCH 12/70] fix failed charts and alt text

now properly showing the debug df and the alt text on all charts - including the ones that don't have enough data
---
 viz_scripts/survey_responses.ipynb | 43 ++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index c96f2c70..f4348e2c 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -226,7 +226,18 @@
     "survey_trips.head()\n",
     "\n",
     "#get quality text\n",
-    "qual_text = get_text(survey_trips)"
+    "qual_text = get_text(survey_trips)\n",
+    "\n",
+    "#debug_df\n",
+    "debug_df = pd.DataFrame.from_dict({\n",
+    "        \"year\": year,\n",
+    "        \"month\": month,\n",
+    "        \"Registered_participants\": len(scaffolding.get_participant_uuids(program, include_test_users)),\n",
+    "        \"Participants_with_at_least_one_trip\": scaffolding.unique_users(all_confirmed_trips),\n",
+    "        \"Participant_with_at_least_one_labeled_trip\": scaffolding.unique_users(survey_trips),\n",
+    "        \"Trips_with_at_least_one_label\": len(survey_trips)\n",
+    "        },\n",
+    "    orient='index', columns=[\"value\"])"
    ]
   },
   {
@@ -256,21 +267,25 @@
    },
    "outputs": [],
    "source": [
-    "#create one plot per question\n",
-    "print(df_responses.head())\n",
-    "for col in df_responses.columns:\n",
+    "#create one plot per question in the survey\n",
+    "for col in quest_dict.keys():\n",
     "    filename = col + file_suffix\n",
+    "    plot_title_no_quality = quest_dict[col]\n",
+    "\n",
+    "    try:\n",
+    "        plot_title = plot_title_no_quality+'\\n'+qual_text\n",
+    "        quest_frame = df_responses.copy()\n",
+    "        quest_frame = quest_frame[quest_frame[col] != \"\"] #could have blank responses for non-mandatory ?s\n",
+    "        labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n",
+    "        labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
+    "        values = quest_frame[col].value_counts(dropna=True).tolist()\n",
+    "\n",
+    "        pie_chart_purpose(plot_title, labels, values, filename)\n",
+    "        alt_text = store_alt_text_pie(pd.DataFrame(values, labels), filename, plot_title)\n",
     "    \n",
-    "    quest_frame = df_responses.copy()\n",
-    "    quest_frame = quest_frame[quest_frame[col] != \"\"] #could have blank responses for non-mandatory ?s\n",
-    "    labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n",
-    "    labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
-    "    values = quest_frame[col].value_counts(dropna=True).tolist()\n",
-    "     \n",
-    "    #if other is 0 don't display it :)\n",
-    "    #will only show questions in the current survey (not older versions) and that have at least 1 response\n",
-    "    if col in quest_dict and len(quest_frame[col]) != 0:\n",
-    "        pie_chart_purpose(quest_dict[col]+'\\n'+qual_text, labels, values, filename)"
+    "    except:\n",
+    "        generate_missing_plot(plot_title_no_quality, debug_df, filename)\n",
+    "        alt_text = store_alt_text_missing(debug_df, filename, plot_title_no_quality)"
    ]
   }
  ],

From ea0c660b1509968d9297605bc6a67d6e70c76f3d Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Tue, 26 Mar 2024 11:37:37 -0600
Subject: [PATCH 13/70] no need to drop columns

before, I was dropping all columns that were not survey questions

no longer needed because we are generating charts directly from the list of questions (not the data) - this also matches better with the frontend!
---
 viz_scripts/survey_responses.ipynb | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index f4348e2c..c8048e47 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -7,8 +7,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "year = 2024\n",
-    "month = 3\n",
+    "year = None\n",
+    "month = None\n",
     "program = \"default\"\n",
     "study_type = \"study\"\n",
     "mode_of_interest = None\n",
@@ -153,14 +153,6 @@
     "        rename_nests[col] = col.split('.')[-1]\n",
     "\n",
     "    df = df.rename(columns=rename_nests)\n",
-    "    \n",
-    "    #drop the non-question columns, questions start with capital letters\n",
-    "    drop_cols = []\n",
-    "    for col_name in df.columns:\n",
-    "        if not col_name[0].isupper():\n",
-    "            drop_cols.append(col_name)\n",
-    "            \n",
-    "    df = df.drop(columns = drop_cols)\n",
     "\n",
     "    return df\n",
     "\n",

From 157ad1592c1c878f4933eaa4074bdd1fc72ac4f7 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Tue, 26 Mar 2024 11:47:59 -0600
Subject: [PATCH 14/70] sensed instead of labeled trips by mode

---
 frontend/metrics_study_surveys.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/metrics_study_surveys.html b/frontend/metrics_study_surveys.html
index 322c81e4..be8569c4 100644
--- a/frontend/metrics_study_surveys.html
+++ b/frontend/metrics_study_surveys.html
@@ -1,4 +1,4 @@
 <!-- htmnl options should be 1 per chart question -->
 <!-- <option value="question key" data-sizex="4" data-sizey="4">translated question</option> -->
 
-<option value="ntrips_mode_confirm" data-sizex="4" data-sizey="4">Number of trips</option>
\ No newline at end of file
+<option value="ntrips_sensed_mode" data-sizex="4" data-sizey="4">Number of trips (sensed)</option>
\ No newline at end of file

From e6c7fe67f7a7f7715a0087395b827a5e1693fdd9 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Fri, 29 Mar 2024 11:35:28 -0600
Subject: [PATCH 15/70] pull out "input" type questions

Filter out the "input" type questions -- such as "other, please specify" -- so that we can choose the best way to display them later
---
 viz_scripts/survey_responses.ipynb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index c8048e47..d357529a 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -122,7 +122,9 @@
     "        labels = doc.getElementsByTagName(\"label\") \n",
     "        for label in labels:\n",
     "            if(bool(label.parentNode.getAttribute(\"ref\"))):\n",
-    "                if label.parentNode.getAttribute(\"appearance\") != \"label\": #label appearance = not a question?\n",
+    "                #label appearance = not a question?\n",
+    "                #nodeName input -- to word cloud as a later addition?\n",
+    "                if label.parentNode.getAttribute(\"appearance\") != \"label\" && label.parentNode.nodeName != \"input\": \n",
     "                    quest_dict[str(label.parentNode.getAttribute(\"ref\").split('/')[-1])] = label.firstChild.data\n",
     "            \n",
     "            if label.parentNode.nodeName == 'item':\n",

From 8ac86b7feb5b51986d3333159e1afe2d9f4d88da Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Tue, 2 Apr 2024 17:22:42 -0600
Subject: [PATCH 16/70] draft "trips presented" changes

In order to properly generate the quality text, we need to know how many trips a particular survey was presented for

To accomplish this, we can use the strings in the config that determine what survey to show the user (or a python version thereof)

We need to use the composite trips to have the sections, and perform some other data manipulations in order to have this work properly

Next is finding all of the responses that are actually present for each survey ... and associating that with each question ...
---
 viz_scripts/scaffolding.py         |   7 ++
 viz_scripts/survey_responses.ipynb | 156 ++++++++++++++++++++++-------
 2 files changed, 126 insertions(+), 37 deletions(-)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index 6f21989c..e7234d2d 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -56,6 +56,13 @@ def load_all_confirmed_trips(tq):
     disp.display(all_ct.head())
     return all_ct
 
+def load_all_composite_trips(tq):
+    agg = esta.TimeSeries.get_aggregate_time_series()
+    all_ct = agg.get_data_df("analysis/composite_trip", tq)
+    print("Loaded all composite trips of length %s" % len(all_ct))
+    disp.display(all_ct.head())
+    return all_ct
+
 def load_all_participant_trips(program, tq, load_test_users):
     participant_list = get_participant_uuids(program, load_test_users)
     all_ct = load_all_confirmed_trips(tq)
diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index d357529a..fb2362e6 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -26,40 +26,60 @@
    "source": [
     "#probably going to end up passing this in\n",
     "survey_info =  {\n",
-    "      \"surveys\": {\n",
-    "        \"UserProfileSurvey\": {\n",
-    "          \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/washingtoncommons/washingtoncommons-onboarding-survey-v4.xml\",\n",
-    "          \"version\": 1.3,\n",
-    "          \"compatibleWith\": 1,\n",
-    "          \"dataKey\": \"manual/demographic_survey\",\n",
-    "          \"labelTemplate\": {\n",
-    "            \"en\": \"Answered\",\n",
-    "            \"es\": \"Contestada\"\n",
-    "          }\n",
-    "        },\n",
-    "        \"TripConfirmSurvey\": {\n",
-    "            \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/washingtoncommons/washingtoncommons-trip-survey-v2.json\",\n",
-    "            \"version\": 1.2,\n",
-    "            \"compatibleWith\": 1,\n",
-    "            \"dataKey\": \"manual/trip_user_input\",\n",
-    "            \"labelVars\": {\n",
-    "              \"modes\": {\n",
-    "                \"key\": \"What_was_the_main_pu_f_this_trip_trip_leg\",\n",
-    "                \"type\": \"length\"\n",
-    "              },\n",
-    "              \"purposes\": {\n",
-    "                \"key\": \"_2_What_was_the_mode_of_transp\",\n",
-    "                \"type\": \"length\"\n",
-    "              }\n",
-    "            },\n",
-    "            \"labelTemplate\": {\n",
-    "              \"en\": \"{ purposes, plural, =0 {No purposes} one {1 purpose} other {# purposes} }, { modes, plural, =0 {No modes} one {1 mode} other {# modes} }\",\n",
-    "              \"es\": \"{ purposes, plural, =0 {No propósitos} one {1 propósito} other {# propósitos} }, { modes, plural, =0 {No modos} one {1 modo} other {# modos} }\"\n",
-    "            }\n",
-    "          }\n",
+    "    \"surveys\": {\n",
+    "      \"UserProfileSurvey\": {\n",
+    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-onboarding-v0.xml\",\n",
+    "        \"version\": 1,\n",
+    "        \"compatibleWith\": 1,\n",
+    "        \"dataKey\": \"manual/demographic_survey\",\n",
+    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
+    "      },\n",
+    "      \"DfcEvReturnTrip\": {\n",
+    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-ev-return-trip-v0.xml\",\n",
+    "        \"version\": 1,\n",
+    "        \"compatibleWith\": 1,\n",
+    "        \"dataKey\": \"manual/trip_user_input\",\n",
+    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
+    "      },\n",
+    "      \"DfcEvRoamingTrip\": {\n",
+    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-ev-roaming-trip-v0.xml\",\n",
+    "        \"version\": 1,\n",
+    "        \"compatibleWith\": 1,\n",
+    "        \"dataKey\": \"manual/trip_user_input\",\n",
+    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
     "      },\n",
-    "      \"trip-labels\": \"ENKETO\"\n",
-    "    }"
+    "      \"DfcGasTrip\": {\n",
+    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-gas-trip-v0.xml\",\n",
+    "        \"version\": 1,\n",
+    "        \"compatibleWith\": 1,\n",
+    "        \"dataKey\": \"manual/trip_user_input\",\n",
+    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
+    "      }\n",
+    "    },\n",
+    "    \"buttons\": {\n",
+    "      \"trip-label\": [\n",
+    "        {\n",
+    "          \"surveyName\": \"DfcGasTrip\",\n",
+    "          \"not-filled-in-label\": { \"en\": \"Gas Car Survey\" },\n",
+    "          \"showsIf\": \"sections[0].sensed_mode_str == 'CAR'\",\n",
+    "          \"showedIf\": \"sections[0]['sensed_mode_str'] == 'CAR'\"\n",
+    "        },\n",
+    "        {\n",
+    "          \"surveyName\": \"DfcEvRoamingTrip\",\n",
+    "          \"not-filled-in-label\": { \"en\": \"EV Survey\" },\n",
+    "          \"showsIf\": \"sections[0].sensed_mode_str != 'CAR' && !pointIsWithinBounds(end_loc.coordinates, [[-105.153, 39.745], [-105.150, 39.743]])\",\n",
+    "          \"showedIf\": \"sections[0]['sensed_mode_str'] != 'CAR' and not pointIsWithinBounds(end_loc['coordinates'], [[-105.153, 39.745], [-105.150, 39.743]])\"\n",
+    "        },\n",
+    "        {\n",
+    "          \"surveyName\": \"DfcEvReturnTrip\",\n",
+    "          \"not-filled-in-label\": { \"en\": \"EV Survey\" },\n",
+    "          \"showsIf\": \"sections[0].sensed_mode_str != 'CAR' && pointIsWithinBounds(end_loc.coordinates, [[-105.153, 39.745], [-105.150, 39.743]])\",\n",
+    "          \"showedIf\": \"sections[0]['sensed_mode_str'] != 'CAR' and pointIsWithinBounds(end_loc['coordinates'], [[-105.153, 39.745], [-105.150, 39.743]])\"\n",
+    "        }\n",
+    "      ]\n",
+    "    },\n",
+    "    \"trip-labels\": \"ENKETO\"\n",
+    "  }"
    ]
   },
   {
@@ -124,7 +144,7 @@
     "            if(bool(label.parentNode.getAttribute(\"ref\"))):\n",
     "                #label appearance = not a question?\n",
     "                #nodeName input -- to word cloud as a later addition?\n",
-    "                if label.parentNode.getAttribute(\"appearance\") != \"label\" && label.parentNode.nodeName != \"input\": \n",
+    "                if label.parentNode.getAttribute(\"appearance\") != \"label\" and label.parentNode.nodeName != \"input\": \n",
     "                    quest_dict[str(label.parentNode.getAttribute(\"ref\").split('/')[-1])] = label.firstChild.data\n",
     "            \n",
     "            if label.parentNode.nodeName == 'item':\n",
@@ -201,13 +221,31 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "854b3070",
+   "id": "79608979",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#same as phone code\n",
+    "def pointIsWithinBounds(pt, bounds): \n",
+    "    #pt's lon must be east of, or greater than, NW's lon; and west of, or less than, SE's lon\n",
+    "    lonInRange = pt[0] > bounds[0][0] and pt[0] < bounds[1][0];\n",
+    "    #pt's lat must be south of, or less than, NW's lat; and north of, or greater than, SE's lat\n",
+    "    latInRange = pt[1] < bounds[0][1] and pt[1] > bounds[1][1];\n",
+    "    \n",
+    "    return latInRange and lonInRange;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5cd182b7",
    "metadata": {},
    "outputs": [],
    "source": [
     "#load all of the confirmed trips\n",
     "tq = scaffolding.get_time_query(year, month)\n",
     "all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)\n",
+    "\n",
     "#remove blank inputs\n",
     "survey_trips = all_confirmed_trips[all_confirmed_trips['user_input'] != {}]\n",
     "\n",
@@ -217,7 +255,6 @@
     "\n",
     "#gather the cols needed for charts and text\n",
     "survey_trips = survey_trips[['survey_name', 'user_id', 'user_input']]\n",
-    "survey_trips.head()\n",
     "\n",
     "#get quality text\n",
     "qual_text = get_text(survey_trips)\n",
@@ -234,6 +271,52 @@
     "    orient='index', columns=[\"value\"])"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c739be2d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#how we filter the rows\n",
+    "def filter_fn(row):\n",
+    "    globals = {'pointIsWithinBounds': pointIsWithinBounds}\n",
+    "    locals = {'end_loc': row['end_loc'], 'start_loc': row['start_loc'], 'sections': row['sections']}\n",
+    "\n",
+    "    return eval(eval_string, globals, locals)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c8b887f3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#create the total dfs\n",
+    "tq = scaffolding.get_time_query(year, month)\n",
+    "all_composite_trips = scaffolding.load_all_composite_trips(tq)\n",
+    "\n",
+    "# transform data to meet a data format requirement\n",
+    "all_composite_trips['sections'] = all_composite_trips['sections'].apply(lambda x: [x[0]['data']] if len(x) > 0 else x)\n",
+    "\n",
+    "#only want trips with sections\n",
+    "all_composite_trips = all_composite_trips[all_composite_trips['sections'].str.len() > 0]\n",
+    "\n",
+    "#gather the eval strings\n",
+    "eval_strings = {}\n",
+    "for item in survey_info['buttons']['trip-label']:\n",
+    "    eval_strings[item['surveyName']] = item['showedIf']\n",
+    "\n",
+    "#use eval strings to build \"total\" dataframes\n",
+    "total_dfs = {}\n",
+    "for name, eval_string in eval_strings.items():\n",
+    "    meets_bools = all_composite_trips.apply(filter_fn, axis=1)\n",
+    "    df = all_composite_trips[meets_bools]\n",
+    "    \n",
+    "    total_dfs[name] = df"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -248,7 +331,6 @@
     "\n",
     "#format survey trips into responses dataframe\n",
     "df_responses = create_dataframe(survey_trips)\n",
-    "\n",
     "file_suffix = scaffolding.get_file_suffix(year, month, program)"
    ]
   },

From 5fe10e1888ac084803240baf00d0794bbd286be6 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 3 Apr 2024 10:41:00 -0600
Subject: [PATCH 17/70] increase accuracy of quality text

Each survey will have a different "denominator" of quality text which should be presented to the user to represent what percentage of total eligible trips have survey responses

using "showsIfPy" as a backup for strings that are too complicated, changing access notation for dictionaries in https://github.com/e-mission/nrel-openpath-deploy-configs/pull/88, and replacing &&, || and ! with and, or and not allows us to evaluate the strings in python

needed to track the survey names in order to know which denominator to use, so turned the list of urls into a dictionary keyed by survey name, and turned the dictionary of questions into a dictionary of dictionaries, one for each survey
---
 viz_scripts/survey_responses.ipynb | 186 ++++++++++++++---------------
 1 file changed, 91 insertions(+), 95 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index fb2362e6..110beb13 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -3,7 +3,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "7c7fbf97",
+   "id": "ce0dcc9f",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -14,16 +14,8 @@
     "mode_of_interest = None\n",
     "include_test_users = False\n",
     "dynamic_labels = {  }\n",
-    "use_imperial = False"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ce0dcc9f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "use_imperial = False\n",
+    "\n",
     "#probably going to end up passing this in\n",
     "survey_info =  {\n",
     "    \"surveys\": {\n",
@@ -61,20 +53,17 @@
     "        {\n",
     "          \"surveyName\": \"DfcGasTrip\",\n",
     "          \"not-filled-in-label\": { \"en\": \"Gas Car Survey\" },\n",
-    "          \"showsIf\": \"sections[0].sensed_mode_str == 'CAR'\",\n",
-    "          \"showedIf\": \"sections[0]['sensed_mode_str'] == 'CAR'\"\n",
+    "          \"showsIf\": \"sections[0]['sensed_mode_str'] == 'CAR'\"\n",
     "        },\n",
     "        {\n",
     "          \"surveyName\": \"DfcEvRoamingTrip\",\n",
     "          \"not-filled-in-label\": { \"en\": \"EV Survey\" },\n",
-    "          \"showsIf\": \"sections[0].sensed_mode_str != 'CAR' && !pointIsWithinBounds(end_loc.coordinates, [[-105.153, 39.745], [-105.150, 39.743]])\",\n",
-    "          \"showedIf\": \"sections[0]['sensed_mode_str'] != 'CAR' and not pointIsWithinBounds(end_loc['coordinates'], [[-105.153, 39.745], [-105.150, 39.743]])\"\n",
+    "          \"showsIf\": \"sections[0]['sensed_mode_str'] != 'CAR' && !pointIsWithinBounds(end_loc['coordinates'], [[-105.153, 39.745], [-105.150, 39.743]])\"\n",
     "        },\n",
     "        {\n",
     "          \"surveyName\": \"DfcEvReturnTrip\",\n",
     "          \"not-filled-in-label\": { \"en\": \"EV Survey\" },\n",
-    "          \"showsIf\": \"sections[0].sensed_mode_str != 'CAR' && pointIsWithinBounds(end_loc.coordinates, [[-105.153, 39.745], [-105.150, 39.743]])\",\n",
-    "          \"showedIf\": \"sections[0]['sensed_mode_str'] != 'CAR' and pointIsWithinBounds(end_loc['coordinates'], [[-105.153, 39.745], [-105.150, 39.743]])\"\n",
+    "          \"showsIf\": \"sections[0]['sensed_mode_str'] != 'CAR' && pointIsWithinBounds(end_loc['coordinates'], [[-105.153, 39.745], [-105.150, 39.743]])\"\n",
     "        }\n",
     "      ]\n",
     "    },\n",
@@ -97,6 +86,7 @@
     "\n",
     "from plots import *\n",
     "import scaffolding\n",
+    "import re\n",
     "\n",
     "sns.set_style(\"whitegrid\")\n",
     "sns.set()\n",
@@ -109,7 +99,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "b18bc854",
+   "id": "c739be2d",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -117,7 +107,7 @@
     "#output: list of links to the sheets where questions/answers are\n",
     "#will run n surveys times\n",
     "def get_sheet_links(survey_list):\n",
-    "    sheet_list = []\n",
+    "    sheet_list = {}\n",
     "    for name in survey_list:\n",
     "        form_path = survey_info['surveys'][name]['formPath']\n",
     "        #THIS ASSUMES THE FILENAME IS THE SAME AS THE FORM PATH BUT WITH xml FILE TYPE\n",
@@ -125,19 +115,24 @@
     "        l_path[-1] = 'xml'\n",
     "        s = '.'\n",
     "        sheet_path = s.join(l_path)\n",
-    "        sheet_list.append(sheet_path)\n",
+    "        sheet_list[name] = sheet_path\n",
+    "\n",
     "    return sheet_list\n",
     "\n",
     "#input: list of urls for the survey xlsx files\n",
-    "#output: two dictionaries to translate the ?s/ans\n",
+    "#output: two dictionaries to translate the ?s/ans (?s dict is nested per survey)\n",
     "#will run n surveys times\n",
-    "def build_dictionaries(url_list):\n",
+    "def build_dictionaries(sheet_list):\n",
     "    opt_dict = {}\n",
     "    quest_dict = {}\n",
     "    \n",
-    "    for url in sheet_list:\n",
+    "    for survey_name in sheet_list:\n",
+    "        url = sheet_list[survey_name]\n",
     "        result = urllib.request.urlopen(url)\n",
     "        doc = minidom.parse(result) \n",
+    "        \n",
+    "        #nested dictionaries to keep surveys grouped\n",
+    "        survey_questions = {}\n",
     "\n",
     "        labels = doc.getElementsByTagName(\"label\") \n",
     "        for label in labels:\n",
@@ -145,7 +140,7 @@
     "                #label appearance = not a question?\n",
     "                #nodeName input -- to word cloud as a later addition?\n",
     "                if label.parentNode.getAttribute(\"appearance\") != \"label\" and label.parentNode.nodeName != \"input\": \n",
-    "                    quest_dict[str(label.parentNode.getAttribute(\"ref\").split('/')[-1])] = label.firstChild.data\n",
+    "                    survey_questions[str(label.parentNode.getAttribute(\"ref\").split('/')[-1])] = label.firstChild.data\n",
     "            \n",
     "            if label.parentNode.nodeName == 'item':\n",
     "                if label.parentNode.parentNode.getAttribute(\"appearance\") == \"likert\":\n",
@@ -154,7 +149,9 @@
     "                    opt_dict[label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data] = label.firstChild.data  \n",
     "                elif label.parentNode.getElementsByTagName(\"name\"):\n",
     "                    opt_dict[label.parentNode.getElementsByTagName(\"name\")[0].firstChild.data] = label.firstChild.data  \n",
-    "                    \n",
+    "        \n",
+    "        quest_dict[survey_name] = survey_questions\n",
+    "        \n",
     "    return opt_dict, quest_dict\n",
     "\n",
     "\n",
@@ -167,6 +164,7 @@
     "    for i in range(len(df)):\n",
     "        data_key = list(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse'].keys())[0]\n",
     "        row = pd.json_normalize(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse'][data_key])\n",
+    "        row['user_id'] = df.loc[i].user_id\n",
     "        rows.append(row)\n",
     "    df = pd.concat(rows)\n",
     "    \n",
@@ -192,21 +190,34 @@
     "    \n",
     "    return labels\n",
     "\n",
-    "#input: all of the responses to a single survey\n",
-    "#output: text with num responses and users\n",
-    "#THIS SHOULD GO IN SCAFFOLDING AND INCLUDE WHOLE POOL AT SOME POINT\n",
-    "def get_text(responses):\n",
-    "    num_resp = len(responses)\n",
-    "    num_users = responses.user_id.nunique()\n",
-    "    quality_text = f\"Based on {num_resp} responses from {num_users} users\"\n",
-    "    print(quality_text)\n",
-    "    return quality_text"
+    "#input: pt - set of coordinates in 1d array, bounds - 2 points in 2-d array\n",
+    "#output: boolean if point is within the bounding box\n",
+    "#this function is a pythonized version of the one in the phone code - good candidate for common lib\n",
+    "def pointIsWithinBounds(pt, bounds): \n",
+    "    #pt's lon must be east of, or greater than, NW's lon; and west of, or less than, SE's lon\n",
+    "    lonInRange = pt[0] > bounds[0][0] and pt[0] < bounds[1][0];\n",
+    "    #pt's lat must be south of, or less than, NW's lat; and north of, or greater than, SE's lat\n",
+    "    latInRange = pt[1] < bounds[0][1] and pt[1] > bounds[1][1];\n",
+    "    \n",
+    "    return latInRange and lonInRange;\n",
+    "\n",
+    "#input: single row of dataframe\n",
+    "#output: T/F evaluation of the conditon\n",
+    "#eval_string is re-defined for each survey\n",
+    "#TODO:\n",
+    "    #parameterize the eval_string\n",
+    "    #flatten row into dict instead of pulling attr\n",
+    "def filter_fn(row):\n",
+    "    globals = {'pointIsWithinBounds': pointIsWithinBounds}\n",
+    "    locals = {'end_loc': row['end_loc'], 'start_loc': row['start_loc'], 'sections': row['sections']}\n",
+    "\n",
+    "    return eval(eval_string, globals, locals)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3b355efd",
+   "id": "5319980e",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -221,33 +232,16 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "79608979",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#same as phone code\n",
-    "def pointIsWithinBounds(pt, bounds): \n",
-    "    #pt's lon must be east of, or greater than, NW's lon; and west of, or less than, SE's lon\n",
-    "    lonInRange = pt[0] > bounds[0][0] and pt[0] < bounds[1][0];\n",
-    "    #pt's lat must be south of, or less than, NW's lat; and north of, or greater than, SE's lat\n",
-    "    latInRange = pt[1] < bounds[0][1] and pt[1] > bounds[1][1];\n",
-    "    \n",
-    "    return latInRange and lonInRange;"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5cd182b7",
+   "id": "3b355efd",
    "metadata": {},
    "outputs": [],
    "source": [
-    "#load all of the confirmed trips\n",
+    "#load all of the composite trips - require sections\n",
     "tq = scaffolding.get_time_query(year, month)\n",
-    "all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)\n",
+    "all_composite_trips = scaffolding.load_all_composite_trips(tq)\n",
     "\n",
     "#remove blank inputs\n",
-    "survey_trips = all_confirmed_trips[all_confirmed_trips['user_input'] != {}]\n",
+    "survey_trips = all_composite_trips[all_composite_trips['user_input'] != {}]\n",
     "\n",
     "#survey counts df\n",
     "survey_trips = survey_trips.reset_index()\n",
@@ -256,36 +250,18 @@
     "#gather the cols needed for charts and text\n",
     "survey_trips = survey_trips[['survey_name', 'user_id', 'user_input']]\n",
     "\n",
-    "#get quality text\n",
-    "qual_text = get_text(survey_trips)\n",
-    "\n",
     "#debug_df\n",
     "debug_df = pd.DataFrame.from_dict({\n",
     "        \"year\": year,\n",
     "        \"month\": month,\n",
     "        \"Registered_participants\": len(scaffolding.get_participant_uuids(program, include_test_users)),\n",
-    "        \"Participants_with_at_least_one_trip\": scaffolding.unique_users(all_confirmed_trips),\n",
+    "        \"Participants_with_at_least_one_trip\": scaffolding.unique_users(all_composite_trips),\n",
     "        \"Participant_with_at_least_one_labeled_trip\": scaffolding.unique_users(survey_trips),\n",
     "        \"Trips_with_at_least_one_label\": len(survey_trips)\n",
     "        },\n",
     "    orient='index', columns=[\"value\"])"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c739be2d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#how we filter the rows\n",
-    "def filter_fn(row):\n",
-    "    globals = {'pointIsWithinBounds': pointIsWithinBounds}\n",
-    "    locals = {'end_loc': row['end_loc'], 'start_loc': row['start_loc'], 'sections': row['sections']}\n",
-    "\n",
-    "    return eval(eval_string, globals, locals)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -294,9 +270,6 @@
    "outputs": [],
    "source": [
     "#create the total dfs\n",
-    "tq = scaffolding.get_time_query(year, month)\n",
-    "all_composite_trips = scaffolding.load_all_composite_trips(tq)\n",
-    "\n",
     "# transform data to meet a data format requirement\n",
     "all_composite_trips['sections'] = all_composite_trips['sections'].apply(lambda x: [x[0]['data']] if len(x) > 0 else x)\n",
     "\n",
@@ -306,7 +279,14 @@
     "#gather the eval strings\n",
     "eval_strings = {}\n",
     "for item in survey_info['buttons']['trip-label']:\n",
-    "    eval_strings[item['surveyName']] = item['showedIf']\n",
+    "    if 'showsIfPy' in item.keys():\n",
+    "        eval_strings[item['surveyName']] = item['showsIfPy']\n",
+    "    else:\n",
+    "        raw_string = item['showsIf']\n",
+    "        expression = raw_string.replace('&&', 'and')\n",
+    "        expression = expression.replace('||', 'or')\n",
+    "        expression = re.sub(r\"!(?!=)\", \"not \", expression)\n",
+    "        eval_strings[item['surveyName']] = expression\n",
     "\n",
     "#use eval strings to build \"total\" dataframes\n",
     "total_dfs = {}\n",
@@ -344,25 +324,41 @@
    "outputs": [],
    "source": [
     "#create one plot per question in the survey\n",
-    "for col in quest_dict.keys():\n",
-    "    filename = col + file_suffix\n",
-    "    plot_title_no_quality = quest_dict[col]\n",
+    "for survey_name in quest_dict.keys():\n",
+    "    print(\"Charts for:\", survey_name)\n",
     "\n",
-    "    try:\n",
-    "        plot_title = plot_title_no_quality+'\\n'+qual_text\n",
-    "        quest_frame = df_responses.copy()\n",
-    "        quest_frame = quest_frame[quest_frame[col] != \"\"] #could have blank responses for non-mandatory ?s\n",
-    "        labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n",
-    "        labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
-    "        values = quest_frame[col].value_counts(dropna=True).tolist()\n",
+    "    for col in quest_dict[survey_name].keys():\n",
+    "        \n",
+    "        filename = col + file_suffix\n",
+    "        plot_title_no_quality = quest_dict[survey_name][col]\n",
     "\n",
-    "        pie_chart_purpose(plot_title, labels, values, filename)\n",
-    "        alt_text = store_alt_text_pie(pd.DataFrame(values, labels), filename, plot_title)\n",
-    "    \n",
-    "    except:\n",
-    "        generate_missing_plot(plot_title_no_quality, debug_df, filename)\n",
-    "        alt_text = store_alt_text_missing(debug_df, filename, plot_title_no_quality)"
+    "        try:\n",
+    "            quest_frame = df_responses.copy()\n",
+    "            #could have blank responses for non-mandatory ?s\n",
+    "            quest_frame = quest_frame[quest_frame[col] != \"\"]\n",
+    "            qual_text = scaffolding.get_quality_text(total_dfs[survey_name], quest_frame, mode_of_interest, include_test_users)\n",
+    "            \n",
+    "            plot_title = plot_title_no_quality+'\\n'+qual_text\n",
+    "            \n",
+    "            labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n",
+    "            labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
+    "            values = quest_frame[col].value_counts(dropna=True).tolist()\n",
+    "\n",
+    "            pie_chart_purpose(plot_title, labels, values, filename)\n",
+    "            alt_text = store_alt_text_pie(pd.DataFrame(values, labels), filename, plot_title)\n",
+    "\n",
+    "        except:\n",
+    "            generate_missing_plot(plot_title_no_quality, debug_df, filename)\n",
+    "            alt_text = store_alt_text_missing(debug_df, filename, plot_title_no_quality)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bbe7f86d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From 7d8a114bb0be2d44662178596bc2f9186370c8bf Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 3 Apr 2024 10:55:54 -0600
Subject: [PATCH 18/70] bypass evals for non-conditional surveys

if the config has no "buttons" entry, there is only one trip survey and it isn't conditional, so the denominator is all composite trips that have sections
---
 viz_scripts/survey_responses.ipynb | 42 +++++++++++++++++-------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 110beb13..94593486 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -276,25 +276,31 @@
     "#only want trips with sections\n",
     "all_composite_trips = all_composite_trips[all_composite_trips['sections'].str.len() > 0]\n",
     "\n",
-    "#gather the eval strings\n",
-    "eval_strings = {}\n",
-    "for item in survey_info['buttons']['trip-label']:\n",
-    "    if 'showsIfPy' in item.keys():\n",
-    "        eval_strings[item['surveyName']] = item['showsIfPy']\n",
-    "    else:\n",
-    "        raw_string = item['showsIf']\n",
-    "        expression = raw_string.replace('&&', 'and')\n",
-    "        expression = expression.replace('||', 'or')\n",
-    "        expression = re.sub(r\"!(?!=)\", \"not \", expression)\n",
-    "        eval_strings[item['surveyName']] = expression\n",
-    "\n",
-    "#use eval strings to build \"total\" dataframes\n",
     "total_dfs = {}\n",
-    "for name, eval_string in eval_strings.items():\n",
-    "    meets_bools = all_composite_trips.apply(filter_fn, axis=1)\n",
-    "    df = all_composite_trips[meets_bools]\n",
-    "    \n",
-    "    total_dfs[name] = df"
+    "#for conditional surveys!\n",
+    "if 'buttons' in survey_info.keys():\n",
+    "    #gather the eval strings\n",
+    "    eval_strings = {}\n",
+    "    for item in survey_info['buttons']['trip-label']:\n",
+    "        if 'showsIfPy' in item.keys():\n",
+    "            eval_strings[item['surveyName']] = item['showsIfPy']\n",
+    "        else:\n",
+    "            raw_string = item['showsIf']\n",
+    "            expression = raw_string.replace('&&', 'and')\n",
+    "            expression = expression.replace('||', 'or')\n",
+    "            expression = re.sub(r\"!(?!=)\", \"not \", expression)\n",
+    "            eval_strings[item['surveyName']] = expression\n",
+    "\n",
+    "    #use eval strings to build \"total\" dataframes\n",
+    "    for name, eval_string in eval_strings.items():\n",
+    "        meets_bools = all_composite_trips.apply(filter_fn, axis=1)\n",
+    "        df = all_composite_trips[meets_bools]\n",
+    "\n",
+    "        total_dfs[name] = df\n",
+    "        \n",
+    "else:\n",
+    "    survey_name = list(sheet_list.keys())[0] #there is only one if non-conditional\n",
+    "    total_dfs[survey_name] = all_composite_trips"
    ]
   },
   {

From 8a2c2f445875774e15c1993f3f1a054bca41b24e Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 3 Apr 2024 11:05:43 -0600
Subject: [PATCH 19/70] flatten entire row instead of pulling values

when preparing to evaluate the string, I had pulled individual values as a workaround

using row.to_dict() instead is more generalizable
---
 viz_scripts/survey_responses.ipynb | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 94593486..9a6c5fa5 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -206,12 +206,11 @@
     "#eval_string is re-defined for each survey\n",
     "#TODO:\n",
     "    #parameterize the eval_string\n",
-    "    #flatten row into dict instead of pulling attr\n",
     "def filter_fn(row):\n",
-    "    globals = {'pointIsWithinBounds': pointIsWithinBounds}\n",
-    "    locals = {'end_loc': row['end_loc'], 'start_loc': row['start_loc'], 'sections': row['sections']}\n",
+    "    funcs = {'pointIsWithinBounds': pointIsWithinBounds}\n",
+    "    row_vars = row.to_dict()\n",
     "\n",
-    "    return eval(eval_string, globals, locals)"
+    "    return eval(eval_string, funcs, row_vars)"
    ]
   },
   {

From a04a316d55445dbb63d9415ae8cbb7d306effbf3 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 3 Apr 2024 11:14:07 -0600
Subject: [PATCH 20/70] add eval string as a parameter to filter fncn

editing this filtering process to explicitly pass the eval string rather than rely on the fact the function shares scope with where it is called - for clarity
---
 viz_scripts/survey_responses.ipynb | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 9a6c5fa5..982c0aa5 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -204,9 +204,7 @@
     "#input: single row of dataframe\n",
     "#output: T/F evaluation of the conditon\n",
     "#eval_string is re-defined for each survey\n",
-    "#TODO:\n",
-    "    #parameterize the eval_string\n",
-    "def filter_fn(row):\n",
+    "def filter_fn(row, eval_string):\n",
     "    funcs = {'pointIsWithinBounds': pointIsWithinBounds}\n",
     "    row_vars = row.to_dict()\n",
     "\n",
@@ -292,7 +290,7 @@
     "\n",
     "    #use eval strings to build \"total\" dataframes\n",
     "    for name, eval_string in eval_strings.items():\n",
-    "        meets_bools = all_composite_trips.apply(filter_fn, axis=1)\n",
+    "        meets_bools = all_composite_trips.apply(lambda x: filter_fn(x, eval_string), axis=1)\n",
     "        df = all_composite_trips[meets_bools]\n",
     "\n",
     "        total_dfs[name] = df\n",

From af3dfa896ec081d62c42c4b21641347c0eedd9c9 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 3 Apr 2024 11:48:53 -0600
Subject: [PATCH 21/70] add the survey info parameter

---
 viz_scripts/bin/generate_plots.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/viz_scripts/bin/generate_plots.py b/viz_scripts/bin/generate_plots.py
index 0bec985b..4facaf50 100644
--- a/viz_scripts/bin/generate_plots.py
+++ b/viz_scripts/bin/generate_plots.py
@@ -90,7 +90,8 @@ def compute_for_date(month, year):
         include_test_users=dynamic_config.get('metrics', {}).get('include_test_users', False),
         dynamic_labels = dynamic_labels,
         use_imperial = dynamic_config.get('display_config', {}).get('use_imperial', True),
-        sensed_algo_prefix=dynamic_config.get('metrics', {}).get('sensed_algo_prefix', "cleaned"))
+        sensed_algo_prefix=dynamic_config.get('metrics', {}).get('sensed_algo_prefix', "cleaned"),
+        survey_info = dynamic_config.get('survey_info', {}))
 
     print(f"Running at {arrow.get()} with params {params}")
 

From 399c7b3a51603fea859884d7a435c5dec3e1fc54 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 3 Apr 2024 11:49:04 -0600
Subject: [PATCH 22/70] add new notebook to crontab

---
 viz_scripts/docker/crontab | 1 +
 1 file changed, 1 insertion(+)

diff --git a/viz_scripts/docker/crontab b/viz_scripts/docker/crontab
index 6e7cc552..2ffcec9b 100644
--- a/viz_scripts/docker/crontab
+++ b/viz_scripts/docker/crontab
@@ -5,5 +5,6 @@
 0 8 * * * python bin/generate_plots.py mode_specific_metrics.ipynb default >> /var/log/intake.stdinout 2>&1
 0 8 * * * python bin/generate_plots.py mode_specific_timeseries.ipynb default >> /var/log/intake.stdinout 2>&1
 0 8 * * * python bin/generate_plots.py energy_calculations.ipynb default >> /var/log/intake.stdinout 2>&1
+0 8 * * * python bin/generate_plots.py survey_responses.ipynb default >> /var/log/intake.stdinout 2>&1
 # For testing only
 # */5 * * * * python bin/generate_plots.py mode_purpose_share.ipynb default >> /var/log/intake.stdinout 2>&1

From 75d4fb6001ff020c161e27c106042de27bbcbb6f Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 3 Apr 2024 11:59:28 -0600
Subject: [PATCH 23/70] deal with missing data

if there is no data, we detect that up front and create empty dataframes instead
---
 viz_scripts/survey_responses.ipynb | 40 +++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 982c0aa5..5758d670 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -166,13 +166,17 @@
     "        row = pd.json_normalize(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse'][data_key])\n",
     "        row['user_id'] = df.loc[i].user_id\n",
     "        rows.append(row)\n",
-    "    df = pd.concat(rows)\n",
-    "    \n",
-    "    rename_nests = {}\n",
-    "    for col in df.columns:\n",
-    "        rename_nests[col] = col.split('.')[-1]\n",
+    "        \n",
+    "    if len(rows) > 0:\n",
+    "        df = pd.concat(rows)\n",
+    "\n",
+    "        rename_nests = {}\n",
+    "        for col in df.columns:\n",
+    "            rename_nests[col] = col.split('.')[-1]\n",
     "\n",
-    "    df = df.rename(columns=rename_nests)\n",
+    "        df = df.rename(columns=rename_nests)\n",
+    "    else:\n",
+    "        df = pd.DataFrame()\n",
     "\n",
     "    return df\n",
     "\n",
@@ -237,15 +241,24 @@
     "tq = scaffolding.get_time_query(year, month)\n",
     "all_composite_trips = scaffolding.load_all_composite_trips(tq)\n",
     "\n",
-    "#remove blank inputs\n",
-    "survey_trips = all_composite_trips[all_composite_trips['user_input'] != {}]\n",
+    "if len(all_composite_trips) > 0:\n",
+    "    #remove blank inputs\n",
+    "    survey_trips = all_composite_trips[all_composite_trips['user_input'] != {}]\n",
     "\n",
-    "#survey counts df\n",
-    "survey_trips = survey_trips.reset_index()\n",
-    "survey_trips['survey_name'] = survey_trips.user_input.apply(lambda sr: sr['trip_user_input']['data']['name'])\n",
+    "    #survey counts df\n",
+    "    survey_trips = survey_trips.reset_index()\n",
+    "    survey_trips['survey_name'] = survey_trips.user_input.apply(lambda sr: sr['trip_user_input']['data']['name'])\n",
     "\n",
-    "#gather the cols needed for charts and text\n",
-    "survey_trips = survey_trips[['survey_name', 'user_id', 'user_input']]\n",
+    "    #gather the cols needed for charts and text\n",
+    "    survey_trips = survey_trips[['survey_name', 'user_id', 'user_input']]\n",
+    "    \n",
+    "    # transform data to meet a data format requirement\n",
+    "    all_composite_trips['sections'] = all_composite_trips['sections'].apply(lambda x: [x[0]['data']] if len(x) > 0 else x)\n",
+    "\n",
+    "    #only want trips with sections\n",
+    "    all_composite_trips = all_composite_trips[all_composite_trips['sections'].str.len() > 0]\n",
+    "else:\n",
+    "    survey_trips = pd.DataFrame()\n",
     "\n",
     "#debug_df\n",
     "debug_df = pd.DataFrame.from_dict({\n",
@@ -267,7 +280,6 @@
    "outputs": [],
    "source": [
     "#create the total dfs\n",
-    "# transform data to meet a data format requirement\n",
     "all_composite_trips['sections'] = all_composite_trips['sections'].apply(lambda x: [x[0]['data']] if len(x) > 0 else x)\n",
     "\n",
     "#only want trips with sections\n",

From 45026b7166cfdd678c95456f730d20b3d14c865e Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 3 Apr 2024 12:00:00 -0600
Subject: [PATCH 24/70] clean up notebook

removed empty cell and changed "None" inputs to dummy values
---
 viz_scripts/survey_responses.ipynb | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 5758d670..1fa0ca39 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -7,11 +7,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "year = None\n",
-    "month = None\n",
+    "year = 2024\n",
+    "month = 4\n",
     "program = \"default\"\n",
     "study_type = \"study\"\n",
-    "mode_of_interest = None\n",
+    "mode_of_interest = 'e-bike'\n",
     "include_test_users = False\n",
     "dynamic_labels = {  }\n",
     "use_imperial = False\n",
@@ -366,14 +366,6 @@
     "            generate_missing_plot(plot_title_no_quality, debug_df, filename)\n",
     "            alt_text = store_alt_text_missing(debug_df, filename, plot_title_no_quality)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "bbe7f86d",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

From 733e559b1411b500f724ec4c81c8d4931027df97 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 3 Apr 2024 12:26:02 -0600
Subject: [PATCH 25/70] remove old code

---
 viz_scripts/survey_responses.ipynb | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 1fa0ca39..2c42187e 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -280,11 +280,6 @@
    "outputs": [],
    "source": [
     "#create the total dfs\n",
-    "all_composite_trips['sections'] = all_composite_trips['sections'].apply(lambda x: [x[0]['data']] if len(x) > 0 else x)\n",
-    "\n",
-    "#only want trips with sections\n",
-    "all_composite_trips = all_composite_trips[all_composite_trips['sections'].str.len() > 0]\n",
-    "\n",
     "total_dfs = {}\n",
     "#for conditional surveys!\n",
     "if 'buttons' in survey_info.keys():\n",

From 8a4869bfc00e98977df9a3179e9db46941f1268a Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Thu, 4 Apr 2024 15:23:00 -0600
Subject: [PATCH 26/70] no "input" type questions in html

removing these from the html in the same way I did in the notebook -- maybe we will add these back with a different chart type later
---
 frontend/index.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/index.html b/frontend/index.html
index 8bf81f1a..6c8dc2fb 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -374,7 +374,7 @@
             labels = doc.getElementsByTagName("label");
               for (i in labels) {
                 try {
-                  if ((labels[i].parentNode.getAttribute("appearance") !== "label")) //label type questions don't ever have answers
+                  if ((labels[i].parentNode.getAttribute("appearance") !== "label" && labels[i].parentNode.nodeName != "input")) //label type questions don't ever have answers
                   {
                     quest_dict[labels[i].parentNode.getAttribute("ref").split('/').slice(-1)] = labels[i].firstChild.data;
                   }

From df6fde3e2afb1cf9ca08b0997db7f21b06837ddc Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Fri, 5 Apr 2024 17:23:33 -0600
Subject: [PATCH 27/70] edits to quality text

polishing the way quality text is handled
---
 viz_scripts/survey_responses.ipynb | 50 +++++++++++++++++-------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 2c42187e..32c88031 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -211,8 +211,14 @@
     "def filter_fn(row, eval_string):\n",
     "    funcs = {'pointIsWithinBounds': pointIsWithinBounds}\n",
     "    row_vars = row.to_dict()\n",
+    "    \n",
+    "    try:\n",
+    "        result = eval(eval_string, funcs, row_vars)\n",
+    "    #if unable to evaluate row, assume not a match (handle old surveys!)\n",
+    "    except:\n",
+    "        result = False\n",
     "\n",
-    "    return eval(eval_string, funcs, row_vars)"
+    "    return result"
    ]
   },
   {
@@ -275,7 +281,24 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c8b887f3",
+   "id": "7b7001a9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#create translation dictionaries\n",
+    "opt_dict, quest_dict = build_dictionaries(sheet_list)\n",
+    "print(\"Questions dictionary:\\n\",quest_dict)\n",
+    "print(\"Options dictionary:\\n\", opt_dict)\n",
+    "\n",
+    "#format survey trips into responses dataframe\n",
+    "df_responses = create_dataframe(survey_trips)\n",
+    "file_suffix = scaffolding.get_file_suffix(year, month, program)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "93dbaa5e",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -304,24 +327,11 @@
     "        \n",
     "else:\n",
     "    survey_name = list(sheet_list.keys())[0] #there is only one if non-conditional\n",
-    "    total_dfs[survey_name] = all_composite_trips"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7b7001a9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#create translation dictionaries\n",
-    "opt_dict, quest_dict = build_dictionaries(sheet_list)\n",
-    "print(\"Questions dictionary:\\n\",quest_dict)\n",
-    "print(\"Options dictionary:\\n\", opt_dict)\n",
+    "    total_dfs[survey_name] = all_composite_trips\n",
     "\n",
-    "#format survey trips into responses dataframe\n",
-    "df_responses = create_dataframe(survey_trips)\n",
-    "file_suffix = scaffolding.get_file_suffix(year, month, program)"
+    "# uncomment to check total dataframes\n",
+    "for key in total_dfs.keys():\n",
+    "    print(key, len(total_dfs[key]))"
    ]
   },
   {
@@ -344,10 +354,8 @@
     "\n",
     "        try:\n",
     "            quest_frame = df_responses.copy()\n",
-    "            #could have blank responses for non-mandatory ?s\n",
     "            quest_frame = quest_frame[quest_frame[col] != \"\"]\n",
     "            qual_text = scaffolding.get_quality_text(total_dfs[survey_name], quest_frame, mode_of_interest, include_test_users)\n",
-    "            \n",
     "            plot_title = plot_title_no_quality+'\\n'+qual_text\n",
     "            \n",
     "            labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n",

From d6b2d80cbbed7733e4aaf217e95ae8a22461683b Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Tue, 9 Apr 2024 09:23:22 -0600
Subject: [PATCH 28/70] add sensed metrics to survey dashboards

Even though there are no labels, we can still display charts with sensed mode and other generic metrics
---
 frontend/index.html                 |  5 +++--
 frontend/metrics_study_surveys.html | 11 ++++++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/frontend/index.html b/frontend/index.html
index 6c8dc2fb..3eca1e5c 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -460,8 +460,9 @@
                   var text = '<option ' + 'value="' + key + '" data-sizex="4" data-sizey="4">' + value + '</option>';
                   file = file.concat('\n', text);
                 });
-                console.log(file);
-                $('#metric').append(file);
+                console.log("configuring units");
+                const unitConfigured = file.replaceAll("${data.display_config.use_imperial}", dist_units);
+                $('#metric').append(unitConfigured);
                 addPreconfiguredMetrics(Object.keys(quest_dict).slice(0, 5)); //only adding the first 6 elements
               });
             });         
diff --git a/frontend/metrics_study_surveys.html b/frontend/metrics_study_surveys.html
index be8569c4..2eea75b6 100644
--- a/frontend/metrics_study_surveys.html
+++ b/frontend/metrics_study_surveys.html
@@ -1,4 +1,13 @@
 <!-- htmnl options should be 1 per chart question -->
 <!-- <option value="question key" data-sizex="4" data-sizey="4">translated question</option> -->
 
-<option value="ntrips_sensed_mode" data-sizex="4" data-sizey="4">Number of trips (sensed)</option>
\ No newline at end of file
+<option value="ntrips_sensed_mode" data-sizex="4" data-sizey="4">Number of trips (sensed)</option>
+<option value="ntrips_under10miles_sensed_mode" data-sizex="4" data-sizey="4">Trip count under 80th Percentile (sensed)</option>
+<option value="miles_sensed_mode" data-sizex="4" data-sizey="4">Trip distance (${data.display_config.use_imperial}) by mode (sensed)</option>
+<option value="miles_sensed_mode_land" data-sizex="4" data-sizey="4">Trip distance by land mode (sensed)</option>
+<option value="average_miles_sensed_mode" data-sizex="6" data-sizey="4">Average trip length (${data.display_config.use_imperial}) (sensed)</option>
+<option value="ntrips_per_day" data-sizex="6" data-sizey="4">Trip frequency</option>
+<option value="ntrips_sensed_per_day" data-sizex="6" data-sizey="4">Trip frequency (sensed)</option>
+<option value="ntrips_per_weekday" data-sizex="6" data-sizey="4">Trip frequency (weekday)</option>
+<option value="ntrips_sensed_per_weekday" data-sizex="6" data-sizey="4">Trip frequency (weekday, sensed)</option>
+<option value="ts_users" data-sizex="8" data-sizey="2">Timeseries of active users</option>
\ No newline at end of file

From 0da6c331a2cbe25732fc8044d15c3d7c05e0d7df Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Tue, 9 Apr 2024 09:23:55 -0600
Subject: [PATCH 29/70] comments for every case of dashboard

emulating the comment for the study case
---
 frontend/index.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/frontend/index.html b/frontend/index.html
index 3eca1e5c..9134ac50 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -435,7 +435,7 @@
           dynamic_labels = data.label_options
           surveys = data.survey_info.surveys 
           console.log(data.survey_info['trip-labels'])
-          if (data.survey_info['trip-labels'] === 'ENKETO') {
+          if (data.survey_info['trip-labels'] === 'ENKETO') { //CASE: SURVEYS
             survey_list = Object.keys(surveys)
             survey_list = survey_list.filter(name => name !== 'UserProfileSurvey')
 
@@ -468,7 +468,7 @@
             });         
           
           }
-          else if (data.intro.program_or_study == 'program') {
+          else if (data.intro.program_or_study == 'program') { //CASE: PROGRAM
             // Note: We're disabling energy metrics on public dashboard when dynamic labels are available.
             // TODO: Remove the if (data.label_options) in future when energy computation is handled properly.
             if (dynamic_labels) {

From 8ff0b8ba693c0b164a6a85cefb0b40ceefea2aa5 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Tue, 9 Apr 2024 09:53:28 -0600
Subject: [PATCH 30/70] break survey notebook if irrelevant

similar to the mode-specific notebooks, we ONLY want to run this notebook if the trip labels are surveys (ENKETO)
---
 viz_scripts/survey_responses.ipynb | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 32c88031..14929af7 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -96,6 +96,20 @@
     "label_units, short_label, label_units_lower, distance_col, weight_unit = scaffolding.get_units(use_imperial)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f85d2a9e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Do not run this notebook at all unless it is for a survey configuration; nbclient will run up through this cell\n",
+    "if not survey_info['trip-labels'] == 'ENKETO':\n",
+    "    ipython = get_ipython()\n",
+    "    ipython._showtraceback = scaffolding.no_traceback_handler\n",
+    "    raise Exception(\"The plots in this notebook are only relevant to deployments with trip-level surveys\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,

From 8cdd6de8005646aac5787c8e834d8bc71bd978b7 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Tue, 9 Apr 2024 12:17:04 -0600
Subject: [PATCH 31/70] filter trips - required to keep out test users

---
 viz_scripts/scaffolding.py         | 10 ++++++++++
 viz_scripts/survey_responses.ipynb |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index e7234d2d..2dab15fa 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -74,6 +74,16 @@ def load_all_participant_trips(program, tq, load_test_users):
     disp.display(participant_ct_df.head())
     return participant_ct_df
 
+def filter_composite_trips(all_comp_trips, program, load_test_users):
+    participant_list = get_participant_uuids(program, load_test_users)
+    # CASE 1 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
+    if len(all_comp_trips) == 0:
+        return all_comp_trips
+    participant_ct_df = all_comp_trips[all_comp_trips.user_id.isin(participant_list)]
+    print("After filtering, found %s participant trips " % len(participant_ct_df))
+    disp.display(participant_ct_df.head())
+    return participant_ct_df
+
 def filter_labeled_trips(mixed_trip_df):
     # CASE 1 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
     if len(mixed_trip_df) == 0:
diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 14929af7..e3267358 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -260,6 +260,8 @@
     "#load all of the composite trips - require sections\n",
     "tq = scaffolding.get_time_query(year, month)\n",
     "all_composite_trips = scaffolding.load_all_composite_trips(tq)\n",
+    "#we need to filter out trips (based on if including test users)\n",
+    "all_composite_trips = scaffolding.filter_composite_trips(all_composite_trips, program, include_test_users)\n",
     "\n",
     "if len(all_composite_trips) > 0:\n",
     "    #remove blank inputs\n",

From 4ea47242be630aa0e75dea9c3b9daed65fbcdad4 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Tue, 9 Apr 2024 13:05:27 -0600
Subject: [PATCH 32/70] debug df per survey

the pools of trips and labeled trips differ per survey, so the debug dataframes should reflect that

if statement used to generate empty labeled df if total df is empty
---
 viz_scripts/survey_responses.ipynb | 31 +++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index e3267358..3d24098f 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -232,7 +232,21 @@
     "    except:\n",
     "        result = False\n",
     "\n",
-    "    return result"
+    "    return result\n",
+    "\n",
+    "#create a debug dataframe\n",
+    "def generate_debug_df(program, include_test_users, full_df, labeled_df):\n",
+    "    debug_df = pd.DataFrame.from_dict({\n",
+    "            \"year\": year,\n",
+    "            \"month\": month,\n",
+    "            \"Registered_participants\": len(scaffolding.get_participant_uuids(program, include_test_users)),\n",
+    "            \"Participants_with_at_least_one_trip\": scaffolding.unique_users(full_df),\n",
+    "            \"Participant_with_at_least_one_labeled_trip\": scaffolding.unique_users(labeled_df),\n",
+    "            \"Trips_with_at_least_one_label\": len(labeled_df)\n",
+    "            },\n",
+    "        orient='index', columns=[\"value\"])\n",
+    "    \n",
+    "    return debug_df"
    ]
   },
   {
@@ -280,18 +294,7 @@
     "    #only want trips with sections\n",
     "    all_composite_trips = all_composite_trips[all_composite_trips['sections'].str.len() > 0]\n",
     "else:\n",
-    "    survey_trips = pd.DataFrame()\n",
-    "\n",
-    "#debug_df\n",
-    "debug_df = pd.DataFrame.from_dict({\n",
-    "        \"year\": year,\n",
-    "        \"month\": month,\n",
-    "        \"Registered_participants\": len(scaffolding.get_participant_uuids(program, include_test_users)),\n",
-    "        \"Participants_with_at_least_one_trip\": scaffolding.unique_users(all_composite_trips),\n",
-    "        \"Participant_with_at_least_one_labeled_trip\": scaffolding.unique_users(survey_trips),\n",
-    "        \"Trips_with_at_least_one_label\": len(survey_trips)\n",
-    "        },\n",
-    "    orient='index', columns=[\"value\"])"
+    "    survey_trips = pd.DataFrame()"
    ]
   },
   {
@@ -362,6 +365,8 @@
     "#create one plot per question in the survey\n",
     "for survey_name in quest_dict.keys():\n",
     "    print(\"Charts for:\", survey_name)\n",
+    "    debug_df = generate_debug_df(program, include_test_users, total_dfs[survey_name], total_dfs[survey_name][total_dfs[survey_name]['user_input'] != {} if len(total_dfs[survey_name]) > 0 else pd.DataFrame()])\n",
+    "    \n",
     "\n",
     "    for col in quest_dict[survey_name].keys():\n",
     "        \n",

From eff012bdda6d9dd7eb4b8bdf1cdc54ad4fc05456 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Wed, 17 Apr 2024 17:01:21 -0600
Subject: [PATCH 33/70] use emcommon to determine survey prompted

Instead of duplicating the survey-selection logic in this notebook, use the central implementation located in emcommon
---
 viz_scripts/survey_responses.ipynb | 60 ++++++------------------------
 1 file changed, 11 insertions(+), 49 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 3d24098f..5fa03ec5 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -84,6 +84,8 @@
     "import pandas as pd\n",
     "from xml.dom import minidom\n",
     "\n",
+    "from emcommon.survey import conditional_surveys\n",
+    "\n",
     "from plots import *\n",
     "import scaffolding\n",
     "import re\n",
@@ -208,32 +210,6 @@
     "    \n",
     "    return labels\n",
     "\n",
-    "#input: pt - set of coordinates in 1d array, bounds - 2 points in 2-d array\n",
-    "#output: boolean if point is within the bounding box\n",
-    "#this function is a pythonized version of the one in the phone code - good candidate for common lib\n",
-    "def pointIsWithinBounds(pt, bounds): \n",
-    "    #pt's lon must be east of, or greater than, NW's lon; and west of, or less than, SE's lon\n",
-    "    lonInRange = pt[0] > bounds[0][0] and pt[0] < bounds[1][0];\n",
-    "    #pt's lat must be south of, or less than, NW's lat; and north of, or greater than, SE's lat\n",
-    "    latInRange = pt[1] < bounds[0][1] and pt[1] > bounds[1][1];\n",
-    "    \n",
-    "    return latInRange and lonInRange;\n",
-    "\n",
-    "#input: single row of dataframe\n",
-    "#output: T/F evaluation of the conditon\n",
-    "#eval_string is re-defined for each survey\n",
-    "def filter_fn(row, eval_string):\n",
-    "    funcs = {'pointIsWithinBounds': pointIsWithinBounds}\n",
-    "    row_vars = row.to_dict()\n",
-    "    \n",
-    "    try:\n",
-    "        result = eval(eval_string, funcs, row_vars)\n",
-    "    #if unable to evaluate row, assume not a match (handle old surveys!)\n",
-    "    except:\n",
-    "        result = False\n",
-    "\n",
-    "    return result\n",
-    "\n",
     "#create a debug dataframe\n",
     "def generate_debug_df(program, include_test_users, full_df, labeled_df):\n",
     "    debug_df = pd.DataFrame.from_dict({\n",
@@ -324,33 +300,19 @@
     "#create the total dfs\n",
     "total_dfs = {}\n",
     "#for conditional surveys!\n",
+    "wrapped_config = {'survey_info': survey_info}\n",
     "if 'buttons' in survey_info.keys():\n",
-    "    #gather the eval strings\n",
-    "    eval_strings = {}\n",
-    "    for item in survey_info['buttons']['trip-label']:\n",
-    "        if 'showsIfPy' in item.keys():\n",
-    "            eval_strings[item['surveyName']] = item['showsIfPy']\n",
+    "    all_composite_trips['survey_name_prompted'] = all_composite_trips.apply(lambda row: conditional_surveys.survey_prompted_for_trip(row.to_dict(), wrapped_config), axis=1)\n",
+    "    \n",
+    "    for survey_name in list(sheet_list.keys()):\n",
+    "        if survey_name in all_composite_trips['survey_name_prompted'].unique():\n",
+    "            total_dfs[survey_name] = all_composite_trips[all_composite_trips['survey_name_prompted'] == survey_name]\n",
     "        else:\n",
-    "            raw_string = item['showsIf']\n",
-    "            expression = raw_string.replace('&&', 'and')\n",
-    "            expression = expression.replace('||', 'or')\n",
-    "            expression = re.sub(r\"!(?!=)\", \"not \", expression)\n",
-    "            eval_strings[item['surveyName']] = expression\n",
-    "\n",
-    "    #use eval strings to build \"total\" dataframes\n",
-    "    for name, eval_string in eval_strings.items():\n",
-    "        meets_bools = all_composite_trips.apply(lambda x: filter_fn(x, eval_string), axis=1)\n",
-    "        df = all_composite_trips[meets_bools]\n",
-    "\n",
-    "        total_dfs[name] = df\n",
-    "        \n",
+    "            #never prompted\n",
+    "            total_dfs[survey_name] = pd.DataFrame()\n",
     "else:\n",
     "    survey_name = list(sheet_list.keys())[0] #there is only one if non-conditional\n",
-    "    total_dfs[survey_name] = all_composite_trips\n",
-    "\n",
-    "# uncomment to check total dataframes\n",
-    "for key in total_dfs.keys():\n",
-    "    print(key, len(total_dfs[key]))"
+    "    total_dfs[survey_name] = all_composite_trips"
    ]
   },
   {

From 6c7496fc9724ec463ded9ac2ce869ca10673196d Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Thu, 18 Apr 2024 17:16:04 -0600
Subject: [PATCH 34/70] add color mapping to surveys

this mapping will keep responses that appear in different surveys the same color and keep a single chart from using the same color twice (unless there are more than 20 different answers, since the palette has 20 colors). When a response with multiple selections is encountered, that segment of the chart is colored with the "average" of the colors of all the answers selected
---
 viz_scripts/plots.py               | 11 +++++++++++
 viz_scripts/scaffolding.py         | 12 ++++++++++++
 viz_scripts/survey_responses.ipynb |  5 ++++-
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/viz_scripts/plots.py b/viz_scripts/plots.py
index 3fa8d3ae..c10e94e8 100644
--- a/viz_scripts/plots.py
+++ b/viz_scripts/plots.py
@@ -110,6 +110,17 @@ def pie_chart_purpose(plot_title,labels,values,colors_map,file_name):
 
     m_labels, m_values = merge_small_entries(labels, values)
     
+    for key in labels:
+        if key not in colors_map.keys():
+            #likely a "selected multiple" case
+            i = 0
+            avg_color = (0, 0, 0)
+            for selection in key.split("\n"):
+                avg_color = tuple(map(lambda i, j: i + j, avg_color, colors_map[selection]))
+                i+=1
+
+            colors_map[key] = tuple(map(lambda i, j: i / j, avg_color, (i, i, i)))
+    
     def func(pct, values):
         total = sum(values)
         absolute = int(round(pct*total/100.0))
diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index cb94ef74..8f3246db 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -230,6 +230,18 @@ def mapping_color_labels(dynamic_labels, dic_re, dic_pur):
 
     return colors_mode, colors_purpose
 
+# Function: Maps survey answers to colors.
+# Input: dictionary of raw and translated survey answers
+# Output: Map for color with survey answers
+def mapping_color_surveys(dic_options):
+    dictionary_values = (list(OrderedDict.fromkeys(dic_options.values())))
+    
+    colors = {}
+    for i in range(len(dictionary_values)):
+        colors[dictionary_values[i]] = plt.cm.tab20.colors[i%20]
+
+    return colors
+
 def load_viz_notebook_sensor_inference_data(year, month, program, include_test_users=False, sensed_algo_prefix="cleaned"):
     """ Inputs:
     year/month/program = parameters from the visualization notebook
diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 5fa03ec5..1418fcfe 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -285,6 +285,9 @@
     "print(\"Questions dictionary:\\n\",quest_dict)\n",
     "print(\"Options dictionary:\\n\", opt_dict)\n",
     "\n",
+    "#color dictionary\n",
+    "color_map = scaffolding.mapping_color_surveys(opt_dict)\n",
+    "\n",
     "#format survey trips into responses dataframe\n",
     "df_responses = create_dataframe(survey_trips)\n",
     "file_suffix = scaffolding.get_file_suffix(year, month, program)"
@@ -345,7 +348,7 @@
     "            labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
     "            values = quest_frame[col].value_counts(dropna=True).tolist()\n",
     "\n",
-    "            pie_chart_purpose(plot_title, labels, values, filename)\n",
+    "            pie_chart_purpose(plot_title, labels, values, color_map, filename)\n",
     "            alt_text = store_alt_text_pie(pd.DataFrame(values, labels), filename, plot_title)\n",
     "\n",
     "        except:\n",

From 1e41c355661f7599328004f956c2366873e095d1 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Thu, 18 Apr 2024 17:16:55 -0600
Subject: [PATCH 35/70] fix label translations

due to duplicate encounters, the likert questions were labeled inaccurately; this check resolves that issue
---
 viz_scripts/survey_responses.ipynb | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 1418fcfe..367e368b 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -159,12 +159,13 @@
     "                    survey_questions[str(label.parentNode.getAttribute(\"ref\").split('/')[-1])] = label.firstChild.data\n",
     "            \n",
     "            if label.parentNode.nodeName == 'item':\n",
-    "                if label.parentNode.parentNode.getAttribute(\"appearance\") == \"likert\":\n",
-    "                    opt_dict[label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data] = label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data\n",
-    "                elif label.parentNode.getElementsByTagName(\"value\"):\n",
-    "                    opt_dict[label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data] = label.firstChild.data  \n",
-    "                elif label.parentNode.getElementsByTagName(\"name\"):\n",
-    "                    opt_dict[label.parentNode.getElementsByTagName(\"name\")[0].firstChild.data] = label.firstChild.data  \n",
+    "                if label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data not in opt_dict.keys(): #prevent overwriting\n",
+    "                    if label.parentNode.parentNode.getAttribute(\"appearance\") == \"likert\":\n",
+    "                        opt_dict[label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data] = label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data\n",
+    "                    elif label.parentNode.getElementsByTagName(\"value\"):\n",
+    "                        opt_dict[label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data] = label.firstChild.data  \n",
+    "                    elif label.parentNode.getElementsByTagName(\"name\"):\n",
+    "                        opt_dict[label.parentNode.getElementsByTagName(\"name\")[0].firstChild.data] = label.firstChild.data  \n",
     "        \n",
     "        quest_dict[survey_name] = survey_questions\n",
     "        \n",

From 80263adcf5a1c33fb4a307f032ad4a09df00513d Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Mon, 22 Apr 2024 15:17:33 -0600
Subject: [PATCH 36/70] add informative print statements

while testing with the survey data, it can be difficult to interpret what is a bug and what is an artifact of churn in the surveys and their conditions - monitoring via print statements is very helpful
---
 viz_scripts/survey_responses.ipynb | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 367e368b..8a147083 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -270,6 +270,7 @@
     "\n",
     "    #only want trips with sections\n",
     "    all_composite_trips = all_composite_trips[all_composite_trips['sections'].str.len() > 0]\n",
+    "    print(len(all_composite_trips), \"trips with sections\")\n",
     "else:\n",
     "    survey_trips = pd.DataFrame()"
    ]
@@ -316,7 +317,10 @@
     "            total_dfs[survey_name] = pd.DataFrame()\n",
     "else:\n",
     "    survey_name = list(sheet_list.keys())[0] #there is only one if non-conditional\n",
-    "    total_dfs[survey_name] = all_composite_trips"
+    "    total_dfs[survey_name] = all_composite_trips\n",
+    "    \n",
+    "for key in total_dfs.keys():\n",
+    "    print(key, \":\", len(total_dfs[key]), \"trips\", total_dfs[key].user_id.nunique(), \"users\")"
    ]
   },
   {
@@ -356,6 +360,13 @@
     "            generate_missing_plot(plot_title_no_quality, debug_df, filename)\n",
     "            alt_text = store_alt_text_missing(debug_df, filename, plot_title_no_quality)"
    ]
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e59ca0d5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From c98b6237d06893a91e2f1323e7daaf221625c773 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Mon, 22 Apr 2024 15:17:50 -0600
Subject: [PATCH 37/70] add emcommon as a dependency to the yml file

---
 viz_scripts/docker/environment36.dashboard.additions.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/viz_scripts/docker/environment36.dashboard.additions.yml b/viz_scripts/docker/environment36.dashboard.additions.yml
index 59d26ebc..94e55988 100644
--- a/viz_scripts/docker/environment36.dashboard.additions.yml
+++ b/viz_scripts/docker/environment36.dashboard.additions.yml
@@ -4,6 +4,8 @@ channels:
 - defaults
 dependencies:
 - seaborn=0.11.1
+- git
 - pip:
   - nbparameterise==0.6
   - devcron==0.4
+  - git+https://github.com/JGreenlee/e-mission-common@0.4.0

From 10fe708639896458c21961e26580a5263e14aaac Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Mon, 22 Apr 2024 15:53:39 -0600
Subject: [PATCH 38/70] move if conditions

The if condition preventing duplication only worked for some cases; move the check inside each case to prevent errors on different configurations.
---
 viz_scripts/survey_responses.ipynb | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 8a147083..590ef07f 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -159,12 +159,14 @@
     "                    survey_questions[str(label.parentNode.getAttribute(\"ref\").split('/')[-1])] = label.firstChild.data\n",
     "            \n",
     "            if label.parentNode.nodeName == 'item':\n",
-    "                if label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data not in opt_dict.keys(): #prevent overwriting\n",
-    "                    if label.parentNode.parentNode.getAttribute(\"appearance\") == \"likert\":\n",
+    "                if label.parentNode.parentNode.getAttribute(\"appearance\") == \"likert\":\n",
+    "                    if label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data not in opt_dict.keys():\n",
     "                        opt_dict[label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data] = label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data\n",
-    "                    elif label.parentNode.getElementsByTagName(\"value\"):\n",
+    "                elif label.parentNode.getElementsByTagName(\"value\"):\n",
+    "                    if label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data not in opt_dict.keys():\n",
     "                        opt_dict[label.parentNode.getElementsByTagName(\"value\")[0].firstChild.data] = label.firstChild.data  \n",
-    "                    elif label.parentNode.getElementsByTagName(\"name\"):\n",
+    "                elif label.parentNode.getElementsByTagName(\"name\"):\n",
+    "                    if label.parentNode.getElementsByTagName(\"name\")[0].firstChild.data not in opt_dict.keys():\n",
     "                        opt_dict[label.parentNode.getElementsByTagName(\"name\")[0].firstChild.data] = label.firstChild.data  \n",
     "        \n",
     "        quest_dict[survey_name] = survey_questions\n",

From 1b882659143f3d75b52df04d952ce8bbc7178ea6 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Mon, 22 Apr 2024 15:55:45 -0600
Subject: [PATCH 39/70] choose a more distinct color palette

The tab20 colormap has "paired" colors, which produced shades very similar to one another on the same chart; this was made worse when colors were "mixed" to account for duplicate selection.

could also choose Set3 for 12 colors instead of 10, but seems unlikely to have more than 10 answers to the same question and these are most similar to what is used on the rest of the dashboard
---
 viz_scripts/scaffolding.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index 8f3246db..a4734f05 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -238,7 +238,7 @@ def mapping_color_surveys(dic_options):
     
     colors = {}
     for i in range(len(dictionary_values)):
-        colors[dictionary_values[i]] = plt.cm.tab20.colors[i%20]
+        colors[dictionary_values[i]] = plt.cm.tab10.colors[i%10]
 
     return colors
 

From 1a7b911569fadb12f7f9d4cc8471eab7690cd279 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Mon, 22 Apr 2024 15:58:26 -0600
Subject: [PATCH 40/70] clean up inputs cell

removed the sample survey_info, this is added into the notebook for testing or passed in when the notebook is run
---
 viz_scripts/survey_responses.ipynb | 57 ++----------------------------
 1 file changed, 2 insertions(+), 55 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 590ef07f..5943cd53 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -13,62 +13,9 @@
     "study_type = \"study\"\n",
     "mode_of_interest = 'e-bike'\n",
     "include_test_users = False\n",
-    "dynamic_labels = {  }\n",
+    "dynamic_labels = {}\n",
     "use_imperial = False\n",
-    "\n",
-    "#probably going to end up passing this in\n",
-    "survey_info =  {\n",
-    "    \"surveys\": {\n",
-    "      \"UserProfileSurvey\": {\n",
-    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-onboarding-v0.xml\",\n",
-    "        \"version\": 1,\n",
-    "        \"compatibleWith\": 1,\n",
-    "        \"dataKey\": \"manual/demographic_survey\",\n",
-    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
-    "      },\n",
-    "      \"DfcEvReturnTrip\": {\n",
-    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-ev-return-trip-v0.xml\",\n",
-    "        \"version\": 1,\n",
-    "        \"compatibleWith\": 1,\n",
-    "        \"dataKey\": \"manual/trip_user_input\",\n",
-    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
-    "      },\n",
-    "      \"DfcEvRoamingTrip\": {\n",
-    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-ev-roaming-trip-v0.xml\",\n",
-    "        \"version\": 1,\n",
-    "        \"compatibleWith\": 1,\n",
-    "        \"dataKey\": \"manual/trip_user_input\",\n",
-    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
-    "      },\n",
-    "      \"DfcGasTrip\": {\n",
-    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-gas-trip-v0.xml\",\n",
-    "        \"version\": 1,\n",
-    "        \"compatibleWith\": 1,\n",
-    "        \"dataKey\": \"manual/trip_user_input\",\n",
-    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
-    "      }\n",
-    "    },\n",
-    "    \"buttons\": {\n",
-    "      \"trip-label\": [\n",
-    "        {\n",
-    "          \"surveyName\": \"DfcGasTrip\",\n",
-    "          \"not-filled-in-label\": { \"en\": \"Gas Car Survey\" },\n",
-    "          \"showsIf\": \"sections[0]['sensed_mode_str'] == 'CAR'\"\n",
-    "        },\n",
-    "        {\n",
-    "          \"surveyName\": \"DfcEvRoamingTrip\",\n",
-    "          \"not-filled-in-label\": { \"en\": \"EV Survey\" },\n",
-    "          \"showsIf\": \"sections[0]['sensed_mode_str'] != 'CAR' && !pointIsWithinBounds(end_loc['coordinates'], [[-105.153, 39.745], [-105.150, 39.743]])\"\n",
-    "        },\n",
-    "        {\n",
-    "          \"surveyName\": \"DfcEvReturnTrip\",\n",
-    "          \"not-filled-in-label\": { \"en\": \"EV Survey\" },\n",
-    "          \"showsIf\": \"sections[0]['sensed_mode_str'] != 'CAR' && pointIsWithinBounds(end_loc['coordinates'], [[-105.153, 39.745], [-105.150, 39.743]])\"\n",
-    "        }\n",
-    "      ]\n",
-    "    },\n",
-    "    \"trip-labels\": \"ENKETO\"\n",
-    "  }"
+    "survey_info =  {}"
    ]
   },
   {

From 0fae9fc5e62fcab1f04a303c6859fb1cbc3821c5 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Tue, 23 Apr 2024 08:49:19 -0600
Subject: [PATCH 41/70] change condition for the debug dataframe

Error when running these charts from the command line caused by empty dataframe that still had columns

Prevented the error by checking that the dataframe was not empty as well as for the column
---
 viz_scripts/generic_metrics_sensed.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/viz_scripts/generic_metrics_sensed.ipynb b/viz_scripts/generic_metrics_sensed.ipynb
index 762cbf1a..eb68a0f5 100644
--- a/viz_scripts/generic_metrics_sensed.ipynb
+++ b/viz_scripts/generic_metrics_sensed.ipynb
@@ -156,7 +156,7 @@
     "    alt_text = store_alt_text_pie(pd.DataFrame(values_d10, labels_d10), file_name, plot_title)\n",
     "    print(expanded_ct.loc[(expanded_ct['distance'] <= cutoff)].primary_mode.value_counts(dropna=True))\n",
     "except:\n",
-    "    d10_df = expanded_ct.query(\"distance <= \" + cutoff) if \"distance\" in expanded_ct.columns else expanded_ct\n",
+    "    d10_df = expanded_ct.query(\"distance <= \" + str(cutoff)) if 'distance' in expanded_ct.columns and len(expanded_ct) > 0 else expanded_ct\n",
     "    debug_df.loc[\"Trips_less_than_80th_pct\"] = scaffolding.trip_label_count(\"Mode_confirm\", d10_df)\n",
     "    generate_missing_plot(plot_title_no_quality,debug_df,file_name)\n",
     "    alt_text = store_alt_text_missing(debug_df, file_name, plot_title_no_quality)"

From f1e94f5fd7e012d1f6de7170e9f154bb4333ea68 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@centre.edu>
Date: Tue, 23 Apr 2024 09:01:50 -0600
Subject: [PATCH 42/70] missing punctuation - fix corrupt notebook

---
 viz_scripts/survey_responses.ipynb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 5943cd53..b207a88e 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -309,6 +309,7 @@
     "            generate_missing_plot(plot_title_no_quality, debug_df, filename)\n",
     "            alt_text = store_alt_text_missing(debug_df, filename, plot_title_no_quality)"
    ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,

From 0f31ed68a7e1bdfb41990b37fa25eaf97c3265dc Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Sun, 5 May 2024 03:02:22 -0600
Subject: [PATCH 43/70] switch from composite to confirmed trips

---
 viz_scripts/survey_responses.ipynb | 74 +++++++++++++++++++-----------
 1 file changed, 46 insertions(+), 28 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index b207a88e..1289fbc3 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -131,6 +131,7 @@
     "        data_key = list(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse'].keys())[0]\n",
     "        row = pd.json_normalize(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse'][data_key])\n",
     "        row['user_id'] = df.loc[i].user_id\n",
+    "        row['survey_name'] = df.loc[i]['survey_name']\n",
     "        rows.append(row)\n",
     "        \n",
     "    if len(rows) > 0:\n",
@@ -199,13 +200,13 @@
    "source": [
     "#load all of the composite trips - require sections\n",
     "tq = scaffolding.get_time_query(year, month)\n",
-    "all_composite_trips = scaffolding.load_all_composite_trips(tq)\n",
+    "all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)\n",
     "#we need to filter out trips (based on if including test users)\n",
-    "all_composite_trips = scaffolding.filter_composite_trips(all_composite_trips, program, include_test_users)\n",
+    "all_confirmed_trips = scaffolding.filter_composite_trips(all_confirmed_trips, program, include_test_users)\n",
     "\n",
-    "if len(all_composite_trips) > 0:\n",
+    "if len(all_confirmed_trips) > 0:\n",
     "    #remove blank inputs\n",
-    "    survey_trips = all_composite_trips[all_composite_trips['user_input'] != {}]\n",
+    "    survey_trips = all_confirmed_trips[all_confirmed_trips['user_input'] != {}]\n",
     "\n",
     "    #survey counts df\n",
     "    survey_trips = survey_trips.reset_index()\n",
@@ -214,12 +215,6 @@
     "    #gather the cols needed for charts and text\n",
     "    survey_trips = survey_trips[['survey_name', 'user_id', 'user_input']]\n",
     "    \n",
-    "    # transform data to meet a data format requirement\n",
-    "    all_composite_trips['sections'] = all_composite_trips['sections'].apply(lambda x: [x[0]['data']] if len(x) > 0 else x)\n",
-    "\n",
-    "    #only want trips with sections\n",
-    "    all_composite_trips = all_composite_trips[all_composite_trips['sections'].str.len() > 0]\n",
-    "    print(len(all_composite_trips), \"trips with sections\")\n",
     "else:\n",
     "    survey_trips = pd.DataFrame()"
    ]
@@ -272,6 +267,19 @@
     "    print(key, \":\", len(total_dfs[key]), \"trips\", total_dfs[key].user_id.nunique(), \"users\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5732576d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#merge any cols with the same name into 1 col -- should have different values in their survey_name col\n",
+    "#https://stackoverflow.com/questions/24390645/python-pandas-merge-samed-name-columns-in-a-dataframe\n",
+    "def sjoin(x): return ';'.join(x[x.notnull()].astype(str))\n",
+    "df_responses = df_responses.groupby(level=0, axis=1).apply(lambda x: x.apply(sjoin, axis=1))"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -284,39 +292,49 @@
     "#create one plot per question in the survey\n",
     "for survey_name in quest_dict.keys():\n",
     "    print(\"Charts for:\", survey_name)\n",
-    "    debug_df = generate_debug_df(program, include_test_users, total_dfs[survey_name], total_dfs[survey_name][total_dfs[survey_name]['user_input'] != {} if len(total_dfs[survey_name]) > 0 else pd.DataFrame()])\n",
-    "    \n",
+    "#     debug_df = generate_debug_df(program, include_test_users, total_dfs[survey_name], total_dfs[survey_name][total_dfs[survey_name]['user_input'] != {} if len(total_dfs[survey_name]) > 0 else pd.DataFrame()])\n",
+    "    debug_df = pd.DataFrame()\n",
     "\n",
     "    for col in quest_dict[survey_name].keys():\n",
     "        \n",
+    "        print(col)\n",
+    "        \n",
     "        filename = col + file_suffix\n",
-    "        plot_title_no_quality = quest_dict[survey_name][col]\n",
+    "        plot_title_no_quality = survey_name + \"\\n\" + quest_dict[survey_name][col]\n",
     "\n",
     "        try:\n",
     "            quest_frame = df_responses.copy()\n",
-    "            quest_frame = quest_frame[quest_frame[col] != \"\"]\n",
-    "            qual_text = scaffolding.get_quality_text(total_dfs[survey_name], quest_frame, mode_of_interest, include_test_users)\n",
+    "            \n",
+    "#             qual_text = scaffolding.get_quality_text(total_dfs[survey_name], quest_frame, mode_of_interest, include_test_users)\n",
+    "            qual_text = \"debug qual text later\"\n",
     "            plot_title = plot_title_no_quality+'\\n'+qual_text\n",
     "            \n",
-    "            labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n",
-    "            labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
-    "            values = quest_frame[col].value_counts(dropna=True).tolist()\n",
+    "            fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
+    "            \n",
+    "            # We will have text results corresponding to the axes for simplicity and consistency\n",
+    "            \n",
+    "            quest_frame[col] = quest_frame[col].apply(lambda x: traslate_options(x, opt_dict))\n",
+    "#             quest_frame[col] = traslate_options(quest_frame[col], opt_dict)\n",
+    "            text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]  \n",
     "\n",
-    "            pie_chart_purpose(plot_title, labels, values, color_map, filename)\n",
-    "            alt_text = store_alt_text_pie(pd.DataFrame(values, labels), filename, plot_title)\n",
+    "            plot_df = quest_frame.groupby(['survey_name', col]).count().reset_index()\n",
+    "            plot_df = plot_df[plot_df['survey_name'] == survey_name]\n",
+    "            plot_df = plot_df.set_index(col)[['start']]\n",
+    "            \n",
+    "#             labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n",
+    "#             labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
+    "#             values = quest_frame[col].value_counts(dropna=True).tolist()       \n",
+    "#             plot_df = pd.DataFrame({\"label\": labels, \"value\": values}).set_index('label')\n",
+    "            \n",
+    "            plot_and_text_stacked_bar_chart(plot_df, \"Responses\", ax, text_results, color_map, debug_df)\n",
+    "            \n",
+    "            set_title_and_save(fig, text_results, plot_title, filename)\n",
+    "            \n",
     "\n",
     "        except:\n",
     "            generate_missing_plot(plot_title_no_quality, debug_df, filename)\n",
     "            alt_text = store_alt_text_missing(debug_df, filename, plot_title_no_quality)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e59ca0d5",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

From 596886a8b3e2b8d0af3cfacf28d530acf9e675c5 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Sun, 5 May 2024 03:02:44 -0600
Subject: [PATCH 44/70] update translation of options - issues with numbers

---
 viz_scripts/survey_responses.ipynb | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 1289fbc3..b8aaa033 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -151,15 +151,22 @@
     "#output: translated to readable list, with multiples handled\n",
     "#the for loop will run n times, where num_options <= n < all possible combinations of options\n",
     "#if people are selecting many different combinations, could be large\n",
-    "def traslate_options(labels, opt_dict):\n",
-    "    for i in range(len(labels)):\n",
-    "        l_labels = str(labels[i]).split(\" \")\n",
+    "def traslate_options(label, opt_dict):\n",
+    "    try:\n",
+    "        l_labels = str(label).split(\" \")\n",
     "        for k in range(len(l_labels)):\n",
-    "            l_labels[k] = opt_dict[l_labels[k]]\n",
+    "            try:\n",
+    "                #workaround for the case wher we had \"5.0\" and need \"5\"\n",
+    "                l_labels[k] = opt_dict[str(int(float(l_labels[k])))]\n",
+    "            except:\n",
+    "                l_labels[k] = opt_dict[l_labels[k]]\n",
     "        sep = \"\\n\"\n",
-    "        labels[i] = sep.join(l_labels)\n",
-    "    \n",
-    "    return labels\n",
+    "        \n",
+    "        final = sep.join(l_labels)\n",
+    "        print(label, final)\n",
+    "        return final\n",
+    "    except:\n",
+    "        return label #probably a row without an answer\n",
     "\n",
     "#create a debug dataframe\n",
     "def generate_debug_df(program, include_test_users, full_df, labeled_df):\n",

From 8d0e285800c23dc14fb254a31165d040f8c81103 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Sun, 5 May 2024 03:03:23 -0600
Subject: [PATCH 45/70] update color mapping

under construction since this is breaking some of the charts currently
---
 viz_scripts/survey_responses.ipynb | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index b8aaa033..ea6014cc 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -239,7 +239,20 @@
     "print(\"Options dictionary:\\n\", opt_dict)\n",
     "\n",
     "#color dictionary\n",
-    "color_map = scaffolding.mapping_color_surveys(opt_dict)\n",
+    "from collections import OrderedDict\n",
+    "def mapping_color_surveys(dic_options):\n",
+    "    dictionary_values = (list(OrderedDict.fromkeys(dic_options.values())))\n",
+    "#     dictionary_values = list(dic_options.keys())\n",
+    "\n",
+    "    colors = {}\n",
+    "    for i in range(len(dictionary_values)):\n",
+    "        colors[dictionary_values[i]] = plt.cm.tab10.colors[i%10]\n",
+    "    \n",
+    "    return colors\n",
+    "\n",
+    "color_map = mapping_color_surveys(opt_dict)\n",
+    "\n",
+    "print(\"\\n\", color_map)\n",
     "\n",
     "#format survey trips into responses dataframe\n",
     "df_responses = create_dataframe(survey_trips)\n",

From fd16cad4cced603e4bbb3e4111fbf434f3e9fc12 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Sun, 5 May 2024 22:36:20 -0600
Subject: [PATCH 46/70] introduce workaround for missing colors

added a workaround for missing survey colors - this includes "averaging" the multiple response questions and adding a dummy color if a label is missing - potentially from churn on survey updates, this should probably not be permanent
---
 viz_scripts/survey_responses.ipynb | 110 ++++++++++++++++++++++-------
 1 file changed, 85 insertions(+), 25 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index ea6014cc..0eb8954d 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -163,7 +163,7 @@
     "        sep = \"\\n\"\n",
     "        \n",
     "        final = sep.join(l_labels)\n",
-    "        print(label, final)\n",
+    "        \n",
     "        return final\n",
     "    except:\n",
     "        return label #probably a row without an answer\n",
@@ -226,6 +226,37 @@
     "    survey_trips = pd.DataFrame()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52474e83",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#TODO get the bluetooth modes\n",
+    "#HACK to get the bluetooth mode\n",
+    "\n",
+    "# import emission.storage.decorations.trip_queries as esdt\n",
+    "# import emission.storage.timeseries.abstract_timeseries as esta\n",
+    "\n",
+    "# def get_confirmed_mode(trip, ble_vehicle_mapping):\n",
+    "# #     ts = esta.TimeSeries.get_time_series(trip.user_id)\n",
+    "# #     trip_obj = ts.get_entry_from_id(\"analysis/cleaned_trip\", trip._id)\n",
+    "# #     matching_beacon = esdt.get_user_input_for_timeline_entry(ts, trip_obj, \"background/bluetooth_ble\")\n",
+    "    \n",
+    "#     matching_beacon = esdt.get_user_input_for_trip(\"analysis/confirmed_trip\", trip._id, trip.user_id, \"background/bluetooth_ble\")\n",
+    "    \n",
+    "\n",
+    "#     return matching_beacon\n",
+    "\n",
+    "# all_confirmed_trips.iloc[6].user_input\n",
+    "# all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)\n",
+    "# print(get_confirmed_mode(all_confirmed_trips.iloc[6], {}))\n",
+    "# all_confirmed_trips['confirmedMode'] = all_confirmed_trips.apply(lambda trip: get_confirmed_mode(trip, {}), axis=1)\n",
+    "\n",
+    "# print(all_confirmed_trips.confirmedMode.unique())"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -242,8 +273,7 @@
     "from collections import OrderedDict\n",
     "def mapping_color_surveys(dic_options):\n",
     "    dictionary_values = (list(OrderedDict.fromkeys(dic_options.values())))\n",
-    "#     dictionary_values = list(dic_options.keys())\n",
-    "\n",
+    "    \n",
     "    colors = {}\n",
     "    for i in range(len(dictionary_values)):\n",
     "        colors[dictionary_values[i]] = plt.cm.tab10.colors[i%10]\n",
@@ -252,8 +282,6 @@
     "\n",
     "color_map = mapping_color_surveys(opt_dict)\n",
     "\n",
-    "print(\"\\n\", color_map)\n",
-    "\n",
     "#format survey trips into responses dataframe\n",
     "df_responses = create_dataframe(survey_trips)\n",
     "file_suffix = scaffolding.get_file_suffix(year, month, program)"
@@ -266,25 +294,26 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "#TODO - work on the total dfs\n",
     "#create the total dfs\n",
-    "total_dfs = {}\n",
-    "#for conditional surveys!\n",
-    "wrapped_config = {'survey_info': survey_info}\n",
-    "if 'buttons' in survey_info.keys():\n",
-    "    all_composite_trips['survey_name_prompted'] = all_composite_trips.apply(lambda row: conditional_surveys.survey_prompted_for_trip(row.to_dict(), wrapped_config), axis=1)\n",
+    "# total_dfs = {}\n",
+    "# #for conditional surveys!\n",
+    "# wrapped_config = {'survey_info': survey_info}\n",
+    "# if 'buttons' in survey_info.keys():\n",
+    "#     all_confirmed_trips['survey_name_prompted'] = all_confirmed_trips.apply(lambda row: conditional_surveys.survey_prompted_for_trip(row.to_dict(), wrapped_config), axis=1)\n",
     "    \n",
-    "    for survey_name in list(sheet_list.keys()):\n",
-    "        if survey_name in all_composite_trips['survey_name_prompted'].unique():\n",
-    "            total_dfs[survey_name] = all_composite_trips[all_composite_trips['survey_name_prompted'] == survey_name]\n",
-    "        else:\n",
-    "            #never prompted\n",
-    "            total_dfs[survey_name] = pd.DataFrame()\n",
-    "else:\n",
-    "    survey_name = list(sheet_list.keys())[0] #there is only one if non-conditional\n",
-    "    total_dfs[survey_name] = all_composite_trips\n",
+    "#     for survey_name in list(sheet_list.keys()):\n",
+    "#         if survey_name in all_confirmed_trips['survey_name_prompted'].unique():\n",
+    "#             total_dfs[survey_name] = all_confirmed_trips[all_confirmed_trips['survey_name_prompted'] == survey_name]\n",
+    "#         else:\n",
+    "#             #never prompted\n",
+    "#             total_dfs[survey_name] = pd.DataFrame()\n",
+    "# else:\n",
+    "#     survey_name = list(sheet_list.keys())[0] #there is only one if non-conditional\n",
+    "#     total_dfs[survey_name] = all_confirmed_trips\n",
     "    \n",
-    "for key in total_dfs.keys():\n",
-    "    print(key, \":\", len(total_dfs[key]), \"trips\", total_dfs[key].user_id.nunique(), \"users\")"
+    "# for key in total_dfs.keys():\n",
+    "#     print(key, \":\", len(total_dfs[key]), \"trips\", total_dfs[key].user_id.nunique(), \"users\")"
    ]
   },
   {
@@ -300,6 +329,39 @@
     "df_responses = df_responses.groupby(level=0, axis=1).apply(lambda x: x.apply(sjoin, axis=1))"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4e91a23",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#workaround missing colors\n",
+    "def get_survey_colors(labels, existing_map):\n",
+    "    color_map = {}\n",
+    "    for label in labels:\n",
+    "        l_labels = label.split(\"\\n\")\n",
+    "        color = (0,0,0)\n",
+    "        n = 0\n",
+    "        for i in range(len(l_labels)):\n",
+    "            try:\n",
+    "                color = tuple(map(lambda i, j: i + j, color, existing_map[l_labels[i]]))\n",
+    "            except:\n",
+    "                print(\"missing color\")\n",
+    "                color = tuple(map(lambda i, j: i + j, color, (0.1, 0.2, 0.5)))\n",
+    "            \n",
+    "            n += 1\n",
+    "            \n",
+    "        print(color, n)\n",
+    "        color = [x/n for x in color]\n",
+    "        \n",
+    "        color_map[label] = color\n",
+    "    \n",
+    "    print(color_map)\n",
+    "    \n",
+    "    return color_map"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -346,10 +408,8 @@
     "#             values = quest_frame[col].value_counts(dropna=True).tolist()       \n",
     "#             plot_df = pd.DataFrame({\"label\": labels, \"value\": values}).set_index('label')\n",
     "            \n",
-    "            plot_and_text_stacked_bar_chart(plot_df, \"Responses\", ax, text_results, color_map, debug_df)\n",
-    "            \n",
+    "            plot_and_text_stacked_bar_chart(plot_df, \"Responses\", ax, text_results, get_survey_colors(list(plot_df.index.values) ,color_map), debug_df)\n",
     "            set_title_and_save(fig, text_results, plot_title, filename)\n",
-    "            \n",
     "\n",
     "        except:\n",
     "            generate_missing_plot(plot_title_no_quality, debug_df, filename)\n",
@@ -373,7 +433,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.18"
+   "version": "3.9.19"
   }
  },
  "nbformat": 4,

From 52bc509409b86f393a7f9189ebc08f3bac68d002 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Sun, 5 May 2024 22:36:38 -0600
Subject: [PATCH 47/70] add todo comments, remove old code

---
 viz_scripts/survey_responses.ipynb | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 0eb8954d..05709c36 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -374,6 +374,7 @@
     "#create one plot per question in the survey\n",
     "for survey_name in quest_dict.keys():\n",
     "    print(\"Charts for:\", survey_name)\n",
+    "    #TODO - fix debug df\n",
     "#     debug_df = generate_debug_df(program, include_test_users, total_dfs[survey_name], total_dfs[survey_name][total_dfs[survey_name]['user_input'] != {} if len(total_dfs[survey_name]) > 0 else pd.DataFrame()])\n",
     "    debug_df = pd.DataFrame()\n",
     "\n",
@@ -387,6 +388,7 @@
     "        try:\n",
     "            quest_frame = df_responses.copy()\n",
     "            \n",
+    "            #TODO - fix quality text\n",
     "#             qual_text = scaffolding.get_quality_text(total_dfs[survey_name], quest_frame, mode_of_interest, include_test_users)\n",
     "            qual_text = \"debug qual text later\"\n",
     "            plot_title = plot_title_no_quality+'\\n'+qual_text\n",
@@ -396,18 +398,12 @@
     "            # We will have text results corresponding to the axes for simplicity and consistency\n",
     "            \n",
     "            quest_frame[col] = quest_frame[col].apply(lambda x: traslate_options(x, opt_dict))\n",
-    "#             quest_frame[col] = traslate_options(quest_frame[col], opt_dict)\n",
     "            text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]  \n",
     "\n",
     "            plot_df = quest_frame.groupby(['survey_name', col]).count().reset_index()\n",
     "            plot_df = plot_df[plot_df['survey_name'] == survey_name]\n",
     "            plot_df = plot_df.set_index(col)[['start']]\n",
     "            \n",
-    "#             labels = quest_frame[col].value_counts(dropna=True).keys().tolist()\n",
-    "#             labels = traslate_options(quest_frame[col].value_counts(dropna=True).keys().tolist(), opt_dict)\n",
-    "#             values = quest_frame[col].value_counts(dropna=True).tolist()       \n",
-    "#             plot_df = pd.DataFrame({\"label\": labels, \"value\": values}).set_index('label')\n",
-    "            \n",
     "            plot_and_text_stacked_bar_chart(plot_df, \"Responses\", ax, text_results, get_survey_colors(list(plot_df.index.values) ,color_map), debug_df)\n",
     "            set_title_and_save(fig, text_results, plot_title, filename)\n",
     "\n",

From 548a449d6b46130a0441eb39b372adca0ee56459 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Mon, 6 May 2024 12:10:11 -0600
Subject: [PATCH 48/70] update how the dataframe is formed

taking into account https://github.com/e-mission/em-public-dashboard/pull/124#discussion_r1590180144
---
 viz_scripts/survey_responses.ipynb | 37 ++++++++++++++++--------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 05709c36..809ffecc 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -9,6 +9,8 @@
    "source": [
     "year = 2024\n",
     "month = 4\n",
+    "year = None\n",
+    "month = None\n",
     "program = \"default\"\n",
     "study_type = \"study\"\n",
     "mode_of_interest = 'e-bike'\n",
@@ -16,6 +18,8 @@
     "dynamic_labels = {}\n",
     "use_imperial = False\n",
     "survey_info =  {}"
+    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-onboarding-v1.xml\",\n",
+    "        \"version\": 1,\n",
    ]
   },
   {
@@ -121,31 +125,30 @@
     "    return opt_dict, quest_dict\n",
     "\n",
     "\n",
+    "def get_response_to_normalize(row):\n",
+    "    data_key = list(row.user_input['trip_user_input']['data']['jsonDocResponse'].keys())[0]\n",
+    "    response = row.user_input['trip_user_input']['data']['jsonDocResponse'][data_key]\n",
+    "\n",
+    "    return response \n",
+    "\n",
     "#input: dataframe containing all trips that have non-blank user_input\n",
     "#output: dataframe with questions in the columns and answers in the rows\n",
     "#for loop will run n survey responses times (this could get big!)\n",
     "def create_dataframe(df_trips_w_surveys):\n",
     "    df = df_trips_w_surveys.reset_index()\n",
-    "    rows = []\n",
-    "    for i in range(len(df)):\n",
-    "        data_key = list(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse'].keys())[0]\n",
-    "        row = pd.json_normalize(df.loc[i].user_input['trip_user_input']['data']['jsonDocResponse'][data_key])\n",
-    "        row['user_id'] = df.loc[i].user_id\n",
-    "        row['survey_name'] = df.loc[i]['survey_name']\n",
-    "        rows.append(row)\n",
-    "        \n",
-    "    if len(rows) > 0:\n",
-    "        df = pd.concat(rows)\n",
+    "    \n",
+    "    #normalize the survey responses\n",
+    "    normalized_responses = pd.json_normalize(df.apply(get_response_to_normalize))\n",
+    "    normalized_responses['survey_name'] = df.apply(lambda x: x.survey_name)\n",
     "\n",
-    "        rename_nests = {}\n",
-    "        for col in df.columns:\n",
-    "            rename_nests[col] = col.split('.')[-1]\n",
+    "    #update the column names\n",
+    "    rename_nests = {}\n",
+    "    for col in normalized_responses.columns:\n",
+    "        rename_nests[col] = normalized_responses.split('.')[-1]\n",
     "\n",
-    "        df = df.rename(columns=rename_nests)\n",
-    "    else:\n",
-    "        df = pd.DataFrame()\n",
+    "    normalized_responses = normalized_responses.rename(columns=rename_nests)\n",
     "\n",
-    "    return df\n",
+    "    return normalized_responses\n",
     "\n",
     "#input: list of labels that will end up on the chart\n",
     "#output: translated to readable list, with multiples handled\n",

From 7e64d1298009c395ec8c68474fbfaca9dc892134 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Mon, 6 May 2024 12:43:19 -0600
Subject: [PATCH 49/70] update dataframe create

previous change to vectorized operations was incomplete
---
 viz_scripts/survey_responses.ipynb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 809ffecc..8524eff5 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -138,13 +138,13 @@
     "    df = df_trips_w_surveys.reset_index()\n",
     "    \n",
     "    #normalize the survey responses\n",
-    "    normalized_responses = pd.json_normalize(df.apply(get_response_to_normalize))\n",
-    "    normalized_responses['survey_name'] = df.apply(lambda x: x.survey_name)\n",
+    "    normalized_responses = pd.json_normalize(df.apply(get_response_to_normalize, axis=1))\n",
+    "    normalized_responses['survey_name'] = df['survey_name']\n",
     "\n",
     "    #update the column names\n",
     "    rename_nests = {}\n",
     "    for col in normalized_responses.columns:\n",
-    "        rename_nests[col] = normalized_responses.split('.')[-1]\n",
+    "        rename_nests[col] = col.split('.')[-1]\n",
     "\n",
     "    normalized_responses = normalized_responses.rename(columns=rename_nests)\n",
     "\n",

From feb80dc7f39734925ce08a3460b297601db4b929 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Mon, 6 May 2024 18:32:15 -0600
Subject: [PATCH 50/70] bump up the emcommon version to latest release

---
 viz_scripts/docker/environment36.dashboard.additions.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/viz_scripts/docker/environment36.dashboard.additions.yml b/viz_scripts/docker/environment36.dashboard.additions.yml
index 94e55988..1b63535f 100644
--- a/viz_scripts/docker/environment36.dashboard.additions.yml
+++ b/viz_scripts/docker/environment36.dashboard.additions.yml
@@ -8,4 +8,4 @@ dependencies:
 - pip:
   - nbparameterise==0.6
   - devcron==0.4
-  - git+https://github.com/JGreenlee/e-mission-common@0.4.0
+  - git+https://github.com/JGreenlee/e-mission-common@0.4.3

From 2dbd7235a8b74da3c1836dd67d92edbda38c9e35 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Mon, 6 May 2024 18:34:11 -0600
Subject: [PATCH 51/70] reintroduce "assigned" surveys, update quality text

---
 viz_scripts/survey_responses.ipynb | 177 ++++++++++++-----------------
 1 file changed, 70 insertions(+), 107 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 8524eff5..e34085fb 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -140,6 +140,8 @@
     "    #normalize the survey responses\n",
     "    normalized_responses = pd.json_normalize(df.apply(get_response_to_normalize, axis=1))\n",
     "    normalized_responses['survey_name'] = df['survey_name']\n",
+    "    normalized_responses['user_id'] = df['user_id']\n",
+    "    \n",
     "\n",
     "    #update the column names\n",
     "    rename_nests = {}\n",
@@ -183,7 +185,43 @@
     "            },\n",
     "        orient='index', columns=[\"value\"])\n",
     "    \n",
-    "    return debug_df"
+    "    return debug_df\n",
+    "\n",
+    "#color dictionary\n",
+    "from collections import OrderedDict\n",
+    "def mapping_color_surveys(dic_options):\n",
+    "    dictionary_values = (list(OrderedDict.fromkeys(dic_options.values())))\n",
+    "    \n",
+    "    colors = {}\n",
+    "    for i in range(len(dictionary_values)):\n",
+    "        colors[dictionary_values[i]] = plt.cm.tab10.colors[i%10]\n",
+    "    \n",
+    "    return colors\n",
+    "\n",
+    "#workaround missing colors\n",
+    "def get_survey_colors(labels, existing_map):\n",
+    "    color_map = {}\n",
+    "    for label in labels:\n",
+    "        l_labels = label.split(\"\\n\")\n",
+    "        color = (0,0,0)\n",
+    "        n = 0\n",
+    "        for i in range(len(l_labels)):\n",
+    "            try:\n",
+    "                color = tuple(map(lambda i, j: i + j, color, existing_map[l_labels[i]]))\n",
+    "            except:\n",
+    "                print(\"missing color\")\n",
+    "                color = tuple(map(lambda i, j: i + j, color, (0.1, 0.2, 0.5)))\n",
+    "            \n",
+    "            n += 1\n",
+    "            \n",
+    "        print(color, n)\n",
+    "        color = [x/n for x in color]\n",
+    "        \n",
+    "        color_map[label] = color\n",
+    "    \n",
+    "    print(color_map)\n",
+    "    \n",
+    "    return color_map"
    ]
   },
   {
@@ -226,44 +264,15 @@
     "    survey_trips = survey_trips[['survey_name', 'user_id', 'user_input']]\n",
     "    \n",
     "else:\n",
-    "    survey_trips = pd.DataFrame()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "52474e83",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#TODO get the bluetooth modes\n",
-    "#HACK to get the bluetooth mode\n",
-    "\n",
-    "# import emission.storage.decorations.trip_queries as esdt\n",
-    "# import emission.storage.timeseries.abstract_timeseries as esta\n",
-    "\n",
-    "# def get_confirmed_mode(trip, ble_vehicle_mapping):\n",
-    "# #     ts = esta.TimeSeries.get_time_series(trip.user_id)\n",
-    "# #     trip_obj = ts.get_entry_from_id(\"analysis/cleaned_trip\", trip._id)\n",
-    "# #     matching_beacon = esdt.get_user_input_for_timeline_entry(ts, trip_obj, \"background/bluetooth_ble\")\n",
-    "    \n",
-    "#     matching_beacon = esdt.get_user_input_for_trip(\"analysis/confirmed_trip\", trip._id, trip.user_id, \"background/bluetooth_ble\")\n",
-    "    \n",
-    "\n",
-    "#     return matching_beacon\n",
-    "\n",
-    "# all_confirmed_trips.iloc[6].user_input\n",
-    "# all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)\n",
-    "# print(get_confirmed_mode(all_confirmed_trips.iloc[6], {}))\n",
-    "# all_confirmed_trips['confirmedMode'] = all_confirmed_trips.apply(lambda trip: get_confirmed_mode(trip, {}), axis=1)\n",
+    "    survey_trips = pd.DataFrame()\n",
     "\n",
-    "# print(all_confirmed_trips.confirmedMode.unique())"
+    "survey_trips.groupby('survey_name').count()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "7b7001a9",
+   "id": "c751b118",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -272,17 +281,6 @@
     "print(\"Questions dictionary:\\n\",quest_dict)\n",
     "print(\"Options dictionary:\\n\", opt_dict)\n",
     "\n",
-    "#color dictionary\n",
-    "from collections import OrderedDict\n",
-    "def mapping_color_surveys(dic_options):\n",
-    "    dictionary_values = (list(OrderedDict.fromkeys(dic_options.values())))\n",
-    "    \n",
-    "    colors = {}\n",
-    "    for i in range(len(dictionary_values)):\n",
-    "        colors[dictionary_values[i]] = plt.cm.tab10.colors[i%10]\n",
-    "    \n",
-    "    return colors\n",
-    "\n",
     "color_map = mapping_color_surveys(opt_dict)\n",
     "\n",
     "#format survey trips into responses dataframe\n",
@@ -297,26 +295,30 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#TODO - work on the total dfs\n",
     "#create the total dfs\n",
-    "# total_dfs = {}\n",
-    "# #for conditional surveys!\n",
-    "# wrapped_config = {'survey_info': survey_info}\n",
-    "# if 'buttons' in survey_info.keys():\n",
-    "#     all_confirmed_trips['survey_name_prompted'] = all_confirmed_trips.apply(lambda row: conditional_surveys.survey_prompted_for_trip(row.to_dict(), wrapped_config), axis=1)\n",
-    "    \n",
-    "#     for survey_name in list(sheet_list.keys()):\n",
-    "#         if survey_name in all_confirmed_trips['survey_name_prompted'].unique():\n",
-    "#             total_dfs[survey_name] = all_confirmed_trips[all_confirmed_trips['survey_name_prompted'] == survey_name]\n",
-    "#         else:\n",
-    "#             #never prompted\n",
-    "#             total_dfs[survey_name] = pd.DataFrame()\n",
-    "# else:\n",
-    "#     survey_name = list(sheet_list.keys())[0] #there is only one if non-conditional\n",
-    "#     total_dfs[survey_name] = all_confirmed_trips\n",
+    "total_dfs = {}\n",
+    "#for conditional surveys!\n",
+    "wrapped_config = {'survey_info': survey_info}\n",
+    "if 'buttons' in survey_info.keys():\n",
+    "    all_confirmed_trips = all_confirmed_trips[all_confirmed_trips['ble_sensed_summary'].notna()]\n",
+    "    all_confirmed_trips[\"confirmedMode_baseMode\"] = all_confirmed_trips.ble_sensed_summary.apply(lambda md: max(md[\"distance\"], key=md[\"distance\"].get))\n",
+    "    all_confirmed_trips['survey_name_prompted'] = all_confirmed_trips.apply(lambda row: conditional_surveys.survey_prompted_for_trip(row.to_dict(), wrapped_config), axis=1)\n",
     "    \n",
-    "# for key in total_dfs.keys():\n",
-    "#     print(key, \":\", len(total_dfs[key]), \"trips\", total_dfs[key].user_id.nunique(), \"users\")"
+    "    for survey_name in list(sheet_list.keys()):\n",
+    "        if survey_name in all_confirmed_trips['survey_name_prompted'].unique():\n",
+    "            total_dfs[survey_name] = all_confirmed_trips[all_confirmed_trips['survey_name_prompted'] == survey_name]\n",
+    "        else:\n",
+    "            #never prompted\n",
+    "            total_dfs[survey_name] = pd.DataFrame()\n",
+    "else:\n",
+    "    survey_name = list(sheet_list.keys())[0] #there is only one if non-conditional\n",
+    "    total_dfs[survey_name] = all_confirmed_trips\n",
+    "\n",
+    "for key in total_dfs.keys():\n",
+    "    if len(total_dfs[key]) > 0:\n",
+    "        print(key, \":\", len(total_dfs[key]), \"trips\", total_dfs[key].user_id.nunique(), \"users\")\n",
+    "    else:\n",
+    "        print(key, \":\", len(total_dfs[key]), \"trips\")"
    ]
   },
   {
@@ -332,39 +334,6 @@
     "df_responses = df_responses.groupby(level=0, axis=1).apply(lambda x: x.apply(sjoin, axis=1))"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d4e91a23",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#workaround missing colors\n",
-    "def get_survey_colors(labels, existing_map):\n",
-    "    color_map = {}\n",
-    "    for label in labels:\n",
-    "        l_labels = label.split(\"\\n\")\n",
-    "        color = (0,0,0)\n",
-    "        n = 0\n",
-    "        for i in range(len(l_labels)):\n",
-    "            try:\n",
-    "                color = tuple(map(lambda i, j: i + j, color, existing_map[l_labels[i]]))\n",
-    "            except:\n",
-    "                print(\"missing color\")\n",
-    "                color = tuple(map(lambda i, j: i + j, color, (0.1, 0.2, 0.5)))\n",
-    "            \n",
-    "            n += 1\n",
-    "            \n",
-    "        print(color, n)\n",
-    "        color = [x/n for x in color]\n",
-    "        \n",
-    "        color_map[label] = color\n",
-    "    \n",
-    "    print(color_map)\n",
-    "    \n",
-    "    return color_map"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -377,29 +346,23 @@
     "#create one plot per question in the survey\n",
     "for survey_name in quest_dict.keys():\n",
     "    print(\"Charts for:\", survey_name)\n",
-    "    #TODO - fix debug df\n",
-    "#     debug_df = generate_debug_df(program, include_test_users, total_dfs[survey_name], total_dfs[survey_name][total_dfs[survey_name]['user_input'] != {} if len(total_dfs[survey_name]) > 0 else pd.DataFrame()])\n",
-    "    debug_df = pd.DataFrame()\n",
+    "    debug_df = generate_debug_df(program, include_test_users, total_dfs[survey_name], total_dfs[survey_name][total_dfs[survey_name]['user_input'] != {} if len(total_dfs[survey_name]) > 0 else pd.DataFrame()])\n",
     "\n",
     "    for col in quest_dict[survey_name].keys():\n",
-    "        \n",
-    "        print(col)\n",
-    "        \n",
     "        filename = col + file_suffix\n",
     "        plot_title_no_quality = survey_name + \"\\n\" + quest_dict[survey_name][col]\n",
     "\n",
     "        try:\n",
     "            quest_frame = df_responses.copy()\n",
+    "            quest_frame = quest_frame[quest_frame['survey_name'] == survey_name]\n",
+    "            quest_frame[col].replace('', np.nan, inplace=True)\n",
+    "            quest_frame.dropna(subset=[col], inplace=True)\n",
     "            \n",
-    "            #TODO - fix quality text\n",
-    "#             qual_text = scaffolding.get_quality_text(total_dfs[survey_name], quest_frame, mode_of_interest, include_test_users)\n",
-    "            qual_text = \"debug qual text later\"\n",
+    "            qual_text = scaffolding.get_quality_text(total_dfs[survey_name], quest_frame, mode_of_interest, include_test_users)\n",
     "            plot_title = plot_title_no_quality+'\\n'+qual_text\n",
     "            \n",
     "            fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
-    "            \n",
-    "            # We will have text results corresponding to the axes for simplicity and consistency\n",
-    "            \n",
+    "\n",
     "            quest_frame[col] = quest_frame[col].apply(lambda x: traslate_options(x, opt_dict))\n",
     "            text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]  \n",
     "\n",

From b02c1de1e19c852ae7231cc6a981763697715ed5 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Mon, 6 May 2024 19:08:56 -0600
Subject: [PATCH 52/70] update import conventions

---
 viz_scripts/survey_responses.ipynb | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index e34085fb..c270aa50 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -29,13 +29,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from collections import defaultdict\n",
+    "import collections.defaultdict as defaultdict\n",
+    "import collections.OrderedDict as OrderedDict\n",
     "import urllib.request\n",
     "import numpy as np\n",
     "import pandas as pd\n",
-    "from xml.dom import minidom\n",
+    "import xml.dom.minidom as minidom\n",
     "\n",
-    "from emcommon.survey import conditional_surveys\n",
+    "import emcommon.survey.conditional_surveys as conditional_surveys\n",
     "\n",
     "from plots import *\n",
     "import scaffolding\n",

From 5e79051325c4977c47e309943e9281033bda18fa Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Mon, 6 May 2024 19:28:05 -0600
Subject: [PATCH 53/70] remove outdated code

introduced when relying on sections for survey filter, no longer in use
---
 viz_scripts/scaffolding.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index da6c3f12..a2cff03c 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -58,13 +58,6 @@ def load_all_confirmed_trips(tq):
     disp.display(all_ct.head())
     return all_ct
 
-def load_all_composite_trips(tq):
-    agg = esta.TimeSeries.get_aggregate_time_series()
-    all_ct = agg.get_data_df("analysis/composite_trip", tq)
-    print("Loaded all composite trips of length %s" % len(all_ct))
-    disp.display(all_ct.head())
-    return all_ct
-
 def load_all_participant_trips(program, tq, load_test_users):
     participant_list = get_participant_uuids(program, load_test_users)
     all_ct = load_all_confirmed_trips(tq)

From 7dbef2de440407611de4c4b05f403e0fbb7f6e45 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Mon, 6 May 2024 19:28:37 -0600
Subject: [PATCH 54/70] update import styles

---
 viz_scripts/survey_responses.ipynb | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index c270aa50..e43bf83a 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -29,8 +29,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import collections.defaultdict as defaultdict\n",
-    "import collections.OrderedDict as OrderedDict\n",
+    "import collections\n",
     "import urllib.request\n",
     "import numpy as np\n",
     "import pandas as pd\n",
@@ -189,9 +188,8 @@
     "    return debug_df\n",
     "\n",
     "#color dictionary\n",
-    "from collections import OrderedDict\n",
     "def mapping_color_surveys(dic_options):\n",
-    "    dictionary_values = (list(OrderedDict.fromkeys(dic_options.values())))\n",
+    "    dictionary_values = (list(collections.OrderedDict.fromkeys(dic_options.values())))\n",
     "    \n",
     "    colors = {}\n",
     "    for i in range(len(dictionary_values)):\n",

From 39d1156f3136e722d231a13ae45cbda429a2bab8 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Mon, 6 May 2024 22:47:40 -0600
Subject: [PATCH 55/70] remove reliance on "total" sets

due to ongoing debugging of the ble summaries and their processing, removing reliance on that for now. Concretely, this means that there is no quality text and the debug_df is just empty
---
 viz_scripts/survey_responses.ipynb | 54 ++++++++++++++++--------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index e43bf83a..fba6a32a 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -294,30 +294,30 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#create the total dfs\n",
-    "total_dfs = {}\n",
-    "#for conditional surveys!\n",
-    "wrapped_config = {'survey_info': survey_info}\n",
-    "if 'buttons' in survey_info.keys():\n",
-    "    all_confirmed_trips = all_confirmed_trips[all_confirmed_trips['ble_sensed_summary'].notna()]\n",
-    "    all_confirmed_trips[\"confirmedMode_baseMode\"] = all_confirmed_trips.ble_sensed_summary.apply(lambda md: max(md[\"distance\"], key=md[\"distance\"].get))\n",
-    "    all_confirmed_trips['survey_name_prompted'] = all_confirmed_trips.apply(lambda row: conditional_surveys.survey_prompted_for_trip(row.to_dict(), wrapped_config), axis=1)\n",
+    "# #create the total dfs\n",
+    "# total_dfs = {}\n",
+    "# #for conditional surveys!\n",
+    "# wrapped_config = {'survey_info': survey_info}\n",
+    "# if 'buttons' in survey_info.keys():\n",
+    "#     all_confirmed_trips = all_confirmed_trips[all_confirmed_trips['ble_sensed_summary'].notna()]\n",
+    "#     all_confirmed_trips[\"confirmedMode_baseMode\"] = all_confirmed_trips.ble_sensed_summary.apply(lambda md: max(md[\"distance\"], key=md[\"distance\"].get))\n",
+    "#     all_confirmed_trips['survey_name_prompted'] = all_confirmed_trips.apply(lambda row: conditional_surveys.survey_prompted_for_trip(row.to_dict(), wrapped_config), axis=1)\n",
     "    \n",
-    "    for survey_name in list(sheet_list.keys()):\n",
-    "        if survey_name in all_confirmed_trips['survey_name_prompted'].unique():\n",
-    "            total_dfs[survey_name] = all_confirmed_trips[all_confirmed_trips['survey_name_prompted'] == survey_name]\n",
-    "        else:\n",
-    "            #never prompted\n",
-    "            total_dfs[survey_name] = pd.DataFrame()\n",
-    "else:\n",
-    "    survey_name = list(sheet_list.keys())[0] #there is only one if non-conditional\n",
-    "    total_dfs[survey_name] = all_confirmed_trips\n",
+    "#     for survey_name in list(sheet_list.keys()):\n",
+    "#         if survey_name in all_confirmed_trips['survey_name_prompted'].unique():\n",
+    "#             total_dfs[survey_name] = all_confirmed_trips[all_confirmed_trips['survey_name_prompted'] == survey_name]\n",
+    "#         else:\n",
+    "#             #never prompted\n",
+    "#             total_dfs[survey_name] = pd.DataFrame()\n",
+    "# else:\n",
+    "#     survey_name = list(sheet_list.keys())[0] #there is only one if non-conditional\n",
+    "#     total_dfs[survey_name] = all_confirmed_trips\n",
     "\n",
-    "for key in total_dfs.keys():\n",
-    "    if len(total_dfs[key]) > 0:\n",
-    "        print(key, \":\", len(total_dfs[key]), \"trips\", total_dfs[key].user_id.nunique(), \"users\")\n",
-    "    else:\n",
-    "        print(key, \":\", len(total_dfs[key]), \"trips\")"
+    "# for key in total_dfs.keys():\n",
+    "#     if len(total_dfs[key]) > 0:\n",
+    "#         print(key, \":\", len(total_dfs[key]), \"trips\", total_dfs[key].user_id.nunique(), \"users\")\n",
+    "#     else:\n",
+    "#         print(key, \":\", len(total_dfs[key]), \"trips\")"
    ]
   },
   {
@@ -345,8 +345,9 @@
     "#create one plot per question in the survey\n",
     "for survey_name in quest_dict.keys():\n",
     "    print(\"Charts for:\", survey_name)\n",
-    "    debug_df = generate_debug_df(program, include_test_users, total_dfs[survey_name], total_dfs[survey_name][total_dfs[survey_name]['user_input'] != {} if len(total_dfs[survey_name]) > 0 else pd.DataFrame()])\n",
-    "\n",
+    "#     debug_df = generate_debug_df(program, include_test_users, total_dfs[survey_name], total_dfs[survey_name][total_dfs[survey_name]['user_input'] != {} if len(total_dfs[survey_name]) > 0 else pd.DataFrame()])\n",
+    "    debug_df = pd.DataFrame() #survey filtering still buggy, omitting all features of \"num trips survey presented\"\n",
+    "    \n",
     "    for col in quest_dict[survey_name].keys():\n",
     "        filename = col + file_suffix\n",
     "        plot_title_no_quality = survey_name + \"\\n\" + quest_dict[survey_name][col]\n",
@@ -357,8 +358,9 @@
     "            quest_frame[col].replace('', np.nan, inplace=True)\n",
     "            quest_frame.dropna(subset=[col], inplace=True)\n",
     "            \n",
-    "            qual_text = scaffolding.get_quality_text(total_dfs[survey_name], quest_frame, mode_of_interest, include_test_users)\n",
-    "            plot_title = plot_title_no_quality+'\\n'+qual_text\n",
+    "#             qual_text = scaffolding.get_quality_text(total_dfs[survey_name], quest_frame, mode_of_interest, include_test_users)\n",
+    "#             plot_title = plot_title_no_quality+'\\n'+qual_text\n",
+    "            plot_title = plot_title_no_quality\n",
     "            \n",
     "            fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
     "\n",

From 7c7a718e2bc02b0042de6f9504658dd65113b511 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Mon, 6 May 2024 23:15:22 -0600
Subject: [PATCH 56/70] push legend below chart if the first label is long

this puts the legend for longer-response surveys below the chart, while shorter ones remain to the right
---
 viz_scripts/plots.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/viz_scripts/plots.py b/viz_scripts/plots.py
index 7dcba2cc..1eaa2cb7 100644
--- a/viz_scripts/plots.py
+++ b/viz_scripts/plots.py
@@ -130,8 +130,13 @@ def plot_and_text_stacked_bar_chart(df, bar_label, ax, text_result, colors, debu
         ax.tick_params(axis='y', labelsize=18)
         ax.tick_params(axis='x', labelsize=18, rotation=90)
         ncols = len(df_only_small)//5 if len(df_only_small) % 5 == 0 else len(df_only_small)//5 + 1
-        ax.legend(bbox_to_anchor=(1, 0), loc='lower left', fancybox=True, shadow=True, fontsize=15)
-        # ax.legend(bbox_to_anchor=(1, 1), loc='upper left', fancybox=True, shadow=True, fontsize=15, ncols=ncols)
+        
+        if len(pd.unique(df_only_small['Label'])[0]) > 15:
+            ax.legend(bbox_to_anchor=(0.5, -0.5), loc='upper center', fancybox=True, shadow=True, fontsize=15)
+        else:
+            ax.legend(bbox_to_anchor=(1, 0), loc='lower left', fancybox=True, shadow=True, fontsize=15)
+            # ax.legend(bbox_to_anchor=(1, 1), loc='upper left', fancybox=True, shadow=True, fontsize=15, ncols=ncols)
+            
         # Fix for the error: RuntimeError("Unknown return type"), adding the below line to address as mentioned here https://github.com/matplotlib/matplotlib/issues/25625/
         ax.set_xlim(right=ax.get_xlim()[1] + 1.0, auto=True)
         text_result[0], text_result[1] = store_alt_text_and_html_stacked_bar_chart(df_all_entries, bar_label)

From 6c38e9727e1bcec91422bd681bc2b61fea3ce5a9 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Tue, 7 May 2024 08:33:22 -0600
Subject: [PATCH 57/70] update frontend for metrics

make stacked bar plots larger, add the surveys to the list of stacked metrics so they can have "more info" buttons
---
 frontend/index.html                 | 11 ++++++-----
 frontend/metrics_study_surveys.html |  9 +++++----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/frontend/index.html b/frontend/index.html
index 761ab019..3d9b02c5 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -459,7 +459,7 @@
               load_file = "metrics_study_surveys.html"
               $.get(load_file, function (file) {
                 Object.entries(quest_dict).forEach(([key, value]) => {
-                  var text = '<option ' + 'value="' + key + '" data-sizex="4" data-sizey="4">' + value + '</option>';
+                  var text = '<option ' + 'value="' + key + '" data-sizex="10" data-sizey="4">' + value + '</option>';
                   file = file.concat('\n', text);
                 });
                 console.log("configuring units");
@@ -578,7 +578,7 @@
         }
       }).data('gridster');
 
-      $('.js-add-new').on('click', function () {
+      $('.js-add-new').on('click', async function () {
         const metric = $("#metric").val();
         const dateVal = $("#date").val();
         const program = $("#program").val();
@@ -596,10 +596,11 @@
         const htmlFile = "plots/" + metric + "_" + dateVal + program + ".html";
         const altTextFile = "plots/" + metric + "_" + dateVal + program + ".txt";
         const altText = loadFile(altTextFile);
-        const isStackedMetric = ['ntrips_total', 'ntrips_purpose', 'ntrips_under80', 'ntrips_commute_mode_confirm',
+        const quest_dict = await getDictionaryList(sheet_list)
+        const stackedMetrics = ['ntrips_total', 'ntrips_purpose', 'ntrips_under80', 'ntrips_commute_mode_confirm',
                                  'total_trip_length', 'total_trip_length_land',`ntrips_${mode_studied}_total`,
-                                 `ntrips_${mode_studied}_purpose`,`total_trip_length_${mode_studied}_replaced_mode`]
-                                 .includes(metric);
+                                 `ntrips_${mode_studied}_purpose`,`total_trip_length_${mode_studied}_replaced_mode`].concat(Object.keys(quest_dict));
+        const isStackedMetric = stackedMetrics.includes(metric);
         const jsonData = { metric, dateVal, program, metricLabel, dateLabel, programLabel, sizex, sizey };
 
         if (isStackedMetric){
diff --git a/frontend/metrics_study_surveys.html b/frontend/metrics_study_surveys.html
index 2eea75b6..d93fbc75 100644
--- a/frontend/metrics_study_surveys.html
+++ b/frontend/metrics_study_surveys.html
@@ -1,10 +1,11 @@
 <!-- htmnl options should be 1 per chart question -->
 <!-- <option value="question key" data-sizex="4" data-sizey="4">translated question</option> -->
 
-<option value="ntrips_sensed_mode" data-sizex="4" data-sizey="4">Number of trips (sensed)</option>
-<option value="ntrips_under10miles_sensed_mode" data-sizex="4" data-sizey="4">Trip count under 80th Percentile (sensed)</option>
-<option value="miles_sensed_mode" data-sizex="4" data-sizey="4">Trip distance (${data.display_config.use_imperial}) by mode (sensed)</option>
-<option value="miles_sensed_mode_land" data-sizex="4" data-sizey="4">Trip distance by land mode (sensed)</option>
+<option value="ntrips_total" data-sizex="10" data-sizey="4">Number of trips</option>
+<option value="ntrips_under80" data-sizex="10" data-sizey="4">Number of trips (under 80th percentile of total trips)</option>
+<option value="total_trip_length" data-sizex="10" data-sizey="4">Total trip length (${data.display_config.use_imperial}) covered by mode</option>
+<option value="total_trip_length_land" data-sizex="10" data-sizey="4">Total trip length (${data.display_config.use_imperial}) covered by mode in land</option>
+<option value="average_miles_mode_confirm" data-sizex="6" data-sizey="4">Average trip length (${data.display_config.use_imperial})</option>
 <option value="average_miles_sensed_mode" data-sizex="6" data-sizey="4">Average trip length (${data.display_config.use_imperial}) (sensed)</option>
 <option value="ntrips_per_day" data-sizex="6" data-sizey="4">Trip frequency</option>
 <option value="ntrips_sensed_per_day" data-sizex="6" data-sizey="4">Trip frequency (sensed)</option>

From a57a30f31e84d84e68421659c658709fb1bd4945 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Tue, 7 May 2024 12:00:50 -0600
Subject: [PATCH 58/70] remove old code

removed stale lines that were breaking notebook syntax!
---
 viz_scripts/survey_responses.ipynb | 2 --
 1 file changed, 2 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index fba6a32a..4450db24 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -18,8 +18,6 @@
     "dynamic_labels = {}\n",
     "use_imperial = False\n",
     "survey_info =  {}"
-    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-onboarding-v1.xml\",\n",
-    "        \"version\": 1,\n",
    ]
   },
   {

From b207efa5931f3b292d556d31c488510e0393f6c5 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Tue, 7 May 2024 14:23:43 -0600
Subject: [PATCH 59/70] create survey_metrics notebook

create a dedicated notebook for handling metrics on survey deployments

currently the option to show the "bluetooth sensed" mode is commented out -- this just shows a lot of unknowns; would this one day be the exact vehicles and how much they are used, or just car/ecar?
---
 viz_scripts/survey_metrics.ipynb | 277 +++++++++++++++++++++++++++++++
 1 file changed, 277 insertions(+)
 create mode 100644 viz_scripts/survey_metrics.ipynb

diff --git a/viz_scripts/survey_metrics.ipynb b/viz_scripts/survey_metrics.ipynb
new file mode 100644
index 00000000..392041dc
--- /dev/null
+++ b/viz_scripts/survey_metrics.ipynb
@@ -0,0 +1,277 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ed6ab331",
+   "metadata": {},
+   "source": [
+    "## Generate Static Graphs -- Metrics for Survey Deployments"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a3506947",
+   "metadata": {},
+   "source": [
+    "These are the input parameters for the notebook. They will be automatically changed when the scripts to generate monthly statistics are run. You can modify them manually to generate multiple plots locally as well.\n",
+    "\n",
+    "Pass in `None` to remove the filters and plot all data. This is not recommended for production settings, but might be useful for reports based on data snapshots."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4fb04713",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "year = 2024\n",
+    "month = 11\n",
+    "program = \"default\"\n",
+    "study_type = \"study\"\n",
+    "include_test_users = False\n",
+    "dynamic_labels = {}\n",
+    "use_imperial = True\n",
+    "sensed_algo_prefix = \"cleaned\"\n",
+    "bluetooth_only = True #current proxy for fleet status\n",
+    "survey_info = {}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "764463bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from collections import defaultdict\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "from plots import *\n",
+    "import scaffolding\n",
+    "\n",
+    "sns.set_style(\"whitegrid\")\n",
+    "sns.set()\n",
+    "%matplotlib inline\n",
+    "\n",
+    "# get metric vs imperial vars\n",
+    "label_units, short_label, label_units_lower, distance_col, weight_unit = scaffolding.get_units(use_imperial)\n",
+    "\n",
+    "# get color mappings\n",
+    "colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels({}, {}, {}) #just need sensed"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "083483be",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Do not run this notebook at all unless it is for a survey configuration; nbclient will run up through this cell\n",
+    "if not survey_info['trip-labels'] == 'ENKETO':\n",
+    "    ipython = get_ipython()\n",
+    "    ipython._showtraceback = scaffolding.no_traceback_handler\n",
+    "    raise Exception(\"The plots in this notebook are only relevant to deployments with trip-level surveys\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ed44bc42",
+   "metadata": {},
+   "source": [
+    "## Collect Data From Database"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c6805564",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expanded_ct_sensed, file_suffix_sensed, quality_text_sensed, debug_df_sensed = scaffolding.load_viz_notebook_sensor_inference_data(year,\n",
+    "                                                                            month,\n",
+    "                                                                            program,\n",
+    "                                                                            include_test_users,\n",
+    "                                                                            sensed_algo_prefix)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b9b54e16",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#if fleet, replace primary_mode with primary_ble_sensed mode\n",
+    "#RESTORE WHEN BLUETOOTH MODE IS RELIABLE\n",
+    "# if bluetooth_only:\n",
+    "#     expanded_ct_sensed = expanded_ct_sensed[expanded_ct_sensed['ble_sensed_summary'].notna()]\n",
+    "#     expanded_ct_sensed[\"primary_mode\"] = expanded_ct_sensed.ble_sensed_summary.apply(lambda md: max(md[\"distance\"], key=md[\"distance\"].get))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "731c35e7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "sensed_match = re.match(r'Based on ([0-9]+) trips from ([0-9]+) (users|testers and participants)', quality_text_sensed)\n",
+    "stacked_bar_quality_text_sensed = f\"{sensed_match.group(1)} trips (100%)\\n from {sensed_match.group(2)} {sensed_match.group(3)}\"\n",
+    "stacked_bar_quality_text_sensed"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aa72afe9",
+   "metadata": {},
+   "source": [
+    "## Trips - count and distance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e3be240e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file_name = f'ntrips_total{file_suffix_sensed}'\n",
+    "plot_title_no_quality= \"Number of trips for each mode (selected by users)\"\n",
+    "\n",
+    "try:\n",
+    "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
+    "    # We will have text results corresponding to the axes for simplicity and consistency\n",
+    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_sensed.groupby(\"primary_mode\").agg({distance_col: 'count'}), \"Count of Trips\\n\"+stacked_bar_quality_text_sensed, ax[0], text_results[0], colors_sensed, debug_df_sensed)\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_sensed.groupby(\"primary_mode\").agg({distance_col: 'sum'}), \"Distance of Trips\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+    "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
+    "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+    "    plt.clf()\n",
+    "    generate_missing_plot(plot_title_no_quality, debug_df_sensed, file_name)\n",
+    "    alt_text = store_alt_text_missing(debug_df_sensed, file_name, plot_title_no_quality)        \n",
+    "    alt_html = store_alt_html_missing(debug_df_sensed, file_name, plot_title_no_quality)\n",
+    "except Exception as e:\n",
+    "    # TODO: Future cleanup can pass in just the figure and have the function choose the last axis\n",
+    "    fig, ax = plt.subplots()\n",
+    "    plot_and_text_error(e, ax, file_name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a28891cc",
+   "metadata": {},
+   "source": [
+    "## Trips under 80%"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1beae73e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file_name = f'ntrips_under80{file_suffix_sensed}'\n",
+    "\n",
+    "try:\n",
+    "    # Preprocess to find cutoff and filter below cutoff\n",
+    "    # For simplicity, and to aid in comparison, we have a single cutoff based on the total number of trips\n",
+    "    cutoff = expanded_ct_sensed.distance.quantile(0.8)\n",
+    "    if pd.isna(cutoff):\n",
+    "        cutoff = 0\n",
+    "    dist_threshold = expanded_ct_sensed[distance_col].quantile(0.8).round(1)\n",
+    "    dist_threshold = str(dist_threshold) \n",
+    "\n",
+    "    plot_title_no_quality=\"Number of trips per travel model under \" + dist_threshold + \" \" + label_units_lower\n",
+    "    plot_title_no_quality=plot_title_no_quality+\"\\n[\"+dist_threshold + \" \" + label_units_lower+\" represents 80th percentile of trip length]\"\n",
+    "\n",
+    "    expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct_sensed['distance'] <= cutoff)]\n",
+    "    sensed_u80_quality_text = f\"{len(expanded_ct_sensed_u80)} trips ({round(len(expanded_ct_sensed_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_sensed_u80)} {sensed_match.group(3)}\"\n",
+    "\n",
+    "    # Plot entries\n",
+    "    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
+    "    text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80.groupby(\"primary_mode\").agg({distance_col: 'count'}), \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax, text_results, colors_sensed, debug_df_sensed)\n",
+    "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
+    "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+    "    # we can have an missing attribute error during the pre-procssing, in which case we should show the missing plot\n",
+    "    # here, our pre-processing only relies on sensed data, so we use the debug_df_sensed\n",
+    "    plt.clf()\n",
+    "    plot_title_default = \"Number of trips below 80th percentile in each mode\"\n",
+    "    generate_missing_plot(plot_title_default, debug_df_sensed, file_name)\n",
+    "    alt_text = store_alt_text_missing(debug_df_sensed, file_name, plot_title_default)\n",
+    "    alt_html = store_alt_html_missing(debug_df_sensed, file_name, plot_title_no_quality)\n",
+    "except Exception as e:\n",
+    "    fig, ax = plt.subplots()\n",
+    "    plot_and_text_error(e, ax, file_name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b5454e0f",
+   "metadata": {},
+   "source": [
+    "## Land mode distances"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "39da6b85",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_title_no_quality= \"Total trip length (\" + label_units_lower + \") covered by each mode by land\"\n",
+    "file_name =f\"total_trip_length_land{file_suffix_sensed}\"\n",
+    "\n",
+    "try:\n",
+    "    ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n",
+    "    ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n",
+    "    sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n",
+    "    \n",
+    "    sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n",
+    "\n",
+    "    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
+    "    text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
+    "    plot_and_text_stacked_bar_chart(sensed_land_trips_df.groupby(\"primary_mode\").agg({distance_col: 'sum'}), \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax, text_results, colors_sensed, debug_df_sensed)\n",
+    "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)    \n",
+    "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+    "    plt.clf()\n",
+    "    generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n",
+    "    alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)        \n",
+    "    alt_html = store_alt_html_missing(merged_debug_df, file_name, plot_title_no_quality)\n",
+    "except Exception as e:\n",
+    "    fig, ax = plt.subplots()\n",
+    "    plot_and_text_error(e, ax, file_name)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.19"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 695750093a627158c11634f102d2b562273df90f Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Tue, 7 May 2024 14:33:04 -0600
Subject: [PATCH 60/70] add survey metrics to the front end

name the charts generated by this notebook with a _survey suffix so they don't get overwritten by the same-named charts from the other notebooks
---
 frontend/metrics_study_surveys.html | 10 +++-------
 viz_scripts/docker/crontab          |  1 +
 viz_scripts/survey_metrics.ipynb    |  6 +++---
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/frontend/metrics_study_surveys.html b/frontend/metrics_study_surveys.html
index d93fbc75..8c9bd262 100644
--- a/frontend/metrics_study_surveys.html
+++ b/frontend/metrics_study_surveys.html
@@ -1,14 +1,10 @@
 <!-- htmnl options should be 1 per chart question -->
 <!-- <option value="question key" data-sizex="4" data-sizey="4">translated question</option> -->
 
-<option value="ntrips_total" data-sizex="10" data-sizey="4">Number of trips</option>
-<option value="ntrips_under80" data-sizex="10" data-sizey="4">Number of trips (under 80th percentile of total trips)</option>
-<option value="total_trip_length" data-sizex="10" data-sizey="4">Total trip length (${data.display_config.use_imperial}) covered by mode</option>
-<option value="total_trip_length_land" data-sizex="10" data-sizey="4">Total trip length (${data.display_config.use_imperial}) covered by mode in land</option>
-<option value="average_miles_mode_confirm" data-sizex="6" data-sizey="4">Average trip length (${data.display_config.use_imperial})</option>
+<option value="ntrips_total_survey" data-sizex="10" data-sizey="4">Number of trips</option>
+<option value="ntrips_under80_survey" data-sizex="10" data-sizey="4">Number of trips (under 80th percentile of total trips)</option>
+<option value="total_trip_length_land_survey" data-sizex="10" data-sizey="4">Total trip length (${data.display_config.use_imperial}) covered by mode in land</option>
 <option value="average_miles_sensed_mode" data-sizex="6" data-sizey="4">Average trip length (${data.display_config.use_imperial}) (sensed)</option>
-<option value="ntrips_per_day" data-sizex="6" data-sizey="4">Trip frequency</option>
 <option value="ntrips_sensed_per_day" data-sizex="6" data-sizey="4">Trip frequency (sensed)</option>
-<option value="ntrips_per_weekday" data-sizex="6" data-sizey="4">Trip frequency (weekday)</option>
 <option value="ntrips_sensed_per_weekday" data-sizex="6" data-sizey="4">Trip frequency (weekday, sensed)</option>
 <option value="ts_users" data-sizex="8" data-sizey="2">Timeseries of active users</option>
\ No newline at end of file
diff --git a/viz_scripts/docker/crontab b/viz_scripts/docker/crontab
index 2ffcec9b..ca37fe5a 100644
--- a/viz_scripts/docker/crontab
+++ b/viz_scripts/docker/crontab
@@ -6,5 +6,6 @@
 0 8 * * * python bin/generate_plots.py mode_specific_timeseries.ipynb default >> /var/log/intake.stdinout 2>&1
 0 8 * * * python bin/generate_plots.py energy_calculations.ipynb default >> /var/log/intake.stdinout 2>&1
 0 8 * * * python bin/generate_plots.py survey_responses.ipynb default >> /var/log/intake.stdinout 2>&1
+0 8 * * * python bin/generate_plots.py survey_metrics.ipynb default >> /var/log/intake.stdinout 2>&1
 # For testing only
 # */5 * * * * python bin/generate_plots.py mode_purpose_share.ipynb default >> /var/log/intake.stdinout 2>&1
diff --git a/viz_scripts/survey_metrics.ipynb b/viz_scripts/survey_metrics.ipynb
index 392041dc..13702793 100644
--- a/viz_scripts/survey_metrics.ipynb
+++ b/viz_scripts/survey_metrics.ipynb
@@ -142,7 +142,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "file_name = f'ntrips_total{file_suffix_sensed}'\n",
+    "file_name = f'ntrips_total_survey{file_suffix_sensed}'\n",
     "plot_title_no_quality= \"Number of trips for each mode (selected by users)\"\n",
     "\n",
     "try:\n",
@@ -178,7 +178,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "file_name = f'ntrips_under80{file_suffix_sensed}'\n",
+    "file_name = f'ntrips_under80_survey{file_suffix_sensed}'\n",
     "\n",
     "try:\n",
     "    # Preprocess to find cutoff and filter below cutoff\n",
@@ -229,7 +229,7 @@
    "outputs": [],
    "source": [
     "plot_title_no_quality= \"Total trip length (\" + label_units_lower + \") covered by each mode by land\"\n",
-    "file_name =f\"total_trip_length_land{file_suffix_sensed}\"\n",
+    "file_name =f\"total_trip_length_land_survey{file_suffix_sensed}\"\n",
     "\n",
     "try:\n",
     "    ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n",

From 2e2e2691242a15709962cc7bd0081a9a7d9c9a1f Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Tue, 7 May 2024 15:57:52 -0600
Subject: [PATCH 61/70] remove testing-only code

---
 frontend/index.html                |  7 ----
 viz_scripts/survey_responses.ipynb | 67 ++++++++++++++++++++++++++----
 2 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/frontend/index.html b/frontend/index.html
index 3d9b02c5..f63ee133 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -345,11 +345,6 @@
       var current_month = start_month;
       var current_year = start_year;
 
-      //testing dfc fermata .. doesn't start until April...
-      // if ((current_month >= end_month) && (current_year >= end_year)) {
-      //   current_month = current_month - 2; //dfc has not started yet...
-      // }
-
       dates.push([current_month, current_year]);
       while (!(current_month == end_month && current_year == end_year)) {
         current_month += 1;
@@ -444,8 +439,6 @@
             sheet_list = []
             for (name in survey_list) {
               form_path = data.survey_info.surveys[survey_list[name]].formPath;
-              //hard code the old survey
-              // form_path = 'https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/fermata-ev-return-trip-v0.xml';
               //THIS ASSUMES THE FILENAME IS THE SAME AS THE FORM PATH BUT WITH xml FILE TYPE
               l_path = form_path.split('.')
               l_path.splice(l_path.length -1, 1, 'xml');
diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 4450db24..0a2d5363 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -7,17 +7,68 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "year = 2024\n",
-    "month = 4\n",
+    "year = None\n",
+    "month = None\n",
     "year = None\n",
     "month = None\n",
     "program = \"default\"\n",
     "study_type = \"study\"\n",
-    "mode_of_interest = 'e-bike'\n",
-    "include_test_users = False\n",
+    "mode_of_interest = None\n",
+    "include_test_users = True\n",
     "dynamic_labels = {}\n",
     "use_imperial = False\n",
-    "survey_info =  {}"
+    "survey_info =  {\n",
+    "    \"surveys\": {\n",
+    "      \"UserProfileSurvey\": {\n",
+    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-onboarding-v1.xml\",\n",
+    "        \"version\": 1,\n",
+    "        \"compatibleWith\": 1,\n",
+    "        \"dataKey\": \"manual/demographic_survey\",\n",
+    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
+    "      },\n",
+    "      \"DfcEvReturnTrip\": {\n",
+    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-ev-return-trip-v1.xml\",\n",
+    "        \"version\": 1,\n",
+    "        \"compatibleWith\": 1,\n",
+    "        \"dataKey\": \"manual/trip_user_input\",\n",
+    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
+    "      },\n",
+    "      \"DfcEvRoamingTrip\": {\n",
+    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-ev-roaming-trip-v1.xml\",\n",
+    "        \"version\": 1,\n",
+    "        \"compatibleWith\": 1,\n",
+    "        \"dataKey\": \"manual/trip_user_input\",\n",
+    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
+    "      },\n",
+    "      \"DfcGasTrip\": {\n",
+    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-gas-trip-v1.xml\",\n",
+    "        \"version\": 1,\n",
+    "        \"compatibleWith\": 1,\n",
+    "        \"dataKey\": \"manual/trip_user_input\",\n",
+    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
+    "      }\n",
+    "    },\n",
+    "    \"buttons\": {\n",
+    "      \"trip-label\": [\n",
+    "        {\n",
+    "          \"surveyName\": \"DfcGasTrip\",\n",
+    "          \"not-filled-in-label\": { \"en\": \"Gas Car Survey\" },\n",
+    "          \"showsIf\": \"confirmedMode?.baseMode == 'CAR'\"\n",
+    "        },\n",
+    "        {\n",
+    "          \"surveyName\": \"DfcEvRoamingTrip\",\n",
+    "          \"not-filled-in-label\": { \"en\": \"EV Survey\" },\n",
+    "          \"showsIf\": \"confirmedMode?.baseMode == 'E_CAR' && !pointIsWithinBounds(end_loc['coordinates'], [[-105.118, 39.719], [-105.115, 39.717]])\"\n",
+    "        },\n",
+    "        {\n",
+    "          \"surveyName\": \"DfcEvReturnTrip\",\n",
+    "          \"not-filled-in-label\": { \"en\": \"EV Survey\" },\n",
+    "          \"showsIf\": \"confirmedMode?.baseMode == 'E_CAR' && pointIsWithinBounds(end_loc['coordinates'], [[-105.118, 39.719], [-105.115, 39.717]])\"\n",
+    "        }\n",
+    "      ]\n",
+    "    },\n",
+    "    \"trip-labels\": \"ENKETO\"\n",
+    "  }"
    ]
   },
   {
@@ -249,7 +300,7 @@
     "#we need to filter out trips (based on if including test users)\n",
     "all_confirmed_trips = scaffolding.filter_composite_trips(all_confirmed_trips, program, include_test_users)\n",
     "\n",
-    "if len(all_confirmed_trips) > 0:\n",
+    "try:\n",
     "    #remove blank inputs\n",
     "    survey_trips = all_confirmed_trips[all_confirmed_trips['user_input'] != {}]\n",
     "\n",
@@ -260,7 +311,7 @@
     "    #gather the cols needed for charts and text\n",
     "    survey_trips = survey_trips[['survey_name', 'user_id', 'user_input']]\n",
     "    \n",
-    "else:\n",
+    "except:\n",
     "    survey_trips = pd.DataFrame()\n",
     "\n",
     "survey_trips.groupby('survey_name').count()"
@@ -344,7 +395,7 @@
     "for survey_name in quest_dict.keys():\n",
     "    print(\"Charts for:\", survey_name)\n",
     "#     debug_df = generate_debug_df(program, include_test_users, total_dfs[survey_name], total_dfs[survey_name][total_dfs[survey_name]['user_input'] != {} if len(total_dfs[survey_name]) > 0 else pd.DataFrame()])\n",
-    "    debug_df = pd.DataFrame() #survey filtering still buggy, omitting all features of \"num trips survey presented\"\n",
+    "    debug_df = generate_debug_df(program, include_test_users, all_confirmed_trips, df_responses) #survey filtering still buggy, omitting all features of \"num trips survey presented\"\n",
     "    \n",
     "    for col in quest_dict[survey_name].keys():\n",
     "        filename = col + file_suffix\n",

From f9f3ba5056e3e1dd35751ba0f847ec96671be571 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Tue, 7 May 2024 15:58:11 -0600
Subject: [PATCH 62/70] use correct debug frame

---
 viz_scripts/survey_metrics.ipynb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/viz_scripts/survey_metrics.ipynb b/viz_scripts/survey_metrics.ipynb
index 13702793..19bc0e0d 100644
--- a/viz_scripts/survey_metrics.ipynb
+++ b/viz_scripts/survey_metrics.ipynb
@@ -244,9 +244,9 @@
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)    \n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
-    "    generate_missing_plot(plot_title_no_quality, merged_debug_df, file_name)\n",
-    "    alt_text = store_alt_text_missing(merged_debug_df, file_name, plot_title_no_quality)        \n",
-    "    alt_html = store_alt_html_missing(merged_debug_df, file_name, plot_title_no_quality)\n",
+    "    generate_missing_plot(plot_title_no_quality, debug_df_sensed, file_name)\n",
+    "    alt_text = store_alt_text_missing(debug_df_sensed, file_name, plot_title_no_quality)        \n",
+    "    alt_html = store_alt_html_missing(debug_df_sensed, file_name, plot_title_no_quality)\n",
     "except Exception as e:\n",
     "    fig, ax = plt.subplots()\n",
     "    plot_and_text_error(e, ax, file_name)"

From 3d4c5fbb5fefb80065f28931eec08b07903cfca9 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Tue, 7 May 2024 16:01:30 -0600
Subject: [PATCH 63/70] restore default parameters

---
 viz_scripts/survey_responses.ipynb | 61 ++----------------------------
 1 file changed, 4 insertions(+), 57 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 0a2d5363..0221826f 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -7,68 +7,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "year = None\n",
-    "month = None\n",
-    "year = None\n",
-    "month = None\n",
+    "year = 2024\n",
+    "month = 11\n",
     "program = \"default\"\n",
     "study_type = \"study\"\n",
     "mode_of_interest = None\n",
     "include_test_users = True\n",
     "dynamic_labels = {}\n",
-    "use_imperial = False\n",
-    "survey_info =  {\n",
-    "    \"surveys\": {\n",
-    "      \"UserProfileSurvey\": {\n",
-    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-onboarding-v1.xml\",\n",
-    "        \"version\": 1,\n",
-    "        \"compatibleWith\": 1,\n",
-    "        \"dataKey\": \"manual/demographic_survey\",\n",
-    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
-    "      },\n",
-    "      \"DfcEvReturnTrip\": {\n",
-    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-ev-return-trip-v1.xml\",\n",
-    "        \"version\": 1,\n",
-    "        \"compatibleWith\": 1,\n",
-    "        \"dataKey\": \"manual/trip_user_input\",\n",
-    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
-    "      },\n",
-    "      \"DfcEvRoamingTrip\": {\n",
-    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-ev-roaming-trip-v1.xml\",\n",
-    "        \"version\": 1,\n",
-    "        \"compatibleWith\": 1,\n",
-    "        \"dataKey\": \"manual/trip_user_input\",\n",
-    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
-    "      },\n",
-    "      \"DfcGasTrip\": {\n",
-    "        \"formPath\": \"https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/survey_resources/dfc-fermata/dfc-gas-trip-v1.xml\",\n",
-    "        \"version\": 1,\n",
-    "        \"compatibleWith\": 1,\n",
-    "        \"dataKey\": \"manual/trip_user_input\",\n",
-    "        \"labelTemplate\": { \"en\": \"Answered\" }\n",
-    "      }\n",
-    "    },\n",
-    "    \"buttons\": {\n",
-    "      \"trip-label\": [\n",
-    "        {\n",
-    "          \"surveyName\": \"DfcGasTrip\",\n",
-    "          \"not-filled-in-label\": { \"en\": \"Gas Car Survey\" },\n",
-    "          \"showsIf\": \"confirmedMode?.baseMode == 'CAR'\"\n",
-    "        },\n",
-    "        {\n",
-    "          \"surveyName\": \"DfcEvRoamingTrip\",\n",
-    "          \"not-filled-in-label\": { \"en\": \"EV Survey\" },\n",
-    "          \"showsIf\": \"confirmedMode?.baseMode == 'E_CAR' && !pointIsWithinBounds(end_loc['coordinates'], [[-105.118, 39.719], [-105.115, 39.717]])\"\n",
-    "        },\n",
-    "        {\n",
-    "          \"surveyName\": \"DfcEvReturnTrip\",\n",
-    "          \"not-filled-in-label\": { \"en\": \"EV Survey\" },\n",
-    "          \"showsIf\": \"confirmedMode?.baseMode == 'E_CAR' && pointIsWithinBounds(end_loc['coordinates'], [[-105.118, 39.719], [-105.115, 39.717]])\"\n",
-    "        }\n",
-    "      ]\n",
-    "    },\n",
-    "    \"trip-labels\": \"ENKETO\"\n",
-    "  }"
+    "use_imperial = True\n",
+    "survey_info = {}"
    ]
   },
   {

From 40c1478374877f3ffb4c5c7ff59042926a2c9d5e Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Tue, 7 May 2024 16:37:11 -0600
Subject: [PATCH 64/70] add survey metrics to list of stacked metrics

---
 frontend/index.html | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/frontend/index.html b/frontend/index.html
index f63ee133..393e05e6 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -589,9 +589,9 @@
         const htmlFile = "plots/" + metric + "_" + dateVal + program + ".html";
         const altTextFile = "plots/" + metric + "_" + dateVal + program + ".txt";
         const altText = loadFile(altTextFile);
-        const quest_dict = await getDictionaryList(sheet_list)
-        const stackedMetrics = ['ntrips_total', 'ntrips_purpose', 'ntrips_under80', 'ntrips_commute_mode_confirm',
-                                 'total_trip_length', 'total_trip_length_land',`ntrips_${mode_studied}_total`,
+        const quest_dict = await getDictionaryList(sheet_list);
+        const stackedMetrics = ['ntrips_total', 'ntrips_total_survey', 'ntrips_purpose', 'ntrips_under80', 'ntrips_under80_survey', 'ntrips_commute_mode_confirm',
+                                 'total_trip_length', 'total_trip_length_land', 'total_trip_length_land_survey',`ntrips_${mode_studied}_total`,
                                  `ntrips_${mode_studied}_purpose`,`total_trip_length_${mode_studied}_replaced_mode`].concat(Object.keys(quest_dict));
         const isStackedMetric = stackedMetrics.includes(metric);
         const jsonData = { metric, dateVal, program, metricLabel, dateLabel, programLabel, sizex, sizey };

From 6a367707bc825cc715c1644a9822715be8ac692f Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Tue, 7 May 2024 17:08:54 -0600
Subject: [PATCH 65/70] handle empty dataframes better

errors encountered while testing
---
 viz_scripts/survey_responses.ipynb | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 0221826f..4b55f1da 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -257,11 +257,11 @@
     "\n",
     "    #gather the cols needed for charts and text\n",
     "    survey_trips = survey_trips[['survey_name', 'user_id', 'user_input']]\n",
+    "    survey_trips.groupby('survey_name').count()\n",
     "    \n",
     "except:\n",
     "    survey_trips = pd.DataFrame()\n",
-    "\n",
-    "survey_trips.groupby('survey_name').count()"
+    "\n"
    ]
   },
   {
@@ -279,7 +279,12 @@
     "color_map = mapping_color_surveys(opt_dict)\n",
     "\n",
     "#format survey trips into responses dataframe\n",
-    "df_responses = create_dataframe(survey_trips)\n",
+    "if len(survey_trips) > 0:\n",
+    "    df_responses = create_dataframe(survey_trips)\n",
+    "else:\n",
+    "    df_responses = survey_trips.copy()\n",
+    "    \n",
+    "    \n",
     "file_suffix = scaffolding.get_file_suffix(year, month, program)"
    ]
   },

From 41db5cfbcca86d6d5c8d2219c2d2a71b09f01305 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Tue, 7 May 2024 17:33:57 -0600
Subject: [PATCH 66/70] handle "Other" values in color map

---
 viz_scripts/scaffolding.py         | 2 ++
 viz_scripts/survey_responses.ipynb | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index a2cff03c..7acfdf49 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -233,6 +233,8 @@ def mapping_color_surveys(dic_options):
     colors = {}
     for i in range(len(dictionary_values)):
         colors[dictionary_values[i]] = plt.cm.tab10.colors[i%10]
+    
+    colors['Other'] = plt.cm.tab10.colors[(i+1)%10]
 
     return colors
 
diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 4b55f1da..8498b834 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -214,7 +214,7 @@
     "        \n",
     "        color_map[label] = color\n",
     "    \n",
-    "    print(color_map)\n",
+    "    color_map['Other'] = existing_map['Other']\n",
     "    \n",
     "    return color_map"
    ]

From fb7bc70ed7db6dea9a65677e4e4f1c634f51b313 Mon Sep 17 00:00:00 2001
From: Abby Wheelis <abby.wheelis@gmail.com>
Date: Tue, 7 May 2024 17:35:56 -0600
Subject: [PATCH 67/70] tidy code

---
 viz_scripts/survey_responses.ipynb | 2 --
 1 file changed, 2 deletions(-)

diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 8498b834..08f205ba 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -137,7 +137,6 @@
     "    normalized_responses = pd.json_normalize(df.apply(get_response_to_normalize, axis=1))\n",
     "    normalized_responses['survey_name'] = df['survey_name']\n",
     "    normalized_responses['user_id'] = df['user_id']\n",
-    "    \n",
     "\n",
     "    #update the column names\n",
     "    rename_nests = {}\n",
@@ -241,7 +240,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#load all of the composite trips - require sections\n",
     "tq = scaffolding.get_time_query(year, month)\n",
     "all_confirmed_trips = scaffolding.load_all_confirmed_trips(tq)\n",
     "#we need to filter out trips (based on if including test users)\n",

From 334c95bfa19e720d73a9d1b7cb0ee604f0fdc722 Mon Sep 17 00:00:00 2001
From: "K. Shankari" <shankari@eecs.berkeley.edu>
Date: Tue, 7 May 2024 23:36:37 -0700
Subject: [PATCH 68/70] =?UTF-8?q?=F0=9F=94=A5=20Remove=20git=20and=20em-co?=
 =?UTF-8?q?mmon=20since=20they=20are=20in=20the=20server=20repo=20already?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As part of
https://github.com/e-mission/e-mission-server/pull/965
and
https://github.com/e-mission/e-mission-server/commit/d33ce2e7f75c3a9b2e5dd2a19d40faa9218b11b0
---
 viz_scripts/docker/environment36.dashboard.additions.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/viz_scripts/docker/environment36.dashboard.additions.yml b/viz_scripts/docker/environment36.dashboard.additions.yml
index 1b63535f..59d26ebc 100644
--- a/viz_scripts/docker/environment36.dashboard.additions.yml
+++ b/viz_scripts/docker/environment36.dashboard.additions.yml
@@ -4,8 +4,6 @@ channels:
 - defaults
 dependencies:
 - seaborn=0.11.1
-- git
 - pip:
   - nbparameterise==0.6
   - devcron==0.4
-  - git+https://github.com/JGreenlee/e-mission-common@0.4.3

From ef4786ebdec25bc0a8fcd71adcbc5839eddb17c2 Mon Sep 17 00:00:00 2001
From: "K. Shankari" <shankari@eecs.berkeley.edu>
Date: Wed, 8 May 2024 01:21:39 -0700
Subject: [PATCH 69/70] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20=20Upgrade=20base=20?=
 =?UTF-8?q?image?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We do not need to manually install em-common since it was already installed in the base server image as part of
https://github.com/e-mission/e-mission-server/pull/965
and
https://github.com/e-mission/e-mission-server/commit/d33ce2e7f75c3a9b2e5dd2a19d40faa9218b11b0

So we removed the manual install in 334c95bfa19e720d73a9d1b7cb0ee604f0fdc722.
However, we do need to bump up the base image to a version that includes em-common.
---
 viz_scripts/Dockerfile            | 2 +-
 viz_scripts/docker/Dockerfile.dev | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/viz_scripts/Dockerfile b/viz_scripts/Dockerfile
index d1edd037..96c97bf7 100644
--- a/viz_scripts/Dockerfile
+++ b/viz_scripts/Dockerfile
@@ -1,5 +1,5 @@
 # python 3
-FROM shankari/e-mission-server:master_2024-04-15--53-23
+FROM shankari/e-mission-server:master_2024-05-06--36-33
 
 VOLUME /plots
 
diff --git a/viz_scripts/docker/Dockerfile.dev b/viz_scripts/docker/Dockerfile.dev
index 4c1c6c97..7d22ecef 100644
--- a/viz_scripts/docker/Dockerfile.dev
+++ b/viz_scripts/docker/Dockerfile.dev
@@ -1,5 +1,5 @@
 # python 3
-FROM shankari/e-mission-server:master_2024-04-15--53-23
+FROM shankari/e-mission-server:master_2024-05-06--36-33
 
 VOLUME /plots
 

From 91cc8e7d69415c346f75606bb220edcd6b1416e8 Mon Sep 17 00:00:00 2001
From: "K. Shankari" <shankari@eecs.berkeley.edu>
Date: Wed, 8 May 2024 01:31:17 -0700
Subject: [PATCH 70/70] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20=20Minor=20fixes=20t?=
 =?UTF-8?q?o=20the=20survey=20metrics?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- use `.get` to check whether this is an enketo survey so that it works for
  older deployments that predate the `survey_info` functionality as well. This
  has no functional difference since, if there is no `survey_info`, this is not
  a survey. But we get a better exception and avoid confusion later.
- improve error handling for survey_responses by handling the attribute, key and
  undefined-variable errors separately from the other, more complex errors
---
 viz_scripts/survey_metrics.ipynb   |  2 +-
 viz_scripts/survey_responses.ipynb | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/viz_scripts/survey_metrics.ipynb b/viz_scripts/survey_metrics.ipynb
index 19bc0e0d..60886caf 100644
--- a/viz_scripts/survey_metrics.ipynb
+++ b/viz_scripts/survey_metrics.ipynb
@@ -72,7 +72,7 @@
    "outputs": [],
    "source": [
     "# Do not run this notebook at all unless it is for a survey configuration; nbclient will run up through this cell\n",
-    "if not survey_info['trip-labels'] == 'ENKETO':\n",
+    "if not survey_info.get('trip-labels', None) == 'ENKETO':\n",
     "    ipython = get_ipython()\n",
     "    ipython._showtraceback = scaffolding.no_traceback_handler\n",
     "    raise Exception(\"The plots in this notebook are only relevant to deployments with trip-level surveys\")"
diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb
index 08f205ba..28d7c3f2 100644
--- a/viz_scripts/survey_responses.ipynb
+++ b/viz_scripts/survey_responses.ipynb
@@ -53,7 +53,7 @@
    "outputs": [],
    "source": [
     "# Do not run this notebook at all unless it is for a survey configuration; nbclient will run up through this cell\n",
-    "if not survey_info['trip-labels'] == 'ENKETO':\n",
+    "if not survey_info.get('trip-labels', None) == 'ENKETO':\n",
     "    ipython = get_ipython()\n",
     "    ipython._showtraceback = scaffolding.no_traceback_handler\n",
     "    raise Exception(\"The plots in this notebook are only relevant to deployments with trip-level surveys\")"
@@ -372,10 +372,14 @@
     "            \n",
     "            plot_and_text_stacked_bar_chart(plot_df, \"Responses\", ax, text_results, get_survey_colors(list(plot_df.index.values) ,color_map), debug_df)\n",
     "            set_title_and_save(fig, text_results, plot_title, filename)\n",
-    "\n",
-    "        except:\n",
+    "        except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
+    "            plt.clf()\n",
     "            generate_missing_plot(plot_title_no_quality, debug_df, filename)\n",
-    "            alt_text = store_alt_text_missing(debug_df, filename, plot_title_no_quality)"
+    "            alt_text = store_alt_text_missing(debug_df, filename, plot_title_no_quality)        \n",
+    "            alt_html = store_alt_html_missing(debug_df, filename, plot_title_no_quality)\n",
+    "        except Exception as e:\n",
+    "            fig, ax = plt.subplots()\n",
+    "            plot_and_text_error(e, ax, filename)"
    ]
   }
  ],