diff --git a/reshape.ipynb b/reshape.ipynb index e874b6c..1cb3c32 100644 --- a/reshape.ipynb +++ b/reshape.ipynb @@ -1857,7 +1857,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "f85ea9b9-c0cc-41d1-b60e-9abf6aa59b68", "metadata": {}, "outputs": [ @@ -1865,22 +1865,30 @@ "name": "stdout", "output_type": "stream", "text": [ - "Processing folder: NIWOT_calibration_flight_08_2020/NEON_D13_NIWO_DP1_20200801_161441_reflectance\n" + "Processing folder: NIWOT_calibration_flight_08_2020/NEON_D13_NIWO_DP1_20200807_170802_reflectance\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "816df44aa6f94a9da252152bb66b25dc", + "model_id": "68d0e9b150a548e49532db11b53584ac", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Processing CSV: 0%| | 0/66 [00:00, ?chunk/s]" + "Processing CSV: 0%| | 0/54 [00:00, ?chunk/s]" ] }, "metadata": {}, "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No matching rows found in the large CSV.\n", + "Filtered data saved to NIWOT_calibration_flight_08_2020/NEON_D13_NIWO_DP1_20200807_170802_reflectance/NEON_D13_NIWO_DP1_20200807_170802_reflectance_polygons.csv\n" + ] } ], "source": [ @@ -2011,11 +2019,679 @@ " print(f\"Error processing {folder_path}: {e}\")\n", " \n", "# Example usage:\n", - "folder_path = \"NIWOT_calibration_flight_08_2020/NEON_D13_NIWO_DP1_20200801_161441_reflectance\"\n", + "folder_path = \"NIWOT_calibration_flight_08_2020/NEON_D13_NIWO_DP1_20200807_170802_reflectance\"\n", "polygons_path = 'Datasets/niwot_aop_polygons_2023_12_8_23_analysis_ready_half_diam.gpkg'\n", "\n", "combined_processing_function(folder_path, polygons_path)\n" ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "b8494229-143a-4dac-98bb-0bf84218dd1c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Merged data saved to 
import os
import glob
import pandas as pd
import geopandas as gpd
import rasterio
from rasterio.windows import from_bounds
from shapely.geometry import box


def process_folder_data(folder_path, polygons_path, target_crs="EPSG:32613"):
    """
    Merge polygon attributes onto the per-pixel rows of a folder's '*_polygons.csv'.

    The function looks in `folder_path` for a raster whose name ends in '_envi' and
    for a CSV whose name ends in '_polygons.csv'. For every polygon that intersects
    the raster extent it enumerates the (row, col) pixel indices of the polygon's
    bounding-box window, attaches the polygon's attributes to each index, and
    inner-joins that table with the CSV on 'Pixel_Row' / 'Pixel_Col'. The result is
    written next to the input CSV with the suffix '_polygons_merged.csv'.

    Nothing is written (and a message is printed) when the raster or CSV is missing,
    or when no polygon overlaps the raster.

    Parameters:
    - folder_path (str): Folder containing the '*_envi' raster and the '*_polygons.csv' file.
    - polygons_path (str): Path to the polygons file (GeoPackage).
    - target_crs (str): CRS the polygons are reprojected to before they are compared
      with the raster bounds. Defaults to "EPSG:32613", the value that was previously
      hard-coded.
    """
    csv_suffix = "_polygons.csv"

    # The pattern already requires the name to END in '_envi', so '.hdr' / '.img'
    # sidecar files can never match and need no extra filtering.
    # Sorting makes the "first match" choice deterministic.
    raster_files = sorted(glob.glob(os.path.join(folder_path, "*_envi")))
    csv_files_to_merge = sorted(glob.glob(os.path.join(folder_path, "*" + csv_suffix)))

    if not raster_files or not csv_files_to_merge:
        print("Required raster or '*_polygons.csv' file not found.")
        return

    raster_path = raster_files[0]  # Assuming only one raster file matches
    csv_path_to_merge = csv_files_to_merge[0]  # Assuming only one CSV file matches

    # Swap only the trailing suffix; str.replace() would also rewrite a directory
    # name that happens to contain '_polygons.csv'.
    output_csv_path = csv_path_to_merge[:-len(csv_suffix)] + "_polygons_merged.csv"

    polygons = gpd.read_file(polygons_path).to_crs(target_crs)

    pixel_records = []
    with rasterio.open(raster_path) as src:
        raster_extent = box(*src.bounds)  # loop-invariant, build once
        transform = src.transform

        for _, poly in polygons.iterrows():
            geom = poly.geometry
            # Skip missing/empty geometries and polygons outside the raster.
            if geom is None or geom.is_empty or not raster_extent.intersects(geom):
                continue

            window = from_bounds(*geom.bounds, transform=transform)
            if window.width <= 0 or window.height <= 0:
                continue

            attributes = poly.to_dict()
            # NOTE(review): offsets and lengths are truncated with int(), exactly as
            # before. For windows that are not pixel-aligned this may leave out the
            # last partially covered row/column - confirm this matches how
            # 'Pixel_Row' / 'Pixel_Col' were produced in the '*_polygons.csv'.
            row_off, col_off = int(window.row_off), int(window.col_off)
            n_rows, n_cols = int(window.height), int(window.width)
            pixel_records.extend(
                {'Pixel_Row': row, 'Pixel_Col': col, **attributes}
                for row in range(row_off, row_off + n_rows)
                for col in range(col_off, col_off + n_cols)
            )

    # An empty list would yield a DataFrame without the join columns and make
    # pd.merge raise KeyError, so stop here with an explicit message instead.
    if not pixel_records:
        print("No polygons overlap the raster; nothing to merge.")
        return

    df_polygons_extracted = pd.DataFrame(pixel_records)

    # Load the '*_polygons.csv' to merge
    df_polygons_csv = pd.read_csv(csv_path_to_merge)

    # Keep only pixels present in both tables.
    merged_df = pd.merge(
        df_polygons_extracted,
        df_polygons_csv,
        on=['Pixel_Row', 'Pixel_Col'],
        how='inner',
    )

    merged_df.to_csv(output_csv_path, index=False)
    print(f"Merged data saved to {output_csv_path}")


# Example usage (runs in a notebook/script, but not when this code is imported)
if __name__ == "__main__":
    folder_path = "NIWOT_calibration_flight_08_2020/NEON_D13_NIWO_DP1_20200731_155024_reflectance"
    polygons_path = 'Datasets/niwot_aop_polygons_2023_12_8_23_analysis_ready_half_diam.gpkg'

    process_folder_data(folder_path, polygons_path)
import os


def process_all_folders(parent_directory, polygons_path, processor=None):
    """
    Run the per-folder processing step on every visible subfolder of a directory.

    Entries of `parent_directory` are visited in sorted order so repeated runs
    behave the same. Plain files and hidden folders (names starting with '.',
    e.g. '.ipynb_checkpoints') are reported and skipped. An exception raised while
    processing one folder is printed and does not stop the remaining folders.

    Parameters:
    - parent_directory (str): The parent directory containing folders to process.
    - polygons_path (str): Path to the polygons file (GeoPackage) used in processing.
    - processor (callable, optional): Called as processor(folder_path, polygons_path)
      for each folder. Defaults to `process_folder_data`, which must already be
      defined when this function is called.
    """
    if processor is None:
        # Looked up at call time so this cell can be defined before or after
        # the cell that defines process_folder_data.
        processor = process_folder_data

    for item in sorted(os.listdir(parent_directory)):
        full_path = os.path.join(parent_directory, item)

        if not os.path.isdir(full_path):
            print(f"Skipping non-directory item: {item}")
            continue

        # Tool-managed folders such as '.ipynb_checkpoints' never hold flight data.
        if item.startswith("."):
            print(f"Skipping hidden folder: {item}")
            continue

        print(f"Processing folder: {item}")
        try:
            processor(full_path, polygons_path)
        except Exception as e:
            # Batch boundary: report the failure and carry on with the next folder.
            print(f"An error occurred while processing {item}: {e}")


# Example usage (runs in a notebook/script, but not when this code is imported)
if __name__ == "__main__":
    parent_directory = "NIWOT_calibration_flight_08_2020"
    polygons_path = 'Datasets/niwot_aop_polygons_2023_12_8_23_analysis_ready_half_diam.gpkg'
    process_all_folders(parent_directory, polygons_path)
import os
import pandas as pd
import glob


def consolidate_polygons_to_master_csv(parent_directory):
    """
    Combine every '*_polygons_merged.csv' found in the subfolders into one CSV.

    Each visible subfolder of `parent_directory` is searched (in sorted order) for
    files matching '*_polygons_merged.csv'. Every file is loaded, tagged with a
    'Flight_line' column holding the subfolder name, and all tables are concatenated
    and written to 'polygon_spectra.csv' inside `parent_directory`. If no matching
    file exists, a message is printed and nothing is written.

    Parameters:
    - parent_directory (str): The directory containing subfolders to search.
    """
    frames = []

    # Sorted traversal keeps the row order of the master CSV reproducible.
    for folder_name in sorted(os.listdir(parent_directory)):
        folder_path = os.path.join(parent_directory, folder_name)

        # Ignore plain files and hidden folders such as '.ipynb_checkpoints'.
        if folder_name.startswith('.') or not os.path.isdir(folder_path):
            continue

        pattern = os.path.join(folder_path, '*_polygons_merged.csv')
        for csv_file in sorted(glob.glob(pattern)):
            # low_memory=False parses each column in one pass, which avoids the
            # mixed-type DtypeWarning and per-chunk dtype guesses on wide files.
            df = pd.read_csv(csv_file, low_memory=False)
            # Record which flight line (subfolder) the rows came from.
            df['Flight_line'] = folder_name
            frames.append(df)

    if not frames:
        print("No *_polygons_merged.csv files found in subfolders.")
        return

    master_df = pd.concat(frames, ignore_index=True)
    output_csv_path = os.path.join(parent_directory, 'polygon_spectra.csv')
    master_df.to_csv(output_csv_path, index=False)
    print(f"Master CSV saved to {output_csv_path}")


# Example usage (runs in a notebook/script, but not when this code is imported)
if __name__ == "__main__":
    # Defined here so the cell does not depend on state left by an earlier cell.
    parent_directory = "NIWOT_calibration_flight_08_2020"
    consolidate_polygons_to_master_csv(parent_directory)
\n", + " | Pixel_Row | \n", + "Pixel_Col | \n", + "GlobalID | \n", + "CreationDate | \n", + "Creator | \n", + "EditDate | \n", + "Editor | \n", + "description_notes | \n", + "dbh | \n", + "tree_height | \n", + "... | \n", + "Landsat_8_band_4 | \n", + "Landsat_8_band_5 | \n", + "Landsat_8_band_6 | \n", + "Landsat_9_band_1 | \n", + "Landsat_9_band_2 | \n", + "Landsat_9_band_3 | \n", + "Landsat_9_band_4 | \n", + "Landsat_9_band_5 | \n", + "Landsat_9_band_6 | \n", + "Flight_line | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "1222 | \n", + "554 | \n", + "{E9346797-777A-4D43-BD01-02A511C57DAA} | \n", + "2023-06-20 19:27:29+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "2023-06-20T19:27:29+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "... | \n", + "2559.932534 | \n", + "729.276808 | \n", + "235.844050 | \n", + "150.485106 | \n", + "227.386952 | \n", + "144.670011 | \n", + "2559.932534 | \n", + "729.276808 | \n", + "235.844050 | \n", + "NEON_D13_NIWO_DP1_20200801_161441_reflectance | \n", + "
1 | \n", + "1222 | \n", + "555 | \n", + "{E9346797-777A-4D43-BD01-02A511C57DAA} | \n", + "2023-06-20 19:27:29+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "2023-06-20T19:27:29+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "... | \n", + "2316.964286 | \n", + "677.671657 | \n", + "213.190474 | \n", + "130.234725 | \n", + "187.391979 | \n", + "114.967046 | \n", + "2316.964286 | \n", + "677.671657 | \n", + "213.190474 | \n", + "NEON_D13_NIWO_DP1_20200801_161441_reflectance | \n", + "
2 | \n", + "1223 | \n", + "554 | \n", + "{E9346797-777A-4D43-BD01-02A511C57DAA} | \n", + "2023-06-20 19:27:29+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "2023-06-20T19:27:29+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "... | \n", + "3274.149362 | \n", + "911.557229 | \n", + "295.761100 | \n", + "177.427782 | \n", + "295.826530 | \n", + "195.855862 | \n", + "3274.149362 | \n", + "911.557229 | \n", + "295.761100 | \n", + "NEON_D13_NIWO_DP1_20200801_161441_reflectance | \n", + "
3 | \n", + "1223 | \n", + "555 | \n", + "{E9346797-777A-4D43-BD01-02A511C57DAA} | \n", + "2023-06-20 19:27:29+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "2023-06-20T19:27:29+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "... | \n", + "3048.000241 | \n", + "919.375727 | \n", + "302.883800 | \n", + "165.633962 | \n", + "261.292609 | \n", + "171.173423 | \n", + "3048.000241 | \n", + "919.375727 | \n", + "302.883800 | \n", + "NEON_D13_NIWO_DP1_20200801_161441_reflectance | \n", + "
4 | \n", + "1224 | \n", + "558 | \n", + "{54D1A59F-2B5C-4D76-913A-690314E314A7} | \n", + "2023-06-20 19:28:37+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "2023-06-20T19:28:37+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "... | \n", + "2242.813980 | \n", + "852.598010 | \n", + "344.220227 | \n", + "165.620471 | \n", + "243.318364 | \n", + "176.134124 | \n", + "2242.813980 | \n", + "852.598010 | \n", + "344.220227 | \n", + "NEON_D13_NIWO_DP1_20200801_161441_reflectance | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
14485 | \n", + "3895 | \n", + "799 | \n", + "{C7FA2D22-8033-4D92-A756-1299D67986B4} | \n", + "2023-06-13 16:03:03+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "2023-06-13T16:03:03+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "... | \n", + "2608.009818 | \n", + "1120.970482 | \n", + "574.358940 | \n", + "209.718434 | \n", + "450.654481 | \n", + "350.527043 | \n", + "2608.009818 | \n", + "1120.970482 | \n", + "574.358940 | \n", + "NEON_D13_NIWO_DP1_20200807_163444_reflectance | \n", + "
14486 | \n", + "3895 | \n", + "800 | \n", + "{C7FA2D22-8033-4D92-A756-1299D67986B4} | \n", + "2023-06-13 16:03:03+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "2023-06-13T16:03:03+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "... | \n", + "2618.926013 | \n", + "1125.762646 | \n", + "576.686649 | \n", + "210.389938 | \n", + "452.442482 | \n", + "352.189134 | \n", + "2618.926013 | \n", + "1125.762646 | \n", + "576.686649 | \n", + "NEON_D13_NIWO_DP1_20200807_163444_reflectance | \n", + "
14487 | \n", + "3895 | \n", + "801 | \n", + "{C7FA2D22-8033-4D92-A756-1299D67986B4} | \n", + "2023-06-13 16:03:03+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "2023-06-13T16:03:03+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "... | \n", + "2259.085294 | \n", + "1435.186943 | \n", + "765.169186 | \n", + "239.016707 | \n", + "474.551501 | \n", + "423.244853 | \n", + "2259.085294 | \n", + "1435.186943 | \n", + "765.169186 | \n", + "NEON_D13_NIWO_DP1_20200807_163444_reflectance | \n", + "
14488 | \n", + "3895 | \n", + "802 | \n", + "{C7FA2D22-8033-4D92-A756-1299D67986B4} | \n", + "2023-06-13 16:03:03+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "2023-06-13T16:03:03+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "... | \n", + "1696.776515 | \n", + "734.542505 | \n", + "326.745070 | \n", + "149.602721 | \n", + "321.140541 | \n", + "232.987153 | \n", + "1696.776515 | \n", + "734.542505 | \n", + "326.745070 | \n", + "NEON_D13_NIWO_DP1_20200807_163444_reflectance | \n", + "
14489 | \n", + "3892 | \n", + "792 | \n", + "{4E5FD762-1FAA-4E10-9D08-97B812020D01} | \n", + "2023-06-13 16:03:49+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "2023-06-13T16:03:49+00:00 | \n", + "Tyler.L.McIntosh_ucboulder | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "... | \n", + "2134.770854 | \n", + "2362.043863 | \n", + "1549.487657 | \n", + "320.706987 | \n", + "553.750150 | \n", + "620.544923 | \n", + "2134.770854 | \n", + "2362.043863 | \n", + "1549.487657 | \n", + "NEON_D13_NIWO_DP1_20200807_163444_reflectance | \n", + "
14490 rows × 904 columns
\n", + "