Add support for OBS Lite binary recording import #367

Open · wants to merge 2 commits into main
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "api/proto"]
path = api/proto
url = https://github.com/openbikesensor/proto.git
2 changes: 1 addition & 1 deletion Dockerfile
@@ -21,7 +21,7 @@ RUN npm run build
# Build the API and add the built frontend to it
#############################################

FROM python:3.11.3-bullseye
FROM python:3.12.5-bullseye

RUN apt-get update &&\
apt-get install -y \
14 changes: 13 additions & 1 deletion api/Dockerfile
@@ -1,9 +1,21 @@
FROM python:3.11.3-bullseye
FROM python:3.12.5-bullseye

# install protobuf-compiler (protoc) to /opt/protobuf, but don't use the debian
# one, as it is horribly outdated
RUN wget -O /tmp/protoc.zip \
https://github.com/protocolbuffers/protobuf/releases/download/v26.0/protoc-26.0-linux-x86_64.zip \
&& unzip -d /opt/protobuf /tmp/protoc.zip

WORKDIR /opt/obs/proto

ADD proto/ /opt/obs/proto/
RUN PATH=/opt/protobuf/bin:$PATH make && pip install .

WORKDIR /opt/obs/api

ADD requirements.txt /opt/obs/api/
RUN pip install -r requirements.txt

ADD setup.py /opt/obs/api/
ADD obs /opt/obs/api/obs/
RUN pip install -e .
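
This stage compiles the OpenBikeSensor protobuf definitions from the new api/proto submodule with a recent protoc and pip-installs the generated Python package, which is what lets the API deserialize binary recordings. A minimal sketch of how such generated bindings are typically consumed; the module and message names below are assumed placeholders, not the actual names generated from openbikesensor/proto:

# Sketch only: "obs_proto_pb2" and "TrackRecording" are hypothetical names,
# standing in for whatever the openbikesensor/proto build actually generates.
import obs_proto_pb2

def load_recording(path):
    msg = obs_proto_pb2.TrackRecording()   # hypothetical generated message type
    with open(path, "rb") as f:
        msg.ParseFromString(f.read())      # standard protobuf deserialization call
    return msg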
102 changes: 74 additions & 28 deletions api/obs/api/process/__init__.py
@@ -1,27 +1,28 @@
import asyncio
from datetime import datetime
from functools import partial
import hashlib
import json
import logging
import os
import json
import asyncio
import hashlib
import struct
import pytz
from os.path import join
from datetime import datetime
import re
import struct

import numpy
import pytz
from shapely import Point
from shapely.wkb import dumps as dump_wkb
from sqlalchemy import delete, func, select, and_
from sqlalchemy.orm import joinedload
from haversine import Unit, haversine_vector
from geopy import distance

from .snapping import snap_to_roads, wsg84_to_mercator
from .obs_csv import import_csv
from haversine import Unit, haversine_vector
import numpy
from sqlalchemy import and_, delete, func, select
from sqlalchemy.orm import joinedload

from obs.api.db import OvertakingEvent, RoadUsage, Track, UserDevice, make_session
from obs.api.app import app
from obs.api.db import OvertakingEvent, RoadUsage, Track, UserDevice, make_session
from .obs_binary import process_binary
from .obs_csv import process_csv
from .snapping import wsg84_to_mercator

log = logging.getLogger(__name__)

@@ -120,14 +121,11 @@ async def process_track(session, track):
)
os.makedirs(output_dir, exist_ok=True)

(
df,
event_rows,
track_metadata,
events,
track_json,
track_raw_json,
) = await process_track_file(session, original_file_path)
df, track_metadata = await process_track_file(
session, original_file_path, track.original_file_name
)

event_rows, events, track_json, track_raw_json = convert_result_dataframe(df)

for output_filename, data in [
("events.json", events),
@@ -223,13 +221,40 @@ def fix_nan(v):
return v


async def process_track_file(session, track_file):
log.info("Load CSV file at %s", track_file)
df, track_metadata = import_csv(track_file)
def guess(track_file, original_file_name):
# The filename strongly suggests a binary file
if re.match(r".+\.(obsr?(\.gz)?|protobuf|cobs|bin)$", original_file_name):
log.debug("Trying binary import due to filename %r.", original_file_name)
return [process_binary]

# Snap track to roads from the database, adding latitude_snapped and longitude_snapped
df = await snap_to_roads(session, df)
# The filename suggests a CSV file
if re.match(r"\.csv$", original_file_name):
log.debug(
"Trying CSV import, then binary, due to filename %r.", original_file_name
)
return [process_csv, process_binary]

# No filename hint: sniff the first bytes for the CSV header magic
try:
with open(track_file, "rb") as f:
start = f.read(256)
if b"OBSDataFormat" in start:
log.debug(
"Trying CSV import due to file strat containing 'OBSDataFormat'."
)
return [process_csv]
except Exception:
pass

# Still not sure: no filename hint and no magic bytes found
log.debug(
"Trying binary import, then CSV, because nothing else matched the filename %s.",
original_file_name,
)
return [process_binary, process_csv]


def convert_result_dataframe(df):
# remove entries with missing data
event_rows = df[df["confirmed"] & ~numpy.isnan(df["distance_overtaker"])]

@@ -273,7 +298,28 @@ async def process_track_file(session, track_file):
},
}

return df, event_rows, track_metadata, events, track_json, track_raw_json
return event_rows, events, track_json, track_raw_json


async def process_track_file(session, track_file, original_file_name):
log.info(
"Loading track file at %s, original file name %r.",
track_file,
original_file_name,
)

process_functions = guess(track_file, original_file_name)

for i, process_function in enumerate(process_functions):
try:
return await process_function(session, track_file)
except Exception:
if i < len(process_functions) - 1:
log.warning("Import failed, trying next format.", exc_info=True)
else:
raise

raise ValueError("No import successful.")


async def clear_track_data(session, track):
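
The importer selection works in two steps: guess() maps the original filename to an ordered list of importer functions (binary first for .obs/.obsr/.protobuf/.cobs/.bin uploads, CSV first for .csv), falling back to sniffing the first 256 bytes for the 'OBSDataFormat' header, and process_track_file() then tries each importer in turn, only re-raising if the last candidate also fails. A standalone sketch of that fallback pattern, with stand-in parser names rather than the real process_csv/process_binary:

# Sketch of the try-formats-in-order pattern used by process_track_file;
# the parser functions passed in are stand-ins, not the real importers.
def try_importers(importers, path):
    last_error = None
    for importer in importers:
        try:
            return importer(path)
        except Exception as err:
            # A failed parse only means "try the next candidate format".
            last_error = err
    raise ValueError("No import successful.") from last_error

# Usage (parse_binary and parse_csv are hypothetical stand-ins):
# track = try_importers([parse_binary, parse_csv], "track.obs")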