Skip to content

Commit

Permalink
Temp file delete (#73)
Browse files Browse the repository at this point in the history
* temp file delete

* temp file delete
  • Loading branch information
ita9naiwa authored Sep 14, 2023
1 parent ee01d5b commit c589786
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 0 deletions.
15 changes: 15 additions & 0 deletions buffalo/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(self, opt, *args, **kwargs):
self.prepro = getattr(prepro, self.opt.data.value_prepro.name)(self.opt.data.value_prepro)
self.value_prepro = self.prepro
self.data_type = None
self.temp_file_list = []

@abc.abstractmethod
def create_database(self, filename, **kwargs):
Expand Down Expand Up @@ -166,6 +167,12 @@ def close(self):
self.handle = None
self.header = None

def temp_file_clear(self):
    """Delete every temporary file registered in ``self.temp_file_list``.

    Entries are normally path strings, but an open file handle may have
    been registered instead of its path.  Such a handle is closed and
    resolved to its ``name`` so that the file behind it is removed too;
    passing the handle itself to ``os.path.isfile`` would raise
    ``TypeError`` and abort the whole cleanup.

    Paths that no longer exist, or that are not regular files, are
    skipped.  The list is reset to empty once every entry was handled.
    """
    for entry in self.temp_file_list:
        is_path = isinstance(entry, (str, bytes, os.PathLike))
        if not is_path and hasattr(entry, "name"):
            # File-like object: close it first (closing an already closed
            # handle is a no-op) so the removal also works on platforms
            # that refuse to delete open files, then use its path.
            close = getattr(entry, "close", None)
            if callable(close):
                close()
            entry = entry.name
        if os.path.isfile(entry):
            os.remove(entry)
    self.temp_file_list = []

def _create_database(self, path, **kwargs):
# Create database structure
if os.path.exists(path):
Expand Down Expand Up @@ -469,6 +476,7 @@ def is_valid_option(self, opt) -> bool:
class DataReader(object):
def __init__(self, opt):
self.opt = opt
self.temp_file_list = []

def get_main_path(self):
return self.opt.input.main
Expand All @@ -484,6 +492,7 @@ def _get_temporary_id_list_path(self, obj, name):
if hasattr(self, field_name):
return getattr(self, field_name)
tmp_path = aux.get_temporary_file(self.opt.data.tmp_dir)
self.temp_file_list.append(tmp_path)
with open(tmp_path, "w") as fout:
if isinstance(obj, np.ndarray,) and obj.ndim == 1:
fout.write("\n".join(map(str, obj.tolist())))
Expand All @@ -493,3 +502,9 @@ def _get_temporary_id_list_path(self, obj, name):
raise RuntimeError(f"Unexpected data type for id list: {type(obj)}")
setattr(self, field_name, tmp_path)
return tmp_path

def temp_file_clear(self):
    """Remove the temporary files this reader has created so far.

    Each path recorded in ``self.temp_file_list`` is deleted if it still
    refers to a regular file; anything else is left untouched.  The
    record is then replaced with a fresh empty list.
    """
    # ``filter`` is lazy, so each path is checked right before it is
    # removed, exactly as in a check-then-remove loop.
    for leftover in filter(os.path.isfile, self.temp_file_list):
        os.remove(leftover)
    self.temp_file_list = []
4 changes: 4 additions & 0 deletions buffalo/data/mm.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def get_main_path(self):

log.get_logger("MatrixMarketDataReader").debug("creating temporary matrix-market data from numpy-kind array")
tmp_path = aux.get_temporary_file(self.opt.data.tmp_dir)
self.temp_file_list.append(tmp_path)
with open(tmp_path, "wb") as fout:
if isinstance(main, (np.ndarray,)) and main.ndim == 2:
main = scipy.sparse.csr_matrix(main)
Expand Down Expand Up @@ -172,6 +173,7 @@ def _create_working_data(self, db, source_path, ignore_lines):
vali_indexes = [] if "vali" not in db else db["vali"]["indexes"]
vali_lines = []
file_path = aux.get_temporary_file(self.opt.data.tmp_dir)
self.temp_file_list.append(file_path)
with open(file_path, "w") as w:
fin = open(source_path, mode="r")
file_size = fin.seek(0, 2)
Expand Down Expand Up @@ -272,4 +274,6 @@ def create(self) -> h5py.File:
if os.path.isfile(self.path):
os.remove(self.path)
raise
self.reader.temp_file_clear()
self.temp_file_clear()
self.logger.info("DB built on %s" % data_path)
4 changes: 4 additions & 0 deletions buffalo/data/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def _build_sppmi(self, db, working_data_path, sppmi_total_lines, k):
self.logger.debug("sort working_data")
aux.psort(working_data_path, key=1)
w_path = aux.get_temporary_file(root=self.opt.data.tmp_dir)
self.temp_file_list.append(w_path)
self.logger.debug(f"build sppmi in_parallel. w: {w_path}")
num_workers = psutil.cpu_count()
nnz = parallel_build_sppmi(working_data_path, w_path, sppmi_total_lines, sz, k, num_workers)
Expand Down Expand Up @@ -207,7 +208,9 @@ def _create_working_data(self, db, stream_main_path, itemids,
warnings.simplefilter("ignore", ResourceWarning)
if with_sppmi:
w_sppmi = open(aux.get_temporary_file(root=self.opt.data.tmp_dir), "w")
self.temp_file_list.append(w_sppmi)
file_path = aux.get_temporary_file(root=self.opt.data.tmp_dir)
self.temp_file_list.append(file_path)
with open(stream_main_path) as fin, open(file_path, "w") as w:
total_index = 0
internal_data_type = self.opt.data.internal_data_type
Expand Down Expand Up @@ -308,4 +311,5 @@ def create(self) -> h5py.File:
if os.path.isfile(self.path):
os.remove(self.path)
raise
self.temp_file_clear()
self.logger.info("DB built on %s" % data_path)

0 comments on commit c589786

Please sign in to comment.