Source code for suvtk.utils

"""
utils.py
========

This script provides utility functions for executing shell commands, reading CSV files safely,
and determining the number of available CPUs. These utilities are used across various modules
in the project.

Functions
---------
Exec(CmdLine, fLog=None, capture=False)
    Execute a shell command and optionally log or capture the output.

safe_read_csv(path, **kwargs)
    Read a CSV file with ASCII encoding and handle UnicodeDecodeError.

get_available_cpus()
    Get the number of available CPUs for the current process.
"""

import os
import subprocess
import sys

import pandas as pd
import rich_click as click


# Adapted from https://github.com/rcedgar/palm_annot/blob/77ac88ef7454dd3be9e5cbdb55792ce1ed7db95c/py/palm_annot.py#L121-L132
[docs] def Exec(CmdLine, fLog=None, capture=False, raise_on_error=True): """ Execute a shell command with optional logging and output capture. Parameters ---------- CmdLine : str The command line string to execute in the shell. fLog : file-like object, optional A file object (opened for writing) to which stdout and stderr will be written. If `None`, no file logging is performed. capture : bool, default=False If True, captures stdout and stderr and returns stdout upon success. If False, outputs are printed directly to the console. raise_on_error : bool, default=True If True, raises a `subprocess.CalledProcessError` when the command exits with a non-zero return code. If False, returns a dictionary containing the command results instead. Returns ------- str or None or dict - If `capture` is True and the command succeeds, returns the captured stdout as a string. - If `capture` is False and the command succeeds, returns None. - If `raise_on_error` is False and the command fails, returns a dict with keys: - 'returncode' : int Exit status of the process. - 'stdout' : str or None Captured standard output, if any. - 'stderr' : str or None Captured standard error, if any. - 'cmd' : str The command that was executed. Raises ------ subprocess.CalledProcessError If the command returns a non-zero exit code and `raise_on_error` is True. The exception includes the return code, command, stdout, and stderr. Notes ----- - When `capture` is False, stdout and stderr are streamed directly to the console instead of being captured. - Both stdout and stderr are written to `fLog` if provided, regardless of the `capture` setting. """ def write_log(message, is_error=False): # Always write to fLog if provided if fLog and message: fLog.write(message) # Print to console only when not capturing if not capture and message: stream = sys.stderr if is_error else sys.stdout stream.write(message) # Set up pipes explicitly to control capture behavior stdout_pipe = subprocess.PIPE if capture else None stderr_pipe = subprocess.PIPE if capture else None result = subprocess.run( CmdLine, shell=True, stdout=stdout_pipe, stderr=stderr_pipe, text=True, check=False, ) # Log outputs if we have them if result.stdout: write_log(result.stdout) if result.stderr: write_log(result.stderr, is_error=True) if result.returncode == 0: return result.stdout if capture else None # Handle non-zero exit if raise_on_error: # Preserve legacy behavior: raise with captured data if available raise subprocess.CalledProcessError( result.returncode, CmdLine, output=result.stdout, stderr=result.stderr ) # Caller can inspect this without exceptions return { "returncode": result.returncode, "stdout": result.stdout, "stderr": result.stderr, "cmd": CmdLine, }
[docs] def safe_read_csv(path, **kwargs): """ Reads a CSV file using ASCII encoding. If a UnicodeDecodeError occurs, raises a ClickException showing the offending character. Parameters ---------- path : str Path to the CSV file. **kwargs : dict Additional arguments to pass to `pandas.read_csv`. Returns ------- pandas.DataFrame The contents of the CSV file. Raises ------ click.ClickException If the file contains non-ASCII characters. """ try: return pd.read_csv(path, encoding="ascii", **kwargs) except UnicodeDecodeError as e: offending_bytes = e.object[e.start : e.end] # Try decoding using UTF-8 to show the offending character try: offending_char = offending_bytes.decode("utf-8") except Exception: offending_char = repr(offending_bytes) raise click.ClickException( f"Only ASCII characters are allowed in file '{path}'. " f"Offending character: {offending_char}. Error: {str(e)}" )
# Copied from https://github.com/EricDeveaud/genomad/blob/030ab6434654435ce75243347c97be6f40ea175b/genomad/cli.py#L250-L257
[docs] def get_available_cpus(): """ Get the number of available CPUs for the current process. Returns ------- int The number of available CPUs. """ try: # Try to get the number of cores available to this process CPUS = len(os.sched_getaffinity(0)) except AttributeError: # Windows / MacOS probably don't have this functionality CPUS = os.cpu_count() return CPUS