Source code for viewclust.get_users_run

from viewclust.job_use import job_use
import pandas as pd


def get_users_run(jobs, d_from, target, d_to='', use_unit='cpu', serialize_running=''):
    """Return the running-resource usage of every unique user in ``jobs``.

    For each distinct value in the ``user`` column, the user's jobs are
    passed to ``job_use`` and the resulting "running" series is kept,
    restricted to the query window. The per-user series are joined
    side by side into a single frame. This function is a stepping stone
    for plotting usage figures.

    Parameters
    ----------
    jobs : DataFrame
        Job DataFrame, typically generated by slurm/sacct_jobs or the
        ccmnt package. Must contain a ``user`` column.
    d_from : date str
        Beginning of the query period, e.g. '2019-04-01T00:00:00'.
    target : int-like
        Typically a cpu allocation or core-equivalent value for a
        particular account. Often 50. Forwarded to ``job_use``.
    d_to : date str, optional
        End of the query period, e.g. '2020-01-01T00:00:00'. When empty,
        it is forwarded unchanged to ``job_use`` (which is expected to
        default to "now" -- confirm against ``job_use``) and the result
        is not truncated at the upper end.
    use_unit : str, optional
        Usage unit to examine. One of: {'cpu', 'cpu-eqv', 'gpu',
        'gpu-eqv'}. Defaults to 'cpu'.
    serialize_running : str, optional
        If non-empty, the returned frame is pickled to this path via
        ``DataFrame.to_pickle``. Defaults to empty (no pickling).

    Returns
    -------
    user_running_cat : DataFrame
        One column per unique user (named after the user) holding that
        user's running resources. Empty when ``jobs`` has no users.
    """
    # An empty d_to means "no explicit end": use an open-ended slice rather
    # than passing '' as the upper label.
    slice_end = d_to if d_to != '' else None

    per_user_running = []
    for user in jobs['user'].unique():
        # Exact comparison on purpose: Series.str.match() interprets the
        # name as a regex anchored only at the start, so 'bob' would also
        # pick up the jobs of 'bobby'.
        user_jobs = jobs[jobs['user'] == user].copy()
        _, _, user_running, _ = job_use(user_jobs, d_from, target,
                                        d_to=d_to, use_unit=use_unit)
        user_running = user_running[d_from:slice_end]
        per_user_running.append(
            pd.Series(user_running, index=user_running.index, name=user))

    if per_user_running:
        # A single concat yields a frame even for one user and avoids
        # re-copying the accumulated frame on every iteration.
        user_running_cat = pd.concat(per_user_running, axis=1)
    else:
        # No users at all: return an empty frame instead of failing on an
        # unbound variable.
        user_running_cat = pd.DataFrame()

    if serialize_running != '':
        user_running_cat.to_pickle(serialize_running)
    return user_running_cat