wandb_utils.commands.slurm

Module Contents

wandb_utils.commands.slurm.logger
wandb_utils.commands.slurm.SBATCH_TEMPLATE = Multiline-String
Show Value
 1#!/bin/bash
 2{% if num_gpus -%}
 3#SBATCH --gres=gpu:{{ num_gpus }}
 4{% endif -%}
 5{% if partition -%}
 6#SBATCH --partition={{ partition }}
 7{% endif -%}
 8#SBATCH --cpus-per-task={{ cpus_per_task | default('3', true) }}
 9#SBATCH --mem={{ mem | default('12GB', true) }}
10{%- if signals %}
11#SBATCH --signal=B:{{ signals[0] }}@{{ inform_before_time | default('60', true) }}
12{%- endif %}
13{%- set complete_sweep_id_elements = [] %}
14{%- if entity %}{% do complete_sweep_id_elements.append(entity) %}{% endif %}
15{%- if project %}{% do complete_sweep_id_elements.append(project) %}{% endif %}
16{%- do complete_sweep_id_elements.append(sweep) %}
17
18#SBATCH --job-name={{ sweep }}
19{%- if num_agents > 1 and not chain %}
20#SBATCH --array=1-{{num_agents}}
21#SBATCH --output={{ job_dir }}/%A_%a.out
22{%- else %}
23#SBATCH --output={{ job_dir }}/%j.out
24{%- endif %}
25{%- for arg in verbatim_args %}
26#SBATCH --{{ arg }}
27{%- endfor %}
28
29{%- if signals %}
30# trap the signal to the main BATCH script here.
31sig_handler()
32{
33 echo "BATCH interrupted"
34 wait # wait for all children, this is important!
35}
36{%- set signals_fullname = [] %}
37{%- for sig in signals %}{% do signals_fullname.append('SIG'+sig) %}{% endfor %}
38trap 'sig_handler' {{ signals_fullname |join(' ')}}
39{%- endif %}
40
41srun wandb agent {% if run_count %}--count {{ run_count }} {% endif %}{{ complete_sweep_id_elements | join('/') }}
class wandb_utils.commands.slurm.WandbUtilsSlurm(api: wandb.PublicApi, entity: Optional[str], project: Optional[str], sweep: Optional[str], directory: pathlib.Path, sbatch_template: Optional[pathlib.Path])

Bases: object

wandb_utils.commands.slurm.wandb_slurm(ctx: click.Context, entity: Optional[str], project: Optional[str], sweep: Optional[str], directory: pathlib.Path, sbatch_template: Optional[pathlib.Path]) → pandas.DataFrame
wandb_utils.commands.slurm.start_agents_command(slurm: WandbUtilsSlurm, inform_before_time: int, signals: List[str], mem: str, run_count: Optional[int], cpus_per_task: int, partition: Optional[str], num_gpus: Optional[int], num_agents: int, edit: bool, chain: bool, dependency: Optional[str], verbatim_args: List, dry_run: bool, confirm: bool) → None