| Field | Value |
|---|---|
| Branch | feat/nnx-trainstate-and-training-loop |
| Commit | 8bb919bd5 |
| Date | 2026-04-20 15:40 |
| Run ID | feat_nnx_trainstate_and_training_loop_20260420 |
| Hardware | V6e-8 TPU (8 devices, 31.25 GiB/device) |
| Python | Python 3.12.12 |
| JAX | 0.9.2 |
| jaxlib | 0.9.2 |
| Flax | 0.12.6 |
| Optax | 0.2.6 |
| Orbax | 0.11.28 |
| NNX flags | pure_nnx=False pure_nnx_decoder=False enable_nnx=False |


26/26 passed.

| Test | Result | Log |
|---|---|---|
| 01_base | PASS | log |
| 02_synthetic | PASS | log |
| 03_dropout | PASS | log |
| 04_int8 | PASS | log |
| 05_fp8 | PASS | log |
| 06_grad_accum | PASS | log |
| 07_eval | PASS | log |
| 08_checkpoint_async_true_save | PASS | log |
| 08_checkpoint_async_true_resume | PASS | log |
| 09_pdb_lt_1 | PASS | log |
| 10_shardy_true | PASS | log |
| 10_shardy_false | PASS | log |
| 11_optimizer_offload_false | PASS | log |
| 11_optimizer_offload_true | PASS | log |
| 13_scan_layers_false | PASS | log |
| 13_scan_layers_true | PASS | log |
| 14_async_ckpt_false_save | PASS | log |
| 14_async_ckpt_false_resume | PASS | log |
| 15_ocdbt_true_save | PASS | log |
| 15_ocdbt_true_resume | PASS | log |
| 15_ocdbt_false_save | PASS | log |
| 15_ocdbt_false_resume | PASS | log |
| 16_zarr3_true_save | PASS | log |
| 16_zarr3_true_resume | PASS | log |
| 16_zarr3_false_save | PASS | log |
| 16_zarr3_false_resume | PASS | log |


| Log | Location |
|---|---|
| Logs | ~/maxtext/venv_runs/feat_nnx_trainstate_and_training_loop_20260420_142345/linen/logs/ |
| GCS output (checkpoints + profiler) | gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345/ |


To reproduce any individual test, first activate the venv and set the required environment variables:

```bash
source ~/maxtext/maxtext_venv/bin/activate
export PYTHONPATH=src
export DECOUPLE_GCLOUD=TRUE
```

Then run the command for the desired test:

**01_base**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_01_base
```

**02_synthetic**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_02_synthetic \
  dataset_type=synthetic
```

**03_dropout**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_03_dropout \
  dropout_rate=0.02 \
  enable_dropout=True \
  max_target_length=128
```

**04_int8**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_04_int8 \
  quantization=int8
```

**05_fp8**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_05_fp8 \
  quantization=fp8 \
  steps=20 \
  learning_rate_schedule_steps=20 \
  sharding_tolerance=0.10
```

**06_grad_accum**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_06_grad_accum \
  gradient_accumulation_steps=4 \
  dataset_type=synthetic
```

**07_eval**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_07_eval \
  eval_interval=5 \
  eval_steps=2
```

**08_checkpoint_async_true_save**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_08_checkpoint_async_true \
  enable_checkpointing=True \
  checkpoint_period=5 \
  steps=10 \
  learning_rate_schedule_steps=20 \
  dataset_type=synthetic \
  profiler='' \
  async_checkpointing=True
```

**08_checkpoint_async_true_resume**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_08_checkpoint_async_true \
  enable_checkpointing=True \
  checkpoint_period=5 \
  steps=20 \
  learning_rate_schedule_steps=20 \
  dataset_type=synthetic \
  profiler='' \
  async_checkpointing=True
```

**09_pdb_lt_1**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_09_pdb_lt_1 \
  per_device_batch_size=0.25 \
  ici_tensor_parallelism=4 \
  ici_fsdp_parallelism=2
```

**10_shardy_true**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_10_shardy_true \
  shardy=True
```

**10_shardy_false**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_10_shardy_false \
  shardy=False
```

**11_optimizer_offload_false**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_11_optimizer_offload_false \
  optimizer_memory_host_offload=False \
  dataset_type=synthetic
```

**11_optimizer_offload_true**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_11_optimizer_offload_true \
  optimizer_memory_host_offload=True \
  dataset_type=synthetic
```

**13_scan_layers_false**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_13_scan_layers_false \
  scan_layers=False \
  dataset_type=synthetic
```

**13_scan_layers_true**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_13_scan_layers_true \
  scan_layers=True \
  dataset_type=synthetic
```

**14_async_ckpt_false_save**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_14_async_ckpt_false \
  enable_checkpointing=True \
  checkpoint_period=5 \
  steps=10 \
  learning_rate_schedule_steps=20 \
  dataset_type=synthetic \
  profiler='' \
  async_checkpointing=False
```

**14_async_ckpt_false_resume**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_14_async_ckpt_false \
  enable_checkpointing=True \
  checkpoint_period=5 \
  steps=20 \
  learning_rate_schedule_steps=20 \
  dataset_type=synthetic \
  profiler='' \
  async_checkpointing=False
```

**15_ocdbt_true_save**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_15_ocdbt_true \
  enable_checkpointing=True \
  checkpoint_period=5 \
  steps=10 \
  learning_rate_schedule_steps=20 \
  dataset_type=synthetic \
  profiler='' \
  checkpoint_storage_use_ocdbt=True
```

**15_ocdbt_true_resume**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_15_ocdbt_true \
  enable_checkpointing=True \
  checkpoint_period=5 \
  steps=20 \
  learning_rate_schedule_steps=20 \
  dataset_type=synthetic \
  profiler='' \
  checkpoint_storage_use_ocdbt=True
```

**15_ocdbt_false_save**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_15_ocdbt_false \
  enable_checkpointing=True \
  checkpoint_period=5 \
  steps=10 \
  learning_rate_schedule_steps=20 \
  dataset_type=synthetic \
  profiler='' \
  checkpoint_storage_use_ocdbt=False
```

**15_ocdbt_false_resume**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_15_ocdbt_false \
  enable_checkpointing=True \
  checkpoint_period=5 \
  steps=20 \
  learning_rate_schedule_steps=20 \
  dataset_type=synthetic \
  profiler='' \
  checkpoint_storage_use_ocdbt=False
```

**16_zarr3_true_save**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_16_zarr3_true \
  enable_checkpointing=True \
  checkpoint_period=5 \
  steps=10 \
  learning_rate_schedule_steps=20 \
  dataset_type=synthetic \
  profiler='' \
  checkpoint_storage_use_zarr3=True
```

**16_zarr3_true_resume**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_16_zarr3_true \
  enable_checkpointing=True \
  checkpoint_period=5 \
  steps=20 \
  learning_rate_schedule_steps=20 \
  dataset_type=synthetic \
  profiler='' \
  checkpoint_storage_use_zarr3=True
```

**16_zarr3_false_save**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_16_zarr3_false \
  enable_checkpointing=True \
  checkpoint_period=5 \
  steps=10 \
  learning_rate_schedule_steps=20 \
  dataset_type=synthetic \
  profiler='' \
  checkpoint_storage_use_zarr3=False
```

**16_zarr3_false_resume**

```bash
python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml \
  pure_nnx=False \
  pure_nnx_decoder=False \
  enable_nnx=False \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  dataset_path=tests/assets/local_datasets/c4_en_dataset_minimal \
  dataset_name=c4/en:3.1.0 \
  eval_dataset_name=c4/en:3.1.0 \
  steps=10 \
  learning_rate_schedule_steps=10 \
  enable_checkpointing=True \
  checkpoint_period=10 \
  enable_goodput_recording=False \
  per_device_batch_size=1 \
  ici_fsdp_parallelism=8 \
  log_config=False \
  tokenizer_path=src/maxtext/assets/tokenizers/tokenizer.llama2 \
  skip_jax_distributed_system=True \
  use_vertex_tensorboard=False \
  vertex_tensorboard_project= \
  vertex_tensorboard_region= \
  enable_pathways_goodput=False \
  enable_gcp_goodput_metrics=False \
  monitor_goodput=False \
  attention=dot_product \
  profiler=xplane \
  debug_sharding=True \
  base_output_directory=gs://wanglance-maxtext/linen_ckpt_feat_nnx_trainstate_and_training_loop_20260420_142345 \
  run_name=linen_feat_nnx_trainstate_and_training_loop_20260420_142345_16_zarr3_false \
  enable_checkpointing=True \
  checkpoint_period=5 \
  steps=20 \
  learning_rate_schedule_steps=20 \
  dataset_type=synthetic \
  profiler='' \
  checkpoint_storage_use_zarr3=False
```