diff --git a/reasoning_gym/algorithmic/spell_backward.py b/reasoning_gym/algorithmic/spell_backward.py
index 0de8d5f2..ab26f843 100644
--- a/reasoning_gym/algorithmic/spell_backward.py
+++ b/reasoning_gym/algorithmic/spell_backward.py
@@ -98,7 +98,7 @@ class SpellBackwardCurriculum(BaseCurriculum):
         self._define_attributes(
             RangeAttributeDefinition(
                 name="word_len",
-                levels=list(range(3, 11, 1)),
+                levels=list(range(3, 13, 2)),
                 description="Word length",
                 lower_field_name="min_word_len",
                 upper_field_name="max_word_len",
diff --git a/reasoning_gym/coaching/base_curriculum.py b/reasoning_gym/coaching/base_curriculum.py
index 2a6553c5..c4755141 100644
--- a/reasoning_gym/coaching/base_curriculum.py
+++ b/reasoning_gym/coaching/base_curriculum.py
@@ -19,11 +19,22 @@ class RangeAttributeMode(StrEnum):
     UPPER_BOUND = "upper_bound"  # only use the highest range segment
     INCLUSIVE = "inclusive"  # include all previous levels
+    LAST_K = "last_k"  # use only the last *k* difficulty levels


 class DefaultCurriculumContext(CurriculumContext):
-    def __init__(self, mode: RangeAttributeMode = RangeAttributeMode.INCLUSIVE):
+    def __init__(self, mode: RangeAttributeMode = RangeAttributeMode.INCLUSIVE, k: int | None = None):
+        """Create a CurriculumContext used when turning a curriculum into a concrete configuration.
+
+        Args:
+            mode: Strategy for translating a RangeAttributeDefinition level into a concrete range.
+            k: When ``mode`` is ``RangeAttributeMode.LAST_K``, this parameter indicates how many of
+                the most recent difficulty levels (counting backwards from the current one) should
+                be kept. If ``k`` is ``None``, the behaviour falls back to ``INCLUSIVE`` (i.e. keep
+                everything). The parameter is ignored for other modes.
+        """
         self.mode = mode
+        self.k = k  # window size used for LAST_K mode

     def get_range_attr_value(self, curriculum, attr: RangeAttributeDefinition) -> Any:
         level = curriculum.get_attr_level(attr.name)
@@ -39,6 +50,12 @@ class DefaultCurriculumContext(CurriculumContext):
             elif self.mode == RangeAttributeMode.INCLUSIVE:
                 lo_index = 0
                 hi_index = min(level + 1, len(attr.levels) - 1)
+
+            elif self.mode == RangeAttributeMode.LAST_K:
+                hi_index = min(level, len(attr.levels) - 1)
+                window = self.k if self.k is not None else hi_index + 1
+                lo_index = max(0, hi_index - window + 1)
+
         else:
             if self.mode == RangeAttributeMode.UPPER_BOUND:
                 hi_index = min(level, len(attr.levels) - 1)
@@ -48,6 +65,17 @@ class DefaultCurriculumContext(CurriculumContext):
                 lo_index = 0
                 hi_index = min(level, len(attr.levels) - 1)
+            elif self.mode == RangeAttributeMode.LAST_K:
+                hi_index = min(level, len(attr.levels) - 1)
+                window = self.k if self.k is not None else hi_index + 1
+                lo_index = max(0, hi_index - window + 1)
+
+        # Additional handling for LAST_K when attr.ensure_interval is True
+        if attr.ensure_interval and self.mode == RangeAttributeMode.LAST_K:
+            # Re-compute lo_index so that we always return at least a two-value interval
+            if hi_index == lo_index:
+                lo_index = max(0, hi_index - 1)
+
         lo = attr.get_level_value(lo_index)
         hi = attr.get_level_value(hi_index)
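
A minimal standalone sketch of the LAST_K window arithmetic added above (plain Python, no reasoning_gym imports; the helper name last_k_window is illustrative, and the sample levels are the new spell_backward levels):

    def last_k_window(levels: list[int], level: int, k: int | None = None) -> tuple[int, int]:
        # Mirrors the LAST_K branch of DefaultCurriculumContext.get_range_attr_value:
        # clamp the current level, then keep a window of the k most recent levels.
        hi_index = min(level, len(levels) - 1)
        window = k if k is not None else hi_index + 1  # k=None degrades to INCLUSIVE
        lo_index = max(0, hi_index - window + 1)
        return levels[lo_index], levels[hi_index]

    assert last_k_window([3, 5, 7, 9, 11], level=3, k=2) == (7, 9)     # last two levels only
    assert last_k_window([3, 5, 7, 9, 11], level=3, k=None) == (3, 9)  # INCLUSIVE fallback
    assert last_k_window([3, 5, 7, 9, 11], level=0, k=3) == (3, 3)     # window clamped at level 0
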
diff --git a/reasoning_gym/data/holdout_words.txt b/reasoning_gym/data/holdout_words.txt
new file mode 100644
index 00000000..1ccdea73
--- /dev/null
+++ b/reasoning_gym/data/holdout_words.txt
@@ -0,0 +1,400 @@
+jib
+fam
+Jos
+rel
+gun
+Abu
+jet
+bis
+poi
+led
+Fin
+dim
+hei
+sha
+mau
+nep
+nob
+joe
+oft
+kou
+pow
+yea
+mum
+twa
+now
+Mwa
+wae
+Pia
+Bud
+hie
+rea
+bes
+Ree
+bog
+duo
+fey
+zac
+Jef
+Tho
+pet
+yez
+tji
+bot
+nap
+Ben
+mal
+Mon
+Huk
+aid
+jot
+pimp
+adda
+duel
+lees
+oven
+dean
+bhoy
+tret
+Etta
+tolu
+mesh
+punt
+Beck
+mule
+buff
+brae
+gowl
+does
+bego
+tave
+Vote
+Ahir
+Mahi
+Boer
+soft
+Kuba
+bort
+pavy
+even
+unci
+laur
+hatt
+pipi
+rada
+Dane
+thin
+keno
+yerd
+lori
+Coos
+Leto
+Diau
+nife
+hath
+fury
+thus
+date
+nast
+cush
+spot
+clung
+Mbuba
+Gippy
+spent
+crowl
+waugh
+nibby
+urine
+snaky
+pyche
+filet
+lohan
+kedge
+atman
+other
+dooms
+ariel
+adlay
+Brule
+covid
+booty
+Chiot
+blend
+chewy
+shune
+stema
+renky
+twirk
+guyer
+lewis
+palmy
+xylem
+helve
+semen
+scobs
+salep
+mosey
+Sabra
+taqua
+thats
+seave
+Nambe
+flume
+antal
+tubig
+Aimee
+wanle
+unmet
+undog
+acold
+heppen
+lovely
+Ixodes
+manbot
+simlin
+unduly
+whilst
+tremor
+seraph
+streng
+richen
+brutal
+solute
+unique
+motory
+denier
+Andrea
+pinery
+eatery
+Turkic
+vennel
+Elohim
+tampon
+uracil
+untold
+pigdan
+nonene
+router
+japing
+calved
+Selago
+digram
+utinam
+fidate
+Arundo
+hubbub
+rerobe
+Alfirk
+Argive
+buzzle
+papist
+omagra
+arrest
+lucken
+crotyl
+rantan
+greund
+cipher
+maraca
+blenny
+hyoidal
+Regulus
+sphenic
+werefox
+Dagomba
+unsonsy
+reslide
+Himawan
+almadie
+doarium
+Barbara
+gunyang
+ecology
+unvoted
+dropout
+shedded
+neotype
+wriggly
+Zuludom
+ruffled
+runtish
+cantlet
+vitreal
+distome
+modulus
+curlike
+eveweed
+waddler
+akmudar
+dibhole
+lignose
+copyist
+addable
+torques
+acridyl
+deraign
+setline
+preform
+rarebit
+lyncine
+tarnish
+pentace
+lastage
+gleaner
+spiller
+aplasia
+trommel
+goldish
+stadium
+unplied
+grizzler
+seabeard
+slipcase
+cobbling
+guruship
+antipope
+hydremic
+Seleucid
+otosteon
+islander
+lacunose
+nasiform
+chloasma
+indicium
+Seidlitz
+Bisharin
+scission
+moulinet
+frampold
+Macropus
+overwake
+stannate
+gallbush
+bakeoven
+Cytherea
+unrising
+voltzite
+unspared
+Mongolic
+Coccyzus
+systolic
+toilinet
+everyone
+alangine
+perioeci
+diapalma
+parillin
+binodose
+unevaded
+shillety
+Andorran
+apodosis
+goodyism
+capitoul
+peaceman
+anticous
+obeisant
+pulmonar
+emeritus
+apolysis
+mismanage
+hopscotch
+anodynous
+tetarcone
+demilance
+acuminose
+unimbibed
+typophile
+rhagionid
+bloodwort
+splenulus
+Argentine
+resurface
+kingdomed
+outsnatch
+octometer
+morphemic
+praepubis
+unexcised
+maliceful
+waganging
+monosperm
+nailsmith
+Volutidae
+phenolate
+delapsion
+cabureiba
+coxcombic
+mesically
+focimetry
+spearwood
+multirate
+unteeming
+forehatch
+synedrial
+commingle
+grassweed
+pelecypod
+lodgerdom
+phacocele
+orthopedy
+reticulum
+recushion
+pyromucyl
+monkeynut
+Carduelis
+brotherly
+luminesce
+plumiform
+orrisroot
+anthochlor
+rememberer
+unslipping
+militation
+dextrorsal
+mesomorphy
+unsmutched
+Hopkinsian
+neuterness
+termlessly
+cryptogram
+pinipicrin
+overdrench
+otherworld
+multilobed
+iconolatry
+survigrous
+semiuncial
+chromatoid
+precedence
+gillhooter
+antiplague
+Girellidae
+nestiatria
+enthraldom
+elasticize
+claudetite
+cryptopine
+postmeatal
+habitually
+breathable
+nonshedder
+beneficial
+undersweep
+billposter
+extraovate
+rouvillite
+anagenesis
+hydrologic
+lifesaving
+shadowland
+laboratory
+permeative
+copatentee
+schizocyte
+perihelial
+approacher
+cancrizans
+prosthetic
+barramundi
diff --git a/tests/test_spell_backward.py b/tests/test_spell_backward.py
index 022b8228..4495c383 100644
--- a/tests/test_spell_backward.py
+++ b/tests/test_spell_backward.py
@@ -76,7 +76,7 @@ def test_spell_backward_curriculum():
     # test incrementing attribute levels
     curriculum.increment_attr_level("word_len")
     increased_cfg = curriculum.generate_configuration(base_value)
-    assert increased_cfg.min_word_len == 3 and increased_cfg.max_word_len == 4
+    assert increased_cfg.min_word_len == 3 and increased_cfg.max_word_len == 5
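+    # word_len levels are now [3, 5, 7, 9, 11] (previously range(3, 11)); under
+    # the default INCLUSIVE mapping, one increment keeps the minimum at
+    # levels[0] == 3 and moves the maximum to levels[1] == 5.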
 
     # test decrementing attribute levels
     curriculum.decrement_attr_level("word_len")
diff --git a/training/configs/curriculum/spell_backward.yaml b/training/configs/curriculum/spell_backward.yaml
new file mode 100644
index 00000000..2c295ba0
--- /dev/null
+++ b/training/configs/curriculum/spell_backward.yaml
@@ -0,0 +1,211 @@
+hydra:
+  searchpath:
+    - file:///home/ubuntu/verl/verl/trainer/config
+
+defaults:
+  - ppo_trainer
+  - _self_
+
+reasoning_gym:
+  dataset_size: 20000
+  developer_prompt: DeepSeekZero
+  datasets:
+
+curriculum:
+  enabled: True
+  schedule:
+    automatic: False
+    update_steps: 30  # automatic curriculum update every 30 steps
+  last_k: 20
+  success_threshold: 0.70
+  failure_threshold: 0.10
+  curricula:
+    spell_backward:
+      attribute_levels:
+        word_len: 0
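+
+# Schedule semantics (see training/trainers/ray_grpo_trainer.py): update_steps
+# only gates the automatic mode; with automatic: False, a dataset's difficulty
+# is incremented once its mean grouped score exceeds success_threshold and at
+# least last_k samples have been scored.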
+
+reward:
+  use_accuracy: True
+  secondary_rewards:
+    - name: cosine
+      scaling_factor: 0.3
+    - name: format
+      scaling_factor: 0.2
+      kwargs:
+        preappend_thinking_token: False
+
+data:
+  tokenizer: null
+  train_files: train.parquet
+  val_files: test.parquet
+  prompt_key: prompt
+  max_prompt_length: 512
+  max_response_length: 1024
+  train_batch_size: 32
+  val_batch_size: 64
+  return_raw_chat: True
+  return_raw_input_ids: True
+
+actor_rollout_ref:
+  hybrid_engine: True
+  model:
+    path: Qwen/Qwen2.5-3B-Instruct
+    external_lib: null
+    override_config: { }
+    enable_gradient_checkpointing: True
+    use_remove_padding: True
+  actor:
+    strategy: fsdp  # This is for backward-compatibility
+    ppo_mini_batch_size: 16
+    ppo_micro_batch_size: null  # will be deprecated, use ppo_micro_batch_size_per_gpu
+    ppo_micro_batch_size_per_gpu: 4
+    use_dynamic_bsz: False
+    ppo_max_token_len_per_gpu: 12288  # n * ${data.max_prompt_length} + ${data.max_response_length}
+    grad_clip: 1.0
+    clip_ratio: 0.2
+    entropy_coeff: 0.001
+    use_kl_loss: True  # True for GRPO
+    kl_loss_coef: 0.001  # for grpo
+    kl_loss_type: low_var_kl  # for grpo
+    ppo_epochs: 1
+    shuffle: False
+    ulysses_sequence_parallel_size: 1  # sp size
+    optim:
+      lr: 1e-6
+      lr_warmup_steps_ratio: 0.  # the total steps will be injected during runtime
+      min_lr_ratio: null  # only useful for warmup with cosine
+      warmup_style: constant  # select from constant/cosine
+      total_training_steps: 400  # must be overridden by the program
+    fsdp_config:
+      wrap_policy:
+        # transformer_layer_cls_to_wrap: None
+        min_num_params: 0
+      param_offload: False
+      optimizer_offload: False
+      fsdp_size: -1
+  ref:
+    fsdp_config:
+      param_offload: True
+      wrap_policy:
+        # transformer_layer_cls_to_wrap: None
+        min_num_params: 0
+    log_prob_micro_batch_size: null  # will be deprecated, use log_prob_micro_batch_size_per_gpu
+    log_prob_micro_batch_size_per_gpu: 160
+    log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
+    log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
+    ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size}  # sp size
+  rollout:
+    name: vllm
+    temperature: 1.0
+    top_k: -1  # 0 for hf rollout, -1 for vllm rollout
+    top_p: 1
+    prompt_length: ${data.max_prompt_length}  # not used for opensource
+    response_length: ${data.max_response_length}
+    # for vllm rollout
+    dtype: bfloat16  # should align with FSDP
+    gpu_memory_utilization: 0.7
+    ignore_eos: False
+    enforce_eager: True
+    free_cache_engine: True
+    load_format: dummy_dtensor
+    tensor_model_parallel_size: 4
+    max_num_batched_tokens: 12288
+    max_num_seqs: 1024
+    log_prob_micro_batch_size: null  # will be deprecated, use log_prob_micro_batch_size_per_gpu
+    log_prob_micro_batch_size_per_gpu: 160
+    log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
+    log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
+    disable_log_stats: True
+    enable_chunked_prefill: True  # could get higher throughput
+    # for hf rollout
+    do_sample: True
+    use_fire_sampling: False
+    max_model_len: 12288
+    # number of responses (i.e. num sample times)
+    n: 8  # > 1 for grpo
+    val_kwargs:
+      do_sample: True
+
+algorithm:
+  gamma: 1.0
+  lam: 1.0
+  adv_estimator: grpo
+  kl_penalty: kl  # how to estimate kl divergence
+  kl_ctrl:
+    type: fixed
+    kl_coef: 0.001
+
+verbose: True
+
+trainer:
+  balance_batch: True
+  total_epochs: 1
+  total_training_steps: 400
+  project_name: rg-test
+  experiment_name: intra_reasoning_algebra_qwen_3b_composite
+  logger: [ 'console', 'wandb' ]
+  val_generations_to_log_to_wandb: 0
+  nnodes: 1
+  n_gpus_per_node: 4
+  save_freq: 100
+  # auto: find the last ckpt to resume; if none is found, start from scratch
+  resume_mode: auto  # or disable, or resume_path if resume_from_path is set
+  resume_from_path: False
+  test_freq: 100
+  critic_warmup: 0
+  default_hdfs_dir: null
+  remove_previous_ckpt_in_save: False
+  del_local_ckpt_after_load: False
+  default_local_dir: checkpoints/${trainer.project_name}/${trainer.experiment_name}
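+
+# adv_estimator is grpo, which derives advantages from the group of n rollout
+# samples rather than from a learned value function; the critic section below
+# is presumably inherited from the ppo_trainer defaults and unused during
+# GRPO training.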
+critic:
+  strategy: fsdp
+  optim:
+    lr: 1e-5
+    lr_warmup_steps_ratio: 0.  # the total steps will be injected during runtime
+    min_lr_ratio: null  # only useful for warmup with cosine
+    warmup_style: constant  # select from constant/cosine
+    total_training_steps: -1  # must be overridden by the program
+  model:
+    path: ~/models/deepseek-llm-7b-chat
+    tokenizer_path: ${actor_rollout_ref.model.path}
+    override_config: { }
+    external_lib: ${actor_rollout_ref.model.external_lib}
+    enable_gradient_checkpointing: True
+    use_remove_padding: False
+    fsdp_config:
+      param_offload: False
+      optimizer_offload: False
+      wrap_policy:
+        # transformer_layer_cls_to_wrap: None
+        min_num_params: 0
+      fsdp_size: -1
+  ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
+  ppo_micro_batch_size: null  # will be deprecated, use ppo_micro_batch_size_per_gpu
+  ppo_micro_batch_size_per_gpu: null
+  forward_micro_batch_size: ${critic.ppo_micro_batch_size}
+  forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
+  use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
+  ppo_max_token_len_per_gpu: 32768  # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
+  forward_max_token_len_per_gpu: ${critic.ppo_max_token_len_per_gpu}
+  ulysses_sequence_parallel_size: 1  # sp size
+  ppo_epochs: ${actor_rollout_ref.actor.ppo_epochs}
+  shuffle: ${actor_rollout_ref.actor.shuffle}
+  grad_clip: 1.0
+  cliprange_value: 0.5
+
+# Reward model not used for GRPO
+reward_model:
+  enable: False
+  strategy: fsdp
+  model:
+    input_tokenizer: ${actor_rollout_ref.model.path}
+    path: ~/models/FsfairX-LLaMA3-RM-v0.1
+    external_lib: ${actor_rollout_ref.model.external_lib}
+    use_remove_padding: False
+    fsdp_config:
+      min_num_params: 0
+      param_offload: False
+      fsdp_size: -1
+  micro_batch_size: null
+  micro_batch_size_per_gpu: null
+  max_length: null
+  ulysses_sequence_parallel_size: 1
+  use_dynamic_bsz: ${critic.use_dynamic_bsz}
+  forward_max_token_len_per_gpu: ${critic.forward_max_token_len_per_gpu}
diff --git a/training/trainers/ray_grpo_trainer.py b/training/trainers/ray_grpo_trainer.py
index 7c414ccb..acbaa740 100644
--- a/training/trainers/ray_grpo_trainer.py
+++ b/training/trainers/ray_grpo_trainer.py
@@ -367,12 +367,11 @@ class RayGRPOTrainer(RayPPOTrainer):
                 if self.global_steps % self.config.curriculum.schedule.update_steps == 0:
                     self.train_dataset.experiment.update_difficulty(dataset_name, method="increment")
             else:
-                print(grouped_scores)
                 for dataset_name in grouped_scores.keys():
                     if (
                         grouped_scores[dataset_name]["results"] > self.config.curriculum.success_threshold
                     ) and (grouped_scores[dataset_name]["total_samples"] >= self.config.curriculum.last_k):
-                        self.train_dataset.update_experiment_difficulty(dataset_name, method="increment")
+                        self.train_dataset.experiment.update_difficulty(dataset_name, method="increment")
 
             metrics.update(compute_data_metrics(batch=batch, use_critic=self.use_critic))
             metrics.update(compute_timing_metrics(batch=batch, timing_raw=timing_raw))
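
Taken together with the curriculum block in the YAML above, the corrected manual-mode update reduces to a simple predicate; a sketch only, where the function name should_increment is hypothetical and mean_score / total_samples stand in for grouped_scores[dataset_name]["results"] and ["total_samples"]:

    def should_increment(mean_score: float, total_samples: int,
                         success_threshold: float = 0.70, last_k: int = 20) -> bool:
        # Difficulty increases only when the recent mean reward clears the
        # success threshold and enough samples back the estimate.
        return mean_score > success_threshold and total_samples >= last_k

    assert should_increment(0.82, 24)       # threshold cleared with enough samples
    assert not should_increment(0.82, 10)   # fewer than last_k samples scored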