fix(training): disable DataLoader workers in subprocess training
DataLoader worker subprocesses crash inside Docker due to multiprocessing fork restrictions. Pass `--workers 0` to both `ketos train` and `ketos segtrain` so that data loading runs in the main process.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -366,7 +366,8 @@ async def train_model(
|
|||||||
os.makedirs(checkpoint_dir, exist_ok=True)
|
os.makedirs(checkpoint_dir, exist_ok=True)
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
"ketos", "train",
|
"ketos", "--workers", "0",
|
||||||
|
"train",
|
||||||
"-f", "path",
|
"-f", "path",
|
||||||
"-o", checkpoint_dir,
|
"-o", checkpoint_dir,
|
||||||
"-q", "fixed",
|
"-q", "fixed",
|
||||||
@@ -456,7 +457,8 @@ async def segtrain_model(
|
|||||||
os.makedirs(checkpoint_dir, exist_ok=True)
|
os.makedirs(checkpoint_dir, exist_ok=True)
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
"ketos", "segtrain",
|
"ketos", "--workers", "0",
|
||||||
|
"segtrain",
|
||||||
"-o", checkpoint_dir,
|
"-o", checkpoint_dir,
|
||||||
"-q", "fixed",
|
"-q", "fixed",
|
||||||
"-N", "50",
|
"-N", "50",
|
||||||
|
|||||||
Reference in New Issue
Block a user