Parameter Tuning Guide • TissueField

library(TissueField)
library(sf)
library(ggplot2)
library(patchwork)
set.seed(2024)

This vignette covers the parameters that most strongly affect the output of estimate_concentration_field(). They are ordered from most impactful to least:

Diffusion length $L$ — controls spatial scale (shape)
$D$ and $\lambda$ independently — control amplitude
weight_col (UMI weighting) — rescales source strength per transcript
include_external — whether transcripts outside the mask contribute
grid_resolution — accuracy vs. speed
normalize and log_transform — output rescaling

Shared setup

# Kidney-bean mask
in_bean <- function(x, y)
  (x/9)^2 + (y/7)^2 < 1 & sqrt((x - 4.5)^2 + y^2) >= 3.5

cands <- data.frame(x = runif(20000, -11, 11), y = runif(20000, -9, 9))
tissue_pts <- cands[in_bean(cands$x, cands$y), ][1:3000, ]

if (requireNamespace("TissueMask", quietly = TRUE)) {
  mask <- TissueMask::fit_spatial_mask(
    tissue_pts, method = "raster",
    raster_resolution = 200L, raster_sigma = 0.8,
    raster_threshold  = 0.15, verbose = FALSE
  )
} else {
  pts_sf <- st_as_sf(tissue_pts, coords = c("x", "y"))
  mask   <- st_convex_hull(st_union(pts_sf))
}
mu <- st_union(mask)

inside_fn <- function(d)
  as.logical(st_within(st_as_sf(d, coords = c("x","y")), mu, sparse = FALSE)[,1])

tc_cl <- data.frame(x = rnorm(280, -3, 1.2), y = rnorm(280, 3.5, 0.9))
tc_bg_all <- data.frame(x = runif(600, -10, 10), y = runif(600, -8, 8))
tc <- rbind(tc_cl[inside_fn(tc_cl), ],
            tc_bg_all[inside_fn(tc_bg_all), ][1:80, ])
tc$umi <- c(rpois(sum(inside_fn(tc_cl)), 18),
            rpois(80, 3))

1. Diffusion length L

$L = \sqrt{D/\lambda}$ is the single most important parameter. It determines how far a transcript’s influence extends in the tissue.

Use diffusion_length to set $L$ directly, bypassing the $D$ / $\lambda$ decomposition:

L_vals <- c(0.5, 1.5, 3.5, 7.0, 12.0, 20.0)

L_plots <- lapply(L_vals, function(Lv) {
  r  <- estimate_concentration_field(
    mask, tc, diffusion_length = Lv, D = 1,
    method = "fd", fd_solver = "direct",
    grid_resolution = 128L, verbose = FALSE
  )
  df  <- field_to_df(r)
  rng <- range(df$field, na.rm = TRUE)
  df$fn <- (df$field - rng[1]) / diff(rng)
  ggplot(df, aes(x = x, y = y, fill = fn)) +
    geom_raster(interpolate = TRUE) +
    scale_fill_viridis_c(option = "magma", limits = c(0, 1),
                          name = "Norm.\nConc.", na.value = "transparent") +
    coord_equal() + theme_minimal(base_size = 8) +
    labs(title = sprintf("L = %g", Lv),
         subtitle = sprintf("lambda = %.4f", 1/Lv^2),
         x = NULL, y = NULL)
})

wrap_plots(L_plots, nrow = 2) +
  plot_annotation(
    title    = "Diffusion Length Sweep (normalised per panel)",
    subtitle = "Small L: tight peaks  |  Large L: tissue-wide field",
    theme    = theme(plot.title    = element_text(face = "bold", size = 12),
                     plot.subtitle = element_text(size = 9, color = "grey40"))
  )

Peak concentration vs diffusion length (D=1 throughout)
L	lambda	peak_concentration
0.5	4.00000	9.228
1.5	0.44444	32.900
3.5	0.08163	57.040
7.0	0.02041	69.230
12.0	0.00694	73.240
20.0	0.00250	74.750

How to choose L in practice:

Scenario	Suggested L
Tight spatial domains, distinguish adjacent clusters	10–20% of inter-cluster distance
Moderate-range signalling (e.g. paracrine cytokines)	20–50% of tissue diameter
Tissue-wide gradient (e.g. oxygen, morphogens)	> 50% of tissue diameter
Quick exploration	Try L = 1, 3, 10 and compare

Shortcut: use sweep_diffusion_length(c(1, 3, 10), mask, tc) to get a combined data frame for all three values.

2. D and lambda: shape vs amplitude

The spatial shape of the field depends only on $L = \sqrt{D/\lambda}$ . Changing $D$ and $\lambda$ while keeping their ratio constant produces identical spatial patterns — only the amplitude changes, scaling as $C \propto 1/D$ .

# L = 2 throughout: D * lambda = 1/L^2 = 0.25
dl_cases <- list(
  list(D = 0.1,  lam = 0.025, lbl = "D=0.1,  lam=0.025"),
  list(D = 1.0,  lam = 0.25,  lbl = "D=1,    lam=0.25"),
  list(D = 5.0,  lam = 1.25,  lbl = "D=5,    lam=1.25"),
  list(D = 20.0, lam = 5.0,   lbl = "D=20,   lam=5")
)

dl_plots <- lapply(dl_cases, function(co) {
  r  <- estimate_concentration_field(
    mask, tc, D = co$D, lambda = co$lam,
    method = "fd", fd_solver = "direct",
    grid_resolution = 128L, verbose = FALSE
  )
  pk <- signif(max(r$field, na.rm = TRUE), 3)
  df <- field_to_df(r)
  ggplot(df, aes(x = x, y = y, fill = field)) +
    geom_raster(interpolate = TRUE) +
    scale_fill_viridis_c(option = "magma", name = "Conc.",
                          na.value = "transparent") +
    coord_equal() + theme_minimal(base_size = 8) +
    labs(title    = co$lbl,
         subtitle = sprintf("L=2 fixed | peak=%g", pk),
         x = NULL, y = NULL)
})

wrap_plots(dl_plots, nrow = 1) +
  plot_annotation(
    title    = "Fixed L=2, Varying D and lambda",
    subtitle = "SHAPE is identical. Amplitude scales as 1/D.",
    theme    = theme(plot.title    = element_text(face = "bold", size = 12),
                     plot.subtitle = element_text(size = 9, color = "grey40"))
  )

Practical implication: When fitting to data, first determine $L$ (the spatial scale), then fix $D$ based on biophysical priors (or leave at 1 if only relative concentrations matter). $\lambda$ is then determined as $\lambda = D/L^2$ .

3. UMI weighting (weight_col)

Each transcript can carry a per-molecule count (UMI) that scales its contribution to the source term. Pass the column name to weight_col:

# Without weighting
res_unweighted <- estimate_concentration_field(
  mask, tc, D = 1, lambda = 0.2,
  method = "fd", grid_resolution = 128L, verbose = FALSE
)

# With UMI weighting
res_weighted <- estimate_concentration_field(
  mask, tc, D = 1, lambda = 0.2, weight_col = "umi",
  method = "fd", grid_resolution = 128L, verbose = FALSE
)

panel2 <- function(result, title = "") {
  df <- field_to_df(result)
  ggplot(df, aes(x = x, y = y, fill = field)) +
    geom_raster(interpolate = TRUE) +
    scale_fill_viridis_c(option = "magma", name = "Conc.",
                          na.value = "transparent") +
    coord_equal() + theme_minimal(base_size = 9) +
    labs(title = title, x = "X", y = "Y") +
    theme(plot.title = element_text(face = "bold"))
}

(panel2(res_unweighted, "Unweighted (all transcripts = 1)") +
 panel2(res_weighted,   "UMI-weighted (cluster UMI ~18, bg ~3)")) +
plot_annotation(
  title = "Effect of UMI Weighting",
  theme = theme(plot.title = element_text(face = "bold", size = 12))
)

Peak concentration and total source strength
Version	Peak	Total_source
Unweighted	44.76	359
UMI-weighted	794.80	5273

When to use weight_col: When your data has UMI counts that reflect true mRNA abundance differences. Unweighted fields treat each detected molecule equally; weighted fields amplify contribution from high-UMI transcripts (often genuine high-expressers).

4. External transcripts (include_external)

By default, transcripts outside the mask are discarded. Setting include_external = TRUE includes them as sources, which can create elevated concentration near the tissue boundary from external signal.

# Add some transcripts outside the mask
tc_ext <- rbind(tc,
  data.frame(x = c(-12, -11, 13), y = c(0, 5, -2), umi = c(50, 40, 30)))

res_excl <- estimate_concentration_field(
  mask, tc_ext, D = 1, lambda = 0.2,
  include_external = FALSE,
  grid_resolution = 128L, verbose = FALSE
)

res_incl <- estimate_concentration_field(
  mask, tc_ext, D = 1, lambda = 0.2,
  include_external = TRUE,
  grid_resolution = 128L, verbose = FALSE
)

(panel2(res_excl, "include_external = FALSE (default)") +
 panel2(res_incl, "include_external = TRUE")) +
plot_annotation(
  title    = "Effect of include_external",
  subtitle = "External sources can elevate concentration near edges",
  theme    = theme(plot.title    = element_text(face = "bold", size = 12),
                   plot.subtitle = element_text(size = 9, color = "grey40"))
)

5. Grid resolution

grid_resolution sets the number of cells per axis ( $N \times N$ ). Higher resolution gives smoother, more accurate fields but increases memory and solve time.

res_times <- sapply(c(64L, 128L, 256L, 512L), function(N) {
  t0 <- proc.time()["elapsed"]
  estimate_concentration_field(
    mask, tc, D = 1, lambda = 0.2,
    method = "fd", fd_solver = "direct",
    grid_resolution = N, verbose = FALSE
  )
  proc.time()["elapsed"] - t0
})

Wall time vs grid resolution (direct solver)
N	Grid_cells	Time_s
64	4096	0.02
128	16384	0.13
256	65536	1.03
512	262144	6.89

res_low  <- estimate_concentration_field(
  mask, tc, D=1, lambda=0.2, method="fd",
  grid_resolution = 32L, verbose = FALSE
)
res_med  <- estimate_concentration_field(
  mask, tc, D=1, lambda=0.2, method="fd",
  grid_resolution = 128L, verbose = FALSE
)
res_high <- estimate_concentration_field(
  mask, tc, D=1, lambda=0.2, method="fd",
  grid_resolution = 256L, verbose = FALSE
)

(panel2(res_low,  "N = 32") +
 panel2(res_med,  "N = 128") +
 panel2(res_high, "N = 256")) +
plot_annotation(
  title = "Grid Resolution Comparison",
  theme = theme(plot.title = element_text(face = "bold", size = 12))
)

Practical guidance:

N = 64--128: fast exploration, reasonable accuracy
N = 256: good balance; default
N = 512+: high-resolution publication figures; use fd_solver = "iterative" or method = "green" to manage memory

6. Normalise and log-transform

Two output rescaling options:

normalize = TRUE: linearly rescales the field to $[0, 1]$ . Useful when comparing fields across genes with different expression levels.
log_transform = TRUE: applies log1p() after solving. Compresses wide dynamic range (useful for highly focal sources).

res_raw  <- estimate_concentration_field(
  mask, tc, D = 1, lambda = 0.2, grid_resolution = 128L, verbose = FALSE
)
res_norm <- estimate_concentration_field(
  mask, tc, D = 1, lambda = 0.2, normalize = TRUE,
  grid_resolution = 128L, verbose = FALSE
)
res_log  <- estimate_concentration_field(
  mask, tc, D = 1, lambda = 0.2, log_transform = TRUE,
  grid_resolution = 128L, verbose = FALSE
)

(panel2(res_raw,  "Raw field") +
 panel2(res_norm, "normalize = TRUE (field in [0,1])") +
 panel2(res_log,  "log_transform = TRUE (log1p applied)")) +
plot_annotation(
  title = "Output Rescaling Options",
  theme = theme(plot.title = element_text(face = "bold", size = 12))
)

Field range under different output options
Setting	Min	Max
Raw	0.03081	44.76265
normalize=TRUE	0.00000	1.00000
log_transform=TRUE	0.03034	3.82347

Parameter quick-reference

Parameter	Controls	Default
diffusion_length	Spatial scale of field (shape)	NULL (computed from D and lambda)
D	Amplitude (C proportional to 1/D)	1.0
lambda	Clearance rate (determines L with D)	0.1
weight_col	Per-transcript source weight	NULL
include_external	Whether to use off-mask transcripts	FALSE
grid_resolution	Grid density (N x N cells)	256
normalize	Rescale output to [0, 1]	FALSE
log_transform	Apply log1p() to output	FALSE
boundary_condition	Dirichlet (absorbing) or Neumann (reflecting)	dirichlet
fd_solver	Direct (LU) or iterative (SOR)	direct