Source code for gratools.gfa_extract

# Standard library imports
from functools import wraps
from pathlib import Path
from typing import Any  # For type hinting kwargs

# Third-party imports
import click
from cloup import command, option, option_group
from cloup.constraints import AcceptAtMost, ErrorFmt, RequireAtLeast

# Local application/library specific imports
from .Gratools import Gratools  # Assuming Gratools is in the same directory or a submodule
from .common_decorators import common_options_decorator, options_for_gfa_handling  # Your custom decorators
from .useful_function import CustomCommand  # Your custom command class

# Constraint for the --samples-list / --all-samples pair used by the
# FASTA-related commands: either option may be omitted, but at most one of
# them may be given.
# AcceptAtMost(1) is what enforces this. RequireAtLeast(0) is satisfied by
# every combination (including both options together), so it never fired.
mutually_exclusive_fasta_options = AcceptAtMost(1).rephrased(
    help=(
        "The --samples-list and --all-samples options are mutually exclusive. "
        "If neither is provided, sequences might be extracted only for the query sample."
    ),
    # ErrorFmt.param_list is a replacement field filled in by cloup with the
    # list of parameters the constraint is applied to.
    error=f"The following parameters are mutually exclusive:\n{ErrorFmt.param_list}",
)


def common_extraction_options_decorator(func: Any) -> Any:
    """
    Attach the shared "Extraction Query Options" group to a command callback.

    The group contains, in order: every entry of ``options_for_gfa_handling``,
    the query-region options (``--sample-query``, ``--chrom-query``,
    ``--start-query``, ``--stop-query``), ``--merge-dist``, and the
    ``--samples-list`` / ``--all-samples`` pair declared through the
    ``mutually_exclusive_fasta_options`` constraint.

    Args:
        func: The command callback to decorate.

    Returns:
        A pass-through wrapper around ``func`` (metadata preserved with
        ``functools.wraps``) to which the option group has been applied.
    """
    # --- Query region -----------------------------------------------------
    sample_query_opt = option(
        "--sample-query",
        "-sq",
        type=str,
        required=True,
        help="Name of the primary query sample to define the region.",
    )
    chrom_query_opt = option(
        "--chrom-query",
        "-chr",
        type=str,
        required=True,
        help="Name of the chromosome for the query region.",
    )
    start_query_opt = option(
        "--start-query",
        "-s",
        type=click.IntRange(min=0),  # 0 is accepted as a start position
        default=0,
        show_default=True,
        help="Start position of the query region on the chromosome (0-based).",
    )
    stop_query_opt = option(
        "--stop-query",
        "-e",
        type=click.IntRange(min=1),
        required=False,  # optional; the help text describes the fallback
        help="Stop position of the query region on the chromosome (exclusive). Defaults to chromosome end if not provided.",
    )

    # --- Interval merging -------------------------------------------------
    merge_dist_opt = option(
        "--merge-dist",
        "-d",
        type=click.IntRange(min=-1),  # -1 is accepted as a special value
        default=-1,
        show_default=True,
        help=(
            "Merge distance for 'bedtools merge -d'. If -1, uses 10% of query region length. 0 for abutting. \n"
            "See bedtools merge docs."
        ),
    )

    # --- Additional sample selection --------------------------------------
    samples_list_opt = option(
        "--samples-list",
        "-sl",
        type=click.Path(
            exists=True,
            file_okay=True,
            dir_okay=False,  # must be a file, not a directory
            readable=True,
            resolve_path=True,
            path_type=Path,  # value is delivered as a pathlib.Path
        ),
        required=False,
        help="Path to a file listing additional sample names (one per line) to include in the extraction. Mutually exclusive with --all-samples.",
    )
    all_samples_opt = option(
        "--all-samples",
        "-as",
        is_flag=True,
        default=False,
        show_default=True,
        help="Include all samples from the GFA in the extraction (relative to the query region). Mutually exclusive with --samples-list.",
    )

    # Assemble the group decorator once; the constraint call both declares
    # the two sample-selection options and binds the constraint to them.
    attach_extraction_group = option_group(
        "Extraction Query Options",
        *options_for_gfa_handling,
        sample_query_opt,
        chrom_query_opt,
        start_query_opt,
        stop_query_opt,
        merge_dist_opt,
        mutually_exclusive_fasta_options(samples_list_opt, all_samples_opt),
    )

    @wraps(func)  # keep the wrapped callback's name, docstring and attributes
    def passthrough(*args: Any, **kwargs: Any) -> Any:
        return func(*args, **kwargs)

    return attach_extraction_group(passthrough)
def _run_region_extraction(options: dict, build_fasta: Any) -> None:
    """
    Build a ``Gratools`` instance from parsed CLI options and extract the subgraph.

    Shared by the ``extract_subgraph`` and ``get_fasta`` commands, which differ
    only in the value they pass as ``build_fasta_flag``.

    Args:
        options: The keyword arguments Click handed to the command callback.
            Values are looked up with ``dict.get``, so a missing key is passed
            on as ``None`` (``False`` for ``index_links``). The same dict is
            also forwarded unchanged as ``meta``.
        build_fasta: Value forwarded to ``Gratools`` as ``build_fasta_flag``.
    """
    gratools_instance = Gratools(
        gfa_path=options.get("gfa_file_path"),
        threads=options.get("num_threads"),
        outdir=options.get("output_directory"),
        suffix=options.get("output_file_suffix"),
        sample_name_query=options.get("sample_query"),
        chromosome_query=options.get("chrom_query"),
        start_query=options.get("start_query"),
        stop_query=options.get("stop_query"),
        merge=options.get("merge_dist"),
        meta=options,  # full option dict (verbosity, log path, etc.)
        index_links=options.get("index_links", False),
        build_fasta_flag=build_fasta,
    )
    gratools_instance.extract_subgraph(
        samples_list_path=options.get("samples_list"),
        all_samples_flag=options.get("all_samples"),
    )


@command(
    "extract_subgraph",
    cls=CustomCommand,
    short_help="Extracts a subgraph from a GFA file based on a query region.",
    help="""
    This command extracts a specific region from the GFA, defined by a query sample,
    chromosome, and start/end coordinates. The extracted subgraph, containing all paths
    traversing this region (for the query sample and any other specified samples),
    is saved as a new GFA file. Optionally, a corresponding FASTA file of the sequences
    in the subgraph can be generated.
    This command relies on a pre-existing GraTools index of the input GFA.
    For more details, see the full documentation:
    https://gratools.readthedocs.io/en/latest/commands/extract_subgraph.html
    """,
    no_args_is_help=True,
    # Same --samples-list / --all-samples constraint as get_fasta, so show it
    # in the help output here as well.
    show_constraints=True,
)
@common_extraction_options_decorator  # shared "Extraction Query Options" group
@option_group(
    "Subgraph Specific Output Options",
    option(
        "--build-fasta/--no-build-fasta",
        "generate_fasta_for_subgraph",  # explicit destination name in kwargs
        is_flag=True,
        default=False,
        show_default=True,
        help="Generate a FASTA file from the sequences within the extracted subgraph.",
    ),
)
@common_options_decorator
@click.pass_context
def extract_subgraph_command(ctx: click.Context, **kwargs: Any) -> None:
    """
    CLI command to extract a subgraph from a GFA file.

    Args:
        ctx (click.Context): The Click context object (not used by the body).
        **kwargs (Any): Keyword arguments from Click options. The
            ``generate_fasta_for_subgraph`` entry decides whether a FASTA
            file is requested alongside the subgraph.
    """
    _run_region_extraction(
        kwargs,
        build_fasta=kwargs.get("generate_fasta_for_subgraph"),
    )


@command(
    "get_fasta",
    cls=CustomCommand,
    short_help="Extracts sequences for a specific genomic region in FASTA format.",
    help="""
    Extracts sequences corresponding to a defined genomic region (query sample,
    chromosome, start/end) for the query sample and any additional specified samples.
    The output is a FASTA file containing these sequences.
    This command effectively runs a subgraph extraction focused on sequence output.
    Relies on a pre-existing GraTools index.
    For more details, see the full documentation:
    https://gratools.readthedocs.io/en/latest/commands/get_fasta.html
    """,
    no_args_is_help=True,
    show_constraints=True,  # show the --samples-list / --all-samples constraint in help
)
@common_extraction_options_decorator  # shared "Extraction Query Options" group
@common_options_decorator
@click.pass_context
def get_fasta_command(ctx: click.Context, **kwargs: Any) -> None:
    """
    CLI command to extract sequences for a specific genomic region in FASTA format.

    Runs the same extraction as ``extract_subgraph`` with ``build_fasta_flag``
    always set to ``True``.

    Args:
        ctx (click.Context): The Click context object (not used by the body).
        **kwargs (Any): Keyword arguments from Click options.
    """
    _run_region_extraction(kwargs, build_fasta=True)