pysradb package#

Submodules#

pysradb.basedb module#

class pysradb.basedb.BASEdb(sqlite_file)[source]#

Bases: object

all_row_counts()[source]#

Get row counts of all tables in the db file.

Returns:
row_counts: DataFrame

A dataframe with table names and corresponding row count.

close()[source]#

Close sqlite connection.

desc_table(table)[source]#

Describe all fields in a table.

Parameters:
table: string

Table name. See list_tables for getting all table names

Returns:
table_desc: DataFrame

A DataFrame with field name and its schema description

get_row_count(table)[source]#

Get row counts for a table.

Parameters:
table: string

Table name. See list_tables for getting all table names

Returns:
row_count: int

Number of rows in table

list_fields(table)[source]#

List all fields in a given table.

Parameters:
table: string

Table name. See list_tables for getting all table names

Returns:
field_list: list

A list of field names for the table

list_tables()[source]#

List all tables in the sqlite file.

Returns:
table_list: list

List of all table names

open()[source]#

Open sqlite connection.

query(sql_query)[source]#

Run SQL query.

Parameters:
sql_query: string

SQL query string

Returns:
results: DataFrame

Query results formatted as dataframe

pysradb.cli module#

Command line interface for pysradb

class pysradb.cli.ArgParser(prog=None, usage=None, description=None, epilog=None, parents=[], formatter_class=<class 'argparse.HelpFormatter'>, prefix_chars='-', fromfile_prefix_chars=None, argument_default=None, conflict_handler='error', add_help=True, allow_abbrev=True)[source]#

Bases: ArgumentParser

error(message: string)[source]#

Prints a usage message incorporating the message to stderr and exits.

If you override this in a subclass, it should not return – it should either exit or raise an exception.

class pysradb.cli.CustomFormatterArgP(prog, indent_increment=2, max_help_position=24, width=None)[source]#

Bases: ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter

pysradb.cli.download(out_dir, srx, srp, geo, skip_confirmation, col='public_url', use_ascp=False, threads=1)[source]#
pysradb.cli.get_geo_search_info()[source]#
pysradb.cli.gse_to_gsm(gse_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.gse_to_srp(gse_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.gsm_to_gse(gsm_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.gsm_to_srp(gsm_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.gsm_to_srr(gsm_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.gsm_to_srs(gsm_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.gsm_to_srx(gsm_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.metadata(srp_id, assay, desc, detailed, expand, saveto)[source]#
pysradb.cli.parse_args(args=None)[source]#

Argument parser

pysradb.cli.pretty_print_df(df, include_header=True)[source]#
pysradb.cli.search(saveto, db, verbosity, return_max, fields)[source]#
pysradb.cli.srp_to_gse(srp_id, saveto, detailed, desc, expand)[source]#
pysradb.cli.srp_to_srr(srp_id, saveto, detailed, desc, expand)[source]#
pysradb.cli.srp_to_srs(srp_id, saveto, detailed, desc, expand)[source]#
pysradb.cli.srp_to_srx(srp_id, saveto, detailed, desc, expand)[source]#
pysradb.cli.srr_to_gsm(srr_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.srr_to_srp(srr_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.srr_to_srs(srr_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.srr_to_srx(srr_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.srs_to_gsm(srs_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.srs_to_srx(srs_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.srx_to_srp(srx_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.srx_to_srr(srx_ids, saveto, detailed, desc, expand)[source]#
pysradb.cli.srx_to_srs(srx_ids, saveto, detailed, desc, expand)[source]#

pysradb.download module#

Utility function to download data

pysradb.download.download_file(url, file_path, md5_hash=None, timeout=10, block_size=1048576, show_progress=False)[source]#

Resumable download. Expect the server to support byte ranges.

Parameters:
url: string

URL

file_path: string

Local file path to store the downloaded file

md5_hash: string

Expected MD5 string of downloaded file

timeout: int

Seconds to wait before terminating request

block_size: int

Chunk of bytes to read (default: 1024 * 1024 = 1MB)

show_progress: bool

Show progress bar

pysradb.download.get_file_size(row, url_col)[source]#

Get size of file to be downloaded.

Parameters:
row: pd.DataFrame row
url_col: str

url_column

Returns:
content_length: int
pysradb.download.md5_validate_file(file_path, md5_hash)[source]#

Check file content against an MD5.

Parameters:
file_path: string

Path to file

md5_hash: string

Expected md5 hash

Returns:
valid: bool

True if expected and observed md5 match

pysradb.download.millify(n)[source]#

Convert integer to human readable format.

Parameters:
n: int
Returns:
millidx: str

Formatted integer

pysradb.exceptions module#

This file contains custom Exceptions for pysradb

exception pysradb.exceptions.IncorrectFieldException[source]#

Bases: Exception

Exception raised when the user enters incorrect inputs for a flag.

exception pysradb.exceptions.MissingQueryException[source]#

Bases: Exception

Exception raised when the user did not supply any query fields.

Attributes:
message: string

Error message for this Exception

pysradb.filter_attrs module#

pysradb.filter_attrs.expand_sample_attribute_columns(metadata_df)[source]#

Expand sample attribute columns to individual columns.

Since the sample_attribute column content can be different for different rows even if coming from the same project (SRP), we explicitly iterate through the rows to first determine what additional columns need to be created.

Parameters:
metadata_df: DataFrame

Dataframe as obtained from sra_metadata or equivalent

Returns:
expanded_df: DataFrame

Dataframe with additional columns pertaining to sample_attribute appended

pysradb.filter_attrs.guess_cell_type(sample_attribute)[source]#

Guess possible cell line from sample_attribute data.

Parameters:
sample_attribute: string

sample_attribute string as in the metadata column

Returns:
cell_type: string

Possible cell type of sample. Returns None if no match found.

pysradb.filter_attrs.guess_strain_type(sample_attribute)[source]#

Guess strain type from sample_attribute data.

Parameters:
sample_attribute: string

sample_attribute string as in the metadata column

Returns:
strain_type: string

Possible strain type of sample. Returns None if no match found.

pysradb.filter_attrs.guess_tissue_type(sample_attribute)[source]#

Guess tissue type from sample_attribute data.

Parameters:
sample_attribute: string

sample_attribute string as in the metadata column

Returns:
tissue_type: string

Possible tissue type of sample. Returns None if no match found.

pysradb.geodb module#

Methods to interact with GEO

class pysradb.geodb.GEOdb(sqlite_file)[source]#

Bases: BASEdb

geo_convert(from_acc)[source]#

Convert one GEO accession to other.

Parameters:
from_acc: string

GPL/GSE/GSM accession ID

Returns:
mapping_df: DataFrame

A dataframe with relevant mappings

gse_metadata(gse)[source]#

Get metadata for GSE ID.

Parameters:
gse: string

GSE ID

Returns:
metadata_df: DataFrame

A dataframe with relevant fields

gse_to_gsm(gse)[source]#

Fetch GSMs for a GSE.

Parameters:
gse: string

GSE ID

Returns:
mapping_df: DataFrame

A dataframe with relevant mappings

gsm_metadata(gsm)[source]#

Get metadata for GSM ID.

Parameters:
gsm: string

GSM ID

Returns:
metadata_df: DataFrame

A dataframe with relevant fields

gsm_to_gse(gsm)[source]#

Fetch GSE for a GSM.

Parameters:
gsm: string

GSM ID

Returns:
mapping_df: DataFrame

A dataframe with relevant mappings

guess_srp_from_gse(gse)[source]#

Convert GSE to SRP id.

Parameters:
gse: string

GSE ID

Returns:
srp: string

SRP ID

pysradb.geodb.download_geodb_file(download_dir='/github/workspace/docs', overwrite=True)[source]#

Download GEOmetadb.sqlite file.

Parameters:
download_dir: string

Directory to download GEOmetadb.sqlite

overwrite: bool

overwrite existing file(s). Set to True by default.

pysradb.search module#

This file contains the search classes for the search feature.

class pysradb.search.EnaSearch(verbosity=2, return_max=20, query=None, accession=None, organism=None, layout=None, mbases=None, publication_date=None, platform=None, selection=None, source=None, strategy=None, title=None, suppress_validation=False)[source]#

Bases: QuerySearch

Subclass of QuerySearch that implements search via querying ENA API

See also

QuerySearch

Superclass of EnaSearch

Methods

search()

sends the user query via requests to ENA API and stores search result as an instance attribute in the form of a pandas dataframe

show_result_statistics()

Shows summary information about search results.

visualise_results()

Generate graphs that visualise the search results.

get_plot_objects():

Get the plot objects for plots generated.

_format_query_string()

formats the input user query into a string

_format_request()

formats the request payload

_format_result(content)

formats the search query output and converts it into a pandas dataframe

search()[source]#
class pysradb.search.GeoSearch(verbosity=2, return_max=20, query=None, accession=None, organism=None, layout=None, mbases=None, publication_date=None, platform=None, selection=None, source=None, strategy=None, title=None, geo_query=None, geo_dataset_type=None, geo_entry_type=None, suppress_validation=False)[source]#

Bases: SraSearch

Subclass of SraSearch that can query both GEO DataSets and SRA API.

See also

GeoSearch.info

GeoSearch usage details

SraSearch

Superclass of GeoSearch

QuerySearch

Superclass of SraSearch

Methods

search()

sends the user query via requests to SRA, GEO DataSets, or both depending on the search query. If query is sent to both APIs, the intersection of the two sets of query results are returned.

show_result_statistics()

Shows summary information about search results.

visualise_results()

Generate graphs that visualise the search results.

get_plot_objects():

Get the plot objects for plots generated.

_format_geo_query_string()

formats the GEO DataSets portion of the input user query into a string.

_format_geo_request()

formats the GEO DataSets request payload

_format_result(content)

formats the search query output and converts it into a pandas dataframe

classmethod info()[source]#

Information on how to use GeoSearch.

Displays information on how to query GEO DataSets / SRA via GeoSearch, including accepted inputs for geo_query, geo_dataset_type and geo_entry_type.

Returns:
info: str

Information on how to use GeoSearch.

search()[source]#
class pysradb.search.QuerySearch(verbosity=2, return_max=20, query=None, accession=None, organism=None, layout=None, mbases=None, publication_date=None, platform=None, selection=None, source=None, strategy=None, title=None, suppress_validation=False)[source]#

Bases: object

This is the base class for the search feature.

This class takes as input the user’s search query, which has been tokenized by the ArgParser. The query will be sent to either SRA or ENA depending on the user’s input, and the results will be returned as a pandas dataframe.

Parameters:
verbosity: integer

The level of details of the search result.

return_max: int

The maximum number of entries to be returned.

query: str

The main query string.

accession: str

A relevant study / experiment / sample / run accession number.

organism: str

Scientific name of the sample organism

layout: str

Library layout. Possible inputs: single, paired

mbases: int

Size of the sample of interest rounded to the nearest megabase.

publication_date: str

The publication date of the run in the format dd-mm-yyyy. If a date range is desired, input should be in the format of dd-mm-yyyy:dd-mm-yyyy

platform: str

Sequencing platform used for the run. Some possible inputs include: illumina, ion torrent, oxford nanopore

selection: str

Library selection. Some possible inputs: cdna, chip, dnase, pcr

source: str

Library source. Some possible inputs: genomic, metagenomic, transcriptomic

strategy: str

Library Preparation strategy. Some possible inputs: wgs, amplicon, rna seq

title: str

Title of the experiment associated with the run

suppress_validation: bool

Defaults to False. If this is set to True, the user input format checks will be skipped. Setting this to True may cause the program to behave in unexpected ways, but allows the user to search queries that do not pass the format check.

Attributes:
self.df: Pandas DataFrame

The search result belonging to this search instance

Methods

get_df()

Returns the dataframe storing this search result.

search()

Executes the search.

show_result_statistics()

Shows summary information about search results.

visualise_results()

Generate graphs that visualise the search results.

get_plot_objects():

Get the plot objects for plots generated.

get_df()[source]#

Getter for the search result dataframe.

get_plot_objects()[source]#

Get the plot objects for plots generated.

search()[source]#
show_result_statistics()[source]#

Shows search result statistics.

visualise_results(graph_types=('all',), show=False, saveto='./search_plots/')[source]#

Generate graphs that visualise the search results.

This method will only work if the optional dependency, matplotlib, is installed in the system.

Parameters:
graph_types: tuple

tuple containing strings representing types of graphs to generate. Possible strings: all, daterange, organism, source, selection, platform, basecount

saveto: str

directory name where the generated graphs are saved.

show: bool

Whether plotted graphs are immediately shown.

class pysradb.search.SraSearch(verbosity=2, return_max=20, query=None, accession=None, organism=None, layout=None, mbases=None, publication_date=None, platform=None, selection=None, source=None, strategy=None, title=None, suppress_validation=False)[source]#

Bases: QuerySearch

Subclass of QuerySearch that implements search by querying NCBI Entrez API

See also

QuerySearch

Superclass of SraSearch

Methods

search()

sends the user query via requests to NCBI Entrez API and returns search results as a pandas dataframe.

show_result_statistics()

Shows summary information about search results.

visualise_results()

Generate graphs that visualise the search results.

get_plot_objects():

Get the plot objects for plots generated.

get_uids():

Get NCBI uids retrieved during this search query.

_format_query_string()

formats the input user query into a string

_format_request()

formats the request payload

_format_result(content)

formats the search query output.

get_uids()[source]#

Get NCBI uids retrieved during this search query.

Note: There is a chance that some uids retrieved do not appear in the search result output (Refer to #88)

search()[source]#

pysradb.sradb module#

Methods to interact with SRA

class pysradb.sradb.SRAdb(sqlite_file)[source]#

Bases: BASEdb

download(srp=None, df=None, url_col='public_url', out_dir=None, filter_by_srx=[], use_ascp=False, ascp_dir=None, ascp_bin=None, skip_confirmation=False, threads=1)[source]#

Download SRA files.

Parameters:
srp: string

SRP ID (optional)

df: Dataframe

A dataframe as obtained from sra_metadata

url_col: string

Column of df to use for downloading

out_dir: string

Directory location for download

filter_by_srx: list

List of SRX ids to filter

protocol: string

[‘fasp’/’ftp’] fasp => faster download, ftp => slower

ascp_dir: string

Location of ascp directory

gse_to_gsm(gses, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert GSE to GSM

Parameters:
gses: string or list

List of GSE ID

Returns:
gse_to_gsm_df: DataFrame
gse_to_srp(gses, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert GSE to SRP

Parameters:
gses: string or list

List of GSE ID

Returns:
gse_to_srp_df: DataFrame
gsm_to_gse(gsms, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert GSM to GSE

Parameters:
gsms: string or list

List of GSM ID

Returns:
gsm_to_gse_df: DataFrame
gsm_to_srp(gsms, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert GSM to SRP.

Parameters:
gsms: string or list

List of GSM ID

Returns:
gsm_to_srp_df: DataFrame
gsm_to_srr(gsms, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert GSMs to SRR.

Parameters:
gsms: string or list

List of GSM id

sample_attribute: bool

Include sample_attribute column

Returns:
gsm_to_srr_df: DataFrame

DataFrame with two columns for GSM/SRR

gsm_to_srs(gsms, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert GSM to SRS.

Parameters:
gsms: list or string

List of gsms

Returns:
gsm_to_srs_df: DataFrame
gsm_to_srx(gsms, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert GSM to SRX.

Parameters:
gsms: string or list

List of GSM ID

Returns:
gsm_to_srx_df: DataFrame
search_by_expt_id(srx)[source]#

Search for a SRX/GSM id in the experiments.

Parameters:
srx: string

SRX (experiment_accession) ID

Returns:
results: dict

Dictionary with relevant hits

search_sra(search_str, out_type=['study_accession', 'experiment_accession', 'sample_accession', 'run_accession'], assay=False, sample_attribute=False, detailed=False, expand_sample_attributes=False, output_read_lengths=False)[source]#

Search SRA for any search term.

Parameters:
search_str: string

SQL like text string to search. SQL like text => For example, terms in quotes “” enforce an exact search.

Returns:
query_df: DataFrame

Dataframe with relevant query results

sra_metadata(acc, out_type=['study_accession', 'experiment_accession', 'sample_accession', 'run_accession'], assay=False, sample_attribute=False, detailed=False, expand_sample_attributes=False, output_read_lengths=False, acc_is_searchstr=False)[source]#

Get metadata for the provided SRA accession.

Parameters:
acc: string or list

SRA accession ID

out_type: list

List of columns to output

assay: bool

True if assay should be outputted

sample_attribute: bool

True if sample_attribute should be outputted

detailed: bool

True if full metadata tables should be outputted

expand_sample_attributes: bool

Should sample_attribute column be expanded?

output_read_lengths: bool

True if read lengths should be calculated

acc_is_searchstr: bool

True if acc is a search string

Returns:
metadata_df: DataFrame

A dataframe with all relevant columns

srp_to_gse(srp, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRP to GSE

Parameters:
srp: string

SRP ID

Returns:
srp_to_gse_df: DataFrame
srp_to_srr(srp, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRP to SRR.

Parameters:
srp: string

SRP ID

Returns:
srp_to_srr_df: DataFrame
srp_to_srs(srp, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRP to SRS.

Parameters:
srp: string

SRP ID

Returns:
srp_to_srs_df: DataFrame

DataFrame with two columns for SRS

srp_to_srx(srp, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRP to SRX/SRR.

Parameters:
srp: string

SRP ID

Returns:
srp_to_srx_df: DataFrame

DataFrame with two columns for SRX/SRR

srr_to_gsm(srrs, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRR to GSM

Parameters:
srrs: string or list

List of SRR

Returns:
srr_to_gsm_df: DataFrame
srr_to_srp(srrs, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRR to SRP.

Parameters:
srrs: list of string

List of SRR IDs

Returns:
srr_to_srp_df: DataFrame
srr_to_srs(srrs, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRR to SRS.

Parameters:
srrs: list of string

List of SRR IDs

Returns:
srr_to_srs_df: DataFrame
srr_to_srx(srrs, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRR to SRX.

Parameters:
srrs: string or list

List of SRR id

sample_attribute: bool

Include sample_attribute column

Returns:
srr_to_srx_df: DataFrame

DataFrame with two columns for SRX/SRR

srs_to_gsm(srss, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRS to GSM.

Parameters:
srss: list or string

List of SRS ID

Returns:
srs_to_gsm_df: DataFrame
srs_to_srx(srss, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRS to SRX.

Parameters:
srss: list or string

List of SRS ID

Returns:
srs_to_srx_df: DataFrame
srx_to_srp(srxs, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRXs to SRP.

Parameters:
srxs: string or list

List of SRX id

sample_attribute: bool

Include sample_attribute column

Returns:
srx_to_srp_df: DataFrame

DataFrame with two columns for SRX

srx_to_srr(srxs, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRXs to SRR/SRP.

Parameters:
srxs: string or list

List of SRX id

sample_attribute: bool

Include sample_attribute column

Returns:
srx_to_srr_df: DataFrame

DataFrame with two columns for SRX/SRR

srx_to_srs(srxs, sample_attribute=False, detailed=False, expand_sample_attributes=False)[source]#

Convert SRX to SRS.

Parameters:
srxs: string or list

List of SRX id

Returns:
srx_to_srs_df: DataFrame
pysradb.sradb.download_sradb_file(download_dir='/github/workspace/docs', overwrite=True, keep_gz=False)[source]#

Download SRAdb.sqlite file.

Parameters:
download_dir: string

Directory to download SRAmetadb.sqlite

overwrite: bool

overwrite existing file(s). Set to True by default.

keep_gz: bool

Keep the .gz file after extraction is complete (by default it is deleted)

pysradb.sraweb module#

Utilities to interact with SRA online

class pysradb.sraweb.SRAweb(api_key=None)[source]#

Bases: SRAdb

close()[source]#

Close sqlite connection.

create_esummary_params(esearchresult, db='sra')[source]#
fetch_ena_fastq(srp)[source]#

Fetch FASTQ records from ENA (EXPERIMENTAL)

Parameters:
srp: string

Study accession

Returns:
srr_url: list

List of SRR fastq urls

fetch_gds_results(gse, **kwargs)[source]#
static format_xml(string)[source]#

Create a fake root to make ‘string’ a valid xml

Parameters:
string: str
Returns:
xml: str
get_efetch_response(db, term, usehistory='y')[source]#
get_esummary_response(db, term, usehistory='y')[source]#
gse_to_gsm(gse, **kwargs)[source]#

Convert GSE to GSM

Parameters:
gses: string or list

List of GSE ID

Returns:
gse_to_gsm_df: DataFrame
gse_to_srp(gse, **kwargs)[source]#

Convert GSE to SRP

Parameters:
gses: string or list

List of GSE ID

Returns:
gse_to_srp_df: DataFrame
gsm_to_gse(gsm, **kwargs)[source]#

Convert GSM to GSE

Parameters:
gsms: string or list

List of GSM ID

Returns:
gsm_to_gse_df: DataFrame
gsm_to_srp(gsm, **kwargs)[source]#

Convert GSM to SRP.

Parameters:
gsms: string or list

List of GSM ID

Returns:
gsm_to_srp_df: DataFrame
gsm_to_srr(gsm, **kwargs)[source]#

Convert GSMs to SRR.

Parameters:
gsms: string or list

List of GSM id

sample_attribute: bool

Include sample_attribute column

Returns:
gsm_to_srr_df: DataFrame

DataFrame with two columns for GSM/SRR

gsm_to_srs(gsm, **kwargs)[source]#

Get SRS for a GSM

gsm_to_srx(gsm, **kwargs)[source]#

Get SRX for a GSM

search(*args, **kwargs)[source]#
sra_metadata(srp, sample_attribute=False, detailed=False, expand_sample_attributes=False, output_read_lengths=False, **kwargs)[source]#

Get metadata for the provided SRA accession.

Parameters:
acc: string or list

SRA accession ID

out_type: list

List of columns to output

assay: bool

True if assay should be outputted

sample_attribute: bool

True if sample_attribute should be outputted

detailed: bool

True if full metadata tables should be outputted

expand_sample_attributes: bool

Should sample_attribute column be expanded?

output_read_lengths: bool

True if read lengths should be calculated

acc_is_searchstr: bool

True if acc is a search string

Returns:
metadata_df: DataFrame

A dataframe with all relevant columns

srp_to_gse(srp, **kwargs)[source]#

Get GSE for a SRP

srp_to_srr(srp, **kwargs)[source]#

Get SRR for a SRP

srp_to_srs(srp, **kwargs)[source]#

Get SRS for a SRP

srp_to_srx(srp, **kwargs)[source]#

Get SRX for a SRP

srr_to_gsm(srr, **kwargs)[source]#

Get GSM for a SRR

srr_to_srp(srr, **kwargs)[source]#

Get SRP for a SRR

srr_to_srs(srr, **kwargs)[source]#

Get SRS for a SRR

srr_to_srx(srr, **kwargs)[source]#

Get SRX for a SRR

srs_to_gsm(srs, **kwargs)[source]#

Get GSM for a SRS

srs_to_srx(srs, **kwargs)[source]#

Get SRX for a SRS

srx_to_gsm(srx, **kwargs)[source]#
srx_to_srp(srx, **kwargs)[source]#

Get SRP for a SRX

srx_to_srr(srx, **kwargs)[source]#

Get SRR for a SRX

srx_to_srs(srx, **kwargs)[source]#

Get SRS for a SRX

static xml_to_json(xml)[source]#

Convert xml to json.

Parameters:
xml: str

Input XML

Returns:
xml_dict: dict

Parsed xml as dict

pysradb.sraweb.get_retmax(n_records, retmax=500)[source]#

Get retstart and retmax till n_records are exhausted

pysradb.sraweb.xmlescape(data)[source]#

pysradb.taxid2name module#

pysradb.utils module#

class pysradb.utils.TqdmUpTo(*_, **__)[source]#

Bases: tqdm

Alternative Class-based version of the above. Provides update_to(n) which uses tqdm.update(delta_n). Inspired by [twine#242](pypa/twine#242), [here](pypa/twine).

Credits: tqdm/tqdm

update_to(b=1, bsize=1, tsize=None)[source]#
b: int, optional

Number of blocks transferred so far [default: 1].

bsize: int, optional

Size of each block (in tqdm units) [default: 1].

tsize: int, optional

Total size (in tqdm units). If [default: None] remains unchanged.

pysradb.utils.confirm(preceeding_text)[source]#

Confirm user input.

Parameters:
preceeding_text: str

Text to print

Returns:
response: bool
pysradb.utils.copyfileobj(fsrc, fdst, bufsize=16384, filesize=None, desc='')[source]#

Copy file object with a progress bar.

Parameters:
fsrc: filehandle

Input file handle

fdst: filehandle

Output file handle

bufsize: int

Length of output buffer

filesize: int

Input file size

desc: string

Description for tqdm status

pysradb.utils.get_gzip_uncompressed_size(filepath)[source]#

Get uncompressed size of a .gz file

Parameters:
filepath: string

Path to input file

Returns:
filesize: int

Uncompressed file size

pysradb.utils.mkdir_p(path)[source]#

Python version mkdir -p

Parameters:
path: string

Path to directory to create

pysradb.utils.order_dataframe(df, columns)[source]#

Order a dataframe

Order a dataframe by moving the given columns to the front

Parameters:
df: Dataframe

Dataframe

columns: list

List of columns that need to be put in front

pysradb.utils.path_leaf(path)[source]#

Get path’s tail from a filepath.

Parameters:
path: string

Filepath

Returns:
tail: string

Filename

pysradb.utils.requests_3_retries()[source]#

Generates a requests session object that allows 3 retries.

Returns:
session: requests.Session

requests session object that allows 3 retries for server-side errors.

pysradb.utils.run_command(command, verbose=False)[source]#

Run a shell command

pysradb.utils.scientific_name_to_taxid(name)[source]#

Converts a scientific name to its corresponding taxonomy ID.

Parameters:
name: str

Scientific name of interest.

Returns:
taxid: str

Taxonomy Id of the Scientific name.

Raises:
IncorrectFieldException

If the scientific name cannot be found.

pysradb.utils.unique(sequence)[source]#

Get unique elements from a list maintaining the order.

Parameters:
sequence: list
Returns:
unique_list: list

List with unique elements maintaining the order

Module contents#

Top-level package for pysradb.