class MiGA::Cli::Action::Doctor

Public Class Methods

OPERATIONS() click to toggle source

All supported operations

# File lib/miga/cli/action/doctor.rb, line 58
# Returns the registry of supported doctor operations stored in the
# +@@OPERATIONS+ class variable.
#
# NOTE(review): the documentation lists this as a class method; the
# enclosing singleton scope is not visible in this excerpt.
def OPERATIONS
  @@OPERATIONS
end

Public Instance Methods

check_bidir(cli) click to toggle source

Perform bidirectional operation with MiGA::Cli cli

# File lib/miga/cli/action/doctor.rb, line 117
# Makes sure that distances between reference datasets are stored in both
# directions, using the MiGA::Cli object +cli+.
#
# One child process is forked per requested thread (+cli[:threads]+). Each
# child walks the complete list of reference datasets but only processes
# those whose 1-based position modulo the thread count equals its own
# index; the child with index 0 is also the one reporting progress.
def check_bidir(cli)
  cli.say 'Checking if reference distances are bidirectional'
  references = cli.load_project.each_dataset.select(&:ref?)
  all_names = references.map(&:name)
  total = references.size
  workers = cli[:threads]
  workers.times do |worker|
    Process.fork do
      references.each.with_index(1) do |dataset, position|
        cli.advance('Datasets:', position, total, false) if worker.zero?
        next unless position % workers == worker

        known = saved_targets(dataset)
        unless known.nil?
          # Every reference not yet listed as a saved target is missing
          (all_names - known).each do |missing|
            save_bidirectional(cli.load_project.dataset(missing), dataset)
          end
        end
      end
    end
  end
  Process.waitall
  cli.say
end
check_cds(cli) click to toggle source

Perform cds operation with MiGA::Cli cli

# File lib/miga/cli/action/doctor.rb, line 185
# Looks for uncompressed files in the +:cds+ result of every dataset and
# gzips them, using the MiGA::Cli object +cli+.
#
# For each dataset with a +:cds+ result, the files registered under the keys
# genes, proteins, gff3, gff2, and tab are inspected. Any path that does not
# contain ".gz" is compressed with `gzip -9`; anything gzip prints to
# standard output is relayed through +warn+. When at least one file was
# compressed, the +:cds+ result is registered again and the +:stats+ result
# (if any) is removed.
def check_cds(cli)
  cli.say 'Looking for unzipped genes or proteins'
  n, k = cli.load_project.dataset_names.size, 0
  cli.load_project.each_dataset do |d|
    cli.advance('Datasets:', k += 1, n, false)
    res = d.result(:cds)
    next unless res

    changed = false
    # %w builds an Array of file keys (a bare %[...] would be a String)
    %w[genes proteins gff3 gff2 tab].each do |f|
      file = res.file_path(f)
      next unless file

      if file !~ /\.gz/
        cli.say "  > Gzipping #{d.name} #{f}   "
        cmdo = `gzip -9 '#{file}'`.chomp
        warn(cmdo) unless cmdo.empty?
        changed = true
      end
    end
    next unless changed

    d.add_result(:cds, true, force: true)
    sr = d.result(:stats)
    sr.remove! if sr
  end
  cli.say
end
check_db(cli) click to toggle source

Perform databases operation with MiGA::Cli cli

# File lib/miga/cli/action/doctor.rb, line 85
# Checks the integrity of the database files of every dataset in the
# project, using the MiGA::Cli object +cli+.
#
# One child process is forked per requested thread (+cli[:threads]+). Each
# child iterates over all datasets but only inspects those whose 1-based
# position modulo the thread count equals its own index; the child with
# index 0 also reports progress. When +check_sqlite3_database+ yields for a
# database file, that file is deleted together with the +:done+ file and the
# main file of the corresponding result (when the result is registered).
def check_db(cli)
  cli.say 'Checking integrity of databases'
  project = cli.load_project
  total = project.dataset_names.size
  workers = cli[:threads]
  workers.times do |worker|
    Process.fork do
      position = 0
      project.each_dataset do |dataset|
        position += 1
        cli.advance('Datasets:', position, total, false) if worker.zero?
        next unless position % workers == worker

        each_database_file(dataset) do |db_file, metric, result|
          check_sqlite3_database(db_file, metric) do
            cli.say(
              "  > Removing malformed database from #{dataset.name}:#{result}   "
            )
            File.unlink(db_file)
            registered = dataset.result(result)
            next unless registered

            # Drop the result markers so the result is no longer seen as done
            [registered.path(:done), registered.path].each do |stale|
              File.unlink(stale) if File.exist?(stale)
            end
          end
        end
      end
    end
  end
  Process.waitall
  cli.say
end
check_dist(cli) click to toggle source

Perform distances operation with MiGA::Cli cli

# File lib/miga/cli/action/doctor.rb, line 145
# Checks the project-wide distance tables for consistency with the datasets,
# using the MiGA::Cli object +cli+.
#
# For each of the "ani" and "aai" metrics, the project result named
# "<metric>_distances" is looked up; metrics without such a result are
# skipped. Otherwise the table is evaluated with +check_dist_eval+, and its
# two return values are passed on to +check_dist_fix+ and
# +check_dist_recompute+, respectively.
def check_dist(cli)
  project = cli.load_project
  # %w builds an Array of metric names (a bare %[...] would be a String)
  %w[ani aai].each do |dist|
    res = project.result("#{dist}_distances")
    next if res.nil?

    cli.say "Checking #{dist} table for consistent datasets"
    notok, fix = check_dist_eval(cli, project, res)
    check_dist_fix(cli, project, fix)
    check_dist_recompute(cli, res, notok)
  end
end
check_ess(cli) click to toggle source

Perform essential-genes operation with MiGA::Cli cli

# File lib/miga/cli/action/doctor.rb, line 211
# Looks for essential-genes results whose protein files are not archived,
# using the MiGA::Cli object +cli+.
#
# For each dataset with an +:essential_genes+ result:
# - If the result has no +:collection+ path, the result is removed together
#   with the +:stats+ result (if any).
# - Otherwise, if the collection directory contains any *.faa files, they
#   are packed into proteins.tar.gz and deleted. Anything the command prints
#   to standard output is relayed through +warn+.
def check_ess(cli)
  cli.say 'Looking for unarchived essential genes'
  cli.load_project.each_dataset do |d|
    res = d.result(:essential_genes)
    next if res.nil?

    dir = res.file_path(:collection)
    if dir.nil?
      cli.say "  > Removing #{d.name}:essential_genes"
      res.remove!
      sr = d.result(:stats)
      sr.remove! if sr
      next
    end
    next if Dir["#{dir}/*.faa"].empty?

    cli.say "  > Fixing #{d.name}"
    # Runs through the shell so that the *.faa glob and && chaining work
    cmdo = `cd '#{dir}' && tar -zcf proteins.tar.gz *.faa && rm *.faa`.chomp
    warn(cmdo) unless cmdo.empty?
  end
end
check_files(cli) click to toggle source

Perform files operation with MiGA::Cli cli

# File lib/miga/cli/action/doctor.rb, line 160
# Looks for results that reference files no longer present on disk, using
# the MiGA::Cli object +cli+.
#
# Every result of every dataset is inspected; as soon as one of its files is
# found missing, the result is registered again (forced) and the +:stats+
# result of the dataset (if any) is removed.
def check_files(cli)
  cli.say 'Looking for outdated files in results'
  total = cli.load_project.dataset_names.size
  seen = 0
  cli.load_project.each_dataset do |dataset|
    seen += 1
    cli.advance('Datasets:', seen, total, false)
    dataset.each_result do |key, result|
      missing = false
      result.each_file do |_sym, _rel, abs_path|
        next if File.exist?(abs_path)

        # One missing file is enough; stop scanning this result
        missing = true
        break
      end
      next unless missing

      cli.say "  > Registering again #{dataset.name}:#{key}   "
      dataset.add_result(key, true, force: true)
      stats = dataset.result(:stats)
      stats.remove! if stats
    end
  end
  cli.say
end
check_mts(cli) click to toggle source

Perform mytaxa-scan operation with MiGA::Cli cli

# File lib/miga/cli/action/doctor.rb, line 234
# Looks for MyTaxa Scan results that still hold unarchived or intermediate
# files, using the MiGA::Cli object +cli+.
#
# For each dataset with a +:mytaxa_scan+ result:
# - If the result registers a +:regions+ path and that directory exists, the
#   "<dataset>.reg" folder in its parent directory is packed into
#   "<dataset>.reg.tar.gz" and removed. Anything the command prints to
#   standard output is relayed through +warn+.
# - Any file registered under the keys blast, mytaxain, wintax, gene_ids, or
#   region_ids is deleted if present on disk.
# If any of those entries was registered, the result is registered again.
def check_mts(cli)
  cli.say 'Looking for unarchived MyTaxa Scan runs'
  cli.load_project.each_dataset do |d|
    res = d.result(:mytaxa_scan)
    next if res.nil?

    dir = res.file_path(:regions)
    fix = false
    unless dir.nil?
      if Dir.exist? dir
        cmdo = `cd '#{dir}/..' \
              && tar -zcf '#{d.name}.reg.tar.gz' '#{d.name}.reg' \
              && rm -r '#{d.name}.reg'`.chomp
        warn(cmdo) unless cmdo.empty?
      end
      fix = true
    end
    # %w builds an Array of file keys (a bare %[...] would be a String)
    %w[blast mytaxain wintax gene_ids region_ids].each do |ext|
      file = res.file_path(ext)
      unless file.nil?
        FileUtils.rm(file) if File.exist? file
        fix = true
      end
    end
    if fix
      cli.say "  > Fixing #{d.name}"
      d.add_result(:mytaxa_scan, true, force: true)
    end
  end
end
check_start(cli) click to toggle source

Perform start operation with MiGA::Cli cli

# File lib/miga/cli/action/doctor.rb, line 267
# Looks for results that still have a file at their +:start+ path, using
# the MiGA::Cli object +cli+, and saves each such result again.
def check_start(cli)
  cli.say 'Looking for legacy .start files lingering'
  cli.load_project.each_dataset do |dataset|
    dataset.each_result do |key, result|
      next unless File.exist?(result.path(:start))

      cli.say "  > Registering again #{dataset.name}:#{key}"
      result.save
    end
  end
end
check_status(cli) click to toggle source

Perform status operation with MiGA::Cli cli

# File lib/miga/cli/action/doctor.rb, line 65
# Recalculates the status of every dataset in the project, using the
# MiGA::Cli object +cli+.
#
# One child process is forked per requested thread (+cli[:threads]+). Each
# child loads the project, iterates over all datasets, and recalculates only
# those whose 1-based position modulo the thread count equals its own index;
# the child with index 0 also reports progress.
def check_status(cli)
  cli.say 'Updating metadata status'
  total = cli.load_project.dataset_names.size
  workers = cli[:threads]
  workers.times do |worker|
    Process.fork do
      position = 0
      cli.load_project.each_dataset do |dataset|
        position += 1
        cli.advance('Datasets:', position, total, false) if worker.zero?
        dataset.recalculate_status if position % workers == worker
      end
    end
  end
  Process.waitall
  cli.say
end
check_tax(cli) click to toggle source

Perform taxonomy operation with MiGA::Cli cli

# File lib/miga/cli/action/doctor.rb, line 281
# Placeholder for the taxonomy operation with the MiGA::Cli object +cli+.
# The method currently performs no work; the checks below are planned but
# not implemented.
def check_tax(cli)
  # Intended progress message, disabled until the checks exist:
  # cli.say 'o Checking for taxonomy/distances consistency'
  # TODO: Find 95%ANI clusters with entries from different species
  # TODO: Find different 95%ANI clusters with genomes from the same species
  # TODO: Find AAI values too high or too low for each LCA rank
end
parse_cli() click to toggle source
# File lib/miga/cli/action/doctor.rb, line 9
# Declares defaults and command-line options for the doctor action.
#
# Defaults: one thread, and every operation key in +@@OPERATIONS+ enabled.
# Options:
# - --ignore TASK1,TASK2: disables the listed tasks.
# - --only TASK: disables every task except the given one. Raises a
#   RuntimeError naming the task when it is not a supported task, instead of
#   failing with a NoMethodError on +nil+.
# - -t/--threads INT: number of concurrent processes to use.
#
# Each value of +@@OPERATIONS+ is indexed as [task name, description].
def parse_cli
  cli.defaults = { threads: 1 }
  cli.defaults = Hash[@@OPERATIONS.keys.map { |i| [i, true] }]
  cli.parse do |opt|
    # Maps the user-facing task name to its operation key
    operation_n = Hash[@@OPERATIONS.map { |k, v| [v[0], k] }]
    cli.opt_object(opt, [:project])
    opt.on(
      '--ignore TASK1,TASK2', Array,
      'Do not perform the task(s) listed. Available tasks are:',
      * @@OPERATIONS.values.map { |v| "~ #{v[0]}: #{v[1]}" }
    ) { |v| v.each { |i| cli[operation_n[i]] = false } }
    opt.on(
      '--only TASK',
      'Perform only the specified task (see --ignore)'
    ) do |v|
      match = @@OPERATIONS.find { |_, i| i[0] == v.downcase }
      raise "Unsupported task: #{v}" if match.nil?

      op_k = match.first
      @@OPERATIONS.each_key { |i| cli[i] = false }
      cli[op_k] = true
    end
    opt.on(
      '-t', '--threads INT', Integer,
      "Concurrent threads to use. By default: #{cli[:threads]}"
    ) { |v| cli[:threads] = v }
  end
end
perform() click to toggle source
# File lib/miga/cli/action/doctor.rb, line 35
# Runs every enabled operation, in the order in which the operations are
# listed in +@@OPERATIONS+. An operation with key +k+ is enabled when
# +cli[k]+ is truthy, and is executed by calling +check_<k>+ with +cli+.
def perform
  # The project is loaded up front; the returned object is not needed here
  cli.load_project
  @@OPERATIONS.keys.each do |operation|
    next unless cli[operation]

    send("check_#{operation}", cli)
  end
end