Helper module including specific functions to handle datasets.
Add dataset identified by +name+ and return MiGA::Dataset.
# File lib/miga/project/dataset.rb, line 73
def add_dataset(name)
  unless metadata[:datasets].include? name
    d = MiGA::Dataset.new(self, name)
    @metadata[:datasets] << name
    @dataset_names_hash[name] = true if @dataset_names_hash
    @dataset_names_set << name if @dataset_names_set
    @dataset_ref_active = nil
    save
    if d.ref? && d.active?
      recalculate_tasks("Reference dataset added: #{d.name}")
    end
    pull_hook(:on_add_dataset, name)
  end
  dataset(name)
end
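For illustration, a minimal usage sketch; the project handle +p+ and the dataset name are hypothetical:

  d = p.add_dataset('genome_1')          # => MiGA::Dataset
  d.name                                 # => "genome_1"
  p.dataset_names.include?('genome_1')   # => true
  # Adding an already registered name is a no-op that simply returns
  # the existing dataset
  p.add_dataset('genome_1').name         # => "genome_1", no duplicate entry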
Returns MiGA::Dataset identified by +name+, or nil if it does not exist.
# File lib/miga/project/dataset.rb, line 52
def dataset(name)
  name = name.to_s.miga_name
  return nil unless MiGA::Dataset.exist?(self, name)

  @datasets ||= {}
  @datasets[name] ||= MiGA::Dataset.new(self, name)
  @datasets[name]
end
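For illustration (hypothetical project handle +p+); the name is normalized with +miga_name+ and the resulting object is cached across calls:

  p.dataset('genome_1')    # => MiGA::Dataset
  p.dataset(:genome_1)     # => same dataset, the name is converted to String
  p.dataset('missing_ds')  # => nil when the dataset does not exist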
Returns Array of String (without evaluating dataset objects)
# File lib/miga/project/dataset.rb, line 15
def dataset_names
  metadata[:datasets]
end
Returns Hash of +{ String => true }+. Similar to dataset_names, but as a Hash for efficiency.
# File lib/miga/project/dataset.rb, line 22
def dataset_names_hash
  warn 'The Project#dataset_names_hash method will be deprecated soon'
  @dataset_names_hash ||= Hash[dataset_names.map { |i| [i, true] }]
end
Returns Set of Strings. Similar to dataset_names, but as a Set for efficiency.
# File lib/miga/project/dataset.rb, line 30
def dataset_names_set
  @dataset_names_set ||= Set.new(dataset_names)
end
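A short sketch contrasting the three accessors above (hypothetical project +p+); the Set form is the non-deprecated constant-time membership test, while the Hash form warns that it will be deprecated:

  p.dataset_names                           # => ["genome_1", "genome_2"]
  p.dataset_names_set.include?('genome_1')  # => true, O(1) lookup
  p.dataset_names_hash['genome_1']          # => true, with a deprecation warning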
Cache for the special set of datasets which are both reference and active,
returned as an Array. Use carefully, as it doesn't get recalculated
upon dataset (in)activation once loaded. To force recalculating, use
dataset_ref_active!
# File lib/miga/project/dataset.rb, line 39
def dataset_ref_active
  @dataset_ref_active ||= dataset_ref_active!
end
Force recalculation of dataset_ref_active and return the Array of MiGA::Dataset objects.
# File lib/miga/project/dataset.rb, line 46
def dataset_ref_active!
  @dataset_ref_active = datasets.select(&:ref?).select(&:active?)
end
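A sketch of the caching behavior (hypothetical project +p+):

  p.dataset_ref_active   # => Array of datasets that are both ref? and active?
  # The Array above is cached; if datasets are later (in)activated,
  # force a fresh calculation explicitly:
  p.dataset_ref_active!  # => recalculated Array of MiGA::Dataset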
Returns Array of MiGA::Dataset
# File lib/miga/project/dataset.rb, line 9
def datasets
  metadata[:datasets].map { |name| dataset(name) }
end
Are all the datasets in the project preprocessed? Save intermediate results if +save+ (until the first incomplete dataset is reached).
# File lib/miga/project/dataset.rb, line 172
def done_preprocessing?(save = false)
  !each_dataset.any? do |d|
    d.ref? && d.active? && !d.done_preprocessing?(save)
  end
end
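A sketch (hypothetical project +p+); only datasets that are both reference and active are considered:

  p.done_preprocessing?        # => true or false
  p.done_preprocessing?(true)  # same check, saving intermediate results until
                               # the first incomplete dataset is reached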
Iterate through datasets (MiGA::Dataset)
# File lib/miga/project/dataset.rb, line 63
def each_dataset(&blk)
  if block_given?
    metadata[:datasets].each { |name| blk.call(dataset(name)) }
  else
    to_enum(:each_dataset)
  end
end
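A sketch (hypothetical project +p+) of the block and enumerator forms; unlike datasets, which builds the full Array up front, the enumerator instantiates each MiGA::Dataset as it is visited:

  p.each_dataset { |d| puts d.name }    # block form
  refs = p.each_dataset.select(&:ref?)  # enumerator form, usable with Enumerable
  p.datasets.length                     # eager Array of MiGA::Dataset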
Call +blk+ passing the result of MiGA::Dataset::Result#profile_advance for each registered dataset.
# File lib/miga/project/dataset.rb, line 194
def each_dataset_profile_advance(&blk)
  each_dataset { |ds| blk.call(ds.profile_advance) }
end
Import the dataset +ds+, a MiGA::Dataset, using +method+, which is any method supported by File#generic_transfer.
# File lib/miga/project/dataset.rb, line 110
def import_dataset(ds, method = :hardlink)
  raise "Impossible to import dataset, it already exists: #{ds.name}." if
    MiGA::Dataset.exist?(self, ds.name)

  # Import dataset results
  ds.each_result do |task, result|
    # import result files
    result.each_file do |file|
      File.generic_transfer(
        File.join(result.dir, file),
        File.join(path, 'data', MiGA::Dataset.RESULT_DIRS[task], file),
        method
      )
    end
    # import result metadata
    %w(json start done).each do |suffix|
      if File.exist? File.join(result.dir, "#{ds.name}.#{suffix}")
        File.generic_transfer(
          File.join(result.dir, "#{ds.name}.#{suffix}"),
          File.join(
            path, 'data', MiGA::Dataset.RESULT_DIRS[task],
            "#{ds.name}.#{suffix}"
          ),
          method
        )
      end
    end
  end
  # Import dataset metadata
  File.generic_transfer(
    File.join(ds.project.path, 'metadata', "#{ds.name}.json"),
    File.join(self.path, 'metadata', "#{ds.name}.json"),
    method
  )
  # Save dataset
  self.add_dataset(ds.name)
end
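A sketch of importing a dataset from one project into another; the paths and dataset name are hypothetical, and it assumes MiGA::Project.load returns a handle for an existing project path:

  source = MiGA::Project.load('/path/to/source_project')
  target = MiGA::Project.load('/path/to/target_project')
  ds = source.dataset('genome_1')
  # Transfers result files and metadata (hardlinks by default), then registers
  # the dataset in +target+; raises if +target+ already has a dataset with
  # the same name
  target.import_dataset(ds)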
Returns a two-dimensional matrix (Array of Array) where the first index corresponds to the dataset, the second index corresponds to the dataset task, and the value corresponds to:
0: Before execution.
1: Done (or not required).
2: To do.
# File lib/miga/project/dataset.rb, line 185
def profile_datasets_advance
  advance = []
  each_dataset_profile_advance { |adv| advance << adv }
  advance
end
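A sketch of the returned matrix (hypothetical project +p+ with two datasets); rows follow the registration order of the datasets:

  p.profile_datasets_advance
  # => [[1, 1, 2, 0],   # dataset 1: two tasks done, one to do, one before execution
  #     [1, 1, 1, 1]]   # dataset 2: every task done (or not required)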
Unlink dataset identified by +name+ and return MiGA::Dataset.
# File lib/miga/project/dataset.rb, line 91
def unlink_dataset(name)
  d = dataset(name)
  return nil if d.nil?

  @dataset_names_hash = nil
  @dataset_names_set = nil
  @dataset_ref_active = nil
  self.metadata[:datasets].delete(name)
  save
  if d.ref? && d.active?
    recalculate_tasks("Reference dataset unlinked: #{d.name}")
  end
  pull_hook(:on_unlink_dataset, name)
  d
end
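A sketch (hypothetical project +p+); unlinking removes the registration from the project metadata but does not delete the dataset's files on disk:

  d = p.unlink_dataset('genome_1')       # => the unlinked MiGA::Dataset
  p.dataset_names.include?('genome_1')   # => false
  p.unlink_dataset('not_a_dataset')      # => nil when nothing matches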
Find all datasets with (potential) result files that are not yet registered.
# File lib/miga/project/dataset.rb, line 150
def unregistered_datasets
  datasets = []
  MiGA::Dataset.RESULT_DIRS.values.each do |dir|
    dir_p = "#{path}/data/#{dir}"
    next unless Dir.exist? dir_p

    Dir.entries(dir_p).each do |file|
      next unless file =~ %r{
        \.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
      }x
      m = /([^\.]+)/.match(file)
      datasets << m[1] unless m.nil? or m[1] == "miga-project"
    end
  end
  datasets.uniq - metadata[:datasets]
end
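A sketch (hypothetical project +p+) pairing this method with add_dataset to register every dataset that already has result files in the project's data directories:

  p.unregistered_datasets.each { |name| p.add_dataset(name) }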