Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import htsjdk.samtools.util.StringUtil;
import org.apache.commons.io.IOUtils;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.jetbrains.annotations.Nullable;
import org.labkey.api.data.Container;
import org.labkey.api.data.ContainerManager;
Expand All @@ -32,8 +31,11 @@
import org.labkey.api.exp.api.ExpData;
import org.labkey.api.exp.api.ExperimentService;
import org.labkey.api.files.FileContentService;
import org.labkey.api.security.Crypt;
import org.labkey.api.security.User;
import org.labkey.api.util.FileUtil;
import org.labkey.api.util.MemTracker;
import org.labkey.api.util.logging.LogHelper;
import org.labkey.api.writer.PrintWriters;

import java.io.File;
Expand All @@ -55,7 +57,9 @@
*/
public class RefNtSequenceModel implements Serializable
{
private static final Logger _log = LogManager.getLogger(RefNtSequenceModel.class);
private static final Logger _log = LogHelper.getLogger(RefNtSequenceModel.class, "Messages related to Reference NT Sequences");

public static String BASE_DIRNAME = ".sequences";

private int _rowid;
private String _name;
Expand Down Expand Up @@ -414,7 +418,7 @@ public byte[] getSequenceBases()

public void createFileForSequence(User u, String sequence, @Nullable File outDir) throws IOException
{
File output = getExpectedSequenceFile(outDir);
File output = getExpectedSequenceFile();
if (output.exists())
{
output.delete();
Expand All @@ -439,9 +443,9 @@ public void createFileForSequence(User u, String sequence, @Nullable File outDir
Table.update(u, ti, this, _rowid);
}

private File getExpectedSequenceFile(@Nullable File outDir) throws IllegalArgumentException
public File getExpectedSequenceFile() throws IllegalArgumentException
{
return new File(getSequenceDir(true, outDir), _rowid + ".txt.gz");
return FileUtil.appendName(getHashedDir(true), _rowid + ".txt.gz");
}

private Container getLabKeyContainer()
Expand All @@ -455,33 +459,17 @@ private Container getLabKeyContainer()
return c;
}

private File getSequenceDir(boolean create, @Nullable File outDir) throws IllegalArgumentException
private File getBaseSequenceDir() throws IllegalArgumentException
{
Container c = getLabKeyContainer();
File ret = outDir == null ? getReferenceSequenceDir(c) : outDir;
if (create && !ret.exists())
{
ret.mkdirs();
}

return ret;
}

private File getReferenceSequenceDir(Container c) throws IllegalArgumentException
{
FileContentService fileService = FileContentService.get();
File root = fileService == null ? null : fileService.getFileRoot(c, FileContentService.ContentType.files);
if (root == null)
{
throw new IllegalArgumentException("File root not defined for container: " + c.getPath());
}

return new File(root, ".sequences");
}

public void writeSequence(Writer writer, int lineLength) throws IOException
{
writeSequence(writer, lineLength, null, null);
return FileUtil.appendName(root, BASE_DIRNAME);
}

public void writeSequence(Writer writer, int lineLength, Integer start, Integer end) throws IOException
Expand Down Expand Up @@ -562,6 +550,26 @@ public File getOffsetsFile()
return null;
}

return new File(d.getFile().getParentFile(), getRowid() + "_offsets.txt");
return FileUtil.appendName(d.getFile().getParentFile(), getRowid() + "_offsets.txt");
}

/**
 * Builds the nested directory in which this sequence's file is stored. The directory is the base
 * sequence directory followed by six sub-directories whose names are consecutive slices of the
 * MD5 digest of this sequence's rowid.
 *
 * @param create when true, {@code mkdirs()} is called on the resulting directory (its result is not checked)
 * @return the hashed directory for this sequence; it is only guaranteed to have been requested, not to exist
 */
private File getHashedDir(boolean create)
{
    File hashedDir = getBaseSequenceDir();
    String rowidDigest = Crypt.MD5.digest(String.valueOf(getRowid()));

    // Slice boundaries within the digest: each adjacent pair names one directory level.
    // NOTE(review): assumes the digest is at least 32 characters long (hex MD5) - confirm against Crypt.
    int[] boundaries = {0, 4, 8, 12, 20, 28, 32};
    for (int i = 1; i < boundaries.length; i++)
    {
        String segment = rowidDigest.substring(boundaries[i - 1], boundaries[i]);
        hashedDir = FileUtil.appendName(hashedDir, segment);
    }

    if (create)
    {
        hashedDir.mkdirs();
    }

    return hashedDir;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT core.executeJavaUpgradeCode('migrateSequenceDirs');
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
EXEC core.executeJavaUpgradeCode 'migrateSequenceDirs';
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
Expand Down Expand Up @@ -301,10 +302,10 @@ private void processContainer(Container c, Logger log) throws IOException, Pipel
{
//first sequences
log.debug("Inspecting sequences");
File sequenceDir = new File(root.getRootPath(), ".sequences");
File sequenceDir = FileUtil.appendName(root.getRootPath(), ".sequences");
TableInfo tableRefNtSequences = SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_REF_NT_SEQUENCES);
TableSelector ntTs = new TableSelector(tableRefNtSequences, new SimpleFilter(FieldKey.fromString("container"), c.getId()), null);
final Set<String> expectedSequences = new HashSet<>(10000, 1000);
final Set<File> expectedSequences = new HashSet<>(10000, 1000);
ntTs.forEach(RefNtSequenceModel.class, m -> {
if (m.getSequenceFile() == null || m.getSequenceFile() == 0)
{
Expand All @@ -319,26 +320,23 @@ private void processContainer(Container c, Logger log) throws IOException, Pipel
return;
}

if (!d.getFile().exists())
{
log.error("expected sequence file does not exist for sequence: " + m.getRowid() + " " + m.getName() + ", expected: " + d.getFile().getPath());
return;
}

if (d.getFile().getAbsolutePath().toLowerCase().startsWith(sequenceDir.getAbsolutePath().toLowerCase()))
{
expectedSequences.add(d.getFile().getName());
expectedSequences.add(d.getFile());
}
});

if (sequenceDir.exists())
{
for (File child : sequenceDir.listFiles())
inspectSequenceDir(sequenceDir, expectedSequences, log);
}

if (!expectedSequences.isEmpty())
{
for (File missing : expectedSequences)
{
if (!expectedSequences.contains(child.getName()))
{
deleteFile(child, log);
}
log.error("expected sequence file does not exist: " + missing.getPath());
return;
}
}

Expand Down Expand Up @@ -446,12 +444,12 @@ private void processContainer(Container c, Logger log) throws IOException, Pipel
continue;
}

deleteFile(new File(child, fileName), log);
deleteFile(FileUtil.appendName(child, fileName), log);
}
}

//check/verify tracks
File trackDir = new File(child, "tracks");
File trackDir = FileUtil.appendName(child, "tracks");
if (trackDir.exists())
{
Set<String> expectedTracks = new HashSet<>();
Expand Down Expand Up @@ -486,7 +484,7 @@ private void processContainer(Container c, Logger log) throws IOException, Pipel
}

//check/verify chainFiles
File chainDir = new File(child, "chainFiles");
File chainDir = FileUtil.appendName(child, "chainFiles");
if (chainDir.exists())
{
Set<String> expectedChains = new HashSet<>();
Expand Down Expand Up @@ -555,7 +553,7 @@ private void processContainer(Container c, Logger log) throws IOException, Pipel
}
}

File sequenceOutputsDir = new File(root.getRootPath(), "sequenceOutputs");
File sequenceOutputsDir = FileUtil.appendName(root.getRootPath(), "sequenceOutputs");
if (sequenceOutputsDir.exists())
{
for (File child : sequenceOutputsDir.listFiles())
Expand All @@ -576,6 +574,24 @@ private void processContainer(Container c, Logger log) throws IOException, Pipel
}
}

/**
 * Recursively walks {@code sequenceDir}. Every regular file found is removed from
 * {@code expectedSequences}; a file that was not present in that set is passed to
 * {@code deleteFile}. Entries still left in the set once the walk finishes were never
 * encountered on disk.
 *
 * @param sequenceDir directory to inspect; a failed listing ({@code listFiles()} returning null)
 *                    results in a NullPointerException
 * @param expectedSequences files that are expected to exist; mutated by this method
 * @param log logger handed through to {@code deleteFile}
 * @throws IOException propagated from {@code deleteFile}
 */
private void inspectSequenceDir(File sequenceDir, Set<File> expectedSequences, Logger log) throws IOException
{
    File[] entries = Objects.requireNonNull(sequenceDir.listFiles());
    for (File entry : entries)
    {
        if (entry.isDirectory())
        {
            // descend; sub-directories themselves are never deleted here
            inspectSequenceDir(entry, expectedSequences, log);
            continue;
        }

        boolean wasExpected = expectedSequences.remove(entry);
        if (!wasExpected)
        {
            deleteFile(entry, log);
        }
    }
}

private void deleteFile(File f, Logger log) throws IOException
{
log.info("deleting sequence file: " + f.getPath());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ public String getName()
@Override
public Double getSchemaVersion()
{
return 12.331;
return 12.332;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,4 +229,69 @@ public void updateBarcodeRC(final ModuleContext moduleContext)
});
}
}

/**
 * Called at 12.331-12.332.
 * <p>
 * Moves each reference NT sequence file from the legacy flat directory
 * ({@link RefNtSequenceModel#BASE_DIRNAME} directly under the file root) to the location returned by
 * {@link RefNtSequenceModel#getExpectedSequenceFile()}, and updates the sequence's ExpData to point
 * at the new file. For each sequence the file is copied first, the ExpData is saved second, and the
 * legacy file is deleted last, so the original is not removed until the new location has been recorded.
 * <p>
 * Sequences are skipped (and logged) when they have no sequence file, their ExpData or file is missing,
 * the file is not directly inside the legacy directory, or the target already exists. Any exception
 * stops the remaining migration and is logged rather than rethrown.
 *
 * @param moduleContext upgrade context; supplies the user under which each ExpData is saved
 */
@SuppressWarnings({"UnusedDeclaration"})
@DeferredUpgrade
public void migrateSequenceDirs(final ModuleContext moduleContext)
{
    try
    {
        TableInfo ti = SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_REF_NT_SEQUENCES);
        TableSelector ts = new TableSelector(ti);
        List<RefNtSequenceModel> nts = ts.getArrayList(RefNtSequenceModel.class);
        _log.info(nts.size() + " total sequences to migrate");
        int processed = 0;
        for (RefNtSequenceModel nt : nts)
        {
            processed++;

            if (processed % 1000 == 0)
            {
                _log.info("{} of {} sequence files migrated", processed, nts.size());
            }

            // A sequence row may have no file attached. Looking up a null id would throw and,
            // via the outer catch, abandon every remaining sequence, so skip such rows explicitly.
            if (nt.getSequenceFile() == null || nt.getSequenceFile() == 0)
            {
                _log.info("NT sequence has no sequence file, nothing to migrate: {}", nt.getRowid());
                continue;
            }

            ExpData legacyExpData = ExperimentService.get().getExpData(nt.getSequenceFile());
            if (legacyExpData == null)
            {
                _log.error("Missing ExpData for NT sequence: {}", nt.getSequenceFile());
                continue;
            }

            File legacyFile = legacyExpData.getFile();
            if (!legacyFile.exists())
            {
                _log.error("Missing file for NT sequence: {}", legacyFile.getPath());
                continue;
            }

            // Only files sitting directly in the legacy directory still need to be moved.
            if (!RefNtSequenceModel.BASE_DIRNAME.equals(legacyFile.getParentFile().getName()))
            {
                _log.error("Sequence appears to have already been migrated, this might indicate a retry after a failed move: {}", legacyFile.getPath());
                continue;
            }

            File newLocation = nt.getExpectedSequenceFile();
            if (!newLocation.getParentFile().exists())
            {
                newLocation.getParentFile().mkdirs();
            }

            // Never overwrite an existing target; it may be the result of an earlier, interrupted run.
            if (newLocation.exists())
            {
                _log.error("Target location for migrated sequence file exists, this might indicate a retry after a failed move: {}", newLocation.getPath());
                continue;
            }

            FileUtils.copyFile(legacyFile, newLocation);
            legacyExpData.setDataFileURI(newLocation.toURI());
            legacyExpData.save(moduleContext.getUpgradeUser());

            // The new copy is already recorded, so a failed delete only leaves a stray file behind; report it.
            if (!legacyFile.delete())
            {
                _log.warn("Unable to delete legacy sequence file after migration: {}", legacyFile.getPath());
            }
        }
    }
    catch (Exception e)
    {
        _log.error("Error upgrading sequenceanalysis module", e);
    }
}
}