diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/RefNtSequenceModel.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/RefNtSequenceModel.java index 4d7afe5c6..71d236c70 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/RefNtSequenceModel.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/RefNtSequenceModel.java @@ -18,7 +18,6 @@ import htsjdk.samtools.util.StringUtil; import org.apache.commons.io.IOUtils; import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.LogManager; import org.jetbrains.annotations.Nullable; import org.labkey.api.data.Container; import org.labkey.api.data.ContainerManager; @@ -32,8 +31,11 @@ import org.labkey.api.exp.api.ExpData; import org.labkey.api.exp.api.ExperimentService; import org.labkey.api.files.FileContentService; +import org.labkey.api.security.Crypt; import org.labkey.api.security.User; +import org.labkey.api.util.FileUtil; import org.labkey.api.util.MemTracker; +import org.labkey.api.util.logging.LogHelper; import org.labkey.api.writer.PrintWriters; import java.io.File; @@ -55,7 +57,9 @@ */ public class RefNtSequenceModel implements Serializable { - private static final Logger _log = LogManager.getLogger(RefNtSequenceModel.class); + private static final Logger _log = LogHelper.getLogger(RefNtSequenceModel.class, "Messages related to Reference NT Sequences"); + + public static String BASE_DIRNAME = ".sequences"; private int _rowid; private String _name; @@ -414,7 +418,7 @@ public byte[] getSequenceBases() public void createFileForSequence(User u, String sequence, @Nullable File outDir) throws IOException { - File output = getExpectedSequenceFile(outDir); + File output = getExpectedSequenceFile(); if (output.exists()) { output.delete(); @@ -439,9 +443,9 @@ public void createFileForSequence(User u, String sequence, @Nullable File outDir Table.update(u, ti, this, _rowid); } - private File getExpectedSequenceFile(@Nullable File outDir) throws IllegalArgumentException + public File getExpectedSequenceFile() throws IllegalArgumentException { - return new File(getSequenceDir(true, outDir), _rowid + ".txt.gz"); + return FileUtil.appendName(getHashedDir(true), _rowid + ".txt.gz"); } private Container getLabKeyContainer() @@ -455,20 +459,9 @@ private Container getLabKeyContainer() return c; } - private File getSequenceDir(boolean create, @Nullable File outDir) throws IllegalArgumentException + private File getBaseSequenceDir() throws IllegalArgumentException { Container c = getLabKeyContainer(); - File ret = outDir == null ? getReferenceSequenceDir(c) : outDir; - if (create && !ret.exists()) - { - ret.mkdirs(); - } - - return ret; - } - - private File getReferenceSequenceDir(Container c) throws IllegalArgumentException - { FileContentService fileService = FileContentService.get(); File root = fileService == null ? null : fileService.getFileRoot(c, FileContentService.ContentType.files); if (root == null) @@ -476,12 +469,7 @@ private File getReferenceSequenceDir(Container c) throws IllegalArgumentExceptio throw new IllegalArgumentException("File root not defined for container: " + c.getPath()); } - return new File(root, ".sequences"); - } - - public void writeSequence(Writer writer, int lineLength) throws IOException - { - writeSequence(writer, lineLength, null, null); + return FileUtil.appendName(root, BASE_DIRNAME); } public void writeSequence(Writer writer, int lineLength, Integer start, Integer end) throws IOException @@ -562,6 +550,26 @@ public File getOffsetsFile() return null; } - return new File(d.getFile().getParentFile(), getRowid() + "_offsets.txt"); + return FileUtil.appendName(d.getFile().getParentFile(), getRowid() + "_offsets.txt"); + } + + private File getHashedDir(boolean create) + { + File baseDir = getBaseSequenceDir(); + String digest = Crypt.MD5.digest(String.valueOf(getRowid())); + + baseDir = FileUtil.appendName(baseDir, digest.substring(0,4)); + baseDir = FileUtil.appendName(baseDir, digest.substring(4,8)); + baseDir = FileUtil.appendName(baseDir, digest.substring(8,12)); + baseDir = FileUtil.appendName(baseDir, digest.substring(12,20)); + baseDir = FileUtil.appendName(baseDir, digest.substring(20,28)); + baseDir = FileUtil.appendName(baseDir, digest.substring(28,32)); + + if (create) + { + baseDir.mkdirs(); + } + + return baseDir; } } diff --git a/SequenceAnalysis/resources/schemas/dbscripts/postgresql/SequenceAnalysis-12.331-12.332.sql b/SequenceAnalysis/resources/schemas/dbscripts/postgresql/SequenceAnalysis-12.331-12.332.sql new file mode 100644 index 000000000..2c2517351 --- /dev/null +++ b/SequenceAnalysis/resources/schemas/dbscripts/postgresql/SequenceAnalysis-12.331-12.332.sql @@ -0,0 +1 @@ +SELECT core.executeJavaUpgradeCode('migrateSequenceDirs'); \ No newline at end of file diff --git a/SequenceAnalysis/resources/schemas/dbscripts/sqlserver/SequenceAnalysis-12.331-12.332.sql b/SequenceAnalysis/resources/schemas/dbscripts/sqlserver/SequenceAnalysis-12.331-12.332.sql new file mode 100644 index 000000000..b24244d15 --- /dev/null +++ b/SequenceAnalysis/resources/schemas/dbscripts/sqlserver/SequenceAnalysis-12.331-12.332.sql @@ -0,0 +1 @@ +EXEC core.executeJavaUpgradeCode 'migrateSequenceDirs'; \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java index 6ea1c01a9..a87859e5e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java @@ -46,6 +46,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -301,10 +302,10 @@ private void processContainer(Container c, Logger log) throws IOException, Pipel { //first sequences log.debug("Inspecting sequences"); - File sequenceDir = new File(root.getRootPath(), ".sequences"); + File sequenceDir = FileUtil.appendName(root.getRootPath(), ".sequences"); TableInfo tableRefNtSequences = SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_REF_NT_SEQUENCES); TableSelector ntTs = new TableSelector(tableRefNtSequences, new SimpleFilter(FieldKey.fromString("container"), c.getId()), null); - final Set expectedSequences = new HashSet<>(10000, 1000); + final Set expectedSequences = new HashSet<>(10000, 1000); ntTs.forEach(RefNtSequenceModel.class, m -> { if (m.getSequenceFile() == null || m.getSequenceFile() == 0) { @@ -319,26 +320,23 @@ private void processContainer(Container c, Logger log) throws IOException, Pipel return; } - if (!d.getFile().exists()) - { - log.error("expected sequence file does not exist for sequence: " + m.getRowid() + " " + m.getName() + ", expected: " + d.getFile().getPath()); - return; - } - if (d.getFile().getAbsolutePath().toLowerCase().startsWith(sequenceDir.getAbsolutePath().toLowerCase())) { - expectedSequences.add(d.getFile().getName()); + expectedSequences.add(d.getFile()); } }); if (sequenceDir.exists()) { - for (File child : sequenceDir.listFiles()) + inspectSequenceDir(sequenceDir, expectedSequences, log); + } + + if (!expectedSequences.isEmpty()) + { + for (File missing : expectedSequences) { - if (!expectedSequences.contains(child.getName())) - { - deleteFile(child, log); - } + log.error("expected sequence file does not exist: " + missing.getPath()); + return; } } @@ -446,12 +444,12 @@ private void processContainer(Container c, Logger log) throws IOException, Pipel continue; } - deleteFile(new File(child, fileName), log); + deleteFile(FileUtil.appendName(child, fileName), log); } } //check/verify tracks - File trackDir = new File(child, "tracks"); + File trackDir = FileUtil.appendName(child, "tracks"); if (trackDir.exists()) { Set expectedTracks = new HashSet<>(); @@ -486,7 +484,7 @@ private void processContainer(Container c, Logger log) throws IOException, Pipel } //check/verify chainFiles - File chainDir = new File(child, "chainFiles"); + File chainDir = FileUtil.appendName(child, "chainFiles"); if (chainDir.exists()) { Set expectedChains = new HashSet<>(); @@ -555,7 +553,7 @@ private void processContainer(Container c, Logger log) throws IOException, Pipel } } - File sequenceOutputsDir = new File(root.getRootPath(), "sequenceOutputs"); + File sequenceOutputsDir = FileUtil.appendName(root.getRootPath(), "sequenceOutputs"); if (sequenceOutputsDir.exists()) { for (File child : sequenceOutputsDir.listFiles()) @@ -576,6 +574,24 @@ private void processContainer(Container c, Logger log) throws IOException, Pipel } } + private void inspectSequenceDir(File sequenceDir, Set expectedSequences, Logger log) throws IOException + { + for (File child : Objects.requireNonNull(sequenceDir.listFiles())) + { + if (child.isDirectory()) + { + inspectSequenceDir(child, expectedSequences, log); + } + else + { + if (!expectedSequences.remove(child)) + { + deleteFile(child, log); + } + } + } + } + private void deleteFile(File f, Logger log) throws IOException { log.info("deleting sequence file: " + f.getPath()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index a4044bcae..60186f5ee 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -209,7 +209,7 @@ public String getName() @Override public Double getSchemaVersion() { - return 12.331; + return 12.332; } @Override diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisUpgradeCode.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisUpgradeCode.java index 858684d11..40b221c70 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisUpgradeCode.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisUpgradeCode.java @@ -229,4 +229,69 @@ public void updateBarcodeRC(final ModuleContext moduleContext) }); } } + + /** called at 12.331-12.332*/ + @SuppressWarnings({"UnusedDeclaration"}) + @DeferredUpgrade + public void migrateSequenceDirs(final ModuleContext moduleContext) + { + try + { + TableInfo ti = SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_REF_NT_SEQUENCES); + TableSelector ts = new TableSelector(ti); + List nts = ts.getArrayList(RefNtSequenceModel.class); + _log.info(nts.size() + " total sequences to migrate"); + int processed = 0; + for (RefNtSequenceModel nt : nts) + { + processed++; + + if (processed % 1000 == 0) + { + _log.info("{} of {} sequence files migrated", processed, nts.size()); + } + + ExpData legacyExpData = ExperimentService.get().getExpData(nt.getSequenceFile()); + if (legacyExpData == null) + { + _log.error("Missing ExpData for NT sequence: {}", nt.getSequenceFile()); + continue; + } + + File legacyFile = legacyExpData.getFile(); + if (!legacyFile.exists()) + { + _log.error("Missing file for NT sequence: {}", legacyFile.getPath()); + continue; + } + + if (!RefNtSequenceModel.BASE_DIRNAME.equals(legacyFile.getParentFile().getName())) + { + _log.error("Sequence appears to have already been migrated, this might indicate a retry after a failed move: {}", legacyFile.getPath()); + continue; + } + + File newLocation = nt.getExpectedSequenceFile(); + if (!newLocation.getParentFile().exists()) + { + newLocation.getParentFile().mkdirs(); + } + + if (newLocation.exists()) + { + _log.error("Target location for migrated sequence file exists, this might indicate a retry after a filed move: {}", newLocation.getPath()); + continue; + } + + FileUtils.copyFile(legacyFile, newLocation); + legacyExpData.setDataFileURI(newLocation.toURI()); + legacyExpData.save(moduleContext.getUpgradeUser()); + legacyFile.delete(); + } + } + catch (Exception e) + { + _log.error("Error upgrading sequenceanalysis module", e); + } + } }