Difference between revisions of "Adding Format Identification Tools"
Line 39: | Line 39: | ||
==Set selection== | ==Set selection== | ||
− | + | The first step in the workflow is to set the selection as the tool to use during normalization. This is done by making an insert into the unit's variables table for the variable normalizationFileIdentificationToolIdentifierTypes. The value set is a piece of a SQL query used in [https://github.com/artefactual/archivematica/blob/master/src/MCPServer/lib/linkTaskManagerSplitOnFileIdAndruleset.py linkTaskManagerSplitOnFileIdAndruleset.py] to restrict the fileIDs used to the desired type. | |
+ | |||
+ | For FIDO: | ||
+ | <pre> | ||
+ | INSERT INTO `MicroServiceChainLinks` (`pk`, `currentTask`, `defaultNextChainLink`, `defaultPlaySound`, `microserviceGroup`, `reloadFileList`, `defaultExitMessage`, `replaces`, `lastModified`) VALUES ('982229bd-73b8-432e-a1d9-2d9d15d7287d','1e516ea6-6814-4292-9ea9-552ebfaa0d23','4c4281a1-43cd-4c6e-b1dc-573bd1a23c43',NULL,'Normalize',1,'Failed',NULL,'2012-10-23 19:41:23'); | ||
+ | INSERT INTO `TasksConfigs` (`pk`, `taskType`, `taskTypePKReference`, `description`, `replaces`, `lastModified`) VALUES ('1e516ea6-6814-4292-9ea9-552ebfaa0d23','6f0b612c-867f-4dfd-8e43-5b35b7f882d7','f130c16d-d419-4063-8c8b-2e4c3ad138bb','Set SIP to normalize with FIDO file identification.',NULL,'2012-10-23 19:41:23'); | ||
+ | INSERT INTO `TasksConfigsSetUnitVariable` (`pk`, `variable`, `variableValue`, `microServiceChainLink`, `createdTime`, `updatedTime`) VALUES ('f130c16d-d419-4063-8c8b-2e4c3ad138bb','normalizationFileIdentificationToolIdentifierTypes','FileIDTypes.pk = \'afdbee13-eec5-4182-8c6c-f5638ee290f3\'',NULL,'2012-10-23 19:41:23','0000-00-00 00:00:00'); | ||
+ | INSERT INTO `MicroServiceChainLinksExitCodes` (`pk`, `microServiceChainLink`, `exitCode`, `nextMicroServiceChainLink`, `playSound`, `exitMessage`, `replaces`, `lastModified`) VALUES ('82c97f8d-087d-4636-9dd9-59bbc04e6520','982229bd-73b8-432e-a1d9-2d9d15d7287d',0,'4c4281a1-43cd-4c6e-b1dc-573bd1a23c43',NULL,'Completed successfully',NULL,'2012-10-23 21:39:43'); | ||
+ | </pre> | ||
+ | |||
==Run tool== | ==Run tool== | ||
− | + | The next step is to run the tool on the objects. | |
+ | |||
+ | <pre> | ||
+ | SET @YLink = '83484326-7be7-4f9f-b252-94553cd42370'; | ||
+ | |||
+ | SET @TasksConfigPKReference = '46883944-8561-44d0-ac50-e1c3fd9aeb59'; | ||
+ | SET @TasksConfig = '7f786b5c-c003-4ef1-97c2-c2269a04e89a'; | ||
+ | SET @MicroServiceChainLink = '4c4281a1-43cd-4c6e-b1dc-573bd1a23c43'; | ||
+ | SET @MicroServiceChainLinksExitCodes = 'd7653bbd-cd71-473d-b09e-fdd5b36a1d65'; | ||
+ | SET @defaultNextChainLink = @YLink; | ||
+ | SET @NextMicroServiceChainLink = @YLink; | ||
+ | |||
+ | INSERT INTO StandardTasksConfigs (pk, filterFileEnd, filterFileStart, filterSubDir, requiresOutputLock, standardOutputFile, standardErrorFile, execute, arguments) | ||
+ | VALUES (@TasksConfigPKReference, NULL, NULL, 'objects/', FALSE, NULL, NULL, 'archivematicaFido_v0.0', '--fileUUID "%fileUUID%" --SIPUUID "%SIPUUID%" --filePath "%relativeLocation%" --eventIdentifierUUID "%taskUUID%" --date "%date%" --fileGrpUse "%fileGrpUse%"'); | ||
+ | INSERT INTO TasksConfigs (pk, taskType, taskTypePKReference, description) | ||
+ | VALUES | ||
+ | (@TasksConfig, 'a6b1c323-7d36-428e-846a-e7e819423577', @TasksConfigPKReference, 'Identify file formats with FIDO'); | ||
+ | INSERT INTO MicroServiceChainLinks (pk, microserviceGroup, currentTask, defaultNextChainLink) | ||
+ | VALUES (@MicroServiceChainLink, @microserviceGroup, @TasksConfig, @defaultNextChainLink); | ||
+ | INSERT INTO MicroServiceChainLinksExitCodes (pk, microServiceChainLink, exitCode, nextMicroServiceChainLink) | ||
+ | VALUES (@MicroServiceChainLinksExitCodes, @MicroServiceChainLink, 0, @NextMicroServiceChainLink); | ||
+ | SET @NextMicroServiceChainLink = @MicroServiceChainLink; | ||
+ | </pre> | ||
+ | |||
+ | ==Return== | ||
+ | |||
=Add FPR rules= | =Add FPR rules= | ||
+ | ==FileIDTypes== | ||
[[Category:Development documentation]] | [[Category:Development documentation]] |
Revision as of 14:42, 24 May 2013
These are developer-facing instructions for adding a new tool to base normalization on.
Add tool
To include a new tool in the Archivematica packages, it should be a dependency package itself.
Add workflow
The choice of file identification tool is made at link f4dea20e-f3fe-4a37-b20f-0e70a7bc960e.
Additional choices can be added by inserting entries into the MicroServiceChains and MicroServiceChainChoice tables.
SELECT chainAvailable, startingLink, description FROM MicroServiceChainChoice JOIN MicroServiceChains ON chainAvailable = MicroServiceChains.pk WHERE choiceAvailableAtLink = 'f4dea20e-f3fe-4a37-b20f-0e70a7bc960e';
+--------------------------------------+--------------------------------------+---------------------+
| chainAvailable                       | startingLink                         | description         |
+--------------------------------------+--------------------------------------+---------------------+
| 229e34d9-3768-4b78-97b7-6cd4a2f07868 | b549130c-943b-4791-b1f6-93b837990138 | extension (default) |
| c44e0251-1c69-482d-a679-669b70d09fb1 | 56b42318-3eb3-466c-8a0d-7ac272136a96 | FITS - DROID        |
| 1d8836cf-ac02-437c-9283-4ddb7b018810 | 37f2e794-6485-4524-a384-37b3209916ed | FITS - ffident      |
| d607f083-7c86-49a2-bc36-06a03db28a80 | 766b23ad-65ed-46a3-aa2e-b9bdaf3386d0 | FITS - JHOVE        |
| 586006d1-f3af-4b5f-9f1a-c893244fa7a9 | d7a0e33d-aa3c-435f-a6ef-8e39f2e7e3a0 | FITS - summary      |
| 50f47870-3932-4a88-879d-d021a24758ad | f87f13d2-8aae-45c9-bc8a-e5c32a37654e | FITS - file utility |
| c76624a8-6f85-43cf-8ea7-0663502c712f | 982229bd-73b8-432e-a1d9-2d9d15d7287d | FIDO                |
+--------------------------------------+--------------------------------------+---------------------+
FIDO was added by:
-- Register the FIDO chain; its startingLink is the chain link that the
-- chain begins with.
INSERT INTO MicroServiceChains
    (pk, startingLink, description)
VALUES
    ('c76624a8-6f85-43cf-8ea7-0663502c712f',
     '982229bd-73b8-432e-a1d9-2d9d15d7287d',
     'FIDO');

-- Make that chain available as a choice at the file identification
-- tool choice link.
INSERT INTO MicroServiceChainChoice
    (pk, choiceAvailableAtLink, chainAvailable)
VALUES
    ('e95b8f27-ea52-4247-bdf0-615273bc5fca',
     'f4dea20e-f3fe-4a37-b20f-0e70a7bc960e',
     'c76624a8-6f85-43cf-8ea7-0663502c712f');
Set selection
The first step in the workflow is to set the selection as the tool to use during normalization. This is done by making an insert into the unit's variables table for the variable normalizationFileIdentificationToolIdentifierTypes. The value set is a piece of a SQL query used in linkTaskManagerSplitOnFileIdAndruleset.py to restrict the fileIDs used to the desired type.
For FIDO:
-- Chain link that starts the FIDO chain: it runs the "set unit variable"
-- task below and then continues to link 4c4281a1-43cd-4c6e-b1dc-573bd1a23c43.
INSERT INTO `MicroServiceChainLinks`
    (`pk`, `currentTask`, `defaultNextChainLink`, `defaultPlaySound`,
     `microserviceGroup`, `reloadFileList`, `defaultExitMessage`,
     `replaces`, `lastModified`)
VALUES
    ('982229bd-73b8-432e-a1d9-2d9d15d7287d',
     '1e516ea6-6814-4292-9ea9-552ebfaa0d23',
     '4c4281a1-43cd-4c6e-b1dc-573bd1a23c43',
     NULL,
     'Normalize',
     1,
     'Failed',
     NULL,
     '2012-10-23 19:41:23');

-- Task configuration for the link above; taskTypePKReference points at the
-- TasksConfigsSetUnitVariable row inserted next.
INSERT INTO `TasksConfigs`
    (`pk`, `taskType`, `taskTypePKReference`, `description`,
     `replaces`, `lastModified`)
VALUES
    ('1e516ea6-6814-4292-9ea9-552ebfaa0d23',
     '6f0b612c-867f-4dfd-8e43-5b35b7f882d7',
     'f130c16d-d419-4063-8c8b-2e4c3ad138bb',
     'Set SIP to normalize with FIDO file identification.',
     NULL,
     '2012-10-23 19:41:23');

-- The unit variable itself: variableValue is the SQL fragment that restricts
-- file IDs to the FIDO FileIDTypes row.
INSERT INTO `TasksConfigsSetUnitVariable`
    (`pk`, `variable`, `variableValue`, `microServiceChainLink`,
     `createdTime`, `updatedTime`)
VALUES
    ('f130c16d-d419-4063-8c8b-2e4c3ad138bb',
     'normalizationFileIdentificationToolIdentifierTypes',
     'FileIDTypes.pk = \'afdbee13-eec5-4182-8c6c-f5638ee290f3\'',
     NULL,
     '2012-10-23 19:41:23',
     '0000-00-00 00:00:00');

-- On exit code 0, proceed to link 4c4281a1-43cd-4c6e-b1dc-573bd1a23c43.
INSERT INTO `MicroServiceChainLinksExitCodes`
    (`pk`, `microServiceChainLink`, `exitCode`, `nextMicroServiceChainLink`,
     `playSound`, `exitMessage`, `replaces`, `lastModified`)
VALUES
    ('82c97f8d-087d-4636-9dd9-59bbc04e6520',
     '982229bd-73b8-432e-a1d9-2d9d15d7287d',
     0,
     '4c4281a1-43cd-4c6e-b1dc-573bd1a23c43',
     NULL,
     'Completed successfully',
     NULL,
     '2012-10-23 21:39:43');
Run tool
The next step is to run the tool on the objects.
-- Adds the chain link that runs FIDO on the files under objects/.
-- @YLink is the link the workflow continues to after this one, both by
-- default and on exit code 0.
SET @YLink = '83484326-7be7-4f9f-b252-94553cd42370';

SET @TasksConfigPKReference = '46883944-8561-44d0-ac50-e1c3fd9aeb59';
SET @TasksConfig = '7f786b5c-c003-4ef1-97c2-c2269a04e89a';
SET @MicroServiceChainLink = '4c4281a1-43cd-4c6e-b1dc-573bd1a23c43';
SET @MicroServiceChainLinksExitCodes = 'd7653bbd-cd71-473d-b09e-fdd5b36a1d65';
-- @microserviceGroup was previously never assigned; an unset MySQL user
-- variable evaluates to NULL, so the link was inserted without a group.
-- 'Normalize' matches the group of the "Set selection" link that precedes
-- this link in the FIDO chain.
SET @microserviceGroup = 'Normalize';
SET @defaultNextChainLink = @YLink;
SET @NextMicroServiceChainLink = @YLink;

-- Command to execute, with its arguments, restricted to the objects/ subdirectory.
INSERT INTO StandardTasksConfigs
    (pk, filterFileEnd, filterFileStart, filterSubDir, requiresOutputLock,
     standardOutputFile, standardErrorFile, execute, arguments)
VALUES
    (@TasksConfigPKReference, NULL, NULL, 'objects/', FALSE,
     NULL, NULL, 'archivematicaFido_v0.0',
     '--fileUUID "%fileUUID%" --SIPUUID "%SIPUUID%" --filePath "%relativeLocation%" --eventIdentifierUUID "%taskUUID%" --date "%date%" --fileGrpUse "%fileGrpUse%"');

-- Task configuration referencing the StandardTasksConfigs row above.
INSERT INTO TasksConfigs
    (pk, taskType, taskTypePKReference, description)
VALUES
    (@TasksConfig, 'a6b1c323-7d36-428e-846a-e7e819423577',
     @TasksConfigPKReference, 'Identify file formats with FIDO');

-- The chain link itself.
INSERT INTO MicroServiceChainLinks
    (pk, microserviceGroup, currentTask, defaultNextChainLink)
VALUES
    (@MicroServiceChainLink, @microserviceGroup, @TasksConfig, @defaultNextChainLink);

-- On exit code 0, continue to @NextMicroServiceChainLink (currently @YLink).
INSERT INTO MicroServiceChainLinksExitCodes
    (pk, microServiceChainLink, exitCode, nextMicroServiceChainLink)
VALUES
    (@MicroServiceChainLinksExitCodes, @MicroServiceChainLink, 0, @NextMicroServiceChainLink);

-- Leaves the new link as the "next" target for any statements run afterwards
-- in the same session (presumably for adding a link before this one; confirm).
SET @NextMicroServiceChainLink = @MicroServiceChainLink;