From 2a14d38e1390673171bde9a04af367779b9a944e Mon Sep 17 00:00:00 2001 From: Alexandre Flores Date: Fri, 19 Dec 2025 13:00:53 +0000 Subject: [PATCH] Add SIARDDK128Ext modules to allow lobs of PDF file type. Used for editing SIARDDK128 --- .../dbptk-bindings-siarddk-128-ext/pom.xml | 56 + .../resources/schema/128-ext/XMLSchema.dtd | 58 + .../resources/schema/128-ext/XMLSchema.xsd | 2534 +++++++++++++++++ .../resources/schema/128-ext/archiveIndex.xsd | 448 +++ .../128-ext/contextDocumentationIndex.xsd | 388 +++ .../resources/schema/128-ext/docIndex.xsd | 119 + .../resources/schema/128-ext/fileIndex.xsd | 84 + .../schema/128-ext/fileIndex_original.xsd | 83 + .../schema/128-ext/researchIndex.xsd | 52 + .../resources/schema/128-ext/tableIndex.xsd | 511 ++++ dbptk-bindings/pom.xml | 53 + dbptk-core/pom.xml | 4 + dbptk-modules/dbptk-module-siard/pom.xml | 4 + .../siard/SIARDDK128ExtModuleFactory.java | 37 + .../siard/constants/SIARDConstants.java | 2 + .../in/input/SIARDDK128ExtImportModule.java | 60 + .../SIARDDK128ExtMetadataImportStrategy.java | 605 ++++ .../ResourceFileIndexInputStreamStrategy.java | 2 + .../path/SIARDDK128ExtPathImportStrategy.java | 49 + .../siard/update/SIARDDKEditModule.java | 28 +- pom.xml | 6 + 21 files changed, 5170 insertions(+), 13 deletions(-) create mode 100644 dbptk-bindings/dbptk-bindings-siarddk-128-ext/pom.xml create mode 100644 dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/XMLSchema.dtd create mode 100644 dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/XMLSchema.xsd create mode 100644 dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/archiveIndex.xsd create mode 100644 dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/contextDocumentationIndex.xsd create mode 100644 dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/docIndex.xsd create mode 100644 
dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/fileIndex.xsd create mode 100644 dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/fileIndex_original.xsd create mode 100644 dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/researchIndex.xsd create mode 100644 dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/tableIndex.xsd create mode 100644 dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/SIARDDK128ExtModuleFactory.java create mode 100644 dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/input/SIARDDK128ExtImportModule.java create mode 100644 dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/metadata/SIARDDK128ExtMetadataImportStrategy.java create mode 100644 dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/path/SIARDDK128ExtPathImportStrategy.java diff --git a/dbptk-bindings/dbptk-bindings-siarddk-128-ext/pom.xml b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/pom.xml new file mode 100644 index 000000000..4f48e5b32 --- /dev/null +++ b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/pom.xml @@ -0,0 +1,56 @@ + + + 4.0.0 + dbptk-bindings-siarddk-128-ext + + + jakarta.xml.bind + jakarta.xml.bind-api + 4.0.2 + compile + + + dbptk-bindings-siarddk-128-ext + 2.1.0 + + com.databasepreservation + dbptk-bindings + 2.1.0 + .. 
+ + + siard_dk_128_ext + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.12.1 + + ${version.java} + ${version.java} + UTF-8 + + + + org.apache.maven.plugins + maven-resources-plugin + + UTF-8 + + + + org.jvnet.jaxb + jaxb-maven-plugin + + + org.apache.maven.plugins + maven-install-plugin + 2.5.2 + + + + diff --git a/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/XMLSchema.dtd b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/XMLSchema.dtd new file mode 100644 index 000000000..adf2c738b --- /dev/null +++ b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/XMLSchema.dtd @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/XMLSchema.xsd b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/XMLSchema.xsd new file mode 100644 index 000000000..2e9a2729e --- /dev/null +++ b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/XMLSchema.xsd @@ -0,0 +1,2534 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]> + + + + Part 1 version: Id: structures.xsd,v 1.2 2004/01/15 11:34:25 ht Exp + Part 2 version: Id: datatypes.xsd,v 1.3 2004/01/23 18:11:13 ht Exp + + + + + + The schema corresponding to this document is normative, + with respect to the syntactic constraints it expresses in the + XML Schema language. 
The documentation (within <documentation> elements) + below, is not normative, but rather highlights important aspects of + the W3C Recommendation of which this is a part + + + + + The simpleType element and all of its members are defined + towards the end of this schema document + + + + + + Get access to the xml: attribute groups for xml:lang + as declared on 'schema' and 'documentation' below + + + + + + + + This type is extended by almost all schema types + to allow attributes from other namespaces to be + added to user schemas. + + + + + + + + + + + + + This type is extended by all types which allow annotation + other than <schema> itself + + + + + + + + + + + + + + + + This group is for the + elements which occur freely at the top level of schemas. + All of their types are based on the "annotated" type by extension. + + + + + + + + + + + + + This group is for the + elements which can self-redefine (see <redefine> below). + + + + + + + + + + + + + A utility type, not for public use + + + + + + + + + + + A utility type, not for public use + + + + + + + + + + + A utility type, not for public use + + #all or (possibly empty) subset of {extension, restriction} + + + + + + + + + + + + + + + + + A utility type, not for public use + + + + + + + + + + + + + A utility type, not for public use + + #all or (possibly empty) subset of {extension, restriction, list, union} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + for maxOccurs + + + + + + + + + + + + for all particles + + + + + + + for element, group and attributeGroup, + which both define and reference + + + + + + + + 'complexType' uses this + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This branch is short for + <complexContent> + <restriction 
base="xs:anyType"> + ... + </restriction> + </complexContent> + + + + + + + + + + + + + + + Will be restricted to required or forbidden + + + + + + Not allowed if simpleContent child is chosen. + May be overriden by setting on complexContent child. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This choice is added simply to + make this a valid restriction per the REC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Overrides any setting on complexType parent. + + + + + + + + + + + + + + + This choice is added simply to + make this a valid restriction per the REC + + + + + + + + + + + + + + + + + No typeDefParticle group reference + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A utility type, not for public use + + #all or (possibly empty) subset of {substitution, extension, + restriction} + + + + + + + + + + + + + + + + + + + + + + + + + The element element can be used either + at the top level to define an element-type binding globally, + or within a content model to either reference a globally-defined + element or type or declare an element-type binding locally. + The ref form is not allowed at the top level. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + group type for explicit groups, named top-level groups and + group references + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + group type for the three kinds of group + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This choice with min/max is here to + avoid a pblm with the Elt:All/Choice/Seq + Particle derivation constraint + + + + + + + + + + restricted max/min + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Only elements allowed inside + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple type for the value of the 'namespace' attr of + 'any' and 'anyAttribute' + + + + Value is + ##any - - any non-conflicting WFXML/attribute at all + + ##other - - any non-conflicting WFXML/attribute from + namespace other than targetNS + + ##local - - any unqualified non-conflicting WFXML/attribute + + one or - - any non-conflicting WFXML/attribute from + more URI the listed namespaces + references + (space separated) + + ##targetNamespace or ##local may appear in the above list, to + refer to the targetNamespace of the enclosing + schema or an absent targetNamespace respectively + + + + + + A utility type, not for public use + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A subset of XPath expressions for use +in selectors + A utility type, not for public +use + + + + The following pattern is intended to allow XPath + expressions per the following EBNF: + Selector ::= 
Path ( '|' Path )* + Path ::= ('.//')? Step ( '/' Step )* + Step ::= '.' | NameTest + NameTest ::= QName | '*' | NCName ':' '*' + child:: is also allowed + + + + + + + + + + + + + + + + + + + + + + + A subset of XPath expressions for use +in fields + A utility type, not for public +use + + + + The following pattern is intended to allow XPath + expressions per the same EBNF as for selector, + with the following change: + Path ::= ('.//')? ( Step '/' )* ( Step | '@' NameTest ) + + + + + + + + + + + + + + + + + + + + + + + + + + + The three kinds of identity constraints, all with + type of or derived from 'keybase'. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A utility type, not for public use + + A public identifier, per ISO 8879 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + notations for use within XML Schema schemas + + + + + + + + + Not the real urType, but as close an approximation as we can + get in the XML representation + + + + + + + + + + First the built-in primitive datatypes. These definitions are for + information only, the real built-in definitions are magic. 
+ + + + For each built-in datatype in this schema (both primitive and + derived) can be uniquely addressed via a URI constructed + as follows: + 1) the base URI is the URI of the XML Schema namespace + 2) the fragment identifier is the name of the datatype + + For example, to address the int datatype, the URI is: + + http://www.w3.org/2001/XMLSchema#int + + Additionally, each facet definition element can be uniquely + addressed via a URI constructed as follows: + 1) the base URI is the URI of the XML Schema namespace + 2) the fragment identifier is the name of the facet + + For example, to address the maxInclusive facet, the URI is: + + http://www.w3.org/2001/XMLSchema#maxInclusive + + Additionally, each facet usage in a built-in datatype definition + can be uniquely addressed via a URI constructed as follows: + 1) the base URI is the URI of the XML Schema namespace + 2) the fragment identifier is the name of the datatype, followed + by a period (".") followed by the name of the facet + + For example, to address the usage of the maxInclusive facet in + the definition of int, the URI is: + + http://www.w3.org/2001/XMLSchema#int.maxInclusive + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NOTATION cannot be used 
directly in a schema; rather a type + must be derived from it by specifying at least one enumeration + facet whose value is the name of a NOTATION declared in the + schema. + + + + + + + + + + Now the derived primitive types + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pattern specifies the content of section 2.12 of XML 1.0e2 + and RFC 3066 (Revised version of RFC 1766). + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pattern matches production 7 from the XML spec + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pattern matches production 5 from the XML spec + + + + + + + + + + + + + + + pattern matches production 4 from the Namespaces in XML spec + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A utility type, not for public use + + + + + + + + + + + + + + + + + + + + + + #all or (possibly empty) subset of {restriction, union, list} + + + A utility type, not for public use + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Can be restricted to required or forbidden + + + + + + + + + + + + + + + + + + Required at the top level + + + + + + + + + + + + + + + + + + + Forbidden when nested + + + + + + + + + + + + + + + + + + + We should use a substitution group for facets, but + that's ruled out because it would allow users to + add their own, which we're not ready for yet. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + base attribute and simpleType child are mutually + exclusive, but one or other is required + + + + + + + + + + + + + + + + itemType attribute and simpleType child are mutually + exclusive, but one or other is required + + + + + + + + + + + + + + + + + + memberTypes attribute must be non-empty or there must be + at least one simpleType child + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/archiveIndex.xsd b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/archiveIndex.xsd new file mode 100644 index 000000000..403153ed5 --- /dev/null +++ b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/archiveIndex.xsd @@ -0,0 +1,448 @@ + + + + + + + + + Information om arkiveringsversionen + Information about the information package + + + + + ArkiveringsversionsID + Entydigt ID som tildeles arkiveringsversionen af Rigsarkivet + ArkiveringsversionsID som defineret i denne bekendtgørelse + Information package ID + + + + + Tidligere aflevering + Entydigt ID på tidligere aflevering fra samme it-system eller datakilde + ArkiveringsversionsID som defineret i denne eller tidligere bekendtgørelse + Previous transfer + + + + + Arkiveringsversions startdato + Startdato for de aflevererede data + År, år-måned, år-måned-dag + Start date for the information package + + + + + Arkiveringsversions slutdato + Slutdato for de aflevererede data + År, år-måned, år-måned-dag + End date for the information package + + + + + + Startdato for dokumenter + År, år-måned, år-måned-dag + Start date for documents in the information 
package + + + + + Slutdato for dokumenter + Anvendes til angivelse af slutdato for dokumenter + År, år-måned, år-måned-dag + End date for documents in the information package + + + + + + Slutaflevering + Angivelse af, om arkiveringsversionen er en slutaflevering + Boolsk værdi + Final transfer + + + + + + + + Arkivskaber + Den eller de myndigheder, institutioner, organisationer eller personer, der har skabt data. Kan også være organisatoriske enheder inden for en myndighed eller andre bidragydere til arkivaliet. + Fritekst + Archival creator + + + + + Arkivskaber startdato + For hver arkivskaber angives, hvornår arkivskaber er begyndt at tilføje data + År, år-måned, år-måned-dag + Start date for the archival creator + + + + + Arkivskaber slutdato + For hver arkivskaber angives, hvornår arkivskaber er ophørt med at tilføje data + År, år-måned, år-måned-dag + End date for the archival creator + + + + + + + + Arkiveringsversionsperiodetype + Angivelse af, om der er tale om en afsluttet arkivperiode/årgang, eller om der er tale om et øjebliksbillede. True=afsluttet, false=øjebliksbillede + Boolsk værdi + information package period type + + + + + Arkiveringsversioner med lukkede sager + Angiver for it-systemer med dokumenter, at der er tale om et øjebliksbillede af metadata, men kun dokumenter fra lukkede sager. 
True=der er tale om et øjebliksbillede af metadata, men kun dokumenter fra lukkede sagers + Boolsk værdi + Metadata also exists for cases and documents not submitted in information packagage + + + + + Systemnavn + Det officielle navn på it-systemet eller datakilde, hvor alle forkortelser er opløst + Fritekst + System name or data source + + + + + Alternativt systemnavn + Alternative navne på systemet eller datakilde + Fritekst + Alternative system name + + + + + Systemformål + Beskrivelse af formålet med at indsamle data + Fritekst + System purpose + + + + + Systemindhold + It-systemets centrale population og centrale variabler + Fritekst + System content + + + + + Regionsnumre + Standardidentifikatorer. Angivelse af, om der i systematisk form er registreret regionsnumre + Boolsk værdi + Standard identifier. Defines if the information package contains regionsnumre + + + + + Kommunenumre + Standardidentifikatorer. Angivelse af, om der systematisk form er registreret kommunenumre + Boolsk værdi + Standard identifier. Defines if the information package contains kommunenumre + + + + + CPR-numre + Standardidentifikatorer. Angivelse af, om der i systematisk form er registreret CPR-nr. + Boolsk værdi + Standard identifier. Defines if the information package contains CPR-nr. + + + + + CVR-numre + Standardidentifikatorer. Angivelse af, om der i systematisk form er registreret CVR-nr. + Boolsk værdi + Standard identifier. Defines if the information package contains CVR-nr. + + + + + Matrikelnumre + Standardidentifikatorer. Angivelse af, om der i systematisk form er registreret Matrikelnumre + Boolsk værdi + Standard identifier. Defines if the information package contains Matrikelnumre + + + + + BBR-numre + Standardidentifikatorer. Angivelse af, om der i systematisk form er registreret BBR-nr. + Boolsk værdi + Standard identifier. Defines if the information package contains BBR-nr. + + + + + WHO-sygdomskoder + Standardidentifikatorer. 
Angivelse af, om der i systematisk form er registreret WHOs sygdomskoder + Boolsk værdi + Standard identifier. Defines if the information package contains WHOs sygdomskoder + + + + + Datakilde + Det officielle navn på andre datakilder som har leveret data til it-systemet (opslag, overførsel, samkøring osv.) + Fritekst + Data source + + + + + Databrugere + Andre it-systemer som har brugt data fra it-systemet (opslag, overførsel, samkøring osv.) + Fritekst + Data users + + + + + Forgængersystemer + Systemer, der tidligere har varetaget samme funktion. + Fritekst + Predecessor systems + + + + + + + + FORM-version + Angivelse af, hvilken version af FORM kategorisering er hentet fra + Obligatorisk for it-systemer fra offentlige myndigheder. Gyldige FORM-versionsnumre + FORM version + + + + + + + + FORM-klassifikation + Kategorisering af arkiveringsversionen iht. den fællesoffentlige forretningsreferencemodel (FORM) + Obligatorisk for it-systemer fra offentlige myndigheder. Enhver gyldig FORM-reference iht. den angivne version af FORM + FORM class + + + + + FORM-klassifikation klartekst + FORM-klassifikation i klartekst + Obligatorisk for it-systemer fra offentlige myndigheder. Fritekst + FORM Class text + + + + + + + + + + + Digitale dokumenter indeholdt + Bruges til at angive, om der er digitale dokumenter i arkiveringsversionen, udover kontekstdokumentation + Boolsk værdi + Contains digital documents + + + + + GML data indeholdt + Angivelse af, om der er GML data i arkiveringsversionen + Boolsk værdi + Contains geodata + + + + + Forskningsdata indeholdt + Angivelse af, om arkiveringsversionen indeholder forskningsdata + Boolsk værdi + Contains research data + + + + + Forskningsdata afleveret iht. bilag 9 + Angivelse af, om data er afleveret iht. 
til bilag 9 + Boolsk værdi + Contains research data + + + + + Kassation + Angivelse af, om der er foretaget systematisk kassation af dokumenter inden produktion af arkiveringsversion + Boolsk værdi + Documents disposal + + + + + Søgemiddel til andre sager eller dokumenter + Bruges til at angive, om arkiveringsversionen er et nødvendigt søgemiddel til papirsager/dokumenter eller sager/dokumenter i et andet it-system + Boolsk værdi + Search tool to other files or documents + + + + + Adresserede arkivalier + Henvisning til de arkivalier, som arkiveringsversionen er søgemiddel til + Obligatorisk, hvis der er svaret ja på foregående spørgsmål. Fritekst + Addressed records + + + + + Eksistens af sagsbegreb i it-systemet + Angivelse af, at it-systemet har et sagsbegreb, forstået som en registreret sammenhæng mellem sagligt sammenhørende dokumenter + Kan kun være ja, hvis der er digitale dokumenter i it-systemet. Boolsk værdi + Existence of a file concept in the IT-system + + + + + SOA arkitektur + Angivelse af, om data i arkiveringsversionen er sammensat af data og eventuelt dokumenter fra flere forskellige datakilder i en serviceorienteret arkitektur + Boolsk værdi + SOA Architecture + + + + + Persondata + Angivelse af, om der i arkiveringsversionen findes følsomme personoplysninger iht. Persondatalovenj + Boolsk værdi + Personal data included + + + + + Fristforlængende oplysninger + Angivelse af, om der i arkiveringsversionen findes oplysninger, der kan betinge længere tilgængelighedsfrist i øvrigt + Boolsk værdi + Other access restrictions + + + + + Godkendelsesarkiv + Angivelse af, hvilket offentligt arkiv, der godkender arkiveringsversionen + Identifikation af det pågældende arkiv (2-4 tegn) + Approving archive + + + + + Tilgængelighedsbegrænsninger + Angivelse af nærmere bestemmelser for adgang til materialet. 
Elementet kan anvendes efter det modtagende arkivs nærmere retningslinjer + Fritekst + Other access restrictions + + + + + + + + type for tekst med en mindstelængde på 1 tegn + type for text with a minimum lenght of 1 character + + + + + + + + datotype + date type + + + + + + dato fra 1700 til 2100 + date from 1700 to 2100 + + + + + + + + + dato og måned fra 1700-01 til 2100-12 + date and month from 1700-01 to 2100-12 + + + + + + + + + dato fra 1700-01-01 til 2100-12-31 + date from 1700-01-01 to 2100-12-31 + + + + + + + + + type for en archiveID + type for an archiveID + + + + + + + + type for en arkiveringsversionsID + type for an information package ID + + + + + + + + type for en arkiveringsversionsID i denne eller tidligere bekendtgørelse + type for an information package ID + + + + + + + + + + diff --git a/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/contextDocumentationIndex.xsd b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/contextDocumentationIndex.xsd new file mode 100644 index 000000000..34c47700f --- /dev/null +++ b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/contextDocumentationIndex.xsd @@ -0,0 +1,388 @@ + + + + + + + + Indeks for alle dokumenter i arkiveringsversionens kontekstdokumentation + Index for all documents in the context documentation of the information package + + + + + + + + DokumentID. Unikt ID på op til 12 cifre + DocumentID. Unique ID with up to 12 digits in total + + + + + Dokumenttitel. Fri tekst + Document title. Free text + + + + + Dokumentbeskrivelse. Fri tekst + Document description. Free text + + + + + Dato. År, år-måned, år-måned-dag + Date. Year, year-month, year-month-day + + + + + + + + Forfatternavn. Fri tekst + Author name. Free text + + + + + Forfatterinstitution. Fri tekst + Author institution. 
Free text + + + + + + + + Dokumentkategori + Document category + + + + + + + + + + + type for dokumentkategori + type for a document category + + + + + Dokumentation vedrørende administrativ brug af it-systemet + Documentation regarding administrative use of the it system + + + + + Dokumentation vedrørende it-systemets tekniske udformning, drift og udvikling + Documentation regarding the technical design, operation and development of the it system + + + + + Dokumentation vedrørende arkivskabers aflevering af data + Documentation regarding the transfer of data from the archival creator + + + + + Dokumentation vedrørende arkivets modtagelse af data (udfyldes af modtagende arkiv) + Documentation regarding the receipt of data by the archive + + + + + Dokumentation vedrørende arkivets bevaring af arkiveringsversionen (udfyldes at det modtagende arkiv) + Documentation regarding the preservation of the archival version by the archive + + + + + Anden dokumentation + Other documentation + + + + + Dokumentation af forskningsdata + Research documentation + + + + + + + + + It-systemets formål + IT System purpose + + + + + It-systemets lov- og regelgrundlag + Legal and regulatory framework of the it system + + + + + It-systemets indhold, population og særlige begreber + IT System content, population and special concepts + + + + + It-systemets administrative funktioner + Administrative functions of the IT system + + + + + It-systemets præsentationsstruktur + Presentation structure of the IT system + + + + + Tilvejebringelse af data + Provision of data + + + + + Videregivelse af data + Transfer of data + + + + + Data og funktioner fælles med forgænger- og efterfølgersystemer + Data and functions in common with predecessor systems or successor systems + + + + + Myndighedens egen kvalitetskontrol + Quality control of the authority performed by itself + + + + + Publikation af og om data + Publication of and about data + + + + + Andet + Other + + + + + Registreringssystematik + 
System taxonomy + + + + + Instruks for anvendelse af systemet + instructions for system use + + + + + + + + + Driftsversionens opbygning + Structure of the operational system + + + + + Konvertering hos myndigheden + Transformation at the authority + + + + + Dokumentation af sammensætning af data og eventuelle dokumenter fra flere forskellige systemer i en serviceorienteret arkitektur + Documentation of the combination of data and any documents from several different systems in a service oriented architecture + + + + + Andet + Other + + + + + + + + + Arkivets bestemmelser, herunder afleveringsbestemmelse + Archive regulations, including stipulations about the transfer + + + + + Dokumentation af konvertering fra driftsversion til arkiveringsversion + Documentation of transformation from operational system to information package + + + + + Andet + Other + + + + + + + + + Arkivarnoter + Archivist notes + + + + + Testnoter + Test notes + + + + + Andet + Other + + + + + + + + + Konvertering hos arkivet + Transformations at the archive + + + + + Andet + Other + + + + + + + + + Andet + Other + + + + + + + + + Projektbeskrivelse gældende for de afleverede data + Research project description for the submitted data + + + + + Spørgeskema, interviewguide og/eller registreringsskema anvendt til at indsamle og analysere afleverede data + Questionnaire, interview guide, or registration form used to collect and analyze the submitted data + + + + + Protokoller og metoderapporter + Research protocol + + + + + Publikationer som er udgivet på basis af afleverede data + Publications that stem from the submitted data + + + + + Andet + Other + + + + + + + type for et dokumentID for et dokument i generel dokumentation + A type for a document ID for a document i generel documentation + + + + + + + + datotype for et dokument i kontekstinformation + date type for a document in context information + + + + + + dato fra 1700 til 2100 + date from 1700 to 2100 + + + + + + + + + dato og måned fra 
1700-01 til 2100-12 + date and month from 1700-01 to 2100-12 + + + + + + + + + dato fra 1700-01-01 til 2100-12-31 + date from 1700-01-01 to 2100-12-31 + + + + + + + + + type for tekst med en mindstelængde på 1 tegn + type for text with a minimum lenght of 1 character + + + + + + \ No newline at end of file diff --git a/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/docIndex.xsd b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/docIndex.xsd new file mode 100644 index 000000000..7422d5213 --- /dev/null +++ b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/docIndex.xsd @@ -0,0 +1,119 @@ + + + + + + + + Indeks for alle documenter i arkiveringsversionen + Index for all documents in the information package + + + + + + + + type for et dokument + A type for a document + + + + + DocID - ID, som entydigt udpeger det enkelte dokument + DocID - ID that uniquely identifies a document- + + + + + ParentID - ID på overordnet dokument ved indlejrede dokumenter eller dokumenter, der på anden vis indgår i et hierarki + ParentID - ID of the parent document for embedded documents or for documents which are otherwise part of a hierarchy + + + + + MediaID - ID på det lagringsmedie, som dokumentet ligger på + MediaID - ID for the storage media of the the document + + + + + ContainerFolder - Dokumentsamlingsmappe, som dokumentet ligger i. Skal være unikt inden for Documents, men dette krav er ikke indført i dette XML Schema for at lette skabelse og validering + ContainerFolder - document container folder in which the document is residing. Must be unique within Documents, but this requirement is not implemented in this XML Schema in order to ease creation and validation + + + + + OriginalFilename - Angivelse af filens navn inkl. 
suffix i IT-systemet + OriginalFilename - The original name of the file in the IT-system including its extension + + + + + Angivelse af filens format i arkiveringsversionen + The format of the file in the information package + + + + + For GML filer angives det skema, der skal bruges til validering af den pågældende GML-fil + GML Schema to be used to for validation the GML-file in question + + + + + + + type for et dokumentID + A type for a document ID + + + + + + + + type for et medie ID + A type for a media ID + + + + + + + + Type for en dokumentundermappenavn + Type for a document container folder name + + + + + + + + type for et dokument i arkiveringversionen + type for a document in the information package + Små bogstaver (minuskler) for filtyper (suffix) er krævet i bekendtgørelse, men store bogstaver (versaler) er tilladt i skemaet for at mindske behov for navneændring + Lower case is required, but upper case for the following file type extensions is accepted to reduce the need for renaming--> + + + + + + + + + + + + + + + Navn for et gml xml skema + Name for a gml xml schema + + + + + + diff --git a/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/fileIndex.xsd b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/fileIndex.xsd new file mode 100644 index 000000000..c37d0e1a4 --- /dev/null +++ b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/fileIndex.xsd @@ -0,0 +1,84 @@ + + + + + + Dette skema angiver tilladte mappenavne og filnavne, men ikke hvilke filnavne, som er tilladt i hvilke mapper. Denne begrænsning skyldes ønsket om at anvende samme elementnavn 'foN' for alle mappenavne, og XML Schema tillader ikke at samme element kan være af forskellig type (Consistent Declaration Rule) + This Schema defines legal folder names and file names, but not what file names are allowed in what folders. 
This limitation is due to a desire to use the same element name 'foN' for all folder names, and XML Schema does not allow the same element name to belong to different types (Consistent Declaration Rule) + + + + + Indeks for alle filer i arkiveringsversionen + Index for all files in the information package + + + + + + + + + + + + + + + + Navn for en mappe i arkiveringsversionen + Name for a folder in the information package + + + + + Højst 10.000 docCollection mapper per ContextDocumentation mappe + Maximum of 10.000 docCollection folders per ContextDocumentation folder + + + + + Højst 10.000 docCollection mapper per Documents mappe + Maximum of 10.000 docCollection folders per Documents folder + + + + + + + + + + type for et filnavn i arkiveringversionen + type for a file name in the information package + + + + Små bogstaver (minuskler) for filtyper (suffix) er krævet i bekendtgørelse, men store bogstaver (versaler) er tilladt i Schemaet for at mindske behov for navneændring + Lower case is required, but upper case for the following file type extensions is accepted to reduce the need for renaming--> + + + + + + + + + + + + + + + + + + + Dette er en MD5 kontrolsum med et resultat på 128 bit svarende til 16 bytes repræsenteret som 32 hexadecimale tegn + This is an MD5 checksum with a result of 128 bits, equal to 16 bytes, represented as 32 hexadecimal characters + + + + + + diff --git a/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/fileIndex_original.xsd b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/fileIndex_original.xsd new file mode 100644 index 000000000..d6d1fe9b7 --- /dev/null +++ b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/fileIndex_original.xsd @@ -0,0 +1,83 @@ + + + + + + Dette skema angiver tilladte mappenavne og filnavne, men ikke hvilke filnavne, som er tilladt i hvilke mapper.
Denne begrænsning skyldes ønsket om at anvende samme elementnavn 'foN' for alle mappenavne, og XML Schema tillader ikke at samme element kan være af forskellig type (Consistent Declaration Rule) + This Schema defines legal folder names and file names, but not what file names are allowed in what folders. This limitation is due to a desire to use the same element name 'foN' for all folder names, and XML Schema does not allow the same element name to belong to different types (Consistent Declaration Rule) + + + + + Indeks for alle filer i arkiveringsversionen + Index for all files in the information package + + + + + + + + + + + + + + + + Navn for en mappe i arkiveringsversionen + Name for a folder in the information package + + + + + Højst 10.000 docCollection mapper per ContextDocumentation mappe + Maximum of 10.000 docCollection folders per ContextDocumentation folder + + + + + Højst 10.000 docCollection mapper per Documents mappe + Maximum of 10.000 docCollection folders per Documents folder + + + + + + + + + + type for et filnavn i arkiveringversionen + type for a file name in the information package + + + + Små bogstaver (minuskler) for filtyper (suffix) er krævet i bekendtgørelse, men store bogstaver (versaler) er tilladt i Schemaet for at mindske behov for navneændring + Lower case is required, but upper case for the following file type extensions is accepted to reduce the needed for renaming--> + + + + + + + + + + + + + + + + + + Dette er en MD5 kontrolsum med et resultat på 128 bit svarende til 16 bytes repræsenteret som 32 hexadecimale tegn + This is a MD5 checksum with a result of 128 bit equalt to 16 bytes represented as 32 characters in hexadecimal + + + + + + diff --git a/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/researchIndex.xsd b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/researchIndex.xsd new file mode 100644 index 000000000..62085ca4d --- /dev/null +++ 
b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/researchIndex.xsd @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/tableIndex.xsd b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/tableIndex.xsd new file mode 100644 index 000000000..d220afea3 --- /dev/null +++ b/dbptk-bindings/dbptk-bindings-siarddk-128-ext/src/main/resources/schema/128-ext/tableIndex.xsd @@ -0,0 +1,511 @@ + + + + + + + + + + + + + + + Rodelement for DDL for en siardDiark arkiveringsversion + Root element of DDL for a siardDiark information package + + + + + + + Udgavenr. for dette regelsæt for en siardDiark arkiveringsversion, dette versionsnr. er ikke det samme som versionsnr. for skemaet eller for XML Schema standarden. + Version number for the rules for a siardDiark information package, not for this XML Schema, nor the XML Schema Standard + + + + + Kort navn på databasen + Short database name + + + + + Produktnavn på det anvendte RDBMS> + Name of the RDBMS product used + + + + + Liste over tabeller i databasen + List of the tables in the database + + + + + Liste over views i databasen + List of the views in the database + + + + + + + + + type for a list of tables + type for en tabelliste, bestående af én eller flere tabeller + + + + + + + + + type for et tabelelement kaldet 'table' in siardDiark Archive + type for table element in siardDiark Archive + + + + + tabelnavn + table name + + + + + navn på mappen som indeholder tabellens data og dets tilhørende XML Schema + name of the folder containing the table and its XML Schema + + + + + Beskrivelse af tabellens formål og indhold + Description of the meaning and content of the table + + + + + Liste af kolonner i tabellen + List of the columns in the table + + + + + tabellens primærnøgle + Primary key of the table + + + + 
+ Liste af tabellens fremmednøgler + List of the foreign keys of the table + + + + + Antal rækker i tabellen + Number of rows in the table + + + + + + + + Type for en liste af views + Type for a list of views + + + + + + + + + View element i siardDiark arkiveringsversion + View element in siardDiark Archive + + + + + Navn på view + Name of the view + + + + + Oprindelig SQL forespørgsel som definerer view, eller forespørgsel som er dannet specifikt til arkiveringsversionen + Original query string defining the view + + + + + Beskrivelse af views formål og indhold + Description of the view's meaning and content + + + + + + + + Liste af kolonner + List of columns + + + + + + + + + Kolonneelement i siardDiark arkiveringsversion + Column element in siardDiark Archive + + + + + Kolonnenavn + Column name in the table + + + + + Entydig identifikation af kolonne + Begynder med bogstavet 'c' efterfulgt af nummeret på kolonnens plads i tabellen. Samme rækkefølge skal anvendes i den schemaet for den pågældende tabel. + Column ID + Begins with the letter 'c' followed by the number for the order of the column in the tabe. Same order must be used in the schema for the table + + + + + + + + + + SQL:1999 datatype + SQL:1999 column type + + + + + Original datatype + Original column type + + + + + Kolonnens standardværdi + Column default + + + + + Angivelse af, om kolonnen kan være NULL + Defines if the column can be NULL + + + + + Beskrivelse af kolonnens indhold + The meaning and the content of the column + + + + + Angivelse af særlige information + Defines special information for the column + + + + + + + + Type for en primærnøgle + Type for a Primary key + + + + + Navn på primærnøgle. 
Navnet skal være unikt inden for arkiveringsversionen og være i overenstemmelse med reglerne for SQL Identifiers i SQL:1999 + Name of the primary key according to SQL:1999 rules for SQL Identifiers + + + + + Liste over navne på kolonner i prinærnøglen + List of names of columns of the primary key + + + + + + + + Liste af fremmednøgler + List of foreign keys + + + + + + + + + fremmednøgle 'foreignKey' element i siardDiark arkiveringsversion + foreignKey element in siardDiark Archive + + + + + Navn på fremmednøgle. Navnet skal være unikt inden for arkiveringsversionen og være i overenstemmelse med reglerne for SQL Identifiers i SQL:1999 + Name of the foreign key according to SQL:1999 rules for SQL Identifiers + + + + + Den tabel som fremmednøglen refererer til. + Table referenced by foreign key + + + + + Reference (liste af kolonner og de kolonner fremmednøglen refererer til + Reference (list of columns and referenced columns) + + + + + + + + Reference element i siardDiark arkiveringsversionen + reference element in siardDiark Archive + + + + + Referende kolonne + Referencing column + + + + + Refereret kolonne + Referenced column + + + + + + + + SQL Identifier i SQL:1999 som skal anvendes til navne for database, tabel, kolonner + SQL Identifier i SQL:1999 to be used for database names, table names, column names + + + + + + + SQL identifiers (except for delimited identifiers) must begin with a letter and may only contain letters, digits and _ unless they are delimited. + + + + + + + + fsNames may only consist of ASCII characters and digits and must start with a non-digit + + + + + + + + + + Uddrag af datatyper fra SQL:1999 data typer + Except of data types from SQL:1999 data types + Max amount of spaces between data type elements is limited + Combinations of upper case/lower case are limited. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Type for særlige informationer - angiver at indholdet af den pågældende kolonne har en særlig funktion/information + special information for a column + + + + + Den eller de kolonner i arkiveringsversionen, som indeholder oplysninger om hvilken myndighed, der har registreret sagen eller dokumentet. + + + + + Bruges til at angive den eller de kolonner i arkiveringsversionen, som beskriver dokumenternes entydige identifikation. + + + + + Bruges til at angive den eller de kolonner i arkiveringsversionen, som beskriver, om dokumentet er lagret elektronisk, på papir eller delvist på papir. Helt eller delvis digitalt = 1, papir = 2, ikke relevant = 3 + + + + + Bruges ved aflevering af øjebliksbilleder m.v. til at angive den eller de kolonner i arkiveringsversionen, som beskriver, om dokumentet allerede er afleveret i en tidligere arkiveringsversion. Tidligere afleveret = 1, ikke tidligere afleveret = 2 + + + + + Den eller de kolonner i arkiveringsversionen, som beskriver sagernes entydige identifikation + + + + + Den eller de kolonner i arkiveringsversionen, som indeholder sagernes titler. + + + + + Den eller de kolonner i arkiveringsversionen, som indeholder dokumenternes titler/beskrivelser. + + + + + Den eller de kolonner i arkiveringsversionen, som indeholder oplysninger om dokumenternes afsendelses- og modtagelsesdatoer. + + + + + Den eller de kolonner i arkiveringsversionen, som indeholder oplysninger om et dokuments afsender eller modtager. 
+ + + + + Den eller de kolonner i arkiveringsversionen, som indeholder oplysninger, der uddraget fra en digital signatur + + + + + Den eller de kolonner i arkiveringsversionen, som indeholder reference til FORM (Den fællesoffentlige forretningsreferencemodel) + + + + + Den eller de kolonner i arkiveringsversionen, som indeholder oplysninger om bevaring og kassation + + + + + diff --git a/dbptk-bindings/pom.xml b/dbptk-bindings/pom.xml index fdfd6e807..74e76081a 100644 --- a/dbptk-bindings/pom.xml +++ b/dbptk-bindings/pom.xml @@ -24,6 +24,7 @@ dbptk-bindings-siard2-2 dbptk-bindings-siarddk-1007 dbptk-bindings-siarddk-128 + dbptk-bindings-siarddk-128-ext @@ -160,6 +161,58 @@ ${project.build.directory}/generated-sources/xjc-b + + com.databasepreservation.modules.siard.bindings.${packagename} + + + + generate-siard-128-ext-contextDocumentationIndex + + generate + + + false + + + ${basedir}/src/main/resources/schema/128-ext + + contextDocumentationIndex.xsd + + + + ${project.build.directory}/generated-sources/xjc-a + + + + + com.databasepreservation.modules.siard.bindings.${packagename}.context + + + + + generate-siardk-128-ext + + generate + + + false + + + ${basedir}/src/main/resources/schema/128-ext + + archiveIndex.xsd + docIndex.xsd + fileIndex.xsd + researchIndex.xsd + XMLSchema.xsd + tableIndex.xsd + + true + + + ${project.build.directory}/generated-sources/xjc-b + + com.databasepreservation.modules.siard.bindings.${packagename} diff --git a/dbptk-core/pom.xml b/dbptk-core/pom.xml index 89fc26a68..fc16c054b 100644 --- a/dbptk-core/pom.xml +++ b/dbptk-core/pom.xml @@ -114,6 +114,10 @@ com.databasepreservation dbptk-bindings-siarddk-128 + + com.databasepreservation + dbptk-bindings-siarddk-128-ext + diff --git a/dbptk-modules/dbptk-module-siard/pom.xml b/dbptk-modules/dbptk-module-siard/pom.xml index 1d410b08a..e59f118eb 100644 --- a/dbptk-modules/dbptk-module-siard/pom.xml +++ b/dbptk-modules/dbptk-module-siard/pom.xml @@ -48,6 +48,10 @@ com.databasepreservation 
dbptk-bindings-siarddk-128 + + com.databasepreservation + dbptk-bindings-siarddk-128-ext + xerces diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/SIARDDK128ExtModuleFactory.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/SIARDDK128ExtModuleFactory.java new file mode 100644 index 000000000..322dbc7df --- /dev/null +++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/SIARDDK128ExtModuleFactory.java @@ -0,0 +1,37 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE file at the root of the source + * tree and available online at + * + * https://github.com/keeps/db-preservation-toolkit + */ +package com.databasepreservation.modules.siard; + +import java.nio.file.Path; +import java.util.Map; + +import com.databasepreservation.modules.siard.in.input.SIARDDK128ExtImportModule; +import com.databasepreservation.modules.siard.in.input.SIARDDKImportModule; +import com.databasepreservation.modules.siard.out.output.SIARDDK128ExportModule; +import com.databasepreservation.modules.siard.out.output.SIARDDKExportModule; + +/** + * @author Alexandre Flores + */ +public class SIARDDK128ExtModuleFactory extends SIARDDKModuleFactory { + + @Override + String getModuleFactoryName() { + return "siard-dk-128-ext"; + } + + @Override + SIARDDKImportModule createSIARDDKImportModuleInstance(Path path, String schemaName) { + return new SIARDDK128ExtImportModule(path, schemaName); + } + + @Override + SIARDDKExportModule createSIARDDKExportModuleInstance(Map exportModuleArgs) { + return new SIARDDK128ExportModule(exportModuleArgs); + } +} \ No newline at end of file diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/constants/SIARDConstants.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/constants/SIARDConstants.java 
index d2ff13c35..4b855af46 100644 --- a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/constants/SIARDConstants.java +++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/constants/SIARDConstants.java @@ -42,6 +42,8 @@ public enum SiardVersion { DK_128("128", "dk-128"), + DK_128_EXT("128_EXT", "dk-128-ext"), + // eCH-0165 v1.0: replaced by 2.0 V1_0("1.0", "1.0", "v1.0"); diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/input/SIARDDK128ExtImportModule.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/input/SIARDDK128ExtImportModule.java new file mode 100644 index 000000000..df2d57705 --- /dev/null +++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/input/SIARDDK128ExtImportModule.java @@ -0,0 +1,60 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE file at the root of the source + * tree and available online at + * + * https://github.com/keeps/db-preservation-toolkit + */ +package com.databasepreservation.modules.siard.in.input; + +import java.nio.file.Path; + +import com.databasepreservation.modules.siard.SIARDDK128ExtModuleFactory; +import com.databasepreservation.modules.siard.common.SIARDArchiveContainer; +import com.databasepreservation.modules.siard.common.path.SIARDDK128MetadataPathStrategy; +import com.databasepreservation.modules.siard.constants.SIARDConstants; +import com.databasepreservation.modules.siard.in.content.ContentImportStrategy; +import com.databasepreservation.modules.siard.in.content.SIARDDK128ContentImportStrategy; +import com.databasepreservation.modules.siard.in.metadata.MetadataImportStrategy; +import com.databasepreservation.modules.siard.in.metadata.SIARDDK128ExtMetadataImportStrategy; +import 
com.databasepreservation.modules.siard.in.path.ResourceFileIndexInputStreamStrategy; +import com.databasepreservation.modules.siard.in.path.SIARDDK128ExtPathImportStrategy; +import com.databasepreservation.modules.siard.in.read.FolderReadStrategyMD5Sum; + +/** + * @author Alexandre Flores + * + */ +public class SIARDDK128ExtImportModule extends SIARDDKImportModule { + private static final String moduleName = "siard-dk-128-ext"; + + public SIARDDK128ExtImportModule(Path siardPackage, String paramImportAsSchema) { + super(moduleName, siardPackage, paramImportAsSchema, + new SIARDDK128ExtPathImportStrategy( + new SIARDArchiveContainer(SIARDConstants.SiardVersion.DK, siardPackage.toAbsolutePath().normalize(), + SIARDArchiveContainer.OutputContainerType.MAIN), + new FolderReadStrategyMD5Sum(new SIARDArchiveContainer(SIARDConstants.SiardVersion.DK, + siardPackage.toAbsolutePath().normalize(), SIARDArchiveContainer.OutputContainerType.MAIN)), + new SIARDDK128MetadataPathStrategy(), paramImportAsSchema, new ResourceFileIndexInputStreamStrategy())); + } + + @Override + protected MetadataImportStrategy createMetadataImportStrategy() { + return new SIARDDK128ExtMetadataImportStrategy((SIARDDK128ExtPathImportStrategy) pathStrategy, paramImportAsSchema); + } + + @Override + protected ContentImportStrategy createContentImportStrategy() { + return new SIARDDK128ContentImportStrategy(readStrategy, pathStrategy, paramImportAsSchema); + } + + @Override + protected String getModuleFactoryParameterFolder() { + return SIARDDK128ExtModuleFactory.PARAMETER_FOLDER; + } + + @Override + protected String getModuleFactoryParameterAsSchema() { + return SIARDDK128ExtModuleFactory.PARAMETER_AS_SCHEMA; + } +} diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/metadata/SIARDDK128ExtMetadataImportStrategy.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/metadata/SIARDDK128ExtMetadataImportStrategy.java 
new file mode 100644 index 000000000..16ee2857d --- /dev/null +++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/metadata/SIARDDK128ExtMetadataImportStrategy.java @@ -0,0 +1,605 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE file at the root of the source + * tree and available online at + * + * https://github.com/keeps/db-preservation-toolkit + */ +package com.databasepreservation.modules.siard.in.metadata; + +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.math.BigInteger; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.security.DigestInputStream; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +import javax.xml.XMLConstants; +import javax.xml.transform.stream.StreamSource; +import javax.xml.validation.Schema; +import javax.xml.validation.SchemaFactory; + +import org.apache.commons.lang3.StringUtils; +import org.joda.time.DateTime; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; + +import com.databasepreservation.Constants; +import com.databasepreservation.model.exception.ModuleException; +import com.databasepreservation.model.modules.configuration.ModuleConfiguration; +import com.databasepreservation.model.reporters.Reporter; +import com.databasepreservation.model.structure.ColumnStructure; +import com.databasepreservation.model.structure.DatabaseStructure; +import com.databasepreservation.model.structure.ForeignKey; +import com.databasepreservation.model.structure.PrimaryKey; +import com.databasepreservation.model.structure.Reference; +import com.databasepreservation.model.structure.SchemaStructure; +import com.databasepreservation.model.structure.TableStructure; +import 
com.databasepreservation.model.structure.ViewStructure; +import com.databasepreservation.model.structure.type.Type; +import com.databasepreservation.model.structure.virtual.VirtualColumnStructure; +import com.databasepreservation.model.structure.virtual.VirtualForeignKey; +import com.databasepreservation.model.structure.virtual.VirtualPrimaryKey; +import com.databasepreservation.model.structure.virtual.VirtualTableStructure; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.ArchiveIndex; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.ColumnType; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.ColumnsType; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.DocIndexType; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.ForeignKeyType; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.ForeignKeysType; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.FunctionalDescriptionType; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.PrimaryKeyType; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.ReferenceType; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.SiardDiark; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.TableType; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.ViewType; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.context.ContextDocumentationIndex; +import com.databasepreservation.modules.siard.common.SIARDArchiveContainer; +import com.databasepreservation.modules.siard.constants.SIARDDKConstants; +import com.databasepreservation.modules.siard.in.metadata.typeConverter.SQL99StandardDatatypeImporter; +import com.databasepreservation.modules.siard.in.metadata.typeConverter.SQLStandardDatatypeImporter; +import 
com.databasepreservation.modules.siard.in.path.SIARDDK128ExtPathImportStrategy; +import com.databasepreservation.modules.siard.in.read.FolderReadStrategyMD5Sum; +import com.databasepreservation.modules.siard.in.read.ReadStrategy; + +import jakarta.xml.bind.JAXBContext; +import jakarta.xml.bind.JAXBElement; +import jakarta.xml.bind.JAXBException; +import jakarta.xml.bind.Unmarshaller; + +/** + * @author Alexandre Flores + */ +public class SIARDDK128ExtMetadataImportStrategy implements MetadataImportStrategy { + + protected final Logger logger = LoggerFactory.getLogger(SIARDDK128ExtMetadataImportStrategy.class); + + protected final SIARDDK128ExtPathImportStrategy pathStrategy; + protected final String importAsSchemaName; + protected DatabaseStructure databaseStructure; + private int currentTableIndex = 1; + + private SQLStandardDatatypeImporter sqlStandardDatatypeImporter; + private Reporter reporter; + + public SIARDDK128ExtMetadataImportStrategy(SIARDDK128ExtPathImportStrategy pathStrategy, String importAsSchameName) { + this.pathStrategy = pathStrategy; + this.importAsSchemaName = importAsSchameName; + sqlStandardDatatypeImporter = new SQL99StandardDatatypeImporter(); + } + + @Override + public void loadMetadata(ReadStrategy readStrategy, SIARDArchiveContainer container, + ModuleConfiguration moduleConfiguration) throws ModuleException { + FolderReadStrategyMD5Sum readStrategyMD5Sum = null; + if (!(readStrategy instanceof FolderReadStrategyMD5Sum)) { + throw new IllegalArgumentException( + "The current implemenation of SIARDDKMetadataImportStrategy requires relies on the FolderReadStrategyMD5Sum (should be passed to loadMetadata )."); + } + readStrategyMD5Sum = (FolderReadStrategyMD5Sum) readStrategy; + pathStrategy.parseFileIndexMetadata(); + + JAXBContext tableIndexContext; + JAXBContext archiveIndexContext; + try { + tableIndexContext = JAXBContext.newInstance(SiardDiark.class.getPackage().getName()); + archiveIndexContext = 
JAXBContext.newInstance(ArchiveIndex.class.getPackage().getName()); + } catch (JAXBException e) { + throw new ModuleException().withMessage("Error loading JAXBContext").withCause(e); + } + + SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); + Schema tableIndexXsdSchema = null; + Schema archiveIndexXsdSchema = null; + InputStream tableIndexXsdInputStream = readStrategyMD5Sum.createInputStream(container, + pathStrategy.getXsdFilePath(SIARDDKConstants.TABLE_INDEX)); + InputStream archiveIndexXsdInputStream = readStrategyMD5Sum.createInputStream(container, + pathStrategy.getXsdFilePath(SIARDDKConstants.ARCHIVE_INDEX)); + try { + tableIndexXsdSchema = schemaFactory.newSchema(new StreamSource(tableIndexXsdInputStream)); + archiveIndexXsdSchema = schemaFactory.newSchema(new StreamSource(archiveIndexXsdInputStream)); + } catch (SAXException e) { + throw new ModuleException() + .withMessage("Error reading metadata XSD file: " + pathStrategy.getXsdFilePath(SIARDDKConstants.TABLE_INDEX)) + .withCause(e); + } + DigestInputStream tableIndexInputStreamXml = null; + DigestInputStream archiveIndexInputStreamXml = null; + SiardDiark xmlRoot; + ArchiveIndex archiveIndex = null; + Unmarshaller tableIndexUnmarshaller; + Unmarshaller archiveIndexUnmarshaller; + + try { + tableIndexUnmarshaller = tableIndexContext.createUnmarshaller(); + tableIndexUnmarshaller.setSchema(tableIndexXsdSchema); + tableIndexInputStreamXml = readStrategyMD5Sum.createInputStream(container, + pathStrategy.getXmlFilePath(SIARDDKConstants.TABLE_INDEX), pathStrategy.getTabelIndexExpectedMD5Sum()); + xmlRoot = (SiardDiark) tableIndexUnmarshaller.unmarshal(tableIndexInputStreamXml); + + archiveIndexUnmarshaller = archiveIndexContext.createUnmarshaller(); + archiveIndexUnmarshaller.setSchema(archiveIndexXsdSchema); + if (Files.exists(Paths.get(container.getPath().toString() + SIARDDKConstants.RESOURCE_FILE_SEPARATOR + + 
pathStrategy.getXmlFilePath(SIARDDKConstants.ARCHIVE_INDEX)))) { + archiveIndexInputStreamXml = readStrategyMD5Sum.createInputStream(container, + pathStrategy.getXmlFilePath(SIARDDKConstants.ARCHIVE_INDEX), pathStrategy.getArchiveIndexExpectedMD5Sum()); + archiveIndex = (ArchiveIndex) archiveIndexUnmarshaller.unmarshal(archiveIndexInputStreamXml); + } + } catch (JAXBException e) { + throw new ModuleException().withMessage("Error while Unmarshalling JAXB").withCause(e); + } finally { + try { + tableIndexXsdInputStream.close(); + archiveIndexXsdInputStream.close(); + + if (tableIndexInputStreamXml != null) { + readStrategyMD5Sum.closeAndVerifyMD5Sum(tableIndexInputStreamXml); + } + + if (archiveIndexInputStreamXml != null) { + readStrategyMD5Sum.closeAndVerifyMD5Sum(archiveIndexInputStreamXml); + } + } catch (IOException e) { + logger.debug("Could not close xsdStream", e); + } + } + + databaseStructure = getDatabaseStructure(xmlRoot, archiveIndex); + } + + @Override + public DatabaseStructure getDatabaseStructure() throws ModuleException { + if (databaseStructure != null) { + return databaseStructure; + } else { + throw new ModuleException().withMessage("getDatabaseStructure must not be called before loadMetadata"); + } + } + + @Override + public void setOnceReporter(Reporter reporter) { + this.reporter = reporter; + sqlStandardDatatypeImporter.setOnceReporter(reporter); + } + + protected DatabaseStructure getDatabaseStructure(SiardDiark siardArchive, ArchiveIndex archiveIndex) + throws ModuleException { + DatabaseStructure databaseStructure = new DatabaseStructure(); + databaseStructure.setProductName(siardArchive.getDatabaseProduct()); + databaseStructure.setSchemas(getSchemas(siardArchive)); + if (archiveIndex != null) { + setDatabaseMetadata(siardArchive, databaseStructure, archiveIndex); + } else { + databaseStructure.setName(siardArchive.getDbName()); + } + return databaseStructure; + } + + protected void setDatabaseMetadata(SiardDiark siardArchive, 
DatabaseStructure databaseStructure, + ArchiveIndex archiveIndex) { + databaseStructure.setDbOriginalName(siardArchive.getDbName()); + String[] informationPackageIdSPlit = archiveIndex.getArchiveInformationPackageID().split("\\."); + String id = informationPackageIdSPlit[informationPackageIdSPlit.length - 1]; + databaseStructure.setName(id + ": " + archiveIndex.getSystemName()); + DateTimeFormatter formatter = DateTimeFormat.forPattern("yyyy-MM-dd"); + DateTime formattedDate = formatter.parseDateTime(archiveIndex.getArchivePeriodEnd()); + databaseStructure.setArchivalDate(formattedDate); + databaseStructure.setDataOriginTimespan(archiveIndex.getArchivePeriodStart() + + SIARDDKConstants.RESOURCE_FILE_SEPARATOR + archiveIndex.getArchivePeriodEnd()); + StringBuilder creatorsList = new StringBuilder(); + List> creatorListElements = archiveIndex.getArchiveCreatorList() + .getCreatorNameAndCreationPeriodStartAndCreationPeriodEnd(); + for (int i = 0; i < creatorListElements.size(); i++) { + JAXBElement element = creatorListElements.get(i); + if (element.getName().getLocalPart().equals(SIARDDKConstants.CREATOR_NAME)) { + creatorsList.append(element.getValue()); + + boolean isLastCreatorName = true; + for (int j = i + 1; j < creatorListElements.size(); j++) { + if (creatorListElements.get(j).getName().getLocalPart().equals(SIARDDKConstants.CREATOR_NAME)) { + isLastCreatorName = false; + break; + } + } + + if (!isLastCreatorName) { + creatorsList.append("; "); + } + } + } + + databaseStructure.setDataOwner(creatorsList.toString()); + databaseStructure.setDescription(archiveIndex.getSystemPurpose()); + } + + protected List getSchemas(SiardDiark siardArchive) throws ModuleException { + SchemaStructure schemaImportAs = new SchemaStructure(); + schemaImportAs.setName(getImportAsSchemaName()); + schemaImportAs.setTables(getTables(siardArchive)); + schemaImportAs.setViews(getViews(siardArchive)); + List list = new LinkedList(); + list.add(schemaImportAs); + + return list; + + } + 
+ protected List getViews(SiardDiark siardArchive) { + List lstViewsDptkl = new LinkedList(); + if (siardArchive.getViews() != null && siardArchive.getViews().getView() != null) { + for (ViewType viewXml : siardArchive.getViews().getView()) { + ViewStructure viewDptkl = new ViewStructure(); + if (StringUtils.isNotBlank(viewXml.getDescription())) { + viewDptkl.setDescription(viewXml.getDescription()); + } + viewDptkl.setName(viewXml.getName()); + viewDptkl.setQueryOriginal(viewXml.getQueryOriginal()); + // NOTICE: As siard-dk only support defining the query original + // attribute - + // we'll use it for both the query and the query original field in the + // internal representation of the view. + viewDptkl.setQuery(viewXml.getQueryOriginal()); + lstViewsDptkl.add(viewDptkl); + } + } + return lstViewsDptkl; + } + + protected List getTables(SiardDiark siardArchive) throws ModuleException { + List lstTblsDptkl = new LinkedList(); + if (siardArchive.getTables() != null && siardArchive.getTables().getTable() != null) { + boolean needsVirtualTable = false; + for (TableType tblXml : siardArchive.getTables().getTable()) { + TableStructure tblDptkl = new TableStructure(); + tblDptkl.setIndex(currentTableIndex++); + tblDptkl.setSchema(getImportAsSchemaName()); + tblDptkl.setName(tblXml.getName()); + tblDptkl.setId(String.format("%s.%s", tblDptkl.getSchema(), tblDptkl.getName())); + tblDptkl.setDescription(tblXml.getDescription()); + tblDptkl.setPrimaryKey(getPrimaryKey(tblXml.getPrimaryKey())); + tblDptkl.setForeignKeys(getForeignKeys(tblXml.getForeignKeys(), tblDptkl.getId())); + tblDptkl.setRows(getNumberOfTblRows(tblXml.getRows(), tblXml.getName())); + tblDptkl.setColumns(getTblColumns(tblXml.getColumns(), tblDptkl.getId())); + List virtualForeignKeys = getVirtualForeignKeys(tblXml.getColumns(), tblDptkl.getId()); + if (!virtualForeignKeys.isEmpty()) { + tblDptkl.getForeignKeys().addAll(virtualForeignKeys); + needsVirtualTable = true; + } + 
pathStrategy.associateTableWithFolder(tblDptkl.getId(), tblXml.getFolder()); + lstTblsDptkl.add(tblDptkl); + } + if (needsVirtualTable) { + lstTblsDptkl.add(createVirtualTable()); + } + TableStructure contextDocumentationTable = createContextDocumentationTable(); + if (contextDocumentationTable != null) { + lstTblsDptkl.add(contextDocumentationTable); + } + } + return lstTblsDptkl; + } + + /** + * Builds the virtual table that exposes the archive's context documentation; + * its row count is the number of documents in the context documentation + * index. + * NOTE(review): the primary key is declared on {@code SIARDDKConstants.DID} + * while {@code createContextDocumentsTableColumns()} names its identifier + * column with {@code SIARDDKConstants.DOCUMENT_ID} - confirm both resolve to + * the same column name. + * + * @return the virtual table, or {@code null} when no context documentation + * index was loaded + * @throws ModuleException + * if the index cannot be loaded, including when a metadata file + * cannot be opened + */ + private TableStructure createContextDocumentationTable() throws ModuleException { + try { + ContextDocumentationIndex contextDocumentationIndex = loadContextDocumentationTableMetadata(); + if (contextDocumentationIndex == null) { + return null; + } else { + VirtualTableStructure virtualTable = new VirtualTableStructure(); + virtualTable.setIndex(currentTableIndex++); + virtualTable.setSchema(getImportAsSchemaName()); + virtualTable.setId( + String.format("%s.%s", virtualTable.getSchema(), SIARDDKConstants.CONTEXT_DOCUMENTATION_VIRTUAL_TABLE_NAME)); + virtualTable.setName(SIARDDKConstants.CONTEXT_DOCUMENTATION_VIRTUAL_TABLE_NAME); + virtualTable.setDescription(SIARDDKConstants.CONTEXT_DOCUMENTATION_VIRTUAL_TABLE_DESCRIPTION); + virtualTable.setRows(contextDocumentationIndex.getDocument().size()); + virtualTable.setColumns(createContextDocumentsTableColumns()); + virtualTable.setPrimaryKey(createVirtualPrimaryKey( + SIARDDKConstants.CONTEXT_DOCUMENTATION_VIRTUAL_TABLE_PRIMARY_KEY_NAME, SIARDDKConstants.DID)); + return virtualTable; + } + } catch (FileNotFoundException e) { + throw new ModuleException().withMessage( + "Error reading metadata XSD file: " + pathStrategy.getXsdFilePath(SIARDDKConstants.CONTEXT_DOCUMENTATION_INDEX)) + .withCause(e); + } + } + + /** + * Unmarshals the context documentation index XML, validating it against the + * matching XSD; both files are resolved under the archive's main folder. + * + * @return the unmarshalled index, or {@code null} when the XML file does not + * exist + * @throws ModuleException + * if the JAXB context, the XSD or the XML cannot be processed + * @throws FileNotFoundException + * if a metadata file cannot be opened + */ + private ContextDocumentationIndex loadContextDocumentationTableMetadata() + throws ModuleException, FileNotFoundException { + JAXBContext context; + try { + context = JAXBContext.newInstance(ContextDocumentationIndex.class.getPackage().getName()); + } catch (JAXBException e) { + throw new ModuleException().withMessage("Error 
loading JAXBContext").withCause(e); + } + SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); + Schema xsdSchema = null; + InputStream xsdInputStream = new FileInputStream( + pathStrategy.getMainFolder().getPath().toString() + SIARDDKConstants.RESOURCE_FILE_SEPARATOR + + pathStrategy.getXsdFilePath(SIARDDKConstants.CONTEXT_DOCUMENTATION_INDEX)); + try { + xsdSchema = schemaFactory.newSchema(new StreamSource(xsdInputStream)); + } catch (SAXException e) { + throw new ModuleException().withMessage( + "Error reading metadata XSD file: " + pathStrategy.getXsdFilePath(SIARDDKConstants.CONTEXT_DOCUMENTATION_INDEX)) + .withCause(e); + } + InputStream inputStreamXml = null; + Unmarshaller unmarshaller; + try { + unmarshaller = context.createUnmarshaller(); + unmarshaller.setSchema(xsdSchema); + String contextDocumentationIndexFilePath = pathStrategy.getMainFolder().getPath().toString() + + SIARDDKConstants.RESOURCE_FILE_SEPARATOR + + pathStrategy.getXmlFilePath(SIARDDKConstants.CONTEXT_DOCUMENTATION_INDEX); + if (Paths.get(contextDocumentationIndexFilePath).toFile().exists()) { + inputStreamXml = new FileInputStream(contextDocumentationIndexFilePath); + ContextDocumentationIndex jaxbElement = (ContextDocumentationIndex) unmarshaller.unmarshal(inputStreamXml); + return jaxbElement; + } else { + return null; + } + } catch (JAXBException e) { + throw new ModuleException().withMessage("Error while Unmarshalling JAXB").withCause(e); + } finally { + try { + xsdInputStream.close(); + if (inputStreamXml != null) { + inputStreamXml.close(); + xsdInputStream.close(); + } + } catch (IOException e) { + logger.debug("Could not close xsdStream", e); + } + } + } + + private DocIndexType loadVirtualTableMetadata() throws ModuleException, FileNotFoundException { + JAXBContext context; + try { + context = JAXBContext.newInstance(DocIndexType.class.getPackage().getName()); + } catch (JAXBException e) { + throw new ModuleException().withMessage("Error 
loading JAXBContext").withCause(e); + } + + SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); + Schema xsdSchema = null; + InputStream xsdInputStream = new FileInputStream(pathStrategy.getMainFolder().getPath().toString() + + SIARDDKConstants.RESOURCE_FILE_SEPARATOR + pathStrategy.getXsdFilePath(SIARDDKConstants.DOC_INDEX)); + + try { + xsdSchema = schemaFactory.newSchema(new StreamSource(xsdInputStream)); + } catch (SAXException e) { + throw new ModuleException() + .withMessage("Error reading metadata XSD file: " + pathStrategy.getXsdFilePath(SIARDDKConstants.DOC_INDEX)) + .withCause(e); + } + InputStream inputStreamXml = null; + Unmarshaller unmarshaller; + try { + unmarshaller = context.createUnmarshaller(); + unmarshaller.setSchema(xsdSchema); + inputStreamXml = new FileInputStream(pathStrategy.getMainFolder().getPath().toString() + + SIARDDKConstants.RESOURCE_FILE_SEPARATOR + pathStrategy.getXmlFilePath(SIARDDKConstants.DOC_INDEX)); + Object result = unmarshaller.unmarshal(inputStreamXml); + DocIndexType docIndex; + if (result instanceof JAXBElement) { + docIndex = ((JAXBElement) result).getValue(); + } else if (result instanceof DocIndexType) { + docIndex = (DocIndexType) result; + } else { + throw new IllegalArgumentException("Unexpected object type: " + result.getClass().getName()); + } + return docIndex; + } catch (JAXBException e) { + throw new ModuleException().withMessage("Error while Unmarshalling JAXB").withCause(e); + } finally { + try { + xsdInputStream.close(); + if (inputStreamXml != null) { + inputStreamXml.close(); + xsdInputStream.close(); + } + } catch (IOException e) { + logger.debug("Could not close xsdStream", e); + } + } + } + + private List getVirtualForeignKeys(ColumnsType columns, String tableId) { + List virtualForeignKeys = new ArrayList<>(); + for (ColumnType column : columns.getColumn()) { + if (column.getFunctionalDescription() != null + && 
column.getFunctionalDescription().contains(FunctionalDescriptionType.DOKUMENTIDENTIFIKATION)) { + VirtualForeignKey virtualForeignKey = new VirtualForeignKey(); + virtualForeignKey.setReferencedSchema(getImportAsSchemaName()); + virtualForeignKey.setName(SIARDDKConstants.VIRTUAL_TABLE_FOREIGN_KEY_NAME); + virtualForeignKey.setReferencedTable(SIARDDKConstants.VIRTUAL_TABLE_NAME); + Reference reference = new Reference(column.getName(), SIARDDKConstants.DID); + List referenceList = new ArrayList<>(); + referenceList.add(reference); + virtualForeignKey.setReferences(referenceList); + virtualForeignKey.setId(String.format("%s.%s", tableId, SIARDDKConstants.VIRTUAL_TABLE_FOREIGN_KEY_NAME)); + virtualForeignKeys.add(virtualForeignKey); + } + } + + return virtualForeignKeys; + } + + /** + * Builds the virtual document table that the virtual foreign keys reference; + * its row count is the number of entries in the document index. + * NOTE(review): the {@code FileNotFoundException} message always names the + * XSD path, although {@code loadVirtualTableMetadata()} opens both the XSD + * and the XML file. + * + * @return the virtual document table + * @throws ModuleException + * if the document index cannot be loaded + */ + private TableStructure createVirtualTable() throws ModuleException { + try { + DocIndexType docIndexType = loadVirtualTableMetadata(); + VirtualTableStructure virtualTable = new VirtualTableStructure(); + virtualTable.setIndex(currentTableIndex++); + virtualTable.setSchema(getImportAsSchemaName()); + virtualTable.setId(String.format("%s.%s", virtualTable.getSchema(), SIARDDKConstants.VIRTUAL_TABLE_NAME)); + virtualTable.setName(SIARDDKConstants.VIRTUAL_TABLE_NAME); + virtualTable.setDescription(SIARDDKConstants.VIRTUAL_TABLE_DESCRIPTION); + virtualTable.setRows(docIndexType.getDoc().size()); + virtualTable.setColumns(createVirtualTableColumns()); + virtualTable + .setPrimaryKey(createVirtualPrimaryKey(SIARDDKConstants.VIRTUAL_TABLE_PRIMARY_KEY_NAME, SIARDDKConstants.DID)); + return virtualTable; + } catch (FileNotFoundException e) { + throw new ModuleException() + .withMessage("Error reading metadata XSD file: " + pathStrategy.getXsdFilePath(SIARDDKConstants.DOC_INDEX)) + .withCause(e); + } + } + + /** + * Defines the columns of the context documentation virtual table, in order: + * document id ({@code INTEGER}), title and date (both + * {@code CHARACTER(255)}) and the binary large object column. + * + * @return the column definitions + */ + private List createContextDocumentsTableColumns() { + List columnStructureList = new ArrayList<>(); + Type typeInt = sqlStandardDatatypeImporter.getCheckedType("", "", + "", "", "INTEGER", 
"INTEGER"); + Type typeChar = sqlStandardDatatypeImporter.getCheckedType("", "", + "", "", "CHARACTER(255)", "CHARACTER(255)"); + Type typeBlob = sqlStandardDatatypeImporter.getCheckedType("", "", + "", "", Constants.BINARY_LARGE_OBJECT, + Constants.BINARY_LARGE_OBJECT); + VirtualColumnStructure columnID = new VirtualColumnStructure(SIARDDKConstants.DOCUMENT_ID, + SIARDDKConstants.DOCUMENT_ID, typeInt, true, SIARDDKConstants.DOCUMENT_IDENTIFIER, "1", true); + VirtualColumnStructure columnTitle = new VirtualColumnStructure(SIARDDKConstants.DOCUMENT_TITLE, + SIARDDKConstants.DOCUMENT_TITLE, typeChar, true, SIARDDKConstants.DOCUMENT_TITLE_DESCRIPTION, "", true); + VirtualColumnStructure columnDate = new VirtualColumnStructure(SIARDDKConstants.DOCUMENT_DATE, + SIARDDKConstants.DOCUMENT_DATE, typeChar, true, SIARDDKConstants.DOCUMENT_DATE_DESCRIPTION, "", true); + VirtualColumnStructure columnLOB = new VirtualColumnStructure(Constants.BLOB, Constants.BLOB_COLUMN_NAME, typeBlob, + true, "", "1", true); + columnStructureList.add(columnID); + columnStructureList.add(columnTitle); + columnStructureList.add(columnDate); + columnStructureList.add(columnLOB); + return columnStructureList; + } + + /** + * Defines the columns of the virtual document table, in order: {@code DID} + * and {@code PID} (both {@code INTEGER}) and the binary large object column. + * + * @return the column definitions + */ + private List createVirtualTableColumns() { + List columnStructureList = new ArrayList<>(); + Type typeInt = sqlStandardDatatypeImporter.getCheckedType("", "", + "", "", "INTEGER", "INTEGER"); + VirtualColumnStructure columnID = new VirtualColumnStructure(SIARDDKConstants.DID, SIARDDKConstants.DID, typeInt, + true, SIARDDKConstants.DOCUMENT_IDENTIFIER, "1", true); + Type type = sqlStandardDatatypeImporter.getCheckedType("", "", + "", "", Constants.BINARY_LARGE_OBJECT, + Constants.BINARY_LARGE_OBJECT); + VirtualColumnStructure columnPID = new VirtualColumnStructure(SIARDDKConstants.PID, SIARDDKConstants.PID, typeInt, + true, SIARDDKConstants.PARENT_IDENTIFIER, "1", false); + VirtualColumnStructure columnLOB = new VirtualColumnStructure(Constants.BLOB, Constants.BLOB_COLUMN_NAME, type, + 
true, "", "1", true); + columnStructureList.add(columnID); + columnStructureList.add(columnPID); + columnStructureList.add(columnLOB); + return columnStructureList; + } + + /** + * Creates a single-column {@link VirtualPrimaryKey} carrying the shared + * virtual-table primary key description. + * + * @param name + * name of the primary key + * @param columnName + * name of the only key column + * @return the virtual primary key + */ + private PrimaryKey createVirtualPrimaryKey(String name, String columnName) { + List columnList = new ArrayList<>(); + columnList.add(columnName); + return new VirtualPrimaryKey(name, columnList, SIARDDKConstants.VIRTUAL_TABLE_PRIMARY_KEY_DESCRIPTION); + } + + /** + * Converts the column declarations of a table into {@link ColumnStructure}s. + * Blank original types and blank default values are stored as {@code null}. + * + * @param columnsXml + * column declarations; may be {@code null} + * @param tableId + * id of the owning table, used as prefix of each column id + * @return the converted columns; empty when there are no declarations + * @throws ModuleException + * presumably when a column type cannot be resolved - confirm + * against {@code getCheckedType} + */ + protected List getTblColumns(ColumnsType columnsXml, String tableId) throws ModuleException { + List lstColumnsDptkl = new LinkedList(); + if (columnsXml != null && columnsXml.getColumn() != null) { + for (ColumnType columnXml : columnsXml.getColumn()) { + ColumnStructure columnDptkl = new ColumnStructure(); + columnDptkl.setName(columnXml.getName()); + columnDptkl.setId(String.format("%s.%s", tableId, columnDptkl.getName())); + String typeOriginal = StringUtils.isNotBlank(columnXml.getTypeOriginal()) ? columnXml.getTypeOriginal() : null; + columnDptkl.setType(sqlStandardDatatypeImporter.getCheckedType("", + "", "", "", columnXml.getType(), + typeOriginal, columnDptkl.getCardinality())); + columnDptkl.setDescription(columnXml.getDescription()); + String defaultValue = StringUtils.isNotBlank(columnXml.getDefaultValue()) ? 
columnXml.getDefaultValue() : null; + columnDptkl.setDefaultValue(defaultValue); + columnDptkl.setNillable(columnXml.isNullable()); + lstColumnsDptkl.add(columnDptkl); + } + } + return lstColumnsDptkl; + } + + protected Type getType(String type) { + + return null; + } + + protected long getNumberOfTblRows(BigInteger numRows, String tableName) throws ModuleException { + try { + return numRows.longValue(); + } catch (ArithmeticException e) { + throw new ModuleException().withMessage("Unable to import table [" + tableName + "], as the number of rows [" + + numRows + + "] exceeds the max value of the long datatype used to store the number.(Consult the vendor/a programmer for a fix of this problem, if needed)") + .withCause(e); + } + } + + protected PrimaryKey getPrimaryKey(PrimaryKeyType primaryKeyXml) { + PrimaryKey keyDptkl = new PrimaryKey(); + keyDptkl.setName(primaryKeyXml.getName()); + keyDptkl.setColumnNames(primaryKeyXml.getColumn()); + return keyDptkl; + } + + protected List getForeignKeys(ForeignKeysType foreignKeysXml, String tableId) { + List lstForeignKeyDptkl = new LinkedList(); + if (foreignKeysXml != null) { + for (ForeignKeyType foreignKeyXml : foreignKeysXml.getForeignKey()) { + ForeignKey foreignKeyDptkl = new ForeignKey(); + foreignKeyDptkl.setReferencedSchema(getImportAsSchemaName()); + foreignKeyDptkl.setName(foreignKeyXml.getName()); + foreignKeyDptkl.setReferencedTable(foreignKeyXml.getReferencedTable()); + foreignKeyDptkl.setReferences(getReferences(foreignKeyXml.getReference())); + foreignKeyDptkl.setId(String.format("%s.%s", tableId, foreignKeyDptkl.getName())); + lstForeignKeyDptkl.add(foreignKeyDptkl); + } + } + return lstForeignKeyDptkl; + } + + protected List getReferences(List referencesXml) { + List refsDptkld = new LinkedList(); + if (referencesXml != null) { + for (ReferenceType referenceTypeXml : referencesXml) { + Reference refDptkld = new Reference(); + refDptkld.setColumn(referenceTypeXml.getColumn()); + 
refDptkld.setReferenced(referenceTypeXml.getReferenced()); + refsDptkld.add(refDptkld); + } + } + return refsDptkld; + } + + /** + * @return the name of the schema the archive is imported as + */ + public String getImportAsSchemaName() { + return importAsSchemaName; + } +} \ No newline at end of file diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/path/ResourceFileIndexInputStreamStrategy.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/path/ResourceFileIndexInputStreamStrategy.java index 283f7f2c4..7545c0211 100644 --- a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/path/ResourceFileIndexInputStreamStrategy.java +++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/path/ResourceFileIndexInputStreamStrategy.java @@ -32,6 +32,8 @@ public InputStream getInputStream(SIARDDKPathImportStrategy strategy) thro return this.getClass().getClassLoader().getResourceAsStream("schema/1007/fileIndex.xsd"); } else if (strategy instanceof SIARDDK128PathImportStrategy) { return this.getClass().getClassLoader().getResourceAsStream("schema/128/fileIndex.xsd"); + } else if (strategy instanceof SIARDDK128ExtPathImportStrategy) { + return this.getClass().getClassLoader().getResourceAsStream("schema/128-ext/fileIndex.xsd"); } else { throw new ModuleException(); } diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/path/SIARDDK128ExtPathImportStrategy.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/path/SIARDDK128ExtPathImportStrategy.java new file mode 100644 index 000000000..7eab0d96b --- /dev/null +++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/in/path/SIARDDK128ExtPathImportStrategy.java @@ -0,0 +1,49 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE file at the root of the source 
+ * tree and available online at + * + * https://github.com/keeps/db-preservation-toolkit + */ +package com.databasepreservation.modules.siard.in.path; + +import java.util.List; + +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.FileIndexType; +import com.databasepreservation.modules.siard.bindings.siard_dk_128_ext.FileIndexType.F; +import com.databasepreservation.modules.siard.common.SIARDArchiveContainer; +import com.databasepreservation.modules.siard.common.path.MetadataPathStrategy; +import com.databasepreservation.modules.siard.in.read.ReadStrategy; + +/** + * Path import strategy bound to the {@code siard_dk_128_ext} file-index + * bindings: it hands {@link FileIndexType} to + * {@link SIARDDKPathImportStrategy} and implements the entry accessors by + * delegating to the generated {@link F} getters. + * + * @author Alexandre Flores + */ +public class SIARDDK128ExtPathImportStrategy extends SIARDDKPathImportStrategy { + + /** + * Forwards all arguments to the superclass together with + * {@code FileIndexType.class} as the file-index binding type. + */ + public SIARDDK128ExtPathImportStrategy(SIARDArchiveContainer mainFolder, ReadStrategy readStrategy, + MetadataPathStrategy metadataPathStrategy, String importAsSchema, + FileIndexXsdInputStreamStrategy fileIndexXsdInputStreamStrategy) { + super(mainFolder, readStrategy, metadataPathStrategy, importAsSchema, fileIndexXsdInputStreamStrategy, + FileIndexType.class); + } + + /** Returns the MD5 value stored on the given file-index entry. */ + @Override + byte[] getMd5(F fileInfo) { + return fileInfo.getMd5(); + } + + /** Returns the entries of the given file index. */ + @Override + List getF(FileIndexType fileIndex) { + return fileIndex.getF(); + } + + /** Returns the entry's {@code foN} value (presumably the folder name - confirm against fileIndex.xsd). */ + @Override + String getFoN(F fileInfo) { + return fileInfo.getFoN(); + } + + /** Returns the entry's {@code fiN} value (presumably the file name - confirm against fileIndex.xsd). */ + @Override + String getFiN(F fileInfo) { + return fileInfo.getFiN(); + } +} diff --git a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/update/SIARDDKEditModule.java b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/update/SIARDDKEditModule.java index c2e46b50c..32c3cc968 100644 --- a/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/update/SIARDDKEditModule.java +++ b/dbptk-modules/dbptk-module-siard/src/main/java/com/databasepreservation/modules/siard/update/SIARDDKEditModule.java @@ -12,7 +12,6 @@ import java.nio.file.Paths; import java.util.List; -import 
com.databasepreservation.modules.siard.constants.SIARDDKConstants; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -28,12 +27,13 @@ import com.databasepreservation.modules.siard.common.path.SIARDDK1007MetadataPathStrategy; import com.databasepreservation.modules.siard.common.path.SIARDDK128MetadataPathStrategy; import com.databasepreservation.modules.siard.constants.SIARDConstants; +import com.databasepreservation.modules.siard.constants.SIARDDKConstants; import com.databasepreservation.modules.siard.in.metadata.MetadataImportStrategy; import com.databasepreservation.modules.siard.in.metadata.SIARDDK1007MetadataImportStrategy; -import com.databasepreservation.modules.siard.in.metadata.SIARDDK128MetadataImportStrategy; +import com.databasepreservation.modules.siard.in.metadata.SIARDDK128ExtMetadataImportStrategy; import com.databasepreservation.modules.siard.in.path.ResourceFileIndexInputStreamStrategy; import com.databasepreservation.modules.siard.in.path.SIARDDK1007PathImportStrategy; -import com.databasepreservation.modules.siard.in.path.SIARDDK128PathImportStrategy; +import com.databasepreservation.modules.siard.in.path.SIARDDK128ExtPathImportStrategy; import com.databasepreservation.modules.siard.in.read.FolderReadStrategyMD5Sum; import com.databasepreservation.modules.siard.in.read.ReadStrategy; import com.databasepreservation.utils.ModuleConfigurationUtils; @@ -63,15 +63,16 @@ public SIARDDKEditModule(Path siardPackagePath) { String paramImportAsSchema = "public"; if (Files.exists(Paths.get(siardPackagePath + SIARDDKConstants.SIARDDK_128_RESEARCH_INDEX_PATH))) { - mainContainer = new SIARDArchiveContainer(SIARDConstants.SiardVersion.DK_128, siardPackageNormalizedPath, + mainContainer = new SIARDArchiveContainer(SIARDConstants.SiardVersion.DK_128_EXT, siardPackageNormalizedPath, SIARDArchiveContainer.OutputContainerType.MAIN); readStrategy = new FolderReadStrategyMD5Sum(mainContainer); MetadataPathStrategy metadataPathStrategy = new 
SIARDDK128MetadataPathStrategy(); - SIARDDK128PathImportStrategy pathStrategy = new SIARDDK128PathImportStrategy(mainContainer, readStrategy, + SIARDDK128ExtPathImportStrategy pathStrategy = new SIARDDK128ExtPathImportStrategy(mainContainer, readStrategy, metadataPathStrategy, paramImportAsSchema, new ResourceFileIndexInputStreamStrategy()); - metadataImportStrategy = new SIARDDK128MetadataImportStrategy(pathStrategy, paramImportAsSchema); + metadataImportStrategy = new SIARDDK128ExtMetadataImportStrategy(pathStrategy, paramImportAsSchema); + } else { mainContainer = new SIARDArchiveContainer(SIARDConstants.SiardVersion.DK_1007, siardPackageNormalizedPath, SIARDArchiveContainer.OutputContainerType.MAIN); @@ -91,9 +92,9 @@ public SIARDDKEditModule(Path siardPackagePath) { * * @return A DatabaseStructure * @throws NullPointerException - * If the SIARD archive version were not 2.0 or 2.1 + * If the SIARD archive version were not 2.0 or 2.1 * @throws ModuleException - * Generic module exception + * Generic module exception */ @Override public DatabaseStructure getMetadata() throws ModuleException { @@ -107,7 +108,8 @@ public DatabaseStructure getMetadata() throws ModuleException { dbStructure = metadataImportStrategy.getDatabaseStructure(); } catch (NullPointerException e) { - throw new ModuleException().withMessage("Metadata editing only supports SIARD version 1, 2.0 and 2.1").withCause(e); + throw new ModuleException().withMessage("Metadata editing only supports SIARD version 1, 2.0 and 2.1") + .withCause(e); } finally { readStrategy.finish(mainContainer); } @@ -120,20 +122,20 @@ public String getSIARDVersion() { } /** - * @param dbStructure The {@link DatabaseStructure} with the updated values. + * @param dbStructure + * The {@link DatabaseStructure} with the updated values. 
* @throws ModuleException - * Generic module exception + * Generic module exception */ @Override public void updateMetadata(DatabaseStructure dbStructure) throws ModuleException { throw new ModuleException().withMessage("Metadata editing is not supported for SIARD version DK"); } - /** * @return A list of SIARDDatabaseMetadata * @throws ModuleException - * Generic module exception + * Generic module exception */ @Override public List getDescriptiveSIARDMetadataKeys() throws ModuleException { diff --git a/pom.xml b/pom.xml index 41d54465f..b48b85d36 100644 --- a/pom.xml +++ b/pom.xml @@ -334,6 +334,12 @@ ${dbptk.bindings.version} + + com.databasepreservation + dbptk-bindings-siarddk-128-ext + ${dbptk.bindings.version} + + org.reflections