AAT naar MySQL8

AAT naar MySQL8

AAT naar MySQL 8

Data verwerken met EasyMorph (XML naar CSV)

EasyMorph

Database tabel in MySQL-8

CREATE TABLE aat (
`id` INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
`_xslstylesheet` text,
`version` text,
`xmlnsxsl` text,
`xmlnsxsi` text,
`excluderesultprefixes` text,
`V` text,
`V_xsinoNamespaceSchemaLocation` text,
`V_Title` text,
`V_Date` text,
`V_S` text,
`V_S_SID` text,
`V_S_ParentRels` text,
`V_S_ParentRels_PrefParent` text,
`V_S_ParentRels_PrefParent_ParentSID` text,
`V_S_ParentRels_PrefParent_RelType` text,
`V_S_ParentRels_PrefParent_HistoricFlag` text,
`V_S_ParentRels_PrefParent_ParentString` text,
`V_S_ParentRels_PrefParent_HierRelType` text,
`V_S_DescrNotes` text,
`V_S_DescrNotes_DescrNote` text,
`V_S_DescrNotes_DescrNote_NoteText` text,
`V_S_DescrNotes_DescrNote_NoteLang` text,
`V_S_DescrNotes_DescrNote_NoteContribs` text,
`V_S_DescrNotes_DescrNote_NoteContribs_NoteContr` text,
`V_S_DescrNotes_DescrNote_NoteContribs_NoteContr_Contrid` text,
`V_S_DescrNotes_DescrNote_NoteSrcs` text,
`V_S_DescrNotes_DescrNote_NoteSrcs_NoteSrc` text,
`V_S_DescrNotes_DescrNote_NoteSrcs_NoteSrc_Src` text,
`V_S_DescrNotes_DescrNote_NoteSrcs_NoteSrc_Src_SrcID` text,
`V_S_RecordType` text,
`V_S_MergedStatus` text,
`V_S_Hierarchy` text,
`V_S_SortOrder` text,
`V_S_Terms` text,
`V_S_Terms_PrefTerm` text,
`V_S_Terms_PrefTerm_TermText` text,
`V_S_Terms_PrefTerm_DisplayName` text,
`V_S_Terms_PrefTerm_HistoricFlag` text,
`V_S_Terms_PrefTerm_Vernacular` text,
`V_S_Terms_PrefTerm_TermID` text,
`V_S_Terms_PrefTerm_TermLangs` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang_Lang` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang_Pref` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang_Qualifier` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang_TermType` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang_PartofSpeech` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang_LangStat` text,
`V_S_Terms_PrefTerm_TermContribs` text,
`V_S_Terms_PrefTerm_TermContribs_TermContr` text,
`V_S_Terms_PrefTerm_TermContribs_TermContr_Contrid` text,
`V_S_Terms_PrefTerm_TermContribs_TermContr_Pref` text,
`V_S_Terms_NonPrefTerm` text,
`V_S_Terms_NonPrefTerm_TermText` text,
`V_S_Terms_NonPrefTerm_DisplayName` text,
`V_S_Terms_NonPrefTerm_HistoricFlag` text,
`V_S_Terms_NonPrefTerm_Vernacular` text,
`V_S_Terms_NonPrefTerm_TermID` text,
`V_S_Terms_NonPrefTerm_TermLangs` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang_Lang` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang_Pref` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang_Qualifier` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang_TermType` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang_PartofSpeech` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang_LangStat` text,
`V_S_Terms_NonPrefTerm_TermContribs` text,
`V_S_Terms_NonPrefTerm_TermContribs_TermContr` text,
`V_S_Terms_NonPrefTerm_TermContribs_TermContr_Contrid` text,
`V_S_Terms_NonPrefTerm_TermContribs_TermContr_Pref` text,
`V_S_Terms_NonPrefTerm_TermSrcs` text,
`V_S_Terms_NonPrefTerm_TermSrcs_TermSrc` text,
`V_S_Terms_NonPrefTerm_TermSrcs_TermSrc_Src` text,
`V_S_Terms_NonPrefTerm_TermSrcs_TermSrc_Src_SrcID` text,
`V_S_Terms_NonPrefTerm_TermSrcs_TermSrc_Page` text,
`V_S_Terms_NonPrefTerm_TermSrcs_TermSrc_Pref` text,
`V_S_SContribs` text,
`V_S_SContribs_SContr` text,
`V_S_SContribs_SContr_Contrid` text,
`V_S_Terms_PrefTerm_TermSrcs` text,
`V_S_Terms_PrefTerm_TermSrcs_TermSrc` text,
`V_S_Terms_PrefTerm_TermSrcs_TermSrc_Src` text,
`V_S_Terms_PrefTerm_TermSrcs_TermSrc_Src_SrcID` text,
`V_S_Terms_PrefTerm_TermSrcs_TermSrc_Page` text,
`V_S_Terms_PrefTerm_TermSrcs_TermSrc_Pref` text,
`V_S_AssocRels` text,
`V_S_AssocRels_AssocRelationship` text,
`V_S_AssocRels_AssocRel_RelType` text,
`V_S_AssocRels_AssocRel_RelatedSID` text,
`V_S_AssocRels_AssocRel_RelatedSID_VPSID` text,
`V_S_AssocRels_AssocRel_HistoricFlag` text,
`V_S_SSrcs` text,
`V_S_SSrcs_SSrc` text,
`V_S_SSrcs_SSrc_Src` text,
`V_S_SSrcs_SSrc_Src_SrcID` text,
`V_S_ParentRels_NonPrefParent` text,
`V_S_ParentRels_NonPrefParent_ParentSID` text,
`V_S_ParentRels_NonPrefParent_RelType` text,
`V_S_ParentRels_NonPrefParent_HistoricFlag` text,
`V_S_ParentRels_NonPrefParent_ParentString` text,
`V_S_ParentRels_NonPrefParent_HierRelType` text,
`V_S_AssocRels_AssocRel_ARDate` text,
`V_S_AssocRels_AssocRel_ARDate_DisplayDate` text,
`V_S_AssocRels_AssocRel_ARDate_StartDate` text,
`V_S_AssocRels_AssocRel_ARDate_EndDate` text,
`V_S_ParentRels_PrefParent_ParentDate` text,
`V_S_ParentRels_PrefParent_ParentDate_DisplayDate` text,
`V_S_ParentRels_PrefParent_ParentDate_StartDate` text,
`V_S_ParentRels_PrefParent_ParentDate_EndDate` text,
`V_S_ParentRels_NonPrefParent_ParentDate` text,
`V_S_ParentRels_NonPrefParent_ParentDate_DisplayDate` text,
`V_S_ParentRels_NonPrefParent_ParentDate_StartDate` text,
`V_S_ParentRels_NonPrefParent_ParentDate_EndDate` text
);

Inladen CSV in MySQL-8

LOAD DATA INFILE 'easymorphed_csv.csv' 
INTO TABLE aat
FIELDS TERMINATED BY ',' 
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

Data poking in MySQL-8

--concept ID
INSERT INTO concepts (concept_id)
SELECT DISTINCT `V_S_SID` AS concept_id FROM aat WHERE `V_S_SID` != '' GROUP BY `V_S_SID`

--unique terms
INSERT INTO terms (term)
SELECT DISTINCT `V_S_Terms_PrefTerm_TermText` AS term FROM aatmorphed.aat
WHERE `V_S_Terms_PrefTerm_TermText` != ""
UNION
SELECT DISTINCT `V_S_Terms_NonPrefTerm_TermText` AS term FROM aatmorphed.aat
WHERE `V_S_Terms_NonPrefTerm_TermText` != ""
ORDER BY term ASC

--concept has terms
INSERT INTO concept_has_terms (term_id, concept_id)
SELECT `V_S_SID` AS concept_id, `V_S_Terms_PrefTerm_TermID` AS term_id FROM aat
INNER JOIN concepts
ON concepts.concept_id = `V_S_SID`
WHERE aat.`V_S_Terms_PrefTerm_TermID` != ""

INSERT INTO concept_has_terms (term_id, concept_id)
SELECT `V_S_SID` AS concept_id, `V_S_Terms_NonPrefTerm_TermID` AS term_id FROM aat
INNER JOIN concepts
ON concepts.concept_id = `V_S_SID`
WHERE aat.`V_S_Terms_NonPrefTerm_TermID` != ""

--only dutch pos
INSERT INTO concept_postags (id,postag)
SELECT `V_S_SID` AS concept_id, `V_S_Terms_PrefTerm_TermLangs_TermLang_PartofSpeech` AS postag FROM aat
WHERE `V_S_Terms_PrefTerm_TermLangs_TermLang_PartofSpeech` != ''
AND `V_S_Terms_PrefTerm_TermLangs_TermLang_Lang` = '70261/Dutch'
UNION
SELECT `V_S_SID` AS concept_id, `V_S_Terms_NonPrefTerm_TermLangs_TermLang_PartofSpeech` AS postag FROM aat
WHERE `V_S_Terms_NonPrefTerm_TermLangs_TermLang_PartofSpeech` != ''
AND `V_S_Terms_NonPrefTerm_TermLangs_TermLang_Lang` = '70261/Dutch'

--only dutch notes/definitions
INSERT INTO concept_notes (concept_id,note)
SELECT `V_S_SID` AS concept_id, `V_S_DescrNotes_DescrNote_NoteText` AS note FROM aat WHERE `V_S_DescrNotes_DescrNote_NoteText` != ''
AND `V_S_DescrNotes_DescrNote_NoteLang` = 'Dutch'

--concept to related concept
INSERT INTO concept_relations (concept_id,relation_id)
SELECT `V_S_SID` AS concept_id, `V_S_AssocRels_AssocRel_RelatedSID_VPSID` AS relation_id FROM aat
WHERE `V_S_SID` != ''
AND `V_S_AssocRels_AssocRel_RelatedSID_VPSID` != ''

--from child to parent
INSERT INTO concept_has_concept_parents (concept_id,parent_id)
SELECT `V_S_SID` AS concept_id, `V_S_ParentRels_PrefParent_ParentSID` AS parent_id FROM aat
WHERE `V_S_SID` != ''
AND `V_S_ParentRels_PrefParent_ParentSID` != ''

--recursive tree (for finding all the relevant concepts underneath)
--https://github.com/mdlincoln/getty_vocab/blob/master/process/full_aat.rb
WITH RECURSIVE runner (concept_id, parent_id) AS (
    SELECT concept_id, parent_id FROM concept_has_parent_concepts WHERE concept_id = 300264091
    UNION ALL
    SELECT c.concept_id, c.parent_id FROM runner AS r JOIN concept_has_parent_concepts AS c ON r.concept_id = c.parent_id
)
SELECT concept_id, parent_id FROM runner ORDER BY concept_id;

--with updating
--300264091 and 300264092 is 7 (Materials Facet and Objects Facet is materials)
--300264090 is 4 (Activities Facet is techniques)
--300054134 is 1 (Activities Facet>Disciplines (hierarchy name), is disciplines)
--300264088 and 300264087 is 9 (Styles and Periods Facet and Physical Attributes Facet is styles)
--300024978 is 2 (Agents Facet>People (hierarchy name) is functions)
--300234770 is 3 (Agents Facet>Organisations (hierarchy name) is companies)
--300265673 is 7 (Agents Facet>Living Organisms (hierarchy name) is materials)
UPDATE concepts
SET expression_id = 7
WHERE concept_id IN (
    (
        WITH RECURSIVE runner (concept_id, parent_id) AS (
            SELECT concept_id, parent_id FROM concept_has_parent_concepts WHERE concept_id = 300264091
            UNION ALL
            SELECT c.concept_id, c.parent_id FROM runner AS r JOIN concept_has_parent_concepts AS c ON r.concept_id = c.parent_id
        )
        SELECT concept_id FROM runner ORDER BY concept_id;
    )
);

Author: jeroen

Just another HTMLy user.