cmd /c ">NUL (@for %I in (*) do @set /a ext[%~xI] += 1) & set ext["
Archive: 2023
ImageMagick: Afbeeldingen in bulk croppen
xcopy folder folder_backup
for %s in (*.jpg) do convert -gravity Center -crop 800x800+0+0 +repage %s %~ns_cropped.jpg
AAT naar MySQL8
Data verwerken met EasyMorph (XML naar CSV)
Database tabel in MySQL-8
CREATE TABLE aat (
`id` INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
`_xslstylesheet` text,
`version` text,
`xmlnsxsl` text,
`xmlnsxsi` text,
`excluderesultprefixes` text,
`V` text,
`V_xsinoNamespaceSchemaLocation` text,
`V_Title` text,
`V_Date` text,
`V_S` text,
`V_S_SID` text,
`V_S_ParentRels` text,
`V_S_ParentRels_PrefParent` text,
`V_S_ParentRels_PrefParent_ParentSID` text,
`V_S_ParentRels_PrefParent_RelType` text,
`V_S_ParentRels_PrefParent_HistoricFlag` text,
`V_S_ParentRels_PrefParent_ParentString` text,
`V_S_ParentRels_PrefParent_HierRelType` text,
`V_S_DescrNotes` text,
`V_S_DescrNotes_DescrNote` text,
`V_S_DescrNotes_DescrNote_NoteText` text,
`V_S_DescrNotes_DescrNote_NoteLang` text,
`V_S_DescrNotes_DescrNote_NoteContribs` text,
`V_S_DescrNotes_DescrNote_NoteContribs_NoteContr` text,
`V_S_DescrNotes_DescrNote_NoteContribs_NoteContr_Contrid` text,
`V_S_DescrNotes_DescrNote_NoteSrcs` text,
`V_S_DescrNotes_DescrNote_NoteSrcs_NoteSrc` text,
`V_S_DescrNotes_DescrNote_NoteSrcs_NoteSrc_Src` text,
`V_S_DescrNotes_DescrNote_NoteSrcs_NoteSrc_Src_SrcID` text,
`V_S_RecordType` text,
`V_S_MergedStatus` text,
`V_S_Hierarchy` text,
`V_S_SortOrder` text,
`V_S_Terms` text,
`V_S_Terms_PrefTerm` text,
`V_S_Terms_PrefTerm_TermText` text,
`V_S_Terms_PrefTerm_DisplayName` text,
`V_S_Terms_PrefTerm_HistoricFlag` text,
`V_S_Terms_PrefTerm_Vernacular` text,
`V_S_Terms_PrefTerm_TermID` text,
`V_S_Terms_PrefTerm_TermLangs` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang_Lang` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang_Pref` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang_Qualifier` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang_TermType` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang_PartofSpeech` text,
`V_S_Terms_PrefTerm_TermLangs_TermLang_LangStat` text,
`V_S_Terms_PrefTerm_TermContribs` text,
`V_S_Terms_PrefTerm_TermContribs_TermContr` text,
`V_S_Terms_PrefTerm_TermContribs_TermContr_Contrid` text,
`V_S_Terms_PrefTerm_TermContribs_TermContr_Pref` text,
`V_S_Terms_NonPrefTerm` text,
`V_S_Terms_NonPrefTerm_TermText` text,
`V_S_Terms_NonPrefTerm_DisplayName` text,
`V_S_Terms_NonPrefTerm_HistoricFlag` text,
`V_S_Terms_NonPrefTerm_Vernacular` text,
`V_S_Terms_NonPrefTerm_TermID` text,
`V_S_Terms_NonPrefTerm_TermLangs` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang_Lang` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang_Pref` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang_Qualifier` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang_TermType` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang_PartofSpeech` text,
`V_S_Terms_NonPrefTerm_TermLangs_TermLang_LangStat` text,
`V_S_Terms_NonPrefTerm_TermContribs` text,
`V_S_Terms_NonPrefTerm_TermContribs_TermContr` text,
`V_S_Terms_NonPrefTerm_TermContribs_TermContr_Contrid` text,
`V_S_Terms_NonPrefTerm_TermContribs_TermContr_Pref` text,
`V_S_Terms_NonPrefTerm_TermSrcs` text,
`V_S_Terms_NonPrefTerm_TermSrcs_TermSrc` text,
`V_S_Terms_NonPrefTerm_TermSrcs_TermSrc_Src` text,
`V_S_Terms_NonPrefTerm_TermSrcs_TermSrc_Src_SrcID` text,
`V_S_Terms_NonPrefTerm_TermSrcs_TermSrc_Page` text,
`V_S_Terms_NonPrefTerm_TermSrcs_TermSrc_Pref` text,
`V_S_SContribs` text,
`V_S_SContribs_SContr` text,
`V_S_SContribs_SContr_Contrid` text,
`V_S_Terms_PrefTerm_TermSrcs` text,
`V_S_Terms_PrefTerm_TermSrcs_TermSrc` text,
`V_S_Terms_PrefTerm_TermSrcs_TermSrc_Src` text,
`V_S_Terms_PrefTerm_TermSrcs_TermSrc_Src_SrcID` text,
`V_S_Terms_PrefTerm_TermSrcs_TermSrc_Page` text,
`V_S_Terms_PrefTerm_TermSrcs_TermSrc_Pref` text,
`V_S_AssocRels` text,
`V_S_AssocRels_AssocRelationship` text,
`V_S_AssocRels_AssocRel_RelType` text,
`V_S_AssocRels_AssocRel_RelatedSID` text,
`V_S_AssocRels_AssocRel_RelatedSID_VPSID` text,
`V_S_AssocRels_AssocRel_HistoricFlag` text,
`V_S_SSrcs` text,
`V_S_SSrcs_SSrc` text,
`V_S_SSrcs_SSrc_Src` text,
`V_S_SSrcs_SSrc_Src_SrcID` text,
`V_S_ParentRels_NonPrefParent` text,
`V_S_ParentRels_NonPrefParent_ParentSID` text,
`V_S_ParentRels_NonPrefParent_RelType` text,
`V_S_ParentRels_NonPrefParent_HistoricFlag` text,
`V_S_ParentRels_NonPrefParent_ParentString` text,
`V_S_ParentRels_NonPrefParent_HierRelType` text,
`V_S_AssocRels_AssocRel_ARDate` text,
`V_S_AssocRels_AssocRel_ARDate_DisplayDate` text,
`V_S_AssocRels_AssocRel_ARDate_StartDate` text,
`V_S_AssocRels_AssocRel_ARDate_EndDate` text,
`V_S_ParentRels_PrefParent_ParentDate` text,
`V_S_ParentRels_PrefParent_ParentDate_DisplayDate` text,
`V_S_ParentRels_PrefParent_ParentDate_StartDate` text,
`V_S_ParentRels_PrefParent_ParentDate_EndDate` text,
`V_S_ParentRels_NonPrefParent_ParentDate` text,
`V_S_ParentRels_NonPrefParent_ParentDate_DisplayDate` text,
`V_S_ParentRels_NonPrefParent_ParentDate_StartDate` text,
`V_S_ParentRels_NonPrefParent_ParentDate_EndDate` text
);
Inladen CSV in MySQL-8
LOAD DATA INFILE 'easymorphed_csv.csv'
INTO TABLE aat
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;
Data poking in MySQL-8
--concept ID
INSERT INTO concepts (concept_id)
SELECT DISTINCT `V_S_SID` AS concept_id FROM aat WHERE `V_S_SID` != '' GROUP BY `V_S_SID`
--unique terms
INSERT INTO terms (term)
SELECT DISTINCT `V_S_Terms_PrefTerm_TermText` AS term FROM aatmorphed.aat
WHERE `V_S_Terms_PrefTerm_TermText` != ""
UNION
SELECT DISTINCT `V_S_Terms_NonPrefTerm_TermText` AS term FROM aatmorphed.aat
WHERE `V_S_Terms_NonPrefTerm_TermText` != ""
ORDER BY term ASC
--concept has terms
INSERT INTO concept_has_terms (term_id, concept_id)
SELECT `V_S_SID` AS concept_id, `V_S_Terms_PrefTerm_TermID` AS term_id FROM aat
INNER JOIN concepts
ON concepts.concept_id = `V_S_SID`
WHERE aat.`V_S_Terms_PrefTerm_TermID` != ""
INSERT INTO concept_has_terms (term_id, concept_id)
SELECT `V_S_SID` AS concept_id, `V_S_Terms_NonPrefTerm_TermID` AS term_id FROM aat
INNER JOIN concepts
ON concepts.concept_id = `V_S_SID`
WHERE aat.`V_S_Terms_NonPrefTerm_TermID` != ""
--only dutch pos
INSERT INTO concept_postags (id,postag)
SELECT `V_S_SID` AS concept_id, `V_S_Terms_PrefTerm_TermLangs_TermLang_PartofSpeech` AS postag FROM aat
WHERE `V_S_Terms_PrefTerm_TermLangs_TermLang_PartofSpeech` != ''
AND `V_S_Terms_PrefTerm_TermLangs_TermLang_Lang` = '70261/Dutch'
UNION
SELECT `V_S_SID` AS concept_id, `V_S_Terms_NonPrefTerm_TermLangs_TermLang_PartofSpeech` AS postag FROM aat
WHERE `V_S_Terms_NonPrefTerm_TermLangs_TermLang_PartofSpeech` != ''
AND `V_S_Terms_NonPrefTerm_TermLangs_TermLang_Lang` = '70261/Dutch'
--only dutch notes/definitions
INSERT INTO concept_notes (concept_id,note)
SELECT `V_S_SID` AS concept_id, `V_S_DescrNotes_DescrNote_NoteText` AS note FROM aat WHERE `V_S_DescrNotes_DescrNote_NoteText` != ''
AND `V_S_DescrNotes_DescrNote_NoteLang` = 'Dutch'
--concept to related concept
INSERT INTO concept_relations (concept_id,relation_id)
SELECT `V_S_SID` AS concept_id, `V_S_AssocRels_AssocRel_RelatedSID_VPSID` AS relation_id FROM aat
WHERE `V_S_SID` != ''
AND `V_S_AssocRels_AssocRel_RelatedSID_VPSID` != ''
--from child to parent
INSERT INTO concept_has_concept_parents (concept_id,parent_id)
SELECT `V_S_SID` AS concept_id, `V_S_ParentRels_PrefParent_ParentSID` AS parent_id FROM aat
WHERE `V_S_SID` != ''
AND `V_S_ParentRels_PrefParent_ParentSID` != ''
--recursive tree (for finding all the relevant concepts underneath)
--https://github.com/mdlincoln/getty_vocab/blob/master/process/full_aat.rb
WITH RECURSIVE runner (concept_id, parent_id) AS (
SELECT concept_id, parent_id FROM concept_has_parent_concepts WHERE concept_id = 300264091
UNION ALL
SELECT c.concept_id, c.parent_id FROM runner AS r JOIN concept_has_parent_concepts AS c ON r.concept_id = c.parent_id
)
SELECT concept_id, parent_id FROM runner ORDER BY concept_id;
--with updating
--300264091 and 300264092 is 7 (Materials Facet and Objects Facet is materials)
--300264090 is 4 (Activities Facet is techniques)
--300054134 is 1 (Activities Facet>Disciplines (hierarchy name), is disciplines)
--300264088 and 300264087 is 9 (Styles and Periods Facet and Physical Attributes Facet is styles)
--300024978 is 2 (Agents Facet>People (hierarchy name) is functions)
--300234770 is 3 (Agents Facet>Organisations (hierarchy name) is companies)
--300265673 is 7 (Agents Facet>Living Organisms (hierarchy name) is materials)
UPDATE concepts
SET expression_id = 7
WHERE concept_id IN (
(
WITH RECURSIVE runner (concept_id, parent_id) AS (
SELECT concept_id, parent_id FROM concept_has_parent_concepts WHERE concept_id = 300264091
UNION ALL
SELECT c.concept_id, c.parent_id FROM runner AS r JOIN concept_has_parent_concepts AS c ON r.concept_id = c.parent_id
)
SELECT concept_id FROM runner ORDER BY concept_id;
)
);
OSM: Nederlandse adres data naar CSV
OpenStreetMap is een gratis open geografische database die bijgehouden en onderhouden wordt door een community aan vrijwilligers via een open samenwerking. Deze data wordt gedeeld door diverse partijen, zoals Geofabrik.de. Geofabrik heeft data van Nederland beschikbaar gesteld, zoals van heel Nederland, maar ook per provincie.
Data downloaden
Ga naar Netherlands region op Geofabrik en download de "Commonly Used Format", namelijk netherlands-latest.osm.pbf (circa 1.1 GB op 16-3-2023).
Converteren
Download de converteer tool genaamd Osmconvert voor het gewenste besturingssysteem.
Extraheren CSV
Voer vervolgens volgende commando uit via de CMD, om het osm.pbf bestand naar CSV om te zetten, waarbij we filteren op latitude, longitude, straat, huisnummer, stad en postcode.
osmconvert64-0.8.8p.exe netherlands-latest.osm.pbf --csv="@lat @lon addr:street addr:housenumber addr:city addr:postcode" --csv-headline -o=1.csv
Filteren met Osmfilter
Mochten we eerst de Geofabrik-data willen filteren, dan kunnen we een filter toepassen met de OSmfilter tool. Hiervoor dienen we eerst de .osm.pbf naar bijvoorbeeld .05m om te zetten.
osmconvert64-0.8.8p.exe netherlands-latest.osm.pbf -o=netherlands.o5m
Vervolgens passen we te filter toe, zoals bijvoorbeeld voorkomens met lege adres voorkomens te verwijderen:
osmfilter.exe netherlands.o5m --keep="@lat @lon addr:street addr:housenumber addr:city addr:postcode" > filter.osm
De gefilterde data kunnen we ten slotte weer extraheren en converteren naar CSV met:
osmconvert64-0.8.8p.exe filter.osm --csv="@lat @lon addr:street addr:housenumber addr:city addr:postcode" --csv-headline -o=1.csv
Filteren met Python en Panda
import pandas as pd
# Create a Dataframe from a CSV
col_names = ["@lat", "@lon", "addr:street", "addr:housenumber", "addr:city", "addr:postcode"]
df = pd.read_csv('2.csv', names=col_names, sep='\t', lineterminator='\n', header=0)
df2 = df.copy() # Create duplicate of data
df2.dropna(subset = ['addr:street'], inplace = True) # Remove streets with NaN
df2.dropna(subset = ['addr:housenumber'], inplace = True) # Remove housenumbers with NaN
df2.dropna(subset = ['addr:city'], inplace = True) # Remove cities with NaN
df2.dropna(subset = ['addr:postcode'], inplace = True) # Remove cities with NaN
print(df2) # Print updated data
df2.to_csv("panda.csv")