%macro common; **** %common defines common librefs and SAS options.; libname source "C:\path\source_data"; libname library "C:\path\source_data"; libname target "C:\path\sdtm_data"; options ls=256 nocenter; %mend common; %macro make_codelist_formats; *---------------------------------------------------------------*; * make_codelist_formats.sas creates a permanent SAS format library * stored to the libref LIBRARY from the codelist metadata file * CODELISTS.xlsx. The permanent format library that is created * contains formats that are named like this: * CODELISTNAME_SOURCEDATASET_SOURCEVARIABLE * where CODELISTNAME is the name of the SDTM codelist, * SOURCEDATASET is the name of the source SAS dataset and * SOURCEVARIABLE is the name of the source SAS variable. *---------------------------------------------------------------*; proc import datafile="C:\path\SDTM_METADATA.xlsx" out=formatdata dbms=excelcs replace; sheet="CODELISTS"; run; ** make a proc format control dataset out of the SDTM metadata; data source.formatdata; set formatdata(drop=type); where sourcedataset ne "" and sourcevalue ne ""; keep fmtname start end label type; length fmtname $ 32 start end $ 16 label $ 200 type $ 1; fmtname = compress(codelistname || "_" || sourcedataset || "_" || sourcevariable); start = left(sourcevalue); end = left(sourcevalue); label = left(codedvalue); if upcase(sourcetype) = "NUMBER" then type = "N"; else if upcase(sourcetype) = "CHARACTER" then type = "C"; run; ** create a SAS format library to be used in SDTM conversions; proc format library=library cntlin=source.formatdata fmtlib; run; %mend make_codelist_formats; %macro make_empty_dataset(metadatafile=,dataset=); *---------------------------------------------------------------*; * make_empty_dataset.sas creates a zero record dataset based on a * dataset metadata spreadsheet. The dataset created is called * EMPTY_** where "**" is the name of the dataset. This macro also * creates a global macro variable called **KEEPSTRING that holds * the dataset variables desired and listed in the order they * should appear. [The variable order is dictated by VARNUM in the * metadata spreadsheet.] * * MACRO PARAMETERS: * metadatafile = the MS Excel file containing the dataset metadata * dataset = the dataset or domain name you want to extract *---------------------------------------------------------------*; proc import datafile="&metadatafile" out=_temp dbms=excelcs replace; sheet="VARIABLE_METADATA"; run; ** sort the dataset by expected specified variable order; proc sort data=_temp; where domain = "&dataset"; by varnum; run; ** create keepstring macro variable and load metadata ** information into macro variables; %global &dataset.KEEPSTRING; data _null_; set _temp nobs=nobs end=eof; if _n_=1 then call symput("vars", compress(put(nobs,3.))); call symputx('var' || compress(put(_n_, 3.)), variable); call symputx('label' || compress(put(_n_, 3.)), label); call symputx('length' || compress(put(_n_, 3.)), put(length, 3.)); ** valid ODM types include TEXT, INTEGER, FLOAT, DATETIME, ** DATE, TIME and map to SAS numeric or character; if upcase(type) in ("INTEGER", "FLOAT") then call symputx('type' || compress(put(_n_, 3.)), ""); else if upcase(type) in ("TEXT", "DATE", "DATETIME", "DATE", "TIME") then call symputx('type' || compress(put(_n_, 3.)), "$"); else put "ERR" "OR: not using a valid ODM type. " type=; ** create **KEEPSTRING macro variable; length keepstring $ 32767; retain keepstring; keepstring = compress(keepstring) || "|" || left(variable); if eof then call symputx(upcase(compress("&dataset" || 'KEEPSTRING')), left(trim(translate(keepstring," ","|")))); run; ** create a 0-observation template data set used for assigning ** variable attributes to the actual data sets; data EMPTY_&dataset; %do i=1 %to &vars; attrib &&var&i label="&&label&i" length=&&type&i.&&length&i... ; %if &&type&i=$ %then retain &&var&i ''; %else retain &&var&i .; ; %end; if 0; run; %mend make_empty_dataset; %macro make_dtc_date(dtcdate=, year=., month=., day=., hour=., minute=., second=.); *---------------------------------------------------------------*; * make_dtc_date.sas is a SAS macro that creates a SDTM --DTC date * within a SAS datastep when provided the pieces of the date in * separate SAS variables. * * NOTE: This macro must have SAS OPTIONS MISSING = ' ' set before * it is called to handle missing date parts properly. * * MACRO PARAMETERsS: * dtcdate = SDTM --DTC date variable desired * year = year variable * month = month variable * day = day variable * hour = hour variable * minute = minute variable * second = second variable *---------------------------------------------------------------*; ** in a series of if-then-else statements, determine where the ** smallest unit of date and time is present and then construct a DTC ** date based on the non-missing date variables.; if (&second ne .) then &dtcdate = put(&year,z4.) || "-" || put(&month,z2.) || "-" || put(&day,z2.) || "T" || put(&hour,z2.) || ":" || put(&minute,z2.) || ":" || put(&second,z2.); else if (&minute ne .) then &dtcdate = put(&year,z4.) || "-" || put(&month,z2.) || "-" || put(&day,z2.) || "T" || put(&hour,z2.) || ":" || put(&minute,z2.); else if (&hour ne .) then &dtcdate = put(&year,z4.) || "-" || put(&month,z2.) || "-" || put(&day,z2.) || "T" || put(&hour,z2.); else if (&day ne .) then &dtcdate = put(&year,z4.) || "-" || put(&month,z2.) || "-" || put(&day,z2.); else if (&month ne .) then &dtcdate = put(&year,z4.) || "-" || put(&month,z2.); else if (&year ne .) then &dtcdate = put(&year,z4.); else if (&year = .) then &dtcdate = ""; ** remove duplicate blanks and replace space with a dash; &dtcdate = translate(trim(compbl(&dtcdate)),'-',' '); %mend make_dtc_date; %macro make_sdtm_dy(refdate=RFSTDTC,date=); *---------------------------------------------------------------*; * make_sdtm_dy.sas is a SAS macro that takes two SDTM --DTC dates * and calculates a SDTM study day (--DY) variable. It must be used * in a datastep that has both the REFDATE and DATE variables * specified in the macro parameters below. * MACRO PARAMETERS: * refdate = --DTC baseline date to calculate the --DY from. * This should be DM.RFSTDTC for SDTM --DY variables. * date = --DTC date to calculate the --DY to. The variable * associated with the --DY variable. *---------------------------------------------------------------*; if length(&date) >= 10 and length(&refdate) >= 10 then do; if input(substr(%substr("&date",2,%length(&date)-3)dtc,1,10),yymmdd10.) >= input(substr(%substr("&refdate",2,%length(&refdate)-3)dtc,1,10),yymmdd10.) then %upcase(%substr("&date",2,%length(&date)-3))DY = input(substr(%substr("&date",2,%length(&date)-3)dtc,1,10),yymmdd10.) - input(substr(%substr("&refdate",2,%length(&refdate)-3)dtc,1,10),yymmdd10.) + 1; else %upcase(%substr("&date",2,%length(&date)-3))DY = input(substr(%substr("&date",2,%length(&date)-3)dtc,1,10),yymmdd10.) - input(substr(%substr("&refdate",2,%length(&refdate)-3)dtc,1,10),yymmdd10.); end; %mend make_sdtm_dy; %macro make_sort_order(metadatafile=,dataset=); *----------------------------------------------------------------*; * make_sort_order.sas creates a global macro variable called * **SORTSTRING where ** is the name of the dataset that contains * the metadata specified sort order for a given dataset. * * MACRO PARAMETERS: * metadatafile = the file containing the dataset metadata * dataset = the dataset or domain name *----------------------------------------------------------------*; proc import datafile="&metadatafile" out=_temp dbms=excelcs replace; sheet="TOC_METADATA"; run; ** create **SORTSTRING macro variable; %global &dataset.SORTSTRING; data _null_; set _temp; where name = "&dataset"; call symputx(compress("&dataset" || "SORTSTRING"), translate(domainkeys," ",",")); run; %mend make_sort_order; %macro make_define(path=,metadata=); *---------------------------------------------------------------*; * %make_define creates the define.xml file for the SDTM and ADaM. * It creates define.xml based on the contents of a set of metadata * tabs found in an Excel spreadsheet. * * PARAMETERS: * path = System path to where the SDTM or ADaM metadata * file exists as well as where the define.xml * file will be stored. * metadata = The name of the metadata spreadsheet. * * It requires that the following tabs exist in the metadata file: * DEFINE_HEADER_METADATA = define file header metadata * TOC_METADATA = "table of contents" dataset metadata * VARIABLE_METADATA = variable/column level metadata * VALUELEVEL_METADATA = value/parameter level metadata * COMPUTATIONAL_MKETHOD = computational methods * CODELISTS = controlled terminology metadata * ANALYSIS_RESULTS = ADaM analysis metadata. [Only for ADaM define] * EXTERNAL_LINKS = ADaM results file pointers. [Only for ADaM define] *---------------------------------------------------------------*; **** GET DEFINE FILE HEADER INFORMATION METADATA; proc import out = define_header datafile = "&path\&metadata" dbms=excelcs replace; sheet="DEFINE_HEADER_METADATA"; run; **** DETERMINE IF THIS IS A SDTM DEFINE FILE OR AN ADAM DEFINE FILE **** AND SET THE STANDARD MACRO VARIABLE FOR THE REST OF THE PROGRAM; data _null_; set define_header; if upcase(standard) = 'ADAM' then call symput('standard','ADAM'); else if upcase(standard) = 'SDTM' then call symput('standard','SDTM'); else put "ERR" "OR: CDISC standard undefined in define_header_metadata"; run; **** GET "TABLE OF CONTENTS" LEVEL DATASET METADATA; proc import out = toc_metadata datafile = "&path\&metadata" dbms=excelcs replace; sheet = "TOC_METADATA" ; run; **** GET THE VARIABLE METADATA; proc import out = VARIABLE_METADATA datafile = "&path\&metadata" dbms=excelcs replace; sheet = "VARIABLE_METADATA"; run; **** GET THE CODELIST METADATA; proc import out = codelists datafile = "&path\&metadata" dbms=excelcs replace; sheet = "CODELISTS" ; run; **** GET THE COMPUTATIONAL METHOD METADATA; proc import out = compmethod datafile = "&path\&metadata" dbms=excelcs replace; sheet = "COMPUTATION_METHOD" ; run; **** GET THE VALUE LEVEL METADATA; proc import out = valuelevel datafile = "&path\&metadata" dbms=excelcs replace; sheet = "VALUELEVEL_METADATA" ; run; %if "&standard" = "ADAM" %then %do; **** GET THE ANALYSIS RESULTS METADATA; proc import out = analysisresults datafile = "&path\&metadata" dbms=excelcs replace; sheet = "ANALYSIS_RESULTS" ; run; **** GET THE ANALYSIS RESULTS METADATA; proc import out = externallinks datafile = "&path\&metadata" dbms=excelcs replace; sheet = "EXTERNAL_LINKS" ; run; %end; **** USE HTMLENCODE ON SOURCE TEXT THAT NEEDS ENCODING FOR PROPER BROWSER REPRESENTATIION; %if &standard=ADAM %then %do; data toc_metadata; length documentation $ 800; set toc_metadata; documentation = htmlencode(documentation); ** convert single quotes to double quotes; documentation = tranwrd(documentation, "'", '"'); ** convert double quotes to html quote; documentation = tranwrd(trim(documentation), '"', '"'); format documentation $800.; run; %end; data variable_metadata; length comment $ 2000; set variable_metadata; format comment; informat comment; origin = htmlencode(origin); label = htmlencode(label); comment = htmlencode(comment); **** FOR ADAM, JOIN ORIGIN/"SOURCE" AND COMMENT **** TO FORM "SOURCE/DERIVATION" METADATA; if "&standard" = "ADAM" and origin ne '' and comment ne '' then comment = "SOURCE: " || left(trim(origin)) || " DERIVATION: " || left(trim(comment)); else if "&standard" = "ADAM" and origin ne '' and comment = '' then comment = "SOURCE: " || left(trim(origin)); if "&standard" = "ADAM" and origin = '' and comment ne '' then comment = "DERIVATION: " || left(trim(comment)); run; data codelists; set codelists; codedvalue = htmlencode(codedvalue); translated = htmlencode(translated); run; data compmethod; set compmethod; computationmethod = htmlencode(computationmethod); run; data valuelevel; length comment $ 2000; set valuelevel; format comment; informat comment; origin = htmlencode(origin); label = htmlencode(label); comment = htmlencode(comment); **** FOR ADAM, JOIN ORIGIN/"SOURCE" AND COMMENT **** TO FORM "SOURCE/DERIVATION" METADATA; if "&standard" = "ADAM" and origin ne '' and comment ne '' then comment = "SOURCE: " || left(trim(origin)) || " DERIVATION: " || left(trim(comment)); else if "&standard" = "ADAM" and origin ne '' and comment = '' then comment = "SOURCE: " || left(trim(origin)); if "&standard" = "ADAM" and origin = '' and comment ne '' then comment = "DERIVATION: " || left(trim(comment)); run; %if "&standard" = "ADAM" %then %do; data analysisresults; length programmingcode $800. docleafid $40.; set analysisresults; where displayid ne ''; arrow + 1; selectioncriteria = htmlencode(selectioncriteria); paramlist = htmlencode(paramlist); reason = htmlencode(reason); documentation = htmlencode(documentation); if index(documentation, '[r]')>0 then docleafid = substr(documentation, index(documentation,'[r]')+3, index(documentation,'[\r]')-index(documentation,'[r]')-3); else docleafid = '.'; programmingcode = htmlencode(programmingcode); ** convert single quotes to double quotes; programmingcode = tranwrd(programmingcode, "'", '"'); ** convert double quotes to html quote; programmingcode = tranwrd(programmingcode, '"', '"'); format programmingcode $800.; run; ** ENSURE UNIQUENESS ON DISPLAYID AND RESULTID AND CREATE A COMBO ID; data analysisresults; set analysisresults; by displayid notsorted; drop resultnum; retain resultnum; if first.displayid then resultnum = 0; resultnum + 1; if not(first.displayid and last.displayid) then arid = trim(displayid) || ".R." || put(resultnum,z2.); else arid = displayid; run; ** IF DOCLEAFID IS NON-MISSING, MERGE IN THE TITLE FROM EXTERNAL_LINKS; proc sort data = analysisresults; by docleafid; run; proc sort data = externallinks (keep = leafid title rename=(leafid=docleafid title=doctitle)) out = doc_links; by docleafid; run; data analysisresults; merge analysisresults (in = inar) doc_links (in = indoc_links); by docleafid; if inar; ** if the leaf ID exists, then the title of the leaf ID will be printed and can be removed from DOCUMENTIATION; if indoc_links then documentation = tranwrd(documentation, '[r]' || trim(docleafid) || '[\r]', " "); run; proc sort data = analysisresults; by arrow; run; %end; **** CREATE DEFINE FILE HEADER SECTION; filename dheader "&path\define_header.txt"; data define_header; set define_header; file dheader notitles; creationdate = compress(put(datetime(), IS8601DT.)); put @1 '' / @1 '' / @1 '' / @1 '' / @1 "" / @1 '' / @1 '' / @1 '' / @1 '' / @3 '' / @5 '' studyname +(-1) '' / @5 '' studydescription +(-1) '' / @5 '' protocolname +(-1) '' / @3 '' / @3 '' / %if "&standard" = "ADAM" %then %do; @5 '' / @7 '' / @5 '' / @5 '' / @7 'Data Guide' / %end; %else %if "&standard" = "SDTM" %then %do; @5 '' / @7 '' / @5 '' / @5 '' / @7 'Annotated Case Report Form' / %end; @5 ''; run; **** ADD OTHER ADAM EXTERNAL LINKS; %if "&standard" = "ADAM" %then %do; filename leaves "&path\leaves.txt"; data _null_; set externallinks; file leaves notitles; put @5 '' / @7 '' title '' / @5 '' ; run; %end; **** ADD ITEMOID TO VARIABLE METADATA; data variable_metadata; set variable_metadata(rename=(domain = oid)); length itemoid $ 40; if variable in ("STUDYID","DOMAIN","USUBJID","SUBJID") then itemoid = variable; else itemoid = compress(oid || "." || variable); run; **** ADD ITEMOID TO VALUE LEVEL METADATA; data valuelevel; set valuelevel; length itemoid $ 200; itemoid = compress(valuelistoid || "." || valuename); run; **** CREATE COMPUTATION METHOD SECTION; filename comp "&path\compmethod.txt"; data compmethods; set compmethod; file comp notitles; if _n_ = 1 then put @5 "" / @5 "" / @5 ""; put @5 '' computationmethod +(-1) ''; run; **** CREATE VALUE LEVEL LIST DEFINITION SECTION; proc sort data=valuelevel; where valuelistoid ne ''; by valuelistoid; run; filename vallist "&path\valuelist.txt"; data valuelevel; set valuelevel; by valuelistoid; file vallist notitles; if _n_ = 1 then put @5 "" / @5 "" / @5 ""; if first.valuelistoid then put @5 ''; put @7 ''; if last.valuelistoid then put @5 ''; run; **** CREATE "ITEMGROUPDEF" SECTION; proc sort data=VARIABLE_METADATA; where oid ne ''; by oid varnum; run; proc sort data=toc_metadata; where oid ne ''; by oid; run; filename igdef "&path\itemgroupdef.txt"; data itemgroupdef; length label $ 40; merge toc_metadata VARIABLE_METADATA(drop=label); by oid; file igdef notitles; if first.oid then do; put @5 "" / @5 "" / @5 "" / @5 '' ; %else put @7 'def:ArchiveLocationID="Location.' archivelocationid +(-1) '">'; ; end; put @7 ''; else put '"/>'; if last.oid then put @7 "" / @7 "" / @7 "" / @7 '' / @9 '' archivelocationid +(-1) '.xpt ' / @7 '' / @5 ''; run; **** CREATE "ITEMDEF" SECTION; filename idef "&path\itemdef.txt"; data itemdef; set VARIABLE_METADATA end=eof; by oid; file idef notitles; if _n_ = 1 then put @5 "" / @5 "" / @5 "" ; put @5 ''; if codelistname ne '' then put @7 ''; if valuelistoid ne '' then put @7 ''; put @5 ''; run; **** ADD ITEMDEFS FOR VALUE LEVEL ITEMS TO "ITEMDEF" SECTION; filename idefvl "&path\itemdef_value.txt"; data itemdefvalue; set valuelevel end=eof; by valuelistoid; file idefvl notitles; if _n_ = 1 then put @5 "" / @5 "" / @5 "" ; put @5 ''; if codelistname ne '' then put @7 ''; put @5 ''; run; **** ADD ANALYSIS RESULTS METADATA SECTION FOR ADAM; %if "&standard" = "ADAM" %then %do; filename ar "&path\analysisresults.txt"; data _null_; set analysisresults; ** note that it is required that identical display IDs be adjacent to ** each other in the metadata spreadsheet; by displayid notsorted; file ar notitles; if _n_ = 1 then put @5 "" / @5 "" / @5 "" ; if first.displayid then put @5 '' / @7 '' ; put @9 '' / @9 '' / @9 '' ; ** loop through PARAMCD/PARAM sets; set = 1; do while(scan(paramlist,set,'|') ne ''); paramset = scan(paramlist,set,'|'); paramcd = scan(paramset,1,'/\'); param = trim(scan(paramset,2,'/\')); put @11 '' ; set = set + 1; end; put @9 ''; ** loop through the analysis variables; set = 1; do while(scan(analysisvariables,set,',') ne ''); analysisvar = scan(analysisvariables,set,','); put @11 ''; set = set + 1; end; put @9 ''; put @9 '' / @11 '' / @11 '' / /* just use the row number of the data set as the unique number for the selection criteria */ @13 ' [' selectioncriteria ' ] '/ @11 ' ' / @9 ' ' / @9 '' / @11 '' Documentation / @11 '' / @9 '' / @9 '' / @9 '' / @1 ProgrammingCode / @9 '' / @9 '' / @7 '' ; if last.displayid then put @5 '' / @5 '' ; run; %end; **** CREATE CODELIST SECTION; filename codes "&path\codelist.txt"; proc sort data=codelists nodupkey; by codelistname codedvalue translated; run; **** MAKE SURE CODELIST IS UNIQUE; data _null_; set codelists; by codelistname codedvalue; if not (first.codedvalue and last.codedvalue) then put "ERR" "OR: multiple versions of the same coded value " codelistname= codedvalue=; run; proc sort data=codelists; by codelistname rank; run; data codelists; set codelists end=eof; by codelistname rank; file codes notitles; if _n_ = 1 then put @5 "" / @5 "" / @5 "" ; if first.codelistname then put @5 ''; **** output codelists that are not external dictionaries; if codelistdictionary = '' then do; put @7 ''; else put '>'; put @9 '' / @11 '' translated +(-1) '' / @9 '' / @7 ''; end; **** output codelists that are pointers to external codelists; if codelistdictionary ne '' then put @7 ''; if last.codelistname then put @5 ''; if eof then put @3 '' / @1 '' / @1 ''; run; ** create the .BAT file that will put all of the files together to create the define; filename dotbat "make_define.bat"; data _null_; file dotbat notitles; drive = substr("&path",1,2); put @1 drive; put @1 "cd &path"; if "&standard" = "ADAM" then put @1 "type define_header.txt leaves.txt compmethod.txt valuelist.txt itemgroupdef.txt itemdef.txt itemdef_value.txt analysisresults.txt codelist.txt > define.xml"; else if "&standard" = "SDTM" then put @1 "type define_header.txt leaves.txt compmethod.txt valuelist.txt itemgroupdef.txt itemdef.txt itemdef_value.txt codelist.txt > define.xml"; put @1 "exit"; run; x "make_define"; %mend make_define;