%**start of header
\catcode`@=11 % borrow the private macros of PLAIN (with care)

% Glue register; it is given a value inside \ninepoint below.
\newskip\ttglue

% Font declarations: three eleven-point faces (ten-point designs at
% \magstephalf) plus the nine- and six-point faces that \ninepoint
% installs in the math families.
\font\elevenrm=cmr10 scaled \magstephalf % roman text
\font\eleventt=cmtt10 scaled \magstephalf % typewriter
\font\elevenit=cmti10 scaled\magstephalf
\font\ninebf=cmbx9
\font\ninerm=cmr9
\font\ninei=cmmi9
\font\ninesy=cmsy9
\font\ninett=cmtt9
\font\ninesl=cmsl9
\font\nineit=cmti9
\font\sixi=cmmi6
\font\sixsy=cmsy6
\font\sixbf=cmbx6
\skewchar\ninei='177
\skewchar\ninesy='60
\hyphenchar\ninett=-1

% \ninepoint: switch the current group to nine-point type.
% Redefines \rm, \it, \sl, \bf and \tt, reassigns the fonts of math
% families 0-3 and of \itfam, \slfam, \bffam, \ttfam, sets \ttglue
% (in units of the \tt font's em), sets \normalbaselineskip to 11pt with
% a matching \strutbox, makes \sc a synonym for \sevenrm, and finally
% selects \rm.
% NOTE: \sixrm is only declared further down in this file
% (\font\sixrm=cmr6), so \ninepoint must not be *executed* before that
% declaration has been read.
\def\ninepoint{\def\rm{\fam0\ninerm}% set in nine point families
  \textfont0=\ninerm \scriptfont0=\sixrm \scriptscriptfont0=\fiverm
  \textfont1=\ninei \scriptfont1=\sixi \scriptscriptfont1=\fivei
  \textfont2=\ninesy \scriptfont2=\sixsy \scriptscriptfont2=\fivesy
  \textfont3=\tenex \scriptfont3=\tenex \scriptscriptfont3=\tenex
  \textfont\itfam=\nineit \def\it{\fam\itfam\nineit}% \it is family 4
  \textfont\slfam=\ninesl \def\sl{\fam\slfam\ninesl}% \sl is family 5
  \textfont\bffam=\ninebf \scriptfont\bffam=\sixbf
  \scriptscriptfont\bffam=\fivebf \def\bf{\fam\bffam\ninebf}% \bf is family 6
  \textfont\ttfam=\ninett \def\tt{\fam\ttfam\ninett}% \tt is family 7
  \tt \ttglue=.5em plus.25em minus.15em
  \normalbaselineskip=11pt
  \setbox\strutbox=\hbox{\vrule height8pt depth3pt width0pt}% strut for 11pt
  \let\sc=\sevenrm \normalbaselines\rm}

\font\scaps=cmcsc10 % for \LaTeX
\font\lb=logobf10
\chardef\bs=`\\ % backslash in a string
% \;{text} sets its argument in italics followed by an italic correction.
% (Inside \Beginmft ... \Endmft the name \; is temporarily re-bound.)
\def\;#1{{\it #1\/}} % The simplest and most useful of all.

\input mftmac
\parindent=1.5pc % restore after mftmac clobbered it

\font\ninetex=cmtex9 \hyphenchar\ninetex=-1
% \finstring"...": macro delimited by double quotes.  It sets the quoted
% text (quotes included) in \ninetex and then closes a group with \egroup.
% NOTE(review): the matching group opener is not visible in this file;
% presumably it comes from mftmac -- confirm.
\def\finstring"#1"{\ninetex"#1"\egroup}

% \Beginmft ... \Endmft bracket an mft-formatted METAFONT listing.
% \Beginmft ends the current paragraph, opens a group, switches to
% nine-point type, gives `:', `!' and `=' category code 12, re-binds \;
% to a thick math space, redefines \bf (which also redefines \_ as a
% short rule), \MF and \big, resets \baselineskip and \strutbox, refills
% the box register \shorthyf, and sets \parindent to zero.
% NOTE(review): \manual, \shorthyf and \ninebig are not defined before
% this point in the file; \ninebig is defined later in the header, the
% other two presumably come from mftmac -- confirm.
\def\Beginmft{\par\begingroup\ninepoint
% \chardef:=`\: \chardef!=`\! % \chardef==`\=
  \catcode`\:=12 \catcode`\!=12 \catcode`\==12
  \def\mathsemicolon{\mskip\thickmuskip}
  \let\;=\mathsemicolon
  % nine-point type:
  \def\bf{\fam\bffam\ninebf
    \def\_{\kern.04em\vbox{\hrule width.3em height .6pt}\kern.08em}%
    \ninebf}
  \textfont\bffam=\ninebf \scriptfont\bffam=\sixbf
  \scriptscriptfont\bffam=\fivebf
  \baselineskip=11pt
  \def\MF{{\manual hijk}\-{\manual lmnj}}
  \let\big=\ninebig
  \setbox\strutbox=\hbox{\vrule height8pt depth3pt width0pt}
  \rm
  \setbox\shorthyf=\hbox{-\kern-.05em}
  \parindent=0pt
  }
% \Endmft ends the paragraph and closes the group opened by \Beginmft,
% undoing all of the local changes above.
\def\Endmft{\par
  \endgroup}

\input tkccode
\font\tentk=tkr10
% \font\elvntk=tkr11
\font\tenti=tkti10
% \font\elvnti=tkti11

% \BeginTurkish ... \EndTurkish bracket a stretch of Turkish text.
% Inside the group \` and \' select characters '43 and '100 of the
% current font, `:', `!' and `=' get category code 12, and their space
% factor codes are set to 1000.
\def\BeginTurkish{\par \begingroup
% \chardef:=`\: \chardef!=`\! % \chardef==`\=
  \def\`{\char'43}\def\'{\char'100}%
  \catcode`\:=12 \catcode`\!=12 \catcode`\==12
  \sfcode`\:=1000 \sfcode`\!=1000 \sfcode`\==1000 % Frenchspacing after
% these
  }
\def\EndTurkish{\par %let's be quite certain about this!
  \endgroup}
% \verbatim{file}: input the named file and set it verbatim in nine-point
% typewriter type.  Every character listed by \dospecials, plus `:', `!'
% and `=', gets category code 12; space and end-of-line are made active
% (\obeyspaces, \obeylines); \par is redefined to put a control space
% before \endgraf; the overfull-box rule is suppressed.  Everything is
% local to the group that is closed after the file has been read.
\def\verbatim#1{\begingroup\ninepoint \frenchspacing
  \def\do##1{\catcode`##1=12 } \dospecials
  \catcode`\:=12 \catcode`\!=12 \catcode`\==12
  \parskip 0pt \parindent 0pt
  \catcode`\ =\active \catcode`\^^M=\active
  \tt \def\par{\ \endgraf}\overfullrule=0pt \obeylines \obeyspaces
  \input #1 \endgroup}
% a blank line will be typeset at the end of the file;
% if you're unlucky it will appear on a page by itself!
{\obeyspaces\global\let =\ }

\font\titlefont=cmr17
\font\namefont=cmcsc10 scaled \magstep1
\font\twelvebd=cmbx12
\font\sixrm=cmr6

% Page geometry.  The first page is set on a 43pc column; \plainoutput
% below enlarges \vsize to 45pc once the first page has been shipped.
\vsize=43pc
\hsize=29pc
\parindent=16pt
\nopagenumbers
\normalbaselines
% \nopagenumbers empties \footline, so it is refilled here: a centered
% folio in six-point roman.
\footline={\hss\sixrm\folio\hss}
\def\makefootline{\baselineskip3pc\line{\the\footline}}
% \plainoutput: replacement for plain TeX's output macro.  Ships out
% headline + body + footline, advances the page number, and switches
% the page height to 45pc for the pages that follow.
\def\plainoutput{\shipout\vbox{\makeheadline\pagebody\makefootline}%
  \advancepageno
  % TeX runs the output routine inside an implicit group, so a local
  % assignment to \vsize would be undone as soon as the routine ends;
  % the assignment must be \global to affect later pages.
  \global\vsize=45pc
  \ifnum\outputpenalty>-\@MM \else\dosupereject\fi}
% \makeheadline: zero-height running head.  Page 1 gets no headline;
% odd pages carry \doctitle flush right, even pages \authorname flush
% left.
\def\makeheadline{\vbox to\z@{\vskip-22.5\p@
  \line{\vbox to8.5\p@{}\ifnum\count0=1
  \else\ifodd\count0
  \hfill\the\doctitle\ignorespaces
  \else \the\authorname\hfill\fi\fi}\vss}\nointerlineskip}

% Token registers read by \makeheadline and by the title block below.
\newtoks\doctitle \newtoks\authorname

\doctitle={The Ottoman Texts Project}
\authorname={Walter Andrews and Pierre MacKay}
% Added to the general format file
% an addition needed by mftmac
% \ninebig<delim>: a \left...\right. delimiter sized around an empty
% 7.25pt-high box, with families 0 and 2 set to the ten-point fonts
% (\Beginmft lets \big to this macro).
\def\ninebig#1{{\hbox{$\textfont0=\tenrm\textfont2=\tensy
  \left#1\vbox to7.25pt{}\right.\n@space$}}}
\catcode`@=12 % at signs are no longer letters
\tenrm
\begingroup
\titlefont
\the\doctitle\hfil
\vskip 3pc
\namefont\the\authorname\hfil
\tenrm
\vskip 1pc
{\obeylines
Department of Near Eastern Languages and Civilization
University of Washington
Seattle, Washington 98195
}
\vskip 3pc
\line{\hfill\twelvebd ABSTRACT\hfill}
\vskip 3pc
\leftskip 4pc \rightskip 4pc
The Turkish orthographic reform of 1928, which required the abandonment
of Arabic script in favor of a Latin letter alphabet, was accompanied
by a cultural rejection of all literature from the
Ottoman period of Turkish history.  As a result, only a small part of
Ottoman Turkish literature has been made available in scholarly editions
in the new orthography.
The Ottoman Texts Project is a cooperative
effort of Turkish and North American scholars to provide new editions
of these works using popular low-priced personal computer systems and
standard general purpose software.  This paper describes an approach
based on the adoption of \TeX\ as the preferred output system for
publication.
\par
\endgroup
\vskip 3pc
%**end of header
\font\eightrm=cmr8
\def\sc{\eightrm}

\font\huge=cmr17 scaled \magstep2
% \dropinitial{X}{rest}: start a paragraph with a dropped initial.
%   #1  the initial, set in \huge
%   #2  the text that follows it on the first line, set in \sc
%       (eight-point roman, as defined just above)
% The initial is built in box 8; \dimen8 receives the larger of
% \parindent and 1.3 times the width of that box and is used as a
% hanging indentation for the first two lines (\hangafter=-2).  The box
% is then moved back into the indentation with a negative \hskip.
% The helper macros \biginitial and \makeinitial are (re)defined on each
% call.  Their parameters are written ##1 because they are defined inside
% a macro that has parameters of its own, and they are called with braced
% arguments so that an initial consisting of several tokens or of a brace
% group (e.g. an accented letter) is passed through intact.
\def\dropinitial#1#2{\def\biginitial##1{{\huge##1}}%
  \def\makeinitial##1{\setbox8\hbox{\strut\vbox to 1.3ex
    {\hbox{\biginitial{##1}}\vskip -4pc plus 3.5pc minus 3.5pc}}}%
  \makeinitial{#1}%
  \ifdim\parindent>1.3\wd8\dimen8=\parindent
  \else\dimen8=1.3\wd8\fi
  \hangindent=\dimen8\hangafter=-2
  \noindent
  \strut\hskip-1\dimen8\box8{\sc#2}}%

\noindent
The Ottoman Texts editing and typesetting project
represents an attempt to provide a simple, low-cost
system for the entry, editing, and typesetting of
transcribed [romanized] Ottoman Turkish texts.  The
purpose of developing such a system was to take advantage
of the increasing availability of microcomputers
world-wide and to induce the editors of Ottoman texts--
especially Turkish editors--to employ electronic media
for their editing tasks.  The benefits to scholars of
having a large corpus of texts available in
machine-readable form seem obvious, but overcoming
``technology cringe'' on the part of scholars whose
devotion to medieval literature stems in large part from
a strong conservative-traditionalist ideological bent is
no small task.  Nonetheless, the rewards of converting a
significant number of such scholars would be quite high.
191The vast majority of significant Ottoman Turkish texts 192await up-to-date editing and the suggested technological 193change could have a major impact on the speed and 194accuracy of the editing process as well as on the 195development of lexicographical tools and on many areas of 196literary and linguistic study. 197 198 The situation in Ottoman studies that makes a 199switch to electronic media especially attractive at this 200time is rather complex and demands some historical 201introduction. From its earliest years at about the 202beginning of the 14th century until early in the 20th 203century, the Ottoman dialect of Turkish was written in 204the Arabic script. The political decline of the Ottoman 205Empire from its pinnacle of world power in the 16th 206century to its status as a moribund, defeated ally of 207Germany following World War I, was arrested in the first 208three decades of this century by a political and 209ideological revolution that saw the establishment of a 210Turkish Republic and an accompanying rejection of the 211literary, cultural, and religious institutions of the 212Ottoman past. One aspect of the cultural revolution was 213the adoption of a latin letter alphabet for Turkish, a 214change which had among its 215consequences the expansion of literacy beyond a small 216elite circle to the general populace, a conscious effort 217to simplify the written language, and a resultant major 218decline in the ability to read and comprehend the Ottoman 219literary language in any of its forms. The ethos of the 220early years of the Republic, to which the Ottoman Empire 221appeared as decadent and its culture as derivative, also 222meant that, at a time when the scholarly edition of older 223texts was becoming a growing concern in other parts of 224the world, in Turkey interest in things Ottoman, 225including Ottoman texts, was considered backward, 226anti-nationalist, counter-revolutionary, and 227wrong-headed. 
As a result, very few texts were 228adequately edited and the population in general was 229further cut off from its historical past. Since the 230Second World War, however, there has been an increased 231scholarly interest in Ottoman texts and in the 232transcription and edition of such texts. This interest 233has grown with the growth of a tolerance for some 234reemergences of older ethical, religious, and cultural 235practices and attitudes. 236 237 It is clear that the particular situation in 238Turkey today lends itself to the adoption of editing 239methodologies that take advantage of computer technology: 240there is a large cadre of well-educated persons with very 241positive attitudes toward technological innovation; the 242Latin alphabet is used [with modifications for Ottoman 243transcription]; most of the basic editing work remains to 244be done; there is already great interest in the types of 245concordancing, indexing, lexicographical analysis, etc. 246that can be most easily done by computers. Nonetheless, 247Ottoman studies is still an area that attracts persons 248who would be least likely to welcome technological 249innovation and so any change would need to bring 250immediate and obvious benefits. When the editing project 251was initiated, it was decided that the result should have 252the following characteristics: 253 254\medskip 255\noindent1.\quad It should be easy to use even for the most 256unsophisticated user. 257 258\noindent2.\quad It should be adaptable to many different 259circumstances and should be easily supportable. 260 261\noindent3.\quad It should obviously eliminate the need for 262more than one entry of the basic text. [This is, of 263course, common to all computer word-processing systems 264but it is such a major departure from the usual round of 265draft typings that its benefits must be emphasized to 266those who have not experienced it.] 
267 268\noindent4.\quad It should be capable of producing typeset 269camera-ready copy for printing. [This is a major 270potential benefit even in Turkey where the costs of more 271labor-intensive typesetting methods are growing rapidly.] 272 273\medskip 274 275 The project developed in several stages and was 276not without its problems and false starts. The first 277stage involved convincing a noted Turkish scholar and 278respected editor of Ottoman texts to come to the 279University of Washington to attempt to edit the collected 280poems of a 16th century Ottoman poet using the IBM XT 281already employed by the Department of Near Eastern 282Languages and Civilizations for the development of 283Turkish character-sets. Scholarly processes being what 284they are it turned out to be easier to bring the scholar 285than to have the necessary word-processing capabilities 286ready when he arrived. As a result, a rather cumbersome 287combination of Microsoft's WORD, Rosesoft's ``smart key'' 288program [PROKEY], and a series of BASIC programs 289developed by Robert Blum of the UW administration was 290used to enable the Turkish visitor to input and edit 291about 90\% of the poems in the collection [over 500 poems] 292in about three months. The editor, who had had no 293previous experience of computers and no particular liking 294or aptitude for them, was an eager and willing convert to 295the process. Prior to his departure, we were also able 296to employ a simple translation program which converted 297the character-set designed for the XT to \TeX\ notation 298and, subsequently, to produce a typeset sample of the 299edited text on the SUN 300minicomputer. The reaction of our visitor to the 301results of this process, which was carried out with the 302help of two fellow scholars without the intervention of 303typists or typesetters, was pure delight and amazement. 304 305 In the ensuing months the project has been 306considerably refined and improved. 
With the invaluable
assistance of the UW Humanities and Arts Computing
Center and its resident character-sets guru, Gerald
Barnett, we have been able to develop a word-processing
system that is simple, efficient, flexible, and low-cost.
The system is based on Quicksoft's PC WRITE program used
with EGA/VGA and compatible graphics hardware.\footnote{$^*$}{NOTE: At
present, the system produces a host of irritating ``ghost
diacritics'' when used with the IBM PS 2 graphics--these
are a distraction more than a real hindrance but, as
yet, we have no idea why they occur.}  The advantages of
PC WRITE for this kind of word-processing are numerous
but it is worth mentioning a few in some detail.

Given the goal of making this technology widely
available among scholars and students [especially among
foreign students and scholars], the fact that PC WRITE is
low-cost, share-ware [\$89.00 with full support] makes it an
attractive alternative.  Moreover, PC WRITE permits
virtually limitless customization of keyboards, fonts,
printer controls, etc.\ in a manner accessible to persons
without any knowledge of programming or programming
languages.  Using a simple set of programs--a program
designed at Duke University for the creation of
characters for display on an EGA driven monitor and a
program being developed by Gerald Barnett of the UW for
the production of downloadable printer fonts--we have
been able to produce a word processing system that can
display and edit an extended IBM character-set, which
will allow the use of modern [roman alphabet] Turkish,
the romanized transcription of Ottoman Turkish [Arabic
alphabet], and a full English characters font.  One can
also switch instantly between a standard IBM
keyboard, an IBM keyboard adapted to Turkish
characters, and the standard Turkish keyboard with
extensions for the Ottoman character-set.
In addition, 342the system supports draft printing on the IBM Pro Printer 343and letter quality printing on the NEC and Toshiba 24 pin 344printers [with the use of a bi-directional tractor]. 345 346 The extended IBM character-set uses 8 of the 347special European characters, 32 special Ottoman 348transcription characters [on ASCII codes 192--223], and 9 349special modern Turkish characters [on ASCII codes 350225--233], as well as the full English set. All of the 351modern Turkish characters appear as characters on the 352modified standard keyboard. The Ottoman Turkish 353characters [standard English characters with diacritics] 354are called up by two-key sequences. For example, a ``d'' 355with a dot under it is produced by striking ``/'' followed 356by ``d''; all other special Ottoman characters are produced 357by the same sequence [``/''$+$``character'']. Keyboard 358arrangement and the particular character used to call up 359the special characters can be easily modified to suit the 360preferences of the user. 361 362 One fortunate aspect of the print control features of PC 363WRITE for this project 364is that the print control program can be set up to 365support two different fonts for each character. 366Therefore, \TeX\ notation can be provided as an alternative 367for each character and translation from the usual 368word-processor font to \TeX\ notation can be done 369automatically by simply printing in the \TeX\ input character set to 370another file. Because PC WRITE produces ``clean'' ASCII 371files, the material is immediately ready for typesetting 372in whatever \TeX\ environment is being used. 373 374\medskip 375\centerline{{\elevenrm Accented character sets in \TeX}} 376\smallskip 377 378\noindent 379In the few years since 380the official release of \TeX, a number of 381attempts have been made to adapt the program to languages other than 382English. 
The best known successes have depended on adaptations of the 383program itself, partly because the standard release of \TeX\ can 384support only one system of hyphenation at a time, which makes a truly 385bilingual document quite difficult to produce. These adaptations may 386be broadly classed as program-based extensions of the language. The 387extension which is most obviously necessary is the addition of a 388primitive which can control the switch between one predigested 389hyphenation pattern and another. Michael Ferguson's bilingual CNRS-\TeX, 390which was initially developed for an environment in the province of 391Quebec, where French and English are constantly intermingled, is one 392of the outstanding developments in this class of adaptations, and 393there are others as well. 394 395A second extension is needed to get around the problem of hyphenation 396in languages which make use of diacriticals and accents. The basic 397form of \TeX\ will reject any word containing an accent from the 398evaluation routine 399which normally looks for acceptable hyphenation breaks. In effect, 400any word with an accent is treated as if it were an unbreakable 401horizontal box, and is not evaluated for hyphenation at all. This can 402make line-breaking very difficult, and several users of \TeX\ have 403found it necessary to introduce a loop into the program so that 404accents and diacriticals will be stripped out just before the entry to 405the hyphenation routine, and then returned to their remembered positions 406after the discretionary hyphen nodes have been inserted into the word. 407 408The disadvantage of both these systems is that the adapted program is no 409longer \TeX. 
It is often possible to add the extra features in such a way 410that the resultant program will produce {\tt DVI} files that are 411indistinguishable from those generated by \TeX, but the extra features 412are not generally available on all systems which run \TeX, and the 413user is often excluded, therefore, from some of the most popular small 414system versions of \TeX. 415 416An alternative solution to the problem of accented languages, though 417not of bilingual hyphenation patterns, is a font-based, rather than a 418program-based approach. Font characters may be generated with the 419accents already applied, and mapped into unused or little-used areas 420of the normal Computer Modern font table. If these characters are 421then supplied with an appropriate \TeX\ {\tt\bs lccode} value, the 422hyphenation loop will recognize them as part of a sequence capable of 423being hyphenated. For a monolingual application in a language which 424makes intensive use of accents and diacriticals, this can be an 425attractive approach, especially when there are reasons for wishing to 426preserve the ability to make use of small system versions of \TeX. 427This is the approach we have taken for Turkish \TeX. 428 429Turkish provides a delightfully vivid set of examples of accentuation 430and hyphenation. The Latin-letter character set which has been in 431use since the orthographic reform of 1928 is extended, even in Modern 432Turkish, by means of a considerable number of diacriticals and accents. A 433diligent search through the modern dictionary will produce several 434five- and six-letter words in which every character is accented, and an 435intensive search might come up with words as much as nine letters long 436with every character accented. In critical editions of Ottoman texts, 437the number of accents more than doubles. 
Modern Turkish knows only
the accented and unaccented pair of letters `{\bf s}' and `{\bf\c s}', but
Ottoman Turkish has `{\bf s}', `{\bf\c s}', `{\bf\d s}' and `{\bf\b s}', which
represent four completely distinct characters in the Arabic alphabet.
The letter `{\bf h}' shows almost as much variety, and so do several
others.  Our Ottoman Turkish font has twenty-seven accent and letter
composites, in addition to the basic twenty-six simple Latin letters.
Moreover, all composites can exist in upper case forms as well as in
lower case.

When a character set is as heavily accented as this, it is desirable
to make sure that the accents are positioned over their letters as
exactly as possible.  The {\tt\bs accent} primitive in \TeX\ does a
remarkably good job of positioning accents, but it depends on a very
general algorithm, and tends to place accents exactly centered over
or under the affected character, no matter what the appearance of
that character may be.  Donald Knuth recognized this limitation in
the very earliest stages of the development of \TeX, and has
consistently recommended that frequently used combinations of
character and accent be developed as composite single images in the
font.  The center of a character is not always the best visual
position for an accent; top accents should often be slipped just a
bit to the right, and bottom accents just a bit to the left of the
mechanically defined centerline of the character.  Height and depth
of accents are similarly subject to aesthetic judgement.
The {\tt\bs accent} primitive of \TeX\ works very well indeed for
sparsely occurring accentuation, but not so well when accents occur
in every second word.

The problem of hyphenation in Turkish is even more striking.
Turkish
is known as an ``agglutinating'' language, which means, in effect, that
each discrete logico-syntactic qualification of a basic word is
expressed in a single syllable tacked onto all the other syllables in
the word.  At the same time, it is a language in which consonant
clusters are virtually unknown.  A Turkish word is made up of simple
open and closed syllables, of the form {\tt cv} or {\tt cvc}, and in
native words there is not even the distinction between long and short
vowels.  The result is a language in which word-length tends to be
greater than it is even in English, and where, as a result,
hyphenation is often necessary.  The hyphenation rules are inherited
from the syllabification of Arabic.  A syllable is assumed always to
consist of an initial consonant (even when that consonant is no longer
written) and to terminate in a vowel or in the next unvowelled
consonant.  This pattern is followed so absolutely that it is
permitted to break up native Turkish suffixes.  The plural suffix
\;{-ler-} will be hyphenated as \;{-le-rine} in an environment where
the {\tt -cv-cv-cv} pattern predominates.

A set of hyphenation patterns for Turkish will therefore be quite
simple to produce, but it will have no effect on most Turkish words
unless something is done about the problem of accents.  A word such as
\;{\c cektirilebilecek} ought to provide six discretionary
hyphenation nodes: \;{\c cek-ti-ri-le-bi-le-cek}, but the {\tt\bs
accent} primitive applied to the first letter will guarantee that the
standard version of \TeX\ gives up any attempt to hyphenate it at
all.\footnote{$^*$}{The word is a future participle, and describes
something as being
capable of being extracted at some time in the future--like a tooth.
495A morphological division of the word would produce a very different 496hyphenation pattern, \;{\c cek-tir-il-e-bil-ecek}, with only five nodes.} 497If the initial letter `{\bf \c c}' were a single character in a special 498font, and were provided with an {\tt\bs lccode} value, the {\tt\bs 499accent} primitive would no longer appear, and the word could be 500evaluated for hyphenation. 501 502Since the majority of \TeX\ users will never have to deal with {\tt\bs 503lccode}s at all, a word of explanation is in order here. \TeX\ 504is designed to take care of the problems of typesetting in a 505general manner, independent of the language of the text to be 506set. The program recognizes that while many languages have 507paired upper and lower case character sets, not all do, and the 508order of the basic text character set may not be that of the 509Latin alphabet. For this reason, specific upper and lower case 510pairings are not built into the program, but are supplied by 511macro definitions in {\tt plain.tex}. Like all other 512definitions in plain.tex, they may be replaced, and it is quite 513possible to dispense with plain.tex altogether, and substitute 514another basic format file such as {\tt sadece.tex}, {\tt 515franc.tex}, {\tt einfach.tex} or {\tt sketo.tex}. (Knuth 516insists, for obvious reasons, that the one thing you may not call it is 517``plain.tex.'') If additional characters such as the accented letters 518of Turkish are made part of the basic input coding table, then they 519are likely to exist in upper and lower case pairs. Each lower case 520code is given itself as a lower case {\tt\bs lccode}, and the code of 521its upper case equivalent as its {\tt\bs uccode}. These can be used 522to force conversion from one case to the other, but the {\tt\bs 523lccode} serves an additional purpose. 
When \TeX\ enters the program 524loop which searches for discretionary hyphen nodes in each word, it 525first unpicks all ligatures such as {\bf ffi} and then evaluates the 526resultant list from the beginning, 527working on any given word only so long as every character it 528finds has a valid {\tt\bs lccode}. Any node that is not a simple 529character with a valid {\tt\bs lccode} causes the routine to terminate; 530the sequence so marked is supplied with no discretionary 531hyphen nodes at all, and therefore cannot be broken by the 532line-breaking algorithm. This is what prevents hyphenation in the 533case of the Turkish word given above. 534 535\medskip 536\centerline{\elevenrm Input Code Interpretation} 537\smallskip 538 539\noindent 540The Turkish text-editing system described above 541is driven from a keyboard mapped to conform as 542closely as possible to the standard Turkish typewriter keyboard. 543This mapping is not used directly in the design of the Ottoman 544Turkish font and, in its present form, 545is isolated from the actual \TeX\ input. After the 546raw input has been corrected, it is passed through a filter which 547converts the accented characters into character pairs (or, in a very 548few instances, into \TeX\ command sequences). These pairings are 549based on a proposal made more than ten years ago at the Orientalist 550Congress held in Paris, in 1974. Owing to the extraordinary richness 551of the Ottoman Turkish character set, it has been necessary to extend 552the old proposal, but it still retains the original principles, which 553are closely associated with the coding scheme used by the Onomasticon 554Arabicum project. The Onomasticon Arabicum uses a post-positive dot 555and a post-positive hyphen to indicate diacriticals, which is 556acceptable in a data-base of names, but not in continuous prose text. 
557To provide the indications for Ottoman Turkish diacriticals, 558we have taken over the exclamation 559point `{\tt!}', the equals sign `{\tt=}', and the colon `{\tt:}'. 560 561The exclamation point is used for all the ``emphatic'' letters of the 562Arabic alphabet (the alphabet in which Turkish was written until 5631928). These are the letters \;{\d Dad} (usually pronounced as `{\bf z}' in 564Turkish, and hence paired with a non-Arabic letter known as \;{\.Zad}), 565\;{\d Sad}, \;{\d Ha'}, \;{\d Ta'} and \;{\d Za'}. 566The equals sign is used for all the 567consonants which are represented in Latin-letter transcriptions by a 568letter with a bar under, such as {\bf\b d} (\;{dhal}), more 569commonly written in 570Turkish as `{\bf\b z}', and also for vowels with a macron or, following the 571Turkish convention, a `hat' accent, and similar forms, chosen like the 572cupped `{\bf\u g}', because the equals sign is visually closer than the colon 573is. (Moreover, the colon is needed for a different variety of the 574letter `{\bf g}'.) The colon is a catch-all for everything else, but works 575out rather well visually, as it happens. The three post-positives are 576not accents, but regular characters, which use the \TeX\ convention of 577ligatures to invoke accented characters from the font, just as the 578second `{\bf f$\,$}' in the normal \TeX\ `{\bf ff}' 579ligature pair does. If a standard 580Latin-letter character does not have an associated ligature table in 581the font, a following colon will be unaffected. 582Thus, the letter `{\bf o}', 583when followed by a colon will produce `{\bf\"o}', but the letter `{\bf e}' when 584followed by a colon will produce `{\bf e:}'. The equals sign is returned to 585its normal function in math mode, and the colon and exclamation point 586can be invoked by the command sequences {\tt\bs:} and {\tt\bs bang} 587when the simple character will not work. 
588 589This set of conventions produces an input file which can, if 590necessary, be edited on a ordinary terminal lacking the special 591Turkish character features, and which a Turkish speaker can become 592accustomed to without too much difficulty. When coupled with a 593well-designed macro file and a rewritten hyphenation table, it 594provides the possibility of naturalizing a \TeX\ environment into 595Turkish without any large investment in special purpose hardware and 596rewritten versions of non-standard (non-)\TeX. 597 598\medskip 599\centerline{\elevenrm The Font} 600\smallskip 601 602\noindent 603Donald Knuth's Computer Modern fonts come with a wide range of accents, which 604cover most of the requirements for Turkish. The only obvious lack is 605the flat cup which is used under both upper and lower case `{\bf h}' as 606an aesthetic variant for the simple bar under the letter. All the 607existing accents in Computer Modern are designed for consistency 608with the stroke-weights and proportions of the underlying alphabetic 609characters, and it is therefore very desirable to 610retain the details of this design in any associated font of accented 611characters. The vertical and horizontal positions 612may be altered and, for other languages than Turkish, the angle of 613acute and grave accents over upper case letters, but the basic 614proportions of each accent or diacritical remain unchanged. This is 615achieved by taking over the entire text of the Computer Modern 616character file {\tt accent.mf} and converting the {\tt beginchar 617$\ldots$ endchar} pairs to {\tt def} and {\tt enddef}. It is 618not quite so easy as that, but the process is essentially mechanical, 619and guarantees the preservation of all the essential design details for 620each accent. (The flat cup under `{\bf h}' is based on the slavic tie 621accent, turned upside down.) 
The resultant file, {\tt accdef.mf},
is now full of ``definitions'' which can be
invoked as part of the program file for composite characters.
Positioning, however, can not be entirely taken care of in the
{\tt accdef.mf} file.  The accents in {\tt accent.mf}
are, for the most part, designed with a fixed reference
point at the top of the image, but correct positioning usually
requires a knowledge of where the bottom edge will be.  It is
therefore necessary to take some of the calculations from the accent
definitions, and incorporate them into the description of the
underlying character.  For example, the superscript dot accent in the
Computer Modern font is produced as follows.
\medskip
\Beginmft
$\2{iff}\\{ligs}>0\?\2{cmchar}\7"Dot accent";$\par
$\2{numeric}\\{dot\_diam}\SH ;\ \\{dot\_diam}\SH =\1{max}(\\{dot\_size}\SH ,%
\\{cap\_curve}\SH );$\par
$\2{beginchar}(\1{oct}\7"137",5u\SH ,\1{min}(\\{asc\_height}\SH ,\frac10/{7}%
\\{x\_height}\SH +.5\\{dot\_diam}\SH ),0);$\par
$\2{define\_whole\_blacker\_pixels}(\\{dot\_diam});$\par
$\2{italcorr}h\SH \ast \\{slant}+.5\\{dot\_diam}\SH -2u\SH ;$\par
$\2{adjust\_fit}(0,0);$\par
$\2{pickup}\\{tiny}.\\{nib};\ \\{pos}_{1}(\\{dot\_diam},0);\ \\{pos}_{2}(\\{dot%
\_diam},90);$\par
$x_{1}=x_{2}=.5w;\ \\{top}\,y_{2r}=h+1;$\par
$\2{if}\\{bot}\,y_{2l}<\\{x\_height}+o+\\{slab}\?y_{2l}:=\1{min}(y_{2r}-%
\\{eps},\\{x\_height}+o+\\{slab}+.5\\{tiny});\3{fi}$\par
$y_{1}=.5[y_{2l},y_{2r}];\ \\{dot}(1,2);\ \9 dot\par
$\2{penlabels}(1,2);\3{endchar};$\par
\Endmft
\medskip

\noindent
The corresponding {\tt accdef.mf} definition is

\medskip
\Beginmft
$\2{def}\\{dot\_accent}(\2{suffix}\$,@)(\2{expr}\\{dotY\_shift})=$\par
$\2{save}@;$\par
$\2{forsuffixes}\\{\$\$}=@,@_{\_}\?\2{transform}\\{\$\$};\3{endfor}$\par
$\2{numeric}\\{dh}\SH ;\ \\{dh}\SH :=\1{min}(\\{asc\_height}\SH ,\frac10/{7}%
\\{x\_height}\SH +.5\\{dot\_diam}\SH 
);$\par 663$\2{define\_whole\_blacker\_pixels}(\\{dh},\\{dot\_diam});$\par 664$\2{pickup}\\{tiny}.\\{nib};\ \\{pos}_{@\_1}(\\{dot\_diam},0);\ \\{pos}_{@\_2}(% 665\\{dot\_diam},90);$\par 666$x_{@\_1}=x_{@\_2}=x_{\$};\ \\{top}\,y_{@\_2r}=\\{dh}+1;$\par 667$\2{if}\\{bot}\,y_{@\_2l}<\\{x\_height}+o+\\{slab}\?y_{@\_2l}:=\1{min}(y_{@% 668\_2r}-\\{eps},\\{x\_height}+o+\\{slab}+.5\\{tiny});\3{fi}$\par 669$y_{@\_1}=.5[y_{@\_2l},y_{@\_2r}];$\par 670$\2{numeric}\\{dot\_span};\ \\{dot\_span}=\\{dh}-\\{bot}\,y_{@\_2l};$\par 671$@=\\{identity}\2{if}\\{dotY\_shift}<>0\?\6{shifted}(0,\\{dotY\_shift}+\\{dot% 672\_span})\3{fi};$\par 673$\2{for}n=1,2\?\2{forsuffixes}e=l,,r\?$\par 674\quad$z_{@}[n]e=z_{@\_}[n]e\6{transformed}@;\3{endfor}\3{endfor}$\par 675$\\{dot}(@_{1},@_{2});\ \9 dot\par 676$\2{penlabels}(@_{1},@_{2});\3{enddef};$\par 677\Endmft 678\medskip 679 680Getting this into position over the letter `{\bf o}' requires the 681following program text, 682 683\medskip 684\Beginmft 685$\2{cmchar}\7"The letter dotted o";$\par 686$\\{dot\_sharp\_values};$\par 687$\2{beginchar}(\1{oct}\7"025",9u\SH ,\\{dot\_top}\SH ,0);$\par 688$\2{italcorr}\frac1/{3}[\\{x\_height}\SH ,\\{asc\_height}\SH ]\ast % 689\\{slant}-.5u\SH \2{if}\\{serifs}\?+.25\\{dot\_diam}\SH \3{fi};$\par 690$\2{adjust\_fit}(\2{if}\\{monospace}\?.5u\SH ,.5u\SH \2{else}\?0,0\3{fi});$\par 691$\\{penpos}_{1}(\\{vair},90);\ \\{penpos}_{3}(\\{vair}',-90);$\par 692$\\{penpos}_{2}(\\{curve},180);\ \\{penpos}_{4}(\\{curve},0);$\par 693$x_{2r}=\1{hround}\1{max}(.5u,1.25u-.5\\{curve});$\par 694$x_{4r}=w-x_{2r};\ x_{1}=x_{3}=.5w;\ y_{1r}=\\{x\_height}+\1{vround}1.5\\{oo};\ 695y_{3r}=-\\{oo};$\par 696$y_{2}=y_{4}=.5\\{x\_height}-\\{vair\_corr};\ y_{2l}:=y_{4l}:=.52\\{x% 697\_height};$\par 698$\2{penstroke}\\{pulled\_arc}_{e}(1,2)\AM \\{pulled\_arc}_{e}(2,3)$\par 699\quad${}\AM \\{pulled\_arc}_{e}(3,4)\AM \\{pulled\_arc}_{e}(4,1)\AM \1{cycle};\ 700\9 bowl\par 701$\2{numeric}\\{dot\_shift},\\{dot\_top};$\par 
702$\2{define\_whole\_blacker\_pixels}(\\{dot\_diam},\\{dot\_top});$\par 703$\\{dot\_shift}=0;\ \9 in this case, the position happens to be correct\par 704$x_{7}=x_{1}-.8\\{dot\_diam};\ x_{8}=x_{7}+1.6\\{dot\_diam};$\par 705$\\{dot\_accent}(7,a,\\{dot\_shift});$\par 706$\\{dot\_accent}(8,b,\\{dot\_shift});$\par 707$\2{penlabels}(1,2,3,4,7,8);\3{endchar};$\par 708\Endmft 709\medskip 710 711\noindent 712in which the line 713 714\medskip 715\Beginmft 716$\\{dot\_sharp\_values};$\par 717\Endmft 718\medskip 719 720\noindent 721expands to a macro 722 723\medskip 724\Beginmft 725$\2{def}\\{dot\_sharp\_values}=$\par 726$\2{numeric}\\{dot\_diam}\SH ;\ \\{dot\_diam}\SH =\1{max}(\\{dot\_size}\SH ,% 727\\{cap\_curve}\SH );$\par 728$\2{numeric}\\{dot\_top}\SH ;\ \\{dot\_top}\SH =\1{min}(\\{asc\_height}\SH ,% 729\frac10/{7}\\{x\_height}\SH +.5\\{dot\_diam}\SH );$\par 730$\!\3{enddef};$\par 731\Endmft 732\medskip 733 734 735\noindent 736which repeats some of the calculations made in the definition of the dot 737accent. 738 739 740The composites that result from this programming effort look, for the 741most part, identical to the results of the application of the {\tt\bs 742accent} primitive to characters in the regular Computer Modern fonts. 743The one major difference comes in the shape of the ``hat'' accent over 744the letter `{\bf i}'. In this instance, the accent would spread beyond the 745left and right side bearings of the underlying character and mess up the 746letter spacing if it were 747not pinched in, so a special narrow hat accent is provided for `{\bf i}'. 748The proportions of each stroke remain essentially the same as 749those in the original model, but they form an acute angle over the top 750of the letter. Except in the case of this character and some of the 751uniquely Turkish dotted uppercase letters, 752it will probably be difficult to distinguish the two styles of accent 753in the final printed version even when they are 754intermingled in the same text. 
755 756The creation of the composite characters is only the first stage in 757the development of the font. Next, the italic correction must be set 758for all the italic and slant fonts. This is the spacing that may be 759added to the right side of any slanted character to prevent it from 760running into something like a non-slanted closing parenthesis. There 761does not seem to be any way except visual inspection to discover an 762acceptable italic correction. One wants a fairly simple, general 763calculation, but one which will do rough justice 764to all slanted versions of the character. There were more proof 765copies generated to get the italic correction right than for any other 766feature of the font. (In the absence of any accessible system on which 767proofs could be displayed on the screen, a great many paper proofs had 768to be generated.) 769 770Following this comes the generation of ligature and kerning tables, 771which are necessarily quite large, and need to be carefully worked out 772since there is only a finite region of a {\tt tfm} file that can be 773devoted to them. The smaller of the two ligature tables, for the 774italic fonts, is shown in Appendix A. It still needs one further refinement; 775the kernings appear in the order of English letter frequency, and it 776might be possible to gain a little efficiency by rearranging some of 777them. Notice that the `{\bf f$\,$}' ligatures are altogether eliminated. 778In Turkish it is essential to retain the distinction between the 779dotted and the undotted `{\bf i}', which cannot be done if the `{\bf 780fi}' ligature is used. The problem that arises, in fact, is to provide 781adequate separation between the dotted `{\bf i}' and a preceding `{\bf 782f$\,$}'. 783 784In addition to the accented characters, it was necessary to design 785three additional characters for Ottoman Turkish. The simplest is 786a dot at about the bar height of lower case `{\bf e}'. 
This is 787used for a type of Persian suffix known as ``izafet,'' which is 788very common in Ottoman texts. The remaining two characters are 789representations of the Arabic letters ``Ayn'' and ``Hamza,'' which 790are conventionally represented by opening and closing single 791quotes in most fonts. The ``lazy man's \;{`ayn}'' (as just 792illustrated) is acceptable for the occasional reference, but not 793for extensive literary texts. Ayn is not an accent; it is a 794regular consonant of the Arabic alphabet, and Hamza, though it can 795be omitted in many positions, is also a consonant. What is needed 796is a pair of characters which are clearly distinguishable from 797single quotes, but sufficiently like them to conform with the 798general appearance of Computer Modern. The programs shown below 799draw on the same standard definition as is used to generate the 800single quotes, but alter the position and the proportions. The 801bulb is uppermost in both instances, and is somewhat smaller than 802the bulb of the close quote. The tail is brought out further from 803the side of the bulb, and is tucked more tightly under. The 804versions for slanted and italic fonts use some special 805transformations to ensure that the {\tenti \`ayn} (that was the 806character from the Ottoman font) is correctly formed. In 807effect, the character is built out to the left of the centerline, 808with a reverse slant, and then reflected back into the normal letter space. 809The program for these characters is given in Appendix B. 810 811A complete passage from our first proposed critical text edition is 812given below, first in \TeX\ input coding, and then as typeset. The 813text from which this passage was extracted runs to twelve pages, and 814was set without the benefit of a properly rewritten hyphenation table. 
815By good luck, most of the English pattern hyphenations turned out to 816correspond with acceptable Turkish hyphenations, but it will certainly 817be necessary to make up a proper Turkish hyphenation table in the near 818future. When that is done, and an appropriate set of formatting 819macros has been written to isolate Turkish text from non-Turkish text 820and math mode, we will have a Turkish language adaptation of \TeX\ 821which can be exported onto any small \TeX\ system, with no alteration 822of the program whatsoever. The full range of standard Computer Modern 823font styles will be available, and will blend in perfectly with the 824normal unaccented library of Computer Modern fonts. We will not have 825a truly bilingual version of \TeX, but for a predominantly Turkish 826language environment we will be offering a cheaper and more accessible 827monolingual font-based adaptation. 828 829\bigskip 830{\verbatim {Tktexinput} } 831 832\bigskip 833\BeginTurkish 834\tentk 835\`A=s:ik!lik! zama=ninda \`is:k! va=sit!asi ve s:eyda=lik! \`a=lemi:nde 836s:evk! vesi=lesi:, vus!lat eyya=minda mah!abbet muk!tez:a=si, fi:ra=k! 837gu:nleri:nde h!urk!at i:k!ti:z:a=si, baha=r mev\-si:mi:nde s!oh!bet 838germi:yyeti:, mah!bu=blar mecli:si:nde s:ara=b keyfi:yyeti:, ca=na=neler 839i:bra=mi ve \`a=s:ik!lar i:k!da=mi ve fuz:ala= mus!a=h!abeti: ve \`uk!ala= 840i:lti:fa=ti, ehl..i: di:ller rag:beti: ve t!a=li:bler mi:nneti: i:le di:du:gu: 841ebya=t ve es:\`a=r, ki: her bi:ri:nu:n= lat!i=f ma\`a=ni=si: ca=m..i naz!ma 842s:ara=b..i rengi=n ve s:i=ri=n h=aya=la=ti bezm..i: s!afa=da nuk!l..i: 843s:ekkeri=n olup mu\`a=s:i:ra=n..i mecli:s..i: z=evk! bu meyh=a=nenu:n= 844ba=deci:si: ve h!ari=fa=n..i bezm..i: s:evk! bu ka=s:a=nenu:n= sebu=-kes:i: 845olmis:lardi. K!alem..i: i:\`ti:z=a=r bu h=a=me..i: i:nki:sa=r i:le bu evra=k!a 846tah!ri=re i:k!da=m ve bu ecza=ya tast!i=re i:hti:ma=m go:sterdi:. 
847\EndTurkish 848\tenrm 849\vfil\eject 850 851\strut\vskip .75in 852\centerline{{\elevenrm Appendix A}} 853\smallskip 854\centerline{{\elevenrm The {\eleventt turkit.mf} driver file}} 855\bigskip 856\Beginmft 857\input turkit.tex 858\Endmft 859 860\vfil\eject 861 862\strut\vskip .75in 863\centerline{{\elevenrm Appendix B}} 864\smallskip 865\centerline{{\elevenrm The {\eleventt aynhmz.mf} file}} 866\Beginmft 867\input aynhmz.tex 868\vfil 869\Endmft 870 871 872 873 874 875 876 877 878\bye 879