• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1%**start of header
% Header for a plain TeX article.  @ is made a letter here so that plain's
% private macros (\z@, \p@, \@MM, \n@space) can be used by the definitions
% below; it is set back to catcode 12 further down, after \ninebig.
2\catcode`@=11 % borrow the private macros of PLAIN (with care)
3
% Glue register; \ninepoint assigns it a value measured in the typewriter font.
4\newskip\ttglue
5
% Eleven-point faces are the ten-point Computer Modern fonts at \magstephalf.
6\font\elevenrm=cmr10 scaled \magstephalf % roman text
7\font\eleventt=cmtt10 scaled \magstephalf % typewriter
8\font\elevenit=cmti10 scaled\magstephalf
% Nine- and six-point faces, loaded at design size, for \ninepoint.
9\font\ninebf=cmbx9
10\font\ninerm=cmr9
11\font\ninei=cmmi9
12\font\ninesy=cmsy9
13\font\ninett=cmtt9
14\font\ninesl=cmsl9
15\font\nineit=cmti9
16\font\sixi=cmmi6
17\font\sixsy=cmsy6
18\font\sixbf=cmbx6
% Skew characters for the nine-point math italic and math symbol fonts;
% \hyphenchar=-1 suppresses hyphenation in the nine-point typewriter font.
19\skewchar\ninei='177
20\skewchar\ninesy='60
21\hyphenchar\ninett=-1
22
% \ninepoint: switch text and math to nine-point type.
% It redefines \rm, \it, \sl, \bf and \tt, assigns the text/script/
% scriptscript fonts of math families 0-3 and the text fonts of the it, sl,
% bf and tt families, sets \ttglue, an 11pt \normalbaselineskip and a strut of
% height 8pt and depth 3pt, lets \sc be \sevenrm, and ends by selecting \rm.
% All assignments are local, so the effect lasts until the enclosing group ends.
% \sixrm is declared further down this file, so \ninepoint must not be
% expanded before that \font declaration has been read.
23\def\ninepoint{\def\rm{\fam0\ninerm}% set in nine point families
24 \textfont0=\ninerm \scriptfont0=\sixrm \scriptscriptfont0=\fiverm
25 \textfont1=\ninei \scriptfont1=\sixi \scriptscriptfont1=\fivei
26 \textfont2=\ninesy \scriptfont2=\sixsy \scriptscriptfont2=\fivesy
27 \textfont3=\tenex \scriptfont3=\tenex \scriptscriptfont3=\tenex
28 \textfont\itfam=\nineit \def\it{\fam\itfam\nineit}% \it is family 4
29 \textfont\slfam=\ninesl \def\sl{\fam\slfam\ninesl}% \sl is family 5
30 \textfont\bffam=\ninebf \scriptfont\bffam=\sixbf
31  \scriptscriptfont\bffam=\fivebf \def\bf{\fam\bffam\ninebf}% \bf is family 6
32 \textfont\ttfam=\ninett \def\tt{\fam\ttfam\ninett}% \tt is family 7
% \tt is selected first so that the em units of \ttglue refer to \ninett.
33 \tt \ttglue=.5em plus.25em minus.15em
34 \normalbaselineskip=11pt
35 \setbox\strutbox=\hbox{\vrule height8pt depth3pt width0pt}% strut for 11pt
36 \let\sc=\sevenrm \normalbaselines\rm}
37
% Fonts and small helpers used in the running text.
38\font\scaps=cmcsc10 % for \LaTeX
39\font\lb=logobf10
40\chardef\bs=`\\ % backslash in a string
% \;{text} sets text in italics followed by an italic correction.  This
% replaces plain TeX's math thick-space \; in running text; \Beginmft restores
% a thick-space meaning locally inside MFT listings.
41\def\;#1{{\it #1\/}} % The simplest and most useful of all.
42
% Macros for MFT-formatted METAFONT listings.
43\input mftmac
44\parindent=1.5pc % restore after mftmac clobbered it
45
% Nine-point cmtex font with hyphenation switched off, used for quoted strings.
46\font\ninetex=cmtex9 \hyphenchar\ninetex=-1
% \finstring"text": set "text" (quotes included) in \ninetex, then close a
% group with \egroup.  The matching group opener is not in this file --
% presumably an mftmac string macro; TODO confirm against mftmac.
47\def\finstring"#1"{\ninetex"#1"\egroup}
48
% \Beginmft ... \Endmft bracket an MFT-formatted listing.
% \Beginmft ends the current paragraph, opens a group and, inside it:
%   - switches to \ninepoint;
%   - gives `:', `!' and `=' catcode 12;
%   - lets \; be a thick math space (\mskip\thickmuskip) instead of the
%     italic-text macro defined earlier in this file;
%   - redefines \bf so that it also redefines \_ as a short rule
%     (.3em wide, .6pt high, with small kerns either side);
%   - defines the \MF logo, lets \big be \ninebig, resets the strut,
%     selects \rm and sets \parindent to zero.
% \Endmft ends the paragraph and closes the group, undoing all of the above.
% NOTE(review): \manual and \shorthyf are not defined in the visible part of
% this file -- presumably they come from mftmac; verify.
49\def\Beginmft{\par\begingroup\ninepoint
50% \chardef:=`\: \chardef!=`\! % \chardef==`\=
51 \catcode`\:=12 \catcode`\!=12 \catcode`\==12
52 \def\mathsemicolon{\mskip\thickmuskip}
53 \let\;=\mathsemicolon
54 % nine-point type:
55 \def\bf{\fam\bffam\ninebf
56  \def\_{\kern.04em\vbox{\hrule width.3em height .6pt}\kern.08em}%
57  \ninebf}
58 \textfont\bffam=\ninebf \scriptfont\bffam=\sixbf
59  \scriptscriptfont\bffam=\fivebf
60 \baselineskip=11pt
61 \def\MF{{\manual hijk}\-{\manual lmnj}}
62 \let\big=\ninebig
63 \setbox\strutbox=\hbox{\vrule height8pt depth3pt width0pt}
64 \rm
65 \setbox\shorthyf=\hbox{-\kern-.05em}
66 \parindent=0pt
67 }
68\def\Endmft{\par
69     \endgroup}
70
% Turkish support: the file tkccode is read and two Turkish fonts are loaded
% (tkr10 and tkti10 -- presumably roman and text italic; confirm against the
% font sources).  The eleven-point variants are commented out.
71\input tkccode
72\font\tentk=tkr10
73% \font\elvntk=tkr11
74\font\tenti=tkti10
75% \font\elvnti=tkti11
76
% \BeginTurkish ... \EndTurkish bracket Turkish text in a group.  Inside it
% \` and \' select characters '43 and '100 of the current font, and `:', `!'
% and `=' become ordinary characters (catcode 12) with space factor 1000, so
% they can serve as post-positive ligature characters without producing
% end-of-sentence spacing.
77\def\BeginTurkish{\par \begingroup
78%   \chardef:=`\: \chardef!=`\! % \chardef==`\=
79   \def\`{\char'43}\def\'{\char'100}%
80   \catcode`\:=12 \catcode`\!=12 \catcode`\==12
81   \sfcode`\:=1000 \sfcode`\!=1000 \sfcode`\==1000 % Frenchspacing after
82%                                                    these
83   }
% \EndTurkish ends the paragraph before closing the group, so the paragraph
% is broken while the Turkish settings are still in force.
84\def\EndTurkish{\par %let's be quite certain about this!
85   \endgroup}
% \verbatim{file}: read a file and typeset it verbatim in nine-point
% typewriter type.  Inside a group, every character listed in \dospecials,
% plus `:', `!' and `=', is given catcode 12; spaces and line ends are made
% active (\obeyspaces, \obeylines); \parskip and \parindent are zeroed and
% overfull-box rules are suppressed.  \par is redefined to put a control
% space before \endgraf -- presumably so that blank input lines still
% produce a line of output.
86\def\verbatim#1{\begingroup\ninepoint \frenchspacing
87  \def\do##1{\catcode`##1=12 } \dospecials
88  \catcode`\:=12  \catcode`\!=12  \catcode`\==12
89  \parskip 0pt \parindent 0pt
90  \catcode`\ =\active \catcode`\^^M=\active
91  \tt \def\par{\ \endgraf}\overfullrule=0pt \obeylines \obeyspaces
92  \input #1 \endgroup}
93% a blank line will be typeset at the end of the file;
94% if you're unlucky it will appear on a page by itself!
% Globally define the active space character to mean a control space, which
% is what \obeyspaces text will then typeset.
95{\obeyspaces\global\let =\ }
96
% Display fonts for the title block, and the six-point roman that is used for
% the folio and as \scriptfont0 inside \ninepoint.
97\font\titlefont=cmr17
98\font\namefont=cmcsc10 scaled \magstep1
99\font\twelvebd=cmbx12
100\font\sixrm=cmr6
101
% Page geometry.  \nopagenumbers is overridden straight away by the \footline
% assignment, which centers the folio in \sixrm.
102\vsize=43pc
103\hsize=29pc
104\parindent=16pt
105\nopagenumbers
106\normalbaselines
107\footline={\hss\sixrm\folio\hss}
% Foot line set 3pc below the page body.
108\def\makefootline{\baselineskip3pc\line{\the\footline}}
% Replacement for plain TeX's \plainoutput: ship out headline, body and
% footline, advance the page number, then run \dosupereject when the output
% penalty calls for it.
% NOTE(review): the \vsize=45pc assignment is not \global.  TeX runs the
% output routine inside a group, so this setting is probably discarded when
% the routine finishes and pages stay at the 43pc set above -- confirm whether
% taller pages after the first were intended.
109\def\plainoutput{\shipout\vbox{\makeheadline\pagebody\makefootline}%
110  \advancepageno
111  \vsize=45pc
112  \ifnum\outputpenalty>-\@MM \else\dosupereject\fi}
% Running head in a zero-height box raised 22.5pt above the body: empty on
% page 1, the document title flush right on other odd pages, the author names
% flush left on even pages.  \doctitle and \authorname are token registers
% allocated after this definition, which is fine because they are only looked
% up when a page is shipped out.
113\def\makeheadline{\vbox to\z@{\vskip-22.5\p@
114  \line{\vbox to8.5\p@{}\ifnum\count0=1
115         \else\ifodd\count0
116                \hfill\the\doctitle\ignorespaces
117             \else \the\authorname\hfill\fi\fi}\vss}\nointerlineskip}
118
% Token registers holding the title and the author line; they are used by
% \makeheadline and by the title block that follows the header macros.
119\newtoks\doctitle  \newtoks\authorname
120
121\doctitle={The Ottoman Texts Project}
122\authorname={Walter Andrews and Pierre MacKay}
123% Added to the general format file
124% an addition needed by mftmac
% \ninebig<delimiter>: a left delimiter sized to enclose an empty 7.25pt-high
% box, built with the ten-point roman and symbol fonts as math families 0 and
% 2.  \Beginmft makes \big an alias for this macro.
125\def\ninebig#1{{\hbox{$\textfont0=\tenrm\textfont2=\tensy
126   \left#1\vbox to7.25pt{}\right.\n@space$}}}
127\catcode`@=12 % at signs are no longer letters
128\tenrm
129\begingroup
130\titlefont
131\the\doctitle\hfil
132\vskip 3pc
133\namefont\the\authorname\hfil
134\tenrm
135\vskip 1pc
136{\obeylines
137Department of Near Eastern Languages and Civilization
138University of Washington
139Seattle, Washington 98195
140}
141\vskip 3pc
142\line{\hfill\twelvebd ABSTRACT\hfill}
143\vskip 3pc
144\leftskip 4pc \rightskip 4pc
145The Turkish orthographic reform of 1928, which required the abandonment
146of Arabic script in favor of a Latin letter alphabet, was accompanied
147by a cultural rejection of all literature from the
148Ottoman period of Turkish history.  As a result, only a small part of
149Ottoman Turkish literature has been made available in scholarly editions
150in the new orthography.  The Ottoman Texts Project is a cooperative
151effort of Turkish and North American scholars to provide new editions
152of these works using popular low-priced personal computer systems and
153standard general purpose software.  This paper describes an approach
154based on the adoption of \TeX\ as the preferred output system for
155publication.
156\par
157\endgroup
158\vskip 3pc
159%**end of header
% Body-text helpers.  \sc here simply selects eight-point roman (cmr8), not a
% caps-and-small-caps font; \ninepoint re-lets \sc to \sevenrm within its own
% scope.
160\font\eightrm=cmr8
161\def\sc{\eightrm}
162
% Large face for dropped initials: cmr17 scaled by \magstep2.
163\font\huge=cmr17 scaled \magstep2
% \dropinitial{<initial>}{<rest>}: begin a paragraph with a dropped initial.
%   #1  the initial, set in \huge; any balanced text is accepted, including
%       a braced accent construction
%   #2  the text that follows the initial, set in \sc
% The initial is built in box 8.  The first two lines of the paragraph are
% indented (\hangafter=-2) by the larger of \parindent and 1.3 times the
% width of that box, and the box is then backed up into the indentation.
% Scratch registers used: box 8 and \dimen8.
% The two helper macros take a genuine parameter (##1) and receive #1 in
% braces.  Writing a bare #1 in their parameter text turned the initial
% itself into delimiter text, which failed whenever #1 held a brace group.
164\def\dropinitial#1#2{\def\biginitial##1{{\huge##1}}%
165  \def\makeinitial##1{\setbox8\hbox{\strut\vbox to 1.3ex
166    {\hbox{\biginitial{##1}}\vskip -4pc plus 3.5pc minus 3.5pc}}}%
167  \makeinitial{#1}%
168  \ifdim\parindent>1.3\wd8\dimen8=\parindent
169     \else\dimen8=1.3\wd8\fi
170  \hangindent=\dimen8\hangafter=-2
171  \noindent
172  \strut\hskip-1\dimen8\box8{\sc#2}}%
173
174\noindent
175The Ottoman Texts editing and typesetting project
176represents an attempt to provide a simple, low-cost
177system for the entry, editing, and typesetting of
178transcribed [romanized] Ottoman Turkish texts.  The
179purpose of developing such a system was to take advantage
180of the increasing availability of microcomputers
181world-wide and to induce the editors of Ottoman texts--especially
182Turkish editors--to employ electronic media
183for their editing tasks.  The benefits to scholars of
184having a large corpus of texts available in
185machine-readable form seem obvious, but overcoming
186``technology cringe'' on the part of scholars whose
187devotion to medieval literature stems in large part from
188a strong conservative-traditionalist ideological bent is
189no small task.  Nonetheless, the rewards of converting a
190significant number of such scholars would be quite high.
191The vast majority of significant Ottoman Turkish texts
192await up-to-date editing and the suggested technological
193change could have a major impact on the speed and
194accuracy of the editing process as well as on the
195development of lexicographical tools and on many areas of
196literary and linguistic study.
197
198        The situation in Ottoman studies that makes a
199switch to electronic media especially attractive at this
200time is rather complex and demands some historical
201introduction.  From its earliest years at about the
202beginning of the 14th century until early in the 20th
203century, the Ottoman dialect of Turkish was written in
204the Arabic script.  The political decline of the Ottoman
205Empire from its pinnacle of world power in the 16th
206century to its status as a moribund, defeated ally of
207Germany following World War I, was arrested in the first
208three decades of this century by a political and
209ideological revolution that saw the establishment of a
210Turkish Republic and an accompanying rejection of the
211literary, cultural, and religious institutions of the
212Ottoman past.  One aspect of the cultural revolution was
213the adoption of a Latin-letter alphabet for Turkish, a
214change which had among its
215consequences the expansion of literacy beyond a small
216elite circle to the general populace, a conscious effort
217to simplify the written language, and a resultant major
218decline in the ability to read and comprehend the Ottoman
219literary language in any of its forms.  The ethos of the
220early years of the Republic, to which the Ottoman Empire
221appeared as decadent and its culture as derivative, also
222meant that, at a time when the scholarly edition of older
223texts was becoming a growing concern in other parts of
224the world, in Turkey interest in things Ottoman,
225including Ottoman texts, was considered backward,
226anti-nationalist, counter-revolutionary, and
227wrong-headed.  As a result, very few texts were
228adequately edited and the population in general was
229further cut off from its historical past.  Since the
230Second World War, however, there has been an increased
231scholarly interest in Ottoman texts and in the
232transcription and edition of such texts.  This interest
233has grown with the growth of a tolerance for some
234reemergences of older ethical, religious, and cultural
235practices and attitudes.
236
237        It is clear that the particular situation in
238Turkey today lends itself to the adoption of editing
239methodologies that take advantage of computer technology:
240there is a large cadre of well-educated persons with very
241positive attitudes toward technological innovation; the
242Latin alphabet is used [with modifications for Ottoman
243transcription]; most of the basic editing work remains to
244be done; there is already great interest in the types of
245concordancing, indexing, lexicographical analysis, etc.\ that
246can be most easily done by computers.  Nonetheless,
247Ottoman studies is still an area that attracts persons
248who would be least likely to welcome technological
249innovation and so any change would need to bring
250immediate and obvious benefits.  When the editing project
251was initiated, it was decided that the result should have
252the following characteristics:
253
254\medskip
255\noindent1.\quad It should be easy to use even for the most
256unsophisticated user.
257
258\noindent2.\quad It should be adaptable to many different
259circumstances and should be easily supportable.
260
261\noindent3.\quad It should obviously eliminate the need for
262more than one entry of the basic text.  [This is, of
263course, common to all computer word-processing systems
264but it is such a major departure from the usual round of
265draft typings that its benefits must be emphasized to
266those who have not experienced it.]
267
268\noindent4.\quad  It should be capable of producing typeset
269camera-ready copy for printing. [This is a major
270potential benefit even in Turkey where the costs of more
271labor-intensive typesetting methods are growing rapidly.]
272
273\medskip
274
275        The project developed in several stages and was
276not without its problems and false starts.  The first
277stage involved convincing a noted Turkish scholar and
278respected editor of Ottoman texts to come to the
279University of Washington to attempt to edit the collected
280poems of a 16th century Ottoman poet using the IBM XT
281already employed by the Department of Near Eastern
282Languages and Civilization for the development of
283Turkish character-sets.  Scholarly processes being what
284they are it turned out to be easier to bring the scholar
285than to have the necessary word-processing capabilities
286ready when he arrived.  As a result, a rather cumbersome
287combination of Microsoft's WORD, Rosesoft's ``smart key''
288program [PROKEY], and a series of BASIC programs
289developed by Robert Blum of the UW administration was
290used to enable the Turkish visitor to input and edit
291about 90\% of the poems in the collection [over 500 poems]
292in about three months.  The editor, who had had no
293previous experience of computers and no particular liking
294or aptitude for them, was an eager and willing convert to
295the process.  Prior to his departure, we were also able
296to employ a simple translation program which converted
297the character-set designed for the XT to \TeX\ notation
298and, subsequently, to produce a typeset sample of the
299edited text on the SUN
300minicomputer.  The reaction of our visitor to the
301results of this process, which was carried out with the
302help of two fellow scholars without the intervention of
303typists or typesetters, was pure delight and amazement.
304
305        In the ensuing months the project has been
306considerably refined and improved.  With the invaluable
307assistance of the UW Humanities and Arts Computing
308Center and its resident character-sets guru, Gerald
309Barnett, we have been able to develop a word-processing
310system that is simple, efficient, flexible, and low-cost.
311The system is based on Quicksoft's PC WRITE program used
312with EGA/VGA and compatible graphics hardware.\footnote{$^*$}{NOTE: At
313present, the system produces a host of irritating ``ghost
314diacritics'' when used with the IBM PS/2 graphics--these
315are a distraction more than a real hindrance but, as
316yet, we have no idea why they occur.}  The advantages of
317PC WRITE for this kind of word-processing are numerous
318but it is worth mentioning a few in some detail.
319
320        Given the goal of making this technology widely
321available among scholars and students [especially among
322foreign students and scholars], the fact that PC WRITE is
323low-cost, share-ware [\$89.00 with full support] makes it an
324attractive alternative.  Moreover, PC WRITE permits
325virtually limitless customization of keyboards, fonts,
326printer controls, etc.\ in a manner accessible to persons
327without any knowledge of programming or programming
328languages.  Using a simple set of programs--a program
329designed at Duke University for the creation of
330characters for display on an EGA driven monitor and a
331program being developed by Gerald Barnett of the UW for
332the production of downloadable printer fonts--we have
333been able to produce a word processing system that can
334display and edit an extended IBM character-set, which
335will allow the use of modern [roman alphabet] Turkish,
336the romanized transcription of Ottoman Turkish [Arabic
337alphabet], and a full English characters font.  One can
338also switch instantly between a standard IBM
339keyboard, an IBM keyboard adapted to Turkish
340characters, and the standard Turkish keyboard with
341extensions for the Ottoman character-set.  In addition,
342the system supports draft printing on the IBM Pro Printer
343and letter quality printing on the NEC and Toshiba 24 pin
344printers [with the use of a bi-directional tractor].
345
346        The extended IBM character-set uses 8 of the
347special European characters, 32 special Ottoman
348transcription characters [on ASCII codes 192--223], and 9
349special modern Turkish characters [on ASCII codes
350225--233], as well as the full English set.  All of the
351modern Turkish characters appear as characters on the
352modified standard keyboard.  The Ottoman Turkish
353characters [standard English characters with diacritics]
354are called up by two-key sequences.  For example, a ``d''
355with a dot under it is produced by striking ``/'' followed
356by ``d''; all other special Ottoman characters are produced
357by the same sequence [``/''$+$``character''].  Keyboard
358arrangement and the particular character used to call up
359the special characters can be easily modified to suit the
360preferences of the user.
361
362        One fortunate aspect of the print control features of PC
363WRITE for this project
364is that the print control program can be set up to
365support two different fonts for each character.
366Therefore, \TeX\ notation can be provided as an alternative
367for each character and translation from the usual
368word-processor font to \TeX\ notation can be done
369automatically by simply printing in the \TeX\ input character set to
370another file.  Because PC WRITE produces ``clean'' ASCII
371files, the material is immediately ready for typesetting
372in whatever \TeX\ environment is being used.
373
374\medskip
375\centerline{{\elevenrm Accented character sets in \TeX}}
376\smallskip
377
378\noindent
379In the few years since
380the official release of \TeX, a number of
381attempts have been made to adapt the program to languages other than
382English.  The best known successes have depended on adaptations of the
383program itself, partly because the standard release of \TeX\ can
384support only one system of hyphenation at a time, which makes a truly
385bilingual document quite difficult to produce.  These adaptations may
386be broadly classed as program-based extensions of the language.  The
387extension which is most obviously necessary is the addition of a
388primitive which can control the switch between one predigested
389hyphenation pattern and another.  Michael Ferguson's bilingual CNRS-\TeX,
390which was initially developed for an environment in the province of
391Quebec, where French and English are constantly intermingled, is one
392of the outstanding developments in this class of adaptations, and
393there are others as well.
394
395A second extension is needed to get around the problem of hyphenation
396in languages which make use of diacriticals and accents.  The basic
397form of \TeX\ will reject any word containing an accent from the
398evaluation routine
399which normally looks for acceptable hyphenation breaks.  In effect,
400any word with an accent is treated as if it were an unbreakable
401horizontal box, and is not evaluated for hyphenation at all.  This can
402make line-breaking very difficult, and several users of \TeX\ have
403found it necessary to introduce a loop into the program so that
404accents and diacriticals will be stripped out just before the entry to
405the hyphenation routine, and then returned to their remembered positions
406after the discretionary hyphen nodes have been inserted into the word.
407
408The disadvantage of both these systems is that the adapted program is no
409longer \TeX.  It is often possible to add the extra features in such a way
410that the resultant program will produce {\tt DVI} files that are
411indistinguishable from those generated by \TeX, but the extra features
412are not generally available on all systems which run \TeX, and the
413user is often excluded, therefore, from some of the most popular small
414system versions of \TeX.
415
416An alternative solution to the problem of accented languages, though
417not of bilingual hyphenation patterns, is a font-based, rather than a
418program-based approach.  Font characters may be generated with the
419accents already applied, and mapped into unused or little-used areas
420of the normal Computer Modern font table.  If these characters are
421then supplied with an appropriate \TeX\ {\tt\bs lccode} value, the
422hyphenation loop will recognize them as part of a sequence capable of
423being hyphenated.  For a monolingual application in a language which
424makes intensive use of accents and diacriticals, this can be an
425attractive approach, especially when there are reasons for wishing to
426preserve the ability to make use of small system versions of \TeX.
427This is the approach we have taken for Turkish \TeX.
428
429Turkish provides a delightfully vivid set of examples of accentuation
430and hyphenation.  The Latin-letter character set which has been in
431use since the orthographic reform of 1928 is extended, even in Modern
432Turkish, by means of a considerable number of diacriticals and accents.  A
433diligent search through the modern dictionary will produce several
434five- and six-letter words in which every character is accented, and an
435intensive search might come up with words as much as nine letters long
436with every character accented.  In critical editions of Ottoman texts,
437the number of accents more than doubles.  Modern Turkish knows only
438the accented and unaccented pair of letters `{\bf s}' and `{\bf\c s}', but
439Ottoman Turkish has `{\bf s}', `{\bf\c s}', `{\bf\d s}' and `{\bf\b s}', which
440represent four completely distinct characters in the Arabic alphabet.
441The letter `{\bf h}' shows almost as much variety, and so do several
442others.  Our Ottoman Turkish font has twenty-seven accent and letter
443composites, in addition to the basic twenty-six simple Latin letters.
444Moreover, all composites can exist in upper case forms as well as in
445lower case.
446
447When a character set is as heavily accented as this, it is desirable
448to make sure that the accents are positioned over their letters as
449exactly as possible.  The {\tt\bs accent} primitive in \TeX\ does a
450remarkably good job of positioning accents, but it depends on a very
451general algorithm, and tends to place accents exactly centered over
452or under the affected character, no matter what the appearance of
453that character may be.  Donald Knuth recognized this limitation in
454the very earliest stages of the development of \TeX, and has
455consistently recommended that frequently used combinations of
456character and accent be developed as composite single images in the
457font.  The center of a character is not always the best visual
458position for an accent; top accents should often be slipped just a
459bit to the right, and bottom accents just a bit to the left of the
460mechanically defined centerline of the character.  Height and depth
461of accents are similarly subject to aesthetic judgement.
462The {\tt\bs accent} primitive of \TeX\ works very well indeed for
463sparsely occurring accentuation, but not so well when accents occur
464in every second word.
465
466The problem of hyphenation in Turkish is even more striking.  Turkish
467is known as an ``agglutinating'' language, which means, in effect, that
468each discrete logico-syntactic qualification of a basic word is
469expressed in a single syllable tacked onto all the other syllables in
470the word.  At the same time, it is a language in which consonant
471clusters are virtually unknown.  A Turkish word is made up of simple
472open and closed syllables, of the form {\tt cv} or {\tt cvc}, and in
473native words there is not even the distinction between long and short
474vowels.  The result is a language in which word-length tends to be
475greater than it is even in English, and where, as a result,
476hyphenation is often necessary.  The hyphenation rules are inherited
477from the syllabification of Arabic.  A syllable is assumed always to
478consist of an initial consonant (even when that consonant is no longer
479written) and to terminate in a vowel or in the next unvowelled
480consonant.  This pattern is followed so absolutely that it is
481permitted to break up native Turkish suffixes.  The plural suffix
482\;{-ler-} will be hyphenated as \;{-le-rine} in an environment where
483the {\tt -cv-cv-cv} pattern predominates.
484
485A set of hyphenation patterns for Turkish will therefore be quite
486simple to produce, but it will have no effect on most Turkish words
487unless something is done about the problem of accents.  A word such as
488\;{\c cektirilebilecek} ought to provide six discretionary
489hyphenation nodes: \;{\c cek-ti-ri-le-bi-le-cek}, but the {\tt\bs
490accent} primitive applied to the first letter will guarantee that the
491standard version of \TeX\ gives up any attempt to hyphenate it at
492all.\footnote{$^*$}{The word is a future participle, and describes
493something as being
494capable of being extracted at some time in the future--like a tooth.
495A morphological division of the word would produce a very different
496hyphenation pattern, \;{\c cek-tir-il-e-bil-ecek}, with only five nodes.}
497If the initial letter `{\bf \c c}' were a single character in a special
498font, and were provided with an {\tt\bs lccode} value, the {\tt\bs
499accent} primitive would no longer appear, and the word could be
500evaluated for hyphenation.
501
502Since the majority of \TeX\ users will never have to deal with {\tt\bs
503lccode}s at all, a word of explanation is in order here.  \TeX\
504is designed to take care of the problems of typesetting in a
505general manner, independent of the language of the text to be
506set.  The program recognizes that while many languages have
507paired upper and lower case character sets, not all do, and the
508order of the basic text character set may not be that of the
509Latin alphabet.  For this reason, specific upper and lower case
510pairings are not built into the program, but are supplied by
511macro definitions in {\tt plain.tex}.  Like all other
512definitions in plain.tex, they may be replaced, and it is quite
513possible to dispense with plain.tex altogether, and substitute
514another basic format file such as {\tt sadece.tex}, {\tt
515franc.tex}, {\tt einfach.tex} or {\tt sketo.tex}.  (Knuth
516insists, for obvious reasons, that the one thing you may not call it is
517``plain.tex.'')  If additional characters such as the accented letters
518of Turkish are made part of the basic input coding table, then they
519are likely to exist in upper and lower case pairs.  Each lower case
520code is given itself as a lower case {\tt\bs lccode}, and the code of
521its upper case equivalent as its {\tt\bs uccode}.  These can be used
522to force conversion from one case to the other, but the {\tt\bs
523lccode} serves an additional purpose.  When \TeX\ enters the program
524loop which searches for discretionary hyphen nodes in each word, it
525first unpicks all ligatures such as {\bf ffi} and then evaluates the
526resultant list from the beginning,
527working on any given word only so long as every character it
528finds has a valid {\tt\bs lccode}.  Any node that is not a simple
529character with a valid {\tt\bs lccode} causes the routine to terminate;
530the sequence so marked is supplied with no discretionary
531hyphen nodes at all, and therefore cannot be broken by the
532line-breaking algorithm.  This is what prevents hyphenation in the
533case of the Turkish word given above.
534
535\medskip
536\centerline{\elevenrm Input Code Interpretation}
537\smallskip
538
539\noindent
540The Turkish text-editing system described above
541is driven from a keyboard mapped to conform as
542closely as possible to the standard Turkish typewriter keyboard.
543This mapping is not used directly in the design of the Ottoman
544Turkish font and, in its present form,
545is isolated from the actual \TeX\ input.  After the
546raw input has been corrected, it is passed through a filter which
547converts the accented characters into character pairs (or, in a very
548few instances, into \TeX\ command sequences).  These pairings are
549based on a proposal made more than ten years ago at the Orientalist
550Congress held in Paris, in 1974.  Owing to the extraordinary richness
551of the Ottoman Turkish character set, it has been necessary to extend
552the old proposal, but it still retains the original principles, which
553are closely associated with the coding scheme used by the Onomasticon
554Arabicum project.  The Onomasticon Arabicum uses a post-positive dot
555and a post-positive hyphen to indicate diacriticals, which is
556acceptable in a data-base of names, but not in continuous prose text.
557To provide the indications for Ottoman Turkish diacriticals,
558we have taken over the exclamation
559point `{\tt!}', the equals sign `{\tt=}', and the colon `{\tt:}'.
560
561The exclamation point is used for all the ``emphatic'' letters of the
562Arabic alphabet (the alphabet in which Turkish was written until
5631928).  These are the letters \;{\d Dad} (usually pronounced as `{\bf z}' in
564Turkish, and hence paired with a non-Arabic letter known as \;{\.Zad}),
565\;{\d Sad}, \;{\d Ha'}, \;{\d Ta'} and \;{\d Za'}.
566The equals sign is used for all the
567consonants which are represented in Latin-letter transcriptions by a
568letter with a bar under, such as {\bf\b d} (\;{dhal}), more
569commonly written in
570Turkish as `{\bf\b z}', and also for vowels with a macron or, following the
571Turkish convention, a `hat' accent, and similar forms, chosen like the
572cupped `{\bf\u g}', because the equals sign is visually closer than the colon
573is.  (Moreover, the colon is needed for a different variety of the
574letter `{\bf g}'.)  The colon is a catch-all for everything else, but works
575out rather well visually, as it happens.  The three post-positives are
576not accents, but regular characters, which use the \TeX\ convention of
577ligatures to invoke accented characters from the font, just as the
578second `{\bf f$\,$}' in the normal \TeX\ `{\bf ff}'
579ligature pair does.  If a standard
580Latin-letter character does not have an associated ligature table in
581the font, a following colon will be unaffected.
582Thus, the letter `{\bf o}',
583when followed by a colon will produce `{\bf\"o}', but the letter `{\bf e}' when
584followed by a colon will produce `{\bf e:}'.  The equals sign is returned to
585its normal function in math mode, and the colon and exclamation point
586can be invoked by the command sequences {\tt\bs:} and {\tt\bs bang}
587when the simple character will not work.
588
589This set of conventions produces an input file which can, if
590necessary, be edited on an ordinary terminal lacking the special
591Turkish character features, and which a Turkish speaker can become
592accustomed to without too much difficulty.  When coupled with a
593well-designed macro file and a rewritten hyphenation table, it
594provides the possibility of naturalizing a \TeX\ environment into
595Turkish without any large investment in special purpose hardware and
596rewritten versions of non-standard (non-)\TeX.
597
598\medskip
599\centerline{\elevenrm The Font}
600\smallskip
601
602\noindent
603Donald Knuth's Computer Modern fonts come with a wide range of accents, which
604cover most of the requirements for Turkish.  The only obvious lack is
605the flat cup which is used under both upper and lower case `{\bf h}' as
606an aesthetic variant for the simple bar under the letter.  All the
607existing accents in Computer Modern are designed for consistency
608with the stroke-weights and proportions of the underlying alphabetic
609characters, and it is therefore very desirable to
610retain the details of this design in any associated font of accented
611characters.  The vertical and horizontal positions
612may be altered and, for other languages than Turkish, the angle of
613acute and grave accents over upper case letters, but the basic
614proportions of each accent or diacritical remain unchanged.  This is
615achieved by taking over the entire text of the Computer Modern
616character file {\tt accent.mf} and converting the {\tt beginchar
617$\ldots$ endchar} pairs to {\tt def} and {\tt enddef}.  It is
618not quite so easy as that, but the process is essentially mechanical,
619and guarantees the preservation of all the essential design details for
620each accent.  (The flat cup under `{\bf h}' is based on the Slavic tie
621accent, turned upside down.)  The resultant file, {\tt accdef.mf},
622is now full of ``definitions'' which can be
623invoked as part of the program file for composite characters.
624Positioning, however, can not be entirely taken care of in the
625{\tt accdef.mf} file.  The accents in {\tt accent.mf}
626are, for the most part, designed with a fixed reference
627point at the top of the image, but correct positioning usually
628requires a knowledge of where the bottom edge will be.  It is
629therefore necessary to take some of the calculations from the accent
630definitions, and incorporate them into the description of the
631underlying character.  For example, the superscript dot accent in the
632Computer Modern font is produced as follows.
633\medskip
634\Beginmft
635$\2{iff}\\{ligs}>0\?\2{cmchar}\7"Dot accent";$\par
636$\2{numeric}\\{dot\_diam}\SH ;\ \\{dot\_diam}\SH =\1{max}(\\{dot\_size}\SH ,%
637\\{cap\_curve}\SH );$\par
638$\2{beginchar}(\1{oct}\7"137",5u\SH ,\1{min}(\\{asc\_height}\SH ,\frac10/{7}%
639\\{x\_height}\SH +.5\\{dot\_diam}\SH ),0);$\par
640$\2{define\_whole\_blacker\_pixels}(\\{dot\_diam});$\par
641$\2{italcorr}h\SH \ast \\{slant}+.5\\{dot\_diam}\SH -2u\SH ;$\par
642$\2{adjust\_fit}(0,0);$\par
643$\2{pickup}\\{tiny}.\\{nib};\ \\{pos}_{1}(\\{dot\_diam},0);\ \\{pos}_{2}(\\{dot%
644\_diam},90);$\par
645$x_{1}=x_{2}=.5w;\ \\{top}\,y_{2r}=h+1;$\par
646$\2{if}\\{bot}\,y_{2l}<\\{x\_height}+o+\\{slab}\?y_{2l}:=\1{min}(y_{2r}-%
647\\{eps},\\{x\_height}+o+\\{slab}+.5\\{tiny});\3{fi}$\par
648$y_{1}=.5[y_{2l},y_{2r}];\ \\{dot}(1,2);\ \9 dot\par
649$\2{penlabels}(1,2);\3{endchar};$\par
650\Endmft
651\medskip
652
653\noindent
654The corresponding {\tt accdef.mf} definition is
655
656\medskip
657\Beginmft
658$\2{def}\\{dot\_accent}(\2{suffix}\$,@)(\2{expr}\\{dotY\_shift})=$\par
659$\2{save}@;$\par
660$\2{forsuffixes}\\{\$\$}=@,@_{\_}\?\2{transform}\\{\$\$};\3{endfor}$\par
661$\2{numeric}\\{dh}\SH ;\ \\{dh}\SH :=\1{min}(\\{asc\_height}\SH ,\frac10/{7}%
662\\{x\_height}\SH +.5\\{dot\_diam}\SH );$\par
663$\2{define\_whole\_blacker\_pixels}(\\{dh},\\{dot\_diam});$\par
664$\2{pickup}\\{tiny}.\\{nib};\ \\{pos}_{@\_1}(\\{dot\_diam},0);\ \\{pos}_{@\_2}(%
665\\{dot\_diam},90);$\par
666$x_{@\_1}=x_{@\_2}=x_{\$};\ \\{top}\,y_{@\_2r}=\\{dh}+1;$\par
667$\2{if}\\{bot}\,y_{@\_2l}<\\{x\_height}+o+\\{slab}\?y_{@\_2l}:=\1{min}(y_{@%
668\_2r}-\\{eps},\\{x\_height}+o+\\{slab}+.5\\{tiny});\3{fi}$\par
669$y_{@\_1}=.5[y_{@\_2l},y_{@\_2r}];$\par
670$\2{numeric}\\{dot\_span};\ \\{dot\_span}=\\{dh}-\\{bot}\,y_{@\_2l};$\par
671$@=\\{identity}\2{if}\\{dotY\_shift}<>0\?\6{shifted}(0,\\{dotY\_shift}+\\{dot%
672\_span})\3{fi};$\par
673$\2{for}n=1,2\?\2{forsuffixes}e=l,,r\?$\par
674\quad$z_{@}[n]e=z_{@\_}[n]e\6{transformed}@;\3{endfor}\3{endfor}$\par
675$\\{dot}(@_{1},@_{2});\ \9 dot\par
676$\2{penlabels}(@_{1},@_{2});\3{enddef};$\par
677\Endmft
678\medskip
679
680Getting this into position over the letter `{\bf o}' requires the
681following program text,
682
683\medskip
684\Beginmft
685$\2{cmchar}\7"The letter dotted o";$\par
686$\\{dot\_sharp\_values};$\par
687$\2{beginchar}(\1{oct}\7"025",9u\SH ,\\{dot\_top}\SH ,0);$\par
688$\2{italcorr}\frac1/{3}[\\{x\_height}\SH ,\\{asc\_height}\SH ]\ast %
689\\{slant}-.5u\SH \2{if}\\{serifs}\?+.25\\{dot\_diam}\SH \3{fi};$\par
690$\2{adjust\_fit}(\2{if}\\{monospace}\?.5u\SH ,.5u\SH \2{else}\?0,0\3{fi});$\par
691$\\{penpos}_{1}(\\{vair},90);\ \\{penpos}_{3}(\\{vair}',-90);$\par
692$\\{penpos}_{2}(\\{curve},180);\ \\{penpos}_{4}(\\{curve},0);$\par
693$x_{2r}=\1{hround}\1{max}(.5u,1.25u-.5\\{curve});$\par
694$x_{4r}=w-x_{2r};\ x_{1}=x_{3}=.5w;\ y_{1r}=\\{x\_height}+\1{vround}1.5\\{oo};\
695y_{3r}=-\\{oo};$\par
696$y_{2}=y_{4}=.5\\{x\_height}-\\{vair\_corr};\ y_{2l}:=y_{4l}:=.52\\{x%
697\_height};$\par
698$\2{penstroke}\\{pulled\_arc}_{e}(1,2)\AM \\{pulled\_arc}_{e}(2,3)$\par
699\quad${}\AM \\{pulled\_arc}_{e}(3,4)\AM \\{pulled\_arc}_{e}(4,1)\AM \1{cycle};\
700\9 bowl\par
701$\2{numeric}\\{dot\_shift},\\{dot\_top};$\par
702$\2{define\_whole\_blacker\_pixels}(\\{dot\_diam},\\{dot\_top});$\par
703$\\{dot\_shift}=0;\ \9 in this case, the position happens to be correct\par
704$x_{7}=x_{1}-.8\\{dot\_diam};\ x_{8}=x_{7}+1.6\\{dot\_diam};$\par
705$\\{dot\_accent}(7,a,\\{dot\_shift});$\par
706$\\{dot\_accent}(8,b,\\{dot\_shift});$\par
707$\2{penlabels}(1,2,3,4,7,8);\3{endchar};$\par
708\Endmft
709\medskip
710
711\noindent
712in which the line
713
714\medskip
715\Beginmft
716$\\{dot\_sharp\_values};$\par
717\Endmft
718\medskip
719
720\noindent
721expands to a macro
722
723\medskip
724\Beginmft
725$\2{def}\\{dot\_sharp\_values}=$\par
726$\2{numeric}\\{dot\_diam}\SH ;\ \\{dot\_diam}\SH =\1{max}(\\{dot\_size}\SH ,%
727\\{cap\_curve}\SH );$\par
728$\2{numeric}\\{dot\_top}\SH ;\ \\{dot\_top}\SH =\1{min}(\\{asc\_height}\SH ,%
729\frac10/{7}\\{x\_height}\SH +.5\\{dot\_diam}\SH );$\par
730$\!\3{enddef};$\par
731\Endmft
732\medskip
733
734
735\noindent
736which repeats some of the calculations made in the definition of the dot
737accent.
738
739
740The composites that result from this programming effort look, for the
741most part, identical to the results of the application of the {\tt\bs
742accent} primitive to characters in the regular Computer Modern fonts.
743The one major difference comes in the shape of the ``hat'' accent over
744the letter `{\bf i}'.  In this instance, the accent would spread beyond the
745left and right side bearings of the underlying character and mess up the
746letter spacing if it were
747not pinched in, so a special narrow hat accent is provided for `{\bf i}'.
748The proportions of each stroke remain essentially the same as
749those in the original model, but they form an acute angle over the top
750of the letter.  Except in the case of this character and some of the
751uniquely Turkish dotted uppercase letters,
752it will probably be difficult to distinguish the two styles of accent
753in the final printed version even when they are
754intermingled in the same text.
755
756The creation of the composite characters is only the first stage in
757the development of the font.  Next, the italic correction must be set
758for all the italic and slant fonts.  This is the spacing that may be
759added to the right side of any slanted character to prevent it from
760running into something like a non-slanted closing parenthesis.  There
761does not seem to be any way except visual inspection to discover an
762acceptable italic correction.  One wants a fairly simple, general
763calculation, but one which will do rough justice
764to all slanted versions of the character.  There were more proof
765copies generated to get the italic correction right than for any other
766feature of the font.  (In the absence of any accessible system on which
767proofs could be displayed on the screen, a great many paper proofs had
768to be generated.)
769
770Following this comes the generation of ligature and kerning tables,
771which are necessarily quite large, and need to be carefully worked out
772since there is only a finite region of a {\tt tfm} file that can be
773devoted to them.  The smaller of the two ligature tables, for the
774italic fonts, is shown in Appendix A.  It still needs one further refinement;
775the kernings appear in the order of English letter frequency, and it
776might be possible to gain a little efficiency by rearranging some of
777them.  Notice that the `{\bf f$\,$}' ligatures are altogether eliminated.
778In Turkish it is essential to retain the distinction between the
779dotted and the undotted `{\bf i}', which cannot be done if the `{\bf
780fi}' ligature is used.  The problem that arises, in fact, is to provide
781adequate separation between the dotted `{\bf i}' and a preceding `{\bf
782f$\,$}'.
783
784In addition to the accented characters, it was necessary to design
785three additional characters for Ottoman Turkish.  The simplest is
786a dot at about the bar height of lower case `{\bf e}'.  This is
787used for a type of Persian suffix known as ``izafet,'' which is
788very common in Ottoman texts.  The remaining two characters are
789representations of the Arabic letters ``Ayn'' and ``Hamza,'' which
790are conventionally represented by opening and closing single
791quotes in most fonts.  The ``lazy man's \;{`ayn}'' (as just
792illustrated) is acceptable for the occasional reference, but not
793for extensive literary texts.  Ayn is not an accent; it is a
794regular consonant of the Arabic alphabet, and Hamza, though it can
795be omitted in many positions, is also a consonant.  What is needed
796is a pair of characters which are clearly distinguishable from
797single quotes, but sufficiently like them to conform with the
798general appearance of Computer Modern.  The programs shown below
799draw on the same standard definition as is used to generate the
800single quotes, but alter the position and the proportions.  The
801bulb is uppermost in both instances, and is somewhat smaller than
802the bulb of the close quote.  The tail is brought out further from
803the side of the bulb, and is tucked more tightly under.  The
804versions for slanted and italic fonts use some special
805transformations to insure that the {\tenti \`ayn} (that was the
806character from the Ottoman font) is correctly formed.  In
807effect, the character is built out to the left of the centerline,
808with a reverse slant, and then reflected back into the normal letter space.
809The program for these characters is given in Appendix B.
810
811A complete passage from our first proposed critical text edition is
812given below, first in \TeX\ input coding, and then as typeset.  The
813text from which this passage was extracted runs to twelve pages, and
814was set without the benefit of a properly rewritten hyphenation table.
815By good luck, most of the English pattern hyphenations turned out to
816correspond with acceptable Turkish hyphenations, but it will certainly
817be necessary to make up a proper Turkish hyphenation table in the near
818future.  When that is done, and an appropriate set of formatting
819macros has been written to isolate Turkish text from non-Turkish text
820and math mode, we will have a Turkish language adaptation of \TeX\
821which can be exported onto any small \TeX\ system, with no alteration
822of the program whatsoever.  The full range of standard Computer Modern
823font styles will be available, and will blend in perfectly with the
824normal unaccented library of Computer Modern fonts.  We will not have
825a truly bilingual version of \TeX, but for a predominantly Turkish
826language environment we will be offering a cheaper and more accessible
827monolingual font-based adaptation.
828
829\bigskip
830{\verbatim {Tktexinput} }
831
832\bigskip
833\BeginTurkish
834\tentk
835\`A=s:ik!lik! zama=ninda \`is:k! va=sit!asi ve s:eyda=lik! \`a=lemi:nde
836s:evk! vesi=lesi:, vus!lat eyya=minda mah!abbet muk!tez:a=si, fi:ra=k!
837gu:nleri:nde h!urk!at i:k!ti:z:a=si, baha=r mev\-si:mi:nde s!oh!bet
838germi:yyeti:, mah!bu=blar mecli:si:nde s:ara=b keyfi:yyeti:, ca=na=neler
839i:bra=mi ve \`a=s:ik!lar i:k!da=mi ve fuz:ala= mus!a=h!abeti: ve \`uk!ala=
840i:lti:fa=ti, ehl..i: di:ller rag:beti: ve t!a=li:bler mi:nneti: i:le di:du:gu:
841ebya=t ve es:\`a=r, ki: her bi:ri:nu:n= lat!i=f ma\`a=ni=si: ca=m..i naz!ma
842s:ara=b..i rengi=n ve s:i=ri=n h=aya=la=ti bezm..i: s!afa=da nuk!l..i:
843s:ekkeri=n olup mu\`a=s:i:ra=n..i mecli:s..i: z=evk! bu meyh=a=nenu:n=
844ba=deci:si: ve h!ari=fa=n..i bezm..i: s:evk! bu ka=s:a=nenu:n= sebu=-kes:i:
845olmis:lardi.  K!alem..i: i:\`ti:z=a=r bu h=a=me..i: i:nki:sa=r i:le bu evra=k!a
846tah!ri=re i:k!da=m ve bu ecza=ya tast!i=re i:hti:ma=m go:sterdi:.
847\EndTurkish
848\tenrm
849\vfil\eject
850
851\strut\vskip .75in
852\centerline{{\elevenrm Appendix A}}
853\smallskip
854\centerline{{\elevenrm The {\eleventt turkit.mf} driver file}}
855\bigskip
856\Beginmft
857\input turkit.tex
858\Endmft
859
860\vfil\eject
861
862\strut\vskip .75in
863\centerline{{\elevenrm Appendix B}}
864\smallskip
865\centerline{{\elevenrm The {\eleventt aynhmz.mf} file}}
866\Beginmft
867\input aynhmz.tex
868\vfil
869\Endmft
870
871
872
873
874
875
876
877
878\bye
879