### File: macroman.pl
### Version 0.1,  September 15, 1999
### Written by Ross Moore
###
### MacRoman encoding information
###
### based on latin1.pl

## Copyright (C) 1999 by Ross Moore
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# All characters above 0x7F are written as "\xNN" byte escapes rather than
# as raw characters, so the tables keep their meaning whatever encoding
# this source file is stored or edited in.

$CHARSET  = "macroman";
$INPUTENC = 'macroman';    # empty implies 'latin1'

# Character ranges for lower --> upper-case conversion.
# Both strings list the same number of characters (29), position by position.
$sclower = "\\207-\\237\\276\\277\\317\\330";
$scupper = "\\347\\313\\345\\200\\314\\201\\202\\203\\351\\346\\350\\352\\355\\353\\354"
         . "\\204\\356\\361\\357\\205\\315\\362\\364\\363\\206\\256\\257\\316\\331";

# Extra pattern match preceding lower --> upper-case conversion
# (octal 247 = decimal 167 is 'szlig' in the character map below).
$scextra = "s/\\247/ss/g";
%extra_small_caps     = ( '167', 'ss' );
%extra_small_caps_inv = ( '222', 'FI', '223', 'FL' );

# Decimal character code of each lower-case letter => code of its
# upper-case form; the same pairing as $sclower/$scupper above.
%low_entities = (
    '135', '231', '136', '203', '137', '229', '138', '128',
    '139', '204', '140', '129', '141', '130', '142', '131',
    '143', '233', '144', '230', '145', '232', '146', '234',
    '147', '237', '148', '235', '149', '236', '150', '132',
    '151', '238', '152', '241', '153', '239', '154', '133',
    '155', '205', '156', '242', '157', '244', '158', '243',
    '159', '134', '190', '174', '191', '175', '207', '206',
    '216', '217',
);

# Command handlers.  Each one returns whatever iso_map() yields for the
# given (name, suffix) pair, followed by the untouched first argument.
# iso_map() itself is defined outside this file.
#
# NOTE(review): the names ellip, bullet, brvbar, sup1, sup2, sup3, times,
# approx, Delta, Pi, pi, Sigma and Omega are looked up below but have no
# entry in %macroman_character_map; how iso_map() resolves them cannot be
# seen from this file -- confirm before adding entries.

sub do_cmd_oe { return join('', iso_map("oe", "lig"), $_[0]); }
sub do_cmd_OE { return join('', iso_map("OE", "lig"), $_[0]); }

# inhibit later wrapping for an image
$raw_arg_cmds{'oe'} = $raw_arg_cmds{'OE'} = -1;

#sub do_cmd_l { join('', &iso_map("l", "strok"), $_[0]);}
#sub do_cmd_L { join('', &iso_map("L", "strok"), $_[0]);}
#sub do_cmd_ng { join('', &iso_map("eng", ""), $_[0]);}
#sub do_cmd_DH { join('', &iso_map("D", "strok"), $_[0]);}
#sub do_cmd_dh { join('', &iso_map("d", "strok"), $_[0]);}
sub do_cmd_ss { return join('', iso_map("sz", "lig"), $_[0]); }

#sub do_cmd_texteuro { join('', &iso_map("euro", ""), $_[0]);}
sub do_cmd_quotesinglbase     { return join('', iso_map("sbquo", ""), $_[0]); }
sub do_cmd_quotedblbase       { return join('', iso_map("dbquo", ""), $_[0]); }
sub do_cmd_textflorin         { return join('', iso_map("florin", ""), $_[0]); }
sub do_cmd_dots               { return join('', iso_map("ellip", ""), $_[0]); }
sub do_cmd_dag                { return join('', iso_map("dagger", ""), $_[0]); }
sub do_cmd_ddag               { return join('', iso_map("Dagger", ""), $_[0]); }
sub do_cmd_textperthousand    { return join('', iso_map("permil", ""), $_[0]); }
sub do_cmd_guilsinglleft      { return join('', iso_map("lsaquo", ""), $_[0]); }
sub do_cmd_guilsinglright     { return join('', iso_map("rsaquo", ""), $_[0]); }
sub do_cmd_textquoteleft      { return join('', iso_map("lsquo", ""), $_[0]); }
sub do_cmd_textquoteright     { return join('', iso_map("rsquo", ""), $_[0]); }
sub do_cmd_textquotedblleft   { return join('', iso_map("ldquo", ""), $_[0]); }
sub do_cmd_textquotedblright  { return join('', iso_map("rdquo", ""), $_[0]); }
sub do_cmd_textbullet         { return join('', iso_map("bullet", ""), $_[0]); }
sub do_cmd_textendash         { return join('', iso_map("ndash", ""), $_[0]); }
sub do_cmd_textemdash         { return join('', iso_map("mdash", ""), $_[0]); }
sub do_cmd_texttrademark      { return join('', iso_map("trade", ""), $_[0]); }

sub do_cmd_textdegree         { return join('', iso_map("deg", ""), $_[0]); }
sub do_cmd_textexclamdown     { return join('', iso_map("iexcl", ""), $_[0]); }
sub do_cmd_textcent           { return join('', iso_map("cent", ""), $_[0]); }
sub do_cmd_textcurrency       { return join('', iso_map("curren", ""), $_[0]); }
sub do_cmd_textyen            { return join('', iso_map("yen", ""), $_[0]); }
sub do_cmd_textbrokenbar      { return join('', iso_map("brvbar", ""), $_[0]); }
sub do_cmd_textregistered     { return join('', iso_map("reg", ""), $_[0]); }
sub do_cmd_textquestiondown   { return join('', iso_map("iquest", ""), $_[0]); }
sub do_cmd_textperiodcentered { return join('', iso_map("middot", ""), $_[0]); }
sub do_cmd_guillemotleft      { return join('', iso_map("laquo", ""), $_[0]); }
sub do_cmd_guillemotright     { return join('', iso_map("raquo", ""), $_[0]); }

#sub do_cmd_textonequarter { join('', &iso_map("frac14", ""), $_[0]);}
#sub do_cmd_textonehalf { join('', &iso_map("frac12", ""), $_[0]);}
#sub do_cmd_textthreequarters { join('', &iso_map("frac34", ""), $_[0]);}

sub do_cmd_mathdegree         { return join('', iso_map("deg", ""), $_[0]); }
sub do_cmd_mathonesuperior    { return join('', iso_map("sup1", ""), $_[0]); }
sub do_cmd_mathtwosuperior    { return join('', iso_map("sup2", ""), $_[0]); }
sub do_cmd_maththreesuperior  { return join('', iso_map("sup3", ""), $_[0]); }
sub do_cmd_mathordmasculine   { return join('', iso_map("ordm", ""), $_[0]); }
sub do_cmd_mathordfeminine    { return join('', iso_map("ordf", ""), $_[0]); }

sub do_cmd_P         { return join('', iso_map("para", ""), $_[0]); }
sub do_cmd_S         { return join('', iso_map("sect", ""), $_[0]); }
sub do_cmd_pm        { return join('', iso_map("plusmn", ""), $_[0]); }
sub do_cmd_div       { return join('', iso_map("divide", ""), $_[0]); }
sub do_cmd_times     { return join('', iso_map("times", ""), $_[0]); }
#sub do_cmd_minus { join('', &iso_map("shy", ""), $_[0]);}
sub do_cmd_copyright { return join('', iso_map("copy", ""), $_[0]); }
sub do_cmd_pounds    { return join('', iso_map("pound", ""), $_[0]); }
sub do_cmd_cents     { return join('', iso_map("cent", ""), $_[0]); }
sub do_cmd_lnot      { return join('', iso_map("not", ""), $_[0]); }
sub do_cmd_cdot      { return join('', iso_map("middot", ""), $_[0]); }
sub do_cmd_micron    { return join('', iso_map("micro", ""), $_[0]); }

# non-iso-latin characters
sub do_cmd_approx        { return join('', iso_map("approx", ""), $_[0]); }
sub do_cmd_infty         { return join('', iso_map("infin", ""), $_[0]); }
sub do_cmd_int           { return join('', iso_map("int", ""), $_[0]); }
sub do_cmd_geq           { return join('', iso_map("ge", ""), $_[0]); }
sub do_cmd_leq           { return join('', iso_map("le", ""), $_[0]); }
sub do_cmd_neq           { return join('', iso_map("ne", ""), $_[0]); }
sub do_cmd_partial       { return join('', iso_map("part", ""), $_[0]); }
sub do_cmd_surd          { return join('', iso_map("radic", ""), $_[0]); }
sub do_cmd_diamond       { return join('', iso_map("diamond", ""), $_[0]); }
sub do_cmd_textapplelogo { return join('', iso_map("apple", ""), $_[0]); }

# ...including Greeks:
sub do_cmd_Delta { return join('', iso_map("Delta", ""), $_[0]); }
sub do_cmd_Pi    { return join('', iso_map("Pi", ""), $_[0]); }
sub do_cmd_pi    { return join('', iso_map("pi", ""), $_[0]); }
sub do_cmd_Sigma { return join('', iso_map("Sigma", ""), $_[0]); }
sub do_cmd_Omega { return join('', iso_map("Omega", ""), $_[0]); }

# Entity name => the character that represents it in this encoding.
# Every high-bit value here agrees with the key that carries the matching
# LaTeX command in %macroman_character_map_inv further down.
%macroman_character_map = (
    'AElig'   => "\xAE",    # capital AE diphthong (ligature)
    'Aacute'  => "\xE7",    # capital A, acute accent
    'Acirc'   => "\xE5",    # capital A, circumflex accent
    'Agrave'  => "\xCB",    # capital A, grave accent
    'Aring'   => "\x81",    # capital A, ring
    'Atilde'  => "\xCC",    # capital A, tilde
    'Auml'    => "\x80",    # capital A, dieresis or umlaut mark
    'Ccedil'  => "\x82",    # capital C, cedilla
#   'ETH'                   # capital Eth, Icelandic (not in this encoding)
    'Eacute'  => "\x83",    # capital E, acute accent
    'Ecirc'   => "\xE6",    # capital E, circumflex accent
    'Egrave'  => "\xE9",    # capital E, grave accent
    'Euml'    => "\xE8",    # capital E, dieresis or umlaut mark
    'Iacute'  => "\xEA",    # capital I, acute accent
    'Icirc'   => "\xEB",    # capital I, circumflex accent
    'Igrave'  => "\xED",    # capital I, grave accent
    'Iuml'    => "\xEC",    # capital I, dieresis or umlaut mark
    'Ntilde'  => "\x84",    # capital N, tilde
    'OElig'   => "\xCE",
    'Oacute'  => "\xEE",    # capital O, acute accent
    'Ocirc'   => "\xEF",    # capital O, circumflex accent
    'Ograve'  => "\xF1",    # capital O, grave accent
    'Oslash'  => "\xAF",    # capital O, slash
    'Otilde'  => "\xCD",    # capital O, tilde
    'Ouml'    => "\x85",    # capital O, dieresis or umlaut mark
#   'THORN'                 # capital THORN, Icelandic (not in this encoding)
    'Uacute'  => "\xF2",    # capital U, acute accent
    'Ucirc'   => "\xF3",    # capital U, circumflex accent
    'Ugrave'  => "\xF4",    # capital U, grave accent
    'Uuml'    => "\x86",    # capital U, dieresis or umlaut mark
    'Yuml'    => "\xD9",    # capital Y, dieresis or umlaut mark
    'aacute'  => "\x87",    # small a, acute accent
    'acirc'   => "\x89",    # small a, circumflex accent
    'aelig'   => "\xBE",    # small ae diphthong (ligature)
    'agrave'  => "\x88",    # small a, grave accent (was 0xE0, which is 'Dagger')
    'amp'     => '&',       # ampersand
    'aring'   => "\x8C",    # small a, ring
    'atilde'  => "\x8B",    # small a, tilde
    'auml'    => "\x8A",    # small a, dieresis or umlaut mark
    'ccedil'  => "\x8D",    # small c, cedilla
    'inodot'  => "\xF5",    # dotless i
    'eacute'  => "\x8E",    # small e, acute accent
    'ecirc'   => "\x90",    # small e, circumflex accent
    'egrave'  => "\x8F",    # small e, grave accent
#   'eth'                   # small eth, Icelandic (not in this encoding)
    'euml'    => "\x91",    # small e, dieresis or umlaut mark
    'filig'   => "\xDE",    # fi ligature
    'fllig'   => "\xDF",    # fl ligature
    'gt'      => '>',       # greater than
    'iacute'  => "\x92",    # small i, acute accent
    'icirc'   => "\x94",    # small i, circumflex accent
    'igrave'  => "\x93",    # small i, grave accent
    'iuml'    => "\x95",    # small i, dieresis or umlaut mark
    'lt'      => '<',       # less than
    'ntilde'  => "\x96",    # small n, tilde
    'oacute'  => "\x97",    # small o, acute accent
    'ocirc'   => "\x99",    # small o, circumflex accent
    'oelig'   => "\xCF",
    'ograve'  => "\x98",    # small o, grave accent
    'oslash'  => "\xBF",    # small o, slash
    'otilde'  => "\x9B",    # small o, tilde
    'ouml'    => "\x9A",    # small o, dieresis or umlaut mark
    'szlig'   => "\xA7",    # small sharp s, German (sz ligature)
#   'thorn'                 # small thorn, Icelandic (not in this encoding)
    'uacute'  => "\x9C",    # small u, acute accent
    'ucirc'   => "\x9E",    # small u, circumflex accent
    'ugrave'  => "\x9D",    # small u, grave accent
    'uuml'    => "\x9F",    # small u, dieresis or umlaut mark
    'yuml'    => "\xD8",    # small y, dieresis or umlaut mark
    'quot'    => '"',       # double quote

# These have HTML mnemonic names for HTML 4.0 ...
    'lsaquo'  => "\xDC",
    'rsaquo'  => "\xDD",
    'lsquo'   => "\xD4",
    'rsquo'   => "\xD5",
    'ldquo'   => "\xD2",
    'rdquo'   => "\xD3",
    'sbquo'   => "\xE2",
    'dbquo'   => "\xE3",
    'laquo'   => "\xC7",
    'raquo'   => "\xC8",
    'curren'  => "\xDB",    # currency symbol
    'hellip'  => "\xC9",    # ellipsis dots
    'cent'    => "\xA2",    # cents sign
    'pound'   => "\xA3",    # pound sign
    'yen'     => "\xB4",    # yen symbol
    'florin'  => "\xC4",    # florin symbol
    'dagger'  => "\xA0",
    'Dagger'  => "\xE0",    # double-dagger symbol
    'permil'  => "\xE4",    # per thousand symbol (was the malformed text '&228;')
    'frasl'   => "\xDA",    # fraction bar
    'bull'    => "\xA5",
    'shy'     => '-',
    'mdash'   => "\xD1",    # emdash
    'ndash'   => "\xD0",    # endash
    'trade'   => "\xAA",    # trademark symbol
#   'nbsp'                  # non-breaking space (entry left disabled)
    'iexcl'   => "\xC1",    # exclamation mark - upside down
    'iquest'  => "\xC0",    # inverted question
    'sect'    => "\xA4",    # section symbol
    'copy'    => "\xA9",    # copyright mark
    'ordm'    => "\xBC",
    'ordf'    => "\xBB",
    'not'     => "\xC2",    # logical not symbol
    'reg'     => "\xA8",
    'deg'     => "\xA1",
    'plusmn'  => "\xB1",
    'micro'   => "\xB5",
    'para'    => "\xA6",    # paragraph symbol
    'ge'      => "\xB3",
    'le'      => "\xB2",
    'ne'      => "\xAD",
    'int'     => "\xBA",    # integral (was 0xA1, which is 'deg')
    'infin'   => "\xB0",
    'part'    => "\xB6",
    'radic'   => "\xC3",
    'diamond' => "\xD7",
    'apple'   => "\xF0",
    'middot'  => "\xE1",
    'divide'  => "\xD6",

# These are character types without arguments ...
    'grave'   => "`",
    'acute'   => "\xAB",
    'circ'    => "\xF6",
    'breve'   => "\xF9",    # breve accent
    'caron'   => "\xFF",    # caron accent
    # A second 'cedil' entry (0xB8, inherited from latin1.pl) used to
    # follow at the end of this table and silently replaced this value.
    'cedil'   => "\xFC",    # cedilla accent
    'ogon'    => "\xFE",    # ogonek accent
    'tilde'   => '~',
    'tilacc'  => "\xF7",    # tilde accent
    'ring'    => "\xFB",    # ring accent
    'dot'     => "\xFA",
    'uml'     => "\xAC",    # dieresis or umlaut accent
    'macr'    => "\xF8",    # macron accent
    'dblac'   => "\xFD",    # Hungarian umlaut accent
);

# Input character => the LaTeX source that produces it.
# Five ASCII specials first, then one entry for every byte 0x80 .. 0xFF.
%macroman_character_map_inv = (
    '^'    => '\\^{}',
    '>'    => '\\ensuremath{>}',
    '<'    => '\\ensuremath{<}',
    '&'    => '\\&',
    '~'    => '\\~{}',

    "\x80" => '\\"{A}',
    "\x81" => '\\r{A}',
    "\x82" => '\\c{C}',
    "\x83" => '\\\'{E}',
    "\x84" => '\\~{N}',
    "\x85" => '\\"{O}',
    "\x86" => '\\"{U}',
    "\x87" => '\\\'{a}',
    "\x88" => '\\`{a}',
    "\x89" => '\\^{a}',
    "\x8A" => '\\"{a}',
    "\x8B" => '\\~{a}',
    "\x8C" => '\\r{a}',
    "\x8D" => '\\c{c}',
    "\x8E" => '\\\'{e}',
    "\x8F" => '\\`{e}',
    "\x90" => '\\^{e}',
    "\x91" => '\\"{e}',
    "\x92" => '\\\'{\\i}',
    "\x93" => '\\`{\\i}',
    "\x94" => '\\^{\\i}',
    "\x95" => '\\"{\\i}',
    "\x96" => '\\~{n}',
    "\x97" => '\\\'{o}',
    "\x98" => '\\`{o}',
    "\x99" => '\\^{o}',
    "\x9A" => '\\"{o}',
    "\x9B" => '\\~{o}',
    "\x9C" => '\\\'{u}',
    "\x9D" => '\\`{u}',
    "\x9E" => '\\^{u}',
    "\x9F" => '\\"{u}',

    "\xA0" => '\\ensuremath{\\dag{}}',
    "\xA1" => '\\textdegree{}',
    "\xA2" => '\\textcent{}',
    "\xA3" => '\\pounds{}',
    "\xA4" => '\\S{}',
    "\xA5" => '\\textbullet{}',
    "\xA6" => '\\P{}',
    "\xA7" => '\\ss{}',
    "\xA8" => '\\textregistered{}',
    "\xA9" => '\\copyright{}',
    "\xAA" => '\\texttrademark{}',
    "\xAB" => '\\\'{}',
    "\xAC" => '\\"{}',
    "\xAD" => '\\ensuremath{\\neq{}}',
    "\xAE" => '\\AE{}',
    "\xAF" => '\\O{}',

    "\xB0" => '\\ensuremath{\\infty{}}',
    "\xB1" => '\\ensuremath{\\pm}',
    "\xB2" => '\\ensuremath{\\leq{}}',
    "\xB3" => '\\ensuremath{\\geq{}}',
    "\xB4" => '\\textyen{}',
    "\xB5" => '\\ensuremath{\\mu}',
    "\xB6" => '\\ensuremath{\\partial{}}',
    "\xB7" => '\\ensuremath{\\Sigma{}}',
    "\xB8" => '\\ensuremath{\\Pi{}}',
    "\xB9" => '\\ensuremath{\\pi{}}',
    "\xBA" => '\\ensuremath{\\int{}}',
    "\xBB" => '\\textordfeminine{}',
    "\xBC" => '\\textordmasculine{}',
    "\xBD" => '\\ensuremath{\\Omega{}}',
    "\xBE" => '\\ae{}',
    "\xBF" => '\\o{}',

#   "\xC0" => '\\textquestiondown{}',
    "\xC0" => '?`',
#   "\xC1" => '\\textexclamdown{}',
    "\xC1" => '!`',
    "\xC2" => '\\ensuremath{\\lnot{}}',
    "\xC3" => '\\ensuremath{\\surd{}}',
    "\xC4" => '\\textflorin{}',
    "\xC5" => '\\ensuremath{\\approx{}}',
    "\xC6" => '\\ensuremath{\\Delta{}}',
    "\xC7" => '\\guillemotleft{}',
    "\xC8" => '\\guillemotright{}',
    "\xC9" => '\\dots{}',
    "\xCA" => '\\nobreakspace{}',
    "\xCB" => '\\`{A}',
    "\xCC" => '\\~{A}',
    "\xCD" => '\\~{O}',
    "\xCE" => '\\OE{}',
    "\xCF" => '\\oe{}',

#   "\xD0" => '\\textendash{}',
    "\xD0" => '{--}',
#   "\xD1" => '\\textemdash{}',
    "\xD1" => '{---}',
#   "\xD2" => '\\textquotedblleft{}',
    "\xD2" => '{``}',
#   "\xD3" => '\\textquotedblright{}',
    "\xD3" => '{\'\'}',
#   "\xD4" => '\\textquoteleft{}',
    "\xD4" => '{`}',
#   "\xD5" => '\\textquoteright{}',
    "\xD5" => '{\'}',
    "\xD6" => '\\ensuremath{\\div}',
    "\xD7" => '\\ensuremath{\\diamond}',
    "\xD8" => '\\"{y}',
    "\xD9" => '\\"{Y}',
    "\xDA" => '/',
    "\xDB" => '\\textcurrency{}',
    "\xDC" => '\\guilsinglleft{}',
    "\xDD" => '\\guilsinglright{}',
    "\xDE" => 'fi',
    "\xDF" => 'fl',

    "\xE0" => '\\ensuremath{\\ddag}',
#   "\xE1" => '\\textperiodcentered{}',
    "\xE1" => '\\ensuremath{\\cdot{}}',
    "\xE2" => '\\quotesinglbase{}',
    "\xE3" => '\\quotedblbase{}',
    "\xE4" => '\\textperthousand{}',
    "\xE5" => '\\^{A}',
    "\xE6" => '\\^{E}',
    "\xE7" => '\\\'{A}',
    "\xE8" => '\\"{E}',
    "\xE9" => '\\`{E}',
    "\xEA" => '\\\'{I}',
    "\xEB" => '\\^{I}',
    "\xEC" => '\\"{I}',
    "\xED" => '\\`{I}',
    "\xEE" => '\\\'{O}',
    "\xEF" => '\\^{O}',

    "\xF0" => '\\textapplelogo{}',
    "\xF1" => '\\`{O}',
    "\xF2" => '\\\'{U}',
    "\xF3" => '\\^{U}',
    "\xF4" => '\\`{U}',
    "\xF5" => '\\i{}',
    "\xF6" => '\\^{}',
    "\xF7" => '\\~{}',
    "\xF8" => '\\={}',
    "\xF9" => '\\u{}',
    "\xFA" => '\\.{}',
    "\xFB" => '\\r{}',
    "\xFC" => '\\c{\ }',
    "\xFD" => '\\H{ }',
    "\xFE" => '\\k{\ }',
    "\xFF" => '\\v{}',
);

1;