isIdentifierChar to numeric

This commit is contained in:
Alistair Braidwood 2012-10-18 14:50:22 +01:00 committed by Marijn Haverbeke
parent c20cdbf8fc
commit 9bdbbaaf96
2 changed files with 40 additions and 30 deletions

View File

@ -372,10 +372,14 @@
// Test whether a given character is part of an identifier.
function isIdentifierChar(ch) {
return ((ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z") ||
(ch >= "0" && ch <= "9") || ch === "$" || ch === "_" ||
(ch >= "\xaa" && nonASCIIidentifier.test(ch)));
function isIdentifierChar(code) {
  // Takes a numeric character code rather than a one-character string and
  // reports whether it may appear inside an identifier. The ASCII range is
  // settled with plain comparisons, checked from the highest range down.

  // Everything above "z": nothing below 0xaa qualifies, and from 0xaa
  // upwards the decision is delegated to the `nonASCIIidentifier` pattern.
  if (code >= 123) {
    return code >= 0xaa && nonASCIIidentifier.test(String.fromCharCode(code));
  }
  // "a".."z"
  if (code >= 97) return true;
  // Between "Z" and "a" only "_" (95) is accepted.
  if (code >= 91) return code === 95;
  // "A".."Z"
  if (code >= 65) return true;
  // ":" through "@" are never identifier characters.
  if (code >= 58) return false;
  // "0".."9"
  if (code >= 48) return true;
  // Below "0" only "$" (36) is accepted; NaN also ends up here and is rejected.
  return code === 36;
}
// ## Tokenizer
@ -688,11 +692,11 @@
str += String.fromCharCode(parseInt(octal, 8));
tokPos += octal.length - 1;
} else if (ch === "x") {
str += readHexChar(2);
str += String.fromCharCode(readHexChar(2));
} else if (ch === "u") {
str += readHexChar(4);
str += String.fromCharCode(readHexChar(4));
} else if (ch === "U") {
str += readHexChar(8);
str += String.fromCharCode(readHexChar(8));
} else {
switch (ch) {
case "n" : str += "\n"; break;
@ -720,7 +724,7 @@
function readHexChar(len) {
var n = readInt(16, len);
if (n === null) raise(tokStart, "Bad character escape sequence");
return String.fromCharCode(n);
return n;
}
// Used to signal to callers of `readWord1` whether the word
@ -739,21 +743,22 @@
containsEsc = false;
var word, first = true, start = tokPos;
for (;;) {
var ch = input.charAt(tokPos);
var ch = input.charCodeAt(tokPos);
if (isIdentifierChar(ch)) {
if (containsEsc) word += ch;
if (containsEsc) word += input.charAt(tokPos);
++tokPos;
} else if (ch === "\\") {
} else if (ch === 92) { // "\"
if (!containsEsc) word = input.slice(start, tokPos);
containsEsc = true;
if (input.charAt(++tokPos) != "u")
if (input.charCodeAt(++tokPos) != 117) // "u"
raise(tokPos, "Expecting Unicode escape sequence \\uXXXX");
++tokPos;
var esc = readHexChar(4);
if (!esc) raise(tokPos - 1, "Invalid Unicode escape");
if (!(first ? isIdentifierStart(esc.charCodeAt(0)) : isIdentifierChar(esc)))
var escStr = String.fromCharCode(esc);
if (!escStr) raise(tokPos - 1, "Invalid Unicode escape");
if (!(first ? isIdentifierStart(esc) : isIdentifierChar(esc)))
raise(tokPos - 4, "Invalid Unicode escape");
word += esc;
word += escStr;
} else {
break;
}

View File

@ -205,10 +205,14 @@ line break). Used to count lines.</p> </td> <td class="c
<span class="k">if</span> <span class="p">(</span><span class="nx">code</span> <span class="o">&lt;</span> <span class="mi">97</span><span class="p">)</span> <span class="k">return</span> <span class="nx">code</span> <span class="o">===</span> <span class="mi">95</span><span class="p">;</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">code</span> <span class="o">&lt;</span> <span class="mi">123</span><span class="p">)</span><span class="k">return</span> <span class="kc">true</span><span class="p">;</span>
<span class="k">return</span> <span class="nx">code</span> <span class="o">&gt;=</span> <span class="mh">0xaa</span> <span class="o">&amp;&amp;</span> <span class="nx">nonASCIIidentifierStart</span><span class="p">.</span><span class="nx">test</span><span class="p">(</span><span class="nb">String</span><span class="p">.</span><span class="nx">fromCharCode</span><span class="p">(</span><span class="nx">code</span><span class="p">));</span>
<span class="p">}</span></pre></div> </td> </tr> <tr id="section-48"> <td class="docs"> <div class="pilwrap"> <a class="pilcrow" href="#section-48">&#182;</a> </div> <p>Test whether a given character is part of an identifier.</p> </td> <td class="code"> <div class="highlight"><pre> <span class="kd">function</span> <span class="nx">isIdentifierChar</span><span class="p">(</span><span class="nx">ch</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="p">((</span><span class="nx">ch</span> <span class="o">&gt;=</span> <span class="s2">&quot;a&quot;</span> <span class="o">&amp;&amp;</span> <span class="nx">ch</span> <span class="o">&lt;=</span> <span class="s2">&quot;z&quot;</span><span class="p">)</span> <span class="o">||</span> <span class="p">(</span><span class="nx">ch</span> <span class="o">&gt;=</span> <span class="s2">&quot;A&quot;</span> <span class="o">&amp;&amp;</span> <span class="nx">ch</span> <span class="o">&lt;=</span> <span class="s2">&quot;Z&quot;</span><span class="p">)</span> <span class="o">||</span>
<span class="p">(</span><span class="nx">ch</span> <span class="o">&gt;=</span> <span class="s2">&quot;0&quot;</span> <span class="o">&amp;&amp;</span> <span class="nx">ch</span> <span class="o">&lt;=</span> <span class="s2">&quot;9&quot;</span><span class="p">)</span> <span class="o">||</span> <span class="nx">ch</span> <span class="o">===</span> <span class="s2">&quot;$&quot;</span> <span class="o">||</span> <span class="nx">ch</span> <span class="o">===</span> <span class="s2">&quot;_&quot;</span> <span class="o">||</span>
<span class="p">(</span><span class="nx">ch</span> <span class="o">&gt;=</span> <span class="s2">&quot;\xaa&quot;</span> <span class="o">&amp;&amp;</span> <span class="nx">nonASCIIidentifier</span><span class="p">.</span><span class="nx">test</span><span class="p">(</span><span class="nx">ch</span><span class="p">)));</span>
<span class="p">}</span></pre></div> </td> </tr> <tr id="section-48"> <td class="docs"> <div class="pilwrap"> <a class="pilcrow" href="#section-48">&#182;</a> </div> <p>Test whether a given character is part of an identifier.</p> </td> <td class="code"> <div class="highlight"><pre> <span class="kd">function</span> <span class="nx">isIdentifierChar</span><span class="p">(</span><span class="nx">code</span><span class="p">)</span> <span class="p">{</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">code</span> <span class="o">&lt;</span> <span class="mi">48</span><span class="p">)</span> <span class="k">return</span> <span class="nx">code</span> <span class="o">===</span> <span class="mi">36</span><span class="p">;</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">code</span> <span class="o">&lt;</span> <span class="mi">58</span><span class="p">)</span> <span class="k">return</span> <span class="kc">true</span><span class="p">;</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">code</span> <span class="o">&lt;</span> <span class="mi">65</span><span class="p">)</span> <span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">code</span> <span class="o">&lt;</span> <span class="mi">91</span><span class="p">)</span> <span class="k">return</span> <span class="kc">true</span><span class="p">;</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">code</span> <span class="o">&lt;</span> <span class="mi">97</span><span class="p">)</span> <span class="k">return</span> <span class="nx">code</span> <span class="o">===</span> <span class="mi">95</span><span class="p">;</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">code</span> <span class="o">&lt;</span> <span class="mi">123</span><span class="p">)</span><span class="k">return</span> <span class="kc">true</span><span class="p">;</span>
<span class="k">return</span> <span class="nx">code</span> <span class="o">&gt;=</span> <span class="mh">0xaa</span> <span class="o">&amp;&amp;</span> <span class="nx">nonASCIIidentifier</span><span class="p">.</span><span class="nx">test</span><span class="p">(</span><span class="nb">String</span><span class="p">.</span><span class="nx">fromCharCode</span><span class="p">(</span><span class="nx">code</span><span class="p">));</span>
<span class="p">}</span></pre></div> </td> </tr> <tr id="section-49"> <td class="docs"> <div class="pilwrap"> <a class="pilcrow" href="#section-49">&#182;</a> </div> <h2>Tokenizer</h2> </td> <td class="code"> <div class="highlight"><pre></pre></div> </td> </tr> <tr id="section-50"> <td class="docs"> <div class="pilwrap"> <a class="pilcrow" href="#section-50">&#182;</a> </div> <p>These are used when <code>options.locations</code> is on, in order to track
the current line number and start of line offset, in order to set
<code>tokStartLoc</code> and <code>tokEndLoc</code>.</p> </td> <td class="code"> <div class="highlight"><pre> <span class="kd">function</span> <span class="nx">nextLineStart</span><span class="p">()</span> <span class="p">{</span>
@ -458,11 +462,11 @@ will return <code>null</code> unless the integer has exactly <code>len</code> di
<span class="nx">str</span> <span class="o">+=</span> <span class="nb">String</span><span class="p">.</span><span class="nx">fromCharCode</span><span class="p">(</span><span class="nb">parseInt</span><span class="p">(</span><span class="nx">octal</span><span class="p">,</span> <span class="mi">8</span><span class="p">));</span>
<span class="nx">tokPos</span> <span class="o">+=</span> <span class="nx">octal</span><span class="p">.</span><span class="nx">length</span> <span class="o">-</span> <span class="mi">1</span><span class="p">;</span>
<span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="nx">ch</span> <span class="o">===</span> <span class="s2">&quot;x&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">str</span> <span class="o">+=</span> <span class="nx">readHexChar</span><span class="p">(</span><span class="mi">2</span><span class="p">);</span>
<span class="nx">str</span> <span class="o">+=</span> <span class="nb">String</span><span class="p">.</span><span class="nx">fromCharCode</span><span class="p">(</span><span class="nx">readHexChar</span><span class="p">(</span><span class="mi">2</span><span class="p">));</span>
<span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="nx">ch</span> <span class="o">===</span> <span class="s2">&quot;u&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">str</span> <span class="o">+=</span> <span class="nx">readHexChar</span><span class="p">(</span><span class="mi">4</span><span class="p">);</span>
<span class="nx">str</span> <span class="o">+=</span> <span class="nb">String</span><span class="p">.</span><span class="nx">fromCharCode</span><span class="p">(</span><span class="nx">readHexChar</span><span class="p">(</span><span class="mi">4</span><span class="p">));</span>
<span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="nx">ch</span> <span class="o">===</span> <span class="s2">&quot;U&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">str</span> <span class="o">+=</span> <span class="nx">readHexChar</span><span class="p">(</span><span class="mi">8</span><span class="p">);</span>
<span class="nx">str</span> <span class="o">+=</span> <span class="nb">String</span><span class="p">.</span><span class="nx">fromCharCode</span><span class="p">(</span><span class="nx">readHexChar</span><span class="p">(</span><span class="mi">8</span><span class="p">));</span>
<span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
<span class="k">switch</span> <span class="p">(</span><span class="nx">ch</span><span class="p">)</span> <span class="p">{</span>
<span class="k">case</span> <span class="s2">&quot;n&quot;</span> <span class="o">:</span> <span class="nx">str</span> <span class="o">+=</span> <span class="s2">&quot;\n&quot;</span><span class="p">;</span> <span class="k">break</span><span class="p">;</span>
@ -486,7 +490,7 @@ will return <code>null</code> unless the integer has exactly <code>len</code> di
<span class="p">}</span></pre></div> </td> </tr> <tr id="section-69"> <td class="docs"> <div class="pilwrap"> <a class="pilcrow" href="#section-69">&#182;</a> </div> <p>Used to read character escape sequences ('\x', '\u', '\U').</p> </td> <td class="code"> <div class="highlight"><pre> <span class="kd">function</span> <span class="nx">readHexChar</span><span class="p">(</span><span class="nx">len</span><span class="p">)</span> <span class="p">{</span>
<span class="kd">var</span> <span class="nx">n</span> <span class="o">=</span> <span class="nx">readInt</span><span class="p">(</span><span class="mi">16</span><span class="p">,</span> <span class="nx">len</span><span class="p">);</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">n</span> <span class="o">===</span> <span class="kc">null</span><span class="p">)</span> <span class="nx">raise</span><span class="p">(</span><span class="nx">tokStart</span><span class="p">,</span> <span class="s2">&quot;Bad character escape sequence&quot;</span><span class="p">);</span>
<span class="k">return</span> <span class="nb">String</span><span class="p">.</span><span class="nx">fromCharCode</span><span class="p">(</span><span class="nx">n</span><span class="p">);</span>
<span class="k">return</span> <span class="nx">n</span><span class="p">;</span>
<span class="p">}</span></pre></div> </td> </tr> <tr id="section-70"> <td class="docs"> <div class="pilwrap"> <a class="pilcrow" href="#section-70">&#182;</a> </div> <p>Used to signal to callers of <code>readWord1</code> whether the word
contained any escape sequences. This is needed because words with
escape sequences must not be interpreted as keywords.</p> </td> <td class="code"> <div class="highlight"><pre> <span class="kd">var</span> <span class="nx">containsEsc</span><span class="p">;</span></pre></div> </td> </tr> <tr id="section-71"> <td class="docs"> <div class="pilwrap"> <a class="pilcrow" href="#section-71">&#182;</a> </div> <p>Read an identifier, and return it as a string. Sets <code>containsEsc</code>
@ -497,21 +501,22 @@ contains an escape, as a micro-optimization.</p> </td>
<span class="nx">containsEsc</span> <span class="o">=</span> <span class="kc">false</span><span class="p">;</span>
<span class="kd">var</span> <span class="nx">word</span><span class="p">,</span> <span class="nx">first</span> <span class="o">=</span> <span class="kc">true</span><span class="p">,</span> <span class="nx">start</span> <span class="o">=</span> <span class="nx">tokPos</span><span class="p">;</span>
<span class="k">for</span> <span class="p">(;;)</span> <span class="p">{</span>
<span class="kd">var</span> <span class="nx">ch</span> <span class="o">=</span> <span class="nx">input</span><span class="p">.</span><span class="nx">charAt</span><span class="p">(</span><span class="nx">tokPos</span><span class="p">);</span>
<span class="kd">var</span> <span class="nx">ch</span> <span class="o">=</span> <span class="nx">input</span><span class="p">.</span><span class="nx">charCodeAt</span><span class="p">(</span><span class="nx">tokPos</span><span class="p">);</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">isIdentifierChar</span><span class="p">(</span><span class="nx">ch</span><span class="p">))</span> <span class="p">{</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">containsEsc</span><span class="p">)</span> <span class="nx">word</span> <span class="o">+=</span> <span class="nx">ch</span><span class="p">;</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">containsEsc</span><span class="p">)</span> <span class="nx">word</span> <span class="o">+=</span> <span class="nx">input</span><span class="p">.</span><span class="nx">charAt</span><span class="p">(</span><span class="nx">tokPos</span><span class="p">);</span>
<span class="o">++</span><span class="nx">tokPos</span><span class="p">;</span>
<span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="nx">ch</span> <span class="o">===</span> <span class="s2">&quot;\\&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="nx">ch</span> <span class="o">===</span> <span class="mi">92</span><span class="p">)</span> <span class="p">{</span> <span class="c1">// &quot;\&quot;</span>
<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="nx">containsEsc</span><span class="p">)</span> <span class="nx">word</span> <span class="o">=</span> <span class="nx">input</span><span class="p">.</span><span class="nx">slice</span><span class="p">(</span><span class="nx">start</span><span class="p">,</span> <span class="nx">tokPos</span><span class="p">);</span>
<span class="nx">containsEsc</span> <span class="o">=</span> <span class="kc">true</span><span class="p">;</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">input</span><span class="p">.</span><span class="nx">charAt</span><span class="p">(</span><span class="o">++</span><span class="nx">tokPos</span><span class="p">)</span> <span class="o">!=</span> <span class="s2">&quot;u&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">input</span><span class="p">.</span><span class="nx">charCodeAt</span><span class="p">(</span><span class="o">++</span><span class="nx">tokPos</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">117</span><span class="p">)</span> <span class="c1">// &quot;u&quot;</span>
<span class="nx">raise</span><span class="p">(</span><span class="nx">tokPos</span><span class="p">,</span> <span class="s2">&quot;Expecting Unicode escape sequence \\uXXXX&quot;</span><span class="p">);</span>
<span class="o">++</span><span class="nx">tokPos</span><span class="p">;</span>
<span class="kd">var</span> <span class="nx">esc</span> <span class="o">=</span> <span class="nx">readHexChar</span><span class="p">(</span><span class="mi">4</span><span class="p">);</span>
<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="nx">esc</span><span class="p">)</span> <span class="nx">raise</span><span class="p">(</span><span class="nx">tokPos</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Invalid Unicode escape&quot;</span><span class="p">);</span>
<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="p">(</span><span class="nx">first</span> <span class="o">?</span> <span class="nx">isIdentifierStart</span><span class="p">(</span><span class="nx">esc</span><span class="p">.</span><span class="nx">charCodeAt</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="o">:</span> <span class="nx">isIdentifierChar</span><span class="p">(</span><span class="nx">esc</span><span class="p">)))</span>
<span class="kd">var</span> <span class="nx">escStr</span> <span class="o">=</span> <span class="nb">String</span><span class="p">.</span><span class="nx">fromCharCode</span><span class="p">(</span><span class="nx">esc</span><span class="p">);</span>
<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="nx">escStr</span><span class="p">)</span> <span class="nx">raise</span><span class="p">(</span><span class="nx">tokPos</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Invalid Unicode escape&quot;</span><span class="p">);</span>
<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="p">(</span><span class="nx">first</span> <span class="o">?</span> <span class="nx">isIdentifierStart</span><span class="p">(</span><span class="nx">esc</span><span class="p">)</span> <span class="o">:</span> <span class="nx">isIdentifierChar</span><span class="p">(</span><span class="nx">esc</span><span class="p">)))</span>
<span class="nx">raise</span><span class="p">(</span><span class="nx">tokPos</span> <span class="o">-</span> <span class="mi">4</span><span class="p">,</span> <span class="s2">&quot;Invalid Unicode escape&quot;</span><span class="p">);</span>
<span class="nx">word</span> <span class="o">+=</span> <span class="nx">esc</span><span class="p">;</span>
<span class="nx">word</span> <span class="o">+=</span> <span class="nx">escStr</span><span class="p">;</span>
<span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
<span class="k">break</span><span class="p">;</span>
<span class="p">}</span>