ref: 3c881e8b133876fc8cff92106616bb93166bf113
parent: a33e94a67326d7774e7373b83597d84ee1eb347b
parent: 65c3b339b547b49d1c1d11c56d86ca81f756e9be
author: S. Gilles <sgilles@math.umd.edu>
date: Wed Mar 21 18:49:08 EDT 2018
Merge remote-tracking branch 'ori/master' into libmath
--- a/bench/bigfactorial.myr
+++ /dev/null
@@ -1,28 +1,0 @@
-use std
-use testr
-
-const main = {
- testr.bench([
- [.name="bigfactorial-1", .fn={ctx; bigfact(1)}],
- [.name="bigfactorial-100", .fn={ctx; bigfact(100)}],
- [.name="bigfactorial-1000", .fn={ctx; bigfact(1000)}],
- [.name="bigfactorial-10000", .fn={ctx; bigfact(10000)}],
- ][:])
-}
-
-const bigfact = {n
- var i
- var x, y
-
- if n == 0
- x = std.mkbigint(1)
- else
- x = std.mkbigint(n)
- for i = n-1; i > 0; i--
- y = std.mkbigint(i)
- std.bigmul(x, y)
- std.bigfree(y)
- ;;
- ;;
- -> x
-}
--- /dev/null
+++ b/bench/bigint.myr
@@ -1,0 +1,209 @@
+use std
+use testr
+
+const main = {
+	var lhs, rhs
+	/* parse both fixed operands once, before the benchmark table is handed to testr.bench */
+	lhs = std.get(std.bigparse(astr))
+	rhs = std.get(std.bigparse(bstr))
+	testr.bench([
+		[.name="bigfactorial-1", .fn={ctx; bigfact(1)}],
+		[.name="bigfactorial-100", .fn={ctx; bigfact(100)}],
+		[.name="bigfactorial-1000", .fn={ctx; bigfact(1000)}],
+		[.name="bigfactorial-10000", .fn={ctx; bigfact(10000)}],
+		[.name="bigmul-bothbig", .fn={ctx; bigmul(lhs, rhs)}],
+	][:])
+}
+
+const bigfact = {n
+	var k
+	var acc, factor
+	/* returns n! as a bigint; each per-step factor is freed inside the loop */
+	if n == 0
+		-> std.mkbigint(1)
+	;;
+
+	acc = std.mkbigint(n)
+	for k = n-1; k > 0; k--
+		factor = std.mkbigint(k)
+		std.bigmul(acc, factor)
+		std.bigfree(factor)
+	;;
+	-> acc
+}
+
+const bigmul = {a, b
+	var prod = std.bigdup(a)	/* multiply into a copy; the copy is released before returning */
+	std.bigmul(prod, b)
+	std.bigfree(prod)
+}
+
+const astr = \
+ "786226786586600439317972610307453185844816038396887410361680" \
+ "057562738205768169440311788359575359662484806302309290559269" \
+ "528840503089778892166969404943179665354542662022501031093355" \
+ "271292443882546985002869031472840087285045002214375586489512" \
+ "119373071089907222993450120378930997902435685145827038085819" \
+ "296865519485069915539726113453392326014373721767082018422958" \
+ "717062721702582361925462085790615747641302901942695904395406" \
+ "834224622529709782228408774414640957447279927460707815981786" \
+ "406994864430875491075424852796017575153912350736693997340109" \
+ "179809249816546470560796013608371676540452470243110540567406" \
+ "131454930387632550432649269771702882918734039217249418671214" \
+ "578343865544182288638782234366945485699781398626899358422106" \
+ "382221586857747438786541396870348009298608214885868257313676" \
+ "649092812753573767742580234440720920671581633553397695353934" \
+ "132303144338968914681227568810598784423402415568066389616154" \
+ "614818275979465719251274076724898041828550631986321592027149" \
+ "396140957537591558324156214045032930669195479259917491373556" \
+ "8106323004563791966979449"
+
+const bstr = \
+ "187308423231504228611111080270112029294766282678254231118742" \
+ "976869035200603975231371287575334271604770730635239117286259" \
+ "633494749566591369947211092881454729202366597231676526591300" \
+ "648610814270646993620412724160164594497114189417456618983125" \
+ "754243263403220712648959862959656559260934591165571538609438" \
+ "590305501075673585878506798890948928998460512317734318100422" \
+ "301601872241161203780242712104705885977145795396152336901291" \
+ "797346494631790697490596412053544611041844456774577364325024" \
+ "070127433788225479200261038961006069382091341032772858886595" \
+ "126748124988322385484529859500424755864562815273650872459428" \
+ "604927038475035756775266526245762679156608674463485106692013" \
+ "334198263770435008762560094658051091261225051459463607555576" \
+ "909910323885506799911997243762894976997772525420734032010908" \
+ "232750510323630849750298879815023574738441149575389970852774" \
+ "177147447530826963987253002135706371359716529615192991331710" \
+ "358753881164433191999293286713740248283177543479283727214184" \
+ "724131647548393947909056785714667606883184085430899204162328" \
+ "826158851207379006944693295498006311569980249630659828437857" \
+ "675974052589856026178343278810044863262932813277009490639496" \
+ "536609737088886342712009625141736821206684450062603392153153" \
+ "404209821474892409175922249782691829080890709213170466577080" \
+ "377570831150029278547755147996902849300234357700933105059734" \
+ "535663014087527221170181231192295192940913638374876317838628" \
+ "513685521076746695302116096246663697634145614610623764326966" \
+ "034284535477345974781679205896638416144421212858502427308326" \
+ "932579573129945442468876557939601952816014080189349604230705" \
+ "365868935617410975446640273108649254906769023875954001915851" \
+ "664244700370024600464468562174205614316632651533572360375076" \
+ "397825425411906188567274646160369108041601789830804365153321" \
+ "786129972802577217675779977803595583990850309499110057500386" \
+ "963299465049980241112611167139170503250450996990310096129565" \
+ "074487177158657223017144483992415686889386032110805070566222" \
+ "391021704709176968022469625318920962164345682090022548949639" \
+ "373535013410140815528983041981416489359560548762773247027894" \
+ "433410249929507117822644701257908554656601684665483333856314" \
+ "445354748390977894664864352404291796587312266255127874784721" \
+ "619161441066868319035508018794580703871852715612431050120428" \
+ "135204914805088588056364008960061473425114169699056353553362" \
+ "684434702224467541950013135029769757734688969972162892564754" \
+ "794607854348784493170718776237831436345037722624425771190246" \
+ "018034960769040875816438953729709133678842811906949629452358" \
+ "733021110486943979165015168688484556308945464367476269193790" \
+ "416269749963298838914536589681414703854517183672658308513799" \
+ "079924527379662830333236086766377066200832536550279672992236" \
+ "534386204300849885908109898105830852146495986415657102811369" \
+ "595247130333705192417745868596617533375480391948415664293960" \
+ "986757335528872844228230415255122202991202927184519763895621" \
+ "784166281785570962980369244162505438453718048719964848583896" \
+ "857048160613386597344498469472976896546659619163220731196501" \
+ "323014018615871231780371169751103683603599982508157269363828" \
+ "411288512876302246976722591644117294608452539733909759953439" \
+ "531429839711890887198366410539288744251028511505016177386244" \
+ "749167411006850944434211305749841593738801775687547388254201" \
+ "939297777489971469491544250094576226609276643868397292318619" \
+ "638481092861366771094145813562129333311647611902685299968882" \
+ "120832495897029738418349759779717411822241304202704141876186" \
+ "021609477309152514796122336409731627673520687854759447557901" \
+ "364641037659836873710690927438194862217026533565020830538006" \
+ "474368344215360348610383201842027979922422061958838533809305" \
+ "769494429697360443510783300535985639987094328411985571590736" \
+ "668585260182179277329957988272484981238401320570887133681587" \
+ "104377431152847065007690135501166962851804129900971535475096" \
+ "780466463541055999770138832441321686999264335797733246187289" \
+ "627201885855368556615429895387994196080244415898422345868310" \
+ "490709239989887413180596388426579419833690268988006284505869" \
+ "050445508634068307421234527095591085036542160387367691944010" \
+ "975408030387045757333117117106642362746817079296595077266231" \
+ "539797438547718217585348401525110815431508041372063435381246" \
+ "031899285980848878201413896797811316833560553995491439595681" \
+ "923196663368947274989655741723930512233824969005097885029422" \
+ "357501820683035694227690760029389720555091326855256135152515" \
+ "991629667196092383555620493513052376816166150463402645212712" \
+ "906520400777603115930223645552970514327317217278507643140717" \
+ "182239925721890342870110303375808665304693222547743946076755" \
+ "692015880794045769310816721195424546206360150602831845794754" \
+ "107728490276051145716712025134135902681643628483224712234443" \
+ "063152270612970654343583032461561909892954923011301907408278" \
+ "407275948095435658167567889516277464704601392558782479050096" \
+ "372379916668232181458496952539316561733622359194587739442387" \
+ "588748382930523021381163994808898807073006256512121958841602" \
+ "953834143318421508854501873168004451093624401020578718864158" \
+ "163753449732176772997318191567270326934788485419221293796231" \
+ "298858119919950554081790900335797631743704599354917110833385" \
+ "431529946758628104742509312229561143992532198765988099227018" \
+ "878514141238218580104368183434902069663104345510818435635700" \
+ "414562606631845260797394601892129923774496571513678144171246" \
+ "566200747779503333919046242725933701038577717836624878658828" \
+ "401949953074213765717982884761623247802534761098061468371860" \
+ "040646573304592111813415676017629612099762583556889291591502" \
+ "037421011749177580329597161049249542989001421868003098607452" \
+ "447624875123240558047810930160637311667163115623275956058331" \
+ "757826897673135754479768852033554041590179245281724637243354" \
+ "117485618855501973830027844010664951610244096398130144699577" \
+ "573289088165216285450511045687327534575629779122305699816415" \
+ "839649332244222046600972555138062951807105050363438375687919" \
+ "992990152747901390216324885281609322926002453655984740862104" \
+ "800118089506795694008885846789831227726569500145265086949437" \
+ "988620853249497033861484487843582927839639572167539512092182" \
+ "766072426433981477788791769424931914440282201493136920334470" \
+ "631849934882150702051061806916010852413506785949573428717190" \
+ "652565822614387096957090183695781683632118089627541885999806" \
+ "415805712006844406519039722769587152345216028824048408671591" \
+ "996748454264778680080651811924705017452522895119289297677596" \
+ "399980344578097690385847397582483006778980686294815929593822" \
+ "079423101338051192607368480059375406606683609868638996461314" \
+ "025218799102080893035335496954072017203857904998271542125754" \
+ "657205938132168124359996375519697778439582860828566963816699" \
+ "746486300437187004803313727795498535830803859623878348202161" \
+ "779205893343935527641213944242785604444240371520170466676321" \
+ "928607939131173535257360320584979666827307689152405952207350" \
+ "699524761489708601133045296730863484253942716135384938226198" \
+ "568386927589550770858283642852352203645776295407127244203589" \
+ "644784502503420416982059860877101382070973271189722892984326" \
+ "410970961430713847387443991297677367803419085937220978538980" \
+ "441628473685496662320890458789716737710737534633335780887058" \
+ "229693686098443784495418741977154423684238599047296839730434" \
+ "085083647717961771343882046771178192554297991147688856565555" \
+ "090757465961600177144271662429462033026661376496701392139827" \
+ "805716034927247277392566480664873339284583953871193119773255" \
+ "759149930467692321302473790340664010828943491220928942540701" \
+ "809650948952021861576751017761266006860909899638475036335626" \
+ "074637477900802957227409245252385131499626970250373571243577" \
+ "269099399542105493137222898746523529168177015284588584593530" \
+ "535422847893997078481093842801730870845975425823397648701679" \
+ "269953351511574749765536142563534666242970982516216461548676" \
+ "091848844824472900050116980087609760065613615848853809078135" \
+ "539024293905056113075795105768447478038151819261968647482323" \
+ "595339889631890319991305740056522600422147869065281727002061" \
+ "985563827492146406348739661318028756838105073748842194458561" \
+ "389348774902798361516170139490919233843823391129815389669442" \
+ "389886962769612654436729345525800917085158998450443138829807" \
+ "226401369403848923672976087487989192421758085312025736674941" \
+ "465891203009498155699006389389940020187366385171748881799189" \
+ "859057487614182907767561954322147267560355277722058958977274" \
+ "007000776522440864120260304346026144122668525606742093385842" \
+ "877851187985354298444881951517964028828337147992244078392289" \
+ "033891527938598539476575275618308528833647822827687596807355" \
+ "015006465320113872907922736279079949111368490254190424134142" \
+ "526305891930318351790395305770103209586168680236305757463917" \
+ "504215452060579791599906033628162787284365361089361880425156" \
+ "013152471178783132851787237084697582323149702243584748375544" \
+ "864216759785482917668710098191387194325613366050379525378498" \
+ "060821148303033711198142478670004863329544955847264359312410" \
+ "124051026217318607753467506730576567256247484381058493859200" \
+ "638791430791009825392526429942820641023745462165966092444179" \
+ "625962380866337824952291146979992952948121844170079781600828" \
+ "3296406611"
+
--- a/bench/bld.sub
+++ b/bench/bld.sub
@@ -23,8 +23,8 @@
lib ../lib/crypto:crypto
lib ../lib/testr:testr
;;
-bench bigfactorial =
- bigfactorial.myr
+bench bigint =
+ bigint.myr
lib ../lib/std:std
lib ../lib/sys:sys
lib ../lib/testr:testr
--- /dev/null
+++ b/lib/crypto/ctbig.myr
@@ -1,0 +1,272 @@
+use std
+
+use "ct"
+
+pkg crypto =
+ type ctbig = struct
+ nbit : std.size /* declared width in bits; checksz asserts that operands agree on it */
+ dig : uint32[:] /* little endian, no leading zeros. */
+ ;;
+
+ generic mkctbign : (v : @a, nbit : std.size -> ctbig#) :: numeric,integral @a
+ const mkctbigle : (v : byte[:], nbit : std.size -> ctbig#)
+ //const mkctbigbe : (v : byte[:], nbit : std.size -> ctbig#)
+
+ const ctfree : (v : ctbig# -> void)
+ const ctbigdup : (v : ctbig# -> ctbig#)
+ const ctlike : (v : ctbig# -> ctbig#)
+ const ct2big : (v : ctbig# -> std.bigint#)
+ const big2ct : (v : std.bigint#, ndig : std.size -> ctbig#) /* NOTE: the definition names this parameter nbit and feeds it to ndig(), so it is a bit count */
+ /* arithmetic: the result is stored through r; each definition calls checksz(a, b) and checksz(a, r) */
+ const ctadd : (r : ctbig#, a : ctbig#, b : ctbig# -> void)
+ const ctsub : (r : ctbig#, a : ctbig#, b : ctbig# -> void)
+ const ctmul : (r : ctbig#, a : ctbig#, b : ctbig# -> void)
+ //const ctdivmod : (r : ctbig#, m : ctbig#, a : ctbig#, b : ctbig# -> void)
+ //const ctmodpow : (r : ctbig#, a : ctbig#, b : ctbig# -> void)
+ /* predicates; ctne, ctge and ctle are defined as negations of cteq, ctlt and ctgt */
+ const ctiszero : (v : ctbig# -> bool)
+ const cteq : (a : ctbig#, b : ctbig# -> bool)
+ const ctne : (a : ctbig#, b : ctbig# -> bool)
+ const ctgt : (a : ctbig#, b : ctbig# -> bool)
+ const ctge : (a : ctbig#, b : ctbig# -> bool)
+ const ctlt : (a : ctbig#, b : ctbig# -> bool)
+ const ctle : (a : ctbig#, b : ctbig# -> bool)
+;;
+
+const Base = 0x100000000ul /* 2^32, one past the largest uint32 digit; ctsub adds it back to a digit difference after a borrow */
+
+generic mkctbign = {v : @a, nbit : std.size :: integral,numeric @a
+	var a
+	var val
+	/* Allocates an nbit-wide ctbig holding v (widened to uint64) in its low one or two digits. */
+	a = std.zalloc()
+	/* slzalloc rather than slalloc: every digit that is not assigned below must read as zero */
+	val = (v : uint64)
+	a.nbit = nbit
+	a.dig = std.slzalloc(ndig(nbit))
+	if nbit > 0
+		a.dig[0] = (val : uint32)
+	;;
+	if nbit > 32
+		a.dig[1] = (val >> 32 : uint32)
+	;;
+	-> a
+}
+
+const ct2big = {ct /* builds a std.bigint via std.mk from a copy of ct's digits */
+ -> std.mk([
+ .sign=1, /* NOTE(review): always 1, even when every digit is zero, and digits are not trimmed; confirm std.bigint consumers accept that */
+ .dig=std.sldup(ct.dig) /* duplicated, so the result does not share storage with ct.dig */
+ ])
+}
+
+const big2ct = {ct, nbit
+	var v, n, l
+	/* Despite its name, ct is the source std.bigint; nbit is the width in bits of the ctbig to build. */
+	n = ndig(nbit)
+	l = std.min(n, ct.dig.len)
+	v = std.slzalloc(n)
+	std.slcp(v[:l], ct.dig[:l])	/* equal-length slices: copy the low l digits, the rest of v stays zero */
+	-> std.mk([
+		.nbit=nbit,
+		.dig=v,
+	])
+}
+
+const mkctbigle = {v, nbit
+	var a, last, i, o
+	/*
+	Builds an nbit-wide ctbig from the little-endian bytes in v; each byte is widened to uint32 before it is shifted.
+	It's ok to depend on the length of v here: we can leak the
+	size of the numbers.
+	*/
+	o = 0
+	a = std.slzalloc(ndig(nbit))
+	for i = 0; i + 4 <= v.len; i += 4
+		a[o++] = \
+			((v[i + 0] : uint32) << 0) | \
+			((v[i + 1] : uint32) << 8) | \
+			((v[i + 2] : uint32) << 16) | \
+			((v[i + 3] : uint32) << 24)
+	;;
+	last = 0
+	for i; i < v.len; i++
+		last |= (v[i] : uint32) << (8 * (i & 0x3))
+	;;
+	if (v.len & 0x3) != 0
+		a[o++] = last
+	;;
+	-> std.mk([.nbit=nbit, .dig=a])
+}
+
+const ctlike = {v /* new ctbig with v's nbit and digit count, all digits zeroed */
+ -> std.mk([
+ .nbit = v.nbit,
+ .dig=std.slzalloc(v.dig.len),
+ ])
+}
+
+const ctbigdup = {v /* copy of v: same nbit, digits duplicated with std.sldup */
+ -> std.mk([
+ .nbit=v.nbit,
+ .dig=std.sldup(v.dig),
+ ])
+}
+
+const ctfree = {v /* releases v's digit slice, then the ctbig struct itself */
+ std.slfree(v.dig)
+ std.free(v)
+}
+
+const ctadd = {r, a, b
+	var v, i, carry, n
+	/* r = a + b over equally sized operands; a carry out of the top digit is dropped */
+	checksz(a, b)
+	checksz(a, r)
+
+	carry = 0
+	n = max(a.dig.len, b.dig.len)
+	for i = 0; i < n; i++
+		v = (a.dig[i] : uint64) + (b.dig[i] : uint64) + carry
+		r.dig[i] = (v : uint32)
+		carry = v >> 32	/* the overflow of this digit's sum feeds the next digit */
+	;;
+}
+
+const ctsub = {r, a, b
+	var under, d, k
+	/* r = a - b, one digit at a time; the borrow left after the top digit is discarded */
+	checksz(a, b)
+	checksz(a, r)
+
+	under = 0
+	for k = 0; k < a.dig.len; k++
+		d = (a.dig[k] : uint64) - (b.dig[k] : uint64) - under
+		under = (d & (1<<63)) >> 63
+		d = mux(under, d + Base, d)
+		r.dig[k] = (d : uint32)
+	;;
+}
+
+const ctmul = {r, a, b
+	var i, j, n
+	var ai, bj, wij
+	var carry, t
+	var w
+	/* r = a * b, truncated to the operands' digit count; the product is built in a scratch buffer first */
+	checksz(a, b)
+	checksz(a, r)
+
+	n = a.dig.len
+	w = std.slzalloc(a.dig.len + b.dig.len)
+	for j = 0; j < b.dig.len; j++
+		carry = 0
+		for i = 0; i < a.dig.len; i++
+			ai = (a.dig[i] : uint64)
+			bj = (b.dig[j] : uint64)
+			wij = (w[i+j] : uint64)
+			t = ai * bj + wij + carry
+			w[i+j] = (t : uint32)
+			carry = t >> 32
+		;;
+		w[i + j] = (carry : uint32)
+	;;
+	/* keep the low n digits of the scratch buffer as the result */
+	/* the old r.dig is released unconditionally, so it no longer leaks when r is distinct from a */
+	std.slgrow(&w, n)
+	std.slfree(r.dig)
+	r.dig = w[:n]
+}
+
+//const ctmodpow = {res, a, b
+// /* find rinv, mprime */
+//
+// /* convert to monty space */
+//
+// /* do the modpow */
+//
+// /* and come back */
+//}
+
+const ctiszero = {a
+	var allzero, digzero
+	/* folds every digit into one flag with mux instead of testing digits with an if */
+	allzero = 1
+	for d : a.dig
+		digzero = mux(d, 0, 1)
+		allzero = mux(digzero, allzero, 0)
+	;;
+	-> (allzero : bool)
+}
+
+const cteq = {a, b
+	var z, d, e
+	/* e stays 1 only while every digit pair matches; mux(c, x, y) is used as "c ? x : y" */
+	checksz(a, b)
+
+	e = 1
+	for var i = 0; i < a.dig.len; i++
+		z = a.dig[i] - b.dig[i]
+		d = mux(z, 0, 1)	/* 1 when the digits are equal (z == 0), same arm order as ctiszero */
+		e = mux(e, d, 0)
+	;;
+	-> (e : bool)
+}
+
+const ctne = {a, b
+ var v
+ /* negation of cteq: the bool is cast to byte, passed through not(), and cast back */
+ v = (cteq(a, b) : byte)
+ -> (not(v) : bool)
+}
+
+const ctgt = {a, b
+	var same, above, res
+	/* digits are visited low to high; whenever `same` is clear the running result is replaced, so later digits override earlier ones */
+	checksz(a, b)
+
+	res = 0
+	for var k = 0; k < a.dig.len; k++
+		same = not(a.dig[k] - b.dig[k])
+		above = gt(a.dig[k], b.dig[k])
+		res = mux(same, res, above)
+	;;
+	-> (res : bool)
+}
+
+const ctge = {a, b
+ var v
+ /* a >= b is computed as the negation of ctlt(a, b) */
+ v = (ctlt(a, b) : byte)
+ -> (not(v) : bool)
+}
+
+const ctlt = {a, b
+	var e, d, l
+	/* a < b: digits are visited low to high and a differing digit replaces the running result */
+	checksz(a, b)
+
+	l = 0
+	for var i = 0; i < a.dig.len; i++
+		e = not(a.dig[i] - b.dig[i])
+		d = gt(b.dig[i], a.dig[i])	/* 1 when b's digit is the larger one, i.e. a's digit is smaller */
+		l = mux(e, l, d)
+	;;
+	-> (l : bool)
+}
+
+const ctle = {a, b
+ var v
+ /* a <= b is computed as the negation of ctgt(a, b) */
+ v = (ctgt(a, b) : byte)
+ -> (not(v) : bool)
+}
+
+const ndig = {nbit /* number of uint32 digits allocated for an nbit-wide value */
+ -> (nbit + 8*sizeof(uint32) - 1)/sizeof(uint32) /* NOTE(review): the numerator rounds up in bits (8*sizeof) but the divisor is sizeof(uint32) = 4, so this yields roughly 8x the digits nbit needs; confirm whether /(8*sizeof(uint32)) was intended, and audit callers that index past the exact digit count before tightening */
+}
+
+const checksz = {a, b /* asserts that a and b agree on both bit width and digit count */
+ std.assert(a.nbit == b.nbit, "mismatched bit sizes")
+ std.assert(a.dig.len == b.dig.len, "mismatched backing sizes")
+}
--- a/lib/regex/interp.myr
+++ b/lib/regex/interp.myr
@@ -130,7 +130,6 @@
re.expired = Zthr
re.free = Zthr
re.nfree = 0
- re.nexttid = 0
re.nthr = 0
}
@@ -184,6 +183,7 @@
re.str = str
re.strp = 0
+ re.nexttid = 0
bestmatch = Zthr
states = std.mkbs()
--- a/lib/std/bigint.myr
+++ b/lib/std/bigint.myr
@@ -92,6 +92,7 @@
extern const put : (fmt : byte[:], args : ... -> size)
const Base = 0x100000000ul
+const Kmin = 64
generic mkbigint = {v : @a :: integral,numeric @a
var a
@@ -124,7 +125,7 @@
;;
for i = 0; i + 4 <= v.len; i += 4
- std.slpush(&a.dig, \
+ slpush(&a.dig, \
(v[i + 0] << 0 : uint32) | \
(v[i + 1] << 8 : uint32) | \
(v[i + 2] << 16 : uint32) | \
@@ -135,7 +136,7 @@
off = i & 0x3
last |= (v[off] : uint32) << (8 *off)
;;
- std.slpush(&a.dig, last)
+ slpush(&a.dig, last)
-> trim(a)
}
@@ -170,7 +171,7 @@
}
const bigclear = {v
- std.slfree(v.dig)
+ slfree(v.dig)
v.sign = 0
v.dig = [][:]
-> v
@@ -259,7 +260,7 @@
fit in one digit.
*/
v = mkbigint(1)
- for c : std.bychar(str)
+ for c : bychar(str)
if c == '_'
continue
;;
@@ -452,7 +453,7 @@
for i = 0; i < n; i++
v = (a.dig[i] : uint64) + carry;
if i < b.dig.len
- v += ((b.dig[i] : uint64))
+ v += (b.dig[i] : uint64)
;;
if v >= Base
@@ -515,10 +516,7 @@
/* a *= b */
const bigmul = {a, b
- var i, j
- var ai, bj, wij
- var carry, t
- var w
+ var s
if a.sign == 0 || b.sign == 0
a.sign = 0
@@ -526,11 +524,84 @@
a.dig = [][:]
-> a
elif a.sign != b.sign
- a.sign = -1
+ s = -1
else
- a.sign = 1
+ s = 1
;;
+ umul(a, b)
+
+ a.sign = s
+ -> trim(a)
+}
+
+const umul = {a, b
+	var prod
+	/* a *= b: kmul when both operands have at least Kmin digits, smallmul otherwise */
+	if a.dig.len >= Kmin && b.dig.len >= Kmin
+		prod = mkbigint(0)
+		kmul(prod, a, b)
+		bigmove(a, prod)
+	else
+		smallmul(a, b)
+	;;
+}
+
+const kmul = {r, a, b
+ var x0, x1, y0, y1, n
+ var z0, z1, z2, t0
+ /* Karatsuba multiply: r = a * b assembled from three sub-products z0, z1, z2. t0 is first a swap temporary, later y1 - y0. */
+ if a.dig.len < b.dig.len
+ t0 = a
+ a = b
+ b = t0
+ ;;
+ n = min(a.dig.len / 2, b.dig.len - 1)
+ /* split at digit n: a = x0 + x1*B^n, b = y0 + y1*B^n with B = 2^32; n <= b.dig.len - 1 keeps y1 non-empty */
+ x0 = [.sign=1, .dig=a.dig[:n]]
+ x1 = [.sign=1, .dig=a.dig[n:]]
+ y0 = [.sign=1, .dig=b.dig[:n]]
+ y1 = [.sign=1, .dig=b.dig[n:]]
+ /* z0 = x0 * y0 */
+ z0 = bigdup(&x0)
+ trim(z0)
+ umul(z0, &y0)
+ /* z2 = x1 * y1 */
+ z2 = bigdup(&x1)
+ trim(z2)
+ umul(z2, &y1)
+ /* z1 = (x0 - x1) * (y1 - y0) + z0 + z2, which is x0*y1 + x1*y0 provided the product carries the right sign */
+ /* NOTE(review): umul itself never assigns .sign; confirm z1 ends up with the correct sign when the two differences have opposite signs */
+ z1 = bigdup(&x0)
+ trim(z1)
+ bigsub(z1, &x1)
+ t0 = bigdup(&y1)
+ bigsub(t0, &y0)
+
+ umul(z1, t0)
+ bigadd(z1, z0)
+ bigadd(z1, z2)
+ /* shift by whole digits (32 bits each): z1 by n digits, z2 by 2n digits */
+ bigshli(z1, 32*n)
+ bigshli(z2, 32*2*n)
+ /* r = z0 + z1*B^n + z2*B^(2n) */
+ bigclear(r)
+ bigadd(r, z0)
+ bigadd(r, z1)
+ bigadd(r, z2)
+
+ bigfree(z0)
+ bigfree(z1)
+ bigfree(z2)
+ bigfree(t0)
+}
+
+const smallmul = {a, b
+ var i, j
+ var ai, bj, wij
+ var carry, t
+ var w
+
w = slzalloc(a.dig.len + b.dig.len)
for j = 0; j < b.dig.len; j++
carry = 0
@@ -546,7 +617,6 @@
;;
slfree(a.dig)
a.dig = w
- -> trim(a)
}
const bigdiv = {a : bigint#, b : bigint# -> bigint#
--- a/lib/std/cmp.myr
+++ b/lib/std/cmp.myr
@@ -10,7 +10,7 @@
`After
;;
- generic numcmp : (a : @a, b : @a -> order)
+ generic numcmp : (a : @a, b : @a -> order) :: numeric @a
const strcmp : (a : byte[:], b : byte[:] -> order)
const strncmp : (a : byte[:], b : byte[:], n : size -> order)
const strcasecmp : (a : byte[:], b : byte[:] -> order)
--- a/lib/std/fmt.myr
+++ b/lib/std/fmt.myr
@@ -539,7 +539,7 @@
const intfmt = {sb, opts, signed, bits : uint64, nbits
var isneg
var sval, val
- var b : byte[32]
+ var b : byte[64]
var i, j, npad
var base
--- a/mbld/bld.sub
+++ b/mbld/bld.sub
@@ -1,5 +1,6 @@
bin mbld =
build.myr
+ cpufeatures+x64.s
deps.myr
libs.myr
install.myr
--- /dev/null
+++ b/mbld/cpufeatures+x64.s
@@ -1,0 +1,10 @@
+.globl bld$cpufeatures
+.globl bld$_cpufeatures /* the same entry point exported under a second, underscore-prefixed name */
+bld$cpufeatures:
+bld$_cpufeatures:
+ mov $0x1, %eax /* select CPUID leaf 1 */
+ cpuid /* NOTE(review): cpuid also overwrites %ebx, which is not saved here; confirm the caller's ABI does not expect it preserved */
+ mov %ecx, %eax /* a 32-bit write to %eax clears the upper half of %rax */
+ rol $32, %rax /* move ECX into bits 63..32 */
+ shrd $32, %rdx, %rax /* shift right by 32 while pulling EDX's low 32 bits in at the top: %rax = EDX:ECX */
+ ret
--- a/mbld/main.myr
+++ b/mbld/main.myr
@@ -29,6 +29,7 @@
.opts = [
[.opt='j', .arg="jobs", .desc="build with at most 'jobs' jobs"],
[.opt='t', .arg="tag", .desc="build with specified systag"],
+ [.opt='T', .arg="tag", .desc="build with only the specified systag"],
[.opt='S', .desc="generate assembly when building"],
[.opt='I', .arg="inc", .desc="add 'inc' to your include path"],
[.opt='R', .arg="runsrc", .desc="source to compile and run"],
@@ -51,6 +52,7 @@
| ('I', arg): std.slpush(&bld.opt_incpaths, arg)
| ('B', arg): bld.opt_instbase = arg
| ('t', tag): std.slpush(&tags, tag)
+ | ('T', tag): std.slpush(&bld.opt_alltags, tag)
| ('j', arg): bld.opt_maxproc = std.getv(std.intparse(arg), 1)
| ('R', arg): runsrc = arg
| ('o', arg): bld.opt_objdir = arg
--- a/mbld/opts.myr
+++ b/mbld/opts.myr
@@ -7,9 +7,11 @@
var opt_arch : byte[:]
var opt_sys : byte[:]
var opt_sysvers : (int, int, int)
+ var opt_cpufeatures : uint64
var opt_runtime : byte[:]
var opt_genasm : bool
var opt_incpaths : byte[:][:]
+ var opt_alltags : byte[:][:]
var opt_mcflags : byte[:][:]
var opt_museflags : byte[:][:]
var opt_ldflags : byte[:][:]
@@ -30,14 +32,19 @@
const initopts : (-> void)
const parseversion : (v : byte[:] -> (int, int, int))
+
+ /* not exactly portable, but good enough for now */
+ const CpuidSSE4 : uint64= 0x180000
+ extern const cpufeatures : (-> uint64)
;;
var opt_arch = ""
var opt_sys = ""
var opt_binname = ""
+var opt_cpufeatures = 0ul
var opt_libname = ""
var opt_runtime = ""
-var opt_incpaths /* FIXME: taking a constant slice is a nonconstant initializer */
+var opt_incpaths = [][:]
var opt_instbase = ""
var opt_destdir = ""
var opt_sysvers
@@ -48,6 +55,7 @@
var opt_mcflags = [][:]
var opt_museflags = [][:]
var opt_ldflags = [][:]
+var opt_alltags = [][:]
var opt_objdir = "obj"
var opt_genasm = false
var opt_silent = false
@@ -75,8 +83,10 @@
| unknown: std.fatal("unknown architecture \"{}\"\n", unknown)
;;
+ /* from cpuid with EAX=1; EDX at top, ECX at bottom */
+ opt_cpufeatures = cpufeatures()
+
opt_maxproc = 2*(thread.ncpu() : std.size)
- opt_incpaths = [][:]
opt_instbase = config.Instroot
opt_destdir = std.getenvv("DESTDIR", "")
opt_mc = std.getenvv("MYR_MC", "6m")
--- a/mbld/syssel.myr
+++ b/mbld/syssel.myr
@@ -142,27 +142,38 @@
}
const addsysattrs = {b, tags
- std.htput(b.tags, opt_sys, opt_sysvers)
+ if opt_alltags.len > 0
+ for t : opt_alltags
+ tag(b, t)
+ ;;
+ else
+ std.htput(b.tags, opt_sys, opt_sysvers)
- match opt_sys
- | "freebsd": tag(b, "posixy")
- | "netbsd": tag(b, "posixy")
- | "openbsd": tag(b, "posixy")
- | "osx": tag(b, "posixy")
- | "linux": tag(b, "posixy")
- | "plan9":
- | unknown: std.fatal("unknown system \"{}\"\n", unknown)
- ;;
+ match opt_sys
+ | "freebsd": tag(b, "posixy")
+ | "netbsd": tag(b, "posixy")
+ | "openbsd": tag(b, "posixy")
+ | "osx": tag(b, "posixy")
+ | "linux": tag(b, "posixy")
+ | "plan9":
+ | unknown: std.fatal("unknown system \"{}\"\n", unknown)
+ ;;
- match opt_arch
- | "x64": tag(b, "x64")
- | unknown: std.fatal("unknown architecture {}\n", unknown)
- ;;
- for t : tags
- tag(b, t)
- ;;
+ match opt_arch
+ | "x64":
+ tag(b, "x64")
+ if opt_cpufeatures & CpuidSSE4 == CpuidSSE4
+ tag(b, "sse4")
+ ;;
+ | unknown:
+ std.fatal("unknown architecture {}\n", unknown)
+ ;;
- loadtagfile(b, "bld.tag")
+ for t : tags
+ tag(b, t)
+ ;;
+ loadtagfile(b, "bld.tag")
+ ;;
}
const loadtagfile = {b, tagfile
--- a/mk/bootstrap/bootstrap+Darwin-x86_64.sh
+++ b/mk/bootstrap/bootstrap+Darwin-x86_64.sh
@@ -4,6 +4,7 @@
pwd=`pwd`
set -x
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+ as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
as -g -o lib/thread/start.o lib/thread/start+osx-x64.s
as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -140,5 +141,5 @@
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
- ld -pagezero_size 0x100000000 -macosx_version_min 10.6 -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys -macosx_version_min 10.6
+ ld -pagezero_size 0x100000000 -macosx_version_min 10.6 -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys -macosx_version_min 10.6
true
--- a/mk/bootstrap/bootstrap+FreeBSD-amd64.sh
+++ b/mk/bootstrap/bootstrap+FreeBSD-amd64.sh
@@ -4,6 +4,7 @@
pwd=`pwd`
set -x
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+ as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
as -g -o lib/thread/exit.o lib/thread/exit+freebsd-x64.s
as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -141,5 +142,5 @@
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
- ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+ ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
true
--- a/mk/bootstrap/bootstrap+Linux-x86_64.sh
+++ b/mk/bootstrap/bootstrap+Linux-x86_64.sh
@@ -4,6 +4,7 @@
pwd=`pwd`
set -x
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+ as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
as -g -o lib/thread/exit.o lib/thread/exit+linux-x64.s
as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -141,5 +142,5 @@
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
- ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+ ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
true
--- a/mk/bootstrap/bootstrap+NetBSD-amd64.sh
+++ b/mk/bootstrap/bootstrap+NetBSD-amd64.sh
@@ -4,6 +4,7 @@
pwd=`pwd`
set -x
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+ as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
$pwd/6/6m -I . -I lib/sys lib/std/option.myr
@@ -139,5 +140,5 @@
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
- ld -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+ ld -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
true
--- a/mk/bootstrap/bootstrap+OpenBSD-amd64.sh
+++ b/mk/bootstrap/bootstrap+OpenBSD-amd64.sh
@@ -4,6 +4,7 @@
pwd=`pwd`
set -x
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+ as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
as -g -o lib/thread/exit.o lib/thread/exit+openbsd-x64.s
as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -141,5 +142,5 @@
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
- ld -nopie --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+ ld -nopie --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
true
--- a/mk/bootstrap/bootstrap+Plan9-amd64.sh
+++ b/mk/bootstrap/bootstrap+Plan9-amd64.sh
@@ -4,6 +4,7 @@
pwd=`pwd`
set -x
$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config+plan9-x64.myr
+ 6a -o mbld/cpufeatures.6 mbld/cpufeatures+x64.s
6a -o lib/thread/atomic-impl.6 lib/thread/atomic-impl+plan9-x64.s
6a -o lib/std/getbp.6 lib/std/getbp+plan9-x64.s
$pwd/6/6.out -I lib/sys lib/std/errno+plan9.myr
@@ -138,5 +139,5 @@
$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
- 6l -l -o mbld/mbld $pwd/rt/_myrrt.6 mbld/deps.6 mbld/main.6 mbld/util.6 mbld/libs.6 mbld/syssel.6 mbld/config.6 mbld/opts.6 mbld/subtest.6 mbld/types.6 mbld/test.6 mbld/install.6 mbld/parse.6 mbld/build.6 lib/thread/libthread.a lib/bio/libbio.a lib/regex/libregex.a lib/std/libstd.a lib/sys/libsys.a
+ 6l -l -o mbld/mbld $pwd/rt/_myrrt.6 mbld/deps.6 mbld/main.6 mbld/util.6 mbld/cpufeatures.6 mbld/libs.6 mbld/syssel.6 mbld/config.6 mbld/opts.6 mbld/subtest.6 mbld/types.6 mbld/test.6 mbld/install.6 mbld/parse.6 mbld/build.6 lib/thread/libthread.a lib/bio/libbio.a lib/regex/libregex.a lib/std/libstd.a lib/sys/libsys.a
true